From 85cee36e03754cf982b1addcfeae831d8a09f428 Mon Sep 17 00:00:00 2001 From: jyamihud Date: Thu, 14 Apr 2022 19:42:41 +0300 Subject: [PATCH] Edited the check.py and the missing algorithm. --- .gitignore | 1 + check.py | 50 ++++++++------ data/missing2.json | 10 +++ modules/missing.py | 163 +++++++++++++++++++++++++++++++++++++++++++++ modules/search.py | 33 +-------- 5 files changed, 206 insertions(+), 51 deletions(-) create mode 100644 data/missing2.json create mode 100644 modules/missing.py diff --git a/.gitignore b/.gitignore index 9a50e2e..bd280af 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ port.json config.json +data/missing.json *.pyc *~ diff --git a/check.py b/check.py index 4a6f7d4..69c7be7 100644 --- a/check.py +++ b/check.py @@ -7,8 +7,21 @@ # your ability to set it up and running. import os +import sys import json +from modules import missing + +args = sys.argv + +if "--help" in args: + print("You can use the next commands after check.py") + print() + print(" -links | Will skip checking the links") + print() + print(" --help | Displays this help menu") + exit() + # TODO: Make a pull-request before starting print("* Checking...") @@ -17,11 +30,11 @@ print("* Checking...") try: with open("data/missing.json") as json_file: - missing = json.load(json_file) + miss = json.load(json_file) except: - missing = {} + miss = {} -if missing: +if miss: print() print("* Missing Software found! Please copy-paste the next") print(" section into our Missing Software Mega-Thread at: ") @@ -29,15 +42,9 @@ if missing: print("===========================================================") print() -# TODO: Make it check if the items in the missing.json already added -# in the last pull-request, or manually by the operator. If they -# are found. Remove them from the file. And from the list. - -for i in missing: - name = i[0].upper()+i[1:].lower() - print(' - [ ] '+name+' *Searched at least '+str(missing[i]) +" times.*") +missing.List() -if missing: +if miss: print() print("===========================================================") print() @@ -71,17 +78,18 @@ for f in os.listdir("apps"): continue # Links - for link in app.get("links",[]): + if not "-links" in args: + for link in app.get("links",[]): - # TODO: Make the tester of the links closer to the kind - # of response that you will get in the web-browser. + # TODO: Make the tester of the links closer to the kind + # of response that you will get in the web-browser. - try: - urllib.request.urlopen(app.get("links",[])[link]) - except Exception as e: - if "403" not in str(e): - error = iferror() - print(" - [ ] `apps/"+f+"` "+link+" link doesn't seem to work.") + try: + urllib.request.urlopen(app.get("links",[])[link]) + except Exception as e: + if "403" not in str(e): + error = iferror() + print(" - [ ] `apps/"+f+"` "+link+" link doesn't seem to work.") # Licenses lices = app.get("licenses", []) @@ -101,7 +109,7 @@ for f in os.listdir("apps"): error = iferror() print(" - [ ] `apps/"+f+"` License '"+lic+"' is unknown.") - +print() print("===========================================================") print() print("* Check is finished!") diff --git a/data/missing2.json b/data/missing2.json new file mode 100644 index 0000000..517bb3f --- /dev/null +++ b/data/missing2.json @@ -0,0 +1,10 @@ +{ + "3dsmax": 2, + "favicon.ico": 6, + "premier": 3, + "recapcha": 1, + "server-status": 2, + "spotify": 1, + "vlc": 1, + "mastodon":326 +} diff --git a/modules/missing.py b/modules/missing.py new file mode 100644 index 0000000..a53cf85 --- /dev/null +++ b/modules/missing.py @@ -0,0 +1,163 @@ +# THIS SOFTWARE IS A PART OF FREE COMPETITOR PROJECT +# THE FOLLOWING SOURCE CODE I UNDER THE GNU +# AGPL LICENSE V3 OR ANY LATER VERSION. + +# This project is not for simple users, but for +# web-masters and a like, so we are counting on +# your ability to set it up and running. + +################################################## + +# This file is importart, since it's important to +# keep track of software that the users are +# searching, but not getting any answers to. +# The idea is very simple. If the score of any +# search is below 60%, we add the search term into +# a list. But instead of it being a dumb list, it's +# a smart list. + +# Using the similar() function we can group very +# similar terms together. So the users could misspel +# certain names. For example we might group '3D Max' +# '3DSMax' into the same data-structure. Using the +# simple count of how much users spell this or that +# name more often, we will make a suggestion for the +# maintainer. So the maintainer could add the missing +# names into the data. Or at least report them to us. + +################################################## + +import os +import json + +from difflib import SequenceMatcher # checks how similar are two strings + +def similar(a, b): + # I guess it simpifies the syntax for SequenceMatcher + # In the previous version we use Lavenshtain but it made + # it an issue for some people to install. + return SequenceMatcher(None, a, b).ratio() + +def add(name): + + # This function will add a datapoint into the missing + + # This shows up way too often. And will show up. Untill we + # will desing a logo. + if name == "favicon.ico": + return + + # first we need to make sure that the file exists + try: + with open("data/missing.json") as json_file: + missing = json.load(json_file) + + # Reverse the old file + if type(missing) == dict: + missing = [] + + except: + missing = [] + + # There could be a problem with writing so we look into it + # for a very close match. Up to about 60%. No more. + + match_missing = 0 + closest_missing = 0 + + found = False + for ind, n in enumerate(missing): + for i in n: + sim = similar(name, i) + if sim > 0.6: # At least 60% match + found = True + if match_missing < sim: + match_missing = sim + closest_missing = ind + if not found: + missing.append({name:1}) + else: + + if not name in missing[closest_missing]: + missing[closest_missing][name] = 1 + else: + missing[closest_missing][name] += 1 + + # Now we save the file + with open("data/missing.json", 'w') as f: + json.dump(missing, f, indent=4, sort_keys=True) + +def remove(name): + + # This function will remove a datapoint from the missing + + try: + with open("data/missing.json") as json_file: + missing = json.load(json_file) + + # Reverse the old file + if type(missing) == dict: + missing = [] + + except: + missing = [] + + # There could be a problem with writing so we look into it + # for a very close match. Up to about 60%. No more. + + match_missing = 0 + closest_missing = 0 + + found = False + for ind, n in enumerate(missing): + for i in n: + sim = similar(name, i) + if sim > 0.6: # At least 60% match + found = True + if match_missing < sim: + match_missing = sim + closest_missing = ind + + if found: + del missing[closest_missing] + + # Now we save the file + with open("data/missing.json", 'w') as f: + json.dump(missing, f, indent=4, sort_keys=True) + + +def List(): + + # This function will list missing in markdown format + + try: + with open("data/missing.json") as json_file: + missing = json.load(json_file) + + # Reverse the old file + if type(missing) == dict: + missing = [] + + except: + missing = [] + + print("| Done | Best Name | Other Names |") + print("| --- | --- | --- |") + for i in missing: + i = sorted(i.items(), key=lambda x:x[1]) + i = dict(i) + + s = "| | **"+list(i.keys())[0]+"**" + if len(i) > 1: + s = s + " | " + for b in i: + if b == list(i.keys())[0]: + continue + comma = ", " + if b == list(i.keys())[-1]: + comma = "" + s = s + b + comma + s = s + " |" + else: + s = s + " | |" + print(s) diff --git a/modules/search.py b/modules/search.py index f3ae8e9..c13e331 100644 --- a/modules/search.py +++ b/modules/search.py @@ -8,6 +8,7 @@ import os import json +from modules import missing from difflib import SequenceMatcher # checks how similar are two strings def similar(a, b): @@ -46,6 +47,7 @@ def search_app(name): match = m if closest: + missing.remove(closest["names"][0]) return closest, match # If there was no match for the program by name @@ -53,36 +55,7 @@ def search_app(name): # the operator of the website could see that there # was a name that didn't have a file for it. - # We open the 'missing.json' file - try: - with open("data/missing.json") as json_file: - missing = json.load(json_file) - except: - missing = {} - - # There could be a problem with writing so we look into it - # for a very close match. Up to about 60%. No more. - - match_missing = 0 - closest_missing = "" - for i in missing: - m = similar(i.lower(), name.lower()) - if m > match_missing and m > 0.6: - closest_missing = i - match_missing = 0 - write_to = closest_missing - if not write_to: - write_to = name - - # Now we add one number to it's mention - if write_to not in missing: - missing[write_to] = 1 - else: - missing[write_to] += 1 - - # Now we save the file - with open("data/missing.json", 'w') as f: - json.dump(missing, f, indent=4, sort_keys=True) + missing.add(name)