Edited the check.py and the missing algorithm.

2022-04-14 19:42:41 +03:00 · 2022-04-14 19:42:41 +03:00 · 85cee36e03
commit 85cee36e03
parent fc689343a9
5 changed files with 206 additions and 51 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,5 @@
 port.json
 config.json
+data/missing.json
 *.pyc
 *~
--- a/check.py
+++ b/check.py
@ -7,8 +7,21 @@
 # your ability to set it up and running.

 import os
+import sys
 import json

+from modules import missing
+
+args = sys.argv
+
+if "--help" in args:
+    print("You can use the next commands after check.py")
+    print()
+    print(" -links     | Will skip checking the links")
+    print()
+    print(" --help     | Displays this help menu")
+    exit()
+
 # TODO: Make a pull-request before starting

 print("* Checking...")
@ -17,11 +30,11 @@ print("* Checking...")

 try:
    with open("data/missing.json") as json_file:
-        missing = json.load(json_file)
+        miss = json.load(json_file)
 except:
-    missing = {}
+    miss = {}

-if missing:
+if miss:
    print()
    print("* Missing Software found! Please copy-paste the next")
    print("  section into our Missing Software Mega-Thread at: ")
@ -29,15 +42,9 @@ if missing:
    print("===========================================================")
    print()

-# TODO: Make it check if the items in the missing.json already added
-#       in the last pull-request, or manually by the operator. If they
-#       are found. Remove them from the file. And from the list.
+missing.List()

-for i in missing:
-    name = i[0].upper()+i[1:].lower()
-    print(' - [ ] '+name+' *Searched at least '+str(missing[i]) +" times.*")
-
-if missing:
+if miss:
    print()
    print("===========================================================")
    print()
@ -71,17 +78,18 @@ for f in os.listdir("apps"):
            continue

        # Links
-        for link in app.get("links",[]):
+        if not "-links" in args:
+            for link in app.get("links",[]):
            
-            # TODO: Make the tester of the links closer to the kind
-            # of response that you will get in the web-browser.
+                # TODO: Make the tester of the links closer to the kind
+                # of response that you will get in the web-browser.
            
-            try:
-                urllib.request.urlopen(app.get("links",[])[link])
-            except Exception as e:
-                if "403" not in str(e):
-                    error = iferror()
-                    print(" - [ ] `apps/"+f+"` "+link+" link doesn't seem to work.")
+                try:
+                    urllib.request.urlopen(app.get("links",[])[link])
+                except Exception as e:
+                    if "403" not in str(e):
+                        error = iferror()
+                        print(" - [ ] `apps/"+f+"` "+link+" link doesn't seem to work.")

        # Licenses
        lices = app.get("licenses", [])
@ -101,7 +109,7 @@ for f in os.listdir("apps"):
                error = iferror()
                print(" - [ ] `apps/"+f+"` License '"+lic+"' is unknown.")
                
-        
+print()        
 print("===========================================================")
 print()
 print("* Check is finished!")
--- a/data/missing2.json
+++ b/data/missing2.json
@ -0,0 +1,10 @@
+{
+    "3dsmax": 2,
+    "favicon.ico": 6,
+    "premier": 3,
+    "recapcha": 1,
+    "server-status": 2,
+    "spotify": 1,
+    "vlc": 1,
+    "mastodon":326
+}
--- a/modules/missing.py
+++ b/modules/missing.py
@ -0,0 +1,163 @@
+# THIS SOFTWARE IS A PART OF FREE COMPETITOR PROJECT
+# THE FOLLOWING SOURCE CODE IS UNDER THE GNU
+# AGPL LICENSE V3 OR ANY LATER VERSION.
+
+# This project is not for simple users, but for
+# web-masters and the like, so we are counting on
+# your ability to set it up and running.
+
+##################################################
+
+# This file is important, since we need to
+# keep track of software that the users are
+# searching for, but not getting any answers to.
+# The idea is very simple. If the score of any
+# search is below 60%, we add the search term into
+# a list. But instead of it being a dumb list, it's
+# a smart list.
+
+# Using the similar() function we can group very
+# similar terms together, since users may misspell
+# certain names. For example we might group '3D Max'
+# and '3DSMax' into the same data-structure. Using a
+# simple count of how many users spell this or that
+# name more often, we will make a suggestion for the
+# maintainer. So the maintainer could add the missing
+# names into the data. Or at least report them to us.
+
+##################################################
+
+import os
+import json
+
+from difflib import SequenceMatcher # checks how similar are two strings
+
+def similar(a, b):
+    # Returns how similar a and b are, as the ratio() of difflib's
+    # SequenceMatcher: a float from 0 (nothing alike) to 1 (identical).
+    # I guess it simplifies the syntax for SequenceMatcher.
+    # In the previous version we used Levenshtein, but it made
+    # it an issue for some people to install.
+    return SequenceMatcher(None, a, b).ratio()
+
+def add(name):
+
+    # This function will add a datapoint into the missing list.
+    # It counts one more search for `name` in data/missing.json.
+    # The file holds a list of groups. Each group is a dict that
+    # maps a spelling to the number of times it was added.
+
+    # This shows up way too often. And it will keep showing up,
+    # until we design a logo.
+    if name == "favicon.ico":
+        return
+    
+    # First we try to load the data that is already there.
+    try:
+        with open("data/missing.json") as json_file:
+            missing = json.load(json_file)
+
+            # Reset the old file. If it holds a dict instead of
+            # a list, its content is dropped and we start over.
+            if type(missing) == dict:
+                missing = []
+            
+    # Whatever goes wrong while reading, we start with an empty list.
+    except:
+        missing = []
+
+    # The name could be spelled differently, so we look through all
+    # the stored names for a very close match. More than 60%.
+
+    # match_missing is the best similarity seen so far and
+    # closest_missing is the index of the group it was found in.
+    match_missing = 0
+    closest_missing = 0
+    
+    found = False
+    for ind, n in enumerate(missing):
+        for i in n:
+            sim = similar(name, i)
+            if sim > 0.6: # More than 60% match
+                found = True
+                if match_missing < sim:
+                    match_missing = sim
+                    closest_missing = ind
+    # No group is close enough, so the name starts a new group.
+    if not found:
+        missing.append({name:1})
+    else:
+
+        # Count this exact spelling inside the closest group.
+        if not name in missing[closest_missing]:
+            missing[closest_missing][name] = 1
+        else:
+            missing[closest_missing][name] += 1
+        
+    # Now we save the file
+    with open("data/missing.json", 'w') as f:
+        json.dump(missing, f, indent=4, sort_keys=True)
+
+def remove(name):
+
+    # This function will remove a datapoint from the missing list.
+    # It deletes the whole group of data/missing.json that holds the
+    # spelling closest to `name`, with all the spellings in it.
+
+    try:
+        with open("data/missing.json") as json_file:
+            missing = json.load(json_file)
+
+            # Reset the old file. If it holds a dict instead of
+            # a list, its content is dropped and we start over.
+            if type(missing) == dict:
+                missing = []
+            
+    # Whatever goes wrong while reading, we start with an empty list.
+    except:
+        missing = []
+
+    # The name could be spelled differently, so we look through all
+    # the stored names for a very close match. More than 60%.
+
+    # match_missing is the best similarity seen so far and
+    # closest_missing is the index of the group it was found in.
+    match_missing = 0
+    closest_missing = 0
+    
+    found = False
+    for ind, n in enumerate(missing):
+        for i in n:
+            sim = similar(name, i)
+            if sim > 0.6: # More than 60% match
+                found = True
+                if match_missing < sim:
+                    match_missing = sim
+                    closest_missing = ind
+
+    # Only the one group with the best match is deleted.
+    if found:
+        del missing[closest_missing]
+
+    # Now we save the file. This happens even if nothing was removed.
+    with open("data/missing.json", 'w') as f:
+        json.dump(missing, f, indent=4, sort_keys=True)
+
+
+def List():
+
+    # This function will list missing in markdown format.
+    #
+    # It reads data/missing.json and prints a markdown table with
+    # one row per group of similar names. The "Done" column is left
+    # empty, "Best Name" is the most searched spelling of the group
+    # and "Other Names" are the rest of its spellings.
+    # It takes no arguments and returns nothing.
+
+    try:
+        with open("data/missing.json") as json_file:
+            missing = json.load(json_file)
+    except (OSError, ValueError):
+        # There is no readable file, or it is not valid JSON.
+        # So there is nothing to list.
+        missing = []
+
+    # The old file was a dict. It has no groups to list, so it
+    # (and anything else that is not a list) counts as empty.
+    if not isinstance(missing, list):
+        missing = []
+
+    print("| Done | Best Name | Other Names |")
+    print("| --- | --- | --- |")
+    for group in missing:
+
+        # A broken or empty group has no best name to show.
+        if not isinstance(group, dict) or not group:
+            continue
+
+        # Sort from the most searched spelling to the least, so the
+        # most common one becomes the "Best Name".
+        names = sorted(group, key=group.get, reverse=True)
+
+        best = names[0]
+        others = ", ".join(names[1:])
+
+        if others:
+            print("| | **"+best+"** | "+others+" |")
+        else:
+            print("| | **"+best+"** | |")
--- a/modules/search.py
+++ b/modules/search.py
@ -8,6 +8,7 @@

 import os
 import json
+from modules import missing
 from difflib import SequenceMatcher # checks how similar are two strings

 def similar(a, b):
@ -46,6 +47,7 @@ def search_app(name):
                match = m

    if closest:
+        missing.remove(closest["names"][0])
        return closest, match

    # If there was no match for the program by name
@ -53,36 +55,7 @@ def search_app(name):
    # the operator of the website could see that there
    # was a name that didn't have a file for it.

-    # We open the 'missing.json' file
-    try:
-        with open("data/missing.json") as json_file:
-            missing = json.load(json_file)
-    except:
-        missing = {}
-
-    # There could be a problem with writing so we look into it
-    # for a very close match. Up to about 60%. No more.
-
-    match_missing = 0
-    closest_missing = ""
-    for i in missing:
-        m = similar(i.lower(), name.lower())
-        if m > match_missing and m > 0.6:
-            closest_missing  = i
-            match_missing = 0
-    write_to = closest_missing
-    if not write_to:
-        write_to = name
-
-    # Now we add one number to it's mention
-    if write_to not in missing:
-        missing[write_to] = 1
-    else:
-        missing[write_to] += 1
-
-    # Now we save the file
-    with open("data/missing.json", 'w') as f:
-        json.dump(missing, f, indent=4, sort_keys=True)
+    missing.add(name)