A bit more protection against scrapers increasing view counts.

2024-11-30 14:27:26 +02:00 · 2024-11-30 14:27:26 +02:00 · 63ce632ce9
commit 63ce632ce9
parent ba14217714
3 changed files with 43 additions and 8 deletions
--- a/modules/Render.py
+++ b/modules/Render.py
@ -15,6 +15,7 @@ from modules import markdown
 from modules.Common import *

 KnownCookies = []
+RecentArticles = {}

 def guess_type(path):

@ -482,6 +483,10 @@ def ArticlePage(server, url):
    if url.endswith(".md"):
        url = url.replace(".md", "")

+    # Record the last time
+    # the article was loaded.
+    RecentArticles["/"+url] = time.time()
+        
    config = Set.Load()
    tab, article, *rest = url.split("/")
    Tabs = tabs()
@ -554,8 +559,10 @@ def ArticlePage(server, url):
    # Audio recording of the article
    recording = Articles.get(article, {}).get("recording", "")
    if recording:
+        html = html + '<div class="dark_box"> <center>'
+        html = html + '<b>If you are going to skim, better listen to it instead.</b><br><br>'
        html = html + '<audio controls="controls" style="min-width:100%;" src="'+recording+'"></audio>'
-        
+        html = html + '<br><br></center></div>'
        
    
    html = html + '<div class="dark_box">'
@ -1225,18 +1232,18 @@ def EditorPage(server):
        """

    if name:
-        html = html + '<input type="hidden" name="name" value="'+name+'">'
+        html = html + '<input type="hidden" name="name" value="'+name.replace("'", "&apos;")+'">'
        
    html = html + """
    
    <img style="vertical-align: middle" src="/icon/scene">
-    <input class="button" style="width:90%" required="" name="title" placeholder="Title" value='"""+article.get("title", "")+"""'>
+    <input class="button" style="width:90%" required="" name="title" placeholder="Title" value='"""+article.get("title", "").replace("'", "&apos;")+"""'>
    
    <img style="vertical-align: middle" src="/icon/image_link">
-    <input class="button" style="width:90%" name="thumbnail" placeholder="Link To Thumbnail ( Optional )" value='"""+article.get("thumbnail", "")+"""'>
+    <input class="button" style="width:90%" name="thumbnail" placeholder="Link To Thumbnail ( Optional )" value='"""+article.get("thumbnail", "").replace("'", "&apos;")+"""'>

    <img style="vertical-align: middle" src="/icon/copy_file">
-    <input class="button" style="width:90%" list="Licenses" name="license" placeholder="License ( Optional )" value='"""+article.get("license", "")+"""'>
+    <input class="button" style="width:90%" list="Licenses" name="license" placeholder="License ( Optional )" value='"""+article.get("license", "").replace("'", "&apos;")+"""'>
    <datalist id="Licenses">
    """
    for l in Licenses:
@ -1245,7 +1252,7 @@ def EditorPage(server):
    </datalist>
    
    <img style="vertical-align: middle" src="/icon/mus">
-    <input class="button" style="width:90%" name="recording" placeholder="Link To Sound Recording ( Optional )" value='"""+article.get("recording", "")+"""'>
+    <input class="button" style="width:90%" name="recording" placeholder="Link To Sound Recording ( Optional )" value='"""+article.get("recording", "").replace("'", "&apos;")+"""'>

        
    <br>
@ -1361,6 +1368,16 @@ def User(username, stretch=False):

 def Graph(server, url):

+
+    # If there is anything after ? in the path,
+    # it means that somebody is sending the old
+    # version of the graph link from the legacy code,
+    # so we should not count it as a view.
+
+    if "?" in server.path:
+        AccessDenied(server)
+        return
+
    # Since /graph/ is used to count views
    # we need the cookie to be generated and
    # used by the user's browser before we load
@ -1372,6 +1389,15 @@ def Graph(server, url):
        Redirect(server, server.path)
        return

+    # Sometimes scrapers try to load the graph without
+    # loading the article first. We don't want to count
+    # that as a view.
+    if time.time()-10 > RecentArticles.get(url, 0):
+        print(consoleForm(server.cookie), "Article wasn't loaded, scrapers!")
+        AccessDenied(server)
+        return
+    
+    
    user = validate(server.cookie)

    html = """
@ -1671,7 +1697,9 @@ def AccessDenied(server):

 def Redirect(server, url, time=0):

-    html = """<meta http-equiv="Refresh" content=\""""+str(time)+"""; url='"""+url+"""'" />"""
+    print(consoleForm(server.cookie), "Redirecting to: "+url)
+
+    html = """<meta http-equiv="Refresh" content=\""""+str(time)+"""; url='"""+url.replace("'", "%27").replace('"', "%22")+"""'" />"""
    send(server, html, 200)

 def Login(server):
@ -2089,6 +2117,8 @@ def Publish(server):
    License     = server.parsed.get("license", [""])[0]
    recording   = server.parsed.get("recording", [""])[0]

+    
+    
    # If this tab doesn't exist, this is an error.
    if tab not in Tabs:
        AccessDenied(server)
--- a/modules/Run.py
+++ b/modules/Run.py
@ -110,6 +110,7 @@ class handler(BaseHTTPRequestHandler):

        self.path = self.path.replace("/..", "/")
        self.path = self.path.replace("%27", "'")
+        self.path = self.path.replace("%22", '"')

        parsed_url = urllib.parse.urlparse(self.path)
        self.parsed = urllib.parse.parse_qs(parsed_url.query)
--- a/modules/markdown.py
+++ b/modules/markdown.py
@ -485,6 +485,10 @@ def convert(filename, isfile=True):
            

        elif i[0] == "link":
+
+            if not i[-1]:
+                i[-1] = "/search?text="+urllib.parse.quote_plus(i[1])
+            
            textReturn = textReturn + '<a href="'+i[-1]+'">'+i[1]+"</a>"