From 63ce632ce99c9140b1a68075095316cb4ffff80c Mon Sep 17 00:00:00 2001 From: BlenderDumbass Date: Sat, 30 Nov 2024 14:27:26 +0200 Subject: [PATCH] A bit more protection against scrapers increasing view counts. --- modules/Render.py | 46 +++++++++++++++++++++++++++++++++++++-------- modules/Run.py | 1 + modules/markdown.py | 4 ++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/modules/Render.py b/modules/Render.py index 7416400..5923e2b 100644 --- a/modules/Render.py +++ b/modules/Render.py @@ -15,6 +15,7 @@ from modules import markdown from modules.Common import * KnownCookies = [] +RecentArticles = {} def guess_type(path): @@ -481,7 +482,11 @@ def ArticlePage(server, url): if url.endswith(".md"): url = url.replace(".md", "") - + + # Recording when was the last time + # the article loaded. + RecentArticles["/"+url] = time.time() + config = Set.Load() tab, article, *rest = url.split("/") Tabs = tabs() @@ -554,8 +559,10 @@ def ArticlePage(server, url): # Audio recording of the article recording = Articles.get(article, {}).get("recording", "") if recording: + html = html + '
' + html = html + 'If you are going to skim, better listen to it instead.

' html = html + '' - + html = html + '

' html = html + '
' @@ -1225,18 +1232,18 @@ def EditorPage(server): """ if name: - html = html + '' + html = html + '' html = html + """ - + - + - + """ for l in Licenses: @@ -1245,7 +1252,7 @@ def EditorPage(server): - +
@@ -1361,6 +1368,16 @@ def User(username, stretch=False): def Graph(server, url): + + # If there are any values after ? in the path + # which means, that somebody is sending the old + # version of the graph link from the legacy code + # we should not count it as a view. + + if "?" in server.path: + AccessDenied(server) + return + # Since /graph/ is used to count views # we need the cookie to be generated and # used by the user's browser before we load @@ -1371,6 +1388,15 @@ def Graph(server, url): if not server.cookie: Redirect(server, server.path) return + + # Sometimes scrapers try to load graph without + # loading the article first. We don't want to count + # it as a view. + if time.time()-10 > RecentArticles.get(url, 0): + print(consoleForm(server.cookie), "Article wasn't loaded, scrapers!") + AccessDenied(server) + return + user = validate(server.cookie) @@ -1671,7 +1697,9 @@ def AccessDenied(server): def Redirect(server, url, time=0): - html = """""" + print(consoleForm(server.cookie), "Redirecting to: "+url) + + html = """""" send(server, html, 200) def Login(server): @@ -2089,6 +2117,8 @@ def Publish(server): License = server.parsed.get("license", [""])[0] recording = server.parsed.get("recording", [""])[0] + + # If this tab doesn't exist, this is an error. if tab not in Tabs: AccessDenied(server) diff --git a/modules/Run.py b/modules/Run.py index a2847dc..cf7c6a6 100644 --- a/modules/Run.py +++ b/modules/Run.py @@ -110,6 +110,7 @@ class handler(BaseHTTPRequestHandler): self.path = self.path.replace("/..", "/") self.path = self.path.replace("%27", "'") + self.path = self.path.replace("%22", '"') parsed_url = urllib.parse.urlparse(self.path) self.parsed = urllib.parse.parse_qs(parsed_url.query) diff --git a/modules/markdown.py b/modules/markdown.py index 35c78c8..512015c 100644 --- a/modules/markdown.py +++ b/modules/markdown.py @@ -485,6 +485,10 @@ def convert(filename, isfile=True): elif i[0] == "link": + + if not i[-1]: + i[-1] = "/search?text="+urllib.parse.quote_plus(i[1]) + textReturn = textReturn + ''+i[1]+""