A bit more protection against scrapers increasing view counts.

This commit is contained in:
BlenderDumbass 2024-11-30 14:27:26 +02:00
parent ba14217714
commit 63ce632ce9
3 changed files with 43 additions and 8 deletions

View file

@ -15,6 +15,7 @@ from modules import markdown
from modules.Common import * from modules.Common import *
KnownCookies = [] KnownCookies = []
RecentArticles = {}
def guess_type(path): def guess_type(path):
@ -482,6 +483,10 @@ def ArticlePage(server, url):
if url.endswith(".md"): if url.endswith(".md"):
url = url.replace(".md", "") url = url.replace(".md", "")
# Record the time at which
# the article was last loaded.
RecentArticles["/"+url] = time.time()
config = Set.Load() config = Set.Load()
tab, article, *rest = url.split("/") tab, article, *rest = url.split("/")
Tabs = tabs() Tabs = tabs()
@ -554,8 +559,10 @@ def ArticlePage(server, url):
# Audio recording of the article # Audio recording of the article
recording = Articles.get(article, {}).get("recording", "") recording = Articles.get(article, {}).get("recording", "")
if recording: if recording:
html = html + '<div class="dark_box"> <center>'
html = html + '<b>If you are going to skim, better listen to it instead.</b><br><br>'
html = html + '<audio controls="controls" style="min-width:100%;" src="'+recording+'"></audio>' html = html + '<audio controls="controls" style="min-width:100%;" src="'+recording+'"></audio>'
html = html + '<br><br></center></div>'
html = html + '<div class="dark_box">' html = html + '<div class="dark_box">'
@ -1225,18 +1232,18 @@ def EditorPage(server):
""" """
if name: if name:
html = html + '<input type="hidden" name="name" value="'+name+'">' html = html + '<input type="hidden" name="name" value="'+name.replace("'", "&apos;")+'">'
html = html + """ html = html + """
<img style="vertical-align: middle" src="/icon/scene"> <img style="vertical-align: middle" src="/icon/scene">
<input class="button" style="width:90%" required="" name="title" placeholder="Title" value='"""+article.get("title", "")+"""'> <input class="button" style="width:90%" required="" name="title" placeholder="Title" value='"""+article.get("title", "").replace("'", "&apos;")+"""'>
<img style="vertical-align: middle" src="/icon/image_link"> <img style="vertical-align: middle" src="/icon/image_link">
<input class="button" style="width:90%" name="thumbnail" placeholder="Link To Thumbnail ( Optional )" value='"""+article.get("thumbnail", "")+"""'> <input class="button" style="width:90%" name="thumbnail" placeholder="Link To Thumbnail ( Optional )" value='"""+article.get("thumbnail", "").replace("'", "&apos;")+"""'>
<img style="vertical-align: middle" src="/icon/copy_file"> <img style="vertical-align: middle" src="/icon/copy_file">
<input class="button" style="width:90%" list="Licenses" name="license" placeholder="License ( Optional )" value='"""+article.get("license", "")+"""'> <input class="button" style="width:90%" list="Licenses" name="license" placeholder="License ( Optional )" value='"""+article.get("license", "").replace("'", "&apos;")+"""'>
<datalist id="Licenses"> <datalist id="Licenses">
""" """
for l in Licenses: for l in Licenses:
@ -1245,7 +1252,7 @@ def EditorPage(server):
</datalist> </datalist>
<img style="vertical-align: middle" src="/icon/mus"> <img style="vertical-align: middle" src="/icon/mus">
<input class="button" style="width:90%" name="recording" placeholder="Link To Sound Recording ( Optional )" value='"""+article.get("recording", "")+"""'> <input class="button" style="width:90%" name="recording" placeholder="Link To Sound Recording ( Optional )" value='"""+article.get("recording", "").replace("'", "&apos;")+"""'>
<br> <br>
@ -1361,6 +1368,16 @@ def User(username, stretch=False):
def Graph(server, url): def Graph(server, url):
# If there is anything after "?" in the path,
# somebody is sending the old version of the
# graph link from the legacy code, and we
# should not count it as a view.
if "?" in server.path:
AccessDenied(server)
return
# Since /graph/ is used to count views # Since /graph/ is used to count views
# we need the cookie to be generated and # we need the cookie to be generated and
# used by the user's browser before we load # used by the user's browser before we load
@ -1372,6 +1389,15 @@ def Graph(server, url):
Redirect(server, server.path) Redirect(server, server.path)
return return
# Sometimes scrapers try to load the graph without
# loading the article first. We don't want to count
# that as a view.
if time.time()-10 > RecentArticles.get(url, 0):
print(consoleForm(server.cookie), "Article wasn't loaded, scrapers!")
AccessDenied(server)
return
user = validate(server.cookie) user = validate(server.cookie)
html = """ html = """
@ -1671,7 +1697,9 @@ def AccessDenied(server):
def Redirect(server, url, time=0): def Redirect(server, url, time=0):
html = """<meta http-equiv="Refresh" content=\""""+str(time)+"""; url='"""+url+"""'" />""" print(consoleForm(server.cookie), "Redirecting to: "+url)
html = """<meta http-equiv="Refresh" content=\""""+str(time)+"""; url='"""+url.replace("'", "%27").replace('"', "%22")+"""'" />"""
send(server, html, 200) send(server, html, 200)
def Login(server): def Login(server):
@ -2089,6 +2117,8 @@ def Publish(server):
License = server.parsed.get("license", [""])[0] License = server.parsed.get("license", [""])[0]
recording = server.parsed.get("recording", [""])[0] recording = server.parsed.get("recording", [""])[0]
# If this tab doesn't exist, this is an error. # If this tab doesn't exist, this is an error.
if tab not in Tabs: if tab not in Tabs:
AccessDenied(server) AccessDenied(server)

View file

@ -110,6 +110,7 @@ class handler(BaseHTTPRequestHandler):
self.path = self.path.replace("/..", "/") self.path = self.path.replace("/..", "/")
self.path = self.path.replace("%27", "'") self.path = self.path.replace("%27", "'")
self.path = self.path.replace("%22", '"')
parsed_url = urllib.parse.urlparse(self.path) parsed_url = urllib.parse.urlparse(self.path)
self.parsed = urllib.parse.parse_qs(parsed_url.query) self.parsed = urllib.parse.parse_qs(parsed_url.query)

View file

@ -485,6 +485,10 @@ def convert(filename, isfile=True):
elif i[0] == "link": elif i[0] == "link":
if not i[-1]:
i[-1] = "/search?text="+urllib.parse.quote_plus(i[1])
textReturn = textReturn + '<a href="'+i[-1]+'">'+i[1]+"</a>" textReturn = textReturn + '<a href="'+i[-1]+'">'+i[1]+"</a>"