From a54166a27cb1cbabfc516196dfeb2eebb857b6b9 Mon Sep 17 00:00:00 2001
From: Alex Liebowitz <alex@alexliebowitz.com>
Date: Wed, 20 Jul 2016 02:36:55 -0400
Subject: [PATCH] Add support for file uploads (for Publish page)

---
 lbrynet/lbrynet_daemon/LBRYDaemon.py        |  10 +
 lbrynet/lbrynet_daemon/LBRYDaemonControl.py |   6 +-
 lbrynet/lbrynet_daemon/LBRYDaemonServer.py  | 205 ++++++++++++++++++++
 3 files changed, 219 insertions(+), 2 deletions(-)

diff --git a/lbrynet/lbrynet_daemon/LBRYDaemon.py b/lbrynet/lbrynet_daemon/LBRYDaemon.py
index aaf504080..379209634 100644
--- a/lbrynet/lbrynet_daemon/LBRYDaemon.py
+++ b/lbrynet/lbrynet_daemon/LBRYDaemon.py
@@ -170,6 +170,7 @@ class LBRYDaemon(jsonrpc.JSONRPC):
         self.streams = {}
         self.known_dht_nodes = KNOWN_DHT_NODES
         self.first_run_after_update = False
+        self.uploaded_temp_files = []
 
         if os.name == "nt":
             from lbrynet.winhelpers.knownpaths import get_path, FOLDERID, UserHandle
@@ -708,6 +709,13 @@ class LBRYDaemon(jsonrpc.JSONRPC):
         else:
             return defer.succeed(None)
 
+    def _clean_up_temp_files(self):
+        for path in self.uploaded_temp_files:
+            try:
+                os.remove(path)
+            except OSError:
+                pass
+
     def _shutdown(self):
         log.info("Closing lbrynet session")
         log.info("Status at time of shutdown: " + self.startup_status[0])
@@ -720,6 +728,8 @@ class LBRYDaemon(jsonrpc.JSONRPC):
         if self.lbry_ui_manager.update_checker.running:
             self.lbry_ui_manager.update_checker.stop()
 
+        self._clean_up_temp_files()
+
         d = self._upload_log(log_type="close", exclude_previous=False if self.first_run else True)
         d.addCallback(lambda _: self._stop_server())
         d.addErrback(lambda err: True)
diff --git a/lbrynet/lbrynet_daemon/LBRYDaemonControl.py b/lbrynet/lbrynet_daemon/LBRYDaemonControl.py
index 309570873..dff9784ee 100644
--- a/lbrynet/lbrynet_daemon/LBRYDaemonControl.py
+++ b/lbrynet/lbrynet_daemon/LBRYDaemonControl.py
@@ -12,7 +12,7 @@ from twisted.web import server
 from twisted.internet import reactor, defer
 from jsonrpc.proxy import JSONRPCProxy
 
-from lbrynet.lbrynet_daemon.LBRYDaemonServer import LBRYDaemonServer
+from lbrynet.lbrynet_daemon.LBRYDaemonServer import LBRYDaemonServer, LBRYDaemonRequest
 from lbrynet.conf import API_CONNECTION_STRING, API_INTERFACE, API_ADDRESS, API_PORT, \
                             DEFAULT_WALLET, UI_ADDRESS, DEFAULT_UI_BRANCH, LOG_FILE_NAME
 
@@ -109,7 +109,9 @@ def start():
         if args.launchui:
             d.addCallback(lambda _: webbrowser.open(UI_ADDRESS))
 
-        reactor.listenTCP(API_PORT, server.Site(lbry.root), interface=API_INTERFACE)
+        lbrynet_server = server.Site(lbry.root)
+        lbrynet_server.requestFactory = LBRYDaemonRequest
+        reactor.listenTCP(API_PORT, lbrynet_server, interface=API_INTERFACE)
         reactor.run()
 
         if not args.logtoconsole and not args.quiet:
diff --git a/lbrynet/lbrynet_daemon/LBRYDaemonServer.py b/lbrynet/lbrynet_daemon/LBRYDaemonServer.py
index 74906da0b..b28520471 100644
--- a/lbrynet/lbrynet_daemon/LBRYDaemonServer.py
+++ b/lbrynet/lbrynet_daemon/LBRYDaemonServer.py
@@ -4,6 +4,10 @@ import shutil
 import json
 import sys
 import mimetypes
+import mimetools
+import tempfile
+import time
+import cgi
 
 from datetime import datetime
 from appdirs import user_data_dir
@@ -29,6 +33,184 @@ handler = logging.handlers.RotatingFileHandler(lbrynet_log, maxBytes=2097152, ba
 log.addHandler(handler)
 log.setLevel(logging.INFO)
 
+class LBRYDaemonRequest(server.Request):
+    """
+    For LBRY specific request functionality. Currently just provides
+    handling for large multipart POST requests, taken from here:
+    http://sammitch.ca/2013/07/handling-large-requests-in-twisted/
+
+    For multipart POST requests, this populates self.args with temp
+    file objects instead of strings. Note that these files don't auto-delete
+    on close because we want to be able to move and rename them.
+
+    """
+
+    # max amount of memory to allow any ~single~ request argument [ie: POSTed file]
+    # note: this value seems to be taken with a grain of salt, memory usage may spike
+    #       FAR above this value in some cases.
+    #       eg: set the memory limit to 5 MB, write 2 blocks of 4MB, mem usage will
+    #           have spiked to 8MB before the data is rolled to disk after the
+    #           second write completes.
+    memorylimit = 1024*1024*100
+
+    # enable/disable debug logging
+    do_log = True
+
+    # re-defined only for debug/logging purposes
+    def gotLength(self, length):
+        if self.do_log:
+            print '%f Headers received, Content-Length: %d' % (time.time(), length)
+        server.Request.gotLength(self, length)
+
+    # re-definition of twisted.web.server.Request.requestreceived, the only difference
+    # is that self.parse_multipart() is used rather than cgi.parse_multipart()
+    def requestReceived(self, command, path, version):
+        from twisted.web.http import parse_qs
+        if self.do_log:
+            print '%f Request Received' % time.time()
+        print self.content
+
+        self.content.seek(0,0)
+        self.args = {}
+        self.stack = []
+
+        self.method, self.uri = command, path
+        self.clientproto = version
+        x = self.uri.split(b'?', 1)
+
+        if len(x) == 1:
+            self.path = self.uri
+        else:
+            self.path, argstring = x
+            self.args = parse_qs(argstring, 1)
+
+        # cache the client and server information, we'll need this later to be
+        # serialized and sent with the request so CGIs will work remotely
+        self.client = self.channel.transport.getPeer()
+        self.host = self.channel.transport.getHost()
+
+        # Argument processing
+        args = self.args
+        ctype = self.requestHeaders.getRawHeaders(b'content-type')
+        if ctype is not None:
+            ctype = ctype[0]
+
+        if self.method == b"POST" and ctype:
+            mfd = b'multipart/form-data'
+            key, pdict = cgi.parse_header(ctype)
+            if key == b'application/x-www-form-urlencoded':
+                args.update(parse_qs(self.content.read(), 1))
+            elif key == mfd:
+                try:
+                    self.content.seek(0,0)
+                    args.update(self.parse_multipart(self.content, pdict))
+                    #args.update(cgi.parse_multipart(self.content, pdict))
+
+                except KeyError as e:
+                    if e.args[0] == b'content-disposition':
+                        # Parse_multipart can't cope with missing
+                        # content-dispostion headers in multipart/form-data
+                        # parts, so we catch the exception and tell the client
+                        # it was a bad request.
+                        self.channel.transport.write(
+                                b"HTTP/1.1 400 Bad Request\r\n\r\n")
+                        self.channel.transport.loseConnection()
+                        return
+                    raise
+
+            self.content.seek(0, 0)
+
+        self.process()
+
+    # re-definition of cgi.parse_multipart that uses a single temporary file to store
+    # data rather than storing 2 to 3 copies in various lists.
+    def parse_multipart(self, fp, pdict):
+        if self.do_log:
+            print '%f Parsing Multipart data: ' % time.time()
+        rewind = fp.tell() #save cursor
+        fp.seek(0,0) #reset cursor
+
+        boundary = ""
+        if 'boundary' in pdict:
+            boundary = pdict['boundary']
+        if not cgi.valid_boundary(boundary):
+            raise ValueError,  ('Invalid boundary in multipart form: %r'
+                                % (boundary,))
+
+        nextpart = "--" + boundary
+        lastpart = "--" + boundary + "--"
+        partdict = {}
+        terminator = ""
+
+        while terminator != lastpart:
+            c_bytes = -1
+
+            data = tempfile.NamedTemporaryFile(delete=False)
+            if terminator:
+                # At start of next part.  Read headers first.
+                headers = mimetools.Message(fp)
+                clength = headers.getheader('content-length')
+                if clength:
+                    try:
+                        c_bytes = int(clength)
+                    except ValueError:
+                        pass
+                if c_bytes > 0:
+                    data.write(fp.read(c_bytes))
+            # Read lines until end of part.
+            while 1:
+                line = fp.readline()
+                if not line:
+                    terminator = lastpart # End outer loop
+                    break
+                if line[:2] == "--":
+                    terminator = line.strip()
+                    if terminator in (nextpart, lastpart):
+                        break
+                data.write(line)
+            # Done with part.
+            if data.tell() == 0:
+                continue
+            if c_bytes < 0:
+                # if a Content-Length header was not supplied with the MIME part
+                # then the trailing line break must be removed.
+                # we have data, read the last 2 bytes
+                rewind = min(2, data.tell())
+                data.seek(-rewind, os.SEEK_END)
+                line = data.read(2)
+                if line[-2:] == "\r\n":
+                    data.seek(-2, os.SEEK_END)
+                    data.truncate()
+                elif line[-1:] == "\n":
+                    data.seek(-1, os.SEEK_END)
+                    data.truncate()
+
+            line = headers['content-disposition']
+            if not line:
+                continue
+            key, params = cgi.parse_header(line)
+            if key != 'form-data':
+                continue
+            if 'name' in params:
+                name = params['name']
+                # kludge in the filename
+                if 'filename' in params:
+                    fname_index = name + '_filename'
+                    if fname_index in partdict:
+                        partdict[fname_index].append(params['filename'])
+                    else:
+                        partdict[fname_index] = [params['filename']]
+            else:
+                # Unnamed parts are not returned at all.
+                continue
+            data.seek(0,0)
+            if name in partdict:
+                partdict[name].append(data)
+            else:
+                partdict[name] = [data]
+
+        fp.seek(rewind)  # Restore cursor
+        return partdict
 
 class LBRYindex(resource.Resource):
     def __init__(self, ui_dir):
@@ -179,12 +361,35 @@ class HostedLBRYFile(resource.Resource):
     #     call.addErrback(lambda err: log.info("Error: " + str(err)))
     #     call.cancel()
 
+class LBRYFileUpload(resource.Resource):
+    """
+    Accepts a file sent via the file upload widget in the web UI, saves
+    it into a temporary dir, and responds with a JSON string containing
+    the path of the newly created file.
+    """
+
+    def __init__(self, api):
+        self._api = api
+
+    def render_POST(self, request):
+        origfilename = request.args['file_filename'][0]
+        uploaded_file = request.args['file'][0]  # Temp file created by request
+
+        # Move to a new temporary dir and restore the original file name
+        newdirpath = tempfile.mkdtemp()
+        newpath = os.path.join(newdirpath, origfilename)
+        shutil.move(uploaded_file.name, newpath)
+        self._api.uploaded_temp_files.append(newpath)
+
+        return json.dumps(newpath)
+
 
 class LBRYDaemonServer(object):
     def _setup_server(self, wallet):
         self.root = LBRYindex(os.path.join(os.path.join(data_dir, "lbry-ui"), "active"))
         self._api = LBRYDaemon(self.root, wallet_type=wallet)
         self.root.putChild("view", HostedLBRYFile(self._api))
+        self.root.putChild("upload", LBRYFileUpload(self._api))
         self.root.putChild(API_ADDRESS, self._api)
         return defer.succeed(True)