From 1c1b1b315f2f89584abe9a7558945dea2fbee708 Mon Sep 17 00:00:00 2001
From: MarcoFalke <falke.marco@gmail.com>
Date: Wed, 9 Sep 2015 14:24:56 +0200
Subject: [PATCH] [uacomment] Sanitize per BIP-0014

* SanitizeString() can be requested to be more strict
* Throw error when SanitizeString() changes uacomments
* Fix tests
---
 src/init.cpp             | 11 +++++++++--
 src/test/util_tests.cpp  |  4 ++--
 src/utilstrencodings.cpp | 17 ++++++++++-------
 src/utilstrencodings.h   | 16 +++++++++++++++-
 4 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/src/init.cpp b/src/init.cpp
index 5759b4b42..2239cc072 100644
--- a/src/init.cpp
+++ b/src/init.cpp
@@ -1014,8 +1014,15 @@ bool AppInit2(boost::thread_group& threadGroup, CScheduler& scheduler)
 
     RegisterNodeSignals(GetNodeSignals());
 
-    // format user agent, check total size
-    strSubVersion = FormatSubVersion(CLIENT_NAME, CLIENT_VERSION, mapMultiArgs.count("-uacomment") ? mapMultiArgs["-uacomment"] : std::vector<string>());
+    // sanitize comments per BIP-0014, format user agent and check total size
+    std::vector<string> uacomments;
+    BOOST_FOREACH(string cmt, mapMultiArgs["-uacomment"])
+    {
+        if (cmt != SanitizeString(cmt, SAFE_CHARS_UA_COMMENT))
+            return InitError(strprintf("User Agent comment (%s) contains unsafe characters.", cmt));
+        uacomments.push_back(SanitizeString(cmt, SAFE_CHARS_UA_COMMENT));
+    }
+    strSubVersion = FormatSubVersion(CLIENT_NAME, CLIENT_VERSION, uacomments);
     if (strSubVersion.size() > MAX_SUBVERSION_LENGTH) {
         return InitError(strprintf("Total length of network version string %i exceeds maximum of %i characters. Reduce the number and/or size of uacomments.",
             strSubVersion.size(), MAX_SUBVERSION_LENGTH));
diff --git a/src/test/util_tests.cpp b/src/test/util_tests.cpp
index e956cc5b9..bde16a517 100644
--- a/src/test/util_tests.cpp
+++ b/src/test/util_tests.cpp
@@ -413,10 +413,10 @@ BOOST_AUTO_TEST_CASE(test_FormatSubVersion)
     comments.push_back(std::string("comment1"));
     std::vector<std::string> comments2;
     comments2.push_back(std::string("comment1"));
-    comments2.push_back(std::string("comment2"));
+    comments2.push_back(SanitizeString(std::string("Comment2; .,_?@; !\"#$%&'()*+-/<=>[]\\^`{|}~"), SAFE_CHARS_UA_COMMENT)); // Semicolon is discouraged but not forbidden by BIP-0014
     BOOST_CHECK_EQUAL(FormatSubVersion("Test", 99900, std::vector<std::string>()),std::string("/Test:0.9.99/"));
     BOOST_CHECK_EQUAL(FormatSubVersion("Test", 99900, comments),std::string("/Test:0.9.99(comment1)/"));
-    BOOST_CHECK_EQUAL(FormatSubVersion("Test", 99900, comments2),std::string("/Test:0.9.99(comment1; comment2)/"));
+    BOOST_CHECK_EQUAL(FormatSubVersion("Test", 99900, comments2),std::string("/Test:0.9.99(comment1; Comment2; .,_?@; )/"));
 }
 
 BOOST_AUTO_TEST_CASE(test_ParseFixedPoint)
diff --git a/src/utilstrencodings.cpp b/src/utilstrencodings.cpp
index 1f7a2cae2..76c22f735 100644
--- a/src/utilstrencodings.cpp
+++ b/src/utilstrencodings.cpp
@@ -14,17 +14,20 @@
 
 using namespace std;
 
-string SanitizeString(const string& str)
+static const string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+static const string SAFE_CHARS[] =
+{
+    CHARS_ALPHA_NUM + " .,;_/:?@()", // SAFE_CHARS_DEFAULT
+    CHARS_ALPHA_NUM + " .,;_?@" // SAFE_CHARS_UA_COMMENT
+};
+
+string SanitizeString(const string& str, int rule)
 {
-    /**
-     * safeChars chosen to allow simple messages/URLs/email addresses, but avoid anything
-     * even possibly remotely dangerous like & or >
-     */
-    static string safeChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890 .,;_/:?@()");
     string strResult;
     for (std::string::size_type i = 0; i < str.size(); i++)
     {
-        if (safeChars.find(str[i]) != std::string::npos)
+        if (SAFE_CHARS[rule].find(str[i]) != std::string::npos)
             strResult.push_back(str[i]);
     }
     return strResult;
diff --git a/src/utilstrencodings.h b/src/utilstrencodings.h
index dcd56751f..ce93e8349 100644
--- a/src/utilstrencodings.h
+++ b/src/utilstrencodings.h
@@ -22,7 +22,21 @@
 /** This is needed because the foreach macro can't get over the comma in pair<t1, t2> */
 #define PAIRTYPE(t1, t2)    std::pair<t1, t2>
 
-std::string SanitizeString(const std::string& str);
+/** Used by SanitizeString() */
+enum SafeChars
+{
+    SAFE_CHARS_DEFAULT, //!< The full set of allowed chars
+    SAFE_CHARS_UA_COMMENT //!< BIP-0014 subset
+};
+
+/**
+* Remove unsafe chars. Safe chars chosen to allow simple messages/URLs/email
+* addresses, but avoid anything even possibly remotely dangerous like & or >
+* @param[in] str    The string to sanitize
+* @param[in] rule   The set of safe chars to choose (default: least restrictive)
+* @return           A new string without unsafe chars
+*/
+std::string SanitizeString(const std::string& str, int rule = SAFE_CHARS_DEFAULT);
 std::vector<unsigned char> ParseHex(const char* psz);
 std::vector<unsigned char> ParseHex(const std::string& str);
 signed char HexDigit(char c);