torcontrol: Handle escapes in Tor QuotedStrings
https://trac.torproject.org/projects/tor/ticket/14999 is tracking an encoding bug with the Tor control protocol, where many of the QuotedString instances that Tor outputs are in fact CStrings, but it is not documented which ones are which. https://spec.torproject.org/control-spec section 2.1.1 provides a future-proofed rule for handling QuotedStrings, which this commit implements. This commit merges all six commits from https://github.com/zcash/zcash/pull/2251.
This commit is contained in:
parent
0182a11737
commit
49a199bb51
2 changed files with 87 additions and 11 deletions
|
@ -119,29 +119,60 @@ BOOST_AUTO_TEST_CASE(util_ParseTorReplyMapping)
|
|||
{"Foo", "Bar Baz"},
|
||||
});
|
||||
|
||||
// Escapes (which are left escaped by the parser)
|
||||
// Escapes
|
||||
CheckParseTorReplyMapping(
|
||||
"Foo=\"Bar\\ Baz\"", {
|
||||
{"Foo", "Bar\\ Baz"},
|
||||
{"Foo", "Bar Baz"},
|
||||
});
|
||||
CheckParseTorReplyMapping(
|
||||
"Foo=\"Bar\\Baz\"", {
|
||||
{"Foo", "Bar\\Baz"},
|
||||
{"Foo", "BarBaz"},
|
||||
});
|
||||
CheckParseTorReplyMapping(
|
||||
"Foo=\"Bar\\@Baz\"", {
|
||||
{"Foo", "Bar\\@Baz"},
|
||||
{"Foo", "Bar@Baz"},
|
||||
});
|
||||
CheckParseTorReplyMapping(
|
||||
"Foo=\"Bar\\\"Baz\" Spam=\"\\\"Eggs\\\"\"", {
|
||||
{"Foo", "Bar\\\"Baz"},
|
||||
{"Spam", "\\\"Eggs\\\""},
|
||||
{"Foo", "Bar\"Baz"},
|
||||
{"Spam", "\"Eggs\""},
|
||||
});
|
||||
CheckParseTorReplyMapping(
|
||||
"Foo=\"Bar\\\\Baz\"", {
|
||||
{"Foo", "Bar\\\\Baz"},
|
||||
{"Foo", "Bar\\Baz"},
|
||||
});
|
||||
|
||||
// C escapes
|
||||
CheckParseTorReplyMapping(
|
||||
"Foo=\"Bar\\nBaz\\t\" Spam=\"\\rEggs\" Octals=\"\\1a\\11\\17\\18\\81\\377\\378\\400\\2222\" Final=Check", {
|
||||
{"Foo", "Bar\nBaz\t"},
|
||||
{"Spam", "\rEggs"},
|
||||
{"Octals", "\1a\11\17\1" "881\377\37" "8\40" "0\222" "2"},
|
||||
{"Final", "Check"},
|
||||
});
|
||||
CheckParseTorReplyMapping(
|
||||
"Valid=Mapping Escaped=\"Escape\\\\\"", {
|
||||
{"Valid", "Mapping"},
|
||||
{"Escaped", "Escape\\"},
|
||||
});
|
||||
CheckParseTorReplyMapping(
|
||||
"Valid=Mapping Bare=\"Escape\\\"", {});
|
||||
CheckParseTorReplyMapping(
|
||||
"OneOctal=\"OneEnd\\1\" TwoOctal=\"TwoEnd\\11\"", {
|
||||
{"OneOctal", "OneEnd\1"},
|
||||
{"TwoOctal", "TwoEnd\11"},
|
||||
});
|
||||
|
||||
// Special handling for null case
|
||||
// (needed because string comparison reads the null as end-of-string)
|
||||
BOOST_TEST_MESSAGE(std::string("CheckParseTorReplyMapping(Null=\"\\0\")"));
|
||||
auto ret = ParseTorReplyMapping("Null=\"\\0\"");
|
||||
BOOST_CHECK_EQUAL(ret.size(), 1);
|
||||
auto r_it = ret.begin();
|
||||
BOOST_CHECK_EQUAL(r_it->first, "Null");
|
||||
BOOST_CHECK_EQUAL(r_it->second.size(), 1);
|
||||
BOOST_CHECK_EQUAL(r_it->second[0], '\0');
|
||||
|
||||
// A more complex valid grammar. PROTOCOLINFO accepts a VersionLine that
|
||||
// takes a key=value pair followed by an OptArguments, making this valid.
|
||||
// Because an OptArguments contains no semantic data, there is no point in
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// Copyright (c) 2015-2016 The Bitcoin Core developers
|
||||
// Copyright (c) 2017 The Zcash developers
|
||||
// Distributed under the MIT software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
|
@ -291,17 +292,61 @@ static std::map<std::string,std::string> ParseTorReplyMapping(const std::string
|
|||
++ptr; // skip opening '"'
|
||||
bool escape_next = false;
|
||||
while (ptr < s.size() && (escape_next || s[ptr] != '"')) {
|
||||
escape_next = (s[ptr] == '\\');
|
||||
// Repeated backslashes must be interpreted as pairs
|
||||
escape_next = (s[ptr] == '\\' && !escape_next);
|
||||
value.push_back(s[ptr]);
|
||||
++ptr;
|
||||
}
|
||||
if (ptr == s.size()) // unexpected end of line
|
||||
return std::map<std::string,std::string>();
|
||||
++ptr; // skip closing '"'
|
||||
/* TODO: unescape value - according to the spec this depends on the
|
||||
* context, some strings use C-LogPrintf style escape codes, some
|
||||
* don't. So may be better handled at the call site.
|
||||
/**
|
||||
* Unescape value. Per https://spec.torproject.org/control-spec section 2.1.1:
|
||||
*
|
||||
* For future-proofing, controller implementors MAY use the following
|
||||
* rules to be compatible with buggy Tor implementations and with
|
||||
* future ones that implement the spec as intended:
|
||||
*
|
||||
* Read \n \t \r and \0 ... \377 as C escapes.
|
||||
* Treat a backslash followed by any other character as that character.
|
||||
*/
|
||||
std::string escaped_value;
|
||||
for (size_t i = 0; i < value.size(); ++i) {
|
||||
if (value[i] == '\\') {
|
||||
// This will always be valid, because if the QuotedString
|
||||
// ended in an odd number of backslashes, then the parser
|
||||
// would already have returned above, due to a missing
|
||||
// terminating double-quote.
|
||||
++i;
|
||||
if (value[i] == 'n') {
|
||||
escaped_value.push_back('\n');
|
||||
} else if (value[i] == 't') {
|
||||
escaped_value.push_back('\t');
|
||||
} else if (value[i] == 'r') {
|
||||
escaped_value.push_back('\r');
|
||||
} else if ('0' <= value[i] && value[i] <= '7') {
|
||||
size_t j;
|
||||
// Octal escape sequences have a limit of three octal digits,
|
||||
// but terminate at the first character that is not a valid
|
||||
// octal digit if encountered sooner.
|
||||
for (j = 1; j < 3 && (i+j) < value.size() && '0' <= value[i+j] && value[i+j] <= '7'; ++j) {}
|
||||
// Tor restricts first digit to 0-3 for three-digit octals.
|
||||
// A leading digit of 4-7 would therefore be interpreted as
|
||||
// a two-digit octal.
|
||||
if (j == 3 && value[i] > '3') {
|
||||
j--;
|
||||
}
|
||||
escaped_value.push_back(strtol(value.substr(i, j).c_str(), NULL, 8));
|
||||
// Account for automatic incrementing at loop end
|
||||
i += j - 1;
|
||||
} else {
|
||||
escaped_value.push_back(value[i]);
|
||||
}
|
||||
} else {
|
||||
escaped_value.push_back(value[i]);
|
||||
}
|
||||
}
|
||||
value = escaped_value;
|
||||
} else { // Unquoted value. Note that values can contain '=' at will, just no spaces
|
||||
while (ptr < s.size() && s[ptr] != ' ') {
|
||||
value.push_back(s[ptr]);
|
||||
|
|
Loading…
Reference in a new issue