Merge #13041: build: Add linter checking for accidental introduction of locale dependence
698cfd0811
docs: Mention lint-locale-dependence.sh in developer-notes.md (practicalswift)0a4ea2f458
build: Add linter for checking accidental locale dependence (practicalswift) Pull request description: This linter will check for code accidentally introducing locale dependencies. Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. We should avoid using locale dependent functions if possible. Context: https://github.com/bitcoin/bitcoin/pull/12881#issuecomment-378564722 Example output: ``` $ contrib/devtools/lint-locale-dependence.sh The locale dependent function tolower(...) appears to be used: src/init.cpp: if (s[0] == '0' && std::tolower(s[1]) == 'x') { Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible. Advice not applicable in this specific case? Add an exception by updating the ignore list in contrib/devtools/lint-locale-dependence.sh ``` **Note to reviewers:** What is the most appropriate `LOCALE_DEPENDENT_FUNCTIONS` function list? What should be added or removed? Tree-SHA512: 14e448828804bb02bf59070647e38b52fce120c700c903a4a8472769a2cee5dd529bd3fc182386993cb8720482cf4250b63a0a477db61b941ae4babe5c65025f
This commit is contained in:
commit
5779dc4f76
2 changed files with 258 additions and 1 deletions
|
@ -499,7 +499,35 @@ Strings and formatting
|
||||||
|
|
||||||
- Use `ParseInt32`, `ParseInt64`, `ParseUInt32`, `ParseUInt64`, `ParseDouble` from `utilstrencodings.h` for number parsing
|
- Use `ParseInt32`, `ParseInt64`, `ParseUInt32`, `ParseUInt64`, `ParseDouble` from `utilstrencodings.h` for number parsing
|
||||||
|
|
||||||
- *Rationale*: These functions do overflow checking, and avoid pesky locale issues
|
- *Rationale*: These functions do overflow checking, and avoid pesky locale issues.
|
||||||
|
|
||||||
|
- Avoid using locale dependent functions if possible. You can use the provided
|
||||||
|
[`lint-locale-dependence.sh`](/contrib/devtools/lint-locale-dependence.sh)
|
||||||
|
to check for accidental use of locale dependent functions.
|
||||||
|
|
||||||
|
- *Rationale*: Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix.
|
||||||
|
|
||||||
|
- These functions are known to be locale dependent:
|
||||||
|
`alphasort`, `asctime`, `asprintf`, `atof`, `atoi`, `atol`, `atoll`, `atoq`,
|
||||||
|
`btowc`, `ctime`, `dprintf`, `fgetwc`, `fgetws`, `fprintf`, `fputwc`,
|
||||||
|
`fputws`, `fscanf`, `fwprintf`, `getdate`, `getwc`, `getwchar`, `isalnum`,
|
||||||
|
`isalpha`, `isblank`, `iscntrl`, `isdigit`, `isgraph`, `islower`, `isprint`,
|
||||||
|
`ispunct`, `isspace`, `isupper`, `iswalnum`, `iswalpha`, `iswblank`,
|
||||||
|
`iswcntrl`, `iswctype`, `iswdigit`, `iswgraph`, `iswlower`, `iswprint`,
|
||||||
|
`iswpunct`, `iswspace`, `iswupper`, `iswxdigit`, `isxdigit`, `mblen`,
|
||||||
|
`mbrlen`, `mbrtowc`, `mbsinit`, `mbsnrtowcs`, `mbsrtowcs`, `mbstowcs`,
|
||||||
|
`mbtowc`, `mktime`, `putwc`, `putwchar`, `scanf`, `snprintf`, `sprintf`,
|
||||||
|
`sscanf`, `stoi`, `stol`, `stoll`, `strcasecmp`, `strcasestr`, `strcoll`,
|
||||||
|
`strfmon`, `strftime`, `strncasecmp`, `strptime`, `strtod`, `strtof`,
|
||||||
|
`strtoimax`, `strtol`, `strtold`, `strtoll`, `strtoq`, `strtoul`,
|
||||||
|
`strtoull`, `strtoumax`, `strtouq`, `strxfrm`, `swprintf`, `tolower`,
|
||||||
|
`toupper`, `towctrans`, `towlower`, `towupper`, `ungetwc`, `vasprintf`,
|
||||||
|
`vdprintf`, `versionsort`, `vfprintf`, `vfscanf`, `vfwprintf`, `vprintf`,
|
||||||
|
`vscanf`, `vsnprintf`, `vsprintf`, `vsscanf`, `vswprintf`, `vwprintf`,
|
||||||
|
`wcrtomb`, `wcscasecmp`, `wcscoll`, `wcsftime`, `wcsncasecmp`, `wcsnrtombs`,
|
||||||
|
`wcsrtombs`, `wcstod`, `wcstof`, `wcstoimax`, `wcstol`, `wcstold`,
|
||||||
|
`wcstoll`, `wcstombs`, `wcstoul`, `wcstoull`, `wcstoumax`, `wcswidth`,
|
||||||
|
`wcsxfrm`, `wctob`, `wctomb`, `wctrans`, `wctype`, `wcwidth`, `wprintf`
|
||||||
|
|
||||||
- For `strprintf`, `LogPrint`, `LogPrintf` formatting characters don't need size specifiers
|
- For `strprintf`, `LogPrint`, `LogPrintf` formatting characters don't need size specifiers
|
||||||
|
|
||||||
|
|
229
test/lint/lint-locale-dependence.sh
Executable file
229
test/lint/lint-locale-dependence.sh
Executable file
|
@ -0,0 +1,229 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
KNOWN_VIOLATIONS=(
|
||||||
|
"src/base58.cpp:.*isspace"
|
||||||
|
"src/bitcoin-tx.cpp.*stoul"
|
||||||
|
"src/bitcoin-tx.cpp.*trim_right"
|
||||||
|
"src/bitcoin-tx.cpp:.*atoi"
|
||||||
|
"src/core_read.cpp.*is_digit"
|
||||||
|
"src/dbwrapper.cpp.*stoul"
|
||||||
|
"src/dbwrapper.cpp:.*vsnprintf"
|
||||||
|
"src/httprpc.cpp.*trim"
|
||||||
|
"src/init.cpp:.*atoi"
|
||||||
|
"src/netbase.cpp.*to_lower"
|
||||||
|
"src/qt/rpcconsole.cpp:.*atoi"
|
||||||
|
"src/qt/rpcconsole.cpp:.*isdigit"
|
||||||
|
"src/rest.cpp:.*strtol"
|
||||||
|
"src/rpc/server.cpp.*to_upper"
|
||||||
|
"src/test/dbwrapper_tests.cpp:.*snprintf"
|
||||||
|
"src/test/getarg_tests.cpp.*split"
|
||||||
|
"src/torcontrol.cpp:.*atoi"
|
||||||
|
"src/torcontrol.cpp:.*strtol"
|
||||||
|
"src/uint256.cpp:.*isspace"
|
||||||
|
"src/uint256.cpp:.*tolower"
|
||||||
|
"src/util.cpp:.*atoi"
|
||||||
|
"src/util.cpp:.*fprintf"
|
||||||
|
"src/util.cpp:.*tolower"
|
||||||
|
"src/utilmoneystr.cpp:.*isdigit"
|
||||||
|
"src/utilmoneystr.cpp:.*isspace"
|
||||||
|
"src/utilstrencodings.cpp:.*atoi"
|
||||||
|
"src/utilstrencodings.cpp:.*isspace"
|
||||||
|
"src/utilstrencodings.cpp:.*strtol"
|
||||||
|
"src/utilstrencodings.cpp:.*strtoll"
|
||||||
|
"src/utilstrencodings.cpp:.*strtoul"
|
||||||
|
"src/utilstrencodings.cpp:.*strtoull"
|
||||||
|
"src/utilstrencodings.h:.*atoi"
|
||||||
|
)
|
||||||
|
|
||||||
|
REGEXP_IGNORE_EXTERNAL_DEPENDENCIES="^src/(crypto/ctaes/|leveldb/|secp256k1/|tinyformat.h|univalue/)"
|
||||||
|
|
||||||
|
LOCALE_DEPENDENT_FUNCTIONS=(
|
||||||
|
alphasort # LC_COLLATE (via strcoll)
|
||||||
|
asctime # LC_TIME (directly)
|
||||||
|
asprintf # (via vasprintf)
|
||||||
|
atof # LC_NUMERIC (via strtod)
|
||||||
|
atoi # LC_NUMERIC (via strtol)
|
||||||
|
atol # LC_NUMERIC (via strtol)
|
||||||
|
atoll # (via strtoll)
|
||||||
|
atoq
|
||||||
|
btowc # LC_CTYPE (directly)
|
||||||
|
ctime # (via asctime or localtime)
|
||||||
|
dprintf # (via vdprintf)
|
||||||
|
fgetwc
|
||||||
|
fgetws
|
||||||
|
fold_case # boost::locale::fold_case
|
||||||
|
fprintf # (via vfprintf)
|
||||||
|
fputwc
|
||||||
|
fputws
|
||||||
|
fscanf # (via __vfscanf)
|
||||||
|
fwprintf # (via __vfwprintf)
|
||||||
|
getdate # via __getdate_r => isspace // __localtime_r
|
||||||
|
getwc
|
||||||
|
getwchar
|
||||||
|
is_digit # boost::algorithm::is_digit
|
||||||
|
is_space # boost::algorithm::is_space
|
||||||
|
isalnum # LC_CTYPE
|
||||||
|
isalpha # LC_CTYPE
|
||||||
|
isblank # LC_CTYPE
|
||||||
|
iscntrl # LC_CTYPE
|
||||||
|
isctype # LC_CTYPE
|
||||||
|
isdigit # LC_CTYPE
|
||||||
|
isgraph # LC_CTYPE
|
||||||
|
islower # LC_CTYPE
|
||||||
|
isprint # LC_CTYPE
|
||||||
|
ispunct # LC_CTYPE
|
||||||
|
isspace # LC_CTYPE
|
||||||
|
isupper # LC_CTYPE
|
||||||
|
iswalnum # LC_CTYPE
|
||||||
|
iswalpha # LC_CTYPE
|
||||||
|
iswblank # LC_CTYPE
|
||||||
|
iswcntrl # LC_CTYPE
|
||||||
|
iswctype # LC_CTYPE
|
||||||
|
iswdigit # LC_CTYPE
|
||||||
|
iswgraph # LC_CTYPE
|
||||||
|
iswlower # LC_CTYPE
|
||||||
|
iswprint # LC_CTYPE
|
||||||
|
iswpunct # LC_CTYPE
|
||||||
|
iswspace # LC_CTYPE
|
||||||
|
iswupper # LC_CTYPE
|
||||||
|
iswxdigit # LC_CTYPE
|
||||||
|
isxdigit # LC_CTYPE
|
||||||
|
localeconv # LC_NUMERIC + LC_MONETARY
|
||||||
|
mblen # LC_CTYPE
|
||||||
|
mbrlen
|
||||||
|
mbrtowc
|
||||||
|
mbsinit
|
||||||
|
mbsnrtowcs
|
||||||
|
mbsrtowcs
|
||||||
|
mbstowcs # LC_CTYPE
|
||||||
|
mbtowc # LC_CTYPE
|
||||||
|
mktime
|
||||||
|
normalize # boost::locale::normalize
|
||||||
|
# printf # LC_NUMERIC
|
||||||
|
putwc
|
||||||
|
putwchar
|
||||||
|
scanf # LC_NUMERIC
|
||||||
|
setlocale
|
||||||
|
snprintf
|
||||||
|
sprintf
|
||||||
|
sscanf
|
||||||
|
stod
|
||||||
|
stof
|
||||||
|
stoi
|
||||||
|
stol
|
||||||
|
stold
|
||||||
|
stoll
|
||||||
|
stoul
|
||||||
|
stoull
|
||||||
|
strcasecmp
|
||||||
|
strcasestr
|
||||||
|
strcoll # LC_COLLATE
|
||||||
|
# strerror
|
||||||
|
strfmon
|
||||||
|
strftime # LC_TIME
|
||||||
|
strncasecmp
|
||||||
|
strptime
|
||||||
|
strtod # LC_NUMERIC
|
||||||
|
strtof
|
||||||
|
strtoimax
|
||||||
|
strtol # LC_NUMERIC
|
||||||
|
strtold
|
||||||
|
strtoll
|
||||||
|
strtoq
|
||||||
|
strtoul # LC_NUMERIC
|
||||||
|
strtoull
|
||||||
|
strtoumax
|
||||||
|
strtouq
|
||||||
|
strxfrm # LC_COLLATE
|
||||||
|
swprintf
|
||||||
|
to_lower # boost::locale::to_lower
|
||||||
|
to_title # boost::locale::to_title
|
||||||
|
to_upper # boost::locale::to_upper
|
||||||
|
tolower # LC_CTYPE
|
||||||
|
toupper # LC_CTYPE
|
||||||
|
towctrans
|
||||||
|
towlower # LC_CTYPE
|
||||||
|
towupper # LC_CTYPE
|
||||||
|
trim # boost::algorithm::trim
|
||||||
|
trim_left # boost::algorithm::trim_left
|
||||||
|
trim_right # boost::algorithm::trim_right
|
||||||
|
ungetwc
|
||||||
|
vasprintf
|
||||||
|
vdprintf
|
||||||
|
versionsort
|
||||||
|
vfprintf
|
||||||
|
vfscanf
|
||||||
|
vfwprintf
|
||||||
|
vprintf
|
||||||
|
vscanf
|
||||||
|
vsnprintf
|
||||||
|
vsprintf
|
||||||
|
vsscanf
|
||||||
|
vswprintf
|
||||||
|
vwprintf
|
||||||
|
wcrtomb
|
||||||
|
wcscasecmp
|
||||||
|
wcscoll # LC_COLLATE
|
||||||
|
wcsftime # LC_TIME
|
||||||
|
wcsncasecmp
|
||||||
|
wcsnrtombs
|
||||||
|
wcsrtombs
|
||||||
|
wcstod # LC_NUMERIC
|
||||||
|
wcstof
|
||||||
|
wcstoimax
|
||||||
|
wcstol # LC_NUMERIC
|
||||||
|
wcstold
|
||||||
|
wcstoll
|
||||||
|
wcstombs # LC_CTYPE
|
||||||
|
wcstoul # LC_NUMERIC
|
||||||
|
wcstoull
|
||||||
|
wcstoumax
|
||||||
|
wcswidth
|
||||||
|
wcsxfrm # LC_COLLATE
|
||||||
|
wctob
|
||||||
|
wctomb # LC_CTYPE
|
||||||
|
wctrans
|
||||||
|
wctype
|
||||||
|
wcwidth
|
||||||
|
wprintf
|
||||||
|
)
|
||||||
|
|
||||||
|
function join_array {
|
||||||
|
local IFS="$1"
|
||||||
|
shift
|
||||||
|
echo "$*"
|
||||||
|
}
|
||||||
|
|
||||||
|
REGEXP_IGNORE_KNOWN_VIOLATIONS=$(join_array "|" "${KNOWN_VIOLATIONS[@]}")
|
||||||
|
|
||||||
|
# Invoke "git grep" only once in order to minimize run-time
|
||||||
|
REGEXP_LOCALE_DEPENDENT_FUNCTIONS=$(join_array "|" "${LOCALE_DEPENDENT_FUNCTIONS[@]}")
|
||||||
|
GIT_GREP_OUTPUT=$(git grep -E "[^a-zA-Z0-9_\`'\"<>](${REGEXP_LOCALE_DEPENDENT_FUNCTIONS}(|_r|_s))[^a-zA-Z0-9_\`'\"<>]" -- "*.cpp" "*.h")
|
||||||
|
|
||||||
|
EXIT_CODE=0
|
||||||
|
for LOCALE_DEPENDENT_FUNCTION in "${LOCALE_DEPENDENT_FUNCTIONS[@]}"; do
|
||||||
|
MATCHES=$(grep -E "[^a-zA-Z0-9_\`'\"<>]${LOCALE_DEPENDENT_FUNCTION}(|_r|_s)[^a-zA-Z0-9_\`'\"<>]" <<< "${GIT_GREP_OUTPUT}" | \
|
||||||
|
grep -vE "\.(c|cpp|h):\s*(//|\*|/\*|\").*${LOCALE_DEPENDENT_FUNCTION}" | \
|
||||||
|
grep -vE 'fprintf\(.*(stdout|stderr)')
|
||||||
|
if [[ ${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES} != "" ]]; then
|
||||||
|
MATCHES=$(grep -vE "${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES}" <<< "${MATCHES}")
|
||||||
|
fi
|
||||||
|
if [[ ${REGEXP_IGNORE_KNOWN_VIOLATIONS} != "" ]]; then
|
||||||
|
MATCHES=$(grep -vE "${REGEXP_IGNORE_KNOWN_VIOLATIONS}" <<< "${MATCHES}")
|
||||||
|
fi
|
||||||
|
if [[ ${MATCHES} != "" ]]; then
|
||||||
|
echo "The locale dependent function ${LOCALE_DEPENDENT_FUNCTION}(...) appears to be used:"
|
||||||
|
echo "${MATCHES}"
|
||||||
|
echo
|
||||||
|
EXIT_CODE=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [[ ${EXIT_CODE} != 0 ]]; then
|
||||||
|
echo "Unnecessary locale dependence can cause bugs that are very"
|
||||||
|
echo "tricky to isolate and fix. Please avoid using locale dependent"
|
||||||
|
echo "functions if possible."
|
||||||
|
echo
|
||||||
|
echo "Advice not applicable in this specific case? Add an exception"
|
||||||
|
echo "by updating the ignore list in $0"
|
||||||
|
fi
|
||||||
|
exit ${EXIT_CODE}
|
Loading…
Reference in a new issue