mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-08-04 02:22:34 +02:00
lint: Convert lint-locale-dependence.sh to Python
This commit is contained in:
258
test/lint/lint-locale-dependence.py
Executable file
258
test/lint/lint-locale-dependence.py
Executable file
@@ -0,0 +1,258 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2018-2022 The Bitcoin Core developers
|
||||
# Distributed under the MIT software license, see the accompanying
|
||||
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
#
|
||||
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
|
||||
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
|
||||
# whereas no such call is made in bitcoind.
|
||||
#
|
||||
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
|
||||
# specified by the user's LC_ALL (or LC_*) environment variable as the new
|
||||
# C locale.
|
||||
#
|
||||
# In contrast, bitcoind does not opt in to localization -- no call to
|
||||
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
|
||||
# thus ignored.
|
||||
#
|
||||
# This results in situations where bitcoind is guaranteed to be running
|
||||
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
|
||||
# depending on the user's environment variables.
|
||||
#
|
||||
# An example: Assuming the environment variable LC_ALL=de_DE then the
|
||||
# call std::to_string(1.23) will return "1.230000" in bitcoind but
|
||||
# "1,230000" in bitcoin-qt.
|
||||
#
|
||||
# From the Qt documentation:
|
||||
# "On Unix/Linux Qt is configured to use the system locale settings by default.
|
||||
# This can cause a conflict when using POSIX functions, for instance, when
|
||||
# converting between data types such as floats and strings, since the notation
|
||||
# may differ between locales. To get around this problem, call the POSIX function
|
||||
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
|
||||
# or QCoreApplication to reset the locale that is used for number formatting to
|
||||
# "C"-locale."
|
||||
#
|
||||
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
|
||||
# https://stackoverflow.com/a/34878283 for more details.
|
||||
#
|
||||
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf.
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
from subprocess import check_output, CalledProcessError
|
||||
|
||||
|
||||
# Regular expressions that main() applies with re.search() to each line of
# `git grep` output ("<path>:<source line>"). A line matching any of them is
# accepted as a known use of a locale dependent function and is not reported.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/dbwrapper_tests.cpp:.*snprintf",
    "src/test/fuzz/locale.cpp",
    "src/test/fuzz/string.cpp",
    "src/test/util_tests.cpp",
]
|
||||
|
||||
# Paths left out of the search. Despite the REGEXP_ prefix, each entry is
# handed to `git grep` as a ":(exclude)<path>" pathspec by
# find_locale_dependent_function_uses(), not used as a regular expression.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
    "src/univalue/",
]
|
||||
|
||||
# Names of the functions this linter treats as locale dependent.
#
# Each entry is spliced verbatim into the regular expressions built by
# find_locale_dependent_function_uses() and main(), where an optional "_r" or
# "_s" suffix is also accepted. main() reports findings in the order the
# names are listed here. Trailing comments record the locale category
# involved, or the function through which the dependence arises.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    #"strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf"
]
|
||||
|
||||
|
||||
def find_locale_dependent_function_uses():
    """Collect the `git grep` hits for all locale dependent function names.

    Runs one extended-regex `git grep` over the tracked ``*.cpp`` and ``*.h``
    files, leaving out every path in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS.
    A hit is any name from LOCALE_DEPENDENT_FUNCTIONS (optionally followed by
    ``_r`` or ``_s``) that has, on both sides, a character other than an
    identifier character, backslash, backtick, quote or angle bracket.

    Returns:
        The lines printed by `git grep`, as a list of strings. The list is
        empty when the command exits with a status of 1 or lower (for grep,
        status 1 means nothing matched).

    Raises:
        CalledProcessError: if `git grep` exits with a status above 1.
    """
    # Character class required on each side of a name so that it is not
    # merely a fragment of a longer identifier, string or template argument.
    boundary = "[^a-zA-Z0-9_\\`'\"<>]"
    alternatives = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    pattern = boundary + "(" + alternatives + "(_r|_s)?)" + boundary

    pathspecs = ["*.cpp", "*.h"]
    pathspecs.extend(":(exclude)" + path for path in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS)
    command = ["git", "grep", "-E", pattern, "--"] + pathspecs

    try:
        output = check_output(command, universal_newlines=True, encoding="utf8")
    except CalledProcessError as e:
        if e.returncode > 1:
            raise
        # Nothing matched: report that as "no uses", not as a failure.
        return []
    return output.splitlines()
|
||||
|
||||
|
||||
def main():
    """Report uses of locale dependent functions and exit with the lint status.

    Takes the `git grep` lines from find_locale_dependent_function_uses() and,
    for each name in LOCALE_DEPENDENT_FUNCTIONS, prints the lines that use it.
    A line is skipped when the source line starts (after whitespace) with a
    comment or string opener before the name, or when it matches one of the
    KNOWN_VIOLATIONS patterns.

    Always terminates through sys.exit(): status 0 when nothing is reported,
    status 1 (after printing guidance) otherwise.
    """
    exit_code = 0

    # Joining an empty list gives the empty pattern, which matches every line
    # and would silently suppress all findings, so only build the ignore
    # pattern when there is something to ignore.
    known_violation = re.compile("|".join(KNOWN_VIOLATIONS)) if KNOWN_VIOLATIONS else None
    git_grep_output = find_locale_dependent_function_uses()

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        # Compile once per function rather than once per grep line.
        use_pattern = re.compile("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]")
        # "<path>.(c|cpp|h):" followed by a source line that opens with //, *,
        # /* or a double quote: the name sits in a comment or string literal.
        comment_pattern = re.compile("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)

        matches = [line for line in git_grep_output
                   if use_pattern.search(line)
                   and not comment_pattern.search(line)
                   and not (known_violation and known_violation.search(line))]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)
|
||||
|
||||
|
||||
# Run the lint only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user