scripts: move update-translations.py to maintainer-tools repo
This commit is contained in:
parent
e00ecb3d7a
commit
3d50fe2c1f
3 changed files with 2 additions and 229 deletions
|
@ -120,18 +120,6 @@ If there are 'unsupported' symbols, the return value will be 1 a list like this
|
||||||
.../64/test_bitcoin: symbol std::out_of_range::~out_of_range() from unsupported version GLIBCXX_3.4.15
|
.../64/test_bitcoin: symbol std::out_of_range::~out_of_range() from unsupported version GLIBCXX_3.4.15
|
||||||
.../64/test_bitcoin: symbol _ZNSt8__detail15_List_nod from unsupported version GLIBCXX_3.4.15
|
.../64/test_bitcoin: symbol _ZNSt8__detail15_List_nod from unsupported version GLIBCXX_3.4.15
|
||||||
|
|
||||||
update-translations.py
|
|
||||||
======================
|
|
||||||
|
|
||||||
Run this script from the root of the repository to update all translations from transifex.
|
|
||||||
It will do the following automatically:
|
|
||||||
|
|
||||||
- fetch all translations
|
|
||||||
- post-process them into valid and committable format
|
|
||||||
- add missing translations to the build system (TODO)
|
|
||||||
|
|
||||||
See doc/translation-process.md for more information.
|
|
||||||
|
|
||||||
circular-dependencies.py
|
circular-dependencies.py
|
||||||
========================
|
========================
|
||||||
|
|
||||||
|
|
|
@ -1,215 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# Copyright (c) 2014 Wladimir J. van der Laan
|
|
||||||
# Distributed under the MIT software license, see the accompanying
|
|
||||||
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
||||||
'''
|
|
||||||
Run this script from the root of the repository to update all translations from
|
|
||||||
transifex.
|
|
||||||
It will do the following automatically:
|
|
||||||
|
|
||||||
- fetch all translations using the tx tool
|
|
||||||
- post-process them into valid and committable format
|
|
||||||
- remove invalid control characters
|
|
||||||
- remove location tags (makes diffs less noisy)
|
|
||||||
|
|
||||||
TODO:
|
|
||||||
- auto-add new translations to the build system according to the translation process
|
|
||||||
'''
|
|
||||||
import subprocess
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import io
|
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
|
|
||||||
# Executable name of the transifex command-line tool
TX = 'tx'
# File name of the source-language translation file (never post-processed)
SOURCE_LANG = 'bitcoin_en.ts'
# Directory that holds the locale (.ts) files
LOCALE_DIR = 'src/qt/locale'
# A translation file with fewer messages than this is not written back at all
MIN_NUM_MESSAGES = 10
# Pattern used to detect Bitcoin addresses inside translated text
ADDRESS_REGEXP = re.compile('([13]|bc1)[a-zA-Z0-9]{30,}')
|
|
||||||
|
|
||||||
def check_at_repository_root():
    '''Abort unless the current working directory contains a '.git' entry.

    When '.git' does not exist in the current directory, two diagnostic lines
    are printed to stderr and the process exits with status 1. Otherwise the
    function returns None without printing anything.
    '''
    if not os.path.exists('.git'):
        # Both lines are diagnostics, so both go to stderr; keeping stdout
        # clean matches how the other error paths in this script report.
        print('No .git directory found', file=sys.stderr)
        print('Execute this script at the root of the repository', file=sys.stderr)
        sys.exit(1)
|
|
||||||
|
|
||||||
def fetch_all_translations():
    '''Run `tx pull -f -a`; exit with status 1 if it returns non-zero.'''
    status = subprocess.call([TX, 'pull', '-f', '-a'])
    if status == 0:
        return
    print('Error while fetching translations', file=sys.stderr)
    sys.exit(1)
|
|
||||||
|
|
||||||
def find_format_specifiers(s):
    '''Return the character following each '%' in *s*, in order of appearance.

    The character right after a '%' is consumed together with it, so '%%'
    contributes a single '%' entry. A '%' that is the very last character of
    *s* raises IndexError, which callers may treat as a parse error.
    '''
    found = []
    idx = s.find('%')
    while idx >= 0:
        found.append(s[idx + 1])
        # Resume after the specifier character that was just recorded.
        idx = s.find('%', idx + 2)
    return found
|
|
||||||
|
|
||||||
def split_format_specifiers(specifiers):
    '''Partition specifiers into numeric (Qt) ones and all others (strprintf).

    Returns a `(numeric, other)` tuple: `numeric` is a set of the digit
    specifiers '1'..'9', `other` is a list of the remaining specifiers in
    input order.
    '''
    qt_digits = frozenset('123456789')
    numeric, other = [], []
    for spec in specifiers:
        bucket = numeric if spec in qt_digits else other
        bucket.append(spec)

    # As soon as a numeric specifier is present the message is assumed to be
    # Qt-formatted (see https://doc.qt.io/qt-5/qstring.html#arg), where only
    # numeric formats are substituted. "(percentage: %1%)" is therefore valid
    # without any escaping, and the trailing '%)' must not be reported as a
    # printf-style specifier.
    if numeric:
        other = []

    # Qt specifiers may appear in any order (hence a set); strprintf
    # specifiers must keep their order (hence a list).
    return set(numeric), other
|
|
||||||
|
|
||||||
def sanitize_string(s):
    '''Return *s* with every newline replaced by a space, for one-line printing.'''
    return ' '.join(s.split('\n'))
|
|
||||||
|
|
||||||
def check_format_specifiers(source, translation, errors, numerus):
    '''Check that *translation* uses the same format specifiers as *source*.

    Returns True when the specifiers match, False otherwise. On failure a
    human-readable message is appended to the *errors* list. When *numerus*
    is true, a translation that contains no '%' at all is accepted for a
    source whose only specifier is '%n'.
    '''
    source_f = split_format_specifiers(find_format_specifiers(source))
    # A source message must never mix Qt and strprintf format specifiers;
    # if this fires, go change the source as this is hacky and confusing!
    assert not (source_f[0] and source_f[1])

    try:
        translation_f = split_format_specifiers(find_format_specifiers(translation))
    except IndexError:
        # Raised when the translation ends with a dangling '%'.
        errors.append("Parse error in translation for '%s': '%s'" % (sanitize_string(source), sanitize_string(translation)))
        return False

    if source_f == translation_f:
        return True

    # Allow numerus translations to omit the %n specifier (usually when it
    # only has one possible value).
    if numerus and source_f == (set(), ['n']) and translation_f == (set(), []) and '%' not in translation:
        return True

    errors.append("Mismatch between '%s' and '%s'" % (sanitize_string(source), sanitize_string(translation)))
    return False
|
|
||||||
|
|
||||||
def all_ts_files(suffix=''):
    '''Yield `(filename, filepath)` for each translation file in LOCALE_DIR.

    Only entries ending in '.ts' + *suffix* are considered, and the source
    language file is skipped. When *suffix* is non-empty it is stripped from
    the yielded name, so `filepath` never carries the suffix.
    '''
    wanted_ending = '.ts' + suffix
    skipped_name = SOURCE_LANG + suffix
    for entry in os.listdir(LOCALE_DIR):
        # Process only language files, and never the source language.
        if entry == skipped_name or not entry.endswith(wanted_ending):
            continue
        name = entry[:-len(suffix)] if suffix else entry
        yield name, os.path.join(LOCALE_DIR, name)
|
|
||||||
|
|
||||||
# Matches every byte below 0x20 except line feed (0x0a) and carriage return (0x0d)
FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]')


def remove_invalid_characters(s):
    '''Return the bytes object *s* with all bytes matched by FIX_RE removed.'''
    return re.sub(FIX_RE, b'', s)
|
|
||||||
|
|
||||||
# Override cdata escape function to make our output match Qt's (optional, just for cleaner diffs for
# comparison, disabled by default)
_orig_escape_cdata = None


def escape_cdata(text):
    '''Escape *text* like the saved ElementTree escaper, plus both quote characters.

    `_orig_escape_cdata` must have been set to the original escape function
    before this is called (postprocess_translations does so when diff
    reduction is requested). Quotes are escaped after the original function
    has run, so the '&' in the inserted entities is not escaped a second time.
    '''
    text = _orig_escape_cdata(text)
    # The replacement values are XML entity references, not bare quote characters.
    text = text.replace("'", '&apos;')
    text = text.replace('"', '&quot;')
    return text
|
|
||||||
|
|
||||||
def contains_bitcoin_addr(text, errors):
    '''Return True if *text* matches ADDRESS_REGEXP, recording a message in *errors*.

    A *text* of None is treated as containing no address.
    '''
    if text is None or ADDRESS_REGEXP.search(text) is None:
        return False
    errors.append('Translation "%s" contains a bitcoin address. This will be removed.' % (text))
    return True
|
|
||||||
|
|
||||||
def postprocess_translations(reduce_diff_hacks=False):
    '''Validate and clean every translation file in LOCALE_DIR.

    Each translation file is renamed to '<name>.orig', parsed after control
    characters are stripped, and checked message by message. Translations
    with mismatching format specifiers or an embedded Bitcoin address are
    marked unfinished; unfinished messages and all location tags are dropped.
    Files left with fewer than MIN_NUM_MESSAGES messages are not written back.

    When *reduce_diff_hacks* is true, ElementTree's cdata escaper is replaced
    by `escape_cdata` and ' />' is rewritten to '/>' in the output.

    Returns True if at least one translation was rejected, False otherwise.
    '''
    global _orig_escape_cdata

    print('Checking and postprocessing...')

    if reduce_diff_hacks:
        _orig_escape_cdata = ET._escape_cdata
        ET._escape_cdata = escape_cdata

    # Move every translation aside; the cleaned result is written back under
    # the original file name further down.
    for _, ts_path in all_ts_files():
        os.rename(ts_path, ts_path + '.orig')

    have_errors = False
    for ts_name, ts_path in all_ts_files('.orig'):
        # Pre-fixups to cope with transifex output: the encoding has to be
        # overridden because 'utf8' is not understood, only 'utf-8'.
        xml_parser = ET.XMLParser(encoding='utf-8')
        with open(ts_path + '.orig', 'rb') as src:
            # Control characters must be removed over the entire file,
            # otherwise the XML parser will fail.
            raw = remove_invalid_characters(src.read())
        tree = ET.parse(io.BytesIO(raw), parser=xml_parser)
        root = tree.getroot()

        # Iterate over all messages in the file.
        for context in root.findall('context'):
            for message in context.findall('message'):
                numerus = message.get('numerus') == 'yes'
                source = message.find('source').text
                translation_node = message.find('translation')

                # Numerus messages carry one translation per numerusform.
                if numerus:
                    candidates = [form.text for form in translation_node.findall('numerusform')]
                else:
                    candidates = [translation_node.text]

                for candidate in candidates:
                    if candidate is None:
                        continue
                    errors = []
                    valid = check_format_specifiers(source, candidate, errors, numerus) and not contains_bitcoin_addr(candidate, errors)

                    for error in errors:
                        print('%s: %s' % (ts_name, error))

                    if valid:
                        continue
                    # Invalid: clear the string and mark the node unfinished.
                    translation_node.clear()
                    translation_node.set('type', 'unfinished')
                    have_errors = True

                # Remove location tags
                for location in message.findall('location'):
                    message.remove(location)

                # Remove entire message if it is an unfinished translation
                if translation_node.get('type') == 'unfinished':
                    context.remove(message)

        # Check if the document is (virtually) empty, and skip writing it if so.
        num_messages = sum(len(context.findall('message')) for context in root.findall('context'))
        if num_messages < MIN_NUM_MESSAGES:
            print('Removing %s, as it contains only %i messages' % (ts_path, num_messages))
            continue

        # Write the fixed-up tree.
        if not reduce_diff_hacks:
            tree.write(ts_path, encoding='utf-8')
            continue

        # Diff reduction requested: replace some XML to 'sanitize' to qt formatting.
        buffer = io.BytesIO()
        tree.write(buffer, encoding='utf-8')
        with open(ts_path, 'wb') as dst:
            dst.write(buffer.getvalue().replace(b' />', b'/>'))
    return have_errors
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Refuse to run anywhere but the top of the repository checkout.
    check_at_repository_root()
    # Pull the translations with the transifex tool...
    fetch_all_translations()
    # ...then validate and clean what was fetched.
    postprocess_translations()
|
|
||||||
|
|
|
@ -65,9 +65,9 @@ username = USERNAME
|
||||||
The Transifex Bitcoin project config file is included as part of the repo. It can be found at `.tx/config`, however you shouldn’t need to change anything.
|
The Transifex Bitcoin project config file is included as part of the repo. It can be found at `.tx/config`, however you shouldn’t need to change anything.
|
||||||
|
|
||||||
### Synchronising translations
|
### Synchronising translations
|
||||||
To assist in updating translations, we have created a script to help.
|
To assist in updating translations, a helper script is available in the [maintainer-tools repo](https://github.com/bitcoin-core/bitcoin-maintainer-tools).
|
||||||
|
|
||||||
1. `python contrib/devtools/update-translations.py`
|
1. `python3 ../bitcoin-maintainer-tools/update-translations.py`
|
||||||
2. `git add` new translations from `src/qt/locale/`
|
2. `git add` new translations from `src/qt/locale/`
|
||||||
3. Update `src/qt/bitcoin_locale.qrc` manually or via
|
3. Update `src/qt/bitcoin_locale.qrc` manually or via
|
||||||
```bash
|
```bash
|
||||||
|
|
Loading…
Reference in a new issue