1
0
Fork 0
mirror of https://github.com/opentx/opentx.git synced 2025-07-13 11:29:51 +03:00
opentx/tools/charset.py
2020-12-18 20:37:11 +01:00

87 lines
2.6 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# used ? Δ~\n\t
import os
# Base character set.  get_chars() always places these first, and
# get_chars_encoding() derives its offset from the length of this string.
standard_chars = """ !"#$%&'()*+,-./0123456789:;<=>?°ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz~|≥"""
# 21 consecutive code points starting at U+10000, placed right after the
# standard characters.  Presumably placeholders for extra glyphs -- TODO
# confirm against the font tooling.
extra_chars = "".join(map(chr, range(0x10000, 0x10000 + 21)))
def cjk_chars(lang):
    """Return the sorted, de-duplicated CJK ideographs of a translation file.

    Reads ``../radio/src/translations/<lang>.h.txt`` (relative to the
    directory containing this script) as UTF-8 and keeps every character
    whose code point lies in U+4E00..U+9FFF.

    Returns a list of single-character strings in ascending code-point order.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    source_path = os.path.join(script_dir, "../radio/src/translations/%s.h.txt" % lang)
    with open(source_path, encoding='utf-8') as handle:
        text = handle.read()
    # A set drops repeated characters before the final ordering.
    ideographs = {ch for ch in text if 0x4E00 <= ord(ch) <= 0x9FFF}
    return sorted(ideographs)
# Language-specific characters, keyed by translation code.  get_chars()
# appends the selected entry after the standard and extra characters.
special_chars = {
    "en": "",
    "fr": "éèàîç",
    "de": "ÄäÖöÜüß",
    "cz": "ěščřžýáíéňóůúďťĚŠČŘŽÝÁÍÉŇÓÚŮĎŤ",
    "nl": "",
    "es": "Ññ",
    "fi": "åäöÅÄÖ",
    "it": "àù",
    "pl": "ąćęłńóśżźĄĆĘŁŃÓŚŻŹ",
    "pt": "ÁáÂâÃãÀàÇçÉéÊêÍíÓóÔôÕõÚú",
    "se": "åäöÅÄÖ",
}
# The CJK sets are not hard-coded: they are collected from the translation
# sources when this module is imported.
special_chars.update(
    (cjk_lang, "".join(cjk_chars(cjk_lang))) for cjk_lang in ("cn", "tw")
)
# Upper-case letters that do not get a slot of their own: get_chars() leaves
# these keys out, and get_chars_encoding() gives each one the escape of its
# lower-case counterpart.
subset_lowercase = dict(zip("ČĚŠÚŮŽ", "čěšúůž"))
# print("CN charset: %d symbols" % len(special_chars["cn"]))
def get_chars(subset):
    """Return the ordered character string for a translation subset.

    The result always starts with ``standard_chars`` followed by
    ``extra_chars``.  When *subset* is a key of ``special_chars``, that
    entry's characters are appended in their declared order, leaving out any
    character that is a key of ``subset_lowercase``.  An unknown *subset*
    yields just the standard and extra characters.
    """
    # The former "all" mode (union of every language) was guarded by
    # `if False:` and could never run, so only the per-language path remains.
    language_chars = special_chars.get(subset, "")
    return standard_chars + extra_chars + "".join(
        char for char in language_chars if char not in subset_lowercase
    )
def _first_positions(chars):
    """Map each character of *chars* to the index of its first occurrence."""
    positions = {}
    for position, char in enumerate(chars):
        positions.setdefault(char, position)
    return positions


def get_chars_encoding(subset):
    """Return a dict mapping characters of *subset* to octal escape strings.

    For ``"cn"`` and ``"tw"`` every character of ``special_chars[subset]``
    gets a two-byte escape built from its 1-based position in that string
    (indices from 0x100 upwards are shifted by one).  The first byte is 0xFE
    or 0xFF, selected by bit 8 of the index; the second byte is the low
    eight bits.

    For any other subset every character returned by ``get_chars(subset)``
    that is not in ``standard_chars`` gets a one-byte escape equal to
    ``128 - len(standard_chars)`` plus its position in that string.  Each
    upper-case key of ``subset_lowercase`` then shares the escape of its
    lower-case value when that value was encoded.
    """
    result = {}
    chars = get_chars(subset)
    # Looked up once here instead of calling str.index() per character.
    position_of = _first_positions(chars)

    if subset in ("cn", "tw"):
        special_position_of = _first_positions(special_chars[subset])
        for char in chars:
            if char in special_position_of:
                index = special_position_of[char] + 1
                # Skip 0x100, whose low byte would be zero; presumably so the
                # emitted escape never contains \000 -- TODO confirm.
                if index >= 0x100:
                    index += 1
                # NOTE(review): only bit 8 of the index is kept, so an
                # adjusted index of 0x200 or more would reuse earlier
                # escapes -- confirm the CJK sets stay below that size.
                result[char] = "\\%03o\\%03o" % (0xFE + ((index >> 8) & 0x01), index & 0xFF)
            elif char not in standard_chars and char not in extra_chars:
                result[char] = "\\%03o" % (0xC0 + position_of[char] - len(standard_chars))
    else:
        offset = 128 - len(standard_chars)
        for char in chars:
            if char not in standard_chars:
                result[char] = "\\%03o" % (offset + position_of[char])
        # Upper-case letters omitted by get_chars() reuse the lower-case slot.
        for upper, lower in subset_lowercase.items():
            if lower in result:
                result[upper] = result[lower]
    return result