#!/usr/bin/env python # -*- coding: utf-8 -*- # used ? Δ~\n\t import os standard_chars = """ !"#$%&'()*+,-./0123456789:;<=>?°ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz~|≥""" extra_chars = "".join([chr(0x10000+i) for i in range(21)]) def is_special_char(c): # only 'our' special chars and CJK Unified Ideographs return 192 <= ord(c) <= 383 or 0x4E00 <= ord(c) <= 0x9FFF def get_special_chars(): result = {} for lang in["cn", "cz", "da", "de", "en", "es", "fi", "fr", "he", "it", "jp", "nl", "pl", "pt", "ru", "se", "tw"]: charset = set() tools_path = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(tools_path, "../radio/src/translations/%s.h" % lang), encoding='utf-8') as f: data = f.read() for c in data: if is_special_char(c): charset.add(c) data = list(charset) data.sort() result[lang] = data return result special_chars = get_special_chars() def get_chars(subset): result = standard_chars + extra_chars result += "".join([char for char in special_chars[subset]]) return result def get_chars_encoding(subset): result = {} if subset in ("cn", "tw"): chars = get_chars(subset) for char in chars: if char in special_chars[subset]: index = special_chars[subset].index(char) + 1 if index >= 0x100: index += 1 result[char] = "\\%03o\\%03o" % (0xFE + ((index >> 8) & 0x01), index & 0xFF) elif char not in standard_chars and char not in extra_chars: result[char] = "\\%03o" % (0xC0 + chars.index(char) - len(standard_chars)) else: offset = 128 - len(standard_chars) chars = get_chars(subset) for char in chars: if char not in standard_chars: result[char] = "\\%03o" % (offset + chars.index(char)) return result special_chars_BW = { "en": "", "fr": "éèàîç", "da": "åæøÅÆØ", "de": "ÄäÖöÜüß", "cz": "áčéěíóřšúůýÁÍŘÝžÉ", "nl": "", "es": "ÑñÁáÉéÍíÓóÚú", "fi": "åäöÅÄÖ", "it": "àù", "pl": "ąćęłńóśżźĄĆĘŁŃÓŚŻŹ", "pt": "ÁáÂâÃãÀàÇçÉéÊêÍíÓóÔôÕõÚú", "ru": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя", "se": "åäöÅÄÖ", "cn": "", "tw": "", } subset_lowercase_BW = { "Č": "č", "Ě": "ě", "Š": "š", "Ú": "ú", "Ů": "ů", "Ž": "ž" } def get_chars_BW(subset): result = standard_chars + extra_chars if subset in special_chars_BW: if (subset == "cz"): result += "".join([char for char in special_chars_BW[subset] if char not in subset_lowercase_BW]) else: result += "".join([char for char in special_chars_BW[subset]]) return result def get_chars_encoding_BW(subset): result = {} offset = 128 - len(standard_chars) chars = get_chars_BW(subset) for char in chars: if char not in standard_chars: result[char] = "\\%03o" % (offset + chars.index(char)) if (subset == "cz"): for upper, lower in subset_lowercase_BW.items(): if lower in result: result[upper] = result[lower] return result