Blob Blame History Raw
#!/usr/bin/env python3

# Location of the Unicode Character Database file this script checks.
UNICODE_DATA_PATH = '/usr/share/unicode/ucd/UnicodeData.txt'

# Unicode 15 has 34924
MIN_EXPECTED_CODEPOINTS = 34900


def count_codepoints(lines):
    """Validate and count the records in an iterable of UnicodeData lines.

    Each line must contain at least three ';'-separated fields, the first
    of which is a hexadecimal code point that ``chr()`` accepts.

    Args:
        lines: Iterable of text lines (an open text file works directly).

    Returns:
        The number of lines processed.

    Raises:
        ValueError: If a line has fewer than three fields, the first field
            is not valid hexadecimal, or the value is outside the range
            ``chr()`` accepts.
    """
    total = 0
    for line in lines:
        # Unpacking three names is deliberate: it rejects truncated records.
        codepoint_string, _name, _category = line.split(';')[:3]
        # chr() is called only for its range check; the result is not needed.
        chr(int(codepoint_string, 16))
        total += 1
    return total


def main():
    """Count the records in the system UnicodeData.txt and sanity-check it.

    Prints the count, then exits with a non-zero status if the count is not
    above MIN_EXPECTED_CODEPOINTS.
    """
    # Iterate the file object directly so lines are streamed rather than
    # loaded into a list first.
    with open(UNICODE_DATA_PATH, mode='rt', encoding='utf-8') as unicode_data:
        codepoints = count_codepoints(unicode_data)

    print(codepoints)
    # An explicit check instead of `assert`, which is removed under
    # `python -O` and would let a truncated data file pass unnoticed.
    if codepoints <= MIN_EXPECTED_CODEPOINTS:
        raise SystemExit(
            f'{UNICODE_DATA_PATH}: expected more than '
            f'{MIN_EXPECTED_CODEPOINTS} entries, found {codepoints}'
        )


if __name__ == '__main__':
    main()