import os
import re
from itertools import chain
from typing import Iterable
import unicodedata2 as unicodedata
# Location of the bundled Unicode emoji data files, relative to this module.
DIR = os.path.join(os.path.dirname(__file__), 'data', 'emoji')
# Names of the two data files read from DIR.
SEQUENCES_FN = 'emoji-sequences.txt'
ZWJ_SEQUENCES_FN = 'emoji-zwj-sequences.txt'

#: Emoji that should be included in the emoji suggestions dropdown
SUGGESTABLE_EMOJI: dict[str, str] = {}
#: Other emoji; allowed to be used, but not suggested because there are too many of them
OTHER_ALLOWED_EMOJI: dict[str, str] = {}

# Maps each group label found in the data files to the table (one of the two
# dicts above) that collects that group's emoji.  The values are the table
# objects themselves, so writing through GROUP_HOMES fills the tables.
GROUP_HOMES = {
    **dict.fromkeys(
        (
            'Basic_Emoji',
            'Emoji_Keycap_Sequence',
            'RGI_Emoji_Flag_Sequence',
            'RGI_Emoji_Tag_Sequence',
        ),
        SUGGESTABLE_EMOJI,
    ),
    **dict.fromkeys(
        ('RGI_Emoji_Modifier_Sequence', 'Family', 'Role', 'Gendered', 'Hair'),
        OTHER_ALLOWED_EMOJI,
    ),
    'Other': SUGGESTABLE_EMOJI,
}
def _get_emoji() -> Iterable[tuple[str, str, str]]:
    """Yield ``(group, emoji, name)`` for each entry of the emoji sequences file.

    Reads the file ``SEQUENCES_FN`` inside ``DIR``.  A data line has the form
    ``<code points> ; <group> ; <name> # <comment>`` where the code points are
    either

    * a space-separated list of hex code points, yielded as a single string
      named by the file's ``<name>`` field, or
    * an inclusive ``START..END`` range, expanded into one entry per code
      point, each named by its lower-cased Unicode character name (the
      file's ``<name>`` field is ignored for ranges).

    Lines that do not match this form (comments, blank lines) are skipped.

    Raises:
        OSError: if the data file cannot be opened.
        ValueError: if ``unicodedata.name`` has no name for a code point in
            a range.
    """
    # Compiled once instead of being looked up again for every line.
    line_re = re.compile(
        r'^('
        r'(?P<points>[0-9A-F ]+?)|'
        r'(?P<start>[0-9A-F]+)\.\.(?P<end>[0-9A-F]+)'
        r')\s*;\s*(?P<block>\S+)\s*; *(?P<name>.+?) *#.*$'
    )
    # Unicode data files are published as UTF-8; decode explicitly rather than
    # relying on the platform's default encoding.
    with open(os.path.join(DIR, SEQUENCES_FN), encoding='utf-8') as seq_f:
        # Iterate the file lazily; there is no need to hold every line at once.
        for line in seq_f:
            match = line_re.match(line)
            if match is None:
                continue
            groups = match.groupdict()
            if points := groups.get('points'):
                # Explicit sequence: join the code points into one string.
                yield (
                    groups['block'],
                    ''.join(chr(int(point, base=16)) for point in points.split(' ')),
                    groups['name'],
                )
            else:
                # Range: both ends are inclusive, hence the ``+ 1``.
                first = int(groups['start'], base=16)
                last = int(groups['end'], base=16)
                for code_point in range(first, last + 1):
                    char = chr(code_point)
                    yield groups['block'], char, unicodedata.name(char).lower()
def _get_zwj_sequences() -> Iterable[tuple[str, str, str]]:
    """Yield ``(group, emoji, description)`` for each entry of the ZWJ sequences file.

    Reads the file ``ZWJ_SEQUENCES_FN`` inside ``DIR``.  The file is divided
    into sections by comment lines of the form
    ``# RGI_Emoji_ZWJ_Sequence: <group>``; every data line that follows is
    yielded with the most recently seen ``<group>`` (the empty string if no
    such header has been seen yet).  A data line has the form
    ``<code points> ; <type> ; <description> # <comment>``; the hex code
    points are joined into a single string.  All other lines are skipped.

    Raises:
        OSError: if the data file cannot be opened.
    """
    # Compiled once instead of being looked up again for every line.
    header_re = re.compile(r'^# RGI_Emoji_ZWJ_Sequence: (?P<group>.*)$')
    entry_re = re.compile(
        r'^(?P<points>[0-9A-F *]+?) *;.*; *(?P<description>.+?) *#.*$'
    )
    # Unicode data files are published as UTF-8; decode explicitly rather than
    # relying on the platform's default encoding.
    with open(os.path.join(DIR, ZWJ_SEQUENCES_FN), encoding='utf-8') as seq_f:
        group = ''
        # Iterate the file lazily; there is no need to hold every line at once.
        for line in seq_f:
            if header := header_re.match(line):
                # A section header switches the group for the lines below it.
                group = header.group('group')
                continue
            if entry := entry_re.match(line):
                yield (
                    group,
                    ''.join(
                        chr(int(point, 16))
                        for point in entry.group('points').split(' ')
                    ),
                    entry.group('description'),
                )
# Sort every parsed entry into one of the two lookup tables.
for group, char, name in chain(_get_emoji(), _get_zwj_sequences()):
    if not name.startswith('emoji '):
        # Regular entry: file it under the table registered for its group.
        GROUP_HOMES[group][char] = name
        continue
    # don't suggest modifiers (eg. lone hair or skin tone indicators)
    OTHER_ALLOWED_EMOJI[char] = name
# some checks that we've read stuff correctly:
def _check_emoji(src: dict[str, str], char: str, expected_name: str) -> None:
    """Assert that ``src`` maps ``char`` to ``expected_name``.

    Args:
        src: Table mapping emoji strings to their names.
        char: Emoji string to look up.
        expected_name: Name the table is expected to hold for ``char``.

    Raises:
        KeyError: if ``char`` is not a key of ``src``.
        AssertionError: if the stored name differs from ``expected_name``.
            Because this is an ``assert`` statement, the comparison is
            skipped when Python runs with ``-O``.
    """
    actual_name = src[char]
    assert (
        actual_name == expected_name
    ), f'got {actual_name!r} for {char!r}, expected {expected_name!r}'
# The emoji are spelled as code-point escapes so that invisible joiners
# (U+200D ZERO WIDTH JOINER) and recently added characters cannot be silently
# dropped by editors or text-processing tools.
_check_emoji(SUGGESTABLE_EMOJI, '\u2B50', 'star')
# dog + ZWJ + safety vest
_check_emoji(SUGGESTABLE_EMOJI, '\U0001F415\u200D\U0001F9BA', 'service dog')
_check_emoji(SUGGESTABLE_EMOJI, '\U0001F92F', 'shocked face with exploding head')
# U+1FADF, added in Unicode 16.0
_check_emoji(SUGGESTABLE_EMOJI, '\U0001FADF', 'splatter')
# person + dark skin tone modifier + ZWJ + white hair component
_check_emoji(
    OTHER_ALLOWED_EMOJI,
    '\U0001F9D1\U0001F3FF\u200D\U0001F9B3',
    'person: dark skin tone, white hair',
)
_check_emoji(OTHER_ALLOWED_EMOJI, '\U0001F9B3', 'emoji component white hair')
if __name__ == '__main__':
    # When run as a script, dump both tables for manual inspection.
    from pprint import pprint

    for table in (SUGGESTABLE_EMOJI, OTHER_ALLOWED_EMOJI):
        pprint(table)