#!/usr/bin/env python3
"""
This program slurps in a .XCompose file on standard input (or several
concatenated together, since it won't follow includes) and outputs the
compose sequences in an S-expression-like syntax, showing the prefix
tree of sequences.  This should bring together some of the groups that
use a prefix-character, like * for the Greek alphabet and # for musical
symbols.  And scatter other related things far and wide.  But it might
be fun to look at.

Pass -n on the command line to append the Unicode character name after
each single-character translation.

Prefix conflicts (in which you have a compose sequence that is the
proper prefix of another) and exact conflicts (in which you have a
compose sequence listed twice with two different translations) cannot be
handled gracefully in this notation, and they are not handled gracefully
by this program.  The tie is not broken in a consistent or predictable
fashion, etc: this is a case of GIGO.  Deal with it.
"""

import re
import sys

try:
    import unicodedata
except ImportError:
    # Character names are a nicety; carry on without them.
    unicodedata = None

# One "<keysym>" token of a compose sequence, with optional leading blanks.
_KEY_RE = re.compile(r"\s*<(\w+)>")
# The first double-quoted string on the line, i.e. the translation.
_VALUE_RE = re.compile(r'[^"]*"(.+?)"')


def _char_name(text):
    """Return the Unicode name of the one-character string *text*, or None."""
    if unicodedata is None:
        return None
    try:
        return unicodedata.name(text)
    except (TypeError, ValueError):
        # TypeError: not exactly one character; ValueError: unnamed character.
        return None


def showdict(data, indent):
    """Print the prefix tree *data* to stdout as nested "(key ...)" forms.

    data   -- dict mapping a key name to either a nested dict (more keys
              follow) or a string (the translation of the sequence).
    indent -- number of columns to indent each key printed at this level.
              A negative value means "print nothing before the key, we are
              continuing the parent's line"; its magnitude still records
              the nesting depth so deeper branches can be indented properly.

    Keys are printed in sorted order.  If "-n" is present in sys.argv, the
    Unicode name of each single-character translation is printed after it.
    """
    show_names = "-n" in sys.argv
    for position, key in enumerate(sorted(data)):
        if position:
            # Every sibling after the first starts on its own line.
            print()
        value = data[key]
        print(" " * max(indent, 0) + "(" + key, end=" ")
        if isinstance(value, dict):
            # Sneaky trick: we don't want to go newline-indent over and
            # over for long sequences, i.e. cases where there is only one
            # possible follower.  So we skip the newline in those cases
            # and tell the next-lower call not to indent by passing a
            # negative indent.  We don't just pass 0 or 1 because if a
            # call *further down* turns out not to be an only case, it
            # needs to know the right indent to pass along.  So in a case
            # like R-O-{CK|LL}, the O is unique after the R, so no
            # linefeed, but then the {C|L} are not unique after the O.
            depth = abs(indent) + 4
            if len(value) > 1:
                print()
                showdict(value, depth)
            else:
                # Negate the whole depth: the magnitude must keep growing
                # along a chain of only-followers.
                showdict(value, -depth)
        else:
            print(" " + value, end=" ")
            if show_names:
                label = _char_name(value)
                if label is not None:
                    print(label, end=" ")
        print(")", end=" ")


def _parse_line(line):
    """Split one .XCompose line into (key names, translation).

    Returns None when the line does not start with a "<keysym>" sequence
    (comments, includes, blank lines).  A line with keys but no quoted
    string is reported on stdout and given the translation '???'.
    """
    keys = []
    pos = 0
    while True:
        found = _KEY_RE.match(line, pos)
        if not found:
            break
        keys.append(found.group(1))
        pos = found.end()
    if not keys:
        return None
    quoted = _VALUE_RE.match(line)
    if quoted:
        return keys, quoted.group(1)
    # Shouldn't happen, but just in case.
    print("couldn't make sense of line: " + line)
    return keys, '???'


def _insert(tree, keys, value):
    """Store *value* under the key path *keys* in the nested dict *tree*.

    On a prefix conflict (a shorter sequence already ends where this one
    needs to continue) the new sequence is silently ignored.
    """
    node = tree
    for key in keys[:-1]:
        if not isinstance(node, dict):
            break  # prefix conflict
        node = node.setdefault(key, {})
    if isinstance(node, dict):
        node[keys[-1]] = value


def main():
    """Read compose definitions from stdin and print their prefix tree."""
    listing = {}
    for line in sys.stdin:
        parsed = _parse_line(line)
        if parsed is not None:
            keys, value = parsed
            _insert(listing, keys, value)
    print("hit end")
    showdict(listing, 0)


if __name__ == "__main__":
    main()