diff options
-rw-r--r-- | dotXCompose | 5 | ||||
-rw-r--r-- | treeprint.py | 77 |
2 files changed, 82 insertions, 0 deletions
diff --git a/dotXCompose b/dotXCompose index 9ca14c4..83376be 100644 --- a/dotXCompose +++ b/dotXCompose @@ -303,6 +303,8 @@ include "%L" <Multi_key> <underscore> <k> : "ₖ" U2096 # LATIN SUBSCRIPT SMALL LETTER K <Multi_key> <underscore> <m> : "ₘ" U2098 # LATIN SUBSCRIPT SMALL LETTER M <Multi_key> <underscore> <n> : "ₙ" U2099 # LATIN SUBSCRIPT SMALL LETTER N +<Multi_key> <underscore> <minus> : "₋" U208B # SUBSCRIPT MINUS +<Multi_key> <underscore> <plus> : "₊" U208A # SUBSCRIPT PLUS # Custom additions: Greek letters. Mapping corresponds to Emacs Greek @@ -614,6 +616,8 @@ include "%L" <Multi_key> <asciicircum> <ampersand> <y> : "ʸ" U02B8 # SUPERSCRIPT Y #Maybe add: ˀˁ˃˂ <Multi_key> <asciicircum> <minus> : "⁻" U207B # SUPERSCRIPT MINUS +<Multi_key> <asciicircum> <plus> : "⁺" U207A # SUPERSCRIPT PLUS + <Multi_key> <asciitilde> <asciitilde> : "≈" U2248 # ALMOST EQUAL TO <Multi_key> <s> <h> : "ʃ" U0283 # LATIN SMALL LETTER ESH <Multi_key> <z> <h> : "ʒ" U0292 # LATIN SMALL LETTER EZH @@ -672,6 +676,7 @@ include "%L" <Multi_key> <numbersign> <C> : "𝄡" U0001d121 # MUSICAL SYMBOL C CLEF <Multi_key> <numbersign> <o> <slash> : "♪" U266a # EIGHTH NOTE <Multi_key> <numbersign> <o> <o> : "♫" U266b # BEAMED EIGHTH NOTES +<Multi_key> <numbersign> <percent> : "♫" U266b # BEAMED EIGHTH NOTES # Combining accents, for making things you don't have precomposed chars or keystrokes for: diff --git a/treeprint.py b/treeprint.py new file mode 100644 index 0000000..ce4f672 --- /dev/null +++ b/treeprint.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +import sys +import re + +""" +This program slurps in a .XCompose file on standard input (or several +concatenated together, since it won't follow includes) and outputs the +compose sequences in an S-expression-like syntax, showing the prefix tree +of sequences. This should bring together some of the groups that use a +prefix-character, like * for the Greek alphabet and # for musical symbols. +And scatter other related things far and wide. 
# (Program description, continued.)  But it might be fun to look at.
#
# Prefix conflicts (in which you have a compose sequence that is the proper
# prefix of another) and exact conflicts (in which you have a compose
# sequence listed twice with two different translations) cannot be handled
# gracefully in this notation, and they are not handled gracefully by this
# program.  Which definition survives depends on the order of the input
# lines: this is a case of GIGO.  Deal with it.

# One "<keysym>" token, with optional leading whitespace.
_KEY_RE = re.compile(r"\s*<(\w+)>")
# The first double-quoted result string; backslash escapes such as \" and \\
# are kept as part of the value instead of terminating it early.
_VALUE_RE = re.compile(r'[^"]*"((?:\\.|[^"\\])+)"')


def showdict(data, indent, out=None):
    """Write the prefix tree ``data`` as an indented S-expression-like listing.

    Leaves are written as ``(key value)``.  Nested dicts are written as
    ``(key`` on one line, their children indented four more columns, and a
    closing ``)`` on its own line so that the parentheses balance.

    data   -- dict mapping keysym names to either a result string or a
              nested dict of the same shape.
    indent -- number of spaces to put in front of each entry at this level.
    out    -- file-like object to write to; defaults to ``sys.stdout``.
    """
    if out is None:
        out = sys.stdout
    pad = " " * indent
    for key, value in data.items():
        if isinstance(value, dict):
            out.write("%s(%s\n" % (pad, key))
            showdict(value, indent + 4, out)
            out.write("%s)\n" % pad)
        else:
            out.write("%s(%s %s)\n" % (pad, key, value))


def _parse_line(line):
    """Split one .XCompose line into ``(keys, value)``.

    Returns None when the line does not start with a ``<keysym>`` token
    (comments, blank lines, ``include`` directives).  ``value`` is None when
    the keys are not followed by a double-quoted result string.
    """
    keys = []
    pos = 0
    while True:
        found = _KEY_RE.match(line, pos)
        if not found:
            break
        keys.append(found.group(1))
        pos = found.end()
    if not keys:
        return None
    found = _VALUE_RE.match(line, pos)
    return keys, (found.group(1) if found else None)


def _add_sequence(tree, keys, value):
    """Store ``value`` under the path ``keys`` in ``tree``.

    Returns False, leaving the tree untouched, when a proper prefix of
    ``keys`` is already bound to a result string (a prefix conflict).  A
    sequence that ends where a subtree or another value already lives simply
    replaces it; conflicts are not resolved any more cleverly than that.
    """
    node = tree
    for key in keys[:-1]:
        if not isinstance(node, dict):
            return False  # a shorter sequence already ends here
        node = node.setdefault(key, {})
    if not isinstance(node, dict):
        return False
    node[keys[-1]] = value
    return True


def build_tree(lines, tree=None, err=None):
    """Fold an iterable of .XCompose lines into a prefix tree and return it.

    lines -- iterable of text lines (for example an open file).
    tree  -- dict to extend in place; a new dict is created when omitted.
    err   -- file-like object for diagnostics; defaults to ``sys.stderr`` so
             that warnings never end up inside the printed tree.

    A line that has keys but no recognisable quoted result is recorded with
    the placeholder value ``???`` and reported on ``err``.
    """
    if tree is None:
        tree = {}
    if err is None:
        err = sys.stderr
    for line in lines:
        parsed = _parse_line(line)
        if parsed is None:
            continue
        keys, value = parsed
        if value is None:
            # shouldn't happen, but just in case
            value = "???"
            err.write("couldn't make sense of line: " + line.rstrip("\n") + "\n")
        _add_sequence(tree, keys, value)
    return tree


listing = {}


def main():
    """Read .XCompose text from standard input and print its prefix tree."""
    build_tree(sys.stdin, listing)
    showdict(listing, 0)


if __name__ == "__main__":
    main()