diff options
author | Mark E. Shoulson <mark@kli.org> | 2019-12-19 09:21:19 -0500 |
---|---|---|
committer | Mark E. Shoulson <mark@kli.org> | 2020-05-11 10:58:06 -0400 |
commit | da9918431d37f85a38c0e1ab7e4479996b757695 (patch) | |
tree | a5ffed1646594f409d5933f0e010684252e8a1c6 | |
parent | Add corners, etc. (diff) | |
download | dotXCompose-da9918431d37f85a38c0e1ab7e4479996b757695.tar.gz dotXCompose-da9918431d37f85a38c0e1ab7e4479996b757695.tar.bz2 dotXCompose-da9918431d37f85a38c0e1ab7e4479996b757695.zip |
Fixed some .py utils to use python3; added a few chars.
After all, Python2 reaches EOL very soon!
More emoji added.
Also improved the Makefile.
A few additions and improvements to the translator.
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | dotXCompose | 18 | ||||
-rw-r--r-- | emoji-base | 28 | ||||
-rwxr-xr-x | emojitrans2.pl | 12 | ||||
-rwxr-xr-x | scan4dups.py | 74 | ||||
-rwxr-xr-x | treeprint.py | 36 |
6 files changed, 97 insertions, 77 deletions
@@ -1,4 +1,8 @@ -all: emoji.compose modletters.compose tags.compose maths.compose +COMPOSED=emoji.compose modletters.compose tags.compose maths.compose +all: $(COMPOSED) %.compose: %-base emojitrans2.pl ./emojitrans2.pl < $< > $@ + +clean: + rm -f $(COMPOSED) diff --git a/dotXCompose b/dotXCompose index eb45132..76f776d 100644 --- a/dotXCompose +++ b/dotXCompose @@ -98,15 +98,16 @@ include "%L" # Already present for me: # <Multi_key> <space> <space> : " " U00A0 # NO-BREAK SPACE # Narrow no-break space, needed for some Latin languages like French -<Multi_key> <space> <n> : " " U202f # NARROW NO-BREAK SPACE +<Multi_key> <space> <n> : " " U202f # NARROW NO-BREAK SPACE # Technically, NO-BREAK SPACE is not supposed to be fixed-width. This is: <Multi_key> <space> <numbersign> : " " U2007 # FIGURE SPACE -# Do we want/need these? -<Multi_key> <d> <a> <g> : "†" U2020 # DAGGER -<Multi_key> <d> <d> <a> <g> : "‡" U2021 # DOUBLE DAGGER # We used to have THIN SPACE as <space> <apostrophe>, but now that’s remapped # to " ‘", for conveniently enclosing things in proper single-quotes. <Multi_key> <backslash> <comma> : " " U2009 # THIN SPACE +# (heh, heh... space bar) +<Multi_key> <space> <bar> : " " U200A # HAIR SPACE +<Multi_key> <d> <a> <g> : "†" U2020 # DAGGER +<Multi_key> <d> <d> <a> <g> : "‡" U2021 # DOUBLE DAGGER <Multi_key> <s> <e> <c> : "§" U00A7 # SECTION SIGN # It's in the Asian section, but it's a general-purpose punctuation: <Multi_key> <quotedbl> <quotedbl> : "〃" U3003 # DITTO MARK @@ -147,6 +148,7 @@ include "%L" <Multi_key> <Up> <minus> : "⇡" U21E1 # UPWARDS DASHED ARROW <Multi_key> <Right> <minus> : "⇢" U21E2 # RIGHTWARDS DASHED ARROW <Multi_key> <Down> <minus> : "⇣" U21E3 # DOWNWARDS DASHED ARROW +<Multi_key> <z> <z> <greater> : "↯" U21AF # DOWNWARDS ZIGZAG ARROW # Arrow keys don't always work: some apps trap them for cursor control and # other boring things. The arrow symbols have alternate keystrokes. 
Do @@ -299,6 +301,9 @@ include "%L" <Multi_key> <equal> <equal> : "≡" U2261 # IDENTICAL TO <Multi_key> <colon> <equal> : "≔" U2254 # COLON EQUALS <Multi_key> <equal> <colon> : "≕" U2255 # EQUALS COLON +<Multi_key> <2> <equal> : "⩵" U2A75 # TWO CONSECUTIVE EQUALS SIGNS +<Multi_key> <equal> <ampersand> <equal> : "⩵" U2A75 # TWO CONSECUTIVE EQUALS SIGNS +<Multi_key> <3> <equal> : "⩶" U2A76 # THREE CONSECUTIVE EQUALS SIGNS # Using <slash> conflicts. <Multi_key> <equal> <bar> <equal> : "≢" U2262 # NOT IDENTICAL TO # We already have ± @@ -348,6 +353,7 @@ include "%L" # )- conflicts with system for }. <Multi_key> <parenright> <underscore> : "⟌" U27CC # LONG DIVISION <Multi_key> <period> <quotedbl> : "∴" U2234 # THEREFORE +<Multi_key> <Multi_key> <t> <h> <e> <r> <e> <4> : "∴" U2234 # THEREFORE <Multi_key> <quotedbl> <period> : "∵" U2235 # BECAUSE <Multi_key> <Multi_key> <b> <e> <c> <a> <u> <s> <e> : "∵" U2235 # BECAUSE <Multi_key> <percent> <percent> : "‱" U2031 # PER TEN THOUSAND (basis points) @@ -459,6 +465,7 @@ include "%L" <Multi_key> <7> <quotedbl> : "『" U300E # LEFT WHITE CORNER BRACKET <Multi_key> <L> <quotedbl> : "』" U300F # RIGHT WHITE CORNER BRACKET # How about these for the "corners"? Confusing with {L[} etc? 
+# and don't forget about {L_[} which we have for ⸤ <Multi_key> <7> <parenleft> : "⌜" U231C # TOP LEFT CORNER <Multi_key> <7> <parenright> : "⌝" U231D # TOP RIGHT CORNER <Multi_key> <L> <parenleft> : "⌞" U231E # BOTTOM LEFT CORNER @@ -886,6 +893,9 @@ include "%L" <Multi_key> <space> <M> : " " U2003 # EM SPACE <Multi_key> <space> <3> <M> : " " U2004 # THREE-PER-EM SPACE <Multi_key> <space> <4> <M> : " " U2005 # FOUR-PER-EM SPACE +<Multi_key> <space> <6> <M> : " " U2006 # SIX-PER-EM SPACE +<Multi_key> <space> <comma> : " " U2008 # PUNCTUATION SPACE +<Multi_key> <space> <plus> : " " U205F # MEDIUM MATHEMATICAL SPACE <Multi_key> <parenleft> <parenright>: "◌" U25CC # DOTTED CIRCLE <Multi_key> <bracketleft> <bracketright>: "⬚" U2B1A # DOTTED SQUARE <Multi_key> <asterisk> <parenleft> : "﴾" UFD3E # ORNATE LEFT PARENTHESIS @@ -1083,7 +1083,7 @@ <MM> {Lipstic} : "💄" U1F484 # LIPSTICK <MM> {Lipstck} : "💄" U1F484 # LIPSTICK #- 1F485;NAIL POLISH;So;0;ON;;;;;N;;;;; -### <MM> {nail polish} : "💅" U1F485 # NAIL POLISH +<MM> {nailpol} : "💅" U1F485 # NAIL POLISH #- 1F486;FACE MASSAGE;So;0;ON;;;;;N;;;;; ### <MM> {face massage} : "💆" U1F486 # FACE MASSAGE #- 1F487;HAIRCUT;So;0;ON;;;;;N;;;;; @@ -1221,19 +1221,21 @@ #- 1F4C4;PAGE FACING UP;So;0;ON;;;;;N;;;;; ### <MM> {page facing up} : "📄" U1F4C4 # PAGE FACING UP #- 1F4C5;CALENDAR;So;0;ON;;;;;N;;;;; -### <MM> {calendar} : "📅" U1F4C5 # CALENDAR +<MM> {calenda} : "📅" U1F4C5 # CALENDAR +<MM> {calendr} : "📅" U1F4C5 # CALENDAR +<MM> {calndar} : "📅" U1F4C5 # CALENDAR #- 1F4C6;TEAR-OFF CALENDAR;So;0;ON;;;;;N;;;;; -### <MM> {tear-off calendar} : "📆" U1F4C6 # TEAR-OFF CALENDAR +<MM> {date} : "📆" U1F4C6 # TEAR-OFF CALENDAR #- 1F4C7;CARD INDEX;So;0;ON;;;;;N;;;;; ### <MM> {card index} : "📇" U1F4C7 # CARD INDEX #- 1F4C8;CHART WITH UPWARDS TREND;So;0;ON;;;;;N;;;;; -### <MM> {chart with upwards trend} : "📈" U1F4C8 # CHART WITH UPWARDS TREND +<MM> {upchart} : "📈" U1F4C8 # CHART WITH UPWARDS TREND #- 1F4C9;CHART WITH DOWNWARDS TREND;So;0;ON;;;;;N;;;;; 
-### <MM> {chart with downwards trend} : "📉" U1F4C9 # CHART WITH DOWNWARDS TREND +<MM> {dnchart} : "📉" U1F4C9 # CHART WITH DOWNWARDS TREND #- 1F4CA;BAR CHART;So;0;ON;;;;;N;;;;; <MM> {barchar} : "📊" U1F4CA # BAR CHART #- 1F4CB;CLIPBOARD;So;0;ON;;;;;N;;;;; -### <MM> {clipboard} : "📋" U1F4CB # CLIPBOARD +<MM> {clipboa} : "📋" U1F4CB # CLIPBOARD #- 1F4CC;PUSHPIN;So;0;ON;;;;;N;;;;; <MM> {pushpin} : "📌" U1F4CC # PUSHPIN #- 1F4CD;ROUND PUSHPIN;So;0;ON;;;;;N;;;;; @@ -1277,7 +1279,7 @@ #- 1F4E0;FAX MACHINE;So;0;ON;;;;;N;;;;; ### <MM> {fax machine} : "📠" U1F4E0 # FAX MACHINE #- 1F4E1;SATELLITE ANTENNA;So;0;ON;;;;;N;;;;; -### <MM> {satellite antenna} : "📡" U1F4E1 # SATELLITE ANTENNA +<MM> {satdish} : "📡" U1F4E1 # SATELLITE ANTENNA #- 1F4E2;PUBLIC ADDRESS LOUDSPEAKER;So;0;ON;;;;;N;;;;; ### <MM> {public address loudspeaker} : "📢" U1F4E2 # PUBLIC ADDRESS LOUDSPEAKER #- 1F4E3;CHEERING MEGAPHONE;So;0;ON;;;;;N;;;;; @@ -1318,7 +1320,7 @@ #- 1F4F4;MOBILE PHONE OFF;So;0;ON;;;;;N;;;;; ### <MM> {mobile phone off} : "📴" U1F4F4 # MOBILE PHONE OFF #- 1F4F5;NO MOBILE PHONES;So;0;ON;;;;;N;;;;; -### <MM> {no mobile phones} : "📵" U1F4F5 # NO MOBILE PHONES +<MM> {nophone} : "📵" U1F4F5 # NO MOBILE PHONES #- 1F4F6;ANTENNA WITH BARS;So;0;ON;;;;;N;;;;; ### <MM> {antenna with bars} : "📶" U1F4F6 # ANTENNA WITH BARS #- 1F4F7;CAMERA;So;0;ON;;;;;N;;;;; @@ -1542,7 +1544,7 @@ #- 1F578;SPIDER WEB;So;0;ON;;;;;N;;;;; <MM> {web} : "🕸" U1F578 # SPIDER WEB #- 1F579;JOYSTICK;So;0;ON;;;;;N;;;;; -### <MM> {joystick} : "🕹" U1F579 # JOYSTICK +<MM> {joystic} : "🕹" U1F579 # JOYSTICK #- 1F57B;LEFT HAND TELEPHONE RECEIVER;So;0;ON;;;;;N;;;;; ### <MM> {left hand telephone receiver} : "🕻" U1F57B # LEFT HAND TELEPHONE RECEIVER #- 1F57C;TELEPHONE RECEIVER WITH PAGE;So;0;ON;;;;;N;;;;; @@ -2244,6 +2246,8 @@ <MM> {noway} : "🛇" U1F6C7 # PROHIBITED SIGN #- 1F6C8;CIRCLED INFORMATION SOURCE;So;0;ON;;;;;N;;;;; <MM> {(info)} : "🛈" U1F6C8 # CIRCLED INFORMATION SOURCE +#- 2139;INFORMATION SOURCE;Ll;0;L;<font> 0069;;;;N;;;;; +<MM> {info} 
: "ℹ" U2139 # INFORMATION SOURCE ## careful for conflicts with {boy} and {girl} #- 1F6C9;BOYS SYMBOL;So;0;ON;;;;;N;;;;; <MM> {BOYS} : "🛉" U1F6C9 # BOYS SYMBOL @@ -2271,9 +2275,9 @@ #- 1F6E2;OIL DRUM;So;0;ON;;;;;N;;;;; <MM> {oildrum} : "🛢" U1F6E2 # OIL DRUM #- 1F6E3;MOTORWAY;So;0;ON;;;;;N;;;;; -### <MM> {motorway} : "🛣" U1F6E3 # MOTORWAY +<MM> {highway} : "🛣" U1F6E3 # MOTORWAY #- 1F6E4;RAILWAY TRACK;So;0;ON;;;;;N;;;;; -### <MM> {railway track} : "🛤" U1F6E4 # RAILWAY TRACK +<MM> {RRtrack} : "🛤" U1F6E4 # RAILWAY TRACK #- 1F6E5;MOTOR BOAT;So;0;ON;;;;;N;;;;; ### <MM> {motor boat} : "🛥" U1F6E5 # MOTOR BOAT #- 1F6E6;UP-POINTING MILITARY AIRPLANE;So;0;ON;;;;;N;;;;; @@ -2423,7 +2427,7 @@ #- 1F939;JUGGLING;So;0;ON;;;;;N;;;;; ### <MM> {juggling} : "🤹" U1F939 # JUGGLING #- 1F93A;FENCER;So;0;ON;;;;;N;;;;; -### <MM> {fencer} : "🤺" U1F93A # FENCER +<MM> {fencer} : "🤺" U1F93A # FENCER #- 1F93B;MODERN PENTATHLON;So;0;ON;;;;;N;;;;; ### <MM> {modern pentathlon} : "🤻" U1F93B # MODERN PENTATHLON #- 1F93C;WRESTLERS;So;0;ON;;;;;N;;;;; diff --git a/emojitrans2.pl b/emojitrans2.pl index e3eec7c..420129f 100755 --- a/emojitrans2.pl +++ b/emojitrans2.pl @@ -38,6 +38,18 @@ BEGIN { binmode(STDOUT, ":utf8"); '*' => 'asterisk', '&' => 'ampersand', '♫' => 'Multi_key', + '←' => 'Left', + '→' => 'Right', + '↑' => 'Up', + '↓' => 'Down', + '⇐' => 'BackSpace', + '⇤' => 'Home', + '⇥' => 'End', + '⇑' => 'Prior', # PageUp + '⇓' => 'Next', # PageDown + '↵' => 'Return', + '∇' => 'Delete', # Del, get it? + '˅' => 'Insert', # it'll do. 
); sub splitup { diff --git a/scan4dups.py b/scan4dups.py index 9ce6193..a80bf94 100755 --- a/scan4dups.py +++ b/scan4dups.py @@ -6,42 +6,40 @@ import re listing={} -try: +for line in sys.stdin: + # print "((%s))"%line + startpos=0 + name='' + dupsfound=[] while True: - line=sys.stdin.next() - # print "((%s))"%line - startpos=0 - name='' - dupsfound=[] - while True: - m=re.match("\s*<(\w+)>",line[startpos:]) - if not m: - break - word=m.group(1) - name+=' '+word - startpos+=m.end() - if startpos<=0: - continue - m=re.match(r'[^"]*"(.+)"',line) + m=re.match("\s*<(\w+)>",line[startpos:]) if not m: - # shouldn't happen, but just in case - val='???' - print "couldn't make sense of line: "+line - else: - val=m.group(1) - if listing.has_key(name): - if val != listing[name]: - print "Exact conflict found: (%s )[%s][%s]"%(name, - listing[name], val) - else: # It's easier to read if lines have different indentations - print "\tRedundant definition: (%s )[%s]"%(name, val) - else: - listing[name]=val -except StopIteration: - print "hit end" + break + word=m.group(1) + name+=' '+word + startpos+=m.end() + if startpos<=0: + continue + m=re.match(r'[^"]*"(.+)"',line) + if not m: + # shouldn't happen, but just in case + val='???' + print("couldn't make sense of line: "+line) + else: + val=m.group(1) + if name in listing: + if val != listing[name]: + print("Exact conflict found: (%s )[%s][%s]"%(name, + listing[name], val)) + else: # It's easier to read if lines have different indentations + print("\tRedundant definition: (%s )[%s]"%(name, val)) + else: + listing[name]=val + +print("hit end") # NOW check for prefix conflicts: -print "Checking prefixes." -for key in listing.keys(): +print("Checking prefixes.") +for key in listing: # print "Key: (%s)"%key pref='' # Careful when splitting. The key always starts with a space. 
@@ -51,9 +49,9 @@ for key in listing.keys(): continue pref+=" "+word # print "checking (%s)"%pref - if listing.has_key(pref): - print "Prefix conflict found: " \ - "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref], - key, listing[key]) + if pref in listing: + print("Prefix conflict found: " + "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref], + key, listing[key])) + - diff --git a/treeprint.py b/treeprint.py index bba74a3..987fcb3 100755 --- a/treeprint.py +++ b/treeprint.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import re @@ -31,8 +31,8 @@ def showdict(data, indent): if first: first=False else: - print - print " "*max(indent,0) + "("+key, + print() + print(" "*max(indent,0) + "("+key, end=" ") # Sneaky trick: we don't want to go newline-indent over and # over for long sequences, i.e. cases where there is only # one possible follower. So we skip the newlines in those @@ -45,25 +45,24 @@ def showdict(data, indent): # but then the {C|L} are not unique after the O. if type(value)==dict: if len(value)>1: - print "" + print() showdict(value, abs(indent)+4), else: - showdict(value, -(abs(indent)+4)), + showdict(value, -abs(indent+4)), else: - print " "+value.encode('utf-8'), + print(" "+value, end=" ") if "-n" in sys.argv: try: - print unicodedata.name(value), + print(unicodedata.name(value.decode('utf-8')),end=" ") except: pass - print ")", + print(")",end=" ") listing={} try: while True: - line=sys.stdin.next().decode('utf-8') - # print "((%s))"%line + line=sys.stdin.__next__() startpos=0 name=[] dupsfound=[] @@ -72,7 +71,7 @@ try: if not m: break word=m.group(1) - name.append(str(word)) # The keys are ordinary strings, not unicode + name.append(word) startpos+=m.end() if startpos<=0: continue @@ -80,13 +79,13 @@ try: if not m: # shouldn't happen, but just in case val='???' 
- print "couldn't make sense of line: "+line + print("couldn't make sense of line: "+line) else: val=m.group(1) cur=listing for elt in name[:-1]: if type(cur)==dict: - if not cur.has_key(elt): + if not elt in cur: cur[elt]={} cur=cur[elt] # This will fail for prefix conflicts else: @@ -98,15 +97,8 @@ try: # fail. Prefix conflict. Let's ignore it. pass except StopIteration: - # print "hit end" - pass + print("hit end") -# Actually, you could get almost as nice a listing just by using yaml, -# but now that we have special no-newlines-for-singletons handling, -# showdict looks nicer. showdict(listing,0) -# #print "\n\n-=- YAML -=-" -# import yaml -# print yaml.dump(listing, default_style=r'"', allow_unicode=True) -# # Huh. Yaml "allow_unicode=True" still escapes non-BMP chars. + |