From da9918431d37f85a38c0e1ab7e4479996b757695 Mon Sep 17 00:00:00 2001
From: "Mark E. Shoulson" <mark@kli.org>
Date: Thu, 19 Dec 2019 09:21:19 -0500
Subject: Fixed some .py utils to use python3; added a few chars.

After all, Python2 reaches EOL very soon!

More emoji added.

Also improved the Makefile.

A few additions, improvements to translator.
---
 Makefile       |  6 ++++-
 dotXCompose    | 18 ++++++++++----
 emoji-base     | 28 ++++++++++++----------
 emojitrans2.pl | 12 ++++++++++
 scan4dups.py   | 74 ++++++++++++++++++++++++++++------------------------------
 treeprint.py   | 36 +++++++++++-----------------
 6 files changed, 97 insertions(+), 77 deletions(-)
diff --git a/Makefile b/Makefile
index 48602d3..80ed052 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,8 @@
-all: emoji.compose modletters.compose tags.compose maths.compose
+COMPOSED=emoji.compose modletters.compose tags.compose maths.compose
+all: $(COMPOSED)
 
 %.compose: %-base emojitrans2.pl
 	./emojitrans2.pl < $< > $@
+.PHONY: all clean
+clean:
+	rm -f $(COMPOSED)
diff --git a/dotXCompose b/dotXCompose
index eb45132..76f776d 100644
--- a/dotXCompose
+++ b/dotXCompose
@@ -98,15 +98,16 @@ include "%L"
 # Already present for me:
 # <Multi_key> <space> <space>		: " "	U00A0		# NO-BREAK SPACE
 # Narrow no-break space, needed for some Latin languages like French
-<Multi_key> <space> <n> : " "    U202f # NARROW NO-BREAK SPACE
+<Multi_key> <space> <n>                 : " "   U202f # NARROW NO-BREAK SPACE
 # Technically, NO-BREAK SPACE is not supposed to be fixed-width.  This is:
 <Multi_key> <space> <numbersign> :   " "      U2007             # FIGURE SPACE
-# Do we want/need these?
-<Multi_key> <d> <a> <g>			: "†"	U2020		# DAGGER
-<Multi_key> <d> <d> <a> <g>		: "‡"	U2021		# DOUBLE DAGGER
 # We used to have THIN SPACE as <space> <apostrophe>, but now that’s remapped
 # to " ‘", for conveniently enclosing things in proper single-quotes.
 <Multi_key> <backslash> <comma>		: " "	U2009		# THIN SPACE
+# (heh, heh... space bar)
+<Multi_key> <space> <bar>               : " "   U200A           # HAIR SPACE
+<Multi_key> <d> <a> <g>			: "†"	U2020		# DAGGER
+<Multi_key> <d> <d> <a> <g>		: "‡"	U2021		# DOUBLE DAGGER
 <Multi_key> <s> <e> <c> : "§"   U00A7   # SECTION SIGN
 # It's in the Asian section, but it's a general-purpose punctuation:
 <Multi_key> <quotedbl> <quotedbl>       : "〃"	U3003		# DITTO MARK
@@ -147,6 +148,7 @@ include "%L"
 <Multi_key> <Up> <minus>     		: "⇡"	U21E1		# UPWARDS DASHED ARROW
 <Multi_key> <Right> <minus>     	: "⇢"	U21E2		# RIGHTWARDS DASHED ARROW
 <Multi_key> <Down> <minus>      	: "⇣"	U21E3		# DOWNWARDS DASHED ARROW
+<Multi_key> <z> <z> <greater>           : "↯"   U21AF           # DOWNWARDS ZIGZAG ARROW
 
 # Arrow keys don't always work: some apps trap them for cursor control and
 # other boring things.  The arrow symbols have alternate keystrokes.  Do
@@ -299,6 +301,9 @@ include "%L"
 <Multi_key> <equal> <equal>		: "≡"	U2261		# IDENTICAL TO
 <Multi_key> <colon> <equal> 		: "≔"  U2254		# COLON EQUALS
 <Multi_key> <equal> <colon> 		: "≕"  U2255		# EQUALS COLON
+<Multi_key> <2> <equal>                 : "⩵"  U2A75            # TWO CONSECUTIVE EQUALS SIGNS
+<Multi_key> <equal> <ampersand> <equal> : "⩵"  U2A75            # TWO CONSECUTIVE EQUALS SIGNS
+<Multi_key> <3> <equal>                 : "⩶"  U2A76            # THREE CONSECUTIVE EQUALS SIGNS
 # Using <slash> conflicts.
 <Multi_key> <equal> <bar> <equal>	: "≢"	U2262		# NOT IDENTICAL TO
 # We already have ±
@@ -348,6 +353,7 @@ include "%L"
 # )- conflicts with system for }.
 <Multi_key> <parenright> <underscore>    : "⟌"   	U27CC		# LONG DIVISION
 <Multi_key> <period> <quotedbl>	   	: "∴"	U2234  		# THEREFORE
+<Multi_key> <Multi_key> <t> <h> <e> <r> <e> <4> : "∴"  U2234    # THEREFORE
 <Multi_key> <quotedbl> <period>	   	: "∵"	U2235  		# BECAUSE
 <Multi_key> <Multi_key> <b> <e> <c> <a> <u> <s> <e>	   	: "∵"	U2235  		# BECAUSE
 <Multi_key> <percent> <percent>		: "‱"	U2031	# PER TEN THOUSAND (basis points)
@@ -459,6 +465,7 @@ include "%L"
 <Multi_key> <7> <quotedbl>	    : "『"    U300E   # LEFT WHITE CORNER BRACKET
 <Multi_key> <L> <quotedbl>	    : "』"    U300F   # RIGHT WHITE CORNER BRACKET
 # How about these for the "corners"?  Confusing with {L[} etc?
+# and don't forget about {L_[} which we have for ⸤
 <Multi_key> <7> <parenleft>         : "⌜"     U231C   # TOP LEFT CORNER
 <Multi_key> <7> <parenright>        : "⌝"     U231D   # TOP RIGHT CORNER
 <Multi_key> <L> <parenleft>         : "⌞"     U231E   # BOTTOM LEFT CORNER
@@ -886,6 +893,9 @@ include "%L"
 <Multi_key> <space> <M>		  : " " U2003  # EM SPACE
 <Multi_key> <space> <3> <M>	  : " "	U2004  # THREE-PER-EM SPACE
 <Multi_key> <space> <4> <M>	  : " "	U2005  # FOUR-PER-EM SPACE
+<Multi_key> <space> <6> <M>	  : " "	U2006    # SIX-PER-EM SPACE
+<Multi_key> <space> <comma>	  : " "	U2008    # PUNCTUATION SPACE
+<Multi_key> <space> <plus>	  : " "	U205F    # MEDIUM MATHEMATICAL SPACE
 <Multi_key> <parenleft> <parenright>: "◌" U25CC # DOTTED CIRCLE
 <Multi_key> <bracketleft> <bracketright>: "⬚" U2B1A # DOTTED SQUARE
 <Multi_key> <asterisk> <parenleft>      : "﴾"   UFD3E           # ORNATE LEFT PARENTHESIS
diff --git a/emoji-base b/emoji-base
index 8f39373..6dd8c3e 100644
--- a/emoji-base
+++ b/emoji-base
@@ -1083,7 +1083,7 @@
 <MM> {Lipstic} :  "💄"   U1F484	# LIPSTICK
 <MM> {Lipstck} :  "💄"   U1F484	# LIPSTICK
 #- 1F485;NAIL POLISH;So;0;ON;;;;;N;;;;;
-### <MM> {nail polish} :  "💅"   U1F485	# NAIL POLISH
+<MM> {nailpol} :  "💅"   U1F485	# NAIL POLISH
 #- 1F486;FACE MASSAGE;So;0;ON;;;;;N;;;;;
 ### <MM> {face massage} :  "💆"   U1F486	# FACE MASSAGE
 #- 1F487;HAIRCUT;So;0;ON;;;;;N;;;;;
@@ -1221,19 +1221,21 @@
 #- 1F4C4;PAGE FACING UP;So;0;ON;;;;;N;;;;;
 ### <MM> {page facing up} :  "📄"   U1F4C4	# PAGE FACING UP
 #- 1F4C5;CALENDAR;So;0;ON;;;;;N;;;;;
-### <MM> {calendar} :  "📅"   U1F4C5	# CALENDAR
+<MM> {calenda} :  "📅"   U1F4C5	# CALENDAR
+<MM> {calendr} :  "📅"   U1F4C5	# CALENDAR
+<MM> {calndar} :  "📅"   U1F4C5	# CALENDAR
 #- 1F4C6;TEAR-OFF CALENDAR;So;0;ON;;;;;N;;;;;
-### <MM> {tear-off calendar} :  "📆"   U1F4C6	# TEAR-OFF CALENDAR
+<MM> {date} :  "📆"   U1F4C6	# TEAR-OFF CALENDAR
 #- 1F4C7;CARD INDEX;So;0;ON;;;;;N;;;;;
 ### <MM> {card index} :  "📇"   U1F4C7	# CARD INDEX
 #- 1F4C8;CHART WITH UPWARDS TREND;So;0;ON;;;;;N;;;;;
-### <MM> {chart with upwards trend} :  "📈"   U1F4C8	# CHART WITH UPWARDS TREND
+<MM> {upchart} :  "📈"   U1F4C8	# CHART WITH UPWARDS TREND
 #- 1F4C9;CHART WITH DOWNWARDS TREND;So;0;ON;;;;;N;;;;;
-### <MM> {chart with downwards trend} :  "📉"   U1F4C9	# CHART WITH DOWNWARDS TREND
+<MM> {dnchart} :  "📉"   U1F4C9	# CHART WITH DOWNWARDS TREND
 #- 1F4CA;BAR CHART;So;0;ON;;;;;N;;;;;
 <MM> {barchar} :  "📊"   U1F4CA	# BAR CHART
 #- 1F4CB;CLIPBOARD;So;0;ON;;;;;N;;;;;
-### <MM> {clipboard} :  "📋"   U1F4CB	# CLIPBOARD
+<MM> {clipboa} :  "📋"   U1F4CB	# CLIPBOARD
 #- 1F4CC;PUSHPIN;So;0;ON;;;;;N;;;;;
 <MM> {pushpin} :  "📌"   U1F4CC	# PUSHPIN
 #- 1F4CD;ROUND PUSHPIN;So;0;ON;;;;;N;;;;;
@@ -1277,7 +1279,7 @@
 #- 1F4E0;FAX MACHINE;So;0;ON;;;;;N;;;;;
 ### <MM> {fax machine} :  "📠"   U1F4E0	# FAX MACHINE
 #- 1F4E1;SATELLITE ANTENNA;So;0;ON;;;;;N;;;;;
-### <MM> {satellite antenna} :  "📡"   U1F4E1	# SATELLITE ANTENNA
+<MM> {satdish} :  "📡"   U1F4E1	# SATELLITE ANTENNA
 #- 1F4E2;PUBLIC ADDRESS LOUDSPEAKER;So;0;ON;;;;;N;;;;;
 ### <MM> {public address loudspeaker} :  "📢"   U1F4E2	# PUBLIC ADDRESS LOUDSPEAKER
 #- 1F4E3;CHEERING MEGAPHONE;So;0;ON;;;;;N;;;;;
@@ -1318,7 +1320,7 @@
 #- 1F4F4;MOBILE PHONE OFF;So;0;ON;;;;;N;;;;;
 ### <MM> {mobile phone off} :  "📴"   U1F4F4	# MOBILE PHONE OFF
 #- 1F4F5;NO MOBILE PHONES;So;0;ON;;;;;N;;;;;
-### <MM> {no mobile phones} :  "📵"   U1F4F5	# NO MOBILE PHONES
+<MM> {nophone} :  "📵"   U1F4F5	# NO MOBILE PHONES
 #- 1F4F6;ANTENNA WITH BARS;So;0;ON;;;;;N;;;;;
 ### <MM> {antenna with bars} :  "📶"   U1F4F6	# ANTENNA WITH BARS
 #- 1F4F7;CAMERA;So;0;ON;;;;;N;;;;;
@@ -1542,7 +1544,7 @@
 #- 1F578;SPIDER WEB;So;0;ON;;;;;N;;;;;
 <MM> {web} :  "🕸"   U1F578	# SPIDER WEB
 #- 1F579;JOYSTICK;So;0;ON;;;;;N;;;;;
-### <MM> {joystick} :  "🕹"   U1F579	# JOYSTICK
+<MM> {joystic} :  "🕹"   U1F579	# JOYSTICK
 #- 1F57B;LEFT HAND TELEPHONE RECEIVER;So;0;ON;;;;;N;;;;;
 ### <MM> {left hand telephone receiver} :  "🕻"   U1F57B	# LEFT HAND TELEPHONE RECEIVER
 #- 1F57C;TELEPHONE RECEIVER WITH PAGE;So;0;ON;;;;;N;;;;;
@@ -2244,6 +2246,8 @@
 <MM> {noway} :  "🛇"   U1F6C7	# PROHIBITED SIGN
 #- 1F6C8;CIRCLED INFORMATION SOURCE;So;0;ON;;;;;N;;;;;
 <MM> {(info)} :  "🛈"   U1F6C8	# CIRCLED INFORMATION SOURCE
+#- 2139;INFORMATION SOURCE;Ll;0;L;<font> 0069;;;;N;;;;;
+<MM> {info}   :  "ℹ"   U2139        # INFORMATION SOURCE
 ## careful for conflicts with {boy} and {girl}
 #- 1F6C9;BOYS SYMBOL;So;0;ON;;;;;N;;;;;
 <MM> {BOYS} :  "🛉"   U1F6C9	# BOYS SYMBOL
@@ -2271,9 +2275,9 @@
 #- 1F6E2;OIL DRUM;So;0;ON;;;;;N;;;;;
 <MM> {oildrum} :  "🛢"   U1F6E2	# OIL DRUM
 #- 1F6E3;MOTORWAY;So;0;ON;;;;;N;;;;;
-### <MM> {motorway} :  "🛣"   U1F6E3	# MOTORWAY
+<MM> {highway} :  "🛣"   U1F6E3	# MOTORWAY
 #- 1F6E4;RAILWAY TRACK;So;0;ON;;;;;N;;;;;
-### <MM> {railway track} :  "🛤"   U1F6E4	# RAILWAY TRACK
+<MM> {RRtrack} :  "🛤"   U1F6E4	# RAILWAY TRACK
 #- 1F6E5;MOTOR BOAT;So;0;ON;;;;;N;;;;;
 ### <MM> {motor boat} :  "🛥"   U1F6E5	# MOTOR BOAT
 #- 1F6E6;UP-POINTING MILITARY AIRPLANE;So;0;ON;;;;;N;;;;;
@@ -2423,7 +2427,7 @@
 #- 1F939;JUGGLING;So;0;ON;;;;;N;;;;;
 ### <MM> {juggling} :  "🤹"   U1F939	# JUGGLING
 #- 1F93A;FENCER;So;0;ON;;;;;N;;;;;
-### <MM> {fencer} :  "🤺"   U1F93A	# FENCER
+<MM> {fencer} :  "🤺"   U1F93A	# FENCER
 #- 1F93B;MODERN PENTATHLON;So;0;ON;;;;;N;;;;;
 ### <MM> {modern pentathlon} :  "🤻"   U1F93B	# MODERN PENTATHLON
 #- 1F93C;WRESTLERS;So;0;ON;;;;;N;;;;;
diff --git a/emojitrans2.pl b/emojitrans2.pl
index e3eec7c..420129f 100755
--- a/emojitrans2.pl
+++ b/emojitrans2.pl
@@ -38,6 +38,18 @@ BEGIN { binmode(STDOUT, ":utf8");
 	     '*' => 'asterisk',
              '&' => 'ampersand',
 	     '♫' => 'Multi_key',
+             '←' => 'Left',
+             '→' => 'Right',
+             '↑' => 'Up',
+             '↓' => 'Down',
+             '⇐' => 'BackSpace',
+             '⇤' => 'Home',
+             '⇥' => 'End',
+             '⇑' => 'Prior',    # PageUp
+             '⇓' => 'Next',     # PageDown
+             '↵' => 'Return',
+             '∇' => 'Delete',   # Del, get it?
+             '˅' => 'Insert',   # it'll do.
 );
 
 sub splitup {
diff --git a/scan4dups.py b/scan4dups.py
index 9ce6193..a80bf94 100755
--- a/scan4dups.py
+++ b/scan4dups.py
@@ -6,42 +6,40 @@ import re
 
 listing={}
 
-try:
+for line in sys.stdin:
+    # print "((%s))"%line
+    startpos=0
+    name=''
+    dupsfound=[]
     while True:
-        line=sys.stdin.next()
-        # print "((%s))"%line
-        startpos=0
-        name=''
-        dupsfound=[]
-        while True:
-            m=re.match("\s*<(\w+)>",line[startpos:])
-            if not m:
-                break
-            word=m.group(1)
-            name+=' '+word
-            startpos+=m.end()
-        if startpos<=0:
-            continue
-        m=re.match(r'[^"]*"(.+)"',line)
+        m=re.match(r"\s*<(\w+)>",line[startpos:])
         if not m:
-            # shouldn't happen, but just in case
-            val='???'
-            print "couldn't make sense of line: "+line
-        else:
-            val=m.group(1)
-        if listing.has_key(name):
-            if val != listing[name]:
-                print "Exact conflict found: (%s )[%s][%s]"%(name, 
-                                                             listing[name], val)
-            else:   # It's easier to read if lines have different indentations
-                print "\tRedundant definition: (%s )[%s]"%(name, val)
-        else:
-            listing[name]=val
-except StopIteration:
-    print "hit end"
+            break
+        word=m.group(1)
+        name+=' '+word
+        startpos+=m.end()
+    if startpos<=0:
+        continue
+    m=re.match(r'[^"]*"(.+)"',line)
+    if not m:
+        # shouldn't happen, but just in case
+        val='???'
+        print("couldn't make sense of line: "+line)
+    else:
+        val=m.group(1)
+    if name in listing:
+        if val != listing[name]:
+            print("Exact conflict found: (%s )[%s][%s]"%(name,
+                                                         listing[name], val))
+        else:   # It's easier to read if lines have different indentations
+            print("\tRedundant definition: (%s )[%s]"%(name, val))
+    else:
+        listing[name]=val
+
+print("hit end")
 # NOW check for prefix conflicts:
-print "Checking prefixes."
-for key in listing.keys():
+print("Checking prefixes.")
+for key in listing:
     # print "Key: (%s)"%key
     pref=''
     # Careful when splitting.  The key always starts with a space.
@@ -51,9 +49,9 @@ for key in listing.keys():
             continue
         pref+=" "+word
         # print "checking (%s)"%pref
-        if listing.has_key(pref):
-            print "Prefix conflict found: " \
-                "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref],
-                                          key, listing[key])
+        if pref in listing:
+            print("Prefix conflict found: "
+                  "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref],
+                                            key, listing[key]))
+
 
-    
diff --git a/treeprint.py b/treeprint.py
index bba74a3..987fcb3 100755
--- a/treeprint.py
+++ b/treeprint.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import sys
 import re
@@ -31,8 +31,8 @@ def showdict(data, indent):
         if first:
             first=False
         else:
-            print
-        print " "*max(indent,0) + "("+key,
+            print()
+        print(" "*max(indent,0) + "("+key, end=" ")
         # Sneaky trick: we don't want to go newline-indent over and
         # over for long sequences, i.e. cases where there is only
         # one possible follower.  So we skip the newlines in those
@@ -45,25 +45,24 @@ def showdict(data, indent):
         # but then the {C|L} are not unique after the O.
         if type(value)==dict:
             if len(value)>1:
-                print ""
+                print()
                 showdict(value, abs(indent)+4),
             else:
-                showdict(value, -(abs(indent)+4)),
+                showdict(value, -(abs(indent)+4))  # negative: no newline-indent, but depth still grows
         else:
-            print "    "+value.encode('utf-8'),
+            print("    "+value, end=" ")
             if "-n" in sys.argv:
                 try:
-                    print unicodedata.name(value),
+                    print(unicodedata.name(value),end=" ")
                 except:
                     pass
-        print ")",
+        print(")",end=" ")
 
 listing={}
 
 try:
     while True:
-        line=sys.stdin.next().decode('utf-8')
-        # print "((%s))"%line
+        line=next(sys.stdin)
         startpos=0
         name=[]
         dupsfound=[]
@@ -72,7 +71,7 @@ try:
             if not m:
                 break
             word=m.group(1)
-            name.append(str(word)) # The keys are ordinary strings, not unicode
+            name.append(word)
             startpos+=m.end()
         if startpos<=0:
             continue
@@ -80,13 +79,13 @@ try:
         if not m:
             # shouldn't happen, but just in case
             val='???'
-            print "couldn't make sense of line: "+line
+            print("couldn't make sense of line: "+line)
         else:
             val=m.group(1)
         cur=listing
         for elt in name[:-1]:
             if type(cur)==dict:
-                if not cur.has_key(elt):
+                if not elt in cur:
                     cur[elt]={}
                 cur=cur[elt]        # This will fail for prefix conflicts
             else:
@@ -98,15 +97,8 @@ try:
             # fail.  Prefix conflict.  Let's ignore it.
             pass
 except StopIteration:
-    # print "hit end"
-    pass
+    print("hit end")
 
-# Actually, you could get almost as nice a listing just by using yaml,
-# but now that we have special no-newlines-for-singletons handling,
-# showdict looks nicer.
 showdict(listing,0)
 
-# #print "\n\n-=- YAML -=-"
-# import yaml
-# print yaml.dump(listing, default_style=r'"', allow_unicode=True)
-# # Huh.  Yaml "allow_unicode=True" still escapes non-BMP chars.
+    
-- 
cgit v1.2.3