about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mark E. Shoulson <mark@kli.org> 2019-12-19 09:21:19 -0500
committer Mark E. Shoulson <mark@kli.org> 2020-05-11 10:58:06 -0400
commit da9918431d37f85a38c0e1ab7e4479996b757695 (patch)
tree a5ffed1646594f409d5933f0e010684252e8a1c6
parent Add corners, etc. (diff)
download dotXCompose-da9918431d37f85a38c0e1ab7e4479996b757695.tar.gz
dotXCompose-da9918431d37f85a38c0e1ab7e4479996b757695.tar.bz2
dotXCompose-da9918431d37f85a38c0e1ab7e4479996b757695.zip
Fixed some .py utils to use python3; added a few chars.
After all, Python2 reaches EOL very soon! More emoji added. Also improved the Makefile. A few additions, improvements to translator.
-rw-r--r-- Makefile 6
-rw-r--r-- dotXCompose 18
-rw-r--r-- emoji-base 28
-rwxr-xr-x emojitrans2.pl 12
-rwxr-xr-x scan4dups.py 74
-rwxr-xr-x treeprint.py 36
6 files changed, 97 insertions, 77 deletions
diff --git a/Makefile b/Makefile
index 48602d3..80ed052 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,8 @@
-all: emoji.compose modletters.compose tags.compose maths.compose
+COMPOSED=emoji.compose modletters.compose tags.compose maths.compose
+all: $(COMPOSED)
%.compose: %-base emojitrans2.pl
./emojitrans2.pl < $< > $@
+
+clean:
+ rm -f $(COMPOSED)
diff --git a/dotXCompose b/dotXCompose
index eb45132..76f776d 100644
--- a/dotXCompose
+++ b/dotXCompose
@@ -98,15 +98,16 @@ include "%L"
# Already present for me:
# <Multi_key> <space> <space> : " " U00A0 # NO-BREAK SPACE
# Narrow no-break space, needed for some Latin languages like French
-<Multi_key> <space> <n> : " " U202f # NARROW NO-BREAK SPACE
+<Multi_key> <space> <n> : " " U202f # NARROW NO-BREAK SPACE
# Technically, NO-BREAK SPACE is not supposed to be fixed-width. This is:
<Multi_key> <space> <numbersign> : " " U2007 # FIGURE SPACE
-# Do we want/need these?
-<Multi_key> <d> <a> <g> : "†" U2020 # DAGGER
-<Multi_key> <d> <d> <a> <g> : "‡" U2021 # DOUBLE DAGGER
# We used to have THIN SPACE as <space> <apostrophe>, but now that’s remapped
# to " ‘", for conveniently enclosing things in proper single-quotes.
<Multi_key> <backslash> <comma> : " " U2009 # THIN SPACE
+# (heh, heh... space bar)
+<Multi_key> <space> <bar> : " " U200A # HAIR SPACE
+<Multi_key> <d> <a> <g> : "†" U2020 # DAGGER
+<Multi_key> <d> <d> <a> <g> : "‡" U2021 # DOUBLE DAGGER
<Multi_key> <s> <e> <c> : "§" U00A7 # SECTION SIGN
# It's in the Asian section, but it's a general-purpose punctuation:
<Multi_key> <quotedbl> <quotedbl> : "〃" U3003 # DITTO MARK
@@ -147,6 +148,7 @@ include "%L"
<Multi_key> <Up> <minus> : "⇡" U21E1 # UPWARDS DASHED ARROW
<Multi_key> <Right> <minus> : "⇢" U21E2 # RIGHTWARDS DASHED ARROW
<Multi_key> <Down> <minus> : "⇣" U21E3 # DOWNWARDS DASHED ARROW
+<Multi_key> <z> <z> <greater> : "↯" U21AF # DOWNWARDS ZIGZAG ARROW
# Arrow keys don't always work: some apps trap them for cursor control and
# other boring things. The arrow symbols have alternate keystrokes. Do
@@ -299,6 +301,9 @@ include "%L"
<Multi_key> <equal> <equal> : "≡" U2261 # IDENTICAL TO
<Multi_key> <colon> <equal> : "≔" U2254 # COLON EQUALS
<Multi_key> <equal> <colon> : "≕" U2255 # EQUALS COLON
+<Multi_key> <2> <equal> : "⩵" U2A75 # TWO CONSECUTIVE EQUALS SIGNS
+<Multi_key> <equal> <ampersand> <equal> : "⩵" U2A75 # TWO CONSECUTIVE EQUALS SIGNS
+<Multi_key> <3> <equal> : "⩶" U2A76 # THREE CONSECUTIVE EQUALS SIGNS
# Using <slash> conflicts.
<Multi_key> <equal> <bar> <equal> : "≢" U2262 # NOT IDENTICAL TO
# We already have ±
@@ -348,6 +353,7 @@ include "%L"
# )- conflicts with system for }.
<Multi_key> <parenright> <underscore> : "⟌" U27CC # LONG DIVISION
<Multi_key> <period> <quotedbl> : "∴" U2234 # THEREFORE
+<Multi_key> <Multi_key> <t> <h> <e> <r> <e> <4> : "∴" U2234 # THEREFORE
<Multi_key> <quotedbl> <period> : "∵" U2235 # BECAUSE
<Multi_key> <Multi_key> <b> <e> <c> <a> <u> <s> <e> : "∵" U2235 # BECAUSE
<Multi_key> <percent> <percent> : "‱" U2031 # PER TEN THOUSAND (basis points)
@@ -459,6 +465,7 @@ include "%L"
<Multi_key> <7> <quotedbl> : "『" U300E # LEFT WHITE CORNER BRACKET
<Multi_key> <L> <quotedbl> : "』" U300F # RIGHT WHITE CORNER BRACKET
# How about these for the "corners"? Confusing with {L[} etc?
+# and don't forget about {L_[} which we have for ⸤
<Multi_key> <7> <parenleft> : "⌜" U231C # TOP LEFT CORNER
<Multi_key> <7> <parenright> : "⌝" U231D # TOP RIGHT CORNER
<Multi_key> <L> <parenleft> : "⌞" U231E # BOTTOM LEFT CORNER
@@ -886,6 +893,9 @@ include "%L"
<Multi_key> <space> <M> : " " U2003 # EM SPACE
<Multi_key> <space> <3> <M> : " " U2004 # THREE-PER-EM SPACE
<Multi_key> <space> <4> <M> : " " U2005 # FOUR-PER-EM SPACE
+<Multi_key> <space> <6> <M> : " " U2006 # SIX-PER-EM SPACE
+<Multi_key> <space> <comma> : " " U2008 # PUNCTUATION SPACE
+<Multi_key> <space> <plus> : " " U205F # MEDIUM MATHEMATICAL SPACE
<Multi_key> <parenleft> <parenright>: "◌" U25CC # DOTTED CIRCLE
<Multi_key> <bracketleft> <bracketright>: "⬚" U2B1A # DOTTED SQUARE
<Multi_key> <asterisk> <parenleft> : "﴾" UFD3E # ORNATE LEFT PARENTHESIS
diff --git a/emoji-base b/emoji-base
index 8f39373..6dd8c3e 100644
--- a/emoji-base
+++ b/emoji-base
@@ -1083,7 +1083,7 @@
<MM> {Lipstic} : "💄" U1F484 # LIPSTICK
<MM> {Lipstck} : "💄" U1F484 # LIPSTICK
#- 1F485;NAIL POLISH;So;0;ON;;;;;N;;;;;
-### <MM> {nail polish} : "💅" U1F485 # NAIL POLISH
+<MM> {nailpol} : "💅" U1F485 # NAIL POLISH
#- 1F486;FACE MASSAGE;So;0;ON;;;;;N;;;;;
### <MM> {face massage} : "💆" U1F486 # FACE MASSAGE
#- 1F487;HAIRCUT;So;0;ON;;;;;N;;;;;
@@ -1221,19 +1221,21 @@
#- 1F4C4;PAGE FACING UP;So;0;ON;;;;;N;;;;;
### <MM> {page facing up} : "📄" U1F4C4 # PAGE FACING UP
#- 1F4C5;CALENDAR;So;0;ON;;;;;N;;;;;
-### <MM> {calendar} : "📅" U1F4C5 # CALENDAR
+<MM> {calenda} : "📅" U1F4C5 # CALENDAR
+<MM> {calendr} : "📅" U1F4C5 # CALENDAR
+<MM> {calndar} : "📅" U1F4C5 # CALENDAR
#- 1F4C6;TEAR-OFF CALENDAR;So;0;ON;;;;;N;;;;;
-### <MM> {tear-off calendar} : "📆" U1F4C6 # TEAR-OFF CALENDAR
+<MM> {date} : "📆" U1F4C6 # TEAR-OFF CALENDAR
#- 1F4C7;CARD INDEX;So;0;ON;;;;;N;;;;;
### <MM> {card index} : "📇" U1F4C7 # CARD INDEX
#- 1F4C8;CHART WITH UPWARDS TREND;So;0;ON;;;;;N;;;;;
-### <MM> {chart with upwards trend} : "📈" U1F4C8 # CHART WITH UPWARDS TREND
+<MM> {upchart} : "📈" U1F4C8 # CHART WITH UPWARDS TREND
#- 1F4C9;CHART WITH DOWNWARDS TREND;So;0;ON;;;;;N;;;;;
-### <MM> {chart with downwards trend} : "📉" U1F4C9 # CHART WITH DOWNWARDS TREND
+<MM> {dnchart} : "📉" U1F4C9 # CHART WITH DOWNWARDS TREND
#- 1F4CA;BAR CHART;So;0;ON;;;;;N;;;;;
<MM> {barchar} : "📊" U1F4CA # BAR CHART
#- 1F4CB;CLIPBOARD;So;0;ON;;;;;N;;;;;
-### <MM> {clipboard} : "📋" U1F4CB # CLIPBOARD
+<MM> {clipboa} : "📋" U1F4CB # CLIPBOARD
#- 1F4CC;PUSHPIN;So;0;ON;;;;;N;;;;;
<MM> {pushpin} : "📌" U1F4CC # PUSHPIN
#- 1F4CD;ROUND PUSHPIN;So;0;ON;;;;;N;;;;;
@@ -1277,7 +1279,7 @@
#- 1F4E0;FAX MACHINE;So;0;ON;;;;;N;;;;;
### <MM> {fax machine} : "📠" U1F4E0 # FAX MACHINE
#- 1F4E1;SATELLITE ANTENNA;So;0;ON;;;;;N;;;;;
-### <MM> {satellite antenna} : "📡" U1F4E1 # SATELLITE ANTENNA
+<MM> {satdish} : "📡" U1F4E1 # SATELLITE ANTENNA
#- 1F4E2;PUBLIC ADDRESS LOUDSPEAKER;So;0;ON;;;;;N;;;;;
### <MM> {public address loudspeaker} : "📢" U1F4E2 # PUBLIC ADDRESS LOUDSPEAKER
#- 1F4E3;CHEERING MEGAPHONE;So;0;ON;;;;;N;;;;;
@@ -1318,7 +1320,7 @@
#- 1F4F4;MOBILE PHONE OFF;So;0;ON;;;;;N;;;;;
### <MM> {mobile phone off} : "📴" U1F4F4 # MOBILE PHONE OFF
#- 1F4F5;NO MOBILE PHONES;So;0;ON;;;;;N;;;;;
-### <MM> {no mobile phones} : "📵" U1F4F5 # NO MOBILE PHONES
+<MM> {nophone} : "📵" U1F4F5 # NO MOBILE PHONES
#- 1F4F6;ANTENNA WITH BARS;So;0;ON;;;;;N;;;;;
### <MM> {antenna with bars} : "📶" U1F4F6 # ANTENNA WITH BARS
#- 1F4F7;CAMERA;So;0;ON;;;;;N;;;;;
@@ -1542,7 +1544,7 @@
#- 1F578;SPIDER WEB;So;0;ON;;;;;N;;;;;
<MM> {web} : "🕸" U1F578 # SPIDER WEB
#- 1F579;JOYSTICK;So;0;ON;;;;;N;;;;;
-### <MM> {joystick} : "🕹" U1F579 # JOYSTICK
+<MM> {joystic} : "🕹" U1F579 # JOYSTICK
#- 1F57B;LEFT HAND TELEPHONE RECEIVER;So;0;ON;;;;;N;;;;;
### <MM> {left hand telephone receiver} : "🕻" U1F57B # LEFT HAND TELEPHONE RECEIVER
#- 1F57C;TELEPHONE RECEIVER WITH PAGE;So;0;ON;;;;;N;;;;;
@@ -2244,6 +2246,8 @@
<MM> {noway} : "🛇" U1F6C7 # PROHIBITED SIGN
#- 1F6C8;CIRCLED INFORMATION SOURCE;So;0;ON;;;;;N;;;;;
<MM> {(info)} : "🛈" U1F6C8 # CIRCLED INFORMATION SOURCE
+#- 2139;INFORMATION SOURCE;Ll;0;L;<font> 0069;;;;N;;;;;
+<MM> {info} : "ℹ" U2139 # INFORMATION SOURCE
## careful for conflicts with {boy} and {girl}
#- 1F6C9;BOYS SYMBOL;So;0;ON;;;;;N;;;;;
<MM> {BOYS} : "🛉" U1F6C9 # BOYS SYMBOL
@@ -2271,9 +2275,9 @@
#- 1F6E2;OIL DRUM;So;0;ON;;;;;N;;;;;
<MM> {oildrum} : "🛢" U1F6E2 # OIL DRUM
#- 1F6E3;MOTORWAY;So;0;ON;;;;;N;;;;;
-### <MM> {motorway} : "🛣" U1F6E3 # MOTORWAY
+<MM> {highway} : "🛣" U1F6E3 # MOTORWAY
#- 1F6E4;RAILWAY TRACK;So;0;ON;;;;;N;;;;;
-### <MM> {railway track} : "🛤" U1F6E4 # RAILWAY TRACK
+<MM> {RRtrack} : "🛤" U1F6E4 # RAILWAY TRACK
#- 1F6E5;MOTOR BOAT;So;0;ON;;;;;N;;;;;
### <MM> {motor boat} : "🛥" U1F6E5 # MOTOR BOAT
#- 1F6E6;UP-POINTING MILITARY AIRPLANE;So;0;ON;;;;;N;;;;;
@@ -2423,7 +2427,7 @@
#- 1F939;JUGGLING;So;0;ON;;;;;N;;;;;
### <MM> {juggling} : "🤹" U1F939 # JUGGLING
#- 1F93A;FENCER;So;0;ON;;;;;N;;;;;
-### <MM> {fencer} : "🤺" U1F93A # FENCER
+<MM> {fencer} : "🤺" U1F93A # FENCER
#- 1F93B;MODERN PENTATHLON;So;0;ON;;;;;N;;;;;
### <MM> {modern pentathlon} : "🤻" U1F93B # MODERN PENTATHLON
#- 1F93C;WRESTLERS;So;0;ON;;;;;N;;;;;
diff --git a/emojitrans2.pl b/emojitrans2.pl
index e3eec7c..420129f 100755
--- a/emojitrans2.pl
+++ b/emojitrans2.pl
@@ -38,6 +38,18 @@ BEGIN { binmode(STDOUT, ":utf8");
'*' => 'asterisk',
'&' => 'ampersand',
'♫' => 'Multi_key',
+ '←' => 'Left',
+ '→' => 'Right',
+ '↑' => 'Up',
+ '↓' => 'Down',
+ '⇐' => 'BackSpace',
+ '⇤' => 'Home',
+ '⇥' => 'End',
+ '⇑' => 'Prior', # PageUp
+ '⇓' => 'Next', # PageDown
+ '↵' => 'Return',
+ '∇' => 'Delete', # Del, get it?
+ '˅' => 'Insert', # it'll do.
);
sub splitup {
diff --git a/scan4dups.py b/scan4dups.py
index 9ce6193..a80bf94 100755
--- a/scan4dups.py
+++ b/scan4dups.py
@@ -6,42 +6,40 @@ import re
listing={}
-try:
+for line in sys.stdin:
+ # print "((%s))"%line
+ startpos=0
+ name=''
+ dupsfound=[]
while True:
- line=sys.stdin.next()
- # print "((%s))"%line
- startpos=0
- name=''
- dupsfound=[]
- while True:
- m=re.match("\s*<(\w+)>",line[startpos:])
- if not m:
- break
- word=m.group(1)
- name+=' '+word
- startpos+=m.end()
- if startpos<=0:
- continue
- m=re.match(r'[^"]*"(.+)"',line)
+ m=re.match("\s*<(\w+)>",line[startpos:])
if not m:
- # shouldn't happen, but just in case
- val='???'
- print "couldn't make sense of line: "+line
- else:
- val=m.group(1)
- if listing.has_key(name):
- if val != listing[name]:
- print "Exact conflict found: (%s )[%s][%s]"%(name,
- listing[name], val)
- else: # It's easier to read if lines have different indentations
- print "\tRedundant definition: (%s )[%s]"%(name, val)
- else:
- listing[name]=val
-except StopIteration:
- print "hit end"
+ break
+ word=m.group(1)
+ name+=' '+word
+ startpos+=m.end()
+ if startpos<=0:
+ continue
+ m=re.match(r'[^"]*"(.+)"',line)
+ if not m:
+ # shouldn't happen, but just in case
+ val='???'
+ print("couldn't make sense of line: "+line)
+ else:
+ val=m.group(1)
+ if name in listing:
+ if val != listing[name]:
+ print("Exact conflict found: (%s )[%s][%s]"%(name,
+ listing[name], val))
+ else: # It's easier to read if lines have different indentations
+ print("\tRedundant definition: (%s )[%s]"%(name, val))
+ else:
+ listing[name]=val
+
+print("hit end")
# NOW check for prefix conflicts:
-print "Checking prefixes."
-for key in listing.keys():
+print("Checking prefixes.")
+for key in listing:
# print "Key: (%s)"%key
pref=''
# Careful when splitting. The key always starts with a space.
@@ -51,9 +49,9 @@ for key in listing.keys():
continue
pref+=" "+word
# print "checking (%s)"%pref
- if listing.has_key(pref):
- print "Prefix conflict found: " \
- "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref],
- key, listing[key])
+ if pref in listing:
+ print("Prefix conflict found: "
+ "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref],
+ key, listing[key]))
+
-
diff --git a/treeprint.py b/treeprint.py
index bba74a3..987fcb3 100755
--- a/treeprint.py
+++ b/treeprint.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import sys
import re
@@ -31,8 +31,8 @@ def showdict(data, indent):
if first:
first=False
else:
- print
- print " "*max(indent,0) + "("+key,
+ print()
+ print(" "*max(indent,0) + "("+key, end=" ")
# Sneaky trick: we don't want to go newline-indent over and
# over for long sequences, i.e. cases where there is only
# one possible follower. So we skip the newlines in those
@@ -45,25 +45,24 @@ def showdict(data, indent):
# but then the {C|L} are not unique after the O.
if type(value)==dict:
if len(value)>1:
- print ""
+ print()
showdict(value, abs(indent)+4),
else:
- showdict(value, -(abs(indent)+4)),
+ showdict(value, -abs(indent+4)),
else:
- print " "+value.encode('utf-8'),
+ print(" "+value, end=" ")
if "-n" in sys.argv:
try:
- print unicodedata.name(value),
+ print(unicodedata.name(value.decode('utf-8')),end=" ")
except:
pass
- print ")",
+ print(")",end=" ")
listing={}
try:
while True:
- line=sys.stdin.next().decode('utf-8')
- # print "((%s))"%line
+ line=sys.stdin.__next__()
startpos=0
name=[]
dupsfound=[]
@@ -72,7 +71,7 @@ try:
if not m:
break
word=m.group(1)
- name.append(str(word)) # The keys are ordinary strings, not unicode
+ name.append(word)
startpos+=m.end()
if startpos<=0:
continue
@@ -80,13 +79,13 @@ try:
if not m:
# shouldn't happen, but just in case
val='???'
- print "couldn't make sense of line: "+line
+ print("couldn't make sense of line: "+line)
else:
val=m.group(1)
cur=listing
for elt in name[:-1]:
if type(cur)==dict:
- if not cur.has_key(elt):
+ if not elt in cur:
cur[elt]={}
cur=cur[elt] # This will fail for prefix conflicts
else:
@@ -98,15 +97,8 @@ try:
# fail. Prefix conflict. Let's ignore it.
pass
except StopIteration:
- # print "hit end"
- pass
+ print("hit end")
-# Actually, you could get almost as nice a listing just by using yaml,
-# but now that we have special no-newlines-for-singletons handling,
-# showdict looks nicer.
showdict(listing,0)
-# #print "\n\n-=- YAML -=-"
-# import yaml
-# print yaml.dump(listing, default_style=r'"', allow_unicode=True)
-# # Huh. Yaml "allow_unicode=True" still escapes non-BMP chars.
+