From 37aca55143ca337d8ba60d316ccf19abf482e837 Mon Sep 17 00:00:00 2001 From: "Mark E. Shoulson" Date: Thu, 19 Dec 2019 09:21:19 -0500 Subject: Fixed some .py utils to use python3; added a few chars. After all, Python2 reaches EOL very soon! --- dotXCompose | 13 +++++++---- emoji-base | 2 ++ scan4dups.py | 74 +++++++++++++++++++++++++++++------------------------------- treeprint.py | 36 ++++++++++++----------------- 4 files changed, 61 insertions(+), 64 deletions(-) diff --git a/dotXCompose b/dotXCompose index eb45132..a451532 100644 --- a/dotXCompose +++ b/dotXCompose @@ -98,15 +98,16 @@ include "%L" # Already present for me: # : " " U00A0 # NO-BREAK SPACE # Narrow no-break space, needed for some Latin languages like French - : " " U202f # NARROW NO-BREAK SPACE + : " " U202f # NARROW NO-BREAK SPACE # Technically, NO-BREAK SPACE is not supposed to be fixed-width. This is: : " " U2007 # FIGURE SPACE -# Do we want/need these? - : "†" U2020 # DAGGER - : "‡" U2021 # DOUBLE DAGGER # We used to have THIN SPACE as , but now that’s remapped # to " ‘", for conveniently enclosing things in proper single-quotes. : " " U2009 # THIN SPACE +# (heh, heh... space bar) + : " " U200A # HAIR SPACE + : "†" U2020 # DAGGER + : "‡" U2021 # DOUBLE DAGGER : "§" U00A7 # SECTION SIGN # It's in the Asian section, but it's a general-purpose punctuation: : "〃" U3003 # DITTO MARK @@ -459,6 +460,7 @@ include "%L" <7> : "『" U300E # LEFT WHITE CORNER BRACKET : "』" U300F # RIGHT WHITE CORNER BRACKET # How about these for the "corners"? Confusing with {L[} etc? +# and don't forget about {L_[} which we have for ⸤ <7> : "⌜" U231C # TOP LEFT CORNER <7> : "⌝" U231D # TOP RIGHT CORNER : "⌞" U231E # BOTTOM LEFT CORNER @@ -886,6 +888,9 @@ include "%L" : " " U2003 # EM SPACE <3> : " " U2004 # THREE-PER-EM SPACE <4> : " " U2005 # FOUR-PER-EM SPACE + <6> : " " U2006 # SIX-PER-EM SPACE + : " " U2008 # PUNCTUATION SPACE + : " " U205F # MEDIUM MATHEMATICAL SPACE : "◌" U25CC # DOTTED CIRCLE : "⬚" U2B1A # DOTTED SQUARE : "﴾" UFD3E # ORNATE LEFT PARENTHESIS diff --git a/emoji-base b/emoji-base index 8f39373..fcc9e7b 100644 --- a/emoji-base +++ b/emoji-base @@ -2244,6 +2244,8 @@ {noway} : "🛇" U1F6C7 # PROHIBITED SIGN #- 1F6C8;CIRCLED INFORMATION SOURCE;So;0;ON;;;;;N;;;;; {(info)} : "🛈" U1F6C8 # CIRCLED INFORMATION SOURCE +#- 2139;INFORMATION SOURCE;Ll;0;L; 0069;;;;N;;;;; + {info} : "ℹ" U2139 # INFORMATION SOURCE ## careful for conflicts with {boy} and {girl} #- 1F6C9;BOYS SYMBOL;So;0;ON;;;;;N;;;;; {BOYS} : "🛉" U1F6C9 # BOYS SYMBOL diff --git a/scan4dups.py b/scan4dups.py index 9ce6193..a80bf94 100755 --- a/scan4dups.py +++ b/scan4dups.py @@ -6,42 +6,40 @@ import re listing={} -try: +for line in sys.stdin: + # print "((%s))"%line + startpos=0 + name='' + dupsfound=[] while True: - line=sys.stdin.next() - # print "((%s))"%line - startpos=0 - name='' - dupsfound=[] - while True: - m=re.match("\s*<(\w+)>",line[startpos:]) - if not m: - break - word=m.group(1) - name+=' '+word - startpos+=m.end() - if startpos<=0: - continue - m=re.match(r'[^"]*"(.+)"',line) + m=re.match("\s*<(\w+)>",line[startpos:]) if not m: - # shouldn't happen, but just in case - val='???' - print "couldn't make sense of line: "+line - else: - val=m.group(1) - if listing.has_key(name): - if val != listing[name]: - print "Exact conflict found: (%s )[%s][%s]"%(name, - listing[name], val) - else: # It's easier to read if lines have different indentations - print "\tRedundant definition: (%s )[%s]"%(name, val) - else: - listing[name]=val -except StopIteration: - print "hit end" + break + word=m.group(1) + name+=' '+word + startpos+=m.end() + if startpos<=0: + continue + m=re.match(r'[^"]*"(.+)"',line) + if not m: + # shouldn't happen, but just in case + val='???' + print("couldn't make sense of line: "+line) + else: + val=m.group(1) + if name in listing: + if val != listing[name]: + print("Exact conflict found: (%s )[%s][%s]"%(name, + listing[name], val)) + else: # It's easier to read if lines have different indentations + print("\tRedundant definition: (%s )[%s]"%(name, val)) + else: + listing[name]=val + +print("hit end") # NOW check for prefix conflicts: -print "Checking prefixes." -for key in listing.keys(): +print("Checking prefixes.") +for key in listing: # print "Key: (%s)"%key pref='' # Careful when splitting. The key always starts with a space. @@ -51,9 +49,9 @@ for key in listing.keys(): continue pref+=" "+word # print "checking (%s)"%pref - if listing.has_key(pref): - print "Prefix conflict found: " \ - "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref], - key, listing[key]) + if pref in listing: + print("Prefix conflict found: " + "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref], + key, listing[key])) + - diff --git a/treeprint.py b/treeprint.py index bba74a3..987fcb3 100755 --- a/treeprint.py +++ b/treeprint.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import re @@ -31,8 +31,8 @@ def showdict(data, indent): if first: first=False else: - print - print " "*max(indent,0) + "("+key, + print() + print(" "*max(indent,0) + "("+key, end=" ") # Sneaky trick: we don't want to go newline-indent over and # over for long sequences, i.e. cases where there is only # one possible follower. So we skip the newlines in those @@ -45,25 +45,24 @@ def showdict(data, indent): # but then the {C|L} are not unique after the O. if type(value)==dict: if len(value)>1: - print "" + print() showdict(value, abs(indent)+4), else: - showdict(value, -(abs(indent)+4)), + showdict(value, -abs(indent+4)), else: - print " "+value.encode('utf-8'), + print(" "+value, end=" ") if "-n" in sys.argv: try: - print unicodedata.name(value), + print(unicodedata.name(value.decode('utf-8')),end=" ") except: pass - print ")", + print(")",end=" ") listing={} try: while True: - line=sys.stdin.next().decode('utf-8') - # print "((%s))"%line + line=sys.stdin.__next__() startpos=0 name=[] dupsfound=[] @@ -72,7 +71,7 @@ try: if not m: break word=m.group(1) - name.append(str(word)) # The keys are ordinary strings, not unicode + name.append(word) startpos+=m.end() if startpos<=0: continue @@ -80,13 +79,13 @@ try: if not m: # shouldn't happen, but just in case val='???' - print "couldn't make sense of line: "+line + print("couldn't make sense of line: "+line) else: val=m.group(1) cur=listing for elt in name[:-1]: if type(cur)==dict: - if not cur.has_key(elt): + if not elt in cur: cur[elt]={} cur=cur[elt] # This will fail for prefix conflicts else: @@ -98,15 +97,8 @@ try: # fail. Prefix conflict. Let's ignore it. pass except StopIteration: - # print "hit end" - pass + print("hit end") -# Actually, you could get almost as nice a listing just by using yaml, -# but now that we have special no-newlines-for-singletons handling, -# showdict looks nicer. showdict(listing,0) -# #print "\n\n-=- YAML -=-" -# import yaml -# print yaml.dump(listing, default_style=r'"', allow_unicode=True) -# # Huh. Yaml "allow_unicode=True" still escapes non-BMP chars. + -- cgit v1.2.3