From a7af357841237c90348893b9724d36617b8864e4 Mon Sep 17 00:00:00 2001 From: Mark Shoulson Date: Thu, 14 May 2009 16:23:09 -0400 Subject: Rewrite of scan4dups so it's MUCH simpler and actually works for a change. A few fixes and additions. Need to ponder the dups. --- scan4dups.py | 68 +++++++++++++++++++----------------------------------------- 1 file changed, 21 insertions(+), 47 deletions(-) (limited to 'scan4dups.py') diff --git a/scan4dups.py b/scan4dups.py index 80a5eb7..11bcebc 100644 --- a/scan4dups.py +++ b/scan4dups.py @@ -3,67 +3,41 @@ import sys import re -class node(dict): - name="" - parent=None -# Top of tree: -top=node() - -def name2leaf(ending): - rv="" - p=ending - while p is not None: - rv=p.name+" "+rv - p=p.parent - return rv +listing={} try: while True: line=sys.stdin.next() # print "((%s))"%line startpos=0 - ptr=top + name='' dupsfound=[] - lastAdded=None while True: m=re.match("\s*<(\w+)>",line[startpos:]) if not m: break word=m.group(1) - # print "found word: %s"%word - # Now, there is a prefix conflict if: (a) I am about to add an - # element to an otherwise empty (i.e. terminal) directory (a - # leaf), or (b) At the end of this element, the dictionary I - # end on is *not* empty (i.e. NOT a leaf). We check (a) here, - # and (b) after the loop. - # - # Waitasec, it's okay if I'm adding to a leaf *if I just added - # that leaf*! - if not ptr.keys() and ptr!=lastAdded and ptr!=top: - dupsfound.append(name2leaf(ptr)) - try: - next=ptr[word] - except KeyError: - next=node() - next.parent=ptr - next.name=word - lastAdded=next - ptr[word]=next - ptr=next + name+=' '+word + if listing.has_key(name): + dupsfound.append(name) startpos+=m.end() - if startpos!=0: # Skip if the line had nothing. - if ptr.keys(): # Dup if the end is NOT a leaf. - # By rights I should follow each other key all the way - # down to all its possible ends. Too much work. But - # I will go one more down, OK? 
- for other in ptr.keys(): - dupsfound.append(name2leaf(ptr[other])+"(+?)") - # The (+?) is because there might be more after that. - mystring=name2leaf(ptr) - # print "processed: (%s)"%mystring - for i in dupsfound: - print "Prefix conflict found: (%s) vs (%s)"%(mystring, i) + if startpos<=0: + continue + m=re.match(r'[^"]*"(.+)"',line) + if not m: + # shouldn't happen, but just in case + listing[name]='???' + print "couldn't make sense of line: "+line + else: + listing[name]=m.group(1) + for i in dupsfound: + if listing[name]==listing[i]: + msg="Redundant definition: " + else: + msg="Prefix conflict found: " + print msg+"(%s )[%s] vs (%s )[%s]"% \ + (name, listing[name], i, listing[i]) except StopIteration: print "hit end" pass -- cgit v1.2.3 From e839448160948c219aac1247e6ae6eb4d0200dd0 Mon Sep 17 00:00:00 2001 From: Mark Shoulson Date: Mon, 18 May 2009 18:29:56 -0400 Subject: Comments on flaws remaining in scan4dups, some additional chars. --- dotXCompose | 9 ++++++++- scan4dups.py | 6 ++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'scan4dups.py') diff --git a/dotXCompose b/dotXCompose index b3d80b8..ce67240 100644 --- a/dotXCompose +++ b/dotXCompose @@ -48,7 +48,9 @@ include "%L" : "→" rightarrow # RIGHTWARDS ARROW : "↓" downarrow # DOWNWARDS ARROW : "↔" U2194 # LEFT RIGHT ARROW (kragen's) + : "↔" U2194 # LEFT RIGHT ARROW (kragen's) : "↕" U2195 # UP DOWN ARROW (kragen's) + : "↵" U21B5 # DOWNWARDS ARROW WITH CORNER LEFTWARDS : "☚" U261A # BLACK LEFT POINTING INDEX : "☛" U261B # BLACK RIGHT POINTING INDEX @@ -64,6 +66,7 @@ include "%L" : "⇒" U21D2 # RIGHTWARDS DOUBLE ARROW : "⇐" U21D0 # LEFTWARDS DOUBLE ARROW : "⇔" U21D4 # LEFT RIGHT DOUBLE ARROW + : "⇔" U21D4 # LEFT RIGHT DOUBLE ARROW : "⇑" U21D1 # UPWARDS DOUBLE ARROW : "⇓" U21D3 # DOWNWARDS DOUBLE ARROW : "⇕" U21D5 # UP DOWN DOUBLE ARROW @@ -106,7 +109,6 @@ include "%L" # would conflict, with for N WITH UNDERDOT, etc. 
: "∌" U220C # DOES NOT CONTAIN AS MEMBER : "≅" U2245 # APPROXIMATELY EQUAL TO (It actually means "congruent"!) - : "≔" U2254 # COLON EQUALS : "≟" U225f # QUESTIONED EQUAL TO : "≝" U225D # EQUAL TO BY DEFINITION : "≡" U2261 # IDENTICAL TO @@ -166,6 +168,11 @@ include "%L" : "ℿ" U213C # DOUBLE-STRUCK CAPITAL PI : "⅀" U2140 # DOUBLE-STRUCK N-ARY SUMMATION : "⅀" U2140 # DOUBLE-STRUCK N-ARY SUMMATION +# The *look* double-struck. + : "⦃" U2983 # LEFT WHITE CURLY BRACKET + : "⦄" U2984 # RIGHT WHITE CURLY BRACKET +# ⦅⦆⦇⦈⦉⦊ too? +# The rest of that block? Some there may be worth it. : "ℓ" U2113 # SCRIPT SMALL L : "⊏" U228F # SQUARE IMAGE OF : "⊑" U2291 # SQUARE IMAGE OF OR EQUAL TO diff --git a/scan4dups.py b/scan4dups.py index 11bcebc..f504623 100644 --- a/scan4dups.py +++ b/scan4dups.py @@ -31,6 +31,12 @@ try: print "couldn't make sense of line: "+line else: listing[name]=m.group(1) + # THIS IS STILL FAULTY. + # What if a long one comes through first, and then a short one? + # + # Probably have to do two passes: record them all in the hash + # (and check for exact duplicates), then go though all the + # keys and check all their prefixes. for i in dupsfound: if listing[name]==listing[i]: msg="Redundant definition: " -- cgit v1.2.3 From af3ce62c2d4ee2bd148d25079dd5962f1f07a218 Mon Sep 17 00:00:00 2001 From: Mark Shoulson Date: Mon, 18 May 2009 21:58:39 -0400 Subject: Another rewrite of scan4dups to fix things still being missed. Added NABLA and INTEGRAL: how did we ever manage without them?? 
--- dotXCompose | 7 +++++++ scan4dups.py | 44 ++++++++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 18 deletions(-) (limited to 'scan4dups.py') diff --git a/dotXCompose b/dotXCompose index ce67240..73f0df2 100644 --- a/dotXCompose +++ b/dotXCompose @@ -52,6 +52,10 @@ include "%L" : "↕" U2195 # UP DOWN ARROW (kragen's) : "↵" U21B5 # DOWNWARDS ARROW WITH CORNER LEFTWARDS +# Arrow keys don't always work: some apps trap them for cursor control and +# other boring things. The arrow symbols have alternate keystrokes. Do +# we need others for these printer's fists? If so, what? The -= and =- +# we had before are not necessarily the best choices. : "☚" U261A # BLACK LEFT POINTING INDEX : "☛" U261B # BLACK RIGHT POINTING INDEX : "☜" U261C # WHITE LEFT POINTING INDEX @@ -150,6 +154,9 @@ include "%L" : "∴" U2234 # THEREFORE : "∵" U2235 # BECAUSE : "‱" U2031 # PER TEN THOUSAND (basis points) +# OK, absolutely cannot believe we made it this long without NABLA or INTEGRAL + : "∇" U2207 # NABLA + : "∫" U222B # INTEGRAL : "⃗" U20D7 # COMBINING RIGHT ARROW ABOVE (vector) # There's a whole passel of these guys starting at U+1D538 but I have no fonts for those. : "ℂ" U2102 # DOUBLE-STRUCK CAPITAL C (set of complex numbers) diff --git a/scan4dups.py b/scan4dups.py index f504623..00aef78 100644 --- a/scan4dups.py +++ b/scan4dups.py @@ -19,33 +19,41 @@ try: break word=m.group(1) name+=' '+word - if listing.has_key(name): - dupsfound.append(name) startpos+=m.end() if startpos<=0: continue m=re.match(r'[^"]*"(.+)"',line) if not m: # shouldn't happen, but just in case - listing[name]='???' + val='???' print "couldn't make sense of line: "+line else: - listing[name]=m.group(1) - # THIS IS STILL FAULTY. - # What if a long one comes through first, and then a short one? - # - # Probably have to do two passes: record them all in the hash - # (and check for exact duplicates), then go though all the - # keys and check all their prefixes. 
- for i in dupsfound: - if listing[name]==listing[i]: - msg="Redundant definition: " + val=m.group(1) + if listing.has_key(name): + if val != listing[name]: + print "Exact conflict found: (%s )[%s][%s]"%(name, + listing[name], val) else: - msg="Prefix conflict found: " - print msg+"(%s )[%s] vs (%s )[%s]"% \ - (name, listing[name], i, listing[i]) + print "Redundant definition: (%s )[%s]"%(name, val) + else: + listing[name]=val except StopIteration: print "hit end" - pass -print "Done." +# NOW check for prefix conflicts: +print "Checking prefixes." +for key in listing.keys(): + # print "Key: (%s)"%key + pref='' + # Careful when splitting. The key always starts with a space. + for word in key.split(" ")[:-1]: # chop the last one; that'll always match. + # Skip the empty first entry + if not word: + continue + pref+=" "+word + # print "checking (%s)"%pref + if listing.has_key(pref): + print "Prefix conflict found: " \ + "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref], + key, listing[key]) + -- cgit v1.2.3 From f81c068f810285f3e7ac5bcc48642a3c584b5844 Mon Sep 17 00:00:00 2001 From: Mark Shoulson Date: Thu, 21 May 2009 19:34:00 -0400 Subject: Add some math symbols that should have been there already. --- dotXCompose | 4 ++++ scan4dups.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'scan4dups.py') diff --git a/dotXCompose b/dotXCompose index 73f0df2..7dfe31b 100644 --- a/dotXCompose +++ b/dotXCompose @@ -155,8 +155,12 @@ include "%L" : "∵" U2235 # BECAUSE : "‱" U2031 # PER TEN THOUSAND (basis points) # OK, absolutely cannot believe we made it this long without NABLA or INTEGRAL +# or PARTIAL DIFFERENTIAL : "∇" U2207 # NABLA : "∫" U222B # INTEGRAL +

: "∂" U2202 # PARTIAL DIFFERENTIAL + : "∂" U2202 # PARTIAL DIFFERENTIAL +# Would we prefer 20D1 COMBINING RIGHT HARPOON ABOVE? : "⃗" U20D7 # COMBINING RIGHT ARROW ABOVE (vector) # There's a whole passel of these guys starting at U+1D538 but I have no fonts for those. : "ℂ" U2102 # DOUBLE-STRUCK CAPITAL C (set of complex numbers) diff --git a/scan4dups.py b/scan4dups.py index 00aef78..e4ba418 100644 --- a/scan4dups.py +++ b/scan4dups.py @@ -1,4 +1,4 @@ -#!/bin/py +#!/usr/bin/env python import sys import re -- cgit v1.2.3