From a7af357841237c90348893b9724d36617b8864e4 Mon Sep 17 00:00:00 2001
From: Mark Shoulson <mark@kli.org>
Date: Thu, 14 May 2009 16:23:09 -0400
Subject: Rewrite of scan4dups so it's MUCH simpler and actually works for a
 change.  A few fixes and additions.  Need to ponder the dups.

---
 scan4dups.py | 68 +++++++++++++++++++-----------------------------------------
 1 file changed, 21 insertions(+), 47 deletions(-)

(limited to 'scan4dups.py')
diff --git a/scan4dups.py b/scan4dups.py
index 80a5eb7..11bcebc 100644
--- a/scan4dups.py
+++ b/scan4dups.py
@@ -3,67 +3,41 @@
 import sys
 import re
 
-class node(dict):
-    name=""
-    parent=None
 
-# Top of tree:
-top=node()
-
-def name2leaf(ending):
-    rv=""
-    p=ending
-    while p is not None:
-        rv=p.name+" "+rv
-        p=p.parent
-    return rv
+listing={}
 
 try:
     while True:
         line=sys.stdin.next()
         # print "((%s))"%line
         startpos=0
-        ptr=top
+        name=''
         dupsfound=[]
-        lastAdded=None
         while True:
             m=re.match("\s*<(\w+)>",line[startpos:])
             if not m:
                 break
             word=m.group(1)
-            # print "found word: %s"%word
-            # Now, there is a prefix conflict if: (a) I am about to add an
-            # element to an otherwise empty (i.e. terminal) directory (a
-            # leaf), or (b) At the end of this element, the dictionary I
-            # end on is *not* empty (i.e. NOT a leaf).  We check (a) here,
-            # and (b) after the loop.
-            #
-            # Waitasec, it's okay if I'm adding to a leaf *if I just added
-            # that leaf*!
-            if not ptr.keys() and ptr!=lastAdded and ptr!=top:
-                dupsfound.append(name2leaf(ptr))
-            try:
-                next=ptr[word]
-            except KeyError:
-                next=node()
-                next.parent=ptr
-                next.name=word
-                lastAdded=next
-                ptr[word]=next
-            ptr=next
+            name+=' '+word
+            if listing.has_key(name):
+                dupsfound.append(name)
             startpos+=m.end()
-        if startpos!=0:                 # Skip if the line had nothing.
-            if ptr.keys():              # Dup if the end is NOT a leaf.
-                # By rights I should follow each other key all the way
-                # down to all its possible ends.  Too much work.  But
-                # I will go one more down, OK?
-                for other in ptr.keys():
-                    dupsfound.append(name2leaf(ptr[other])+"(+?)")
-                    # The (+?) is because there might be more after that.
-            mystring=name2leaf(ptr)
-            # print "processed: (%s)"%mystring
-            for i in dupsfound:
-                print "Prefix conflict found: (%s) vs (%s)"%(mystring, i)
+        if startpos<=0:
+            continue
+        m=re.match(r'[^"]*"(.+)"',line)
+        if not m:
+            # shouldn't happen, but just in case
+            listing[name]='???'
+            print "couldn't make sense of line: "+line
+        else:
+            listing[name]=m.group(1)
+        for i in dupsfound:
+            if listing[name]==listing[i]:
+                msg="Redundant definition: "
+            else:
+                msg="Prefix conflict found: "
+            print msg+"(%s )[%s] vs (%s )[%s]"% \
+                (name, listing[name], i, listing[i])
 except StopIteration:
     print "hit end"
     pass
-- 
cgit v1.2.3


From e839448160948c219aac1247e6ae6eb4d0200dd0 Mon Sep 17 00:00:00 2001
From: Mark Shoulson <mark@kli.org>
Date: Mon, 18 May 2009 18:29:56 -0400
Subject: Comments on flaws remaining in scan4dups, some additional chars.

---
 dotXCompose  | 9 ++++++++-
 scan4dups.py | 6 ++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'scan4dups.py')

diff --git a/dotXCompose b/dotXCompose
index b3d80b8..ce67240 100644
--- a/dotXCompose
+++ b/dotXCompose
@@ -48,7 +48,9 @@ include "%L"
 <Multi_key> <Right> <Right>		: "→"	rightarrow	# RIGHTWARDS ARROW
 <Multi_key> <Down> <Down>		: "↓"	downarrow	# DOWNWARDS ARROW
 <Multi_key> <Left> <Right>		: "↔"	U2194           # LEFT RIGHT ARROW (kragen's)
+<Multi_key> <Right> <Left>		: "↔"	U2194           # LEFT RIGHT ARROW (kragen's)
 <Multi_key> <Up> <Down>			: "↕"	U2195           # UP DOWN ARROW (kragen's)
+<Multi_key> <Down> <Left>		: "↵"	U21B5		# DOWNWARDS ARROW WITH CORNER LEFTWARDS
 
 <Multi_key> <F> <Left>		: "☚"	U261A		# BLACK LEFT POINTING INDEX
 <Multi_key> <F> <Right>		: "☛"	U261B		# BLACK RIGHT POINTING INDEX
@@ -64,6 +66,7 @@ include "%L"
 <Multi_key> <equal> <Right> <Right>	: "⇒"	U21D2		# RIGHTWARDS DOUBLE ARROW
 <Multi_key> <equal> <Left> <Left>	: "⇐"	U21D0		# LEFTWARDS DOUBLE ARROW
 <Multi_key> <equal> <Left> <Right>	: "⇔"	U21D4	# LEFT RIGHT DOUBLE ARROW
+<Multi_key> <equal> <Right> <Left>	: "⇔"	U21D4	# LEFT RIGHT DOUBLE ARROW
 <Multi_key> <equal> <Up> <Up>		: "⇑"	U21D1	# UPWARDS DOUBLE ARROW
 <Multi_key> <equal> <Down> <Down>	: "⇓"	U21D3	# DOWNWARDS DOUBLE ARROW
 <Multi_key> <equal> <Up> <Down>		: "⇕"	U21D5	# UP DOWN DOUBLE ARROW
@@ -106,7 +109,6 @@ include "%L"
 # <exclam><n><i> would conflict, with <exclam> <n> for N WITH UNDERDOT, etc.
 <Multi_key> <U220B> <slash>		: "∌"	U220C		# DOES NOT CONTAIN AS MEMBER
 <Multi_key> <asciitilde> <equal>			: "≅"	U2245		# APPROXIMATELY EQUAL TO (It actually means "congruent"!)
-<Multi_key> <colon> <equal>		: "≔"	U2254		# COLON EQUALS
 <Multi_key> <equal> <question>		: "≟"	U225f		# QUESTIONED EQUAL TO
 <Multi_key> <equal> <d> <e> <f>		: "≝"	U225D		# EQUAL TO BY DEFINITION
 <Multi_key> <equal> <equal>		: "≡"	U2261		# IDENTICAL TO
@@ -166,6 +168,11 @@ include "%L"
 <Multi_key> <bar> <Greek_PI> 	 : "ℿ" U213C	# DOUBLE-STRUCK CAPITAL PI
 <Multi_key> <bar> <asterisk> <S> : "⅀" U2140	# DOUBLE-STRUCK N-ARY SUMMATION
 <Multi_key> <bar> <Greek_SIGMA>  : "⅀" U2140	# DOUBLE-STRUCK N-ARY SUMMATION
+# They *look* double-struck.
+<Multi_key> <bar> <braceleft>	 : "⦃" U2983	# LEFT WHITE CURLY BRACKET
+<Multi_key> <bar> <braceright>	 : "⦄" U2984	# RIGHT WHITE CURLY BRACKET 
+# ⦅⦆⦇⦈⦉⦊ too?
+# The rest of that block?  Some there may be worth it.
 <Multi_key> <l> <l>		: "ℓ" U2113	# SCRIPT SMALL L
 <Multi_key> <bracketleft> <bracketleft> : "⊏"   U228F           # SQUARE IMAGE OF
 <Multi_key> <bracketleft> <equal>       : "⊑"   U2291           # SQUARE IMAGE OF OR EQUAL TO
diff --git a/scan4dups.py b/scan4dups.py
index 11bcebc..f504623 100644
--- a/scan4dups.py
+++ b/scan4dups.py
@@ -31,6 +31,12 @@ try:
             print "couldn't make sense of line: "+line
         else:
             listing[name]=m.group(1)
+        # THIS IS STILL FAULTY.
+        # What if a long one comes through first, and then a short one?
+        #
+        # Probably have to do two passes: record them all in the hash
+        # (and check for exact duplicates), then go though all the
+        # keys and check all their prefixes.
         for i in dupsfound:
             if listing[name]==listing[i]:
                 msg="Redundant definition: "
-- 
cgit v1.2.3


From af3ce62c2d4ee2bd148d25079dd5962f1f07a218 Mon Sep 17 00:00:00 2001
From: Mark Shoulson <mark@kli.org>
Date: Mon, 18 May 2009 21:58:39 -0400
Subject: Another rewrite of scan4dups to fix things still being missed.  Added
 NABLA and INTEGRAL: how did we ever manage without them??

---
 dotXCompose  |  7 +++++++
 scan4dups.py | 44 ++++++++++++++++++++++++++------------------
 2 files changed, 33 insertions(+), 18 deletions(-)

(limited to 'scan4dups.py')

diff --git a/dotXCompose b/dotXCompose
index ce67240..73f0df2 100644
--- a/dotXCompose
+++ b/dotXCompose
@@ -52,6 +52,10 @@ include "%L"
 <Multi_key> <Up> <Down>			: "↕"	U2195           # UP DOWN ARROW (kragen's)
 <Multi_key> <Down> <Left>		: "↵"	U21B5		# DOWNWARDS ARROW WITH CORNER LEFTWARDS
 
+# Arrow keys don't always work: some apps trap them for cursor control and
+# other boring things.  The arrow symbols have alternate keystrokes.  Do
+# we need others for these printer's fists?  If so, what?  The -= and =-
+# we had before are not necessarily the best choices.
 <Multi_key> <F> <Left>		: "☚"	U261A		# BLACK LEFT POINTING INDEX
 <Multi_key> <F> <Right>		: "☛"	U261B		# BLACK RIGHT POINTING INDEX
 <Multi_key> <f> <Left>		: "☜"	U261C		# WHITE LEFT POINTING INDEX
@@ -150,6 +154,9 @@ include "%L"
 <Multi_key> <period> <quotedbl>	   	: "∴"	U2234  		# THEREFORE
 <Multi_key> <quotedbl> <period>	   	: "∵"	U2235  		# BECAUSE
 <Multi_key> <percent> <percent>		: "‱"	U2031	# PER TEN THOUSAND (basis points)
+# OK, absolutely cannot believe we made it this long without NABLA or INTEGRAL
+<Multi_key> <ampersand> <d> <e> <l>	: "∇"	U2207	        # NABLA
+<Multi_key> <ampersand> <i> <n> <t>	: "∫"	U222B		# INTEGRAL
 <Multi_key> <asciicircum> <greater>     : "⃗"   U20D7           # COMBINING RIGHT ARROW ABOVE (vector)
 # There's a whole passel of these guys starting at U+1D538 but I have no fonts for those.
 <Multi_key> <bar> <C>                   : "ℂ"   U2102           # DOUBLE-STRUCK CAPITAL C (set of complex numbers)
diff --git a/scan4dups.py b/scan4dups.py
index f504623..00aef78 100644
--- a/scan4dups.py
+++ b/scan4dups.py
@@ -19,33 +19,41 @@ try:
                 break
             word=m.group(1)
             name+=' '+word
-            if listing.has_key(name):
-                dupsfound.append(name)
             startpos+=m.end()
         if startpos<=0:
             continue
         m=re.match(r'[^"]*"(.+)"',line)
         if not m:
             # shouldn't happen, but just in case
-            listing[name]='???'
+            val='???'
             print "couldn't make sense of line: "+line
         else:
-            listing[name]=m.group(1)
-        # THIS IS STILL FAULTY.
-        # What if a long one comes through first, and then a short one?
-        #
-        # Probably have to do two passes: record them all in the hash
-        # (and check for exact duplicates), then go though all the
-        # keys and check all their prefixes.
-        for i in dupsfound:
-            if listing[name]==listing[i]:
-                msg="Redundant definition: "
+            val=m.group(1)
+        if listing.has_key(name):
+            if val != listing[name]:
+                print "Exact conflict found: (%s )[%s][%s]"%(name, 
+                                                             listing[name], val)
             else:
-                msg="Prefix conflict found: "
-            print msg+"(%s )[%s] vs (%s )[%s]"% \
-                (name, listing[name], i, listing[i])
+                print "Redundant definition: (%s )[%s]"%(name, val)
+        else:
+            listing[name]=val
 except StopIteration:
     print "hit end"
-    pass
-print "Done."
+# NOW check for prefix conflicts:
+print "Checking prefixes."
+for key in listing.keys():
+    # print "Key: (%s)"%key
+    pref=''
+    # Careful when splitting.  The key always starts with a space.
+    for word in key.split(" ")[:-1]: # chop the last one; that'll always match.
+        # Skip the empty first entry
+        if not word:
+            continue
+        pref+=" "+word
+        # print "checking (%s)"%pref
+        if listing.has_key(pref):
+            print "Prefix conflict found: " \
+                "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref],
+                                          key, listing[key])
+
     
-- 
cgit v1.2.3


From f81c068f810285f3e7ac5bcc48642a3c584b5844 Mon Sep 17 00:00:00 2001
From: Mark Shoulson <mark@kli.org>
Date: Thu, 21 May 2009 19:34:00 -0400
Subject: Add some math symbols that should have been there already.

---
 dotXCompose  | 4 ++++
 scan4dups.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'scan4dups.py')

diff --git a/dotXCompose b/dotXCompose
index 73f0df2..7dfe31b 100644
--- a/dotXCompose
+++ b/dotXCompose
@@ -155,8 +155,12 @@ include "%L"
 <Multi_key> <quotedbl> <period>	   	: "∵"	U2235  		# BECAUSE
 <Multi_key> <percent> <percent>		: "‱"	U2031	# PER TEN THOUSAND (basis points)
 # OK, absolutely cannot believe we made it this long without NABLA or INTEGRAL
+# or PARTIAL DIFFERENTIAL
 <Multi_key> <ampersand> <d> <e> <l>	: "∇"	U2207	        # NABLA
 <Multi_key> <ampersand> <i> <n> <t>	: "∫"	U222B		# INTEGRAL
+<Multi_key> <ampersand> <p> <a> <r> <t>   : "∂" U2202		# PARTIAL DIFFERENTIAL
+<Multi_key> <asterisk> <period> <period> <d>   : "∂" U2202	# PARTIAL DIFFERENTIAL
+# Would we prefer 20D1 COMBINING RIGHT HARPOON ABOVE?
 <Multi_key> <asciicircum> <greater>     : "⃗"   U20D7           # COMBINING RIGHT ARROW ABOVE (vector)
 # There's a whole passel of these guys starting at U+1D538 but I have no fonts for those.
 <Multi_key> <bar> <C>                   : "ℂ"   U2102           # DOUBLE-STRUCK CAPITAL C (set of complex numbers)
diff --git a/scan4dups.py b/scan4dups.py
index 00aef78..e4ba418 100644
--- a/scan4dups.py
+++ b/scan4dups.py
@@ -1,4 +1,4 @@
-#!/bin/py
+#!/usr/bin/env python
 
 import sys
 import re
-- 
cgit v1.2.3