aboutsummaryrefslogtreecommitdiff
path: root/scan4dups.py
diff options
context:
space:
mode:
author Mark Shoulson <mark@kli.org> 2009-05-18 21:58:39 -0400
committer Mark Shoulson <mark@kli.org> 2009-05-18 21:58:39 -0400
commit af3ce62c2d4ee2bd148d25079dd5962f1f07a218 (patch)
tree 676de641a636d34c1b58992f584390636c9111c0 /scan4dups.py
parent Comments on flaws remaining in scan4dups, some additional chars. (diff)
download dotXCompose-af3ce62c2d4ee2bd148d25079dd5962f1f07a218.tar.gz
dotXCompose-af3ce62c2d4ee2bd148d25079dd5962f1f07a218.tar.bz2
dotXCompose-af3ce62c2d4ee2bd148d25079dd5962f1f07a218.zip
Another rewrite of scan4dups to fix things still being missed. Added NABLA and INTEGRAL: how did we ever manage without them??
Diffstat (limited to 'scan4dups.py')
-rw-r--r-- scan4dups.py 44
1 files changed, 26 insertions, 18 deletions
diff --git a/scan4dups.py b/scan4dups.py
index f504623..00aef78 100644
--- a/scan4dups.py
+++ b/scan4dups.py
@@ -19,33 +19,41 @@ try:
break
word=m.group(1)
name+=' '+word
- if listing.has_key(name):
- dupsfound.append(name)
startpos+=m.end()
if startpos<=0:
continue
m=re.match(r'[^"]*"(.+)"',line)
if not m:
# shouldn't happen, but just in case
- listing[name]='???'
+ val='???'
print "couldn't make sense of line: "+line
else:
- listing[name]=m.group(1)
- # THIS IS STILL FAULTY.
- # What if a long one comes through first, and then a short one?
- #
- # Probably have to do two passes: record them all in the hash
- # (and check for exact duplicates), then go though all the
- # keys and check all their prefixes.
- for i in dupsfound:
- if listing[name]==listing[i]:
- msg="Redundant definition: "
+ val=m.group(1)
+ if listing.has_key(name):
+ if val != listing[name]:
+ print "Exact conflict found: (%s )[%s][%s]"%(name,
+ listing[name], val)
else:
- msg="Prefix conflict found: "
- print msg+"(%s )[%s] vs (%s )[%s]"% \
- (name, listing[name], i, listing[i])
+ print "Redundant definition: (%s )[%s]"%(name, val)
+ else:
+ listing[name]=val
except StopIteration:
print "hit end"
- pass
-print "Done."
+# NOW check for prefix conflicts:
+print "Checking prefixes."
+for key in listing.keys():
+ # print "Key: (%s)"%key
+ pref=''
+ # Careful when splitting. The key always starts with a space.
+ for word in key.split(" ")[:-1]: # chop the last one; that'll always match.
+ # Skip the empty first entry
+ if not word:
+ continue
+ pref+=" "+word
+ # print "checking (%s)"%pref
+ if listing.has_key(pref):
+ print "Prefix conflict found: " \
+ "(%s )[%s] vs (%s )[%s]"%(pref, listing[pref],
+ key, listing[key])
+