# scan4dups.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

#!/bin/py 
 
import sys
import re
 
 
# Maps each full tag-sequence name (e.g. ' a b') to the quoted value most
# recently defined for it.  Filled in by main().
listing = {}

# One "<word>" tag, optionally preceded by whitespace.
_TAG_RE = re.compile(r"\s*<(\w+)>")
# The double-quoted value on a line; greedy, so it runs from the first
# double quote to the last one on the line.
_VALUE_RE = re.compile(r'[^"]*"(.+)"')


def _tag_prefixes(line):
    """Return the cumulative names built from the tags that start *line*.

    Each name is the tag words seen so far, each preceded by one space, so
    a line starting with '<a> <b>' yields [' a', ' a b'].  The last entry
    is the line's full name.  Returns an empty list when the line does not
    start with a tag.
    """
    prefixes = []
    name = ''
    pos = 0
    while True:
        m = _TAG_RE.match(line, pos)
        if not m:
            break
        name += ' ' + m.group(1)
        prefixes.append(name)
        pos = m.end()
    return prefixes


def _describe(longer, longer_value, shorter, shorter_value):
    """Format one report line for two definitions that overlap.

    The pair is called redundant when both carry the same value and a
    conflict otherwise.
    """
    if longer_value == shorter_value:
        msg = "Redundant definition: "
    else:
        msg = "Prefix conflict found: "
    return msg + "(%s )[%s] vs (%s )[%s]" % (
        longer, longer_value, shorter, shorter_value)


def scan_definitions(lines, table=None):
    """Yield a report message for every overlapping definition in *lines*.

    Every line that starts with one or more '<word>' tags is a definition;
    other lines are ignored.  Each definition is compared with every
    earlier definition whose tag sequence is a prefix of it, equal to it,
    or an extension of it, regardless of which of the two was read first.
    The longer sequence is always shown first in the message.

    lines -- iterable of text lines (trailing newlines are fine).
    table -- optional dict that receives name -> value for every
             definition read; a later definition of the same name replaces
             the earlier value.  Expected to be empty on entry: entries
             already present are not indexed for the extension check.

    A definition line without a double-quoted value is recorded with the
    value '???' and reported with a "couldn't make sense of line" message.
    """
    if table is None:
        table = {}
    # Maps a name to the already-recorded longer names that start with it,
    # so a short definition arriving after a long one is still caught.
    extensions = {}
    for line in lines:
        prefixes = _tag_prefixes(line)
        if not prefixes:
            continue
        name = prefixes[-1]
        m = _VALUE_RE.match(line)
        if m:
            value = m.group(1)
        else:
            # shouldn't happen, but just in case
            value = '???'
            yield "couldn't make sense of line: " + line
        # Earlier definitions that are a prefix of (or identical to) this
        # one.  Compared before the table is updated, so a redefinition is
        # checked against the value it replaces rather than against itself.
        for prefix in prefixes:
            if prefix in table:
                yield _describe(name, value, prefix, table[prefix])
        # Earlier, longer definitions that this one is a proper prefix of.
        for longer in extensions.get(name, ()):
            yield _describe(longer, table[longer], name, value)
        if name not in table:
            # Index each name once so redefinitions do not add duplicates.
            for prefix in prefixes[:-1]:
                extensions.setdefault(prefix, []).append(name)
        table[name] = value


def main():
    """Scan standard input and print every report, then the end markers."""
    # print(<one string>) behaves the same on Python 2 and Python 3.
    for message in scan_definitions(sys.stdin, listing):
        print(message)
    print("hit end")
    print("Done.")


if __name__ == "__main__":
    main()