-
Notifications
You must be signed in to change notification settings - Fork 1
/
Diff-char.py
30 lines (24 loc) · 909 Bytes
/
Diff-char.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import xml.etree.cElementTree as ET
import pprint
import re
# Matches a string made up solely of lowercase ASCII letters and underscores
# (the pattern also accepts the empty string).
lower = re.compile(r'^([a-z]|_)*$')
# Same alphabet as `lower`, but with exactly one colon separating two parts,
# e.g. "addr:street".
lower_colon = re.compile(r'^([a-z]|_)*:([a-z]|_)*$')
# Matches any single occurrence of one of: = + / & < > ; ' " ? % # $ @ , .
# or a whitespace character (space, tab, carriage return, newline).
problemchars = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')
def key_type(element, keys):
    """Classify the ``k`` attribute of a ``<tag>`` element and bump its counter.

    Elements whose tag name is not ``"tag"`` are ignored. For a ``<tag>``
    element the key is tested against the module-level patterns in this
    order, and the first match wins:

    1. ``lower``        -> ``keys["lower"]``
    2. ``lower_colon``  -> ``keys["lower_colon"]``
    3. ``problemchars`` -> ``keys["problemchars"]``
    4. anything else    -> ``keys["other"]``

    A ``<tag>`` element that has no ``k`` attribute at all is counted under
    ``"other"`` instead of raising ``KeyError``, so every ``<tag>`` element
    is counted exactly once.

    Args:
        element: An object exposing ``.tag`` and a dict-like ``.attrib``
            (e.g. an ElementTree element).
        keys: Dict holding the integer counters ``"lower"``,
            ``"lower_colon"``, ``"problemchars"`` and ``"other"``. It is
            updated in place.

    Returns:
        The same ``keys`` dict that was passed in.
    """
    if element.tag != "tag":
        return keys

    # Read the attribute once; .get() avoids a KeyError on a malformed tag.
    key = element.attrib.get('k')
    if key is None:
        category = "other"
    elif lower.search(key):
        category = "lower"
    elif lower_colon.search(key):
        category = "lower_colon"
    elif problemchars.search(key):
        category = "problemchars"
    else:
        category = "other"

    keys[category] += 1
    return keys
def process_map(filename):
    """Count the tag-key categories found in an XML file.

    The file is parsed incrementally with ``ET.iterparse`` and every
    element it yields is passed to ``key_type``.

    Args:
        filename: Source handed directly to ``ET.iterparse``.

    Returns:
        Dict with the integer counters ``"lower"``, ``"lower_colon"``,
        ``"problemchars"`` and ``"other"``.
    """
    keys = {"lower": 0, "lower_colon": 0, "problemchars": 0, "other": 0}
    for _, element in ET.iterparse(filename):
        keys = key_type(element, keys)
        # iterparse still attaches every parsed element to the tree it is
        # building. Once an element has been counted it is never inspected
        # again, so release its attributes, text and children to avoid
        # holding the full content of a large file in memory.
        element.clear()
    return keys
# Script body: tally the key categories in 'mumbai_sample.osm' (a relative
# path, so it is resolved against the current working directory) and
# pretty-print the resulting counts. There is no __main__ guard, so this
# also runs whenever the module is imported.
keys = process_map('mumbai_sample.osm')
pprint.pprint(keys)