2
2
Scripts for processing data from the IETF DataTracker
3
3
"""
4
4
5
- from ietfdata .datatracker import *
6
- from ietfdata .datatracker_ext import *
5
+ from bigbang .config import CONFIG
6
+
7
+ import bigbang .datasets .organizations as bdo
8
+
7
9
from datetime import date , datetime , timezone
8
10
from dateutil .parser import *
9
11
import json as json
10
12
11
13
import pandas as pd
12
14
import re
13
15
16
+
17
+ from ietfdata .datatracker import *
18
+ from ietfdata .datatracker_ext import *
19
+ from ietfdata .rfcindex import *
20
+
21
# `os` is imported explicitly: the cache-path computation below uses
# `os.path`, and no other visible import in this module provides it by name
# (it would otherwise depend on a wildcard import happening to export `os`).
import os
import sys

# Resolve the configured ietfdata cache location relative to this file's
# directory and put it at the front of the module search path.
# NOTE(review): `sys.path` only affects where Python looks for importable
# modules -- confirm that is the intended use of the cache directory.
cache_path = os.path.abspath(
    os.path.join(os.path.dirname(__file__), CONFIG.ietfdata_cache_path)
)
sys.path.insert(0, cache_path)
print(f"cache path: { cache_path } ")

# Module-level clients shared by the functions below.
dt = DataTrackerExt()
ri = RFCIndex()

# Organization data passed to bdo.lookup_normalized() by
# normalize_affiliation().
odf = bdo.load_data()
17
32
18
- def rfc_author_data (rfc ):
33
+ def rfc_author_data (rfc , normalize = True ):
19
34
record = {}
20
35
21
36
record ["title" ] = rfc .title
@@ -39,11 +54,16 @@ def rfc_author_data(rfc):
39
54
for author in dt .document_authors (draft ):
40
55
person = dt .person (author .person )
41
56
57
+ affiliation = author .affiliation
58
+
59
+ if normalize :
60
+ affiliation = normalize_affiliation (affiliation )
61
+
42
62
author = {
43
63
"id" : person .id ,
44
64
"country" : author .country ,
45
65
"name" : person .name ,
46
- "affiliation" : author . affiliation ,
66
+ "affiliation" : affiliation ,
47
67
}
48
68
49
69
record ["authors" ].append (author )
@@ -164,7 +184,7 @@ def email_from_uri(email_uri):
164
184
return m .group (1 ) if m else None
165
185
166
186
167
- dt = DataTracker (use_cache = True )
187
# NOTE(review): this rebinds the module-level name `dt`, which is first bound
# to DataTrackerExt() near the top of the file. Functions look `dt` up at call
# time, so once the module has finished importing, every function here uses
# this DataTracker() instance -- confirm the earlier DataTrackerExt() binding
# is not the one that was meant to be kept.
+ dt = DataTracker ()
168
188
169
189
170
190
def get_group_histories (wg_name ):
@@ -178,7 +198,7 @@ def get_group_histories(wg_name):
178
198
group_role_histories = [
179
199
dt .group_role_histories (
180
200
group = grp_hist ,
181
- name = dt .role_name (RoleNameURI ("/api/v1/name/rolename/chair/" )),
201
+ name = dt .role_name (RoleNameURI (uri = "/api/v1/name/rolename/chair/" )),
182
202
)
183
203
for grp_hist in group_histories
184
204
]
@@ -210,7 +230,7 @@ def leadership_ranges(group_acronym):
210
230
for r in list (
211
231
dt .group_role_histories (
212
232
group = h ,
213
- name = dt .role_name (RoleNameURI ("/api/v1/name/rolename/chair/" )),
233
+ name = dt .role_name (RoleNameURI (uri = "/api/v1/name/rolename/chair/" )),
214
234
)
215
235
)
216
236
]
@@ -234,3 +254,18 @@ def leadership_ranges(group_acronym):
234
254
agged = agged .sort_values (by = "datetime_max" )
235
255
236
256
return ghcr_df , agged
257
+
258
+
259
def normalize_affiliation(affil):
    """
    Return the normalized form of an affiliation string.

    The affiliation is stripped of surrounding whitespace and then looked up
    with ``bdo.lookup_normalized`` against the module-level organization data
    ``odf``. When the lookup returns ``None`` the stripped input is returned
    instead.

    Parameters
    ----------
    affil : str or None
        Raw affiliation text. ``None`` is accepted and returned unchanged so
        that records with no affiliation do not raise.

    Returns
    -------
    str or None
        The value returned by the lookup when it is not ``None``, otherwise
        the stripped affiliation; ``None`` when ``affil`` is ``None``.

    Probably should be somewhere else.
    """
    if affil is None:
        # Nothing to normalize; calling .strip() on None would raise
        # AttributeError.
        return None

    affil = affil.strip()

    lookup = bdo.lookup_normalized(affil, odf)

    # Fall back to the stripped input when the lookup has no match.
    return affil if lookup is None else lookup
0 commit comments