From 8a8cc09f0b6e1665f91c510c80a88914fbada21d Mon Sep 17 00:00:00 2001 From: sb Date: Fri, 24 May 2024 12:10:20 -0400 Subject: [PATCH] use RFC Index for gathering authorship data --- bigbang/analysis/datatracker.py | 87 ++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/bigbang/analysis/datatracker.py b/bigbang/analysis/datatracker.py index cf14505..31feef6 100644 --- a/bigbang/analysis/datatracker.py +++ b/bigbang/analysis/datatracker.py @@ -12,12 +12,96 @@ import re dt = DataTrackerExt() +ri = RFCIndex() +def rfc_author_data(rfc): + record = {} + + record['title'] = rfc.title + record['draft'] = rfc.draft + record['date'] = rfc.date() + record['wg'] = rfc.wg + record['docid'] = rfc.doc_id + + draft = None + if rfc.draft is not None: + draft = dt.document_from_draft(rfc.draft[:-3]) + if draft is None: + draft = dt.document_from_rfc(rfc.doc_id) + else: + draft = dt.document_from_rfc(rfc.doc_id) + if draft is not None: + + record['draft-date'] = draft.time + record['authors'] = [] + + + for author in dt.document_authors(draft): + person = dt.person(author.person) + + author = { + "id" : person.id, + "country" : author.country, + "name" : person.name, + "affiliation" : author.affiliation + } + + record['authors'].append(author) + + record['revision'] = draft.rev + + return record + + else: + return None + +def authorship_from_rfc_data(rfc_data): + records = [] + + for author in rfc_data['authors']: + author_record = author.copy() + + author_record['draft'] = rfc_data['draft'] + author_record['title'] = rfc_data['title'] + author_record['date'] = rfc_data['date'].strftime('%Y-%m-%d') # format this to string! + author_record['wg'] = rfc_data['wg'] + author_record['docid'] = rfc_data['docid'] + + records.append(author_record) + + return records + +def rfc_authors_from_working_group(acr): + """ + Get a dataframe of all authors of RFCs published + by the working group. + """ + + author_records = [] + + for rfc in ri.rfcs(wg=acr): + rfc_data = rfc_author_data(rfc) + if rfc_data is not None: + + authorship = authorship_from_rfc_data(rfc_data) + author_records.extend(authorship) + else: + print(f"No rfc data for {rfc}") + + df = pd.DataFrame.from_records(author_records) + + return df + def draft_authors_from_working_group(acr): """ Get a dataframe of all authors of drafts published by the working group. + + NOTE: In a change in late 2023 or early 2024, the IETD DataTracker + API changed, and rfc documents are no longer listed with their + drafts as submissions. This version of the query is now deprecated. + """ # identify group @@ -30,11 +114,12 @@ def draft_authors_from_working_group(acr): group=g, doctype=dt.document_type_from_slug("rfc") #"draft" ): # status argument # interested in all submissions, or just the most recent? - + if draft.rfc: submissions = [dt.submission(sub_url) for sub_url in draft.submissions] submissions = sorted(submissions, key=lambda s: s.submission_date, reverse=True) + print(f"len(submissions) == {len(submissions)}") if len(submissions) > 0: latest = submissions[0]