Skip to content

Commit

Permalink
Fix bug in guidestar service fetching
Browse files (browse the repository at this point in the history)
  • Loading branch information
akariv committed Mar 23, 2024
1 parent 2bcbc74 commit 0549b87
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
18 changes: 11 additions & 7 deletions operators/entities/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -259,15 +259,13 @@ def fetchBranchData(ga):


## SERVICES
def unwind_services(ga: GuidestarAPI, source='entities'):
def unwind_services(ga: GuidestarAPI):
def func(rows: ResourceWrapper):
if rows.res.name != 'orgs':
yield from rows
else:
count = 0
for _, row in enumerate(rows):
if row['source'] != source:
continue
regNum = row['id']

branches = ga.branches(regNum)
Expand All @@ -292,7 +290,7 @@ def func(rows: ResourceWrapper):
ret['data']['actual_branch_ids'] = [b['branchId'] for b in branches]
ret['id'] = 'guidestar:' + service['serviceId']
count += 1
if count % 10 == 0:
if count % 1000 == 0:
print('COLLECTED {} services'.format(count))
yield ret
return DF.Flow(
Expand Down Expand Up @@ -325,9 +323,11 @@ def update_from_taxonomy(names, responses, situations):
).process()

def func(rows):
count = 0
full_count = 0
for row in rows:
full_count += 1
if 'data' not in row:
# print('NO DATA', row)
yield row
continue

Expand All @@ -343,6 +343,7 @@ def func(rows):
orgId = data.pop('organization_id')
actual_branch_ids = data.pop('actual_branch_ids')
row['branches'] = ['guidestar:' + b['branchId'] for b in (data.pop('branches') or []) if b['branchId'] in actual_branch_ids]
row['organizations'] = []

record_type = data.pop('recordType')
assert record_type == 'GreenInfo'
Expand Down Expand Up @@ -421,8 +422,8 @@ def func(rows):

if national:
row['branches'].append(f'guidestar:{orgId}:national')
if len(row['branches']) == 0:
continue
# if len(row['branches']) == 0:
# continue

when = data.pop('whenServiceActive')
if when == 'All Year':
Expand Down Expand Up @@ -499,8 +500,11 @@ def func(rows):
row['situations'] = sorted(situations)
row['responses'] = sorted(responses)
assert all(v in (None, '0') for v in data.values()), repr(data_source_url) + ':' + repr(data)
count += 1
yield row

print('DONE EMITTING SERVICES', count,'/',full_count)

return DF.Flow(
func,
)
Expand Down
4 changes: 2 additions & 2 deletions srm_tools/update_table.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,7 +31,7 @@ def func(row):


def test_hash(table_fields):
def func(rows):
def func(rows: DF.ResourceWrapper):
count_existing = 0
count_new = 0
count_different = 0
Expand All @@ -48,7 +48,7 @@ def func(rows):
else:
count_new += 1
yield row
print(f'Existing: {count_existing}, New: {count_new}, Different: {count_different}')
print(f'{rows.res.name} -- Existing: {count_existing}, New: {count_new}, Different: {count_different}')
return func

def airtable_updater_flow(table, source_id, table_fields, fetch_data_flow, update_data_flow, manage_status=True, airtable_base=None):
Expand Down

0 comments on commit 0549b87

Please sign in to comment.