Skip to content

Commit 485f4ae

Browse files
committed
Fixed return code problems and code cleanups
1 parent a57b577 commit 485f4ae

File tree

5 files changed

+53
-159
lines changed

5 files changed

+53
-159
lines changed

company_dns.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,12 @@ async def general_query(request):
6262
try:
6363
gq.query = request.path_params['company_name']
6464
# Log the query request as a debug message
65-
logger.debug(f'Querying for general data for {request.path_params["company_name"]}')
65+
logger.debug(f'Performing general query for company name: [{request.path_params["company_name"]}]')
6666
company_wiki_data = gq.get_firmographics_wikipedia()
67+
if company_wiki_data['code'] != 200:
68+
logger.error(f'There were [0] results for resource [company_name].')
69+
return JSONResponse(company_wiki_data)
70+
6771
general_company_data = gq.merge_data(company_wiki_data['data'], company_wiki_data['data']['cik'])
6872
# Call _check_status_and_return to check the status of the data and return the data or an error message
6973
checked_data = _check_status_and_return(general_company_data, request.path_params['company_name'])
@@ -79,16 +83,20 @@ async def general_query(request):
7983

8084
# -------------------------------------------------------------- #
8185
# BEGIN: Helper functions
82-
def _check_status_and_return(data, resource_name):
83-
if data.get('code') != 200:
86+
# TODO: This function may not be needed as is, and its logic could be moved into _handle_request. Essentially we're checking for a 200 status code and, for anything else, returning data that includes the error message. The likely change is to log the error message and return the data as is, which would mean this function could be removed.
87+
def _check_status_and_return(result_data, resource_name):
88+
return_code = result_data.get('code')
89+
result_count = result_data.get('total')
90+
return_msg = result_data.get('message')
91+
if return_code != 200:
8492
# Log the error message
85-
logger.error(f'Data for resource {resource_name} not found')
93+
logger.error(f'There were [{result_count}] results for resource [{resource_name}].')
8694
# Return the message and status code from the result, along with the original result data
87-
return {'error': f'Data for resource {resource_name} not found', 'code': 404}
88-
return data
95+
return {'message': return_msg, 'code': return_code, 'data': result_data}
96+
return result_data
8997

90-
def _prepare_logging(log_level=logging.DEBUG):
91-
logging.basicConfig(format='%(levelname)s:\t%(asctime)s [module: %(name)s] %(message)s', level=log_level)
98+
def _prepare_logging(log_level=logging.INFO):
99+
logging.basicConfig(format='%(levelname)s:\t%(asctime)s [module: %(name)s] %(message)s', level=logging.DEBUG)
92100
return logging.getLogger(__file__)
93101

94102
def _handle_request(request, handler, func, path_param, *args, **kwargs):
@@ -195,6 +203,6 @@ async def dispatch(self, request, call_next):
195203

196204
if __name__ == "__main__":
197205
try:
198-
uvicorn.run(app, host='0.0.0.0', port=8000, log_level="debug", lifespan='off')
206+
uvicorn.run(app, host='0.0.0.0', port=8000, log_level='info', lifespan='off')
199207
except KeyboardInterrupt:
200208
logger.info("Server was shut down by the user.")

lib/edgar.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#!/usr/bin/env python3
21
import sqlite3
32
import sys
43
import requests
@@ -90,13 +89,13 @@ def __init__(
9089
'Accept-Encoding': 'gzip, deflate'
9190
}
9291

92+
# What we are to query
93+
self.query = None
94+
9395
# Command line naming helpers
9496
self.NAME = name
9597
self.DESC = description
9698

97-
# What we are are to query
98-
self.query = None
99-
10099
# Define the form type we're after
101100
self.form_type = '10-'
102101

lib/firmographics.py

Lines changed: 8 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,16 @@
1-
#!/usr/local/bin/python3
2-
31
from . import edgar
42
from . import wikipedia
5-
import argparse
6-
import pprint
73
import sys
84
import urllib.parse as url_parse
95
import logging
106
from geopy.geocoders import ArcGIS
117

128
__author__ = "Michael Hay"
13-
__copyright__ = "Copyright 2023, Mediumroast, Inc. All rights reserved."
9+
__copyright__ = "Copyright 2024, Mediumroast, Inc. All rights reserved."
1410
__license__ = "Apache 2.0"
1511
__version__ = "2.1.0"
1612
__maintainer__ = "Michael Hay"
17-
__status__ = "Alpha"
18-
__date__ = '2023-April-1'
13+
__status__ = "Production"
1914
__contact__ = 'https://github.com/miha42-github/company_dns'
2015

2116
#### Globals ####
@@ -51,60 +46,23 @@ def __init__(
5146
self,
5247
database=None,
5348
name='general',
54-
description='A module and simple CLI too to search for company data in Wikipedia, EDGAR, and also a merger of the two data sources.'):
49+
description='A module to search for company data in Wikipedia, EDGAR, and also a merger of the two data sources.'):
5550

5651
# Construct the object to determine lat long pairs
5752
self.locator = ArcGIS(timeout=2, user_agent="company_dns")
5853

5954
# Contains the company name or CIK
6055
self.query = None
6156

62-
# Command line naming helpers
63-
self.NAME = name
64-
self.DESC = description
65-
6657
# Define the db_cache location
6758
self.db_file='./company_dns.db' if database is None else database
6859

60+
# Naming helpers
61+
self.NAME = name
62+
self.DESC = description
63+
6964
# Set up the logging
7065
self.logger = logging.getLogger(self.NAME)
71-
72-
def get_cli_args(self):
73-
"""Parse common CLI arguments including system configs and behavior switches.
74-
"""
75-
# Set up the argument parser
76-
parser = argparse.ArgumentParser(prog=self.NAME, description=self.DESC)
77-
78-
# Setup the command line switches
79-
parser.add_argument(
80-
'--query',
81-
help="Company name to search for in Wikipedia or EDGAR",
82-
type=str,
83-
dest='query',
84-
required=True
85-
)
86-
parser.add_argument(
87-
"--debug",
88-
help="Turn on debugging",
89-
dest="debug",
90-
type=int,
91-
default=0,
92-
)
93-
parser.add_argument(
94-
'--operation',
95-
help="Company name to search for in Wikipedia.",
96-
type=str,
97-
dest='operation',
98-
choices=['merge', 'ciks', 'details', 'summaries', 'firmographics_wiki', 'firmographics_edgar'],
99-
default='merge',
100-
required=True
101-
)
102-
103-
# Parse the CLI
104-
cli_args = parser.parse_args()
105-
106-
# Return parsed arguments
107-
return cli_args
10866

10967
def locate (self, place):
11068
# Log the place to locate via a debug message
@@ -277,30 +235,4 @@ def merge_data(self, wiki_data, cik, company_name=None):
277235
'module': my_class + '-> ' + my_function,
278236
'data': final_company,
279237
'dependencies': DEPENDENCIES
280-
}
281-
282-
283-
284-
if __name__ == '__main__':
285-
query = GeneralQueries('../company_dns.db')
286-
cli_args = query.get_cli_args()
287-
query.query = cli_args.query
288-
DEBUG = cli_args.debug
289-
290-
results = dict()
291-
if cli_args.operation == 'ciks':
292-
results = query.get_all_ciks()
293-
elif cli_args.operation == 'details':
294-
results = query.get_all_details()
295-
elif cli_args.operation == 'summaries':
296-
results = query.get_all_summaries()
297-
elif cli_args.operation == 'firmographics_wiki':
298-
results = query.get_firmographics_wikipedia()
299-
elif cli_args.operation == 'firmographics_edgar':
300-
results = query.get_firmographics_edgar()
301-
elif cli_args.operation == 'merge':
302-
wiki_data = query.get_firmographics_wikipedia()
303-
if wiki_data['code'] != 200: results = wiki_data
304-
results = query.merge_data(wiki_data, wiki_data['cik'])
305-
306-
if not DEBUG: pprint.pprint(results)
238+
}

lib/sic.py

Lines changed: 16 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,13 @@
1-
#!/usr/bin/env python3
2-
3-
import argparse
4-
import pprint
51
import sys
6-
import pprint
72
import sqlite3
8-
import re
9-
import urllib.parse as url_parse
3+
import logging
104

115
__author__ = "Michael Hay"
12-
__copyright__ = "Copyright 2023, Mediumroast, Inc. All rights reserved."
6+
__copyright__ = "Copyright 2024, Mediumroast, Inc. All rights reserved."
137
__license__ = "Apache 2.0"
14-
__version__ = "1.1.0"
8+
__version__ = "2.0.0"
159
__maintainer__ = "Michael Hay"
16-
__status__ = "Beta"
17-
__date__ = '2023-April-1'
10+
__status__ = "Production"
1811
__contact__ = 'https://github.com/miha42-github/company_dns'
1912

2013
#### Globals ####
@@ -67,56 +60,22 @@ def __init__(
6760
self,
6861
db_file='./company_dns.db',
6962
name='sic',
70-
description='A module and simple CLI too to lookup SIC data.'):
63+
description='A module to lookup SIC data.'):
7164

7265
# The SQLite database connection and cursor
7366
self.e_conn = sqlite3.connect(db_file)
7467
self.ec = self.e_conn.cursor()
7568
self.db_file = db_file
7669

77-
# Command line naming helpers
70+
# Naming helpers
7871
self.NAME = name
7972
self.DESC = description
8073

8174
# Query object
8275
self.query = None
8376

84-
def get_cli_args(self):
85-
"""Parse common CLI arguments including system configs and behavior switches.
86-
"""
87-
# Set up the argument parser
88-
parser = argparse.ArgumentParser(prog=self.NAME, description=self.DESC)
89-
90-
# Setup the command line switches
91-
parser.add_argument(
92-
'--query',
93-
help="Description of the SIC to search for in data cache.",
94-
type=str,
95-
dest='query',
96-
required=True
97-
)
98-
parser.add_argument(
99-
'--operation',
100-
help="Type of details to search for.",
101-
type=str,
102-
dest='operation',
103-
choices=['description', 'code'],
104-
default='description',
105-
required=True
106-
)
107-
parser.add_argument(
108-
"--debug",
109-
help="Turn on debugging",
110-
dest="debug",
111-
type=int,
112-
default=0,
113-
)
114-
115-
# Parse the CLI
116-
cli_args = parser.parse_args()
117-
118-
# Return parsed arguments
119-
return cli_args
77+
# Set up the logging
78+
self.logger = logging.getLogger(self.NAME)
12079

12180
def get_all_sic_by_no(self):
12281
"""Using a query string find and return a dictionary containing all SICs with additional metadata.
@@ -237,13 +196,17 @@ def get_all_sic_by_name(self):
237196
}
238197
tmp_sics = {}
239198

199+
self.logger.debug('Querying db cache for [' + self.query + ']')
200+
240201
# Define the SQL Query
241202
sql_query = "SELECT sic.division, sic.major_group, sic.industry_group, sic.sic, sic.description, " + \
242203
"major_groups.description a FROM sic INNER JOIN major_groups ON major_groups.major_group = sic.major_group WHERE sic.description LIKE '%" + self.query + "%' "
243204

244205
# Issue the query
245206
for row in self.ec.execute(sql_query):
246207

208+
self.logger.debug('Processing row [' + row + ']')
209+
247210
# Get the fields in a structure we can manipulate
248211
sic_code = str(row[SICS])
249212
sic_desc = str(row[SICS_DESC])
@@ -278,7 +241,11 @@ def get_all_sic_by_name(self):
278241

279242
final_sics['sics'] = tmp_sics
280243
final_sics['total'] = len(tmp_sics)
244+
245+
self.logger.info('Found a total of [' + str(final_sics['total']) + '] SICs returning data.')
246+
281247
if final_sics['total'] == 0:
248+
final_sics['sics'] = []
282249
return {
283250
'code': 404,
284251
'message': 'No SICs found for query [' + self.query + '].',
@@ -497,20 +464,3 @@ def get_division_desc_by_id(self):
497464
'data': final_descs,
498465
'dependencies': DEPENDENCIES
499466
}
500-
501-
502-
503-
if __name__ == '__main__':
504-
q = SICQueries(db_file='../company_dns.db')
505-
cli_args = q.get_cli_args()
506-
q.query = cli_args.query
507-
DEBUG = cli_args.debug
508-
509-
results = dict()
510-
511-
if cli_args.operation == 'description':
512-
results = q.get_all_sic_by_name()
513-
elif cli_args.operation == 'code':
514-
results = q.get_all_sic_by_no()
515-
516-
if not DEBUG: pprint.pprint(results)

lib/wikipedia.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,13 @@ def get_firmographics(self):
9595
lookup_error = {
9696
'code': 404,
9797
'message': 'Unable to find a company by the name [' + self.query + ']. Maybe you should try an alternative structure like [' + self.query + ' Inc.,' + self.query + ' Corp., or ' + self.query + ' Corporation].',
98-
'errorType': 'LookupError',
98+
'error': 'LookupError',
9999
'module': my_class + '-> ' + my_function,
100-
'dependencies': DEPENDENCIES
100+
'dependencies': DEPENDENCIES,
101+
'data': {
102+
'total': 0,
103+
'results': []
104+
}
101105
}
102106

103107
# TODO try to do the right thing by trying different common combinations like Company, Inc.; Company Corp, etc.
@@ -106,8 +110,9 @@ def get_firmographics(self):
106110
self.logger.info('Starting retrieval of firmographics for [' + self.query + '] via its wikipedia page.')
107111
company_page = wptools.page(self.query, silent=True)
108112
# Log the completion of the page creation
109-
self.logger.info('Completed firmographics retrieval [' + self.query + '] via its wikipedia page.')
113+
self.debug(f'Page results for [{self.query}]: {company_page}')
110114
except:
115+
self.logger.error('A wikipedia page for [' + self.query + '] was not found.')
111116
return lookup_error
112117

113118
# Prepare to get the infobox for the company
@@ -185,7 +190,7 @@ def get_firmographics(self):
185190

186191
# City
187192
firmographics['city'] = self._get_item(company_info, ['location_city', 'hq_location_city', 'location'], r'\[\[\]\]', 0)
188-
firmographics['city'] = firmographics['city'].replace('[[', '').replace(']]', '') if re.search('(\[)|(\])', firmographics['city']) else firmographics['city']
193+
firmographics['city'] = firmographics['city'].replace('[[', '').replace(']]', '') if re.search(r'(\[)|(\])', firmographics['city']) else firmographics['city']
189194
firmographics['city'] = firmographics['city'].replace('<br>', ', ') if re.search('<br>', firmographics['city']) else firmographics['city']
190195

191196

0 commit comments

Comments
 (0)