Skip to content

Commit

Permalink
feat: update dataset on 3 Oct 2021
Browse files Browse the repository at this point in the history
refactor: move around dataset gen tooling

Signed-off-by: Sean Pianka <pianka@eml.cc>
  • Loading branch information
seanpianka committed Oct 3, 2021
1 parent d0b7e55 commit e37d309
Show file tree
Hide file tree
Showing 8 changed files with 28,475 additions and 28,378 deletions.
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include README.rst VERSION.txt
recursive-exclude tests *
recursive-exclude ci *
recursive-exclude scripts *
recursive-include zipcodes *
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ The Python `sqlite3` module is not required in order to use this package.
'zip_code_type': 'STANDARD'}[
```

⚠️ The zipcode data was last updated on: **Nov. 13th, 2019** ⚠️
⚠️ The zipcode data was last updated on: **Oct. 3, 2021** ⚠️

[![Downloads](https://pepy.tech/badge/zipcodes/month)](https://pepy.tech/project/zipcodes/month)
[![Supported Versions](https://img.shields.io/pypi/pyversions/zipcodes.svg)](https://pypi.org/project/zipcodes)
Expand Down
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.3
1.2.0
60 changes: 0 additions & 60 deletions ci/__init__.py

This file was deleted.

65 changes: 65 additions & 0 deletions ci/_internal_utils.py → scripts/build_zipcode_dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import csv
import time
import json
from pprint import pprint


def update_gps_coordinates(gps_data, base_data):
Expand Down Expand Up @@ -106,3 +108,66 @@ def split_by_comma(s):
:return: list of provided string split by comma
"""
return [i.strip() for i in s.split(",") if i]


def main():
    """Build the library's final zipcode dataset from the raw CSV sources.

    Both raw CSV files under ``scripts/data`` are parsed, combined into a
    single dataset, and written to ``zips.json`` (relative to the current
    working directory).

    A "schema" dict drives the transformation. Each key is a field name in
    the source dataset; each value is a dict holding the "public" field name
    used by this library's API and, optionally, a "transform" callable used
    to pre-process that field's value.
    """

    def _field(public, transform=None):
        # Build one schema entry; "transform" is only present when supplied.
        spec = {"public": public}
        if transform is not None:
            spec["transform"] = transform
        return spec

    def _is_active(decommissioned_flag):
        # The source column is an integer-like "decommissioned" flag; the
        # public API exposes the inverse of it as "active".
        return not int(decommissioned_flag)

    # Key layout of the unitedstateszipcodes dataset mapped onto this
    # library's public API. Entry order is kept as originally declared.
    SCHEMA = {
        "zip": _field("zip_code"),
        "type": _field("zip_code_type"),
        "decommissioned": _field("active", _is_active),
        "primary_city": _field("city"),
        "acceptable_cities": _field("acceptable_cities", split_by_comma),
        "unacceptable_cities": _field("unacceptable_cities", split_by_comma),
        "state": _field("state"),
        "county": _field("county"),
        "timezone": _field("timezone"),
        "area_codes": _field("area_codes", split_by_comma),
        "world_region": _field("world_region"),
        "country": _field("country"),
        "latitude": _field("lat"),
        "longitude": _field("long"),
    }

    # Data set with ostensibly more accurate long/lat data.
    gps_rows = parse_csv("scripts/data/zip-codes-database-FREE.csv")
    # Original free data sample from unitedstateszipcodes.
    records = parse_csv("scripts/data/zip_code_database.csv")

    # Show the column names of each source (taken from its first row).
    for label, rows in (("GPS Keys: {}", gps_rows), ("Base Keys: {}", records)):
        pprint(label.format(list(rows[0].keys())))

    # Transform the base place data: apply the GPS coordinates first, then
    # run the schema-driven steps in order.
    records = update_gps_coordinates(gps_rows, records)
    for step in (strip_unsupported_schema, perform_kv_transforms):
        records = step(records, SCHEMA)

    print("Writing zipcode information for {} places".format(len(records)))

    with open("zips.json", "w") as out_file:
        json.dump(records, out_file)

    print("To zip for production, run:\n$ bzip2 zips.json")

# Only build the dataset when this file is executed as a script; importing
# the module (e.g. to reuse its helpers) must not read or write any files.
if __name__ == "__main__":
    main()
File renamed without changes.
Loading

1 comment on commit e37d309

@seanpianka
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This closes #14

Please sign in to comment.