Skip to content

Commit

Permalink
fifth avenue coder
Browse files Browse the repository at this point in the history
  • Loading branch information
danvk committed Nov 29, 2024
1 parent 7c30363 commit 48428b2
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 2 deletions.
2 changes: 2 additions & 0 deletions data/originals/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Files in this folder are "originals"—not derived from some other data source.
- Street listings
- `manhattan-streets.txt`: https://geographic.org/streetview/usa/ny/new_york/new_york.html
- `brooklyn-streets.txt`: https://geographic.org/streetview/usa/ny/kings/brooklyn.html
- `nyc-streets.geojson`: https://github.com/nypl-spacetime/nyc-historical-streets
- Locations for Fifth Avenue collection: https://github.com/NYPL-publicdomain/fifth-avenue

```bash
perl -pe 's/ .*//' | uniq | perl -ne 'print unless /^([WE] )?\d/' | perl -ne 'print if /.../'
Expand Down
1 change: 1 addition & 0 deletions data/originals/fifth-avenue.geojson

Large diffs are not rendered by default.

63 changes: 62 additions & 1 deletion oldnyc/geocode/coders/special_cases.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import sys
from collections import Counter

from oldnyc.geocode.geocode_types import Coder, LatLngLocation
import pygeojson

from oldnyc.geocode.geocode_types import Coder, LatLngLocation, Point
from oldnyc.item import Item


Expand Down Expand Up @@ -69,3 +71,62 @@ def finalize(self):

def name(self):
return "special"


UNLOCATED_FIFTH_AVE = {
"1113223", # cover photo
"1113224", # title page
"1113270", # misfiled?
"1113303", # index of merchants
"1113304",
"1113305",
"1113306",
}


class FifthAvenueCoder(Coder):
"""Special casing for "Fifth Avenue, Start to Finish" collection.
This is an awkward fit for other coders because one cross street (Fifth Avenue)
is only mentioned as part of the source, and all the streets mentioned in the
title are crossing streets or addresses on Fifth Avenue.
It's also easy to geocode because of a previous NYPL Labs project:
https://github.com/NYPL-publicdomain/fifth-avenue
"""

def __init__(self):
self.id_to_point = dict[str, Point]()
self.claimed = set[str]()
fc = pygeojson.load_feature_collection(open("data/originals/fifth-avenue.geojson"))
for f in fc.features:
assert f.geometry
assert isinstance(f.geometry, pygeojson.GeometryCollection)
assert len(f.geometry.geometries) == 2
g = f.geometry.geometries[0]
assert isinstance(g, pygeojson.Point)
# other geometry is a LineString indicating field of view
id = f.properties["data"]["imageId"]
lng, lat = g.coordinates[:2]
self.id_to_point[id] = (lat, lng)

def code_record(self, r: Item):
loc = self.code_one_record(r)
if loc:
return [loc]

def code_one_record(self, r: Item):
if r.source != "Fifth Avenue, New York, from start to finish":
return None
if r.id in UNLOCATED_FIFTH_AVE:
return None
pt = self.id_to_point[r.id]
self.claimed.add(r.id)
return LatLngLocation(lat=pt[0], lng=pt[1], source="Fifth Avenue") if pt else None

def finalize(self):
sys.stderr.write(f"Fifth Avenue: {len(self.claimed)} claimed\n")
# assert len(self.claimed) == len(self.id_to_point)

def name(self):
return "fifth"
3 changes: 2 additions & 1 deletion oldnyc/geocode/geocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"subjects": subjects.SubjectsCoder,
"gpt": gpt.GptCoder,
"special": special_cases.SpecialCasesCoder,
"fifth": special_cases.FifthAvenueCoder,
}


Expand Down Expand Up @@ -90,7 +91,7 @@ def main():
parser.add_argument(
"-c",
"--coders",
default="title-cross,title-address,gpt,special,subjects",
default="fifth,title-cross,title-address,gpt,special,subjects",
help="Set to a comma-separated list of coders. Coders run in the specified order.",
)

Expand Down

0 comments on commit 48428b2

Please sign in to comment.