Merge pull request #42 from nearmap/getting-started-guide-gen6
Gen 6 Enhancements
mbewley authored Aug 16, 2024
2 parents d16bc92 + 93f4945 commit 43aa06b
Showing 17 changed files with 5,004 additions and 17,390 deletions.
50 changes: 50 additions & 0 deletions nmaipy/constants.py
@@ -6,6 +6,8 @@
UNTIL_COL_NAME = "until"
SURVEY_RESOURCE_ID_COL_NAME = "survey_resource_id"

DEFAULT_URL_ROOT = "api.nearmap.com/ai/features/v4/bulk"


MAX_RETRIES = 50

@@ -50,13 +52,21 @@ def area_units(self):
BUILDING_NEW_ID = "1878ccf6-46ec-55a7-a20b-0cf658afb755" # New semantic building definition
ROOF_ID = "c08255a4-ba9f-562b-932c-ff76f2faeeeb"
BUILDING_LIFECYCLE_ID = "91987430-6739-5e16-b92f-b830dd7d52a6" # damage scores are attached to this class
BUILDING_UNDER_CONSTRUCTION_ID = "4794d3ec-0ee7-5def-ad56-f82ff7639bce"


TRAMPOLINE_ID = "753621ee-0b9f-515e-9bcf-ea40b96612ab"
POOL_ID = "0339726f-081e-5a6e-b9a9-42d95c1b5c8a"
CONSTRUCTION_ID = "a2a81381-13c6-57dc-a967-af696e45f6c7"
SOLAR_ID = "3680e1b8-8ae1-5a15-8ec7-820078ef3298"
SOLAR_HW_ID = "c1143023-135b-54fd-9a07-8de0ff55de51"
CAR_ID = "8337e0e1-e171-5292-89cc-99c0da2a4fe4"
WHEELED_CONSTRUCTION_VEHICLE_ID = "75efd1e7-c253-59f0-b3aa-95f9c17efa93"
CONSTRUCTION_CRANE_ID = "6a2c2adb-0914-56b3-8a2d-871b803a0dd7"
BOAT_ID = "62a0958e-2139-5688-a776-b88c6049d50e"
SILO_ID = "b64ecdb0-6810-5c70-835c-9e2a5f2a4d84"
SKYLIGHT_ID = "3f5a737e-6d56-538a-ac26-f2934bbbb695"
PLAYGROUND_ID = "7741703d-4ce4-54e1-a9ee-05a0a1851137"

VEG_VERYLOW_ID = "a7d921b7-393c-4121-b317-e9cda3e4c19b"
VEG_LOW_ID = "2780fa70-7713-437c-ad98-656b8a5cc4f2"
@@ -119,6 +129,46 @@ def area_units(self):
]
)

CLASS_1050_TARP = "abb1f304-ce01-527b-b799-cbfd07551b2c" # "temporary repair",
CLASS_1052_RUST = "526496bf-7344-5024-82d7-77ceb671feb4" # "rust",
CLASS_1079_MISSING_SHINGLES = "dec855e2-ae6f-56b5-9cbb-f9967ff8ca12" # "missing tiles or shingles",
CLASS_1139_DEBRIS = "8ab218a7-8173-5f1e-a5cb-bb2cd386a73e" # "debris",
CLASS_1140_EXPOSED_DECK = "2905ba1c-6d96-58bc-9b1b-5911b3ead023" # "exposed_deck",
CLASS_1051_PONDING = "f41e02b0-adc0-5b46-ac95-8c59aa9fe317" # "ponding",
CLASS_1144_STAINING = "319f552f-f4b7-520d-9b16-c8abb394b043"
CLASS_1146_WORN_SHINGLES = "97a6f930-82ae-55f2-b856-635e2250af29"
CLASS_1147_EXPOSED_UNDERLAYMENT = "2322ca41-5d3d-5782-b2b7-1a2ffd0c4b78"
CLASS_1149_PATCHING = "8b30838b-af41-5d1d-bdbd-29e682fe3b00"
CLASS_1186_STRUCTURAL_DAMAGE = "c0224852-4310-57dd-95fe-42bff1c0a3f0"

# Roof Shapes
CLASS_1013_HIP = "ac0a5f75-d8aa-554c-8a43-cee9684ef9e9"
CLASS_1014_GABLE = "59c6e27e-6ef2-5b5c-90e7-31cfca78c0c2"
CLASS_1015_DUTCH_GABLE = "3719eb40-d6d1-5071-bbe6-379a551bb65f"
CLASS_1019_GAMBREL = "4bb630b9-f9eb-5f95-85b8-f0c6caf16e9b"
CLASS_1020_CONICAL = "89582082-e5b8-5853-bc94-3a0392cab98a"
CLASS_1173_PARAPET = "1234ea84-e334-5c58-88a9-6554be3dfc05"
CLASS_1174_MANSARD = "7eb3b1b6-0d75-5b1f-b41c-b14146ff0c54"
CLASS_1176_JERKINHEAD = "924afbab-aae6-5c26-92e8-9173e4320495"
CLASS_1178_QUONSET = "e92bc8a2-9fa3-5094-b3b6-2881d94642ab"
CLASS_1180_BOWSTRING_TRUSS = "09b925d2-df1d-599b-89f1-3ffd39df791e"

# Roof Materials
CLASS_1191_FLAT = "1ab60ef7-e770-5ab6-995e-124676b2be11"
CLASS_1007_TILE = "516fdfd5-0be9-59fe-b849-92faef8ef26e"
CLASS_1008_ASPHALT_SHINGLE = "4bbf8dbd-cc81-5773-961f-0121101422be"
CLASS_1009_METAL_PANEL = "4424186a-0b42-5608-a5a0-d4432695c260"
CLASS_1100_BALLASTED = "4558c4fb-3ddf-549d-b2d2-471384be23d1"
CLASS_1101_MOD_BIT = "87437e20-d9f5-57e1-8b87-4a9c81ec3b65"
CLASS_1103_TPO = "383930f1-d866-5aa3-9f97-553311f3162d"
CLASS_1104_EPDM = "64db6ea0-7248-53f5-b6a6-6ed733c5f9b8"
CLASS_1105_WOOD_SHAKE = "9fc4c92e-4405-573e-bce6-102b74ab89a3"
CLASS_1160_CLAY_TILE = "09ed6bf9-182a-5c79-ae59-f5531181d298"
CLASS_1163_SLATE = "cdc50dcc-e522-5361-8f02-4e30673311bb"
CLASS_1165_BUILT_UP = "3563c8f1-e81e-52c7-bd56-eaa937010403"
CLASS_1168_ROOF_COATING = "b2573072-b3a5-5f7c-973f-06b7649665ff"


# ROLLUP API COLUMN IDs
ROLLUP_SURVEY_DATE_ID = "b02a3652-8a87-5d20-849c-1afb3df67b19"
ROLLUP_SYSTEM_VERSION_ID = "3811c6c8-b61e-5c3d-9d14-5e0dcacb4708"
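
For orientation, downstream code typically slices AI Feature API output on these class IDs. A minimal sketch (illustrative only, not part of this commit; the class_id column name is an assumption about the parsed feature frame):

import pandas as pd

from nmaipy.constants import CLASS_1013_HIP, SKYLIGHT_ID, SOLAR_ID

# Stand-in for a parsed AI Feature API response (schema assumed).
df_features = pd.DataFrame(
    {
        "class_id": [SOLAR_ID, SKYLIGHT_ID, CLASS_1013_HIP],
        "area_sqm": [14.8, 1.2, 96.0],
    }
)

# Keep only the roof fixtures of interest.
roof_fixtures = df_features[df_features["class_id"].isin([SOLAR_ID, SKYLIGHT_ID])]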
163 changes: 159 additions & 4 deletions nmaipy/coverage_utils.py
@@ -63,15 +63,15 @@ def poly2coordstring(poly):


def get_surveys_from_point(
- lon, lat, since, until, apikey, coverage_type, include_disaster=False, has_3d=False, limit=100
+ lon, lat, since, until, apikey, coverage_type, include_disaster=False, has_3d=False, prerelease=False, limit=100
):
fields = "id,captureDate,resources,tags"
if coverage_type == STANDARD_COVERAGE:
url = f"https://api.nearmap.com/coverage/v2/point/{lon},{lat}?fields={fields}&limit={limit}&resources=tiles:Vert,aifeatures,3d&apikey={apikey}"
url = f"https://api.nearmap.com/coverage/v2/point/{lon},{lat}?fields={fields}&limit={limit}&resources=tiles:Vert,aifeatures,3d"
if include_disaster:
url += f"&include=disaster"
elif coverage_type == AI_COVERAGE:
url = f"https://api.nearmap.com/ai/features/v4/coverage.json?point={lon},{lat}&limit={limit}&apikey={apikey}"
url = f"https://api.nearmap.com/ai/features/v4/coverage.json?point={lon},{lat}&limit={limit}"
if has_3d:
url += "&3dCoverage=true"
else:
@@ -80,7 +80,9 @@ def get_surveys_from_point(
url += f"&since={since}"
if until is not None:
url += f"&until={until}"

if prerelease:
url += "&prerelease=true"
url += f"&apikey={apikey}"
response = get_payload(url)
if not isinstance(response, int):
if coverage_type == STANDARD_COVERAGE:
@@ -97,6 +99,16 @@ def get_surveys_from_point(
return None, None


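# Illustrative usage (not part of this commit): a hedged sketch of requesting
# coverage with the new prerelease flag; coordinates and API key are placeholders.
#
#   df_surveys, response = get_surveys_from_point(
#       151.2093, -33.8688, since="2024-01-01", until=None, apikey="YOUR_API_KEY",
#       coverage_type=STANDARD_COVERAGE, prerelease=True,
#   )
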
def get_survey_resource_id_from_survey_id_query(resources):
"""
Get the survey resource id from the resources list, as returned in the "resources" field of the coverage/v2/surveys/{survey_id} query.
"""
if len(resources) == 1:
return resources[0]["id"]
else:
raise Exception("More than one resource returned from survey_id query")


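# Illustrative only (not part of this commit): the "resources" field from
# coverage/v2/surveys/{survey_id} is expected to hold exactly one entry; keys
# other than "id" are omitted here as they are not needed.
#
#   resources = [{"id": "survey-resource-uuid"}]
#   get_survey_resource_id_from_survey_id_query(resources)  # -> "survey-resource-uuid"
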
def get_survey_resource_id_from_standard_coverage(resources):
"""
Get the survey resource id from the resources list. This is the id that can be used with the AI Feature API to get an exact match (rather than since/until dates).
@@ -143,6 +155,7 @@ def threaded_get_coverage_from_point_results(
coverage_type=STANDARD_COVERAGE,
include_disaster=False,
has_3d=False,
prerelease=False,
limit=100,
):
"""
@@ -181,6 +194,7 @@
coverage_type,
include_disaster,
has_3d,
prerelease,
limit,
)
)
Expand All @@ -202,6 +216,7 @@ def get_coverage_from_points(
id_col="id",
include_disaster=False,
has_3d=False,
prerelease=False,
limit=100,
):
"""
@@ -252,6 +267,7 @@
coverage_type=coverage_type,
include_disaster=include_disaster,
has_3d=has_3d,
prerelease=prerelease,
limit=limit,
)
c_with_idx = []
@@ -309,3 +325,142 @@ def get_coverage_from_points(
return df_coverage
else:
return None


def threaded_get_coverage_from_survey_ids(
df,
apikey,
survey_id_col="survey_id",
threads=20,
prerelease=False,
limit=100,
):
"""
Wrapper function to get coverage for a dataframe of survey_ids, using a thread pool.
"""
jobs = []

df = df.copy()

# Send each parcel to a thread worker
with concurrent.futures.ThreadPoolExecutor(threads) as executor:

for i, row in df.iterrows():
jobs.append(
executor.submit(
get_surveys_from_id,
row[survey_id_col],
apikey,
limit,
)
)

results = []
for job in jobs:
df_job, _ = job.result()
results.append(pd.DataFrame(df_job))
return results


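# Illustrative usage (not part of this commit): a hedged sketch of the threaded
# wrapper; survey IDs and API key are placeholders.
#
#   df = pd.DataFrame({"survey_id": ["survey-uuid-1", "survey-uuid-2"]})
#   results = threaded_get_coverage_from_survey_ids(df, apikey="YOUR_API_KEY", threads=5)
#   # -> list with one coverage DataFrame per input row
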
def get_surveys_from_id(
survey_id, apikey, limit=100
):
fields = "id,captureDate,resources"
url = f"https://api.nearmap.com/coverage/v2/surveys/{survey_id}?fields={fields}&limit={limit}&resources=tiles:Vert,aifeatures,3d"
url += f"&apikey={apikey}"
response = get_payload(url)
if not isinstance(response, int):
return id_check_response_to_dataframe(response), response
elif response == FORBIDDEN_403:
logging.info(f"Unauthorised area request at {survey_id=} with code {response}")
return None, None
else:
logging.error(f"Failed request at {survey_id=} with code {response}")
return None, None


def get_coverage_from_survey_ids(
df,
api_key,
chunk_size=10000,
threads=20,
coverage_chunk_cache_dir="coverage_chunks",
id_col="id",
limit=100,
):
"""
Given a GeoDataFrame with survey_ids as a column, get the set of survey resource IDs attached to those survey_ids (such as aifeatures, tiles).
Parameters:
-----------
df : GeoDataFrame
A GeoDataFrame with a survey_id column for which to look up imagery, 3D and AI coverage.
api_key : str
The Nearmap API key to use for authentication.
chunk_size : int, optional
The number of points to process in each chunk. Default is 10000.
threads : int, optional
The number of threads to use for making API calls. Default is 20.
coverage_chunk_cache_dir : str, optional
The directory to cache coverage chunks. Default is "coverage_chunks".
id_col : str, optional
The name of the column in `df` that contains the unique identifier for each row.
Returns:
--------
df_coverage : DataFrame
A DataFrame containing the coverage data for each survey_id.
"""
df_coverage = []
df_coverage_empty = None
coverage_chunk_cache_dir = Path(coverage_chunk_cache_dir)
coverage_chunk_cache_dir.mkdir(parents=True, exist_ok=True)

for i in tqdm(range(0, len(df), chunk_size)):
f = coverage_chunk_cache_dir / f"coverage_chunk_{i}-{i+chunk_size}.parquet"
if not f.exists():
df_point_chunk = df.iloc[i : i + chunk_size, :]
logging.debug(f"Pulling chunk from API for {f}.")
# Multi-threaded pulls are ok - the API is designed to cope fine with 10-20 threads running in parallel pulling requests.
c = threaded_get_coverage_from_survey_ids(
df_point_chunk,
survey_id_col="survey_id",
apikey=api_key,
threads=threads,
limit=limit,
)
c_with_idx = []
for j in range(len(c)):
row_id = df_point_chunk.iloc[j].name
c_tmp = c[j].copy()
if len(c_tmp) > 0:
c_tmp[id_col] = row_id
c_with_idx.append(c_tmp)
if len(c_with_idx) > 0:
c = pd.concat(c_with_idx)
c["survey_resource_id"] = c["resources"].apply(get_survey_resource_id_from_survey_id_query)
c = c.rename(columns={"id": "survey_id"})
if (df_coverage_empty is None): # Set an empty dataframe with the right columns for writing dummy parquet cache files
df_coverage_empty = pd.DataFrame([], columns=c.columns).astype(c.dtypes)
else:
c = df_coverage_empty
if c is not None:
c.to_parquet(f)
else:
logging.debug(f"Reading chunk from parquet for {f}.")
c = pd.read_parquet(f)

if c is not None:
if len(c) > 0:
c = c.loc[
:,
[id_col, "captureDate", "survey_id", "survey_resource_id", "tiles", "aifeatures", "3d"],
].set_index(id_col)
c["captureDate"] = pd.to_datetime(c["captureDate"])
df_coverage.append(c)
if len(df_coverage) > 0:
df_coverage = pd.concat(df_coverage)
return df_coverage
else:
return None
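
Taken together, the additions above give a survey-id analogue of the existing point-based workflow. A minimal end-to-end sketch (illustrative only; survey IDs and API key are placeholders, and live access to the Nearmap API is assumed):

import pandas as pd

from nmaipy.coverage_utils import get_coverage_from_survey_ids

# One row per AOI, indexed by a unique id, each naming the survey to resolve.
df = pd.DataFrame(
    {"survey_id": ["survey-uuid-1", "survey-uuid-2"]},
    index=pd.Index(["parcel_a", "parcel_b"], name="id"),
)

# Chunks are cached as parquet files under coverage_chunk_cache_dir between runs.
df_coverage = get_coverage_from_survey_ids(
    df,
    api_key="YOUR_API_KEY",
    chunk_size=1000,
    threads=5,
)

# On success, df_coverage is indexed by "id" with captureDate, survey_id,
# survey_resource_id and resource columns (tiles, aifeatures, 3d); it is
# None when nothing resolves.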
11 changes: 9 additions & 2 deletions nmaipy/feature_api.py
@@ -444,7 +444,7 @@ def _create_request_string(

# Add dates if given
if ((since is not None) or (until is not None)) and (survey_resource_id is not None):
- raise ValueError("Invalid combination of since, until and survey_resource_id requested")
+ logger.debug(f"Request made with survey_resource_id {survey_resource_id} and either since or until - ignoring dates.")
elif (since is not None) or (until is not None):
if since:
request_string += f"&since={since}"
@@ -838,6 +838,10 @@ def payload_gdf(cls, payload: dict, aoi_id: Optional[str] = None) -> Tuple[gpd.G
"system_version": payload["systemVersion"],
"link": cls.add_location_marker_to_link(payload["link"]),
"date": cls.link_to_date(payload["link"]),
"survey_id": payload["surveyId"],
"survey_resource_id": payload["resourceId"],
"perspective": payload["perspective"],
"postcat": payload["postcat"],
}

columns = [
Expand Down Expand Up @@ -1030,12 +1034,15 @@ def get_features_gdf(

# Create metadata
metadata_df = metadata_df.drop_duplicates().iloc[0]

metadata = {
"aoi_id": metadata_df["aoi_id"],
"system_version": metadata_df["system_version"],
"link": metadata_df["link"],
"date": metadata_df["date"],
"survey_id": metadata_df["survey_id"],
"survey_resource_id": metadata_df["survey_resource_id"],
"perspective": metadata_df["perspective"],
"postcat": metadata_df["postcat"],
}

except (AIFeatureAPIError, AIFeatureAPIGridError) as e:
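Downstream, the new metadata keys travel with the result of get_features_gdf. A hedged sketch of reading them (the FeatureApi class name, constructor and call signature are assumptions, not shown in this diff):

from nmaipy.feature_api import FeatureApi  # class name assumed

api = FeatureApi(api_key="YOUR_API_KEY")  # constructor signature assumed
# features_gdf, metadata, errors = api.get_features_gdf(aoi_geometry, region="au")
# metadata["survey_id"], metadata["survey_resource_id"]
# metadata["perspective"], metadata["postcat"]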
