From 86bd3f7b09723a214c9193c3eae94cb51d8fac6a Mon Sep 17 00:00:00 2001 From: darnjo Date: Wed, 11 Oct 2023 01:33:25 -0700 Subject: [PATCH] #54: Added expansions collection and metrics and expanded field frequencies --- lib/orgs-exporter/data-access.js | 420 ------------------------ lib/orgs-exporter/index.js | 38 --- lib/orgs-exporter/utils.js | 26 -- lib/replication/index.js | 29 +- lib/replication/replication-iterator.js | 5 +- lib/replication/utils.js | 309 +++++++++-------- 6 files changed, 203 insertions(+), 624 deletions(-) delete mode 100644 lib/orgs-exporter/data-access.js delete mode 100644 lib/orgs-exporter/index.js delete mode 100644 lib/orgs-exporter/utils.js diff --git a/lib/orgs-exporter/data-access.js b/lib/orgs-exporter/data-access.js deleted file mode 100644 index e016489..0000000 --- a/lib/orgs-exporter/data-access.js +++ /dev/null @@ -1,420 +0,0 @@ -const https = require('https'); - -const { sleep } = require('../../common'); - -const REQUEST_TIMEOUT_MS = (process.env.REQUEST_TIMEOUT_S || 30) * 1000; -const { API_KEY, SERVER_URL, ENDORSEMENTS_PATH, UOI_GOOGLE_SHEET_URL } = process.env; - -const DATA_DICTIONARY_DISPLAY_NAME = 'Data Dictionary', - DATA_DICTIONARY_IDX_PAYLOAD_DISPLAY_NAME = 'Data Dictionary with IDX Payload', - WEB_API_CORE_DISPLAY_NAME = 'Web API Core', - LEGACY_WEB_API_DISPLAY_NAME = 'Web API', - LEGACY_SEARCH_STRING = 'legacy', - MLS_ORGANIZATION_TYPE = 'MLS', - ORGANIZATION_COLUMN = 'OrganizationStatus', - ACTIVE_STATUS_FLAG = '1', - CURRENT_DATA_DICTIONARY_VERSIONS = ['1.7'], - CURRENT_WEB_API_CORE_VERSIONS = ['2.0.0'], - MAPPED_ENDORSEMENTS_FIELD_NAME = 'Endorsements', - CERTIFIED_STATUS = 'Certified', - CERTIFIED_CURRENT_DISPLAY_NAME = 'Certified Current', - PASSED_CURRENT_DISPLAY_NAME = 'Passed Current', - CERTIFIED_LEGACY_DISPLAY_NAME = 'Certified Legacy', - UNCERTIFIED_DISPLAY_NAME = 'Uncertified', - CERTIFICATION_SUMMARY_BASE_URL = 'https://certification.reso.org/summary'; - -const CERTIFIABLE_ORG_TYPES = [ - 'MLS', - 'Technology Company', - 'Commercial', - 'Brokerage', - 'Pooled Platform' -]; - -const ENDORSEMENTS = { - DATA_DICTIONARY: 'data_dictionary', - DATA_DICTIONARY_IDX_PAYLOAD: 'data_dictionary_with_IDX_payload', - WEB_API_CORE: 'web_api_server_core' -}; - -const STATUSES = { - PASSED: 'passed', - REVOKED: 'revoked', - CERTIFIED: 'certified', - NOTIFIED: 'recipient_notified' -}; - -const buildEndorsementsFilterOptions = (from = 0) => { - return { - options: { - from, - endorsementFilter: [], - statusFilter: [STATUSES.PASSED, STATUSES.REVOKED, STATUSES.CERTIFIED, STATUSES.NOTIFIED], - showMyResults: false, - providerUoi: null, - searchKey: '', - sortBy: 'asc' - } - }; -}; - -const isOrganizationActive = record => - record[ORGANIZATION_COLUMN] && record[ORGANIZATION_COLUMN] === ACTIVE_STATUS_FLAG; - -const getEndorsementDisplayName = status => { - if (status === STATUSES.PASSED || status === STATUSES.NOTIFIED) return 'Passed'; - if (status === STATUSES.CERTIFIED) return 'Certified'; - return status; -}; - -const getEndorsementTypeDisplayName = type => { - if (type === ENDORSEMENTS.WEB_API_CORE) return WEB_API_CORE_DISPLAY_NAME; - if (type === ENDORSEMENTS.DATA_DICTIONARY) return DATA_DICTIONARY_DISPLAY_NAME; - if (type === ENDORSEMENTS.DATA_DICTIONARY_IDX_PAYLOAD) - return DATA_DICTIONARY_IDX_PAYLOAD_DISPLAY_NAME; - return `${type}`; -}; - -const getOrgs = async () => await get(UOI_GOOGLE_SHEET_URL); - -const covertGoogleSheetJsonToOrgsJson = ({ values = [] } = {}) => { - const ORGS_COLUMNS = [ - 'OrganizationUniqueId', - 'OrganizationType', - 'OrganizationName', - 'OrganizationAddress1', - 'OrganizationCity', - 'OrganizationStateOrProvince', - 'OrganizationPostalCode', - 'OrganizationWebsite', - 'OrganizationCountry', - 'ModificationTimestamp', - 'OrganizationLatitude', - 'OrganizationLongitude', - 'OrganizationMemberCount', - 'OrganizationCertName', - 'OrganizationDdStatus', - 'OrganizationDdVersion', - 'OrganizationWebApiStatus', - 'OrganizationWebApiVersion' - ]; - - const [keys = [], ...data] = values; - - return data.reduce((results, items = []) => { - const transformed = items.reduce((acc, item, index) => { - acc[keys[index]] = item; - return acc; - }, {}); - - if (isOrganizationActive(transformed)) { - results.push( - ORGS_COLUMNS.reduce((acc, columnName) => { - if (columnName === 'OrganizationLatitude') { - const lat = parseFloat(transformed[columnName]); - transformed[columnName] = isNaN(lat) ? null : lat; - } - - if (columnName === 'OrganizationLongitude') { - const lng = parseFloat(transformed[columnName]); - transformed[columnName] = isNaN(lng) ? null : lng; - } - - if (columnName === 'OrganizationMemberCount') { - const count = parseInt(transformed[columnName]?.replace(',', ''), 10) || null; - transformed[columnName] = count; - } - - acc[columnName] = transformed[columnName] || null; - return acc; - }, {}) - ); - } - return results; - }, []); -}; - -const fetchOrgs = async () => covertGoogleSheetJsonToOrgsJson(await getOrgs()); - -const fetchEndorsements = async () => { - let lastIndex = 0, - lastStatusCode = 0; - const results = {}; - - do { - const { statusCode, data } = await post( - SERVER_URL + ENDORSEMENTS_PATH, - buildEndorsementsFilterOptions(lastIndex) - ); - - const { lastUoiIndex, reportsByOrgs = {} } = data; - - //if there's no data in the response, we've reached the end: terminate - if (!Object.keys(reportsByOrgs).length) break; - - lastIndex = lastUoiIndex; - lastStatusCode = statusCode; - - Object.entries(reportsByOrgs).map(([uoi, endorsements = []]) => { - if (!results[uoi]) results[uoi] = []; - results[uoi].push( - ...endorsements.map(({ type, version, status, providerUoi, statusUpdatedAt }) => { - return { - Endorsement: getEndorsementTypeDisplayName(type), - Version: version, - Status: getEndorsementDisplayName(status), - ProviderUoi: providerUoi, - StatusUpdatedAt: statusUpdatedAt - }; - }) - ); - }); - - //sleep 1s so we don't hammer the server if it's busy - await sleep(); - } while (lastStatusCode >= 200 && lastStatusCode < 300); - - return results; -}; - -const fetchOrgsAndEndorsements = async () => { - const endorsements = await fetchEndorsements(); - const orgs = await fetchOrgs(); - - return orgs.map(org => { - const { - OrganizationUniqueId, - OrganizationType, - OrganizationDdStatus, - OrganizationDdVersion, - OrganizationWebApiStatus, - OrganizationWebApiVersion, - ...rest - } = org; - - const orgEndorsements = []; - - if (endorsements[OrganizationUniqueId]?.length) { - orgEndorsements.push(...endorsements[OrganizationUniqueId]); - } - - const certificationStatus = computeRecipientEndorsementStatus( - orgEndorsements, - OrganizationType, - OrganizationDdStatus, - OrganizationWebApiStatus - ); - - const result = { - OrganizationUniqueId, - OrganizationType, - ...rest - }; - - if (CERTIFIABLE_ORG_TYPES.includes(OrganizationType)) { - result.CertificationStatus = certificationStatus; - result.CertificationSummaryUrl = `${CERTIFICATION_SUMMARY_BASE_URL}/${OrganizationUniqueId}`; - } else { - return result; - } - - if (orgEndorsements?.length) { - result[MAPPED_ENDORSEMENTS_FIELD_NAME] = endorsements[OrganizationUniqueId]; - } else if (certificationStatus === CERTIFIED_LEGACY_DISPLAY_NAME) { - const legacy = []; - - if (OrganizationDdVersion) { - legacy.push({ - Endorsement: DATA_DICTIONARY_DISPLAY_NAME, - Version: OrganizationDdVersion, - Status: CERTIFIED_LEGACY_DISPLAY_NAME - }); - } - - if (OrganizationWebApiVersion) { - legacy.push({ - Endorsement: LEGACY_WEB_API_DISPLAY_NAME, - Version: OrganizationWebApiVersion, - Status: CERTIFIED_LEGACY_DISPLAY_NAME - }); - } - - result[MAPPED_ENDORSEMENTS_FIELD_NAME] = legacy; - } - - return result; - }); -}; - -const computeRecipientEndorsementStatus = ( - endorsements = [], - organizationType = '', - organizationDdStatus = '', - organizationWebApiStatus = '' -) => { - const isMlsRecipient = - organizationType?.toLowerCase() === MLS_ORGANIZATION_TYPE.trim().toLowerCase(); - - if (!endorsements?.length) { - if ( - isMlsRecipient && - organizationDdStatus?.toLowerCase().includes(LEGACY_SEARCH_STRING) && - organizationWebApiStatus?.toLowerCase().includes(LEGACY_SEARCH_STRING) - ) { - return CERTIFIED_LEGACY_DISPLAY_NAME; - } else if ( - organizationDdStatus?.toLowerCase().includes(LEGACY_SEARCH_STRING) || - organizationWebApiStatus?.toLowerCase().includes(LEGACY_SEARCH_STRING) - ) { - return CERTIFIED_LEGACY_DISPLAY_NAME; - } else { - return UNCERTIFIED_DISPLAY_NAME; - } - } - - const isCertified = Object.values( - endorsements.reduce((acc, { ProviderUoi, Endorsement, Status, Version }) => { - if (!acc[ProviderUoi]) - acc[ProviderUoi] = { - hasWebApi: false, - hasDD: false - }; - - if (!acc[ProviderUoi].hasWebApi) { - acc[ProviderUoi].hasWebApi = - Endorsement === WEB_API_CORE_DISPLAY_NAME && - CURRENT_WEB_API_CORE_VERSIONS.includes(Version) && - Status === CERTIFIED_STATUS; - } - - if (!acc[ProviderUoi].hasDD) { - acc[ProviderUoi].hasDD = - Endorsement === DATA_DICTIONARY_DISPLAY_NAME && - CURRENT_DATA_DICTIONARY_VERSIONS.includes(Version) && - Status === CERTIFIED_STATUS; - } - - return acc; - }, {}) - ).reduce((acc, { hasWebApi, hasDD }) => { - //{ T00000044: { hasWebApi: true, hasDD: true } } - if (!acc) { - acc = isMlsRecipient ? hasWebApi && hasDD : hasWebApi || hasDD; - } - return acc; - }, false); - - return isCertified ? CERTIFIED_CURRENT_DISPLAY_NAME : PASSED_CURRENT_DISPLAY_NAME; -}; - -const buildTotalsTemplate = () => { - return { - [`${CERTIFIED_CURRENT_DISPLAY_NAME}`]: 0, - [`${PASSED_CURRENT_DISPLAY_NAME}`]: 0, - [`${CERTIFIED_LEGACY_DISPLAY_NAME}`]: 0, - [`${UNCERTIFIED_DISPLAY_NAME}`]: 0 - }; -}; - -const computeEndorsementStats = async (orgAndEndorsementsData = []) => - orgAndEndorsementsData.reduce((stats, item) => { - const { CertificationStatus, OrganizationType } = item; - - if (!CertificationStatus) return stats; - - if (!stats?.All) { - stats.All = buildTotalsTemplate(); - } - - if (!stats[OrganizationType]) { - stats[OrganizationType] = buildTotalsTemplate(); - } - - if (CertificationStatus === CERTIFIED_CURRENT_DISPLAY_NAME) { - stats.All[CERTIFIED_CURRENT_DISPLAY_NAME]++; - stats[OrganizationType][CERTIFIED_CURRENT_DISPLAY_NAME]++; - } - - if (CertificationStatus === PASSED_CURRENT_DISPLAY_NAME) { - stats.All[PASSED_CURRENT_DISPLAY_NAME]++; - stats[OrganizationType][PASSED_CURRENT_DISPLAY_NAME]++; - } - - if (CertificationStatus === CERTIFIED_LEGACY_DISPLAY_NAME) { - stats.All[CERTIFIED_LEGACY_DISPLAY_NAME]++; - stats[OrganizationType][CERTIFIED_LEGACY_DISPLAY_NAME]++; - } - - if (CertificationStatus === UNCERTIFIED_DISPLAY_NAME) { - stats.All[UNCERTIFIED_DISPLAY_NAME]++; - stats[OrganizationType][UNCERTIFIED_DISPLAY_NAME]++; - } - - return stats; - }, {}); - -const post = async (url, body = {}) => { - const options = { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `ApiKey ${API_KEY}` - }, - maxRedirects: 5, - timeout: REQUEST_TIMEOUT_MS - }; - - return new Promise((resolve, reject) => { - const req = https.request(url, options, res => { - let rawData = ''; - - res.on('data', chunk => { - rawData += chunk; - }); - - res.on('end', () => { - try { - resolve({ statusCode: res?.statusCode, data: JSON.parse(rawData) }); - } catch (err) { - reject(new Error(err)); - } - }); - }); - - req.on('error', err => { - reject(new Error(err)); - }); - - req.write(JSON.stringify(body)); - req.end(); - }); -}; - -const get = async url => { - return new Promise((resolve, reject) => { - const req = https.get(url, res => { - let rawData = ''; - - res.on('data', chunk => { - rawData += chunk; - }); - - res.on('end', () => { - try { - resolve(JSON.parse(rawData)); - } catch (err) { - reject(new Error(err)); - } - }); - }); - - req.on('error', err => { - reject(new Error(err)); - }); - }); -}; - -module.exports = { - fetchOrgsAndEndorsements, - computeEndorsementStats -}; diff --git a/lib/orgs-exporter/index.js b/lib/orgs-exporter/index.js deleted file mode 100644 index 00178b3..0000000 --- a/lib/orgs-exporter/index.js +++ /dev/null @@ -1,38 +0,0 @@ -'use strict'; - -const { fetchOrgsAndEndorsements, computeEndorsementStats } = require('./data-access'); -const { writeDataToS3 } = require('./utils'); - -const ORG_RESULTS_BUCKET_NAME = 'reso-public', - ORG_RESULTS_FILE_NAME = 'OrgsAndEndorsements.json'; - -exports.handler = async event => { - try { - const data = await fetchOrgsAndEndorsements(); - const stats = await computeEndorsementStats(data); - - const serializedData = JSON.stringify({ - Description: 'RESO Organizations and Endorsements', - GeneratedOn: new Date().toISOString(), - Stats: stats, - Data: data - }); - - await writeDataToS3({ - bucketName: ORG_RESULTS_BUCKET_NAME, - fileName: ORG_RESULTS_FILE_NAME, - serializedData - }); - - return { - statusCode: 200, - body: { orgCount: data?.length } - }; - } catch (err) { - console.error(`ERROR: ${err}, event: ${event}`); - return { - statusCode: 400, - body: JSON.stringify('ERROR fetching endorsements!') - }; - } -}; diff --git a/lib/orgs-exporter/utils.js b/lib/orgs-exporter/utils.js deleted file mode 100644 index eea69df..0000000 --- a/lib/orgs-exporter/utils.js +++ /dev/null @@ -1,26 +0,0 @@ -'use strict'; - -const AWS = require('@aws-sdk/client-s3'); - -const { AWS_REGION } = process.env; - -const writeDataToS3 = async ({ - bucketName, - fileName, - serializedData, - contentType = 'application/json; charset=utf-8' -} = {}) => { - const s3 = new AWS.S3({ apiVersion: '2006-03-01', region: AWS_REGION }); - const params = { - Bucket: bucketName, - Key: fileName, - Body: serializedData, - ContentType: contentType - }; - - await s3.putObject(params).promise(); -}; - -module.exports = { - writeDataToS3 -}; diff --git a/lib/replication/index.js b/lib/replication/index.js index c5e47e8..2568e13 100644 --- a/lib/replication/index.js +++ b/lib/replication/index.js @@ -3,6 +3,7 @@ const { replicationIterator, REPLICATION_STRATEGIES } = require('./replication-iterator'); const { writeFile, mkdir } = require('fs/promises'); const { join } = require('path'); +const { sleep } = require('../../common'); const { DEFAULT_DD_VERSION, @@ -28,20 +29,26 @@ const replicate = async ({ outputPath, limit, resourceName, - expansions, + expansions: expansionArrayOrCommaSeparatedString, metadataReportJson = {}, pathToMetadataReportJson = '', filter, top, orderby, - rateLimitedWaitTimeMinutes = 60 + rateLimitedWaitTimeMinutes = 60, + secondsDelayBetweenRequests = 1 }) => { + + // error if unknown strategy is specified if (!Object.values(REPLICATION_STRATEGIES).includes(strategy)) { throw new Error(`Unknown strategy: '${strategy}'!`); } - // expansions will be a comma-separated list for now, if passed - const expansionsArray = expansions?.split(',').map(x => x?.trim()) || []; + // expansions will be a comma-separated list if passed from the command line and array if called from a library + // TODO: clean up + const expansionsArray = Array.isArray(expansionArrayOrCommaSeparatedString) + ? expansionArrayOrCommaSeparatedString + : expansionArrayOrCommaSeparatedString?.split(',').map(x => x?.trim()) || []; const requests = await prepareRequests({ serviceRootUri, @@ -53,7 +60,7 @@ const replicate = async ({ top, orderby }); - + const startTime = Date.now(), shouldSaveResults = !!outputPath, resourceAvailabilityMap = {}; @@ -94,7 +101,8 @@ const replicate = async ({ responseTimeMs, startTime, stopTime, - responseBytes + responseBytes, + pagesFetched }); if (shouldSaveResults) { @@ -105,6 +113,7 @@ const replicate = async ({ } if (!!limit && totalRecordsFetched >= limit) { + console.log(`Reached specified record limit of ${limit}\n`); break; } } catch (err) { @@ -113,11 +122,19 @@ const replicate = async ({ return; } } + // throttle requests if requested + if (secondsDelayBetweenRequests) { + await sleep(secondsDelayBetweenRequests * 1000); + } } catch (err) { //TODO: add logic to allow fast fail, in which case we'd return here console.error(err); return; } + // throttle requests if requested + if (secondsDelayBetweenRequests) { + await sleep(secondsDelayBetweenRequests * 1000); + } } try { diff --git a/lib/replication/replication-iterator.js b/lib/replication/replication-iterator.js index 15c93cc..0a13348 100644 --- a/lib/replication/replication-iterator.js +++ b/lib/replication/replication-iterator.js @@ -115,8 +115,6 @@ async function* replicationIterator({ initialRequestUri = '', maxErrorCount = 3, headers[ODATA_PREFER_HEADER_NAME] = `${ODATA_MAX_PAGE_SIZE_HEADER_NAME}=${pageSize}`; } - console.log(`Initial request uri: ${requestUri}\n`); - do { let responseJson = null, responseBytes = 0, @@ -134,7 +132,6 @@ async function* replicationIterator({ initialRequestUri = '', maxErrorCount = 3, if (requestUri === lastRequestUri) { //same request shouldn't be made twice - console.log('No more requests. Exiting...'); return; } @@ -144,7 +141,7 @@ async function* replicationIterator({ initialRequestUri = '', maxErrorCount = 3, try { //request records - console.log(`Fetching records from '${requestUri}'...`); + console.log(`\nFetching records from '${requestUri}'...`); const startTimeMs = new Date(); const response = await fetch(requestUri, { headers }); diff --git a/lib/replication/utils.js b/lib/replication/utils.js index ef95223..609e14c 100644 --- a/lib/replication/utils.js +++ b/lib/replication/utils.js @@ -35,21 +35,22 @@ const scorePayload = ({ records = [], // TODO: consider not mutating resourceAvailabilityMap = {}, - resourceName = '', - parentResourceName, - isExpansion = false, - expansions = [], + resourceName, responseTimeMs, responseBytes = 0, - dateField = 'ModificationTimestamp' + dateField = 'ModificationTimestamp', + pagesFetched, + ...otherParams }) => { - // init if the resource doesn't exist + const { expandedTypeInfo = [] } = otherParams; + + const isExpansion = !!((expandedTypeInfo && expandedTypeInfo?.length) || undefined), + [{ fieldName: expandedFieldName, modelName: expandedResourceName } = {}] = expandedTypeInfo || []; + if (!resourceAvailabilityMap?.[resourceName]) { resourceAvailabilityMap[resourceName] = { resourceName, - parentResourceName, numRecordsFetched: 0, - isExpansion, recordCount: 0, pageSize: records?.length ?? 0, // TODO: allow passing the date field name @@ -57,36 +58,67 @@ const scorePayload = ({ dateHigh: null, dateLow: null, // field availability map is fieldName and frequency - fieldAvailabilityMap: {} + fieldAvailabilityMap: {}, + pagesFetched, + expansionAvailabilityMap: {}, + expandedTypeInfo, + isExpansion }; } + // init + if (isExpansion) { + // TODO: refactor - init parent item if it doesn't exist + // this is possible depending on whether we're starting with a query which is an expansion or not + + resourceAvailabilityMap[resourceName].expansionAvailabilityMap[expandedResourceName] = { + parentResourceName: resourceName, + resourceName: expandedResourceName, + numRecordsFetched: 0, + recordCount: 0, + pageSize: records?.length ?? 0, + // TODO: allow passing the date field name + dateField, + dateHigh: null, + dateLow: null, + // field availability map is fieldName and frequency + fieldAvailabilityMap: {}, + pagesFetched, + expansionAvailabilityMap: {}, + expandedTypeInfo, + isExpansion + }; + } + + // availability map schema is the same, regardless if expansions or resource + let availabilityMap = {}; + if (isExpansion) { + availabilityMap = resourceAvailabilityMap[resourceName].expansionAvailabilityMap[expandedResourceName]; + } else { + availabilityMap = resourceAvailabilityMap[resourceName]; + } + // update legacy property for current reports - resourceAvailabilityMap[resourceName].numRecordsFetched += records?.length || 0; + availabilityMap.numRecordsFetched += records?.length || 0; + availabilityMap.pagesFetched = pagesFetched; //init if not present - if (!resourceAvailabilityMap?.[resourceName]?.responses) { - resourceAvailabilityMap[resourceName].responses = []; + if (!availabilityMap?.responses) { + availabilityMap.responses = []; } - let responseInfo = {}; - // Update responses - if (!isExpansion) { - responseInfo = { - requestUri, - responseTimeMs - }; - } - responseInfo.responseBytes = responseBytes; - resourceAvailabilityMap[resourceName].responses.push(responseInfo); + availabilityMap.responses.push({ + requestUri, + responseTimeMs, + responseBytes + }); records.forEach(record => { Object.entries(record).forEach(([fieldName, value]) => { // init if the field if it doesn't exist - if (!resourceAvailabilityMap?.[resourceName]?.fieldAvailabilityMap?.[fieldName]) { - resourceAvailabilityMap[resourceName].fieldAvailabilityMap[fieldName] = { - resourceName, - parentResourceName, + if (!availabilityMap?.fieldAvailabilityMap?.[fieldName]) { + availabilityMap.fieldAvailabilityMap[fieldName] = { + resourceName: isExpansion ? expandedResourceName : resourceName, fieldName, frequency: 0 }; @@ -98,44 +130,14 @@ const scorePayload = ({ // functions aren't allowed here, so this covers everything if (!!value && (isPrimitive || Object.values(value)?.length)) { // increment usage - resourceAvailabilityMap[resourceName].fieldAvailabilityMap[fieldName].frequency++; - - // Update resource max and min dates - if (fieldName === dateField) { - const dateValue = new Date(value), - currentDateLowValue = resourceAvailabilityMap?.[resourceName]?.dateLow - ? new Date(resourceAvailabilityMap[resourceName].dateLow) - : null, - currentDateHighValue = resourceAvailabilityMap?.[resourceName]?.dateHigh - ? new Date(resourceAvailabilityMap[resourceName].dateHigh) - : null; - - if (dateValue) { - if (currentDateLowValue) { - resourceAvailabilityMap[resourceName].dateLow = new Date(Math.min(currentDateLowValue, dateValue)).toISOString(); - } else { - resourceAvailabilityMap[resourceName].dateLow = dateValue.toISOString(); - } + availabilityMap.fieldAvailabilityMap[fieldName].frequency++; - if (currentDateHighValue) { - resourceAvailabilityMap[resourceName].dateHigh = new Date(Math.max(currentDateHighValue, dateValue)).toISOString(); - } else { - resourceAvailabilityMap[resourceName].dateHigh = dateValue.toISOString(); - } - } - } - - if (fieldName === POSTAL_CODE_FIELD_NAME) { - if (!resourceAvailabilityMap?.[resourceName]?.postalCodes) { - resourceAvailabilityMap[resourceName].postalCodes = new Set(); - } - resourceAvailabilityMap[resourceName].postalCodes.add(value); - } + processSpecialFields({ availabilityMap, fieldName, dateField, value }); // TODO: Enumerations // process expansions, if present - if (expansions?.includes(fieldName)) { + if (isExpansion && fieldName === expandedFieldName) { // TODO: look up the resource name for the expanded field and determine whether it's a collection or not // for now, just use Media @@ -143,24 +145,56 @@ const scorePayload = ({ requestUri, records: isArray ? value : [value], resourceAvailabilityMap, - parentResourceName, resourceName, - isExpansion: true, isCollection: isArray, - expansions, - responseBytes: calculateJsonSize(value) + responseBytes: calculateJsonSize(value), + ...otherParams }); } } - - if (isExpansion) { - //console.log('Is Expansion! ' + resourceName + ', ' + fieldName); - //TODO: anything else relevant to expansions (inside of recursive call) - } }); }); }; +/** + * Updates various properties in the given availabilityMap depending on whether + * the current field is something that needs to be processed + * + * @param {Object} args an object containing the availabilityMap to update, as well as other related params + * + */ +const processSpecialFields = ({ availabilityMap, fieldName, dateField, value } = {}) => { + if (!value) return; + + // Update resource max and min dates + if (fieldName === dateField) { + const dateValue = new Date(value), + currentDateLowValue = availabilityMap?.dateLow ? new Date(availabilityMap.dateLow) : null, + currentDateHighValue = availabilityMap?.dateHigh ? new Date(availabilityMap.dateHigh) : null; + + if (dateValue) { + if (currentDateLowValue) { + availabilityMap.dateLow = new Date(Math.min(currentDateLowValue, dateValue)).toISOString(); + } else { + availabilityMap.dateLow = dateValue.toISOString(); + } + + if (currentDateHighValue) { + availabilityMap.dateHigh = new Date(Math.max(currentDateHighValue, dateValue)).toISOString(); + } else { + availabilityMap.dateHigh = dateValue.toISOString(); + } + } + } + + if (fieldName === POSTAL_CODE_FIELD_NAME) { + if (!availabilityMap?.postalCodes) { + availabilityMap.postalCodes = new Set(); + } + availabilityMap.postalCodes.add(value); + } +}; + /** * Determines whether the given value is an object, array, or primitive * @param {Object} value to test @@ -182,36 +216,33 @@ const isObjectOrArrayOrPrimitive = value => { * Processes data, keyed by resources, fields, and enumerations, into its * first round of aggregation * + * Current Resource Aggregation + * + * { + * "resourceName": "Office", + * "recordCount": 1751, + * "numRecordsFetched": 1709, + * "numSamples": 18, + * "pageSize": 100, + * "averageResponseBytes": 106953, + * "averageResponseTimeMillis": 547, + * "dateField": "ModificationTimestamp", + * "dateLow": "2019-08-14T13:59:06Z", + * "dateHigh": "2023-08-28T13:46:24Z", + * "keyFields": [ + * "OfficeKey" + * ] + * }, ... + * * @param {Map} resourceAvailabilityMap map containing availability data * @returns consolidated availability data set in canonical resources, fields, lookups format */ const consolidateResults = (resourceAvailabilityMap = {}) => - // each responses item is { requestUri, responseTimeMs, startTime, stopTime, recordCount: records?.value?.length || 0, responseStatus } Object.values(resourceAvailabilityMap ?? {}).reduce( (acc, resourceData) => { - const { fieldAvailabilityMap = {}, ...remainingResourceData } = resourceData; - - /* - Current Resource Aggregation + const { fieldAvailabilityMap = {}, expansionAvailabilityMap = {}, /* expandedTypeInfo = [],*/ ...remainingResourceData } = resourceData; - { - "resourceName": "Office", - "recordCount": 1751, - "numRecordsFetched": 1709, - "numSamples": 18, - "pageSize": 100, - "averageResponseBytes": 106953, - "averageResponseTimeMillis": 547, - "dateField": "ModificationTimestamp", - "dateLow": "2019-08-14T13:59:06Z", - "dateHigh": "2023-08-28T13:46:24Z", - "keyFields": [ - "OfficeKey" - ] - }, - */ - - if (Object.values(remainingResourceData)) { + if (remainingResourceData && Object.values(remainingResourceData)?.length) { // need to compute the following stats from the resources requests array to support older reports // recordCount, averageResponseBytes, averageResponseTimeMillis, dateLow, dateHigh const { numRecordsFetched = 0 } = remainingResourceData; @@ -221,10 +252,8 @@ const consolidateResults = (resourceAvailabilityMap = {}) => acc.totalResponseBytes += responseBytes; acc.totalResponseTimeMs += responseTimeMs; acc.totalRecordCount += recordCount; - acc.responseBytesValues.push(responseBytes); acc.responseTimeValues.push(responseTimeMs); - return acc; }, { @@ -251,37 +280,50 @@ const consolidateResults = (resourceAvailabilityMap = {}) => postalCodes: postalCodesSet = new Set(), ...rest } = remainingResourceData; + const postalCodes = postalCodesSet?.size ? Array.from(postalCodesSet) : undefined; + + let expandedResources, expandedFields; + if (expansionAvailabilityMap && Object.values(expansionAvailabilityMap)?.length) { + ({ resources: expandedResources = [], fields: expandedFields = [] } = consolidateResults(expansionAvailabilityMap)); + acc.expansions.push(...expandedResources); + acc.fields.push(...expandedFields); + } + + acc.resources.push({ + ...rest, + ...resourceStats, + responses, + recordCount, + postalCodes + }); + if (isExpansion) { - // do not include time-based stats for expansions since it's that of their parent resource - const { numRecordsFetched = 0, medianBytes = 0, stdDevBytes = 0, averageResponseBytes = 0 } = resourceStats; - acc.resources.push({ - ...rest, - isExpansion, - numRecordsFetched, - medianBytes, - stdDevBytes, - averageResponseBytes, - postalCodes + Object.values(resourceAvailabilityMap || {}).forEach(availabilityMap => { + const { parentResourceName, fieldAvailabilityMap: expandedAvailabilityMap } = availabilityMap; + + //need to push expandedAvailabilityMap.values into fields along with the parent resource for each one + if (expandedAvailabilityMap && Object.values(expandedAvailabilityMap)?.length) { + acc.fields.push( + ...Object.values(expandedAvailabilityMap).map(expandedField => { + return { + ...expandedField, + parentResourceName + }; + }) + ); + } }); } else { - acc.resources.push({ - ...rest, - ...resourceStats, - // only add responses for non-expanded items - responses, - recordCount, - postalCodes - }); + acc.fields.push(...Object.values(fieldAvailabilityMap)); } - - acc.fields.push(...Object.values(fieldAvailabilityMap)); } return acc; }, { resources: [], + expansions: [], fields: [], lookups: [] } @@ -492,9 +534,8 @@ const handleError = async ({ error = {}, rateLimitedWaitTimeMinutes = 60 }) => { console.error(`HTTP request error! Status code: ${statusCode}, message: '${message}'`); if (parseInt(statusCode) === ERROR_CODES.HTTP.RATE_LIMITED) { - console.warn(`${Date.now().toISOString()} - HTTP ${ERROR_CODES.HTTP.RATE_LIMITED} error!`); - console.warn(`\t -> Waiting ${rateLimitedWaitTimeMinutes}m before making the next request...`); - await sleep(rateLimitedWaitTimeMinutes * 1000); + console.warn(` -> ${new Date().toISOString()} - Waiting ${rateLimitedWaitTimeMinutes}m before making the next request...`); + await sleep(rateLimitedWaitTimeMinutes * 60 * 1000); } return { errorType, statusCode, message }; @@ -514,7 +555,7 @@ const createRequestsFromMetadataReport = ({ serviceRootUri, metadataReportJson = const { fields = [] } = metadataReportJson; const { requests = [] } = fields.reduce( - (acc, { resourceName, fieldName, isExpansion, typeName }) => { + (acc, { resourceName, fieldName, isExpansion, typeName: modelName }) => { if (!acc.resources.has(resourceName)) { acc.resources.add(resourceName); acc.requests.push({ resourceName }); @@ -522,7 +563,7 @@ const createRequestsFromMetadataReport = ({ serviceRootUri, metadataReportJson = if (isExpansion && !acc.expansions.has(resourceName + fieldName)) { acc.expansions.add(resourceName + fieldName); - acc.requests.push({ resourceName, fieldName, isExpansion, typeName }); + acc.requests.push({ resourceName, fieldName, isExpansion, modelName }); } return acc; @@ -534,27 +575,36 @@ const createRequestsFromMetadataReport = ({ serviceRootUri, metadataReportJson = } ); - return requests.map(({ resourceName, fieldName, isExpansion, typeName }) => { - const expansions = isExpansion ? [fieldName] : undefined; - const expandedFields = isExpansion ? [{ fieldName, typeName }] : undefined; + return requests.map(({ resourceName, fieldName, isExpansion, modelName }) => { + // each request only queries one expansion at a time for Certification + const expandedTypeInfo = isExpansion ? [{ fieldName, modelName }] : undefined; return { - requestUri: createODataRequestUri({ serviceRootUri, resourceName, expansions, filter, top, orderby }), + requestUri: createODataRequestUri({ serviceRootUri, resourceName, expandedTypeInfo, filter, top, orderby }), resourceName, - expansions, - expandedFields + expandedTypeInfo }; }); }; const createRequestFromParameters = ({ serviceRootUri, resourceName, expansions = [], filter, top }) => { + const expandedTypeInfo = + expansions && expansions?.length + ? expansions.map(fieldName => { + return { + fieldName + /* TODO: look up type info from reference metadata, when possible */ + }; + }) + : undefined; + return { requestUri: createODataRequestUri({ serviceRootUri, resourceName, expansions, filter, top }), resourceName, - expansions + expandedTypeInfo }; }; -const createODataRequestUri = ({ serviceRootUri, resourceName, expansions = [], filter, top }) => { +const createODataRequestUri = ({ serviceRootUri, resourceName, expandedTypeInfo = [], filter, top }) => { // TODO: sanitize any existing parameters // const { $select, $top, $filter, $orderby } = odataQueryOptions; try { @@ -573,8 +623,8 @@ const createODataRequestUri = ({ serviceRootUri, resourceName, expansions = [], searchParams.push(`$top=${top}`); } - if (!!expansions && expansions?.length) { - searchParams.push(`$expand=${expansions.join(',')}`); + if (!!expandedTypeInfo && expandedTypeInfo?.length) { + searchParams.push(`$expand=${expandedTypeInfo.map(x => x?.fieldName?.trim()).join(',')}`); } if (!!filter && filter?.length) { @@ -613,8 +663,7 @@ const prepareRequests = async ({ metadataReportJson: usePaths ? JSON.parse(await readFile(pathToMetadataReportJson, { encoding: 'utf8' })) : metadataReportJson, filter, top, - orderby, - expansions + orderby }) ); } else if (useParams) {