From dcb7127f5d57850b5a2b1f569fbb523697a7286e Mon Sep 17 00:00:00 2001 From: Natalie Diaz Date: Tue, 21 Oct 2025 09:58:27 -0700 Subject: [PATCH] Spanner fixes --- .../query/get_observations_contained_in.json | 660 +++++++++--------- .../get_node_edges_first_page_chain.sql | 8 +- .../query_builder/get_node_edges_in_chain.sql | 8 +- .../get_node_edges_out_chain.sql | 8 +- .../get_node_edges_second_page_chain.sql | 8 +- internal/server/spanner/golden/query_test.go | 33 +- internal/server/spanner/query_builder.go | 6 +- internal/server/spanner/statements.go | 28 +- 8 files changed, 397 insertions(+), 362 deletions(-) diff --git a/internal/server/spanner/golden/query/get_observations_contained_in.json b/internal/server/spanner/golden/query/get_observations_contained_in.json index 1aee784b0..9ce66cfa9 100644 --- a/internal/server/spanner/golden/query/get_observations_contained_in.json +++ b/internal/server/spanner/golden/query/get_observations_contained_in.json @@ -801,142 +801,6 @@ "IsDcAggregate": false, "ProvenanceURL": "https://wonder.cdc.gov/ucd-icd10.html" }, - { - "VariableMeasured": "Median_Age_Person", - "ObservationAbout": "geoId/10001", - "FacetId": "1543127381", - "Observations": [ - { - "Date": "2011", - "Value": "36.4" - }, - { - "Date": "2012", - "Value": "36.6" - }, - { - "Date": "2013", - "Value": "36.7" - }, - { - "Date": "2014", - "Value": "36.8" - }, - { - "Date": "2015", - "Value": "37.0" - }, - { - "Date": "2016", - "Value": "37.2" - }, - { - "Date": "2017", - "Value": "37.3" - }, - { - "Date": "2018", - "Value": "37.6" - }, - { - "Date": "2019", - "Value": "37.7" - }, - { - "Date": "2020", - "Value": "38.1" - }, - { - "Date": "2021", - "Value": "38.2" - }, - { - "Date": "2022", - "Value": "38.5" - }, - { - "Date": "2023", - "Value": "38.5" - } - ], - "ImportName": "CensusACS5YearSurvey", - "ObservationPeriod": "", - "MeasurementMethod": "CensusACS5yrSurvey", - "Unit": "Year", - "ScalingFactor": "", - "IsDcAggregate": false, - "ProvenanceURL": "https://www.census.gov/programs-surveys/acs/data/data-via-ftp.html" - }, - { - "VariableMeasured": "Median_Age_Person", - "ObservationAbout": "geoId/10001", - "FacetId": "1755512982", - "Observations": [ - { - "Date": "2010", - "Value": "36.1" - }, - { - "Date": "2011", - "Value": "36.4" - }, - { - "Date": "2012", - "Value": "36.6" - }, - { - "Date": "2013", - "Value": "36.7" - }, - { - "Date": "2014", - "Value": "36.8" - }, - { - "Date": "2015", - "Value": "37.0" - }, - { - "Date": "2016", - "Value": "37.2" - }, - { - "Date": "2017", - "Value": "37.3" - }, - { - "Date": "2018", - "Value": "37.6" - }, - { - "Date": "2019", - "Value": "37.7" - }, - { - "Date": "2020", - "Value": "38.1" - }, - { - "Date": "2021", - "Value": "38.2" - }, - { - "Date": "2022", - "Value": "38.5" - }, - { - "Date": "2023", - "Value": "38.5" - } - ], - "ImportName": "CensusACS5YearSurvey_SubjectTables_S0101", - "ObservationPeriod": "", - "MeasurementMethod": "CensusACS5yrSurveySubjectTable", - "Unit": "Years", - "ScalingFactor": "", - "IsDcAggregate": false, - "ProvenanceURL": "https://data.census.gov/table?q=S0101:+Age+and+Sex\u0026tid=ACSST1Y2022.S0101" - }, { "VariableMeasured": "Count_Person", "ObservationAbout": "geoId/10003", @@ -1744,239 +1608,103 @@ "ProvenanceURL": "https://wonder.cdc.gov/ucd-icd10.html" }, { - "VariableMeasured": "Median_Age_Person", - "ObservationAbout": "geoId/10003", - "FacetId": "1543127381", + "VariableMeasured": "Count_Person", + "ObservationAbout": "geoId/10005", + "FacetId": "1006847329", "Observations": [ { - "Date": "2011", - "Value": "37.0" + "Date": "2000", + "Value": "156638.0" }, { - "Date": "2012", - "Value": "37.0" + "Date": "2010", + "Value": "197145.0" }, { - "Date": "2013", - "Value": "37.2" - }, + "Date": "2020", + "Value": "237378.0" + } + ], + "ImportName": "USDecennialCensus_RedistrictingRelease", + "ObservationPeriod": "", + "MeasurementMethod": "USDecennialCensus", + "Unit": "", + "ScalingFactor": "", + "IsDcAggregate": false, + "ProvenanceURL": "https://www.census.gov/programs-surveys/decennial-census/about/rdo/summary-files.html" + }, + { + "VariableMeasured": "Count_Person", + "ObservationAbout": "geoId/10005", + "FacetId": "1125128424", + "Observations": [ { - "Date": "2014", - "Value": "37.3" + "Date": "1970", + "Value": "80356.0" }, { - "Date": "2015", - "Value": "37.5" + "Date": "1971", + "Value": "83900.0" }, { - "Date": "2016", - "Value": "37.6" + "Date": "1972", + "Value": "85300.0" }, { - "Date": "2017", - "Value": "37.8" + "Date": "1973", + "Value": "87200.0" }, { - "Date": "2018", - "Value": "38.1" + "Date": "1974", + "Value": "89700.0" }, { - "Date": "2019", - "Value": "38.4" + "Date": "1975", + "Value": "91500.0" }, { - "Date": "2020", - "Value": "38.7" + "Date": "1976", + "Value": "93700.0" }, { - "Date": "2021", - "Value": "38.8" + "Date": "1977", + "Value": "94500.0" }, { - "Date": "2022", - "Value": "39.0" + "Date": "1978", + "Value": "95700.0" }, { - "Date": "2023", - "Value": "39.2" - } - ], - "ImportName": "CensusACS5YearSurvey", - "ObservationPeriod": "", - "MeasurementMethod": "CensusACS5yrSurvey", - "Unit": "Year", - "ScalingFactor": "", - "IsDcAggregate": false, - "ProvenanceURL": "https://www.census.gov/programs-surveys/acs/data/data-via-ftp.html" - }, - { - "VariableMeasured": "Median_Age_Person", - "ObservationAbout": "geoId/10003", - "FacetId": "1755512982", - "Observations": [ - { - "Date": "2010", - "Value": "36.8" + "Date": "1979", + "Value": "97500.0" }, { - "Date": "2011", - "Value": "37.0" + "Date": "1980", + "Value": "98004.0" }, { - "Date": "2012", - "Value": "37.0" + "Date": "1981", + "Value": "98334.0" }, { - "Date": "2013", - "Value": "37.2" + "Date": "1982", + "Value": "98448.0" }, { - "Date": "2014", - "Value": "37.3" + "Date": "1983", + "Value": "99806.0" }, { - "Date": "2015", - "Value": "37.5" + "Date": "1984", + "Value": "102069.0" }, { - "Date": "2016", - "Value": "37.6" + "Date": "1985", + "Value": "103943.0" }, { - "Date": "2017", - "Value": "37.8" - }, - { - "Date": "2018", - "Value": "38.1" - }, - { - "Date": "2019", - "Value": "38.4" - }, - { - "Date": "2020", - "Value": "38.7" - }, - { - "Date": "2021", - "Value": "38.8" - }, - { - "Date": "2022", - "Value": "39.0" - }, - { - "Date": "2023", - "Value": "39.2" - } - ], - "ImportName": "CensusACS5YearSurvey_SubjectTables_S0101", - "ObservationPeriod": "", - "MeasurementMethod": "CensusACS5yrSurveySubjectTable", - "Unit": "Years", - "ScalingFactor": "", - "IsDcAggregate": false, - "ProvenanceURL": "https://data.census.gov/table?q=S0101:+Age+and+Sex\u0026tid=ACSST1Y2022.S0101" - }, - { - "VariableMeasured": "Count_Person", - "ObservationAbout": "geoId/10005", - "FacetId": "1006847329", - "Observations": [ - { - "Date": "2000", - "Value": "156638.0" - }, - { - "Date": "2010", - "Value": "197145.0" - }, - { - "Date": "2020", - "Value": "237378.0" - } - ], - "ImportName": "USDecennialCensus_RedistrictingRelease", - "ObservationPeriod": "", - "MeasurementMethod": "USDecennialCensus", - "Unit": "", - "ScalingFactor": "", - "IsDcAggregate": false, - "ProvenanceURL": "https://www.census.gov/programs-surveys/decennial-census/about/rdo/summary-files.html" - }, - { - "VariableMeasured": "Count_Person", - "ObservationAbout": "geoId/10005", - "FacetId": "1125128424", - "Observations": [ - { - "Date": "1970", - "Value": "80356.0" - }, - { - "Date": "1971", - "Value": "83900.0" - }, - { - "Date": "1972", - "Value": "85300.0" - }, - { - "Date": "1973", - "Value": "87200.0" - }, - { - "Date": "1974", - "Value": "89700.0" - }, - { - "Date": "1975", - "Value": "91500.0" - }, - { - "Date": "1976", - "Value": "93700.0" - }, - { - "Date": "1977", - "Value": "94500.0" - }, - { - "Date": "1978", - "Value": "95700.0" - }, - { - "Date": "1979", - "Value": "97500.0" - }, - { - "Date": "1980", - "Value": "98004.0" - }, - { - "Date": "1981", - "Value": "98334.0" - }, - { - "Date": "1982", - "Value": "98448.0" - }, - { - "Date": "1983", - "Value": "99806.0" - }, - { - "Date": "1984", - "Value": "102069.0" - }, - { - "Date": "1985", - "Value": "103943.0" - }, - { - "Date": "1986", - "Value": "106186.0" + "Date": "1986", + "Value": "106186.0" }, { "Date": "1987", @@ -2681,6 +2409,278 @@ "IsDcAggregate": false, "ProvenanceURL": "https://wonder.cdc.gov/ucd-icd10.html" }, + { + "VariableMeasured": "Median_Age_Person", + "ObservationAbout": "geoId/10001", + "FacetId": "1543127381", + "Observations": [ + { + "Date": "2011", + "Value": "36.4" + }, + { + "Date": "2012", + "Value": "36.6" + }, + { + "Date": "2013", + "Value": "36.7" + }, + { + "Date": "2014", + "Value": "36.8" + }, + { + "Date": "2015", + "Value": "37.0" + }, + { + "Date": "2016", + "Value": "37.2" + }, + { + "Date": "2017", + "Value": "37.3" + }, + { + "Date": "2018", + "Value": "37.6" + }, + { + "Date": "2019", + "Value": "37.7" + }, + { + "Date": "2020", + "Value": "38.1" + }, + { + "Date": "2021", + "Value": "38.2" + }, + { + "Date": "2022", + "Value": "38.5" + }, + { + "Date": "2023", + "Value": "38.5" + } + ], + "ImportName": "CensusACS5YearSurvey", + "ObservationPeriod": "", + "MeasurementMethod": "CensusACS5yrSurvey", + "Unit": "Year", + "ScalingFactor": "", + "IsDcAggregate": false, + "ProvenanceURL": "https://www.census.gov/programs-surveys/acs/data/data-via-ftp.html" + }, + { + "VariableMeasured": "Median_Age_Person", + "ObservationAbout": "geoId/10001", + "FacetId": "1755512982", + "Observations": [ + { + "Date": "2010", + "Value": "36.1" + }, + { + "Date": "2011", + "Value": "36.4" + }, + { + "Date": "2012", + "Value": "36.6" + }, + { + "Date": "2013", + "Value": "36.7" + }, + { + "Date": "2014", + "Value": "36.8" + }, + { + "Date": "2015", + "Value": "37.0" + }, + { + "Date": "2016", + "Value": "37.2" + }, + { + "Date": "2017", + "Value": "37.3" + }, + { + "Date": "2018", + "Value": "37.6" + }, + { + "Date": "2019", + "Value": "37.7" + }, + { + "Date": "2020", + "Value": "38.1" + }, + { + "Date": "2021", + "Value": "38.2" + }, + { + "Date": "2022", + "Value": "38.5" + }, + { + "Date": "2023", + "Value": "38.5" + } + ], + "ImportName": "CensusACS5YearSurvey_SubjectTables_S0101", + "ObservationPeriod": "", + "MeasurementMethod": "CensusACS5yrSurveySubjectTable", + "Unit": "Years", + "ScalingFactor": "", + "IsDcAggregate": false, + "ProvenanceURL": "https://data.census.gov/table?q=S0101:+Age+and+Sex\u0026tid=ACSST1Y2022.S0101" + }, + { + "VariableMeasured": "Median_Age_Person", + "ObservationAbout": "geoId/10003", + "FacetId": "1543127381", + "Observations": [ + { + "Date": "2011", + "Value": "37.0" + }, + { + "Date": "2012", + "Value": "37.0" + }, + { + "Date": "2013", + "Value": "37.2" + }, + { + "Date": "2014", + "Value": "37.3" + }, + { + "Date": "2015", + "Value": "37.5" + }, + { + "Date": "2016", + "Value": "37.6" + }, + { + "Date": "2017", + "Value": "37.8" + }, + { + "Date": "2018", + "Value": "38.1" + }, + { + "Date": "2019", + "Value": "38.4" + }, + { + "Date": "2020", + "Value": "38.7" + }, + { + "Date": "2021", + "Value": "38.8" + }, + { + "Date": "2022", + "Value": "39.0" + }, + { + "Date": "2023", + "Value": "39.2" + } + ], + "ImportName": "CensusACS5YearSurvey", + "ObservationPeriod": "", + "MeasurementMethod": "CensusACS5yrSurvey", + "Unit": "Year", + "ScalingFactor": "", + "IsDcAggregate": false, + "ProvenanceURL": "https://www.census.gov/programs-surveys/acs/data/data-via-ftp.html" + }, + { + "VariableMeasured": "Median_Age_Person", + "ObservationAbout": "geoId/10003", + "FacetId": "1755512982", + "Observations": [ + { + "Date": "2010", + "Value": "36.8" + }, + { + "Date": "2011", + "Value": "37.0" + }, + { + "Date": "2012", + "Value": "37.0" + }, + { + "Date": "2013", + "Value": "37.2" + }, + { + "Date": "2014", + "Value": "37.3" + }, + { + "Date": "2015", + "Value": "37.5" + }, + { + "Date": "2016", + "Value": "37.6" + }, + { + "Date": "2017", + "Value": "37.8" + }, + { + "Date": "2018", + "Value": "38.1" + }, + { + "Date": "2019", + "Value": "38.4" + }, + { + "Date": "2020", + "Value": "38.7" + }, + { + "Date": "2021", + "Value": "38.8" + }, + { + "Date": "2022", + "Value": "39.0" + }, + { + "Date": "2023", + "Value": "39.2" + } + ], + "ImportName": "CensusACS5YearSurvey_SubjectTables_S0101", + "ObservationPeriod": "", + "MeasurementMethod": "CensusACS5yrSurveySubjectTable", + "Unit": "Years", + "ScalingFactor": "", + "IsDcAggregate": false, + "ProvenanceURL": "https://data.census.gov/table?q=S0101:+Age+and+Sex\u0026tid=ACSST1Y2022.S0101" + }, { "VariableMeasured": "Median_Age_Person", "ObservationAbout": "geoId/10005", diff --git a/internal/server/spanner/golden/query_builder/get_node_edges_first_page_chain.sql b/internal/server/spanner/golden/query_builder/get_node_edges_first_page_chain.sql index 7de9b0e89..e95e2f96f 100644 --- a/internal/server/spanner/golden/query_builder/get_node_edges_first_page_chain.sql +++ b/internal/server/spanner/golden/query_builder/get_node_edges_first_page_chain.sql @@ -3,8 +3,14 @@ m.subject_id IN ('dc/g/UN'))<-[e:Edge WHERE e.predicate = 'specializationOf']-{1,10}(n:Node) - RETURN + RETURN DISTINCT m.subject_id, + n.subject_id AS value + NEXT MATCH (n) + WHERE + n.subject_id = value + RETURN + subject_id, 'specializationOf+' AS predicate, '' AS provenance, n.value, diff --git a/internal/server/spanner/golden/query_builder/get_node_edges_in_chain.sql b/internal/server/spanner/golden/query_builder/get_node_edges_in_chain.sql index 8719cb3be..2f076b99c 100644 --- a/internal/server/spanner/golden/query_builder/get_node_edges_in_chain.sql +++ b/internal/server/spanner/golden/query_builder/get_node_edges_in_chain.sql @@ -3,8 +3,14 @@ m.subject_id IN ('dc/g/Farm_FarmInventoryStatus'))<-[e:Edge WHERE e.predicate = 'specializationOf']-{1,10}(n:Node) - RETURN + RETURN DISTINCT m.subject_id, + n.subject_id AS value + NEXT MATCH (n) + WHERE + n.subject_id = value + RETURN + subject_id, 'specializationOf+' AS predicate, '' AS provenance, n.value, diff --git a/internal/server/spanner/golden/query_builder/get_node_edges_out_chain.sql b/internal/server/spanner/golden/query_builder/get_node_edges_out_chain.sql index 7a4c4069a..78a76107d 100644 --- a/internal/server/spanner/golden/query_builder/get_node_edges_out_chain.sql +++ b/internal/server/spanner/golden/query_builder/get_node_edges_out_chain.sql @@ -3,8 +3,14 @@ m.subject_id IN ('dc/g/Person_Gender'))-[e:Edge WHERE e.predicate = 'specializationOf']->{1,10}(n:Node) - RETURN + RETURN DISTINCT m.subject_id, + n.subject_id AS value + NEXT MATCH (n) + WHERE + n.subject_id = value + RETURN + subject_id, 'specializationOf+' AS predicate, '' AS provenance, n.value, diff --git a/internal/server/spanner/golden/query_builder/get_node_edges_second_page_chain.sql b/internal/server/spanner/golden/query_builder/get_node_edges_second_page_chain.sql index 5d18116ac..b0c9d584d 100644 --- a/internal/server/spanner/golden/query_builder/get_node_edges_second_page_chain.sql +++ b/internal/server/spanner/golden/query_builder/get_node_edges_second_page_chain.sql @@ -3,8 +3,14 @@ m.subject_id IN ('dc/g/UN'))<-[e:Edge WHERE e.predicate = 'specializationOf']-{1,10}(n:Node) - RETURN + RETURN DISTINCT m.subject_id, + n.subject_id AS value + NEXT MATCH (n) + WHERE + n.subject_id = value + RETURN + subject_id, 'specializationOf+' AS predicate, '' AS provenance, n.value, diff --git a/internal/server/spanner/golden/query_test.go b/internal/server/spanner/golden/query_test.go index 31d2abfce..ea3e767ee 100644 --- a/internal/server/spanner/golden/query_test.go +++ b/internal/server/spanner/golden/query_test.go @@ -18,6 +18,7 @@ import ( "context" "path" "runtime" + "sort" "testing" "github.com/datacommonsorg/mixer/internal/server/spanner" @@ -101,7 +102,12 @@ func TestGetObservations(t *testing.T) { goldenFile := c.golden + ".json" runQueryGoldenTest(t, goldenFile, func(ctx context.Context) (interface{}, error) { - return client.GetObservations(ctx, c.variables, c.entities) + actual, err := client.GetObservations(ctx, c.variables, c.entities) + if err != nil { + return nil, err + } + sortObservations(actual) + return actual, nil }) } } @@ -118,7 +124,12 @@ func TestGetObservationsContainedInPlace(t *testing.T) { goldenFile := c.golden + ".json" runQueryGoldenTest(t, goldenFile, func(ctx context.Context) (interface{}, error) { - return client.GetObservationsContainedInPlace(ctx, c.variables, c.containedInPlace) + actual, err := client.GetObservationsContainedInPlace(ctx, c.variables, c.containedInPlace) + if err != nil { + return nil, err + } + sortObservations(actual) + return actual, nil }) } } @@ -222,3 +233,21 @@ func simplifyNodes(results map[string][]*spanner.Edge) map[string][]*spanner.Edg } return filtered } + +// sortObservations sorts Observations by variable, entity, facet (primary key) to ensure deterministic order in tests. +// The final Observation responses will be sorted later based on facet rank. +func sortObservations(results []*spanner.Observation) { + sort.Slice(results, func(i, j int) bool { + a, b := results[i], results[j] + + if a.VariableMeasured != b.VariableMeasured { + return a.VariableMeasured < b.VariableMeasured + } + + if a.ObservationAbout != b.ObservationAbout { + return a.ObservationAbout < b.ObservationAbout + } + + return a.FacetId < b.FacetId + }) +} diff --git a/internal/server/spanner/query_builder.go b/internal/server/spanner/query_builder.go index d0587f4b9..966a531ff 100644 --- a/internal/server/spanner/query_builder.go +++ b/internal/server/spanner/query_builder.go @@ -108,11 +108,7 @@ func GetNodeEdgesByIDQuery(ids []string, arc *v2.Arc, offset int32) *spanner.Sta switch arc.Decorator { case CHAIN: prefix = statements.chainedEdgePrefix - if len(arc.Filter) > 0 { - returnEdges = statements.returnFilterChainedEdges - } else { - returnEdges = statements.returnChainedEdges - } + returnEdges = statements.returnChainedEdges default: prefix = statements.edgePrefix if len(arc.Filter) > 0 { diff --git a/internal/server/spanner/statements.go b/internal/server/spanner/statements.go index e090ae901..6c406bb67 100644 --- a/internal/server/spanner/statements.go +++ b/internal/server/spanner/statements.go @@ -49,8 +49,6 @@ var statements = struct { returnChainedEdges string // Subquery to return Edges with filters. returnFilterEdges string - // Subquery to return Edges for arcs with chaining and filters. - returnFilterChainedEdges string // Subquery to apply page offset. applyOffset string // Subquery to apply page limit. @@ -135,8 +133,14 @@ var statements = struct { value, provenance`, returnChainedEdges: ` - RETURN + RETURN DISTINCT m.subject_id, + n.subject_id AS value + NEXT MATCH (n) + WHERE + n.subject_id = value + RETURN + subject_id, @result_predicate AS predicate, '' AS provenance, n.value, @@ -168,24 +172,6 @@ var statements = struct { predicate, value, provenance`, - returnFilterChainedEdges: ` - RETURN - m.subject_id, - n.subject_id AS value - NEXT MATCH (n) - WHERE - n.subject_id = value - RETURN - subject_id, - @result_predicate AS predicate, - '' AS provenance, - n.value, - n.bytes, - n.name, - n.types - ORDER BY - subject_id, - value`, applyOffset: ` OFFSET %d`, applyLimit: fmt.Sprintf(`