Skip to content

Commit c1072d5

Browse files
davitbzhkennethmhc
authored andcommitted
[FSTORE-1058] if feature view query doesn't include event time feature add for get_batch_data(start_time, end_time) (#1610)
Co-authored-by: kennethmhc <kennethmhc@users.noreply.github.com>
1 parent d04cfb3 commit c1072d5

File tree

4 files changed

+191
-31
lines changed

4 files changed

+191
-31
lines changed

hopsworks-IT/src/test/ruby/spec/featureview_query_spec.rb

Lines changed: 100 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -131,16 +131,18 @@
131131
end
132132

133133

134-
it "should be able to create sql string with different type of event time filter" do
134+
it "should be able to create sql string with different type of event time filter without them included in the selected feature list" do
135135
featurestore_id = get_featurestore_id(@project.id)
136136
project_name = @project.projectname.downcase
137137
featurestore_name = get_featurestore_name(@project.id)
138138
featuregroup_suffix = short_random_id
139139
features_a = [
140-
{ type: "INT", name: "id", primary: true },
141-
{ type: "DATE", name: "ts_date" },
142-
{ type: "TIMESTAMP", name: "ts" },
140+
{type: "INT", name: "id", primary: true },
141+
{type: "INT", name: "a_testfeature1"},
142+
{type: "INT", name: "a_testfeature2"},
143+
{type: "TIMESTAMP", name: "ts" },
143144
]
145+
144146
fg = create_cached_featuregroup_checked_return_fg(@project.id, featurestore_id,
145147
"test_fg_a#{featuregroup_suffix}",
146148
features: features_a,
@@ -150,16 +152,16 @@
150152
id: fg[:id],
151153
type: fg[:type]
152154
},
153-
leftFeatures: [{ name: 'ts_date' }, { name: 'ts' }],
155+
leftFeatures: [{ name: 'a_testfeature1' }, { name: 'a_testfeature2' }],
154156
filter: {
155157
type: "AND",
156158
leftFilter: {
157159
feature: {
158-
name: "ts_date",
160+
name: "a_testfeature1",
159161
featureGroupId: fg[:id]
160162
},
161163
condition: "GREATER_THAN",
162-
value: "2022-01-01"
164+
value: 0
163165
},
164166
rightFilter: {
165167
feature: {
@@ -177,15 +179,102 @@
177179
parsed_query_result = JSON.parse(query_result)
178180

179181
expect(parsed_query_result['query']).to eql(
180-
"SELECT `fg0`.`ts_date` `ts_date`, `fg0`.`ts` `ts`\n" +
182+
"SELECT `fg0`.`a_testfeature1` `a_testfeature1`, `fg0`.`a_testfeature2` `a_testfeature2`\n" +
181183
"FROM `#{featurestore_name}`.`test_fg_a#{featuregroup_suffix}_1` `fg0`\n" +
182-
"WHERE `fg0`.`ts_date` > DATE '#{query[:filter][:leftFilter][:value]}' AND `fg0`.`ts` > TIMESTAMP '#{query[:filter][:rightFilter][:value]}.000'"
184+
"WHERE `fg0`.`a_testfeature1` > #{query[:filter][:leftFilter][:value]} AND `fg0`.`ts` > TIMESTAMP '#{query[:filter][:rightFilter][:value]}.000'"
183185
)
184186

185187
expect(parsed_query_result['queryOnline']).to eql(
186-
"SELECT `fg0`.`ts_date` `ts_date`, `fg0`.`ts` `ts`\n" +
188+
"SELECT `fg0`.`a_testfeature1` `a_testfeature1`, `fg0`.`a_testfeature2` `a_testfeature2`\n" +
187189
"FROM `#{project_name.downcase}`.`test_fg_a#{featuregroup_suffix}_1` `fg0`\n" +
188-
"WHERE `fg0`.`ts_date` > DATE '#{query[:filter][:leftFilter][:value]}' AND `fg0`.`ts` > TIMESTAMP '#{query[:filter][:rightFilter][:value]}.000'"
190+
"WHERE `fg0`.`a_testfeature1` > #{query[:filter][:leftFilter][:value]} AND `fg0`.`ts` > TIMESTAMP '#{query[:filter][:rightFilter][:value]}.000'"
191+
)
192+
end
193+
194+
195+
it "should be able to create sql string in joins with different type of event time filter without them included in the selected feature list" do
196+
featurestore_id = get_featurestore_id(@project.id)
197+
project_name = @project.projectname.downcase
198+
featurestore_name = get_featurestore_name(@project.id)
199+
featuregroup_suffix = short_random_id
200+
features_a = [
201+
{type: "INT", name: "id", primary: true },
202+
{type: "INT", name: "a_testfeature1"},
203+
{type: "INT", name: "a_testfeature2"},
204+
{type: "TIMESTAMP", name: "ts" },
205+
]
206+
207+
features_b = [
208+
{type: "INT", name: "id", primary: true },
209+
{type: "INT", name: "b_testfeature1"},
210+
{type: "INT", name: "b_testfeature2"},
211+
{type: "TIMESTAMP", name: "ts" },
212+
]
213+
214+
fg_a = create_cached_featuregroup_checked_return_fg(@project.id, featurestore_id,
215+
"test_fg_a#{featuregroup_suffix}",
216+
features: features_a,
217+
event_time: "ts")
218+
219+
fg_b = create_cached_featuregroup_checked_return_fg(@project.id, featurestore_id,
220+
"test_fg_b#{featuregroup_suffix}",
221+
features: features_b,
222+
event_time: "ts")
223+
224+
query = {
225+
leftFeatureGroup: {
226+
id: fg_a[:id],
227+
type: fg_a[:type]
228+
},
229+
leftFeatures: [{name: 'a_testfeature1'}, {name: 'a_testfeature2'}],
230+
joins: [{
231+
query: {
232+
leftFeatureGroup: {
233+
id: fg_b[:id],
234+
type: fg_b[:type]
235+
},
236+
leftFeatures: [{name: 'b_testfeature1'}, {name: 'b_testfeature2'}],
237+
}
238+
}
239+
],
240+
filter: {
241+
type: "AND",
242+
leftFilter: {
243+
feature: {
244+
name: "a_testfeature1",
245+
featureGroupId: fg_a[:id]
246+
},
247+
condition: "GREATER_THAN",
248+
value: 0
249+
},
250+
rightFilter: {
251+
feature: {
252+
name: "ts",
253+
featureGroupId: fg_a[:id]
254+
},
255+
condition: "GREATER_THAN",
256+
value: "2022-02-01 00:00:00"
257+
}
258+
}
259+
260+
}
261+
262+
query_result = put "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/query", query.to_json
263+
expect_status_details(200)
264+
parsed_query_result = JSON.parse(query_result)
265+
266+
expect(parsed_query_result['query']).to eql(
267+
"SELECT `fg1`.`a_testfeature1` `a_testfeature1`, `fg1`.`a_testfeature2` `a_testfeature2`, `fg0`.`b_testfeature1` `b_testfeature1`, `fg0`.`b_testfeature2` `b_testfeature2`\n" +
268+
"FROM `#{featurestore_name}`.`test_fg_a#{featuregroup_suffix}_1` `fg1`\n" +
269+
"INNER JOIN `#{featurestore_name}`.`test_fg_b#{featuregroup_suffix}_1` `fg0` ON `fg1`.`id` = `fg0`.`id`\n" +
270+
"WHERE `fg1`.`a_testfeature1` > #{query[:filter][:leftFilter][:value]} AND `fg1`.`ts` > TIMESTAMP '#{query[:filter][:rightFilter][:value]}.000'"
271+
)
272+
273+
expect(parsed_query_result['queryOnline']).to eql(
274+
"SELECT `fg1`.`a_testfeature1` `a_testfeature1`, `fg1`.`a_testfeature2` `a_testfeature2`, `fg0`.`b_testfeature1` `b_testfeature1`, `fg0`.`b_testfeature2` `b_testfeature2`\n" +
275+
"FROM `#{project_name.downcase}`.`test_fg_a#{featuregroup_suffix}_1` `fg1`\n" +
276+
"INNER JOIN `#{project_name.downcase}`.`test_fg_b#{featuregroup_suffix}_1` `fg0` ON `fg1`.`id` = `fg0`.`id`\n" +
277+
"WHERE `fg1`.`a_testfeature1` > #{query[:filter][:leftFilter][:value]} AND `fg1`.`ts` > TIMESTAMP '#{query[:filter][:rightFilter][:value]}.000'"
189278
)
190279
end
191280
end

hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/ConstructorController.java

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -334,26 +334,30 @@ public List<Feature> collectFeatures(Query query) {
334334
public List<Feature> collectFeaturesFromFilter(FilterLogic filter) {
335335
return this.collectFeaturesFromFilter(filter, null);
336336
}
337-
337+
338338
public List<Feature> collectFeaturesFromFilter(FilterLogic filter, Query query) {
339339
List<Feature> features = new ArrayList<>();
340-
if (filter.getLeftFilter() != null) {
340+
collectFeatureFromFilter(filter, features, query);
341+
return features;
342+
}
343+
344+
private void collectFeatureFromFilter(FilterLogic filter, List<Feature> features, Query query) {
345+
if(filter.getLeftFilter() != null) {
341346
features.addAll(filter.getLeftFilter().getFeatures().stream().filter(f ->
342-
(query == null || f.getFeatureGroup().equals( query.getFeaturegroup()))).collect(Collectors.toList()));
347+
(query == null || f.getFeatureGroup().equals( query.getFeaturegroup()))).collect(Collectors.toList()));
343348
}
344-
if (filter.getRightFilter() != null) {
349+
if(filter.getRightFilter() != null) {
345350
features.addAll(filter.getRightFilter().getFeatures().stream().filter(f ->
346-
(query == null || f.getFeatureGroup().equals( query.getFeaturegroup()))).collect(Collectors.toList()));
351+
(query == null || f.getFeatureGroup().equals( query.getFeaturegroup()))).collect(Collectors.toList()));
347352
}
348-
if (filter.getLeftLogic() != null) {
349-
features.addAll(this.collectFeaturesFromFilter(filter.getLeftLogic(), query));
353+
if(filter.getLeftLogic() !=null) {
354+
collectFeatureFromFilter(filter.getLeftLogic(), features, query);
350355
}
351-
if (filter.getRightLogic() != null) {
352-
features.addAll(this.collectFeaturesFromFilter(filter.getRightLogic(), query));
356+
if(filter.getRightLogic() !=null) {
357+
collectFeatureFromFilter(filter.getRightLogic(), features, query);
353358
}
354-
return features;
355359
}
356-
360+
357361
private SqlNode generateCachedTableNode(Query query, boolean online) {
358362
List<String> tableIdentifierStr = new ArrayList<>();
359363
if (online) {

hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/pit/PitJoinController.java

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -133,18 +133,25 @@ public List<SqlCall> generateSubQueries(Query baseQuery, Query query, boolean is
133133

134134
// Add filters if needed
135135
List<Feature> originalFeatures = new ArrayList<>(join.getRightQuery().getFeatures());
136+
136137
if (query.getFilter() != null) {
137138
// if all filters are on left-sided features, add them to the base/inner query
138-
if (baseQuery.getFilter() == null && !constructorController.collectFeaturesFromFilter(query.getFilter()
139-
).stream().anyMatch(f -> !query.getFeatures().contains(f))) {
139+
List<Feature> leftQueryFilterFeatures = constructorController.collectFeaturesFromFilter(query.getFilter());
140+
if (baseQuery.getFilter() == null && !leftQueryFilterFeatures
141+
.stream().anyMatch(f -> !query.getAvailableFeatures().contains(f))) {
140142
baseQuery.setFilter(query.getFilter());
141-
}
142-
// else, filters will be added to the outer query and relevant features need to be selected in the subquery
143-
else {
144-
List<Feature> filterFeatures = constructorController.collectFeaturesFromFilter(query.getFilter(),
145-
join.getRightQuery()).stream().filter(f -> !join.getRightQuery().getFeatures().contains(f))
146-
.collect(Collectors.toList());
147-
join.getRightQuery().getFeatures().addAll(filterFeatures);
143+
} else {
144+
// else, filters will be added to the outer query and relevant features need to be selected in the subquery
145+
List<Feature> filterOuterQueryFeatures = constructorController.collectFeaturesFromFilter(query.getFilter(),
146+
join.getRightQuery()).stream()
147+
.filter(f -> !join.getRightQuery().getAvailableFeatures().contains(f))
148+
.collect(Collectors.toList());
149+
List<Feature> filterLFeatures = constructorController.collectFeaturesFromFilter(query.getFilter(),
150+
join.getLeftQuery()).stream()
151+
.filter(f -> !join.getLeftQuery().getAvailableFeatures().contains(f))
152+
.collect(Collectors.toList());
153+
filterOuterQueryFeatures.addAll(filterLFeatures);
154+
join.getRightQuery().getFeatures().addAll(filterOuterQueryFeatures);
148155
}
149156
}
150157

hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/featuregroup/pit/TestPitJoinController.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,7 @@ public void testGenerateSqlWithRightFilterInner() {
739739
"SELECT `right_fg0`.`pk1` `pk1`, `right_fg0`.`pk2` `pk2`, `right_fg0`.`ts` `ts`, `right_fg0`.`label` `label`, `right_fg0`.`pk1` `pk1`, `right_fg0`.`pk2` `pk2`, `right_fg0`.`ts` `ts`, `right_fg0`.`ft1` `ft1`, `right_fg1`.`R_pk1` `R_pk1`, `right_fg1`.`R_ts` `R_ts`, `right_fg1`.`R_ft1` `R_ft1`\n" +
740740
"FROM right_fg0\n" +
741741
"INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = `right_fg1`.`join_evt_ts`)";
742+
742743
Assert.assertEquals(expected, result);
743744
}
744745

@@ -804,6 +805,65 @@ public void testGenerateSqlWithLeftFilter() {
804805
"INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = `right_fg1`.`join_evt_ts`)";
805806
Assert.assertEquals(expected, result);
806807
}
808+
809+
@Test
810+
public void testGenerateSqlWithLeftFilterOmitted() {
811+
List<Feature> leftFeatures = new ArrayList<>();
812+
Feature filterFeature = new Feature("label", "fg0", fgLeft, "int", null);
813+
leftFeatures.add(new Feature("pk1", "fg0", fgLeft, true));
814+
leftFeatures.add(new Feature("pk2", "fg0", fgLeft));
815+
leftFeatures.add(filterFeature);
816+
817+
List<Feature> rightFeatures = new ArrayList<>();
818+
rightFeatures.add(new Feature("pk1", "fg1", fgRight));
819+
rightFeatures.add(new Feature("pk2", "fg1", fgRight));
820+
rightFeatures.add(new Feature("ft1", "fg1", fgRight));
821+
822+
List<Feature> rightFeatures1 = new ArrayList<>();
823+
rightFeatures1.add(new Feature("pk1", "fg2", fgRight1));
824+
rightFeatures1.add(new Feature("ft1", "fg2", fgRight1));
825+
826+
List<Feature> leftOn = Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
827+
List<Feature> rightOn = Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));
828+
829+
// join on different pks
830+
List<Feature> leftOn1 = Collections.singletonList(new Feature("pk1", "fg0", fgLeft));
831+
List<Feature> rightOn1 = Collections.singletonList(new Feature("pk1", "fg2", fgRight1));
832+
833+
List<SqlCondition> joinOperator = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);
834+
List<SqlCondition> joinOperator1 = Collections.singletonList(SqlCondition.EQUALS);
835+
836+
FilterLogic filter = new FilterLogic(new Filter(Arrays.asList(filterFeature), SqlCondition.EQUALS, "1"));
837+
838+
Query query = new Query("fs", "project", fgLeft, "fg0", leftFeatures, leftFeatures, false, filter);
839+
Query right = new Query("fs", "project", fgRight, "fg1", rightFeatures, rightFeatures, false, null);
840+
Query right1 = new Query("fs", "project", fgRight1, "fg2", rightFeatures1, rightFeatures1, false, null);
841+
842+
Join join = new Join(query, right, leftOn, rightOn, JoinType.INNER, null, joinOperator);
843+
Join join1 = new Join(query, right1, leftOn1, rightOn1, JoinType.INNER, "R_", joinOperator1);
844+
845+
query.setJoins(Arrays.asList(join, join1));
846+
847+
String result =
848+
pitJoinController.generateSQL(query, false).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql();
849+
String expected = "WITH right_fg0 AS (SELECT *\n" +
850+
"FROM (SELECT `fg0`.`pk1` `pk1`, `fg0`.`pk2` `pk2`, `fg0`.`label` `label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg1`.`pk1` `pk1`, `fg1`.`pk2` `pk2`, `fg1`.`ft1` `ft1`, RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n" +
851+
"FROM `fs`.`fg0_1` `fg0`\n" +
852+
"INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`\n" +
853+
"WHERE `fg0`.`label` = 1) NA\n" +
854+
"WHERE `pit_rank_hopsworks` = 1), right_fg1 AS (SELECT *\n" +
855+
"FROM (SELECT `fg0`.`pk1` `pk1`, `fg0`.`pk2` `pk2`, `fg0`.`label` `label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg2`.`pk1` `R_pk1`, `fg2`.`ft1` `R_ft1`, RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`ts` ORDER BY `fg2`.`ts` DESC) pit_rank_hopsworks\n" +
856+
"FROM `fs`.`fg0_1` `fg0`\n" +
857+
"INNER JOIN `fs`.`fg2_1` `fg2` ON `fg0`.`pk1` = `fg2`.`pk1` AND `fg0`.`ts` >= `fg2`.`ts`\n" +
858+
"WHERE `fg0`.`label` = 1) NA\n" +
859+
"WHERE `pit_rank_hopsworks` = 1) (SELECT `right_fg0`.`pk1` `pk1`, `right_fg0`.`pk2` `pk2`, `right_fg0`.`label` `label`, `right_fg0`.`pk1` `pk1`, `right_fg0`.`pk2` `pk2`, `right_fg0`.`ft1` `ft1`, `right_fg1`.`R_pk1` `R_pk1`, `right_fg1`.`R_ft1` `R_ft1`\n" +
860+
"FROM right_fg0\n" +
861+
"INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = " +
862+
"`right_fg1`.`join_evt_ts`)";
863+
864+
System.out.println(result);
865+
Assert.assertEquals(expected, result);
866+
}
807867

808868
@Test
809869
public void testGenerateSqlWithDefault() {

0 commit comments

Comments
 (0)