Skip to content

Commit

Permalink
Sergiy/eng 4688 node suggestions algorithm bug (#709)
Browse files Browse the repository at this point in the history
Fixes ENG-4688

Redo a fix for wundergraph/cosmo#167
  • Loading branch information
devsergiy authored Dec 27, 2023
1 parent 0183584 commit 8a95b71
Show file tree
Hide file tree
Showing 2 changed files with 335 additions and 72 deletions.
209 changes: 139 additions & 70 deletions v2/pkg/engine/plan/datasource_filter_visitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package plan

import (
"fmt"
"slices"

"github.com/pkg/errors"

Expand All @@ -16,6 +17,8 @@ type DataSourceFilter struct {
operation *ast.Document
definition *ast.Document
report *operationreport.Report

nodes NodeSuggestions
}

func NewDataSourceFilter(operation, definition *ast.Document, report *operationreport.Report) *DataSourceFilter {
Expand Down Expand Up @@ -47,23 +50,33 @@ func (f *DataSourceFilter) FilterDataSources(dataSources []DataSourceConfigurati
}

func (f *DataSourceFilter) findBestDataSourceSet(dataSources []DataSourceConfiguration, existingNodes NodeSuggestions) NodeSuggestions {
nodes := f.collectNodes(dataSources, existingNodes)
f.nodes = f.collectNodes(dataSources, existingNodes)
if f.report.HasErrors() {
return nil
}

nodes = selectUniqNodes(nodes)
nodes = selectDuplicateNodes(nodes, false)
nodes = selectDuplicateNodes(nodes, true)
f.selectUniqNodes()
// f.printNodes("uniq nodes")
f.selectDuplicateNodes(false)
// f.printNodes("duplicate nodes")
f.selectDuplicateNodes(true)
// f.printNodes("duplicate nodes after second run")

nodes = selectedNodes(nodes)
f.nodes = f.selectedNodes()

f.isResolvable(nodes)
f.isResolvable(f.nodes)
if f.report.HasErrors() {
return nil
}

return nodes
return f.nodes
}

func (f *DataSourceFilter) printNodes(msg string) {
fmt.Println(msg)
for i := range f.nodes {
fmt.Println(f.nodes[i].String())
}
}

func (f *DataSourceFilter) collectNodes(dataSources []DataSourceConfiguration, existingNodes NodeSuggestions) (nodes NodeSuggestions) {
Expand Down Expand Up @@ -124,7 +137,8 @@ func (n *NodeSuggestion) selectWithReason(reason string) {
}

func (n *NodeSuggestion) String() string {
return fmt.Sprintf(`{"ds":%d,"path":"%s","typeName":"%s","fieldName":"%s","isRootNode":%t, "select reason": %v}`, n.DataSourceHash, n.Path, n.TypeName, n.FieldName, n.IsRootNode, n.selectionReasons)
return fmt.Sprintf(`{"ds":%d,"path":"%s","typeName":"%s","fieldName":"%s","isRootNode":%t, "isSelected": %v,"select reason": %v}`,
n.DataSourceHash, n.Path, n.TypeName, n.FieldName, n.IsRootNode, n.selected, n.selectionReasons)
}

type NodeSuggestions []NodeSuggestion
Expand Down Expand Up @@ -407,117 +421,172 @@ const (
ReasonStage3SelectAvailableNode = "stage3: select first available node"
)

func selectUniqNodes(nodes NodeSuggestions) []NodeSuggestion {
for i := range nodes {
if nodes[i].selected {
// selectUniqNodes - selects nodes (e.g. fields) which are unique to a single datasource
// In addition we select:
// - parent of such node if the node is a leaf and not nested under the fragment
// - siblings nodes
func (f *DataSourceFilter) selectUniqNodes() {
for i := range f.nodes {
if f.nodes[i].selected {
continue
}

isNodeUnique := nodes.isNodeUniq(i)
isNodeUnique := f.nodes.isNodeUniq(i)
if !isNodeUnique {
continue
}

// unique nodes always have priority
nodes[i].selectWithReason(ReasonStage1Uniq)

if !nodes[i].onFragment { // on a first stage do not select parent of nodes on fragments
// if node parent of the unique node is on the same source, prioritize it too
parentIdx, ok := nodes.parentNodeOnSameSource(i)
// Only select the parent on this stage if the node is a leaf; otherwise, the parent is selected elsewhere
if ok && nodes.isLeaf(i) {
nodes[parentIdx].selectWithReason(ReasonStage1SameSourceParent)
}
f.nodes[i].selectWithReason(ReasonStage1Uniq)

if !f.nodes[i].onFragment { // on a first stage do not select parent of nodes on fragments
// if node parents of the unique node is on the same source, prioritize it too
f.selectUniqNodeParentsUpToRootNode(i)
}

// if node has leaf children on the same source, prioritize them too
children := nodes.childNodesOnSameSource(i)
children := f.nodes.childNodesOnSameSource(i)
for _, child := range children {
if nodes.isLeaf(child) && nodes.isNodeUniq(child) {
nodes[child].selectWithReason(ReasonStage1SameSourceLeafChild)
if f.nodes.isLeaf(child) && f.nodes.isNodeUniq(child) {
f.nodes[child].selectWithReason(ReasonStage1SameSourceLeafChild)
}
}

// prioritize leaf siblings of the node on the same source
siblings := nodes.siblingNodesOnSameSource(i)
siblings := f.nodes.siblingNodesOnSameSource(i)
for _, sibling := range siblings {
if nodes.isLeaf(sibling) && nodes.isNodeUniq(sibling) {
nodes[sibling].selectWithReason(ReasonStage1SameSourceLeafSibling)
if f.nodes.isLeaf(sibling) && f.nodes.isNodeUniq(sibling) {
f.nodes[sibling].selectWithReason(ReasonStage1SameSourceLeafSibling)
}
}
}
return nodes
}

func selectDuplicateNodes(nodes NodeSuggestions, secondRun bool) []NodeSuggestion {
for i := range nodes {
if nodes[i].selected {
continue
func (f *DataSourceFilter) selectUniqNodeParentsUpToRootNode(i int) {
// When we have a chain of datasource child nodes, we should select every parent until we reach the root node
// as root node is a starting point from where we could get all theese child nodes

if f.nodes[i].IsRootNode {
// no need to select parent of a root node here
// as it could be resolved by itself
return
}

current := i
for {
parentIdx, ok := f.nodes.parentNodeOnSameSource(current)
if !ok {
break
}
f.nodes[parentIdx].selectWithReason(ReasonStage1SameSourceParent)

if nodes.isSelectedOnOtherSource(i) {
current = parentIdx
if f.nodes[current].IsRootNode {
break
}
}
}

// selectDuplicateNodes - selects nodes (e.g. fields) which are not unique to a single datasource,
// e.g. could be resolved by multiple datasources
// This method checks only nodes not already selected on the other datasource
// On a first run we are doing set of checks of surrounding nodes selection for the current analyzed node and each of its duplicates:
// - check for selected parent of a current node or its duplicates
// - check for selected childs of a current node or its duplicates
// - check for selected siblings of a current node or its duplicates
//
// On a second run in additional to all the checks from the first run
// we select nodes which was not choosen by previous stages, so we just pick first available datasource
func (f *DataSourceFilter) selectDuplicateNodes(secondRun bool) {
for i := range f.nodes {
if f.nodes[i].selected {
continue
}

// if node parent on the same source as the current node
parentIdx, ok := nodes.parentNodeOnSameSource(i)
if ok && nodes[parentIdx].selected {
nodes[i].selectWithReason(ReasonStage2SameSourceNodeOfSelectedParent)
if f.nodes.isSelectedOnOtherSource(i) {
continue
}

isSelected := false
nodeDuplicates := f.nodes.duplicatesOf(i)

// check if duplicates are on the same source as parent node
nodeDuplicates := nodes.duplicatesOf(i)
for _, duplicate := range nodeDuplicates {
parentIdx, ok := nodes.parentNodeOnSameSource(duplicate)
if ok && nodes[parentIdx].selected {
nodes[duplicate].selectWithReason(ReasonStage2SameSourceDuplicateNodeOfSelectedParent)
isSelected = true
break
}
// check for selected parent of a current node or its duplicates
if f.checkNodeParent(i) {
continue
}
if isSelected {
if f.checkNodeDuplicates(nodeDuplicates, f.checkNodeParent) {
continue
}

childs := nodes.childNodesOnSameSource(i)
for _, child := range childs {
if nodes[child].selected {
nodes[i].selectWithReason(ReasonStage2SameSourceNodeOfSelectedChild)
isSelected = true
break
}
// check for selected childs of a current node or its duplicates
if f.checkNodeChilds(i) {
continue
}
if isSelected {
if f.checkNodeDuplicates(nodeDuplicates, f.checkNodeChilds) {
continue
}

siblings := nodes.siblingNodesOnSameSource(i)
for _, sibling := range siblings {
if nodes[sibling].selected {
nodes[i].selectWithReason(ReasonStage2SameSourceNodeOfSelectedSibling)
isSelected = true
break
}
// check for selected siblings of a current node or its duplicates
if f.checkNodeSiblings(i) {
continue
}
if isSelected {
if f.checkNodeDuplicates(nodeDuplicates, f.checkNodeSiblings) {
continue
}

// if after all checks node was not selected, select it
// this could happen in case choises are fully equal
if secondRun {
nodes[i].selectWithReason(ReasonStage3SelectAvailableNode)
f.nodes[i].selectWithReason(ReasonStage3SelectAvailableNode)
}
}
return nodes
}

func selectedNodes(nodes NodeSuggestions) (out NodeSuggestions) {
for i := range nodes {
if nodes[i].selected {
out = append(out, nodes[i])
func (f *DataSourceFilter) selectedNodes() (out NodeSuggestions) {
return slices.DeleteFunc(f.nodes, func(e NodeSuggestion) bool {
return !e.selected
})
}

func (f *DataSourceFilter) checkNodeDuplicates(duplicates []int, callback func(nodeIdx int) (nodeIsSelected bool)) (nodeIsSelected bool) {
for _, duplicate := range duplicates {
if callback(duplicate) {
nodeIsSelected = true
break
}
}
return
}

func (f *DataSourceFilter) checkNodeChilds(i int) (nodeIsSelected bool) {
childs := f.nodes.childNodesOnSameSource(i)
for _, child := range childs {
if f.nodes[child].selected {
f.nodes[i].selectWithReason(ReasonStage2SameSourceNodeOfSelectedChild)
nodeIsSelected = true
break
}
}
return
}

func (f *DataSourceFilter) checkNodeSiblings(i int) (nodeIsSelected bool) {
siblings := f.nodes.siblingNodesOnSameSource(i)
for _, sibling := range siblings {
if f.nodes[sibling].selected {
f.nodes[i].selectWithReason(ReasonStage2SameSourceNodeOfSelectedSibling)
nodeIsSelected = true
break
}
}
return
}

func (f *DataSourceFilter) checkNodeParent(i int) (nodeIsSelected bool) {
parentIdx, ok := f.nodes.parentNodeOnSameSource(i)
if ok && f.nodes[parentIdx].selected {
f.nodes[i].selectWithReason(ReasonStage2SameSourceNodeOfSelectedParent)
nodeIsSelected = true
}

return
}
Loading

0 comments on commit 8a95b71

Please sign in to comment.