Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prefer closest conflicting causes when backtracking #12459

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/12459.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
When there are complex conflicting requirements, make much faster backtracking choices by preferring the closest conflicting causes.
152 changes: 136 additions & 16 deletions src/pip/_internal/resolution/resolvelib/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,98 @@ def _get_with_identifier(
return default


def causes_with_conflicting_parent(
    causes: Sequence["PreferenceInformation"],
) -> Sequence["PreferenceInformation"]:
    """Return the causes that conflict with the parent of another cause.

    A cause conflicts when its requirement has the same name as the parent
    of another cause and ``requirement.is_satisfied_by(parent)`` is false.
    Both sides are reported: the cause holding the requirement and the
    cause owning the unsatisfying parent.

    :param causes: The backtrack causes to inspect. Each item exposes
        ``requirement`` and ``parent``; a falsy ``parent`` is ignored.
    :returns: A list of the conflicting causes, de-duplicated by object
        identity and ordered by first detection. Empty if nothing conflicts.
    """
    # Causes are keyed by id() so de-duplication never depends on the causes
    # being hashable or on how they compare for equality.
    all_causes_by_id = {id(c): c for c in causes}
    conflicting_causes_by_id: dict[int, "PreferenceInformation"] = {}

    # Index every parent by its name, remembering which cause owns it.
    causes_ids_and_parents_by_parent_name: dict[
        str, list[tuple[int, Candidate]]
    ] = collections.defaultdict(list)
    for cause_id, cause in all_causes_by_id.items():
        if cause.parent:
            causes_ids_and_parents_by_parent_name[cause.parent.name].append(
                (cause_id, cause.parent)
            )

    # Every cause is checked, including ones already recorded as the parent
    # side of an earlier conflict: skipping those would hide conflicts that
    # only their own requirement can reveal. Re-recording a cause is
    # harmless because the dict is keyed by id.
    for cause_id, cause in all_causes_by_id.items():
        cause_id_and_parents = causes_ids_and_parents_by_parent_name.get(
            cause.requirement.name, ()
        )
        for other_cause_id, parent in cause_id_and_parents:
            if not cause.requirement.is_satisfied_by(parent):
                conflicting_causes_by_id[cause_id] = cause
                conflicting_causes_by_id[other_cause_id] = all_causes_by_id[
                    other_cause_id
                ]

    return list(conflicting_causes_by_id.values())


def causes_with_no_candidates(
    causes: Sequence["PreferenceInformation"],
    candidates: Mapping[str, Iterator[Candidate]],
) -> Sequence["PreferenceInformation"]:
    """Return the first pair of same-project causes with no shared candidate.

    Causes are grouped by ``requirement.project_name``. Within a group, the
    specifiers of two causes are combined with ``&`` and tested against the
    versions of the candidates registered under that project name. The first
    pair for which no candidate version is contained in the combined
    specifier is returned as ``[cause, other_cause]``.

    Only one pair is ever returned, not every such pair, because walking
    the candidates can be expensive and throw exceptions.

    :param causes: The backtrack causes to inspect.
    :param candidates: Candidates available per name. A name missing from
        this mapping is treated as having no candidates at all.
    :returns: A two-item list with the offending pair, or an empty list.
    """
    # Bucket by project name up front to avoid comparing every cause with
    # every other cause.
    grouped: dict[str, list["PreferenceInformation"]] = collections.defaultdict(
        list
    )
    for info in causes:
        grouped[info.requirement.project_name].append(info)

    for project, group in grouped.items():
        if len(group) < 2:
            continue

        # Walk from the last cause towards the first, pairing each one with
        # the causes that precede it in the group.
        for position in range(len(group) - 1, -1, -1):
            current = group[position]
            current_lookup = current.requirement.get_candidate_lookup()[1]
            if current_lookup is None:
                continue

            for earlier in group[:position]:
                earlier_lookup = earlier.requirement.get_candidate_lookup()[1]
                if earlier_lookup is None:
                    continue

                merged = current_lookup.specifier & earlier_lookup.specifier
                available = candidates.get(project)

                # Nothing registered for this name: by default the pair is
                # considered to have no candidates.
                if available is None:
                    return [current, earlier]

                if any(merged.contains(found.version) for found in available):
                    continue
                return [current, earlier]

    return []


class PipProvider(_ProviderBase):
"""Pip's provider implementation for resolvelib.

Expand Down Expand Up @@ -179,16 +271,10 @@ def get_preference(
# free, so we always do it first to avoid needless work if it fails.
requires_python = identifier == REQUIRES_PYTHON_IDENTIFIER

# Prefer the causes of backtracking on the assumption that the problem
# resolving the dependency tree is related to the failures that caused
# the backtracking
backtrack_cause = self.is_backtrack_cause(identifier, backtrack_causes)

return (
not requires_python,
not direct,
not pinned,
not backtrack_cause,
inferred_depth,
requested_order,
not unfree,
Expand Down Expand Up @@ -243,13 +329,47 @@ def get_dependencies(self, candidate: Candidate) -> Sequence[Requirement]:
with_requires = not self._ignore_dependencies
return [r for r in candidate.iter_dependencies(with_requires) if r is not None]

@staticmethod
def is_backtrack_cause(
identifier: str, backtrack_causes: Sequence["PreferenceInformation"]
) -> bool:
for backtrack_cause in backtrack_causes:
if identifier == backtrack_cause.requirement.name:
return True
if backtrack_cause.parent and identifier == backtrack_cause.parent.name:
return True
return False
def filter_unsatisfied_names(
    self,
    unsatisfied_names: Iterable[str],
    resolutions: Mapping[str, Candidate],
    candidates: Mapping[str, Iterator[Candidate]],
    information: Mapping[str, Iterable["PreferenceInformation"]],
    backtrack_causes: Sequence["PreferenceInformation"],
) -> Iterable[str]:
    """Narrow the unsatisfied names to those involved in backtracking.

    Prefer unsatisfied names that belong to conflicting causes, or
    failing that, to any backtrack cause at all.

    :param unsatisfied_names: Names that still need to be pinned.
    :param resolutions: Not used by this implementation.
    :param candidates: Candidates per name, forwarded to
        ``causes_with_no_candidates``.
    :param information: Not used by this implementation.
    :param backtrack_causes: The causes of the most recent backtracking.
    :returns: ``unsatisfied_names`` itself when there are no backtrack
        causes; otherwise the set of unsatisfied names that are a cause's
        requirement name or parent name, or a list of all unsatisfied names
        when that set is empty.
    """
    if not backtrack_causes:
        return unsatisfied_names

    # Check if causes are conflicting, conflicting parents are
    # checked before no candidates because "causes_with_no_candidates"
    # may download additional candidates and extract their metadata,
    # which could be large wheels or sdists which fail to compile
    if len(backtrack_causes) > 2:
        narrowed_causes = causes_with_conflicting_parent(backtrack_causes)
        if not narrowed_causes:
            narrowed_causes = causes_with_no_candidates(
                backtrack_causes, candidates
            )
        if narrowed_causes:
            backtrack_causes = narrowed_causes

    # Extract the causes and parents names
    causes_names = set()
    for cause in backtrack_causes:
        causes_names.add(cause.requirement.name)
        if cause.parent:
            causes_names.add(cause.parent.name)

    # Materialise the names exactly once: the argument is only promised to
    # be an Iterable, so it may be a one-shot iterator. Consuming it for the
    # intersection and then returning it would hand back an exhausted
    # iterator on the fallback path.
    all_unsatisfied_names = list(unsatisfied_names)
    unsatisfied_causes_names = causes_names.intersection(all_unsatisfied_names)

    if unsatisfied_causes_names:
        return unsatisfied_causes_names

    return all_unsatisfied_names
39 changes: 33 additions & 6 deletions src/pip/_vendor/resolvelib/resolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,12 @@ def _patch_criteria():

# No way to backtrack anymore.
return False

def _extract_causes(self, criteron):
    """Collect the information entries of several criteria, without repeats.

    Entries are de-duplicated by object identity, so the same entry reached
    through two criteria is returned once, at the position where it was
    first seen. Entries that are merely equal are all kept.

    :param criteron: An iterable of criteria, each exposing ``information``.
    :returns: A list of the distinct information entries.
    """
    seen_by_id = {}
    for criterion in criteron:
        for entry in criterion.information:
            # Re-assigning an existing key keeps its original position.
            seen_by_id[id(entry)] = entry
    return list(seen_by_id.values())

def resolve(self, requirements, max_rounds):
if self._states:
Expand Down Expand Up @@ -418,16 +424,37 @@ def resolve(self, requirements, max_rounds):
return self.state

# keep track of satisfied names to calculate diff after pinning
satisfied_names = set(self.state.criteria.keys()) - set(
unsatisfied_names
unsatisfied_names_set = set(unsatisfied_names)
satisfied_names = (
set(self.state.criteria.keys()) - unsatisfied_names_set
)

filtered_unstatisfied_names = list(
self._p.filter_unsatisfied_names(
unsatisfied_names_set,
resolutions=self.state.mapping,
candidates=IteratorMapping(
self.state.criteria,
operator.attrgetter("candidates"),
),
information=IteratorMapping(
self.state.criteria,
operator.attrgetter("information"),
),
backtrack_causes=self.state.backtrack_causes,
)
)

# Choose the most preferred unpinned criterion to try.
name = min(unsatisfied_names, key=self._get_preference)
failure_causes = self._attempt_to_pin_criterion(name)
if len(filtered_unstatisfied_names) > 1:
name = min(filtered_unstatisfied_names, key=self._get_preference)
else:
name = filtered_unstatisfied_names[0]

failure_criterion = self._attempt_to_pin_criterion(name)

if failure_causes:
causes = [i for c in failure_causes for i in c.information]
if failure_criterion:
causes = self._extract_causes(failure_criterion)
# Backjump if pinning fails. The backjump process puts us in
# an unpinned state, so we can work on it in the next round.
self._r.resolving_conflicts(causes=causes)
Expand Down
Loading