diff --git a/news/12459.feature.rst b/news/12459.feature.rst new file mode 100644 index 00000000000..c82e9a35350 --- /dev/null +++ b/news/12459.feature.rst @@ -0,0 +1 @@ +When there are complex conflicting requirements, use much faster backtracking choices diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py index 315fb9c8902..ee6d19ab413 100644 --- a/src/pip/_internal/resolution/resolvelib/provider.py +++ b/src/pip/_internal/resolution/resolvelib/provider.py @@ -75,6 +75,98 @@ def _get_with_identifier( return default +def causes_with_conflicting_parent( + causes: Sequence["PreferenceInformation"], +) -> Sequence["PreferenceInformation"]: + """Given causes return which causes conflict because their parent + is not satisfied by another cause, or another cause's parent is + not satisfied by them + """ + # To avoid duplication keep track of already found conflicting causes by their id + conflicting_causes_by_id: dict[int, "PreferenceInformation"] = {} + all_causes_by_id = {id(c): c for c in causes} + + # Build a relationship between causes, cause ids, and cause parent names + causes_ids_and_parents_by_parent_name: dict[ + str, list[tuple[int, Candidate]] + ] = collections.defaultdict(list) + for cause_id, cause in all_causes_by_id.items(): + if cause.parent: + causes_ids_and_parents_by_parent_name[cause.parent.name].append( + (cause_id, cause.parent) + ) + + # Check each cause and see if it conflicts with the parent of another cause + for cause_id, cause in all_causes_by_id.items(): + if cause_id in conflicting_causes_by_id: + continue + + cause_id_and_parents = causes_ids_and_parents_by_parent_name.get( + cause.requirement.name + ) + if not cause_id_and_parents: + continue + + for other_cause_id, parent in cause_id_and_parents: + if not cause.requirement.is_satisfied_by(parent): + conflicting_causes_by_id[cause_id] = cause + conflicting_causes_by_id[other_cause_id] = all_causes_by_id[ + other_cause_id + ] 
+ + return list(conflicting_causes_by_id.values()) + + +def causes_with_no_candidates( + causes: Sequence["PreferenceInformation"], + candidates: Mapping[str, Iterator[Candidate]], +) -> Sequence["PreferenceInformation"]: + """Given causes return a cause pair that has no possible candidates, + if such a cause pair exists + + Does not return all possible causes that have no possible candidates + because searching candidates can be expensive and throw exceptions""" + # Group causes by name first to avoid large O(n^2) comparison + causes_by_name: dict[str, list["PreferenceInformation"]] = collections.defaultdict( + list + ) + for cause in causes: + causes_by_name[cause.requirement.project_name].append(cause) + + # Check each cause that has the same name, and check if + # their combined specifiers have no candidates + for cause_name, causes_list in causes_by_name.items(): + if len(causes_list) < 2: + continue + + while causes_list: + cause = causes_list.pop() + candidate = cause.requirement.get_candidate_lookup()[1] + if candidate is None: + continue + + for other_cause in causes_list: + other_candidate = other_cause.requirement.get_candidate_lookup()[1] + if other_candidate is None: + continue + + # Check if no candidate can match the combined specifier + combined_specifier = candidate.specifier & other_candidate.specifier + possible_candidates = candidates.get(cause_name) + + # If no candidates have been provided then by default + # the causes have no candidates + if possible_candidates is None: + return [cause, other_cause] + + if not any( + combined_specifier.contains(c.version) for c in possible_candidates + ): + return [cause, other_cause] + + return [] + + class PipProvider(_ProviderBase): """Pip's provider implementation for resolvelib. @@ -179,16 +271,10 @@ def get_preference( # free, so we always do it first to avoid needless work if it fails. 
requires_python = identifier == REQUIRES_PYTHON_IDENTIFIER - # Prefer the causes of backtracking on the assumption that the problem - # resolving the dependency tree is related to the failures that caused - # the backtracking - backtrack_cause = self.is_backtrack_cause(identifier, backtrack_causes) - return ( not requires_python, not direct, not pinned, - not backtrack_cause, inferred_depth, requested_order, not unfree, @@ -243,13 +329,47 @@ def get_dependencies(self, candidate: Candidate) -> Sequence[Requirement]: with_requires = not self._ignore_dependencies return [r for r in candidate.iter_dependencies(with_requires) if r is not None] - @staticmethod - def is_backtrack_cause( - identifier: str, backtrack_causes: Sequence["PreferenceInformation"] - ) -> bool: - for backtrack_cause in backtrack_causes: - if identifier == backtrack_cause.requirement.name: - return True - if backtrack_cause.parent and identifier == backtrack_cause.parent.name: - return True - return False + def filter_unsatisfied_names( + self, + unsatisfied_names: Iterable[str], + resolutions: Mapping[str, Candidate], + candidates: Mapping[str, Iterator[Candidate]], + information: Mapping[str, Iterable["PreferenceInformation"]], + backtrack_causes: Sequence["PreferenceInformation"], + ) -> Iterable[str]: + """ + Prefer backtracking on unsatisfied names that are conflicting + causes, or secondly are causes + """ + if not backtrack_causes: + return unsatisfied_names + + # Check if causes are conflicting, conflicting parents are + # checked before no candidates because "causes_with_no_candidates" + # may download additional candidates and extract their metadata, + # which could be large wheels or sdists which fail to compile + if len(backtrack_causes) > 2: + _conflicting_causes = causes_with_conflicting_parent(backtrack_causes) + if _conflicting_causes: + backtrack_causes = _conflicting_causes + else: + _conflicting_causes = causes_with_no_candidates( + backtrack_causes, candidates + ) + if 
_conflicting_causes: + backtrack_causes = _conflicting_causes + del _conflicting_causes + + # Extract the causes and parents names + causes_names = set() + for cause in backtrack_causes: + causes_names.add(cause.requirement.name) + if cause.parent: + causes_names.add(cause.parent.name) + + unsatisfied_causes_names = set(unsatisfied_names) & causes_names + + if unsatisfied_causes_names: + return unsatisfied_causes_names + + return unsatisfied_names diff --git a/src/pip/_vendor/resolvelib/resolvers.py b/src/pip/_vendor/resolvelib/resolvers.py index 2c3d0e306f9..5f3ae5bb6f1 100644 --- a/src/pip/_vendor/resolvelib/resolvers.py +++ b/src/pip/_vendor/resolvelib/resolvers.py @@ -377,6 +377,12 @@ def _patch_criteria(): # No way to backtrack anymore. return False + + def _extract_causes(self, criteron): + """Extract causes from list of criteria and deduplicate""" + return list( + {id(i): i for c in criteron for i in c.information}.values() + ) def resolve(self, requirements, max_rounds): if self._states: @@ -418,16 +424,37 @@ def resolve(self, requirements, max_rounds): return self.state # keep track of satisfied names to calculate diff after pinning - satisfied_names = set(self.state.criteria.keys()) - set( - unsatisfied_names + unsatisfied_names_set = set(unsatisfied_names) + satisfied_names = ( + set(self.state.criteria.keys()) - unsatisfied_names_set ) + filtered_unstatisfied_names = list( + self._p.filter_unsatisfied_names( + unsatisfied_names_set, + resolutions=self.state.mapping, + candidates=IteratorMapping( + self.state.criteria, + operator.attrgetter("candidates"), + ), + information=IteratorMapping( + self.state.criteria, + operator.attrgetter("information"), + ), + backtrack_causes=self.state.backtrack_causes, + ) + ) + # Choose the most preferred unpinned criterion to try. 
- name = min(unsatisfied_names, key=self._get_preference) - failure_causes = self._attempt_to_pin_criterion(name) + if len(filtered_unstatisfied_names) > 1: + name = min(filtered_unstatisfied_names, key=self._get_preference) + else: + name = filtered_unstatisfied_names[0] + + failure_criterion = self._attempt_to_pin_criterion(name) - if failure_causes: - causes = [i for c in failure_causes for i in c.information] + if failure_criterion: + causes = self._extract_causes(failure_criterion) # Backjump if pinning fails. The backjump process puts us in # an unpinned state, so we can work on it in the next round. self._r.resolving_conflicts(causes=causes)