From 7bedd90388b60130422c0ab3d11500f212fa8da0 Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Sat, 11 Jan 2025 17:03:16 +0100 Subject: [PATCH] Implement external import path --- src/imports_info/mod.rs | 5 +- src/imports_info/queries/external_imports.rs | 231 ++++++++++++++++++- src/imports_info/queries/internal_imports.rs | 10 +- src/pypath.rs | 13 ++ 4 files changed, 252 insertions(+), 7 deletions(-) diff --git a/src/imports_info/mod.rs b/src/imports_info/mod.rs index 2623b90a..f045fcaa 100644 --- a/src/imports_info/mod.rs +++ b/src/imports_info/mod.rs @@ -3,7 +3,10 @@ mod queries; use crate::errors::Error; -pub use crate::imports_info::queries::external_imports::ExternalImportsQueries; +pub use crate::imports_info::queries::external_imports::{ + ExternalImportsPathQuery, ExternalImportsPathQueryBuilder, + ExternalImportsPathQueryBuilderError, ExternalImportsQueries, +}; pub use crate::imports_info::queries::internal_imports::{ InternalImportsPathQuery, InternalImportsPathQueryBuilder, InternalImportsPathQueryBuilderError, InternalImportsQueries, diff --git a/src/imports_info/queries/external_imports.rs b/src/imports_info/queries/external_imports.rs index 3ded34b0..a8d49e61 100644 --- a/src/imports_info/queries/external_imports.rs +++ b/src/imports_info/queries/external_imports.rs @@ -1,16 +1,50 @@ -use std::collections::{HashMap, HashSet}; - use crate::errors::Error; use crate::imports_info::{ImportMetadata, ImportsInfo}; use crate::package_info::PackageItemToken; use crate::pypath::Pypath; +use crate::testpackage; use anyhow::Result; +use derive_builder::Builder; +use derive_more::{IsVariant, Unwrap}; +use derive_new::new; +use getset::Getters; +use maplit::hashset; +use pathfinding::prelude::bfs; +use std::collections::{HashMap, HashSet}; /// An object that allows querying external imports. pub struct ExternalImportsQueries<'a> { pub(crate) imports_info: &'a ImportsInfo, } +/// An object representing an external imports path query. +#[derive(Debug, Clone, new, Getters, Builder)] +#[builder(setter(into))] +pub struct ExternalImportsPathQuery { + /// Package items from which paths may start. + #[new(into)] + #[getset(get = "pub")] + from: HashSet, + + /// External items where paths may end. + #[new(into)] + #[getset(get = "pub")] + to: HashSet, + + /// Paths that would go via these package items should be excluded. + #[new(into)] + #[getset(get = "pub")] + #[builder(default)] + excluding_paths_via: HashSet, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, IsVariant, Unwrap)] +enum PathfindingNode<'a> { + Initial, + PackageItem(&'a PackageItemToken), + ExternalItem(&'a Pypath), +} + impl<'a> ExternalImportsQueries<'a> { /// Returns a map of all the direct imports. /// @@ -262,6 +296,124 @@ impl<'a> ExternalImportsQueries<'a> { } } + /// Returns the shortest import path or `None` if no path can be found. + /// + /// ``` + /// # use std::collections::HashSet; + /// # use anyhow::Result; + /// # use maplit::{hashmap, hashset}; + /// # use pyimports::{testpackage, testutils::TestPackage}; + /// use pyimports::package_info::PackageInfo; + /// use pyimports::imports_info::{ImportsInfo,ImportMetadata,ExternalImportsPathQueryBuilder}; + /// + /// # fn main() -> Result<()> { + /// let testpackage = testpackage! { + /// "__init__.py" => "", + /// "a.py" => "from testpackage import b", + /// "b.py" => "from testpackage import c", + /// "c.py" => "from django.db import models" + /// }; + /// + /// let package_info = PackageInfo::build(testpackage.path())?; + /// let imports_info = ImportsInfo::build(package_info)?; + /// + /// let a = imports_info.package_info() + /// .get_item_by_pypath(&"testpackage.a".parse()?).unwrap() + /// .token(); + /// let b = imports_info.package_info() + /// .get_item_by_pypath(&"testpackage.b".parse()?).unwrap() + /// .token(); + /// let c = imports_info.package_info() + /// .get_item_by_pypath(&"testpackage.c".parse()?).unwrap() + /// .token(); + /// + /// assert_eq!( + /// imports_info.external_imports().find_path( + /// &ExternalImportsPathQueryBuilder::default() + /// .from(a) + /// .to(&"django.db.models".parse()?) + /// .build()? + /// )?, + /// Some((vec![a, b, c], "django.db.models".parse()?)) + /// ); + /// # Ok(()) + /// # } + /// ``` + pub fn find_path( + &'a self, + query: &ExternalImportsPathQuery, + ) -> Result, Pypath)>> { + for item in query.from.iter() { + self.imports_info.package_info.get_item(*item)?; + } + for item in query.excluding_paths_via.iter() { + self.imports_info.package_info.get_item(*item)?; + } + + let empty_package_items = hashset! {}; + let empty_external_items = hashset! {}; + + let path = bfs( + &PathfindingNode::Initial, + // Successors + |item| { + let internal_items = match item { + PathfindingNode::Initial => &query.from, + PathfindingNode::PackageItem(item) => { + self.imports_info.internal_imports.get(item).unwrap() + } + PathfindingNode::ExternalItem(_) => &empty_package_items, + }; + + let external_items = match item { + PathfindingNode::Initial => &empty_external_items, + PathfindingNode::PackageItem(item) => { + self.imports_info.external_imports.get(item).unwrap() + } + PathfindingNode::ExternalItem(_) => &empty_external_items, + }; + + let internal_items = internal_items + .difference(&query.excluding_paths_via) + .map(PathfindingNode::PackageItem); + + let external_items = external_items.iter().map(PathfindingNode::ExternalItem); + + internal_items.chain(external_items) + }, + // Success + |item| match item { + PathfindingNode::Initial => false, + PathfindingNode::PackageItem(_) => false, + PathfindingNode::ExternalItem(pypath) => query.to.contains(pypath), + }, + ); + + if path.is_none() { + return Ok(None); + } + + let mut path = path.unwrap(); + let external_item = path.pop().unwrap().unwrap_external_item().clone(); + + let path = path + .into_iter() + .skip(1) + .map(|item| match item { + PathfindingNode::PackageItem(item) => item, + _ => panic!(), + }) + .cloned() + .collect::>(); + + Ok(Some((path, external_item))) + } + + /// Returns true if an import path exists. + pub fn path_exists(&'a self, query: &ExternalImportsPathQuery) -> Result { + Ok(self.find_path(query)?.is_some()) + } + #[allow(dead_code)] fn get_equal_to_or_descendant_imports(&self, pypath: &Pypath) -> HashSet { self.imports_info @@ -437,4 +589,79 @@ mod tests { Ok(()) } + + #[test] + fn test_find_path() -> Result<()> { + let testpackage = testpackage! { + "__init__.py" => "", + "a.py" => "from testpackage import b", + "b.py" => "from testpackage import c", + "c.py" => "from django.db import models" + }; + + let package_info = PackageInfo::build(testpackage.path())?; + let imports_info = ImportsInfo::build(package_info)?; + + let a = imports_info._item("testpackage.a"); + let b = imports_info._item("testpackage.b"); + let c = imports_info._item("testpackage.c"); + + assert_eq!( + imports_info.external_imports().find_path( + &ExternalImportsPathQueryBuilder::default() + .from(a) + .to(&"django.db.models".parse()?) + .build()? + )?, + Some((vec![a, b, c], "django.db.models".parse()?)) + ); + + Ok(()) + } + + #[test] + fn test_find_path_excluding_via() -> Result<()> { + let testpackage = testpackage! { + "__init__.py" => "", + "a.py" => "from testpackage import b, e", + "b.py" => "from testpackage import c", + "c.py" => "from django.db import models", + "d.py" => "from testpackage import c", + "e.py" => "from testpackage import d" + }; + + let package_info = PackageInfo::build(testpackage.path())?; + let imports_info = ImportsInfo::build(package_info)?; + + let a = imports_info._item("testpackage.a"); + let b = imports_info._item("testpackage.b"); + let c = imports_info._item("testpackage.c"); + let d = imports_info._item("testpackage.d"); + let e = imports_info._item("testpackage.e"); + + // Sanity check + assert_eq!( + imports_info.external_imports().find_path( + &ExternalImportsPathQueryBuilder::default() + .from(a) + .to(&"django.db.models".parse()?) + .build()? + )?, + Some((vec![a, b, c], "django.db.models".parse()?)) + ); + + // Excluding b we need to go via the longer path + assert_eq!( + imports_info.external_imports().find_path( + &ExternalImportsPathQueryBuilder::default() + .from(a) + .to(&"django.db.models".parse()?) + .excluding_paths_via(b) + .build()? + )?, + Some((vec![a, e, d, c], "django.db.models".parse()?)) + ); + + Ok(()) + } } diff --git a/src/imports_info/queries/internal_imports.rs b/src/imports_info/queries/internal_imports.rs index ad96a07b..89fc56e6 100644 --- a/src/imports_info/queries/internal_imports.rs +++ b/src/imports_info/queries/internal_imports.rs @@ -5,6 +5,7 @@ use crate::imports_info::{ImportMetadata, ImportsInfo}; use crate::package_info::PackageItemToken; use anyhow::Result; use derive_builder::Builder; +use derive_more::{IsVariant, Unwrap}; use derive_new::new; use getset::Getters; use pathfinding::prelude::{bfs, bfs_reach}; @@ -97,7 +98,7 @@ pub struct InternalImportsPathQuery { excluding_paths_via: HashSet, } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, IsVariant, Unwrap)] enum PathfindingNode<'a> { Initial, PackageItem(&'a PackageItemToken), @@ -484,8 +485,7 @@ impl<'a> InternalImportsQueries<'a> { } } - /// Returns the shortest import path between the passed package items, - /// or `None` if no path can be found. + /// Returns the shortest import path or `None` if no path can be found. /// /// ``` /// # use std::collections::HashSet; @@ -547,6 +547,7 @@ impl<'a> InternalImportsQueries<'a> { let path = bfs( &PathfindingNode::Initial, + // Successors |item| { let items = match item { PathfindingNode::Initial => &query.from, @@ -559,6 +560,7 @@ impl<'a> InternalImportsQueries<'a> { .difference(&query.excluding_paths_via) .map(PathfindingNode::PackageItem) }, + // Success |item| match item { PathfindingNode::Initial => false, PathfindingNode::PackageItem(item) => query.to.contains(item), @@ -579,7 +581,7 @@ impl<'a> InternalImportsQueries<'a> { Ok(path) } - /// Returns true if an import path exists between the passed package items. + /// Returns true if an import path exists. /// /// ``` /// # use std::collections::HashSet; diff --git a/src/pypath.rs b/src/pypath.rs index d10357fb..de0bcc85 100644 --- a/src/pypath.rs +++ b/src/pypath.rs @@ -1,5 +1,6 @@ //! The `pypath` module provides utilities for working with dotted python import paths. +use std::collections::HashSet; use std::path::Path; use std::str::FromStr; @@ -9,6 +10,7 @@ use anyhow::Result; use derive_more::derive::{Display, Into}; use derive_more::Deref; use lazy_static::lazy_static; +use maplit::hashset; use regex::Regex; lazy_static! { @@ -156,6 +158,17 @@ impl Pypath { } } +impl From for HashSet { + fn from(p: Pypath) -> Self { + hashset! {p} + } +} + +impl From<&Pypath> for HashSet { + fn from(p: &Pypath) -> Self { + hashset! {p.clone()} + } +} #[cfg(test)] mod tests { use super::*;