Skip to content
Open
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/pyrefly_util/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ memory-stats = "1.2.0"
notify = "8.2.0"
parse-display = "0.8.2"
path-absolutize = { version = "3.1.1", features = ["use_unix_paths_on_wasm"] }
pathdiff = "0.2"
rayon = "1.11.0"
ruff_notebook = { git = "https://github.com/astral-sh/ruff/", rev = "474b00568ad78f02ad8e19b8166cbeb6d69f8511" }
ruff_python_ast = { git = "https://github.com/astral-sh/ruff/", rev = "474b00568ad78f02ad8e19b8166cbeb6d69f8511" }
Expand Down
1 change: 1 addition & 0 deletions crates/pyrefly_util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ pub mod owner;
pub mod panic;
pub mod prelude;
pub mod recurser;
pub mod relativize;
pub mod ruff_visitors;
pub mod small_map1;
pub mod small_set1;
Expand Down
19 changes: 19 additions & 0 deletions crates/pyrefly_util/src/relativize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

use std::path::Path;
use std::path::PathBuf;

/// Extension trait for expressing a path relative to some base path.
pub trait Relativize {
/// Returns `self` rewritten as a path relative to `base`.
///
/// The implementation for [`Path`] in this module delegates to `pathdiff::diff_paths`
/// and returns an owned copy of `self` unchanged when no relative form can be computed.
fn relativize_from(&self, base: &Path) -> PathBuf;
}

impl Relativize for Path {
    /// Expresses `self` relative to `base` using `pathdiff::diff_paths`.
    ///
    /// When `diff_paths` yields `None` (no relative form could be computed), the
    /// original path is returned as an owned `PathBuf` so callers always get a usable path.
    fn relativize_from(&self, base: &Path) -> PathBuf {
        match pathdiff::diff_paths(self, base) {
            Some(relative) => relative,
            // No relative form available: keep the path exactly as given.
            None => self.to_path_buf(),
        }
    }
}
6 changes: 1 addition & 5 deletions pyrefly/lib/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -986,11 +986,7 @@ impl CheckArgs {
let collected = loads.collect_errors();
// Pass pre-collected errors to avoid redundant error collection.
let unused_ignore_errors = loads.collect_unused_ignore_errors_for_display(&collected);
let errors = loads.apply_baseline(
collected,
self.output.baseline.as_deref(),
relative_to.as_path(),
);
let errors = loads.apply_baseline(collected, self.output.baseline.as_deref());
let (directives, ordinary_errors) = if let Some(only) = &self.output.only {
let only = only.iter().collect::<SmallSet<_>>();
(
Expand Down
146 changes: 94 additions & 52 deletions pyrefly/lib/error/baseline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,41 +9,43 @@ use std::collections::HashSet;
use std::path::Path;

use anyhow::Result;
use pyrefly_util::absolutize::Absolutize;
use pyrefly_util::fs_anyhow;

use crate::error::error::Error;
use crate::error::legacy::LegacyError;
use crate::error::legacy::LegacyErrors;

/// Identity of an error for baseline comparison.
///
/// If an error with an exactly matching path, error slug, and starting column exists in the baseline, we ignore it.
/// Keys always use absolute paths internally so that comparison is decoupled from the path format used in the baseline file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct BaselineKey {
/// File path of the error as produced by `normalize_path`: absolutized, with `\` replaced by `/`.
path: String,
/// Error kind name (slug), e.g. `bad-return`.
name: String,
/// Starting column of the error.
column: usize,
}

impl From<&LegacyError> for BaselineKey {
fn from(baseline_error: &LegacyError) -> Self {
impl BaselineKey {
fn normalize_path(path: &Path) -> String {
path.absolutize().to_string_lossy().replace('\\', "/")
}

fn from_error(error: &Error) -> Self {
Self {
path: baseline_error.path.clone(),
name: baseline_error.name.clone(),
column: baseline_error.column,
path: Self::normalize_path(error.path().as_path()),
name: error.error_kind().to_name().to_owned(),
column: error.display_range().start.column().get() as usize,
}
}
}

impl BaselineKey {
fn from_error(error: &Error, relative_to: &Path) -> Self {
let error_path = error.path().as_path();
impl From<&LegacyError> for BaselineKey {
fn from(baseline_error: &LegacyError) -> Self {
let path = Path::new(&baseline_error.path);
Self {
path: error_path
.strip_prefix(relative_to)
.unwrap_or(error_path)
.to_string_lossy()
.into_owned(),
name: error.error_kind().to_name().to_owned(),
column: error.display_range().start.column().get() as usize,
path: BaselineKey::normalize_path(path),
name: baseline_error.name.clone(),
column: baseline_error.column,
}
}
}
Expand All @@ -62,22 +64,17 @@ impl BaselineProcessor {
Ok(Self { baseline_keys })
}

pub fn matches_baseline(&self, error: &Error, relative_to: &Path) -> bool {
let key = BaselineKey::from_error(error, relative_to);
pub fn matches_baseline(&self, error: &Error) -> bool {
let key = BaselineKey::from_error(error);
self.baseline_keys.contains(&key)
}

/// Baseline suppressions are processed last, after inline and config suppressions
pub fn process_errors(
&self,
shown_errors: &mut Vec<Error>,
baseline_errors: &mut Vec<Error>,
relative_to: &Path,
) {
pub fn process_errors(&self, shown_errors: &mut Vec<Error>, baseline_errors: &mut Vec<Error>) {
let mut remaining_errors = Vec::new();

for error in shown_errors.drain(..) {
if self.matches_baseline(&error, relative_to) {
if self.matches_baseline(&error) {
baseline_errors.push(error);
} else {
remaining_errors.push(error);
Expand Down Expand Up @@ -107,7 +104,7 @@ mod tests {
fn test_baseline_key_generation() {
let module = Module::new(
ModuleName::from_str("test_module"),
ModulePath::filesystem(PathBuf::from("test/path.py")),
ModulePath::filesystem(PathBuf::from("/workspace/test/path.py")),
Arc::new("test content".to_owned()),
);

Expand All @@ -118,47 +115,47 @@ mod tests {
ErrorKind::BadReturn,
);

let key = BaselineKey::from_error(&error, Path::new("/root"));
let key = BaselineKey::from_error(&error);

assert_eq!(key.path, "test/path.py");
let expected_path = Path::new("/workspace/test/path.py")
.absolutize()
.to_string_lossy()
.replace('\\', "/");
assert_eq!(key.path, expected_path);
assert_eq!(key.name, "bad-return");
assert_eq!(key.column, 1);
}

#[test]
fn test_baseline_matching() {
let baseline_json = r#"
{
"errors": [
{
"line": 1,
"column": 3,
"stop_line": 1,
"stop_column": 5,
"path": "test.py",
"code": -2,
"name": "bad-return",
"description": "Test error",
"concise_description": "Test error"
}
]
}
"#;

let baseline_file: LegacyErrors = serde_json::from_str(baseline_json).unwrap();
let baseline_json = serde_json::json!({
"errors": [{
"line": 1,
"column": 3,
"stop_line": 1,
"stop_column": 5,
"path": "/workspace/test.py",
"code": -2,
"name": "bad-return",
"description": "Test error",
"concise_description": "Test error"
}]
});

let baseline_file: LegacyErrors = serde_json::from_value(baseline_json).unwrap();
let baseline_keys: HashSet<BaselineKey> =
baseline_file.errors.iter().map(BaselineKey::from).collect();

let processor = BaselineProcessor { baseline_keys };

let module = Module::new(
ModuleName::from_str("test_module"),
ModulePath::filesystem(PathBuf::from("test.py")),
ModulePath::filesystem(PathBuf::from("/workspace/test.py")),
Arc::new("test content 123456789".to_owned()),
);
let module2 = Module::new(
ModuleName::from_str("test_module2"),
ModulePath::filesystem(PathBuf::from("test2.py")),
ModulePath::filesystem(PathBuf::from("/workspace/test2.py")),
Arc::new("test content 123456789".to_owned()),
);

Expand All @@ -169,7 +166,7 @@ mod tests {
vec1!["Any error message".to_owned()],
ErrorKind::BadReturn,
);
assert!(processor.matches_baseline(&error1, Path::new("/")));
assert!(processor.matches_baseline(&error1));

// This error should not match (different column)
let error2 = Error::new(
Expand All @@ -178,7 +175,7 @@ mod tests {
vec1!["Test error".to_owned()],
ErrorKind::BadReturn,
);
assert!(!processor.matches_baseline(&error2, Path::new("/")));
assert!(!processor.matches_baseline(&error2));

// This error should not match (different error code)
let error3 = Error::new(
Expand All @@ -187,7 +184,7 @@ mod tests {
vec1!["Any error message".to_owned()],
ErrorKind::AssertType,
);
assert!(!processor.matches_baseline(&error3, Path::new("/")));
assert!(!processor.matches_baseline(&error3));

// This error should not match (different module)
let error4 = Error::new(
Expand All @@ -196,6 +193,51 @@ mod tests {
vec1!["Any error message".to_owned()],
ErrorKind::BadReturn,
);
assert!(!processor.matches_baseline(&error4, Path::new("/")));
assert!(!processor.matches_baseline(&error4));
}

/// Check that an error matches a baseline entry regardless of how the path is stored.
fn assert_baseline_path_matches(baseline_path: &str) {
let cwd = std::env::current_dir().unwrap();
let abs_path = cwd.join("src/foo.py");

let baseline_json = serde_json::json!({
"errors": [{
"line": 1, "column": 5, "stop_line": 1, "stop_column": 10,
"path": baseline_path,
"code": -2, "name": "bad-return",
"description": "test", "concise_description": "test"
}]
});

let baseline_file: LegacyErrors = serde_json::from_value(baseline_json).unwrap();
let baseline_keys: HashSet<BaselineKey> =
baseline_file.errors.iter().map(BaselineKey::from).collect();
let processor = BaselineProcessor { baseline_keys };

let module = Module::new(
ModuleName::from_str("foo"),
ModulePath::filesystem(abs_path),
Arc::new("test content 123456789".to_owned()),
);
let error = Error::new(
module,
TextRange::new(TextSize::new(4), TextSize::new(10)),
vec1!["err".to_owned()],
ErrorKind::BadReturn,
);
assert!(processor.matches_baseline(&error));
}

/// A baseline entry that stores the absolute path of the file must match the error.
#[test]
fn test_baseline_matches_absolute_path() {
    let absolute = std::env::current_dir().unwrap().join("src/foo.py");
    let as_text = absolute.to_string_lossy();
    assert_baseline_path_matches(&as_text);
}

/// A baseline entry that stores the path relative to the current directory must match
/// the error reported at the absolute location of that same file.
#[test]
fn test_baseline_matches_relative_path() {
    let relative = "src/foo.py";
    assert_baseline_path_matches(relative);
}
}
6 changes: 3 additions & 3 deletions pyrefly/lib/error/legacy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::path::Path;

use pyrefly_config::error_kind::Severity;
use pyrefly_util::prelude::SliceExt;
use pyrefly_util::relativize::Relativize;
use serde::Deserialize;
use serde::Serialize;

Expand Down Expand Up @@ -63,10 +64,9 @@ impl LegacyError {
stop_column: error_range.end.column().get() as usize,
cell: error_range.start.cell().map(|cell| cell.get() as usize),
path: error_path
.strip_prefix(relative_to)
.unwrap_or(error_path)
.relativize_from(relative_to)
.to_string_lossy()
.into_owned(),
.replace('\\', "/"), // Normalize Windows backslashes so baseline files are consistent across platforms
// -2 is chosen because it's an unused error code in Pyre1
code: -2, // TODO: replace this dummy value
name: error.error_kind().to_name().to_owned(),
Expand Down
11 changes: 3 additions & 8 deletions pyrefly/lib/state/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,26 +175,21 @@ impl Errors {
errors
}

pub fn collect_errors_with_baseline(
&self,
baseline_path: Option<&Path>,
relative_to: &Path,
) -> CollectedErrors {
pub fn collect_errors_with_baseline(&self, baseline_path: Option<&Path>) -> CollectedErrors {
let errors = self.collect_errors();
self.apply_baseline(errors, baseline_path, relative_to)
self.apply_baseline(errors, baseline_path)
}

/// Apply baseline filtering to already-collected errors.
pub fn apply_baseline(
&self,
mut errors: CollectedErrors,
baseline_path: Option<&Path>,
relative_to: &Path,
) -> CollectedErrors {
if let Some(baseline_path) = baseline_path
&& let Ok(processor) = BaselineProcessor::from_file(baseline_path)
{
processor.process_errors(&mut errors.ordinary, &mut errors.baseline, relative_to);
processor.process_errors(&mut errors.ordinary, &mut errors.baseline);
}
errors
}
Expand Down
Loading