diff --git a/crates/red_knot_python_semantic/src/semantic_index.rs b/crates/red_knot_python_semantic/src/semantic_index.rs index 64e50dcea6836..1730ec1b74e75 100644 --- a/crates/red_knot_python_semantic/src/semantic_index.rs +++ b/crates/red_knot_python_semantic/src/semantic_index.rs @@ -27,7 +27,7 @@ pub mod expression; pub mod symbol; mod use_def; -pub(crate) use self::use_def::{DefinitionWithConstraints, DefinitionWithConstraintsIterator}; +pub(crate) use self::use_def::{BindingWithConstraints, BindingWithConstraintsIterator}; type SymbolMap = hashbrown::HashMap; @@ -326,16 +326,16 @@ mod tests { use crate::Db; impl UseDefMap<'_> { - fn first_public_definition(&self, symbol: ScopedSymbolId) -> Option> { - self.public_definitions(symbol) + fn first_public_binding(&self, symbol: ScopedSymbolId) -> Option> { + self.public_bindings(symbol) .next() - .map(|constrained_definition| constrained_definition.definition) + .map(|constrained_binding| constrained_binding.binding) } - fn first_use_definition(&self, use_id: ScopedUseId) -> Option> { - self.use_definitions(use_id) + fn first_binding_at_use(&self, use_id: ScopedUseId) -> Option> { + self.bindings_at_use(use_id) .next() - .map(|constrained_definition| constrained_definition.definition) + .map(|constrained_binding| constrained_binding.binding) } } @@ -397,8 +397,8 @@ mod tests { let foo = global_table.symbol_id_by_name("foo").unwrap(); let use_def = use_def_map(&db, scope); - let definition = use_def.first_public_definition(foo).unwrap(); - assert!(matches!(definition.node(&db), DefinitionKind::Import(_))); + let binding = use_def.first_public_binding(foo).unwrap(); + assert!(matches!(binding.kind(&db), DefinitionKind::Import(_))); } #[test] @@ -427,22 +427,19 @@ mod tests { assert!( global_table .symbol_by_name("foo") - .is_some_and(|symbol| { symbol.is_defined() && !symbol.is_used() }), + .is_some_and(|symbol| { symbol.is_bound() && !symbol.is_used() }), "symbols that are defined get the defined flag" ); let use_def = use_def_map(&db, scope); - let definition = use_def - .first_public_definition( + let binding = use_def + .first_public_binding( global_table .symbol_id_by_name("foo") .expect("symbol to exist"), ) .unwrap(); - assert!(matches!( - definition.node(&db), - DefinitionKind::ImportFrom(_) - )); + assert!(matches!(binding.kind(&db), DefinitionKind::ImportFrom(_))); } #[test] @@ -455,17 +452,14 @@ mod tests { assert!( global_table .symbol_by_name("foo") - .is_some_and(|symbol| { !symbol.is_defined() && symbol.is_used() }), - "a symbol used but not defined in a scope should have only the used flag" + .is_some_and(|symbol| { !symbol.is_bound() && symbol.is_used() }), + "a symbol used but not bound in a scope should have only the used flag" ); let use_def = use_def_map(&db, scope); - let definition = use_def - .first_public_definition(global_table.symbol_id_by_name("x").expect("symbol exists")) + let binding = use_def + .first_public_binding(global_table.symbol_id_by_name("x").expect("symbol exists")) .unwrap(); - assert!(matches!( - definition.node(&db), - DefinitionKind::Assignment(_) - )); + assert!(matches!(binding.kind(&db), DefinitionKind::Assignment(_))); } #[test] @@ -477,12 +471,12 @@ mod tests { assert_eq!(names(&global_table), vec!["x"]); let use_def = use_def_map(&db, scope); - let definition = use_def - .first_public_definition(global_table.symbol_id_by_name("x").unwrap()) + let binding = use_def + .first_public_binding(global_table.symbol_id_by_name("x").unwrap()) .unwrap(); assert!(matches!( - definition.node(&db), + binding.kind(&db), DefinitionKind::AugmentedAssignment(_) )); } @@ -515,13 +509,10 @@ y = 2 assert_eq!(names(&class_table), vec!["x"]); let use_def = index.use_def_map(class_scope_id); - let definition = use_def - .first_public_definition(class_table.symbol_id_by_name("x").expect("symbol exists")) + let binding = use_def + .first_public_binding(class_table.symbol_id_by_name("x").expect("symbol exists")) .unwrap(); - assert!(matches!( - definition.node(&db), - DefinitionKind::Assignment(_) - )); + assert!(matches!(binding.kind(&db), DefinitionKind::Assignment(_))); } #[test] @@ -551,17 +542,14 @@ y = 2 assert_eq!(names(&function_table), vec!["x"]); let use_def = index.use_def_map(function_scope_id); - let definition = use_def - .first_public_definition( + let binding = use_def + .first_public_binding( function_table .symbol_id_by_name("x") .expect("symbol exists"), ) .unwrap(); - assert!(matches!( - definition.node(&db), - DefinitionKind::Assignment(_) - )); + assert!(matches!(binding.kind(&db), DefinitionKind::Assignment(_))); } #[test] @@ -593,27 +581,27 @@ def f(a: str, /, b: str, c: int = 1, *args, d: int = 2, **kwargs): let use_def = index.use_def_map(function_scope_id); for name in ["a", "b", "c", "d"] { - let definition = use_def - .first_public_definition( + let binding = use_def + .first_public_binding( function_table .symbol_id_by_name(name) .expect("symbol exists"), ) .unwrap(); assert!(matches!( - definition.node(&db), + binding.kind(&db), DefinitionKind::ParameterWithDefault(_) )); } for name in ["args", "kwargs"] { - let definition = use_def - .first_public_definition( + let binding = use_def + .first_public_binding( function_table .symbol_id_by_name(name) .expect("symbol exists"), ) .unwrap(); - assert!(matches!(definition.node(&db), DefinitionKind::Parameter(_))); + assert!(matches!(binding.kind(&db), DefinitionKind::Parameter(_))); } } @@ -641,23 +629,19 @@ def f(a: str, /, b: str, c: int = 1, *args, d: int = 2, **kwargs): let use_def = index.use_def_map(lambda_scope_id); for name in ["a", "b", "c", "d"] { - let definition = use_def - .first_public_definition( - lambda_table.symbol_id_by_name(name).expect("symbol exists"), - ) + let binding = use_def + .first_public_binding(lambda_table.symbol_id_by_name(name).expect("symbol exists")) .unwrap(); assert!(matches!( - definition.node(&db), + binding.kind(&db), DefinitionKind::ParameterWithDefault(_) )); } for name in ["args", "kwargs"] { - let definition = use_def - .first_public_definition( - lambda_table.symbol_id_by_name(name).expect("symbol exists"), - ) + let binding = use_def + .first_public_binding(lambda_table.symbol_id_by_name(name).expect("symbol exists")) .unwrap(); - assert!(matches!(definition.node(&db), DefinitionKind::Parameter(_))); + assert!(matches!(binding.kind(&db), DefinitionKind::Parameter(_))); } } @@ -695,15 +679,15 @@ def f(a: str, /, b: str, c: int = 1, *args, d: int = 2, **kwargs): let use_def = index.use_def_map(comprehension_scope_id); for name in ["x", "y"] { - let definition = use_def - .first_public_definition( + let binding = use_def + .first_public_binding( comprehension_symbol_table .symbol_id_by_name(name) .expect("symbol exists"), ) .unwrap(); assert!(matches!( - definition.node(&db), + binding.kind(&db), DefinitionKind::Comprehension(_) )); } @@ -742,8 +726,8 @@ def f(a: str, /, b: str, c: int = 1, *args, d: int = 2, **kwargs): let element_use_id = element.scoped_use_id(&db, comprehension_scope_id.to_scope_id(&db, file)); - let definition = use_def.first_use_definition(element_use_id).unwrap(); - let DefinitionKind::Comprehension(comprehension) = definition.node(&db) else { + let binding = use_def.first_binding_at_use(element_use_id).unwrap(); + let DefinitionKind::Comprehension(comprehension) = binding.kind(&db) else { panic!("expected generator definition") }; let target = comprehension.target(); @@ -822,12 +806,10 @@ with item1 as x, item2 as y: let use_def = index.use_def_map(FileScopeId::global()); for name in ["x", "y"] { - let Some(definition) = use_def.first_public_definition( - global_table.symbol_id_by_name(name).expect("symbol exists"), - ) else { - panic!("Expected with item definition for {name}"); - }; - assert!(matches!(definition.node(&db), DefinitionKind::WithItem(_))); + let binding = use_def + .first_public_binding(global_table.symbol_id_by_name(name).expect("symbol exists")) + .expect("Expected with item definition for {name}"); + assert!(matches!(binding.kind(&db), DefinitionKind::WithItem(_))); } } @@ -847,12 +829,10 @@ with context() as (x, y): let use_def = index.use_def_map(FileScopeId::global()); for name in ["x", "y"] { - let Some(definition) = use_def.first_public_definition( - global_table.symbol_id_by_name(name).expect("symbol exists"), - ) else { - panic!("Expected with item definition for {name}"); - }; - assert!(matches!(definition.node(&db), DefinitionKind::WithItem(_))); + let binding = use_def + .first_public_binding(global_table.symbol_id_by_name(name).expect("symbol exists")) + .expect("Expected with item definition for {name}"); + assert!(matches!(binding.kind(&db), DefinitionKind::WithItem(_))); } } @@ -889,14 +869,14 @@ def func(): assert_eq!(names(&func2_table), vec!["y"]); let use_def = index.use_def_map(FileScopeId::global()); - let definition = use_def - .first_public_definition( + let binding = use_def + .first_public_binding( global_table .symbol_id_by_name("func") .expect("symbol exists"), ) .unwrap(); - assert!(matches!(definition.node(&db), DefinitionKind::Function(_))); + assert!(matches!(binding.kind(&db), DefinitionKind::Function(_))); } #[test] @@ -964,7 +944,7 @@ class C[T]: assert!( ann_table .symbol_by_name("T") - .is_some_and(|s| s.is_defined() && !s.is_used()), + .is_some_and(|s| s.is_bound() && !s.is_used()), "type parameters are defined by the scope that introduces them" ); @@ -996,8 +976,8 @@ class C[T]: }; let x_use_id = x_use_expr_name.scoped_use_id(&db, scope); let use_def = use_def_map(&db, scope); - let definition = use_def.first_use_definition(x_use_id).unwrap(); - let DefinitionKind::Assignment(assignment) = definition.node(&db) else { + let binding = use_def.first_binding_at_use(x_use_id).unwrap(); + let DefinitionKind::Assignment(assignment) = binding.kind(&db) else { panic!("should be an assignment definition") }; let ast::Expr::NumberLiteral(ast::ExprNumberLiteral { @@ -1127,12 +1107,10 @@ match subject: ("k", 0), ("l", 1), ] { - let definition = use_def - .first_public_definition( - global_table.symbol_id_by_name(name).expect("symbol exists"), - ) + let binding = use_def + .first_public_binding(global_table.symbol_id_by_name(name).expect("symbol exists")) .expect("Expected with item definition for {name}"); - if let DefinitionKind::MatchPattern(pattern) = definition.node(&db) { + if let DefinitionKind::MatchPattern(pattern) = binding.kind(&db) { assert_eq!(pattern.index(), expected_index); } else { panic!("Expected match pattern definition for {name}"); @@ -1159,12 +1137,10 @@ match 1: let use_def = use_def_map(&db, global_scope_id); for (name, expected_index) in [("first", 0), ("second", 0)] { - let definition = use_def - .first_public_definition( - global_table.symbol_id_by_name(name).expect("symbol exists"), - ) + let binding = use_def + .first_public_binding(global_table.symbol_id_by_name(name).expect("symbol exists")) .expect("Expected with item definition for {name}"); - if let DefinitionKind::MatchPattern(pattern) = definition.node(&db) { + if let DefinitionKind::MatchPattern(pattern) = binding.kind(&db) { assert_eq!(pattern.index(), expected_index); } else { panic!("Expected match pattern definition for {name}"); @@ -1181,11 +1157,11 @@ match 1: assert_eq!(&names(&global_table), &["a", "x"]); let use_def = use_def_map(&db, scope); - let definition = use_def - .first_public_definition(global_table.symbol_id_by_name("x").unwrap()) + let binding = use_def + .first_public_binding(global_table.symbol_id_by_name("x").unwrap()) .unwrap(); - assert!(matches!(definition.node(&db), DefinitionKind::For(_))); + assert!(matches!(binding.kind(&db), DefinitionKind::For(_))); } #[test] @@ -1197,15 +1173,15 @@ match 1: assert_eq!(&names(&global_table), &["a", "x", "y"]); let use_def = use_def_map(&db, scope); - let x_definition = use_def - .first_public_definition(global_table.symbol_id_by_name("x").unwrap()) + let x_binding = use_def + .first_public_binding(global_table.symbol_id_by_name("x").unwrap()) .unwrap(); - let y_definition = use_def - .first_public_definition(global_table.symbol_id_by_name("y").unwrap()) + let y_binding = use_def + .first_public_binding(global_table.symbol_id_by_name("y").unwrap()) .unwrap(); - assert!(matches!(x_definition.node(&db), DefinitionKind::For(_))); - assert!(matches!(y_definition.node(&db), DefinitionKind::For(_))); + assert!(matches!(x_binding.kind(&db), DefinitionKind::For(_))); + assert!(matches!(y_binding.kind(&db), DefinitionKind::For(_))); } #[test] @@ -1217,10 +1193,10 @@ match 1: assert_eq!(&names(&global_table), &["e", "a", "b", "c", "d"]); let use_def = use_def_map(&db, scope); - let definition = use_def - .first_public_definition(global_table.symbol_id_by_name("a").unwrap()) + let binding = use_def + .first_public_binding(global_table.symbol_id_by_name("a").unwrap()) .unwrap(); - assert!(matches!(definition.node(&db), DefinitionKind::For(_))); + assert!(matches!(binding.kind(&db), DefinitionKind::For(_))); } } diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index 25d9b9159ba3b..1a90c8a6e48c0 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -19,7 +19,7 @@ use crate::semantic_index::definition::{ }; use crate::semantic_index::expression::Expression; use crate::semantic_index::symbol::{ - FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId, SymbolFlags, + FileScopeId, NodeWithScopeKey, NodeWithScopeRef, Scope, ScopeId, ScopedSymbolId, SymbolTableBuilder, }; use crate::semantic_index::use_def::{FlowSnapshot, UseDefMapBuilder}; @@ -28,7 +28,8 @@ use crate::Db; use super::constraint::{Constraint, PatternConstraint}; use super::definition::{ - ExceptHandlerDefinitionNodeRef, MatchPatternDefinitionNodeRef, WithItemDefinitionNodeRef, + DefinitionCategory, ExceptHandlerDefinitionNodeRef, MatchPatternDefinitionNodeRef, + WithItemDefinitionNodeRef, }; pub(super) struct SemanticIndexBuilder<'db> { @@ -168,31 +169,38 @@ impl<'db> SemanticIndexBuilder<'db> { self.current_use_def_map_mut().merge(state); } - fn add_or_update_symbol(&mut self, name: Name, flags: SymbolFlags) -> ScopedSymbolId { - let symbol_table = self.current_symbol_table(); - let (symbol_id, added) = symbol_table.add_or_update_symbol(name, flags); + fn add_symbol(&mut self, name: Name) -> ScopedSymbolId { + let (symbol_id, added) = self.current_symbol_table().add_symbol(name); if added { - let use_def_map = self.current_use_def_map_mut(); - use_def_map.add_symbol(symbol_id); + self.current_use_def_map_mut().add_symbol(symbol_id); } symbol_id } + fn mark_symbol_bound(&mut self, id: ScopedSymbolId) { + self.current_symbol_table().mark_symbol_bound(id); + } + + fn mark_symbol_used(&mut self, id: ScopedSymbolId) { + self.current_symbol_table().mark_symbol_used(id); + } + fn add_definition<'a>( &mut self, symbol: ScopedSymbolId, definition_node: impl Into>, ) -> Definition<'db> { let definition_node: DefinitionNodeRef<'_> = definition_node.into(); + #[allow(unsafe_code)] + // SAFETY: `definition_node` is guaranteed to be a child of `self.module` + let kind = unsafe { definition_node.into_owned(self.module.clone()) }; + let category = kind.category(); let definition = Definition::new( self.db, self.file, self.current_scope(), symbol, - #[allow(unsafe_code)] - unsafe { - definition_node.into_owned(self.module.clone()) - }, + kind, countme::Count::default(), ); @@ -201,8 +209,18 @@ impl<'db> SemanticIndexBuilder<'db> { .insert(definition_node.key(), definition); debug_assert_eq!(existing_definition, None); - self.current_use_def_map_mut() - .record_definition(symbol, definition); + if category.is_binding() { + self.mark_symbol_bound(symbol); + } + + let use_def = self.current_use_def_map_mut(); + match category { + DefinitionCategory::DeclarationAndBinding => { + use_def.record_declaration_and_binding(symbol, definition); + } + DefinitionCategory::Declaration => use_def.record_declaration(symbol, definition), + DefinitionCategory::Binding => use_def.record_binding(symbol, definition), + } definition } @@ -284,10 +302,13 @@ impl<'db> SemanticIndexBuilder<'db> { .. }) => (name, &None, default), }; - // TODO create Definition for typevars - self.add_or_update_symbol(name.id.clone(), SymbolFlags::IS_DEFINED); - if let Some(bound) = bound { - self.visit_expr(bound); + let symbol = self.add_symbol(name.id.clone()); + // TODO create Definition for PEP 695 typevars + // note that the "bound" on the typevar is a totally different thing than whether + // or not a name is "bound" by a typevar declaration; the latter is always true. + self.mark_symbol_bound(symbol); + if let Some(bounds) = bound { + self.visit_expr(bounds); } if let Some(default) = default { self.visit_expr(default); @@ -350,8 +371,7 @@ impl<'db> SemanticIndexBuilder<'db> { } fn declare_parameter(&mut self, parameter: AnyParameterRef) { - let symbol = - self.add_or_update_symbol(parameter.name().id().clone(), SymbolFlags::IS_DEFINED); + let symbol = self.add_symbol(parameter.name().id().clone()); let definition = self.add_definition(symbol, parameter); @@ -462,8 +482,7 @@ where // The symbol for the function name itself has to be evaluated // at the end to match the runtime evaluation of parameter defaults // and return-type annotations. - let symbol = self - .add_or_update_symbol(function_def.name.id.clone(), SymbolFlags::IS_DEFINED); + let symbol = self.add_symbol(function_def.name.id.clone()); self.add_definition(symbol, function_def); } ast::Stmt::ClassDef(class) => { @@ -471,8 +490,7 @@ where self.visit_decorator(decorator); } - let symbol = - self.add_or_update_symbol(class.name.id.clone(), SymbolFlags::IS_DEFINED); + let symbol = self.add_symbol(class.name.id.clone()); self.add_definition(symbol, class); self.with_type_params( @@ -498,7 +516,7 @@ where Name::new(alias.name.id.split('.').next().unwrap()) }; - let symbol = self.add_or_update_symbol(symbol_name, SymbolFlags::IS_DEFINED); + let symbol = self.add_symbol(symbol_name); self.add_definition(symbol, alias); } } @@ -510,8 +528,7 @@ where &alias.name.id }; - let symbol = - self.add_or_update_symbol(symbol_name.clone(), SymbolFlags::IS_DEFINED); + let symbol = self.add_symbol(symbol_name.clone()); self.add_definition(symbol, ImportFromDefinitionNodeRef { node, alias_index }); } } @@ -725,8 +742,7 @@ where // which is invalid syntax. However, it's still pretty obvious here that the user // *wanted* `e` to be bound, so we should still create a definition here nonetheless. if let Some(symbol_name) = symbol_name { - let symbol = self - .add_or_update_symbol(symbol_name.id.clone(), SymbolFlags::IS_DEFINED); + let symbol = self.add_symbol(symbol_name.id.clone()); self.add_definition( symbol, @@ -756,24 +772,18 @@ where match expr { ast::Expr::Name(name_node @ ast::ExprName { id, ctx, .. }) => { - let flags = match (ctx, self.current_assignment) { + let (is_use, is_definition) = match (ctx, self.current_assignment) { (ast::ExprContext::Store, Some(CurrentAssignment::AugAssign(_))) => { // For augmented assignment, the target expression is also used. - SymbolFlags::IS_DEFINED | SymbolFlags::IS_USED - } - (ast::ExprContext::Store, Some(CurrentAssignment::AnnAssign(ann_assign))) - if ann_assign.value.is_none() => - { - // An annotated assignment that doesn't assign a value is not a Definition - SymbolFlags::empty() + (true, true) } - (ast::ExprContext::Load, _) => SymbolFlags::IS_USED, - (ast::ExprContext::Store, _) => SymbolFlags::IS_DEFINED, - (ast::ExprContext::Del, _) => SymbolFlags::IS_DEFINED, - (ast::ExprContext::Invalid, _) => SymbolFlags::empty(), + (ast::ExprContext::Load, _) => (true, false), + (ast::ExprContext::Store, _) => (false, true), + (ast::ExprContext::Del, _) => (false, true), + (ast::ExprContext::Invalid, _) => (false, false), }; - let symbol = self.add_or_update_symbol(id.clone(), flags); - if flags.contains(SymbolFlags::IS_DEFINED) { + let symbol = self.add_symbol(id.clone()); + if is_definition { match self.current_assignment { Some(CurrentAssignment::Assign(assignment)) => { self.add_definition( @@ -830,7 +840,8 @@ where } } - if flags.contains(SymbolFlags::IS_USED) { + if is_use { + self.mark_symbol_used(symbol); let use_id = self.current_ast_ids().record_use(expr); self.current_use_def_map_mut().record_use(symbol, use_id); } @@ -970,7 +981,7 @@ where range: _, }) = pattern { - let symbol = self.add_or_update_symbol(name.id().clone(), SymbolFlags::IS_DEFINED); + let symbol = self.add_symbol(name.id().clone()); let state = self.current_match_case.as_ref().unwrap(); self.add_definition( symbol, @@ -991,7 +1002,7 @@ where rest: Some(name), .. }) = pattern { - let symbol = self.add_or_update_symbol(name.id().clone(), SymbolFlags::IS_DEFINED); + let symbol = self.add_symbol(name.id().clone()); let state = self.current_match_case.as_ref().unwrap(); self.add_definition( symbol, diff --git a/crates/red_knot_python_semantic/src/semantic_index/definition.rs b/crates/red_knot_python_semantic/src/semantic_index/definition.rs index 0f7f1a5b15066..fd4c4b15c600b 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/definition.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/definition.rs @@ -23,7 +23,7 @@ pub struct Definition<'db> { #[no_eq] #[return_ref] - pub(crate) node: DefinitionKind, + pub(crate) kind: DefinitionKind, #[no_eq] count: countme::Count>, @@ -33,6 +33,21 @@ impl<'db> Definition<'db> { pub(crate) fn scope(self, db: &'db dyn Db) -> ScopeId<'db> { self.file_scope(db).to_scope_id(db, self.file(db)) } + + #[allow(unused)] + pub(crate) fn category(self, db: &'db dyn Db) -> DefinitionCategory { + self.kind(db).category() + } + + #[allow(unused)] + pub(crate) fn is_declaration(self, db: &'db dyn Db) -> bool { + self.kind(db).category().is_declaration() + } + + #[allow(unused)] + pub(crate) fn is_binding(self, db: &'db dyn Db) -> bool { + self.kind(db).category().is_binding() + } } #[derive(Copy, Clone, Debug)] @@ -302,6 +317,41 @@ impl DefinitionNodeRef<'_> { } } +#[derive(Clone, Copy, Debug)] +pub(crate) enum DefinitionCategory { + /// A Definition which binds a value to a name (e.g. `x = 1`). + Binding, + /// A Definition which declares the upper-bound of acceptable types for this name (`x: int`). + Declaration, + /// A Definition which both declares a type and binds a value (e.g. `x: int = 1`). + DeclarationAndBinding, +} + +impl DefinitionCategory { + /// True if this definition establishes a "declared type" for the symbol. + /// + /// If so, any assignments reached by this definition are in error if they assign a value of a + /// type not assignable to the declared type. + /// + /// Annotations establish a declared type. So do function and class definition. + pub(crate) fn is_declaration(self) -> bool { + matches!( + self, + DefinitionCategory::Declaration | DefinitionCategory::DeclarationAndBinding + ) + } + + /// True if this definition assigns a value to the symbol. + /// + /// False only for annotated assignments without a RHS. + pub(crate) fn is_binding(self) -> bool { + matches!( + self, + DefinitionCategory::Binding | DefinitionCategory::DeclarationAndBinding + ) + } +} + #[derive(Clone, Debug)] pub enum DefinitionKind { Import(AstNodeRef), @@ -321,6 +371,52 @@ pub enum DefinitionKind { ExceptHandler(ExceptHandlerDefinitionKind), } +impl DefinitionKind { + pub(crate) fn category(&self) -> DefinitionCategory { + match self { + // functions and classes always bind a value, and we always consider them declarations + DefinitionKind::Function(_) | DefinitionKind::Class(_) => { + DefinitionCategory::DeclarationAndBinding + } + // a parameter always binds a value, but is only a declaration if annotated + DefinitionKind::Parameter(parameter) => { + if parameter.annotation.is_some() { + DefinitionCategory::DeclarationAndBinding + } else { + DefinitionCategory::Binding + } + } + // presence of a default is irrelevant, same logic as for a no-default parameter + DefinitionKind::ParameterWithDefault(parameter_with_default) => { + if parameter_with_default.parameter.annotation.is_some() { + DefinitionCategory::DeclarationAndBinding + } else { + DefinitionCategory::Binding + } + } + // annotated assignment is always a declaration, only a binding if there is a RHS + DefinitionKind::AnnotatedAssignment(ann_assign) => { + if ann_assign.value.is_some() { + DefinitionCategory::DeclarationAndBinding + } else { + DefinitionCategory::Declaration + } + } + // all of these bind values without declaring a type + DefinitionKind::Import(_) + | DefinitionKind::ImportFrom(_) + | DefinitionKind::NamedExpression(_) + | DefinitionKind::Assignment(_) + | DefinitionKind::AugmentedAssignment(_) + | DefinitionKind::For(_) + | DefinitionKind::Comprehension(_) + | DefinitionKind::WithItem(_) + | DefinitionKind::MatchPattern(_) + | DefinitionKind::ExceptHandler(_) => DefinitionCategory::Binding, + } + } +} + #[derive(Clone, Debug)] #[allow(dead_code)] pub struct MatchPatternDefinitionKind { @@ -441,8 +537,12 @@ pub struct ExceptHandlerDefinitionKind { } impl ExceptHandlerDefinitionKind { + pub(crate) fn node(&self) -> &ast::ExceptHandlerExceptHandler { + self.handler.node() + } + pub(crate) fn handled_exceptions(&self) -> Option<&ast::Expr> { - self.handler.node().type_.as_deref() + self.node().type_.as_deref() } pub(crate) fn is_star(&self) -> bool { diff --git a/crates/red_knot_python_semantic/src/semantic_index/symbol.rs b/crates/red_knot_python_semantic/src/semantic_index/symbol.rs index 432c956f69d96..eeab9bf16907d 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/symbol.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/symbol.rs @@ -44,16 +44,16 @@ impl Symbol { } /// Is the symbol defined in its containing scope? - pub fn is_defined(&self) -> bool { - self.flags.contains(SymbolFlags::IS_DEFINED) + pub fn is_bound(&self) -> bool { + self.flags.contains(SymbolFlags::IS_BOUND) } } bitflags! { #[derive(Copy, Clone, Debug, Eq, PartialEq)] - pub(super) struct SymbolFlags: u8 { + struct SymbolFlags: u8 { const IS_USED = 1 << 0; - const IS_DEFINED = 1 << 1; + const IS_BOUND = 1 << 1; /// TODO: This flag is not yet set by anything const MARKED_GLOBAL = 1 << 2; /// TODO: This flag is not yet set by anything @@ -272,11 +272,7 @@ impl SymbolTableBuilder { } } - pub(super) fn add_or_update_symbol( - &mut self, - name: Name, - flags: SymbolFlags, - ) -> (ScopedSymbolId, bool) { + pub(super) fn add_symbol(&mut self, name: Name) -> (ScopedSymbolId, bool) { let hash = SymbolTable::hash_name(&name); let entry = self .table @@ -285,15 +281,9 @@ impl SymbolTableBuilder { .from_hash(hash, |id| self.table.symbols[*id].name() == &name); match entry { - RawEntryMut::Occupied(entry) => { - let symbol = &mut self.table.symbols[*entry.key()]; - symbol.insert_flags(flags); - - (*entry.key(), false) - } + RawEntryMut::Occupied(entry) => (*entry.key(), false), RawEntryMut::Vacant(entry) => { - let mut symbol = Symbol::new(name); - symbol.insert_flags(flags); + let symbol = Symbol::new(name); let id = self.table.symbols.push(symbol); entry.insert_with_hasher(hash, id, (), |id| { @@ -304,6 +294,14 @@ impl SymbolTableBuilder { } } + pub(super) fn mark_symbol_bound(&mut self, id: ScopedSymbolId) { + self.table.symbols[id].insert_flags(SymbolFlags::IS_BOUND); + } + + pub(super) fn mark_symbol_used(&mut self, id: ScopedSymbolId) { + self.table.symbols[id].insert_flags(SymbolFlags::IS_USED); + } + pub(super) fn finish(mut self) -> SymbolTable { self.table.shrink_to_fit(); self.table diff --git a/crates/red_knot_python_semantic/src/semantic_index/use_def.rs b/crates/red_knot_python_semantic/src/semantic_index/use_def.rs index 682ee32a41d03..a4b2a3e3cc07f 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/use_def.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/use_def.rs @@ -1,5 +1,79 @@ -//! Build a map from each use of a symbol to the definitions visible from that use, and the -//! type-narrowing constraints that apply to each definition. +//! First, some terminology: +//! +//! * A "binding" gives a new value to a variable. This includes many different Python statements +//! (assignment statements of course, but also imports, `def` and `class` statements, `as` +//! clauses in `with` and `except` statements, match patterns, and others) and even one +//! expression kind (named expressions). It notably does not include annotated assignment +//! statements without a right-hand side value; these do not assign any new value to the +//! variable. We consider function parameters to be bindings as well, since (from the perspective +//! of the function's internal scope), a function parameter begins the scope bound to a value. +//! +//! * A "declaration" establishes an upper bound type for the values that a variable may be +//! permitted to take on. Annotated assignment statements (with or without an RHS value) are +//! declarations; annotated function parameters are also declarations. We consider `def` and +//! `class` statements to also be declarations, so as to prohibit accidentally shadowing them. +//! +//! Annotated assignments with a right-hand side, and annotated function parameters, are both +//! bindings and declarations. +//! +//! We use [`Definition`] as the universal term (and Salsa tracked struct) encompassing both +//! bindings and declarations. (This sacrifices a bit of type safety in exchange for improved +//! performance via fewer Salsa tracked structs and queries, since most declarations -- typed +//! parameters and annotated assignments with RHS -- are both bindings and declarations.) +//! +//! At any given use of a variable, we can ask about both its "declared type" and its "inferred +//! type". These may be different, but the inferred type must always be assignable to the declared +//! type; that is, the declared type is always wider, and the inferred type may be more precise. If +//! we see an invalid assignment, we emit a diagnostic and abandon our inferred type, deferring to +//! the declared type (this allows an explicit annotation to override bad inference, without a +//! cast), maintaining the invariant. +//! +//! The **inferred type** represents the most precise type we believe encompasses all possible +//! values for the variable at a given use. It is based on a union of the bindings which can reach +//! that use through some control flow path, and the narrowing constraints that control flow must +//! have passed through between the binding and the use. For example, in this code: +//! +//! ```python +//! x = 1 if flag else None +//! if x is not None: +//! use(x) +//! ``` +//! +//! For the use of `x` on the third line, the inferred type should be `Literal[1]`. This is based +//! on the binding on the first line, which assigns the type `Literal[1] | None`, and the narrowing +//! constraint on the second line, which rules out the type `None`, since control flow must pass +//! through this constraint to reach the use in question. +//! +//! The **declared type** represents the code author's declaration (usually through a type +//! annotation) that a given variable should not be assigned any type outside the declared type. In +//! our model, declared types are also control-flow-sensitive; we allow the code author to +//! explicitly re-declare the same variable with a different type. So for a given binding of a +//! variable, we will want to ask which declarations of that variable can reach that binding, in +//! order to determine whether the binding is permitted, or should be a type error. For example: +//! +//! ```python +//! from pathlib import Path +//! def f(path: str): +//! path: Path = Path(path) +//! ``` +//! +//! In this function, the initial declared type of `path` is `str`, meaning that the assignment +//! `path = Path(path)` would be a type error, since it assigns to `path` a value whose type is not +//! assignable to `str`. This is the purpose of declared types: they prevent accidental assignment +//! of the wrong type to a variable. +//! +//! But in some cases it is useful to "shadow" or "re-declare" a variable with a new type, and we +//! permit this, as long as it is done with an explicit re-annotation. So `path: Path = +//! Path(path)`, with the explicit `: Path` annotation, is permitted. +//! +//! The general rule is that whatever declaration(s) can reach a given binding determine the +//! validity of that binding. If there is a path in which the symbol is not declared, that is a +//! declaration of `Unknown`. If multiple declarations can reach a binding, we union them, but by +//! default we also issue a type error, since this implicit union of declared types may hide an +//! error. +//! +//! To support type inference, we build a map from each use of a symbol to the bindings live at +//! that use, and the type narrowing constraints that apply to each binding. //! //! Let's take this code sample: //! @@ -7,147 +81,155 @@ //! x = 1 //! x = 2 //! y = x -//! if y is not None: +//! if flag: //! x = 3 //! else: //! x = 4 //! z = x //! ``` //! -//! In this snippet, we have four definitions of `x` (the statements assigning `1`, `2`, `3`, -//! and `4` to it), and two uses of `x` (the `y = x` and `z = x` assignments). The first -//! [`Definition`] of `x` is never visible to any use, because it's immediately replaced by the -//! second definition, before any use happens. (A linter could thus flag the statement `x = 1` -//! as likely superfluous.) +//! In this snippet, we have four bindings of `x` (the statements assigning `1`, `2`, `3`, and `4` +//! to it), and two uses of `x` (the `y = x` and `z = x` assignments). The first binding of `x` +//! does not reach any use, because it's immediately replaced by the second binding, before any use +//! happens. (A linter could thus flag the statement `x = 1` as likely superfluous.) //! -//! The first use of `x` has one definition visible to it: the assignment `x = 2`. +//! The first use of `x` has one live binding: the assignment `x = 2`. //! //! Things get a bit more complex when we have branches. We will definitely take either the `if` or -//! the `else` branch. Thus, the second use of `x` has two definitions visible to it: `x = 3` and -//! `x = 4`. The `x = 2` definition is no longer visible, because it must be replaced by either `x -//! = 3` or `x = 4`, no matter which branch was taken. We don't know which branch was taken, so we -//! must consider both definitions as visible, which means eventually we would (in type inference) -//! look at these two definitions and infer a type of `Literal[3, 4]` -- the union of `Literal[3]` -//! and `Literal[4]` -- for the second use of `x`. +//! the `else` branch. Thus, the second use of `x` has two live bindings: `x = 3` and `x = 4`. The +//! `x = 2` assignment is no longer visible, because it must be replaced by either `x = 3` or `x = +//! 4`, no matter which branch was taken. We don't know which branch was taken, so we must consider +//! both bindings as live, which means eventually we would (in type inference) look at these two +//! bindings and infer a type of `Literal[3, 4]` -- the union of `Literal[3]` and `Literal[4]` -- +//! for the second use of `x`. //! //! So that's one question our use-def map needs to answer: given a specific use of a symbol, which -//! definition(s) is/are visible from that use. In -//! [`AstIds`](crate::semantic_index::ast_ids::AstIds) we number all uses (that means a `Name` node -//! with `Load` context) so we have a `ScopedUseId` to efficiently represent each use. -//! -//! Another case we need to handle is when a symbol is referenced from a different scope (the most -//! obvious example of this is an import). We call this "public" use of a symbol. So the other -//! question we need to be able to answer is, what are the publicly-visible definitions of each -//! symbol? -//! -//! Technically, public use of a symbol could also occur from any point in control flow of the -//! scope where the symbol is defined (via inline imports and import cycles, in the case of an -//! import, or via a function call partway through the local scope that ends up using a symbol from -//! the scope via a global or nonlocal reference.) But modeling this fully accurately requires -//! whole-program analysis that isn't tractable for an efficient incremental compiler, since it -//! means a given symbol could have a different type every place it's referenced throughout the -//! program, depending on the shape of arbitrarily-sized call/import graphs. So we follow other -//! Python type-checkers in making the simplifying assumption that usually the scope will finish -//! execution before its symbols are made visible to other scopes; for instance, most imports will -//! import from a complete module, not a partially-executed module. (We may want to get a little -//! smarter than this in the future, in particular for closures, but for now this is where we -//! start.) -//! -//! So this means that the publicly-visible definitions of a symbol are the definitions still -//! visible at the end of the scope; effectively we have an implicit "use" of every symbol at the -//! end of the scope. -//! -//! We also need to know, for a given definition of a symbol, what type-narrowing constraints apply +//! binding(s) can reach that use. In [`AstIds`](crate::semantic_index::ast_ids::AstIds) we number +//! all uses (that means a `Name` node with `Load` context) so we have a `ScopedUseId` to +//! efficiently represent each use. +//! +//! We also need to know, for a given definition of a symbol, what type narrowing constraints apply //! to it. For instance, in this code sample: //! //! ```python //! x = 1 if flag else None //! if x is not None: -//! y = x +//! use(x) //! ``` //! -//! At the use of `x` in `y = x`, the visible definition of `x` is `1 if flag else None`, which -//! would infer as the type `Literal[1] | None`. But the constraint `x is not None` dominates this -//! use, which means we can rule out the possibility that `x` is `None` here, which should give us -//! the type `Literal[1]` for this use. +//! At the use of `x`, the live binding of `x` is `1 if flag else None`, which would infer as the +//! type `Literal[1] | None`. But the constraint `x is not None` dominates this use, which means we +//! can rule out the possibility that `x` is `None` here, which should give us the type +//! `Literal[1]` for this use. +//! +//! For declared types, we need to be able to answer the question "given a binding to a symbol, +//! which declarations of that symbol can reach the binding?" This allows us to emit a diagnostic +//! if the binding is attempting to bind a value of a type that is not assignable to the declared +//! type for that symbol, at that point in control flow. +//! +//! We also need to know, given a declaration of a symbol, what the inferred type of that symbol is +//! at that point. This allows us to emit a diagnostic in a case like `x = "foo"; x: int`. The +//! binding `x = "foo"` occurs before the declaration `x: int`, so according to our +//! control-flow-sensitive interpretation of declarations, the assignment is not an error. But the +//! declaration is an error, since it would violate the "inferred type must be assignable to +//! declared type" rule. +//! +//! Another case we need to handle is when a symbol is referenced from a different scope (for +//! example, an import or a nonlocal reference). We call this "public" use of a symbol. For public +//! use of a symbol, we prefer the declared type, if there are any declarations of that symbol; if +//! not, we fall back to the inferred type. So we also need to know which declarations and bindings +//! can reach the end of the scope. +//! +//! Technically, public use of a symbol could occur from any point in control flow of the scope +//! where the symbol is defined (via inline imports and import cycles, in the case of an import, or +//! via a function call partway through the local scope that ends up using a symbol from the scope +//! via a global or nonlocal reference.) But modeling this fully accurately requires whole-program +//! analysis that isn't tractable for an efficient analysis, since it means a given symbol could +//! have a different type every place it's referenced throughout the program, depending on the +//! shape of arbitrarily-sized call/import graphs. So we follow other Python type checkers in +//! making the simplifying assumption that usually the scope will finish execution before its +//! symbols are made visible to other scopes; for instance, most imports will import from a +//! complete module, not a partially-executed module. (We may want to get a little smarter than +//! this in the future for some closures, but for now this is where we start.) //! //! The data structure we build to answer these questions is the `UseDefMap`. It has a -//! `definitions_by_use` vector indexed by [`ScopedUseId`] and a `public_definitions` vector -//! indexed by [`ScopedSymbolId`]. The values in each of these vectors are (in principle) a list of -//! visible definitions at that use, or at the end of the scope for that symbol, with a list of the -//! dominating constraints for each of those definitions. +//! `bindings_by_use` vector of [`SymbolBindings`] indexed by [`ScopedUseId`], a +//! `declarations_by_binding` vector of [`SymbolDeclarations`] indexed by [`ScopedDefinitionId`], a +//! `bindings_by_declaration` vector of [`SymbolBindings`] indexed by [`ScopedDefinitionId`], and +//! `public_bindings` and `public_definitions` vectors indexed by [`ScopedSymbolId`]. The values in +//! each of these vectors are (in principle) a list of live bindings at that use/definition, or at +//! the end of the scope for that symbol, with a list of the dominating constraints for each +//! binding. //! //! In order to avoid vectors-of-vectors-of-vectors and all the allocations that would entail, we //! don't actually store these "list of visible definitions" as a vector of [`Definition`]. -//! Instead, the values in `definitions_by_use` and `public_definitions` are a [`SymbolState`] -//! struct which uses bit-sets to track definitions and constraints in terms of -//! [`ScopedDefinitionId`] and [`ScopedConstraintId`], which are indices into the `all_definitions` -//! and `all_constraints` indexvecs in the [`UseDefMap`]. +//! Instead, [`SymbolBindings`] and [`SymbolDeclarations`] are structs which use bit-sets to track +//! definitions (and constraints, in the case of bindings) in terms of [`ScopedDefinitionId`] and +//! [`ScopedConstraintId`], which are indices into the `all_definitions` and `all_constraints` +//! indexvecs in the [`UseDefMap`]. //! //! There is another special kind of possible "definition" for a symbol: there might be a path from //! the scope entry to a given use in which the symbol is never bound. //! -//! The simplest way to model "unbound" would be as an actual [`Definition`] itself: the initial -//! visible [`Definition`] for each symbol in a scope. But actually modeling it this way would -//! unnecessarily increase the number of [`Definition`] that Salsa must track. Since "unbound" is a -//! special definition in that all symbols share it, and it doesn't have any additional per-symbol -//! state, and constraints are irrelevant to it, we can represent it more efficiently: we use the -//! `may_be_unbound` boolean on the [`SymbolState`] struct. If this flag is `true`, it means the -//! symbol/use really has one additional visible "definition", which is the unbound state. If this -//! flag is `false`, it means we've eliminated the possibility of unbound: every path we've -//! followed includes a definition for this symbol. +//! The simplest way to model "unbound" would be as a "binding" itself: the initial "binding" for +//! each symbol in a scope. But actually modeling it this way would unnecessarily increase the +//! number of [`Definition`]s that Salsa must track. Since "unbound" is special in that all symbols +//! share it, and it doesn't have any additional per-symbol state, and constraints are irrelevant +//! to it, we can represent it more efficiently: we use the `may_be_unbound` boolean on the +//! [`SymbolBindings`] struct. If this flag is `true` for a use of a symbol, it means the symbol +//! has a path to the use in which it is never bound. If this flag is `false`, it means we've +//! eliminated the possibility of unbound: every control flow path to the use includes a binding +//! for this symbol. //! //! To build a [`UseDefMap`], the [`UseDefMapBuilder`] is notified of each new use, definition, and //! constraint as they are encountered by the //! [`SemanticIndexBuilder`](crate::semantic_index::builder::SemanticIndexBuilder) AST visit. For -//! each symbol, the builder tracks the `SymbolState` for that symbol. When we hit a use of a -//! symbol, it records the current state for that symbol for that use. When we reach the end of the -//! scope, it records the state for each symbol as the public definitions of that symbol. +//! each symbol, the builder tracks the `SymbolState` (`SymbolBindings` and `SymbolDeclarations`) +//! for that symbol. When we hit a use or definition of a symbol, we record the necessary parts of +//! the current state for that symbol that we need for that use or definition. When we reach the +//! end of the scope, it records the state for each symbol as the public definitions of that +//! symbol. //! -//! Let's walk through the above example. Initially we record for `x` that it has no visible -//! definitions, and may be unbound. When we see `x = 1`, we record that as the sole visible -//! definition of `x`, and flip `may_be_unbound` to `false`. Then we see `x = 2`, and it replaces -//! `x = 1` as the sole visible definition of `x`. When we get to `y = x`, we record that the -//! visible definitions for that use of `x` are just the `x = 2` definition. +//! Let's walk through the above example. Initially we record for `x` that it has no bindings, and +//! may be unbound. When we see `x = 1`, we record that as the sole live binding of `x`, and flip +//! `may_be_unbound` to `false`. Then we see `x = 2`, and we replace `x = 1` as the sole live +//! binding of `x`. When we get to `y = x`, we record that the live bindings for that use of `x` +//! are just the `x = 2` definition. //! //! Then we hit the `if` branch. We visit the `test` node (`flag` in this case), since that will -//! happen regardless. Then we take a pre-branch snapshot of the currently visible definitions for -//! all symbols, which we'll need later. Then we record `flag` as a possible constraint on the -//! currently visible definition (`x = 2`), and go ahead and visit the `if` body. When we see `x = -//! 3`, it replaces `x = 2` (constrained by `flag`) as the sole visible definition of `x`. At the -//! end of the `if` body, we take another snapshot of the currently-visible definitions; we'll call -//! this the post-if-body snapshot. +//! happen regardless. Then we take a pre-branch snapshot of the current state for all symbols, +//! which we'll need later. Then we record `flag` as a possible constraint on the current binding +//! (`x = 2`), and go ahead and visit the `if` body. When we see `x = 3`, it replaces `x = 2` +//! (constrained by `flag`) as the sole live binding of `x`. At the end of the `if` body, we take +//! another snapshot of the current symbol state; we'll call this the post-if-body snapshot. //! //! Now we need to visit the `else` clause. The conditions when entering the `else` clause should //! be the pre-if conditions; if we are entering the `else` clause, we know that the `if` test //! failed and we didn't execute the `if` body. So we first reset the builder to the pre-if state, -//! using the snapshot we took previously (meaning we now have `x = 2` as the sole visible -//! definition for `x` again), then visit the `else` clause, where `x = 4` replaces `x = 2` as the -//! sole visible definition of `x`. +//! using the snapshot we took previously (meaning we now have `x = 2` as the sole binding for `x` +//! again), then visit the `else` clause, where `x = 4` replaces `x = 2` as the sole live binding +//! of `x`. //! //! Now we reach the end of the if/else, and want to visit the following code. The state here needs //! to reflect that we might have gone through the `if` branch, or we might have gone through the //! `else` branch, and we don't know which. So we need to "merge" our current builder state -//! (reflecting the end-of-else state, with `x = 4` as the only visible definition) with our -//! post-if-body snapshot (which has `x = 3` as the only visible definition). The result of this -//! merge is that we now have two visible definitions of `x`: `x = 3` and `x = 4`. +//! (reflecting the end-of-else state, with `x = 4` as the only live binding) with our post-if-body +//! snapshot (which has `x = 3` as the only live binding). The result of this merge is that we now +//! have two live bindings of `x`: `x = 3` and `x = 4`. //! //! The [`UseDefMapBuilder`] itself just exposes methods for taking a snapshot, resetting to a //! snapshot, and merging a snapshot into the current state. The logic using these methods lives in //! [`SemanticIndexBuilder`](crate::semantic_index::builder::SemanticIndexBuilder), e.g. where it //! visits a `StmtIf` node. -//! -//! (In the future we may have some other questions we want to answer as well, such as "is this -//! definition used?", which will require tracking a bit more info in our map, e.g. a "used" bit -//! for each [`Definition`] which is flipped to true when we record that definition for a use.) use self::symbol_state::{ - ConstraintIdIterator, DefinitionIdWithConstraintsIterator, ScopedConstraintId, - ScopedDefinitionId, SymbolState, + BindingIdWithConstraintsIterator, ConstraintIdIterator, DeclarationIdIterator, + ScopedConstraintId, ScopedDefinitionId, SymbolBindings, SymbolDeclarations, SymbolState, }; use crate::semantic_index::ast_ids::ScopedUseId; use crate::semantic_index::definition::Definition; use crate::semantic_index::symbol::ScopedSymbolId; use ruff_index::IndexVec; +use rustc_hash::FxHashMap; use super::constraint::Constraint; @@ -163,60 +245,139 @@ pub(crate) struct UseDefMap<'db> { /// Array of [`Constraint`] in this scope. all_constraints: IndexVec>, - /// [`SymbolState`] visible at a [`ScopedUseId`]. - definitions_by_use: IndexVec, + /// [`SymbolBindings`] reaching a [`ScopedUseId`]. + bindings_by_use: IndexVec, + + /// [`SymbolBindings`] or [`SymbolDeclarations`] reaching a given [`Definition`]. + /// + /// If the definition is a binding (only) -- `x = 1` for example -- then we need + /// [`SymbolDeclarations`] to know whether this binding is permitted by the live declarations. + /// + /// If the definition is a declaration (only) -- `x: int` for example -- then we need + /// [`SymbolBindings`] to know whether this declaration is consistent with the previously + /// inferred type. + /// + /// If the definition is both a declaration and a binding -- `x: int = 1` for example -- then + /// we don't actually need anything here, all we'll need to validate is that our own RHS is a + /// valid assignment to our own annotation. + definitions_by_definition: FxHashMap, SymbolDefinitions>, /// [`SymbolState`] visible at end of scope for each symbol. - public_definitions: IndexVec, + public_symbols: IndexVec, } impl<'db> UseDefMap<'db> { - pub(crate) fn use_definitions( + pub(crate) fn bindings_at_use( &self, use_id: ScopedUseId, - ) -> DefinitionWithConstraintsIterator<'_, 'db> { - DefinitionWithConstraintsIterator { - all_definitions: &self.all_definitions, - all_constraints: &self.all_constraints, - inner: self.definitions_by_use[use_id].visible_definitions(), - } + ) -> BindingWithConstraintsIterator<'_, 'db> { + self.bindings_iterator(&self.bindings_by_use[use_id]) } pub(crate) fn use_may_be_unbound(&self, use_id: ScopedUseId) -> bool { - self.definitions_by_use[use_id].may_be_unbound() + self.bindings_by_use[use_id].may_be_unbound() } - pub(crate) fn public_definitions( + pub(crate) fn public_bindings( &self, symbol: ScopedSymbolId, - ) -> DefinitionWithConstraintsIterator<'_, 'db> { - DefinitionWithConstraintsIterator { + ) -> BindingWithConstraintsIterator<'_, 'db> { + self.bindings_iterator(self.public_symbols[symbol].bindings()) + } + + pub(crate) fn public_may_be_unbound(&self, symbol: ScopedSymbolId) -> bool { + self.public_symbols[symbol].may_be_unbound() + } + + #[allow(unused)] + pub(crate) fn bindings_at_declaration( + &self, + declaration: Definition<'db>, + ) -> BindingWithConstraintsIterator<'_, 'db> { + if let SymbolDefinitions::Bindings(bindings) = &self.definitions_by_definition[&declaration] + { + self.bindings_iterator(bindings) + } else { + unreachable!("Declaration has non-Bindings in definitions_by_definition"); + } + } + + #[allow(unused)] + pub(crate) fn declarations_at_binding( + &self, + binding: Definition<'db>, + ) -> DeclarationsIterator<'_, 'db> { + if let SymbolDefinitions::Declarations(declarations) = + &self.definitions_by_definition[&binding] + { + self.declarations_iterator(declarations) + } else { + unreachable!("Binding has non-Declarations in definitions_by_definition"); + } + } + + #[allow(unused)] + pub(crate) fn public_declarations( + &self, + symbol: ScopedSymbolId, + ) -> DeclarationsIterator<'_, 'db> { + self.declarations_iterator(self.public_symbols[symbol].declarations()) + } + + #[allow(unused)] + pub(crate) fn has_public_declarations(&self, symbol: ScopedSymbolId) -> bool { + !self.public_symbols[symbol].declarations().is_empty() + } + + #[allow(unused)] + pub(crate) fn public_may_be_undeclared(&self, symbol: ScopedSymbolId) -> bool { + self.public_symbols[symbol].may_be_undeclared() + } + + fn bindings_iterator<'a>( + &'a self, + bindings: &'a SymbolBindings, + ) -> BindingWithConstraintsIterator<'a, 'db> { + BindingWithConstraintsIterator { all_definitions: &self.all_definitions, all_constraints: &self.all_constraints, - inner: self.public_definitions[symbol].visible_definitions(), + inner: bindings.iter(), } } - pub(crate) fn public_may_be_unbound(&self, symbol: ScopedSymbolId) -> bool { - self.public_definitions[symbol].may_be_unbound() + fn declarations_iterator<'a>( + &'a self, + declarations: &'a SymbolDeclarations, + ) -> DeclarationsIterator<'a, 'db> { + DeclarationsIterator { + all_definitions: &self.all_definitions, + inner: declarations.iter(), + } } } +/// Either live bindings or live declarations for a symbol. +#[derive(Debug, PartialEq, Eq)] +enum SymbolDefinitions { + Bindings(SymbolBindings), + Declarations(SymbolDeclarations), +} + #[derive(Debug)] -pub(crate) struct DefinitionWithConstraintsIterator<'map, 'db> { +pub(crate) struct BindingWithConstraintsIterator<'map, 'db> { all_definitions: &'map IndexVec>, all_constraints: &'map IndexVec>, - inner: DefinitionIdWithConstraintsIterator<'map>, + inner: BindingIdWithConstraintsIterator<'map>, } -impl<'map, 'db> Iterator for DefinitionWithConstraintsIterator<'map, 'db> { - type Item = DefinitionWithConstraints<'map, 'db>; +impl<'map, 'db> Iterator for BindingWithConstraintsIterator<'map, 'db> { + type Item = BindingWithConstraints<'map, 'db>; fn next(&mut self) -> Option { self.inner .next() - .map(|def_id_with_constraints| DefinitionWithConstraints { - definition: self.all_definitions[def_id_with_constraints.definition], + .map(|def_id_with_constraints| BindingWithConstraints { + binding: self.all_definitions[def_id_with_constraints.definition], constraints: ConstraintsIterator { all_constraints: self.all_constraints, constraint_ids: def_id_with_constraints.constraint_ids, @@ -225,10 +386,10 @@ impl<'map, 'db> Iterator for DefinitionWithConstraintsIterator<'map, 'db> { } } -impl std::iter::FusedIterator for DefinitionWithConstraintsIterator<'_, '_> {} +impl std::iter::FusedIterator for BindingWithConstraintsIterator<'_, '_> {} -pub(crate) struct DefinitionWithConstraints<'map, 'db> { - pub(crate) definition: Definition<'db>, +pub(crate) struct BindingWithConstraints<'map, 'db> { + pub(crate) binding: Definition<'db>, pub(crate) constraints: ConstraintsIterator<'map, 'db>, } @@ -249,25 +410,43 @@ impl<'map, 'db> Iterator for ConstraintsIterator<'map, 'db> { impl std::iter::FusedIterator for ConstraintsIterator<'_, '_> {} +pub(crate) struct DeclarationsIterator<'map, 'db> { + all_definitions: &'map IndexVec>, + inner: DeclarationIdIterator<'map>, +} + +impl<'map, 'db> Iterator for DeclarationsIterator<'map, 'db> { + type Item = Definition<'db>; + + fn next(&mut self) -> Option { + self.inner.next().map(|def_id| self.all_definitions[def_id]) + } +} + +impl std::iter::FusedIterator for DeclarationsIterator<'_, '_> {} + /// A snapshot of the definitions and constraints state at a particular point in control flow. #[derive(Clone, Debug)] pub(super) struct FlowSnapshot { - definitions_by_symbol: IndexVec, + symbol_states: IndexVec, } #[derive(Debug, Default)] pub(super) struct UseDefMapBuilder<'db> { - /// Append-only array of [`Definition`]; None is unbound. + /// Append-only array of [`Definition`]. all_definitions: IndexVec>, /// Append-only array of [`Constraint`]. all_constraints: IndexVec>, - /// Visible definitions at each so-far-recorded use. - definitions_by_use: IndexVec, + /// Live bindings at each so-far-recorded use. + bindings_by_use: IndexVec, + + /// Live bindings or declarations for each so-far-recorded definition. + definitions_by_definition: FxHashMap, SymbolDefinitions>, - /// Currently visible definitions for each symbol. - definitions_by_symbol: IndexVec, + /// Currently live bindings and declarations for each symbol. + symbol_states: IndexVec, } impl<'db> UseDefMapBuilder<'db> { @@ -276,86 +455,103 @@ impl<'db> UseDefMapBuilder<'db> { } pub(super) fn add_symbol(&mut self, symbol: ScopedSymbolId) { - let new_symbol = self.definitions_by_symbol.push(SymbolState::unbound()); + let new_symbol = self.symbol_states.push(SymbolState::undefined()); debug_assert_eq!(symbol, new_symbol); } - pub(super) fn record_definition( - &mut self, - symbol: ScopedSymbolId, - definition: Definition<'db>, - ) { - // We have a new definition of a symbol; this replaces any previous definitions in this - // path. - let def_id = self.all_definitions.push(definition); - self.definitions_by_symbol[symbol] = SymbolState::with(def_id); + pub(super) fn record_binding(&mut self, symbol: ScopedSymbolId, binding: Definition<'db>) { + let def_id = self.all_definitions.push(binding); + let symbol_state = &mut self.symbol_states[symbol]; + self.definitions_by_definition.insert( + binding, + SymbolDefinitions::Declarations(symbol_state.declarations().clone()), + ); + symbol_state.record_binding(def_id); } pub(super) fn record_constraint(&mut self, constraint: Constraint<'db>) { let constraint_id = self.all_constraints.push(constraint); - for definitions in &mut self.definitions_by_symbol { - definitions.add_constraint(constraint_id); + for state in &mut self.symbol_states { + state.record_constraint(constraint_id); } } + pub(super) fn record_declaration( + &mut self, + symbol: ScopedSymbolId, + declaration: Definition<'db>, + ) { + let def_id = self.all_definitions.push(declaration); + let symbol_state = &mut self.symbol_states[symbol]; + self.definitions_by_definition.insert( + declaration, + SymbolDefinitions::Bindings(symbol_state.bindings().clone()), + ); + symbol_state.record_declaration(def_id); + } + + pub(super) fn record_declaration_and_binding( + &mut self, + symbol: ScopedSymbolId, + definition: Definition<'db>, + ) { + // We don't need to store anything in self.definitions_by_definition. + let def_id = self.all_definitions.push(definition); + let symbol_state = &mut self.symbol_states[symbol]; + symbol_state.record_declaration(def_id); + symbol_state.record_binding(def_id); + } + pub(super) fn record_use(&mut self, symbol: ScopedSymbolId, use_id: ScopedUseId) { - // We have a use of a symbol; clone the currently visible definitions for that symbol, and - // record them as the visible definitions for this use. + // We have a use of a symbol; clone the current bindings for that symbol, and record them + // as the live bindings for this use. let new_use = self - .definitions_by_use - .push(self.definitions_by_symbol[symbol].clone()); + .bindings_by_use + .push(self.symbol_states[symbol].bindings().clone()); debug_assert_eq!(use_id, new_use); } /// Take a snapshot of the current visible-symbols state. pub(super) fn snapshot(&self) -> FlowSnapshot { FlowSnapshot { - definitions_by_symbol: self.definitions_by_symbol.clone(), + symbol_states: self.symbol_states.clone(), } } - /// Restore the current builder visible-definitions state to the given snapshot. + /// Restore the current builder symbols state to the given snapshot. pub(super) fn restore(&mut self, snapshot: FlowSnapshot) { - // We never remove symbols from `definitions_by_symbol` (it's an IndexVec, and the symbol + // We never remove symbols from `symbol_states` (it's an IndexVec, and the symbol // IDs must line up), so the current number of known symbols must always be equal to or // greater than the number of known symbols in a previously-taken snapshot. - let num_symbols = self.definitions_by_symbol.len(); - debug_assert!(num_symbols >= snapshot.definitions_by_symbol.len()); + let num_symbols = self.symbol_states.len(); + debug_assert!(num_symbols >= snapshot.symbol_states.len()); // Restore the current visible-definitions state to the given snapshot. - self.definitions_by_symbol = snapshot.definitions_by_symbol; + self.symbol_states = snapshot.symbol_states; // If the snapshot we are restoring is missing some symbols we've recorded since, we need // to fill them in so the symbol IDs continue to line up. Since they don't exist in the - // snapshot, the correct state to fill them in with is "unbound". - self.definitions_by_symbol - .resize(num_symbols, SymbolState::unbound()); + // snapshot, the correct state to fill them in with is "undefined". + self.symbol_states + .resize(num_symbols, SymbolState::undefined()); } /// Merge the given snapshot into the current state, reflecting that we might have taken either - /// path to get here. The new visible-definitions state for each symbol should include - /// definitions from both the prior state and the snapshot. + /// path to get here. The new state for each symbol should include definitions from both the + /// prior state and the snapshot. pub(super) fn merge(&mut self, snapshot: FlowSnapshot) { - // The tricky thing about merging two Ranges pointing into `all_definitions` is that if the - // two Ranges aren't already adjacent in `all_definitions`, we will have to copy at least - // one or the other of the ranges to the end of `all_definitions` so as to make them - // adjacent. We can't ever move things around in `all_definitions` because previously - // recorded uses may still have ranges pointing to any part of it; all we can do is append. - // It's possible we may end up with some old entries in `all_definitions` that nobody is - // pointing to, but that's OK. - - // We never remove symbols from `definitions_by_symbol` (it's an IndexVec, and the symbol + // We never remove symbols from `symbol_states` (it's an IndexVec, and the symbol // IDs must line up), so the current number of known symbols must always be equal to or // greater than the number of known symbols in a previously-taken snapshot. - debug_assert!(self.definitions_by_symbol.len() >= snapshot.definitions_by_symbol.len()); + debug_assert!(self.symbol_states.len() >= snapshot.symbol_states.len()); - let mut snapshot_definitions_iter = snapshot.definitions_by_symbol.into_iter(); - for current in &mut self.definitions_by_symbol { + let mut snapshot_definitions_iter = snapshot.symbol_states.into_iter(); + for current in &mut self.symbol_states { if let Some(snapshot) = snapshot_definitions_iter.next() { current.merge(snapshot); } else { // Symbol not present in snapshot, so it's unbound from that path. - current.add_unbound(); + current.set_may_be_unbound(); } } } @@ -363,14 +559,16 @@ impl<'db> UseDefMapBuilder<'db> { pub(super) fn finish(mut self) -> UseDefMap<'db> { self.all_definitions.shrink_to_fit(); self.all_constraints.shrink_to_fit(); - self.definitions_by_symbol.shrink_to_fit(); - self.definitions_by_use.shrink_to_fit(); + self.symbol_states.shrink_to_fit(); + self.bindings_by_use.shrink_to_fit(); + self.definitions_by_definition.shrink_to_fit(); UseDefMap { all_definitions: self.all_definitions, all_constraints: self.all_constraints, - definitions_by_use: self.definitions_by_use, - public_definitions: self.definitions_by_symbol, + bindings_by_use: self.bindings_by_use, + public_symbols: self.symbol_states, + definitions_by_definition: self.definitions_by_definition, } } } diff --git a/crates/red_knot_python_semantic/src/semantic_index/use_def/symbol_state.rs b/crates/red_knot_python_semantic/src/semantic_index/use_def/symbol_state.rs index c465bbe320b1f..bfd231e456c1e 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/use_def/symbol_state.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/use_def/symbol_state.rs @@ -1,13 +1,13 @@ -//! Track visible definitions of a symbol, and applicable constraints per definition. +//! Track live bindings per symbol, applicable constraints per binding, and live declarations. //! //! These data structures operate entirely on scope-local newtype-indices for definitions and //! constraints, referring to their location in the `all_definitions` and `all_constraints` //! indexvecs in [`super::UseDefMapBuilder`]. //! -//! We need to track arbitrary associations between definitions and constraints, not just a single -//! set of currently dominating constraints (where "dominating" means "control flow must have -//! passed through it to reach this point"), because we can have dominating constraints that apply -//! to some definitions but not others, as in this code: +//! We need to track arbitrary associations between bindings and constraints, not just a single set +//! of currently dominating constraints (where "dominating" means "control flow must have passed +//! through it to reach this point"), because we can have dominating constraints that apply to some +//! bindings but not others, as in this code: //! //! ```python //! x = 1 if flag else None @@ -18,11 +18,11 @@ //! ``` //! //! The `x is not None` constraint dominates the final use of `x`, but it applies only to the first -//! definition of `x`, not the second, so `None` is a possible value for `x`. +//! binding of `x`, not the second, so `None` is a possible value for `x`. //! -//! And we can't just track, for each definition, an index into a list of dominating constraints, -//! either, because we can have definitions which are still visible, but subject to constraints -//! that are no longer dominating, as in this code: +//! And we can't just track, for each binding, an index into a list of dominating constraints, +//! either, because we can have bindings which are still visible, but subject to constraints that +//! are no longer dominating, as in this code: //! //! ```python //! x = 0 @@ -33,13 +33,16 @@ //! ``` //! //! From the point of view of the final use of `x`, the `x is not None` constraint no longer -//! dominates, but it does dominate the `x = 1 if flag2 else None` definition, so we have to keep +//! dominates, but it does dominate the `x = 1 if flag2 else None` binding, so we have to keep //! track of that. //! //! The data structures used here ([`BitSet`] and [`smallvec::SmallVec`]) optimize for keeping all //! data inline (avoiding lots of scattered allocations) in small-to-medium cases, and falling back -//! to heap allocation to be able to scale to arbitrary numbers of definitions and constraints when -//! needed. +//! to heap allocation to be able to scale to arbitrary numbers of live bindings and constraints +//! when needed. +//! +//! Tracking live declarations is simpler, since constraints are not involved, but otherwise very +//! similar to tracking live bindings. use super::bitset::{BitSet, BitSetIterator}; use ruff_index::newtype_index; use smallvec::SmallVec; @@ -53,93 +56,192 @@ pub(super) struct ScopedDefinitionId; pub(super) struct ScopedConstraintId; /// Can reference this * 64 total definitions inline; more will fall back to the heap. -const INLINE_DEFINITION_BLOCKS: usize = 3; +const INLINE_BINDING_BLOCKS: usize = 3; + +/// A [`BitSet`] of [`ScopedDefinitionId`], representing live bindings of a symbol in a scope. +type Bindings = BitSet; +type BindingsIterator<'a> = BitSetIterator<'a, INLINE_BINDING_BLOCKS>; -/// A [`BitSet`] of [`ScopedDefinitionId`], representing visible definitions of a symbol in a scope. -type Definitions = BitSet; -type DefinitionsIterator<'a> = BitSetIterator<'a, INLINE_DEFINITION_BLOCKS>; +/// Can reference this * 64 total declarations inline; more will fall back to the heap. +const INLINE_DECLARATION_BLOCKS: usize = 3; + +/// A [`BitSet`] of [`ScopedDefinitionId`], representing live declarations of a symbol in a scope. +type Declarations = BitSet; +type DeclarationsIterator<'a> = BitSetIterator<'a, INLINE_DECLARATION_BLOCKS>; /// Can reference this * 64 total constraints inline; more will fall back to the heap. const INLINE_CONSTRAINT_BLOCKS: usize = 2; -/// Can keep inline this many visible definitions per symbol at a given time; more will go to heap. -const INLINE_VISIBLE_DEFINITIONS_PER_SYMBOL: usize = 4; +/// Can keep inline this many live bindings per symbol at a given time; more will go to heap. +const INLINE_BINDINGS_PER_SYMBOL: usize = 4; -/// One [`BitSet`] of applicable [`ScopedConstraintId`] per visible definition. -type InlineConstraintArray = - [BitSet; INLINE_VISIBLE_DEFINITIONS_PER_SYMBOL]; +/// One [`BitSet`] of applicable [`ScopedConstraintId`] per live binding. +type InlineConstraintArray = [BitSet; INLINE_BINDINGS_PER_SYMBOL]; type Constraints = SmallVec; type ConstraintsIterator<'a> = std::slice::Iter<'a, BitSet>; type ConstraintsIntoIterator = smallvec::IntoIter; -/// Visible definitions and narrowing constraints for a single symbol at some point in control flow. +/// Live declarations for a single symbol at some point in control flow. #[derive(Clone, Debug, PartialEq, Eq)] -pub(super) struct SymbolState { - /// [`BitSet`]: which [`ScopedDefinitionId`] are visible for this symbol? - visible_definitions: Definitions, +pub(super) struct SymbolDeclarations { + /// [`BitSet`]: which declarations (as [`ScopedDefinitionId`]) can reach the current location? + live_declarations: Declarations, + + /// Could the symbol be un-declared at this point? + may_be_undeclared: bool, +} + +impl SymbolDeclarations { + fn undeclared() -> Self { + Self { + live_declarations: Declarations::default(), + may_be_undeclared: true, + } + } + + /// Record a newly-encountered declaration for this symbol. + fn record_declaration(&mut self, declaration_id: ScopedDefinitionId) { + self.live_declarations = Declarations::with(declaration_id.into()); + self.may_be_undeclared = false; + } + + /// Return an iterator over live declarations for this symbol. + #[allow(unused)] + pub(super) fn iter(&self) -> DeclarationIdIterator { + DeclarationIdIterator { + inner: self.live_declarations.iter(), + } + } - /// For each definition, which [`ScopedConstraintId`] apply? + #[allow(unused)] + pub(super) fn is_empty(&self) -> bool { + self.live_declarations.is_empty() + } + + pub(super) fn may_be_undeclared(&self) -> bool { + self.may_be_undeclared + } +} + +/// Live bindings and narrowing constraints for a single symbol at some point in control flow. +#[derive(Clone, Debug, PartialEq, Eq)] +pub(super) struct SymbolBindings { + /// [`BitSet`]: which bindings (as [`ScopedDefinitionId`]) can reach the current location? + live_bindings: Bindings, + + /// For each live binding, which [`ScopedConstraintId`] apply? /// /// This is a [`smallvec::SmallVec`] which should always have one [`BitSet`] of constraints per - /// definition in `visible_definitions`. + /// binding in `live_bindings`. constraints: Constraints, /// Could the symbol be unbound at this point? may_be_unbound: bool, } -/// A single [`ScopedDefinitionId`] with an iterator of its applicable [`ScopedConstraintId`]. -#[derive(Debug)] -pub(super) struct DefinitionIdWithConstraints<'a> { - pub(super) definition: ScopedDefinitionId, - pub(super) constraint_ids: ConstraintIdIterator<'a>, -} - -impl SymbolState { - /// Return a new [`SymbolState`] representing an unbound symbol. - pub(super) fn unbound() -> Self { +impl SymbolBindings { + fn unbound() -> Self { Self { - visible_definitions: Definitions::default(), + live_bindings: Bindings::default(), constraints: Constraints::default(), may_be_unbound: true, } } - /// Return a new [`SymbolState`] representing a symbol with a single visible definition. - pub(super) fn with(definition_id: ScopedDefinitionId) -> Self { - let mut constraints = Constraints::with_capacity(1); - constraints.push(BitSet::default()); - Self { - visible_definitions: Definitions::with(definition_id.into()), - constraints, - may_be_unbound: false, - } - } - /// Add Unbound as a possibility for this symbol. - pub(super) fn add_unbound(&mut self) { + fn set_may_be_unbound(&mut self) { self.may_be_unbound = true; } - /// Add given constraint to all currently-visible definitions. - pub(super) fn add_constraint(&mut self, constraint_id: ScopedConstraintId) { + /// Record a newly-encountered binding for this symbol. + pub(super) fn record_binding(&mut self, binding_id: ScopedDefinitionId) { + // The new binding replaces all previous live bindings in this path, and has no + // constraints. + self.live_bindings = Bindings::with(binding_id.into()); + self.constraints = Constraints::with_capacity(1); + self.constraints.push(BitSet::default()); + self.may_be_unbound = false; + } + + /// Add given constraint to all live bindings. + pub(super) fn record_constraint(&mut self, constraint_id: ScopedConstraintId) { for bitset in &mut self.constraints { bitset.insert(constraint_id.into()); } } + /// Iterate over currently live bindings for this symbol. + pub(super) fn iter(&self) -> BindingIdWithConstraintsIterator { + BindingIdWithConstraintsIterator { + definitions: self.live_bindings.iter(), + constraints: self.constraints.iter(), + } + } + + pub(super) fn may_be_unbound(&self) -> bool { + self.may_be_unbound + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub(super) struct SymbolState { + declarations: SymbolDeclarations, + bindings: SymbolBindings, +} + +impl SymbolState { + /// Return a new [`SymbolState`] representing an unbound, undeclared symbol. + pub(super) fn undefined() -> Self { + Self { + declarations: SymbolDeclarations::undeclared(), + bindings: SymbolBindings::unbound(), + } + } + + /// Add Unbound as a possibility for this symbol. + pub(super) fn set_may_be_unbound(&mut self) { + self.bindings.set_may_be_unbound(); + } + + /// Record a newly-encountered binding for this symbol. + pub(super) fn record_binding(&mut self, binding_id: ScopedDefinitionId) { + self.bindings.record_binding(binding_id); + } + + /// Add given constraint to all live bindings. + pub(super) fn record_constraint(&mut self, constraint_id: ScopedConstraintId) { + self.bindings.record_constraint(constraint_id); + } + + /// Record a newly-encountered declaration of this symbol. + pub(super) fn record_declaration(&mut self, declaration_id: ScopedDefinitionId) { + self.declarations.record_declaration(declaration_id); + } + /// Merge another [`SymbolState`] into this one. pub(super) fn merge(&mut self, b: SymbolState) { let mut a = Self { - visible_definitions: Definitions::default(), - constraints: Constraints::default(), - may_be_unbound: self.may_be_unbound || b.may_be_unbound, + bindings: SymbolBindings { + live_bindings: Bindings::default(), + constraints: Constraints::default(), + may_be_unbound: self.bindings.may_be_unbound || b.bindings.may_be_unbound, + }, + declarations: SymbolDeclarations { + live_declarations: self.declarations.live_declarations.clone(), + may_be_undeclared: self.declarations.may_be_undeclared + || b.declarations.may_be_undeclared, + }, }; + std::mem::swap(&mut a, self); - let mut a_defs_iter = a.visible_definitions.iter(); - let mut b_defs_iter = b.visible_definitions.iter(); - let mut a_constraints_iter = a.constraints.into_iter(); - let mut b_constraints_iter = b.constraints.into_iter(); + self.declarations + .live_declarations + .union(&b.declarations.live_declarations); + + let mut a_defs_iter = a.bindings.live_bindings.iter(); + let mut b_defs_iter = b.bindings.live_bindings.iter(); + let mut a_constraints_iter = a.bindings.constraints.into_iter(); + let mut b_constraints_iter = b.bindings.constraints.into_iter(); let mut opt_a_def: Option = a_defs_iter.next(); let mut opt_b_def: Option = b_defs_iter.next(); @@ -152,7 +254,7 @@ impl SymbolState { // Helper to push `def`, with constraints in `constraints_iter`, onto `self`. let push = |def, constraints_iter: &mut ConstraintsIntoIterator, merged: &mut Self| { - merged.visible_definitions.insert(def); + merged.bindings.live_bindings.insert(def); // SAFETY: we only ever create SymbolState with either no definitions and no constraint // bitsets (`::unbound`) or one definition and one constraint bitset (`::with`), and // `::merge` always pushes one definition and one constraint bitset together (just @@ -161,7 +263,7 @@ impl SymbolState { let constraints = constraints_iter .next() .expect("definitions and constraints length mismatch"); - merged.constraints.push(constraints); + merged.bindings.constraints.push(constraints); }; loop { @@ -191,7 +293,8 @@ impl SymbolState { // If the same definition is visible through both paths, any constraint // that applies on only one path is irrelevant to the resulting type from // unioning the two paths, so we intersect the constraints. - self.constraints + self.bindings + .constraints .last_mut() .unwrap() .intersect(&a_constraints); @@ -214,40 +317,54 @@ impl SymbolState { } } - /// Get iterator over visible definitions with constraints. - pub(super) fn visible_definitions(&self) -> DefinitionIdWithConstraintsIterator { - DefinitionIdWithConstraintsIterator { - definitions: self.visible_definitions.iter(), - constraints: self.constraints.iter(), - } + pub(super) fn bindings(&self) -> &SymbolBindings { + &self.bindings + } + + pub(super) fn declarations(&self) -> &SymbolDeclarations { + &self.declarations } /// Could the symbol be unbound? pub(super) fn may_be_unbound(&self) -> bool { - self.may_be_unbound + self.bindings.may_be_unbound() + } + + /// Could the symbol be undeclared? + pub(super) fn may_be_undeclared(&self) -> bool { + self.declarations.may_be_undeclared() } } -/// The default state of a symbol (if we've seen no definitions of it) is unbound. +/// The default state of a symbol, if we've seen no definitions of it, is undefined (that is, +/// both unbound and undeclared). impl Default for SymbolState { fn default() -> Self { - SymbolState::unbound() + SymbolState::undefined() } } +/// A single binding (as [`ScopedDefinitionId`]) with an iterator of its applicable +/// [`ScopedConstraintId`]. #[derive(Debug)] -pub(super) struct DefinitionIdWithConstraintsIterator<'a> { - definitions: DefinitionsIterator<'a>, +pub(super) struct BindingIdWithConstraints<'a> { + pub(super) definition: ScopedDefinitionId, + pub(super) constraint_ids: ConstraintIdIterator<'a>, +} + +#[derive(Debug)] +pub(super) struct BindingIdWithConstraintsIterator<'a> { + definitions: BindingsIterator<'a>, constraints: ConstraintsIterator<'a>, } -impl<'a> Iterator for DefinitionIdWithConstraintsIterator<'a> { - type Item = DefinitionIdWithConstraints<'a>; +impl<'a> Iterator for BindingIdWithConstraintsIterator<'a> { + type Item = BindingIdWithConstraints<'a>; fn next(&mut self) -> Option { match (self.definitions.next(), self.constraints.next()) { (None, None) => None, - (Some(def), Some(constraints)) => Some(DefinitionIdWithConstraints { + (Some(def), Some(constraints)) => Some(BindingIdWithConstraints { definition: ScopedDefinitionId::from_u32(def), constraint_ids: ConstraintIdIterator { wrapped: constraints.iter(), @@ -259,7 +376,7 @@ impl<'a> Iterator for DefinitionIdWithConstraintsIterator<'a> { } } -impl std::iter::FusedIterator for DefinitionIdWithConstraintsIterator<'_> {} +impl std::iter::FusedIterator for BindingIdWithConstraintsIterator<'_> {} #[derive(Debug)] pub(super) struct ConstraintIdIterator<'a> { @@ -276,15 +393,32 @@ impl Iterator for ConstraintIdIterator<'_> { impl std::iter::FusedIterator for ConstraintIdIterator<'_> {} +#[allow(unused)] +#[derive(Debug)] +pub(super) struct DeclarationIdIterator<'a> { + inner: DeclarationsIterator<'a>, +} + +impl<'a> Iterator for DeclarationIdIterator<'a> { + type Item = ScopedDefinitionId; + + fn next(&mut self) -> Option { + self.inner.next().map(ScopedDefinitionId::from_u32) + } +} + +impl std::iter::FusedIterator for DeclarationIdIterator<'_> {} + #[cfg(test)] mod tests { use super::{ScopedConstraintId, ScopedDefinitionId, SymbolState}; impl SymbolState { - pub(crate) fn assert(&self, may_be_unbound: bool, expected: &[&str]) { + pub(crate) fn assert_bindings(&self, may_be_unbound: bool, expected: &[&str]) { assert_eq!(self.may_be_unbound(), may_be_unbound); let actual = self - .visible_definitions() + .bindings() + .iter() .map(|def_id_with_constraints| { format!( "{}<{}>", @@ -300,75 +434,142 @@ mod tests { .collect::>(); assert_eq!(actual, expected); } + + pub(crate) fn assert_declarations(&self, may_be_undeclared: bool, expected: &[u32]) { + assert_eq!(self.may_be_undeclared(), may_be_undeclared); + let actual = self + .declarations() + .iter() + .map(ScopedDefinitionId::as_u32) + .collect::>(); + assert_eq!(actual, expected); + } } #[test] fn unbound() { - let cd = SymbolState::unbound(); + let sym = SymbolState::undefined(); - cd.assert(true, &[]); + sym.assert_bindings(true, &[]); } #[test] fn with() { - let cd = SymbolState::with(ScopedDefinitionId::from_u32(0)); + let mut sym = SymbolState::undefined(); + sym.record_binding(ScopedDefinitionId::from_u32(0)); - cd.assert(false, &["0<>"]); + sym.assert_bindings(false, &["0<>"]); } #[test] - fn add_unbound() { - let mut cd = SymbolState::with(ScopedDefinitionId::from_u32(0)); - cd.add_unbound(); + fn set_may_be_unbound() { + let mut sym = SymbolState::undefined(); + sym.record_binding(ScopedDefinitionId::from_u32(0)); + sym.set_may_be_unbound(); - cd.assert(true, &["0<>"]); + sym.assert_bindings(true, &["0<>"]); } #[test] - fn add_constraint() { - let mut cd = SymbolState::with(ScopedDefinitionId::from_u32(0)); - cd.add_constraint(ScopedConstraintId::from_u32(0)); + fn record_constraint() { + let mut sym = SymbolState::undefined(); + sym.record_binding(ScopedDefinitionId::from_u32(0)); + sym.record_constraint(ScopedConstraintId::from_u32(0)); - cd.assert(false, &["0<0>"]); + sym.assert_bindings(false, &["0<0>"]); } #[test] fn merge() { // merging the same definition with the same constraint keeps the constraint - let mut cd0a = SymbolState::with(ScopedDefinitionId::from_u32(0)); - cd0a.add_constraint(ScopedConstraintId::from_u32(0)); + let mut sym0a = SymbolState::undefined(); + sym0a.record_binding(ScopedDefinitionId::from_u32(0)); + sym0a.record_constraint(ScopedConstraintId::from_u32(0)); - let mut cd0b = SymbolState::with(ScopedDefinitionId::from_u32(0)); - cd0b.add_constraint(ScopedConstraintId::from_u32(0)); + let mut sym0b = SymbolState::undefined(); + sym0b.record_binding(ScopedDefinitionId::from_u32(0)); + sym0b.record_constraint(ScopedConstraintId::from_u32(0)); - cd0a.merge(cd0b); - let mut cd0 = cd0a; - cd0.assert(false, &["0<0>"]); + sym0a.merge(sym0b); + let mut sym0 = sym0a; + sym0.assert_bindings(false, &["0<0>"]); // merging the same definition with differing constraints drops all constraints - let mut cd1a = SymbolState::with(ScopedDefinitionId::from_u32(1)); - cd1a.add_constraint(ScopedConstraintId::from_u32(1)); + let mut sym1a = SymbolState::undefined(); + sym1a.record_binding(ScopedDefinitionId::from_u32(1)); + sym1a.record_constraint(ScopedConstraintId::from_u32(1)); - let mut cd1b = SymbolState::with(ScopedDefinitionId::from_u32(1)); - cd1b.add_constraint(ScopedConstraintId::from_u32(2)); + let mut sym1b = SymbolState::undefined(); + sym1b.record_binding(ScopedDefinitionId::from_u32(1)); + sym1b.record_constraint(ScopedConstraintId::from_u32(2)); - cd1a.merge(cd1b); - let cd1 = cd1a; - cd1.assert(false, &["1<>"]); + sym1a.merge(sym1b); + let sym1 = sym1a; + sym1.assert_bindings(false, &["1<>"]); // merging a constrained definition with unbound keeps both - let mut cd2a = SymbolState::with(ScopedDefinitionId::from_u32(2)); - cd2a.add_constraint(ScopedConstraintId::from_u32(3)); + let mut sym2a = SymbolState::undefined(); + sym2a.record_binding(ScopedDefinitionId::from_u32(2)); + sym2a.record_constraint(ScopedConstraintId::from_u32(3)); - let cd2b = SymbolState::unbound(); + let sym2b = SymbolState::undefined(); - cd2a.merge(cd2b); - let cd2 = cd2a; - cd2.assert(true, &["2<3>"]); + sym2a.merge(sym2b); + let sym2 = sym2a; + sym2.assert_bindings(true, &["2<3>"]); // merging different definitions keeps them each with their existing constraints - cd0.merge(cd2); - let cd = cd0; - cd.assert(true, &["0<0>", "2<3>"]); + sym0.merge(sym2); + let sym = sym0; + sym.assert_bindings(true, &["0<0>", "2<3>"]); + } + + #[test] + fn no_declaration() { + let sym = SymbolState::undefined(); + + sym.assert_declarations(true, &[]); + } + + #[test] + fn record_declaration() { + let mut sym = SymbolState::undefined(); + sym.record_declaration(ScopedDefinitionId::from_u32(1)); + + sym.assert_declarations(false, &[1]); + } + + #[test] + fn record_declaration_override() { + let mut sym = SymbolState::undefined(); + sym.record_declaration(ScopedDefinitionId::from_u32(1)); + sym.record_declaration(ScopedDefinitionId::from_u32(2)); + + sym.assert_declarations(false, &[2]); + } + + #[test] + fn record_declaration_merge() { + let mut sym = SymbolState::undefined(); + sym.record_declaration(ScopedDefinitionId::from_u32(1)); + + let mut sym2 = SymbolState::undefined(); + sym2.record_declaration(ScopedDefinitionId::from_u32(2)); + + sym.merge(sym2); + + sym.assert_declarations(false, &[1, 2]); + } + + #[test] + fn record_declaration_merge_partial_undeclared() { + let mut sym = SymbolState::undefined(); + sym.record_declaration(ScopedDefinitionId::from_u32(1)); + + let sym2 = SymbolState::undefined(); + + sym.merge(sym2); + + sym.assert_declarations(true, &[1]); } } diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index d37b3c9ce7b08..0224524ea5544 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -7,8 +7,8 @@ use crate::semantic_index::ast_ids::HasScopedAstId; use crate::semantic_index::definition::{Definition, DefinitionKind}; use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId}; use crate::semantic_index::{ - global_scope, semantic_index, symbol_table, use_def_map, DefinitionWithConstraints, - DefinitionWithConstraintsIterator, + global_scope, semantic_index, symbol_table, use_def_map, BindingWithConstraints, + BindingWithConstraintsIterator, }; use crate::stdlib::{builtins_symbol_ty, types_symbol_ty, typeshed_symbol_ty}; use crate::types::narrow::narrowing_constraint; @@ -51,7 +51,7 @@ pub(crate) fn symbol_ty_by_id<'db>( let use_def = use_def_map(db, scope); definitions_ty( db, - use_def.public_definitions(symbol), + use_def.public_bindings(symbol), use_def .public_may_be_unbound(symbol) .then_some(Type::Unbound), @@ -113,28 +113,28 @@ pub(crate) fn definition_expression_ty<'db>( /// provide an `unbound_ty`. pub(crate) fn definitions_ty<'db>( db: &'db dyn Db, - definitions_with_constraints: DefinitionWithConstraintsIterator<'_, 'db>, + bindings_with_constraints: BindingWithConstraintsIterator<'_, 'db>, unbound_ty: Option>, ) -> Type<'db> { - let def_types = definitions_with_constraints.map( - |DefinitionWithConstraints { - definition, + let def_types = bindings_with_constraints.map( + |BindingWithConstraints { + binding, constraints, }| { - let mut constraint_tys = constraints - .filter_map(|constraint| narrowing_constraint(db, constraint, definition)); - let definition_ty = definition_ty(db, definition); + let mut constraint_tys = + constraints.filter_map(|constraint| narrowing_constraint(db, constraint, binding)); + let binding_ty = definition_ty(db, binding); if let Some(first_constraint_ty) = constraint_tys.next() { let mut builder = IntersectionBuilder::new(db); builder = builder - .add_positive(definition_ty) + .add_positive(binding_ty) .add_positive(first_constraint_ty); for constraint_ty in constraint_tys { builder = builder.add_positive(constraint_ty); } builder.build() } else { - definition_ty + binding_ty } }, ); @@ -589,7 +589,7 @@ impl<'db> FunctionType<'db> { /// inferred return type for this function pub fn return_type(&self, db: &'db dyn Db) -> Type<'db> { let definition = self.definition(db); - let DefinitionKind::Function(function_stmt_node) = definition.node(db) else { + let DefinitionKind::Function(function_stmt_node) = definition.kind(db) else { panic!("Function type definition must have `DefinitionKind::Function`") }; @@ -644,7 +644,7 @@ impl<'db> ClassType<'db> { /// If `definition` is not a `DefinitionKind::Class`. pub fn bases(&self, db: &'db dyn Db) -> impl Iterator> { let definition = self.definition(db); - let DefinitionKind::Class(class_stmt_node) = definition.node(db) else { + let DefinitionKind::Class(class_stmt_node) = definition.kind(db) else { panic!("Class type definition must have DefinitionKind::Class"); }; class_stmt_node diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index e5415a8b868d2..ba153d727606c 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -364,7 +364,7 @@ impl<'db> TypeInferenceBuilder<'db> { } fn infer_region_definition(&mut self, definition: Definition<'db>) { - match definition.node(self.db) { + match definition.kind(self.db) { DefinitionKind::Function(function) => { self.infer_function_definition(function.node(), definition); } @@ -435,7 +435,7 @@ impl<'db> TypeInferenceBuilder<'db> { } fn infer_region_deferred(&mut self, definition: Definition<'db>) { - match definition.node(self.db) { + match definition.kind(self.db) { DefinitionKind::Function(function) => self.infer_function_deferred(function.node()), DefinitionKind::Class(class) => self.infer_class_deferred(class.node()), DefinitionKind::AnnotatedAssignment(_annotated_assignment) => { @@ -1938,17 +1938,17 @@ impl<'db> TypeInferenceBuilder<'db> { /// Look up a name reference that isn't bound in the local scope. fn lookup_name(&self, name: &ast::name::Name) -> Type<'db> { let file_scope_id = self.scope.file_scope_id(self.db); - let is_defined = self + let is_bound = self .index .symbol_table(file_scope_id) .symbol_by_name(name) .expect("Symbol table should create a symbol for every Name node") - .is_defined(); + .is_bound(); - // In function-like scopes, any local variable (symbol that is defined in this - // scope) can only have a definition in this scope, or be undefined; it never references - // another scope. (At runtime, it would use the `LOAD_FAST` opcode.) - if !is_defined || !self.scope.is_function_like(self.db) { + // In function-like scopes, any local variable (symbol that is bound in this scope) can + // only have a definition in this scope, or error; it never references another scope. + // (At runtime, it would use the `LOAD_FAST` opcode.) + if !is_bound || !self.scope.is_function_like(self.db) { // Walk up parent scopes looking for a possible enclosing scope that may have a // definition of this name visible to us (would be `LOAD_DEREF` at runtime.) for (enclosing_scope_file_id, _) in self.index.ancestor_scopes(file_scope_id) { @@ -1963,7 +1963,7 @@ impl<'db> TypeInferenceBuilder<'db> { let Some(enclosing_symbol) = enclosing_symbol_table.symbol_by_name(name) else { continue; }; - if enclosing_symbol.is_defined() { + if enclosing_symbol.is_bound() { // We can return early here, because the nearest function-like scope that // defines a name must be the only source for the nonlocal reference (at // runtime, it is the scope that creates the cell for our closure.) If the name @@ -2005,13 +2005,13 @@ impl<'db> TypeInferenceBuilder<'db> { // if we're inferring types of deferred expressions, always treat them as public symbols let (definitions, may_be_unbound) = if self.is_deferred() { ( - use_def.public_definitions(symbol), + use_def.public_bindings(symbol), use_def.public_may_be_unbound(symbol), ) } else { let use_id = name.scoped_use_id(self.db, self.scope); ( - use_def.use_definitions(use_id), + use_def.bindings_at_use(use_id), use_def.use_may_be_unbound(use_id), ) }; @@ -5087,13 +5087,13 @@ mod tests { // Incremental inference tests - fn first_public_def<'db>(db: &'db TestDb, file: File, name: &str) -> Definition<'db> { + fn first_public_binding<'db>(db: &'db TestDb, file: File, name: &str) -> Definition<'db> { let scope = global_scope(db, file); use_def_map(db, scope) - .public_definitions(symbol_table(db, scope).symbol_id_by_name(name).unwrap()) + .public_bindings(symbol_table(db, scope).symbol_id_by_name(name).unwrap()) .next() .unwrap() - .definition + .binding } #[test] @@ -5151,7 +5151,7 @@ mod tests { assert_function_query_was_not_run( &db, infer_definition_types, - first_public_def(&db, a, "x"), + first_public_binding(&db, a, "x"), &events, ); @@ -5187,7 +5187,7 @@ mod tests { assert_function_query_was_not_run( &db, infer_definition_types, - first_public_def(&db, a, "x"), + first_public_binding(&db, a, "x"), &events, ); Ok(())