-
Notifications
You must be signed in to change notification settings - Fork 33
Implement DependencyGraph and foundational driver architecture
#271
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev/imports
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,335 @@ | ||
| use std::collections::{HashMap, VecDeque}; | ||
| use std::path::PathBuf; | ||
| use std::sync::Arc; | ||
|
|
||
| use crate::error::{Error, ErrorCollector, RichError, Span}; | ||
| use crate::parse::{self, ParseFromStrWithErrors}; | ||
| use crate::resolution::{CanonPath, DependencyMap, SourceFile}; | ||
|
|
||
| /// Represents a single, isolated file in the SimplicityHL project. | ||
| /// In this architecture, a file and a module are the exact same thing. | ||
| #[derive(Debug, Clone)] | ||
| struct Module { | ||
| source: SourceFile, | ||
| /// The completely parsed program for this specific file. | ||
| /// it contains all the functions, aliases, and imports defined inside the file. | ||
| parsed_program: parse::Program, | ||
| } | ||
|
|
||
| /// The central nervous system of the compiler's frontend, serving as the critical | ||
| /// intermediate representation between isolated parsing and global semantic analysis. | ||
| /// | ||
| /// While an Abstract Syntax Tree (AST) represents the grammar of a single file, the | ||
| /// `DependencyGraph` bridges the gap by linking multiple `parse::Program` ASTs into a | ||
| /// unified Directed Acyclic Graph (DAG). This DAG is then used to build a convenient, | ||
| /// global `Program` structure, which the semantic analyzer can easily process. | ||
| /// | ||
| /// This structure provides the global context necessary to solve high-level compiler | ||
| /// problems, including: | ||
| /// * **Cross-Module Resolution:** Allowing the compiler to traverse edges and verify | ||
| /// that imported symbols, functions, and types actually exist in other files. | ||
| /// * **Topological Sorting:** Guaranteeing that modules are analyzed and compiled in | ||
| /// the strictly correct mathematical order (e.g., analyzing module `B` before module | ||
| /// `A` if `A` depends on `B`). | ||
| /// * **Cycle Detection:** Preventing infinite compiler loops by ensuring no circular | ||
| /// imports exist before heavy semantic processing begins. | ||
| pub struct DependencyGraph { | ||
| /// Arena Pattern: the data itself lives here. | ||
| /// A flat vector guarantees that module data is stored contiguously in memory. | ||
| modules: Vec<Module>, | ||
|
|
||
| /// The configuration environment. | ||
| /// Used to resolve external library dependencies and invoke their associated functions. | ||
| dependency_map: Arc<DependencyMap>, | ||
|
|
||
| /// Fast lookup: `CanonPath` -> Module ID. | ||
| /// A reverse index mapping absolute file paths to their internal IDs. | ||
| /// This solves the duplication problem, ensuring each file is only parsed once. | ||
| lookup: HashMap<CanonPath, usize>, | ||
|
|
||
| /// Fast lookup: Module ID -> `CanonPath`. | ||
| /// A direct index mapping internal IDs back to their absolute file paths. | ||
| /// This serves as the exact inverse of the `lookup` map. | ||
| paths: Vec<CanonPath>, | ||
|
|
||
| /// The Adjacency List: Defines the Directed acyclic Graph (DAG) of imports. | ||
| /// | ||
| /// The Key (`usize`) is the ID of a "Parent" module (the file doing the importing). | ||
| /// The Value (`Vec<usize>`) is a list of IDs of the "Child" modules it relies on. | ||
| /// | ||
| /// Example: If `main.simf` (ID: 0) has `use lib::math;` (ID: 1) and `use lib::io;` (ID: 2), | ||
| /// this map will contain: `{ 0: [1, 2] }`. | ||
| dependencies: HashMap<usize, Vec<usize>>, | ||
| } | ||
|
|
||
| impl DependencyGraph { | ||
| /// Initializes a new `ProjectGraph` by parsing the root program and discovering all dependencies. | ||
| /// | ||
| /// Performs a BFS to recursively parse `use` statements, | ||
| /// building a DAG of the project's modules. | ||
| /// | ||
| /// # Arguments | ||
| /// | ||
| /// * `root_source` - The `SourceFile` representing the entry point of the project. | ||
| /// * `dependency_map` - The context-aware mapping rules used to resolve external imports. | ||
| /// * `root_program` - A reference to the already-parsed AST of the root file. | ||
| /// * `handler` - The diagnostics collector used to record resolution and parsing errors. | ||
| /// | ||
| /// # Returns | ||
| /// | ||
| /// * `Ok(Some(Self))` - If the entire project graph was successfully resolved and parsed. | ||
| /// * `Ok(None)` - If the graph traversal completed, but one or more modules contained | ||
| /// errors (which have been safely logged into the `handler`). | ||
| /// | ||
| /// # Errors | ||
| /// | ||
| /// This function will return an `Err(String)` only for critical internal compiler errors | ||
| /// (e.g., if a provided `SourceFile` is unexpectedly missing its underlying file path). | ||
| pub fn new( | ||
| root_source: SourceFile, | ||
| dependency_map: Arc<DependencyMap>, | ||
| root_program: &parse::Program, | ||
| handler: &mut ErrorCollector, | ||
| ) -> Result<Option<Self>, String> { | ||
| let root_name = if let Some(root_name) = root_source.name() { | ||
| CanonPath::canonicalize(root_name)? | ||
| } else { | ||
| return Err( | ||
| "The root_source variable inside the ProjectGraph::new() function has no name" | ||
| .to_string(), | ||
| ); | ||
| }; | ||
|
|
||
| let mut graph = Self { | ||
| modules: vec![Module { | ||
| source: root_source, | ||
| parsed_program: root_program.clone(), | ||
| }], | ||
| dependency_map, | ||
| lookup: HashMap::new(), | ||
| paths: vec![root_name.clone()], | ||
| dependencies: HashMap::new(), | ||
| }; | ||
|
|
||
| let root_id = 0; | ||
| graph.lookup.insert(root_name, root_id); | ||
| graph.dependencies.insert(root_id, Vec::new()); | ||
|
|
||
| let mut queue = VecDeque::new(); | ||
| queue.push_back(root_id); | ||
|
|
||
| while let Some(curr_id) = queue.pop_front() { | ||
| let Some(current_module) = graph.modules.get(curr_id) else { | ||
| return Err(format!( | ||
| "Internal Driver Error: Module ID {} is in the queue but missing from the graph.modules.", | ||
| curr_id | ||
| )); | ||
| }; | ||
|
|
||
| // We need this to report errors inside THIS file. | ||
| let importer_source = current_module.source.clone(); | ||
|
|
||
| let importer_source_name = if let Some(name) = importer_source.name() { | ||
| CanonPath::canonicalize(name)? | ||
| } else { | ||
| return Err(format!( | ||
| "The {:?} variable inside the DependencyGraph::new() function has no name", | ||
| importer_source | ||
| )); | ||
| }; | ||
|
|
||
| // PHASE 1: Immutably read from the graph | ||
| let valid_imports = Self::resolve_imports( | ||
| &graph.modules[curr_id].parsed_program, | ||
| &importer_source, | ||
| importer_source_name, | ||
| &graph.dependency_map, | ||
| handler, | ||
| ); | ||
|
|
||
| // PHASE 2: Mutate the graph | ||
| graph.load_and_parse_dependencies( | ||
| curr_id, | ||
| valid_imports, | ||
| &importer_source, | ||
| handler, | ||
| &mut queue, | ||
| ); | ||
| } | ||
|
|
||
| Ok((!handler.has_errors()).then_some(graph)) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it expected that
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was not expected. I could add a local |
||
| } | ||
|
|
||
| /// This helper cleanly encapsulates the process of loading source text, parsing it | ||
| /// into an `parse::Program`, and combining them so the compiler can easily work with the file. | ||
| /// If the file is missing or contains syntax errors, it logs the diagnostic to the | ||
| /// `ErrorCollector` and safely returns `None`. | ||
| fn parse_and_get_program( | ||
| path: &CanonPath, | ||
| importer_source: SourceFile, | ||
| span: Span, | ||
| handler: &mut ErrorCollector, | ||
| ) -> Option<Module> { | ||
| let Ok(content) = std::fs::read_to_string(path.as_path()) else { | ||
| let err = RichError::new(Error::FileNotFound(PathBuf::from(path.as_path())), span) | ||
| .with_source(importer_source.clone()); | ||
|
|
||
| handler.push(err); | ||
| return None; | ||
| }; | ||
|
|
||
| let dep_source_file = SourceFile::new(path.as_path(), Arc::from(content.clone())); | ||
|
|
||
| parse::Program::parse_from_str_with_errors(&dep_source_file, handler).map( | ||
| |parsed_program| Module { | ||
| source: dep_source_file, | ||
| parsed_program, | ||
| }, | ||
| ) | ||
| } | ||
|
|
||
| /// PHASE 1 OF GRAPH CONSTRUCTION: Resolves all imports inside a single `parse::Program`. | ||
| /// Note: This is a specialized helper function designed exclusively for the `DependencyGraph::new()` constructor. | ||
| fn resolve_imports( | ||
| current_program: &parse::Program, | ||
| importer_source: &SourceFile, | ||
| importer_source_name: CanonPath, | ||
| dependency_map: &DependencyMap, | ||
| handler: &mut ErrorCollector, | ||
| ) -> Vec<(CanonPath, Span)> { | ||
| let mut valid_imports = Vec::new(); | ||
|
|
||
| for elem in current_program.items() { | ||
| let parse::Item::Use(use_decl) = elem else { | ||
| continue; | ||
| }; | ||
|
|
||
| match dependency_map.resolve_path(importer_source_name.clone(), use_decl) { | ||
| Ok(path) => valid_imports.push((path, *use_decl.span())), | ||
| Err(err) => handler.push(err.with_source(importer_source.clone())), | ||
| } | ||
| } | ||
|
|
||
| valid_imports | ||
| } | ||
|
|
||
| /// PHASE 2 OF GRAPH CONSTRUCTION: Loads, parses, and registers new dependencies. | ||
| /// Note: This is a specialized helper function designed exclusively for the `DependencyGraph::new()` constructor. | ||
| fn load_and_parse_dependencies( | ||
| &mut self, | ||
| curr_id: usize, | ||
| valid_imports: Vec<(CanonPath, Span)>, | ||
| importer_source: &SourceFile, | ||
| handler: &mut ErrorCollector, | ||
| queue: &mut std::collections::VecDeque<usize>, | ||
| ) { | ||
| for (path, import_span) in valid_imports { | ||
| if let Some(&existing_id) = self.lookup.get(&path) { | ||
| let deps = self.dependencies.entry(curr_id).or_default(); | ||
| if !deps.contains(&existing_id) { | ||
| deps.push(existing_id); | ||
| } | ||
|
Comment on lines
+227
to
+231
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it actually detect import cycles? As it is written it treats any already-seen path as a normal resolved dependency and continues. For
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, it will be the responsibility of C3 and the other algorithms in the following PR |
||
| continue; | ||
| } | ||
|
|
||
| let Some(module) = | ||
| Self::parse_and_get_program(&path, importer_source.clone(), import_span, handler) | ||
| else { | ||
| continue; | ||
| }; | ||
|
|
||
| let last_ind = self.modules.len(); | ||
| self.modules.push(module); | ||
|
|
||
| self.lookup.insert(path.clone(), last_ind); | ||
| self.paths.push(path.clone()); | ||
| self.dependencies.entry(curr_id).or_default().push(last_ind); | ||
|
|
||
| queue.push_back(last_ind); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
| use crate::resolution::tests::canon; | ||
| use crate::test_utils::TempWorkspace; | ||
|
|
||
| #[test] | ||
| fn test_new_bfs_traversal_state() { | ||
| // Goal: Verify that a simple chain (main -> a -> b) correctly pushes items | ||
| // into the vectors and builds the adjacency list in BFS order. | ||
|
|
||
| let ws = TempWorkspace::new("bfs_state"); | ||
| let mut handler = ErrorCollector::new(); | ||
|
|
||
| let workspace = canon(&ws.create_dir("workspace")); | ||
|
|
||
| let dir_a = canon(&ws.create_dir("workspace/a")); | ||
| let dir_b = canon(&ws.create_dir("workspace/b")); | ||
|
|
||
| let main_content = "use a::mock_file::mock_item;"; | ||
| let a_content = "use b::mock_file::mock_item;"; | ||
| let b_content = ""; | ||
|
|
||
| let main_file = canon(&ws.create_file("workspace/main.simf", main_content)); | ||
| let a_file = canon(&ws.create_file("workspace/a/mock_file.simf", a_content)); | ||
| let b_file = canon(&ws.create_file("workspace/b/mock_file.simf", b_content)); | ||
|
|
||
| let mut map = DependencyMap::new(); | ||
|
|
||
| map.insert(workspace.clone(), "a".to_string(), dir_a) | ||
| .unwrap(); | ||
| map.insert(workspace.clone(), "b".to_string(), dir_b) | ||
| .unwrap(); | ||
| let map = Arc::new(map); | ||
|
|
||
| let main_source = SourceFile::new(main_file.as_path(), Arc::from(main_content)); | ||
| let main_program_option = | ||
| parse::Program::parse_from_str_with_errors(&main_source, &mut handler); | ||
|
|
||
| let Some(main_program) = main_program_option else { | ||
| eprintln!("Parser Error in Test Setup: {}", handler); | ||
| std::process::exit(1); | ||
| }; | ||
|
|
||
| // Act | ||
| let graph_option = | ||
| DependencyGraph::new(main_source, map, &main_program, &mut handler).unwrap(); | ||
|
|
||
| let Some(graph) = graph_option else { | ||
| eprintln!("DependencyGraph Error: {}", handler); | ||
| std::process::exit(1); | ||
| }; | ||
|
|
||
| // Assert: Size checks | ||
| assert_eq!(graph.modules.len(), 3); | ||
| assert_eq!(graph.paths.len(), 3); | ||
|
|
||
| // Assert: Ensure BFS assigned the IDs in the exact correct order | ||
| let main_id = *graph.lookup.get(&main_file).unwrap(); | ||
| let a_id = *graph.lookup.get(&a_file).unwrap(); | ||
| let b_id = *graph.lookup.get(&b_file).unwrap(); | ||
|
|
||
| assert_eq!(main_id, 0); | ||
| assert_eq!(a_id, 1); | ||
| assert_eq!(b_id, 2); | ||
|
|
||
| // Assert: Ensure the Adjacency List (dependencies map) linked them correctly | ||
| assert_eq!( | ||
| *graph.dependencies.get(&main_id).unwrap(), | ||
| vec![a_id], | ||
| "Main depends on A" | ||
| ); | ||
| assert_eq!( | ||
| *graph.dependencies.get(&a_id).unwrap(), | ||
| vec![b_id], | ||
| "A depends on B" | ||
| ); | ||
| assert!( | ||
| !graph.dependencies.contains_key(&b_id), | ||
| "B depends on nothing" | ||
| ); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we move it to driver/mod.rs?
The reasoning is that we gonna introduce a few independent algorithms here, so we could have them under same module, as they are just related to the driver itself