Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
335 changes: 335 additions & 0 deletions src/driver/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
use std::collections::{HashMap, VecDeque};
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we move it to driver/mod.rs?
The reasoning is that we are going to introduce a few independent algorithms here, so we could keep them under the same module, as they all relate to the driver itself.

use std::path::PathBuf;
use std::sync::Arc;

use crate::error::{Error, ErrorCollector, RichError, Span};
use crate::parse::{self, ParseFromStrWithErrors};
use crate::resolution::{CanonPath, DependencyMap, SourceFile};

/// Represents a single, isolated file in the SimplicityHL project.
///
/// In this architecture, a file and a module are the exact same thing, so a
/// `Module` simply pairs the file's source with the AST parsed from it.
#[derive(Debug, Clone)]
struct Module {
/// The source file this module was loaded from.
source: SourceFile,
/// The completely parsed program for this specific file.
/// It contains all the functions, aliases, and imports defined inside the file.
parsed_program: parse::Program,
}

/// The import graph of a project: every module reachable from the root file,
/// together with the "imports" edges between them.
///
/// While an Abstract Syntax Tree (AST) represents the grammar of a single file, the
/// `DependencyGraph` links multiple `parse::Program` ASTs into one directed graph,
/// serving as the intermediate representation between isolated parsing and global
/// semantic analysis.
///
/// The structure is intended to provide the global context for higher-level
/// compiler passes, such as:
/// * **Cross-Module Resolution:** traversing edges to verify that imported symbols,
/// functions, and types actually exist in other files.
/// * **Topological Sorting:** analyzing modules in dependency order (e.g., module `B`
/// before module `A` if `A` depends on `B`).
/// * **Cycle Detection:** rejecting circular imports before heavy semantic
/// processing begins.
///
/// NOTE(review): none of those passes are implemented in the code visible here.
/// In particular, construction does not reject cycles: an import of an
/// already-loaded module is simply recorded as an edge, so the graph is only a
/// DAG if the project's imports happen to be acyclic.
pub struct DependencyGraph {
/// Arena Pattern: the data itself lives here.
/// Modules are addressed by their index in this vector (the "module ID");
/// the root module is always at index 0.
modules: Vec<Module>,

/// The configuration environment.
/// Used to resolve the path that each `use` declaration refers to.
dependency_map: Arc<DependencyMap>,

/// Fast lookup: `CanonPath` -> Module ID.
/// A reverse index mapping canonical file paths to their internal IDs.
/// This solves the duplication problem, ensuring each file is only parsed once.
lookup: HashMap<CanonPath, usize>,

/// Fast lookup: Module ID -> `CanonPath`.
/// `paths[id]` is the canonical path of `modules[id]`; the two vectors are
/// always pushed to together. This serves as the exact inverse of the `lookup` map.
paths: Vec<CanonPath>,

/// The Adjacency List: defines the directed graph of imports.
///
/// The Key (`usize`) is the ID of a "Parent" module (the file doing the importing).
/// The Value (`Vec<usize>`) is a duplicate-free list of IDs of the "Child" modules
/// it relies on, in the order the imports were processed.
///
/// The root module always has an entry; any other module only gets an entry
/// once at least one of its imports has been loaded successfully.
///
/// Example: If `main.simf` (ID: 0) has `use lib::math;` (ID: 1) and `use lib::io;` (ID: 2),
/// this map will contain: `{ 0: [1, 2] }`.
dependencies: HashMap<usize, Vec<usize>>,
}

impl DependencyGraph {
/// Initializes a new `DependencyGraph` by registering the root program and discovering all dependencies.
///
/// Performs a BFS over `use` statements: each dequeued module has its imports
/// resolved to paths, and every not-yet-seen path is loaded, parsed, registered
/// and enqueued in turn.
///
/// # Arguments
///
/// * `root_source` - The `SourceFile` representing the entry point of the project.
/// * `dependency_map` - The context-aware mapping rules used to resolve external imports.
/// * `root_program` - A reference to the already-parsed AST of the root file (it is cloned into the graph).
/// * `handler` - The diagnostics collector used to record resolution and parsing errors.
///
/// # Returns
///
/// * `Ok(Some(Self))` - If `handler` reports no errors once the traversal has finished.
/// * `Ok(None)` - If the graph traversal completed, but `handler` reports errors
/// (the check is made on the collector as a whole, after the traversal).
///
/// # Errors
///
/// This function returns an `Err(String)` for failures that are not reported through
/// `handler`: a `SourceFile` without a name, a failure of `CanonPath::canonicalize`
/// on a source name, or a queued module ID that is missing from `modules`.
pub fn new(
root_source: SourceFile,
dependency_map: Arc<DependencyMap>,
root_program: &parse::Program,
handler: &mut ErrorCollector,
) -> Result<Option<Self>, String> {
// The root must be backed by a named file so it can be keyed by canonical path.
let root_name = if let Some(root_name) = root_source.name() {
CanonPath::canonicalize(root_name)?
} else {
return Err(
"The root_source variable inside the ProjectGraph::new() function has no name"
.to_string(),
);
};

// The root module is registered up front and always receives ID 0.
let mut graph = Self {
modules: vec![Module {
source: root_source,
parsed_program: root_program.clone(),
}],
dependency_map,
lookup: HashMap::new(),
paths: vec![root_name.clone()],
dependencies: HashMap::new(),
};

let root_id = 0;
graph.lookup.insert(root_name, root_id);
graph.dependencies.insert(root_id, Vec::new());

// BFS work list of module IDs whose imports still have to be processed.
let mut queue = VecDeque::new();
queue.push_back(root_id);

while let Some(curr_id) = queue.pop_front() {
let Some(current_module) = graph.modules.get(curr_id) else {
return Err(format!(
"Internal Driver Error: Module ID {} is in the queue but missing from the graph.modules.",
curr_id
));
};

// We need this to report errors inside THIS file.
// Cloned so that no borrow of `graph` is held while the graph is mutated below.
let importer_source = current_module.source.clone();

let importer_source_name = if let Some(name) = importer_source.name() {
CanonPath::canonicalize(name)?
} else {
return Err(format!(
"The {:?} variable inside the DependencyGraph::new() function has no name",
importer_source
));
};

// PHASE 1: Immutably read from the graph
let valid_imports = Self::resolve_imports(
&graph.modules[curr_id].parsed_program,
&importer_source,
importer_source_name,
&graph.dependency_map,
handler,
);

// PHASE 2: Mutate the graph
graph.load_and_parse_dependencies(
curr_id,
valid_imports,
&importer_source,
handler,
&mut queue,
);
}

// The graph is only handed out when the collector holds no errors at all.
Ok((!handler.has_errors()).then_some(graph))
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it expected that ParseFromStrWithErrors now returns None whenever the shared ErrorCollector already contains any earlier error, and DependencyGraph::new reuses one collector across the entire BFS?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was not expected. I could add a local ErrorCollector inside the parse_and_get_program function to parse more files and collect more errors. I think this would be better than the current approach

}

/// Loads the file at `path`, parses it, and bundles source and AST into a `Module`.
///
/// # Arguments
///
/// * `path` - Canonical path of the dependency to load.
/// * `importer_source` - The file containing the `use` declaration; attached to the
///   diagnostic if the dependency cannot be read.
/// * `span` - Location of the `use` declaration inside `importer_source`.
/// * `handler` - Collector that receives the read or parse diagnostics.
///
/// # Returns
///
/// `Some(Module)` when the file was read and parsed; `None` when reading failed or
/// the parser did not produce a program. In both failure cases the diagnostics are
/// recorded in `handler` rather than returned.
///
/// Note that *any* failure of `read_to_string` (not only a missing file) is reported
/// as `Error::FileNotFound`.
fn parse_and_get_program(
    path: &CanonPath,
    importer_source: SourceFile,
    span: Span,
    handler: &mut ErrorCollector,
) -> Option<Module> {
    let Ok(content) = std::fs::read_to_string(path.as_path()) else {
        // `importer_source` is owned and not needed afterwards, so it is moved
        // into the diagnostic instead of being cloned.
        let err = RichError::new(Error::FileNotFound(PathBuf::from(path.as_path())), span)
            .with_source(importer_source);

        handler.push(err);
        return None;
    };

    // `content` is consumed here; no copy of the file text is required.
    let dep_source_file = SourceFile::new(path.as_path(), Arc::from(content));

    parse::Program::parse_from_str_with_errors(&dep_source_file, handler).map(
        |parsed_program| Module {
            source: dep_source_file,
            parsed_program,
        },
    )
}

/// PHASE 1 OF GRAPH CONSTRUCTION: turns every `use` declaration of one program into
/// the canonical path it refers to.
///
/// Items that are not `use` declarations are ignored. A declaration that fails to
/// resolve is reported to `handler` (with `importer_source` attached) and left out
/// of the result; the remaining declarations are still processed.
///
/// Returns the resolved paths paired with the span of the declaration that named
/// them, in declaration order.
///
/// Note: This is a specialized helper function designed exclusively for the `DependencyGraph::new()` constructor.
fn resolve_imports(
    current_program: &parse::Program,
    importer_source: &SourceFile,
    importer_source_name: CanonPath,
    dependency_map: &DependencyMap,
    handler: &mut ErrorCollector,
) -> Vec<(CanonPath, Span)> {
    let mut resolved = Vec::new();

    for item in current_program.items() {
        if let parse::Item::Use(decl) = item {
            let outcome = dependency_map.resolve_path(importer_source_name.clone(), decl);

            match outcome {
                Err(err) => handler.push(err.with_source(importer_source.clone())),
                Ok(target) => resolved.push((target, *decl.span())),
            }
        }
    }

    resolved
}

/// PHASE 2 OF GRAPH CONSTRUCTION: Loads, parses, and registers new dependencies.
///
/// For every resolved import of module `curr_id`:
/// * if the path is already known, only the edge `curr_id -> existing_id` is
/// recorded (once), and nothing is parsed again;
/// * otherwise the file is loaded and parsed, appended to the graph under a fresh
/// ID, linked from `curr_id`, and enqueued so that its own imports get processed.
///
/// Imports whose file cannot be loaded or parsed are skipped; the diagnostics go to `handler`.
///
/// # Arguments
///
/// * `curr_id` - ID of the importing module.
/// * `valid_imports` - Resolved dependency paths with the span of their `use` declaration.
/// * `importer_source` - Source of the importing module, used for diagnostics.
/// * `handler` - Collector that receives load and parse diagnostics.
/// * `queue` - The BFS work list; newly registered module IDs are pushed to its back.
///
/// Note: This is a specialized helper function designed exclusively for the `DependencyGraph::new()` constructor.
fn load_and_parse_dependencies(
&mut self,
curr_id: usize,
valid_imports: Vec<(CanonPath, Span)>,
importer_source: &SourceFile,
handler: &mut ErrorCollector,
queue: &mut std::collections::VecDeque<usize>,
) {
for (path, import_span) in valid_imports {
// Already-loaded module: record the edge without duplicating it.
// No cycle check happens here; a back-edge is stored like any other edge.
if let Some(&existing_id) = self.lookup.get(&path) {
let deps = self.dependencies.entry(curr_id).or_default();
if !deps.contains(&existing_id) {
deps.push(existing_id);
}
Comment on lines +227 to +231
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it actually detect import cycles? As it is written it treats any already-seen path as a normal resolved dependency and continues. For A -> B -> A, B’s import of A hits the existing_id branch, and in Ok((!handler.has_errors()).then_some(graph)) still returns Some(graph) with no diagnostic

Copy link
Copy Markdown
Collaborator Author

@LesterEvSe LesterEvSe Apr 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it will be the responsibility of C3 and the other algorithms in the following PR

continue;
}

// First time this path is seen: load and parse it; on failure the
// diagnostic is already in `handler`, so just move on to the next import.
let Some(module) =
Self::parse_and_get_program(&path, importer_source.clone(), import_span, handler)
else {
continue;
};

// The new module's ID is its index in `modules`; `paths` is pushed in
// lockstep so that `paths[id]` stays the path of `modules[id]`.
let last_ind = self.modules.len();
self.modules.push(module);

self.lookup.insert(path.clone(), last_ind);
self.paths.push(path.clone());
self.dependencies.entry(curr_id).or_default().push(last_ind);

// Schedule the new module so its own imports are processed later.
queue.push_back(last_ind);
}
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::resolution::tests::canon;
    use crate::test_utils::TempWorkspace;

    /// Verifies that a simple import chain (main -> a -> b) is discovered in BFS
    /// order: IDs are assigned 0, 1, 2 and the adjacency list links each module
    /// to exactly the module it imports.
    #[test]
    fn test_new_bfs_traversal_state() {
        // Arrange: a workspace with `main.simf` plus two single-file dependencies.
        let ws = TempWorkspace::new("bfs_state");
        let mut handler = ErrorCollector::new();

        let workspace = canon(&ws.create_dir("workspace"));

        let dir_a = canon(&ws.create_dir("workspace/a"));
        let dir_b = canon(&ws.create_dir("workspace/b"));

        let main_content = "use a::mock_file::mock_item;";
        let a_content = "use b::mock_file::mock_item;";
        let b_content = "";

        let main_file = canon(&ws.create_file("workspace/main.simf", main_content));
        let a_file = canon(&ws.create_file("workspace/a/mock_file.simf", a_content));
        let b_file = canon(&ws.create_file("workspace/b/mock_file.simf", b_content));

        let mut map = DependencyMap::new();

        map.insert(workspace.clone(), "a".to_string(), dir_a)
            .unwrap();
        map.insert(workspace.clone(), "b".to_string(), dir_b)
            .unwrap();
        let map = Arc::new(map);

        let main_source = SourceFile::new(main_file.as_path(), Arc::from(main_content));

        // A setup failure must fail only this test. `std::process::exit` would
        // terminate the whole test harness, so panic instead.
        let Some(main_program) =
            parse::Program::parse_from_str_with_errors(&main_source, &mut handler)
        else {
            panic!("Parser Error in Test Setup: {}", handler);
        };

        // Act
        let graph_option = DependencyGraph::new(main_source, map, &main_program, &mut handler)
            .expect("DependencyGraph::new must not hit an internal driver error");

        let Some(graph) = graph_option else {
            panic!("DependencyGraph Error: {}", handler);
        };

        // Assert: Size checks
        assert_eq!(graph.modules.len(), 3);
        assert_eq!(graph.paths.len(), 3);

        // Assert: Ensure BFS assigned the IDs in the exact correct order
        let main_id = *graph.lookup.get(&main_file).unwrap();
        let a_id = *graph.lookup.get(&a_file).unwrap();
        let b_id = *graph.lookup.get(&b_file).unwrap();

        assert_eq!(main_id, 0);
        assert_eq!(a_id, 1);
        assert_eq!(b_id, 2);

        // Assert: Ensure the Adjacency List (dependencies map) linked them correctly
        assert_eq!(
            *graph.dependencies.get(&main_id).unwrap(),
            vec![a_id],
            "Main depends on A"
        );
        assert_eq!(
            *graph.dependencies.get(&a_id).unwrap(),
            vec![b_id],
            "A depends on B"
        );
        // A module without imports never gets an adjacency entry.
        assert!(
            !graph.dependencies.contains_key(&b_id),
            "B depends on nothing"
        );
    }
}
6 changes: 5 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub mod compile;
pub mod debug;
#[cfg(feature = "docs")]
pub mod docs;
pub mod driver;
pub mod dummy_env;
pub mod error;
pub mod jet;
Expand Down Expand Up @@ -37,6 +38,7 @@ pub use simplicity::elements;
use crate::debug::DebugSymbols;
use crate::error::{ErrorCollector, WithContent};
use crate::parse::ParseFromStrWithErrors;
use crate::resolution::SourceFile;
pub use crate::types::ResolvedType;
pub use crate::value::Value;
pub use crate::witness::{Arguments, Parameters, WitnessTypes, WitnessValues};
Expand All @@ -58,8 +60,10 @@ impl TemplateProgram {
/// The string is not a valid SimplicityHL program.
pub fn new<Str: Into<Arc<str>>>(s: Str) -> Result<Self, String> {
let file = s.into();
let source = SourceFile::anonymous(file.clone());
let mut error_handler = ErrorCollector::new();
let parse_program = parse::Program::parse_from_str_with_errors(&file, &mut error_handler);
let parse_program = parse::Program::parse_from_str_with_errors(&source, &mut error_handler);

if let Some(program) = parse_program {
let ast_program = ast::Program::analyze(&program).with_content(Arc::clone(&file))?;
Ok(Self {
Expand Down
Loading
Loading