Skip to content

Commit fe24997

Browse files
authored
Merge pull request #244 from seddonym/read-data-map-file
Read data map file
2 parents 9cd1824 + 9004845 commit fe24997

9 files changed

Lines changed: 128 additions & 100 deletions

File tree

.importlinter

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ ignore_imports =
1717
; Grimp doesn't understand extension modules: it thinks grimp._rustgrimp
1818
; is an object within grimp, rather than a module in its own right,
1919
; so we ignore these imports here.
20+
grimp.adaptors.caching -> grimp
2021
grimp.adaptors.graph -> grimp
2122
grimp.adaptors.filesystem -> grimp
2223
grimp.application.scanning -> grimp

rust/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.
2525
ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" }
2626
ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" }
2727
serde = { version = "1.0", features = ["derive"] }
28+
serde_json = "1.0.137"
2829
serde_yaml = "0.9"
2930
unindent = "0.2.4"
3031
encoding_rs = "0.8.35"
@@ -38,4 +39,4 @@ default = ["extension-module"]
3839

3940
[dev-dependencies]
4041
parameterized = "2.0.0"
41-
serde_json = "1.0.137"
42+

rust/src/caching.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
use crate::errors::{GrimpError, GrimpResult};
2+
use crate::filesystem::get_file_system_boxed;
3+
use crate::import_scanning::{DirectImport, imports_by_module_to_py};
4+
use crate::module_finding::Module;
5+
use pyo3::types::PyDict;
6+
use pyo3::{Bound, PyAny, PyResult, Python, pyfunction};
7+
use std::collections::{HashMap, HashSet};
8+
9+
/// Reads the cache file containing all the imports for a given package.
10+
/// Args:
11+
/// - filename: Path of the cache data file to read, as a str.
12+
/// - file_system: The file system interface to use. (A BasicFileSystem.)
13+
/// Returns Dict[Module, Set[DirectImport]]. Raises CorruptCache if the file contents cannot be parsed as JSON in the expected format.
14+
#[pyfunction]
15+
pub fn read_cache_data_map_file<'py>(
16+
py: Python<'py>,
17+
filename: &str,
18+
file_system: Bound<'py, PyAny>,
19+
) -> PyResult<Bound<'py, PyDict>> {
20+
let file_system_boxed = get_file_system_boxed(&file_system)?;
21+
22+
let file_contents = file_system_boxed.read(filename)?;
23+
24+
let imports_by_module = parse_json_to_map(&file_contents, filename)?;
25+
26+
Ok(imports_by_module_to_py(py, imports_by_module))
27+
}
28+
29+
pub fn parse_json_to_map(
30+
json_str: &str,
31+
filename: &str,
32+
) -> GrimpResult<HashMap<Module, HashSet<DirectImport>>> {
33+
let raw_map: HashMap<String, Vec<(String, usize, String)>> = serde_json::from_str(json_str)
34+
.map_err(|_| GrimpError::CorruptCache(filename.to_string()))?;
35+
36+
let mut parsed_map: HashMap<Module, HashSet<DirectImport>> = HashMap::new();
37+
38+
for (module_name, imports) in raw_map {
39+
let module = Module {
40+
name: module_name.clone(),
41+
};
42+
let import_set: HashSet<DirectImport> = imports
43+
.into_iter()
44+
.map(|(imported, line_number, line_contents)| DirectImport {
45+
importer: module_name.clone(),
46+
imported,
47+
line_number,
48+
line_contents,
49+
})
50+
.collect();
51+
parsed_map.insert(module, import_set);
52+
}
53+
54+
Ok(parsed_map)
55+
}

rust/src/errors.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError};
1+
use crate::exceptions::{
2+
CorruptCache, InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError,
3+
};
24
use pyo3::PyErr;
35
use pyo3::exceptions::PyValueError;
46
use ruff_python_parser::ParseError as RuffParseError;
@@ -26,6 +28,9 @@ pub enum GrimpError {
2628
#[source]
2729
parse_error: RuffParseError,
2830
},
31+
32+
#[error("Could not use corrupt cache file {0}.")]
33+
CorruptCache(String),
2934
}
3035

3136
pub type GrimpResult<T> = Result<T, GrimpError>;
@@ -43,6 +48,7 @@ impl From<GrimpError> for PyErr {
4348
GrimpError::ParseError {
4449
line_number, text, ..
4550
} => PyErr::new::<ParseError, _>((line_number, text)),
51+
GrimpError::CorruptCache(_) => CorruptCache::new_err(value.to_string()),
4652
}
4753
}
4854
}

rust/src/exceptions.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use pyo3::prelude::*;
55
create_exception!(_rustgrimp, ModuleNotPresent, PyException);
66
create_exception!(_rustgrimp, NoSuchContainer, PyException);
77
create_exception!(_rustgrimp, InvalidModuleExpression, PyException);
8+
create_exception!(_rustgrimp, CorruptCache, PyException);
89

910
// We need to use here `pyclass(extends=PyException)` instead of `create_exception!`
1011
// since the exception contains custom data. See:

rust/src/filesystem.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use itertools::Itertools;
2-
use pyo3::exceptions::{PyFileNotFoundError, PyUnicodeDecodeError};
2+
use pyo3::exceptions::{PyFileNotFoundError, PyTypeError, PyUnicodeDecodeError};
33
use pyo3::prelude::*;
44
use regex::Regex;
55
use std::collections::HashMap;
@@ -375,3 +375,21 @@ pub fn parse_indented_file_system_string(file_system_string: &str) -> HashMap<St
375375

376376
file_paths_map
377377
}
378+
379+
#[allow(clippy::borrowed_box)]
380+
pub fn get_file_system_boxed<'py>(
381+
file_system: &Bound<'py, PyAny>,
382+
) -> PyResult<Box<dyn FileSystem + Send + Sync>> {
383+
let file_system_boxed: Box<dyn FileSystem + Send + Sync>;
384+
385+
if let Ok(py_real) = file_system.extract::<PyRef<PyRealBasicFileSystem>>() {
386+
file_system_boxed = Box::new(py_real.inner.clone());
387+
} else if let Ok(py_fake) = file_system.extract::<PyRef<PyFakeBasicFileSystem>>() {
388+
file_system_boxed = Box::new(py_fake.inner.clone());
389+
} else {
390+
return Err(PyTypeError::new_err(
391+
"file_system must be an instance of RealBasicFileSystem or FakeBasicFileSystem",
392+
));
393+
}
394+
Ok(file_system_boxed)
395+
}

rust/src/import_scanning.rs

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
use crate::errors::GrimpResult;
2-
use crate::filesystem::{FileSystem, PyFakeBasicFileSystem, PyRealBasicFileSystem};
3-
use crate::import_parsing;
2+
use crate::filesystem::FileSystem;
43
use crate::module_finding::{FoundPackage, Module};
4+
use crate::{import_parsing, module_finding};
55
use itertools::Itertools;
6-
use pyo3::exceptions::PyTypeError;
76
use pyo3::prelude::*;
87
use pyo3::types::{PyDict, PySet};
98
/// Statically analyses some Python modules for import statements within their shared package.
@@ -13,10 +12,10 @@ use std::io::{self, ErrorKind};
1312

1413
#[derive(Debug, Hash, Eq, PartialEq)]
1514
pub struct DirectImport {
16-
importer: String,
17-
imported: String,
18-
line_number: usize,
19-
line_contents: String,
15+
pub importer: String,
16+
pub imported: String,
17+
pub line_number: usize,
18+
pub line_contents: String,
2019
}
2120

2221
pub fn py_found_packages_to_rust(py_found_packages: &Bound<'_, PyAny>) -> HashSet<FoundPackage> {
@@ -55,24 +54,6 @@ fn module_is_descendant(module_name: &str, potential_ancestor: &str) -> bool {
5554
module_name.starts_with(&format!("{potential_ancestor}."))
5655
}
5756

58-
#[allow(clippy::borrowed_box)]
59-
pub fn get_file_system_boxed<'py>(
60-
file_system: &Bound<'py, PyAny>,
61-
) -> PyResult<Box<dyn FileSystem + Send + Sync>> {
62-
let file_system_boxed: Box<dyn FileSystem + Send + Sync>;
63-
64-
if let Ok(py_real) = file_system.extract::<PyRef<PyRealBasicFileSystem>>() {
65-
file_system_boxed = Box::new(py_real.inner.clone());
66-
} else if let Ok(py_fake) = file_system.extract::<PyRef<PyFakeBasicFileSystem>>() {
67-
file_system_boxed = Box::new(py_fake.inner.clone());
68-
} else {
69-
return Err(PyTypeError::new_err(
70-
"file_system must be an instance of RealBasicFileSystem or FakeBasicFileSystem",
71-
));
72-
}
73-
Ok(file_system_boxed)
74-
}
75-
7657
/// Statically analyses the given module and returns a set of Modules that
7758
/// it imports.
7859
#[allow(clippy::borrowed_box)]
@@ -350,3 +331,22 @@ fn _distill_external_module(
350331
Some(module_name.split('.').next().unwrap().to_string())
351332
}
352333
}
334+
335+
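/// Converts the Rust data structure into a Python dict[Module, set[DirectImport]]. Panics if grimp.domain.valueobjects cannot be imported or if constructing a Module or inserting into the dict fails.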
336+
pub fn imports_by_module_to_py(
337+
py: Python,
338+
imports_by_module: HashMap<module_finding::Module, HashSet<DirectImport>>,
339+
) -> Bound<PyDict> {
340+
let valueobjects_pymodule = PyModule::import(py, "grimp.domain.valueobjects").unwrap();
341+
let py_module_class = valueobjects_pymodule.getattr("Module").unwrap();
342+
343+
let imports_by_module_py = PyDict::new(py);
344+
for (module, imports) in imports_by_module.iter() {
345+
let py_module_instance = py_module_class.call1((module.name.clone(),)).unwrap();
346+
let py_imports = to_py_direct_imports(py, imports);
347+
imports_by_module_py
348+
.set_item(py_module_instance, py_imports)
349+
.unwrap();
350+
}
351+
imports_by_module_py
352+
}

rust/src/lib.rs

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mod caching;
12
pub mod errors;
23
pub mod exceptions;
34
mod filesystem;
@@ -7,16 +8,18 @@ mod import_scanning;
78
pub mod module_expressions;
89
mod module_finding;
910

11+
use crate::caching::read_cache_data_map_file;
1012
use crate::errors::{GrimpError, GrimpResult};
11-
use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError};
13+
use crate::exceptions::{
14+
CorruptCache, InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError,
15+
};
1216
use crate::filesystem::{PyFakeBasicFileSystem, PyRealBasicFileSystem};
1317
use crate::graph::higher_order_queries::Level;
1418
use crate::graph::{Graph, Module, ModuleIterator, ModuleTokenIterator};
15-
use crate::import_scanning::{
16-
get_file_system_boxed, py_found_packages_to_rust, scan_for_imports_no_py, to_py_direct_imports,
17-
};
19+
use crate::import_scanning::{py_found_packages_to_rust, scan_for_imports_no_py};
1820
use crate::module_expressions::ModuleExpression;
1921
use derive_new::new;
22+
use filesystem::get_file_system_boxed;
2023
use itertools::Itertools;
2124
use pyo3::IntoPyObjectExt;
2225
use pyo3::exceptions::PyValueError;
@@ -29,6 +32,7 @@ use std::collections::HashSet;
2932
#[pymodule]
3033
fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
3134
m.add_wrapped(wrap_pyfunction!(scan_for_imports))?;
35+
m.add_wrapped(wrap_pyfunction!(read_cache_data_map_file))?;
3236
m.add_class::<GraphWrapper>()?;
3337
m.add_class::<PyRealBasicFileSystem>()?;
3438
m.add_class::<PyFakeBasicFileSystem>()?;
@@ -39,6 +43,7 @@ fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
3943
py.get_type::<InvalidModuleExpression>(),
4044
)?;
4145
m.add("ParseError", py.get_type::<ParseError>())?;
46+
m.add("CorruptCache", py.get_type::<CorruptCache>())?;
4247
Ok(())
4348
}
4449

@@ -65,8 +70,6 @@ fn scan_for_imports<'py>(
6570
exclude_type_checking_imports: bool,
6671
file_system: Bound<'py, PyAny>,
6772
) -> PyResult<Bound<'py, PyDict>> {
68-
let valueobjects_pymodule = PyModule::import(py, "grimp.domain.valueobjects").unwrap();
69-
let py_module_class = valueobjects_pymodule.getattr("Module").unwrap();
7073
let file_system_boxed = get_file_system_boxed(&file_system)?;
7174
let found_packages_rust = py_found_packages_to_rust(&found_packages);
7275
let modules_rust: HashSet<module_finding::Module> = module_files
@@ -112,14 +115,7 @@ fn scan_for_imports<'py>(
112115
}
113116
let imports_by_module = imports_by_module_result.unwrap();
114117

115-
let imports_by_module_py = PyDict::new(py);
116-
for (module, imports) in imports_by_module.iter() {
117-
let py_module_instance = py_module_class.call1((module.name.clone(),)).unwrap();
118-
let py_imports = to_py_direct_imports(py, imports);
119-
imports_by_module_py
120-
.set_item(py_module_instance, py_imports)
121-
.unwrap();
122-
}
118+
let imports_by_module_py = import_scanning::imports_by_module_to_py(py, imports_by_module);
123119

124120
Ok(imports_by_module_py)
125121
}

src/grimp/adaptors/caching.py

Lines changed: 8 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import hashlib
2+
23
import json
34
import logging
45
from typing import Dict, List, Optional, Set, Tuple, Type
@@ -9,6 +10,7 @@
910

1011
from ..application.ports.caching import Cache as AbstractCache
1112
from ..application.ports.caching import CacheMiss
13+
from grimp import _rustgrimp as rust # type: ignore[attr-defined]
1214

1315
logger = logging.getLogger(__name__)
1416
PrimitiveFormat = Dict[str, List[Tuple[str, Optional[int], str]]]
@@ -199,74 +201,22 @@ def _read_data_map_file(self) -> Dict[Module, Set[DirectImport]]:
199201
),
200202
)
201203
try:
202-
serialized = self.file_system.read(data_cache_filename)
204+
imports_by_module = rust.read_cache_data_map_file(
205+
data_cache_filename, self.file_system.convert_to_basic()
206+
)
203207
except FileNotFoundError:
204208
logger.info(f"No cache file: {data_cache_filename}.")
205209
return {}
206-
207-
# Deserialize to primitives.
208-
try:
209-
deserialized_json = json.loads(serialized)
210-
logger.info(f"Used cache data file {data_cache_filename}.")
211-
except json.JSONDecodeError:
210+
except rust.CorruptCache:
212211
logger.warning(f"Could not use corrupt cache file {data_cache_filename}.")
213212
return {}
214213

215-
primitives_map: PrimitiveFormat = self._to_primitives_data_map(deserialized_json)
216-
217-
return {
218-
Module(name=name): {
219-
DirectImport(
220-
importer=Module(name),
221-
imported=Module(import_data[0]),
222-
line_number=int(import_data[1]), # type: ignore
223-
line_contents=import_data[2],
224-
)
225-
for import_data in imports_data
226-
}
227-
for name, imports_data in primitives_map.items()
228-
}
214+
logger.info(f"Used cache data file {data_cache_filename}.")
215+
return imports_by_module
229216

230217
def _build_data_cache_filename(self, found_package: FoundPackage) -> str:
231218
return self.file_system.join(self.cache_dir, f"{found_package.name}.data.json")
232219

233-
def _to_primitives_data_map(self, deserialized_json: object) -> PrimitiveFormat:
234-
"""
235-
Convert the deserialized json from a data file to a narrower schema.
236-
237-
Anything that doesn't fit the schema will be removed.
238-
"""
239-
if not isinstance(deserialized_json, dict):
240-
return {}
241-
242-
primitives_map: PrimitiveFormat = {}
243-
244-
for key, value in deserialized_json.items():
245-
if not isinstance(key, str):
246-
continue
247-
if not isinstance(value, list):
248-
continue
249-
primitive_imports = []
250-
for deserialized_import in value:
251-
try:
252-
[imported, line_number, line_contents] = deserialized_import
253-
except ValueError:
254-
continue
255-
try:
256-
primitive_imports.append(
257-
(
258-
str(imported),
259-
int(line_number) if line_number else None,
260-
str(line_contents),
261-
)
262-
)
263-
except TypeError:
264-
continue
265-
266-
primitives_map[key] = primitive_imports
267-
268-
return primitives_map
269-
270220
def _write_marker_files_if_not_already_there(self) -> None:
271221
marker_files_info = (
272222
(".gitignore", "# Automatically created by Grimp.\n*"),

0 commit comments

Comments
 (0)