Skip to content

Commit d650ad3

Browse files
authored
Merge pull request #242 from seddonym/scanning-optimization
Scanning optimization
2 parents 39c5c0a + b53a0ed commit d650ad3

2 files changed

Lines changed: 28 additions & 28 deletions

File tree

rust/src/filesystem.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ use std::ffi::OsStr;
66
use std::fs;
77
use std::path::{Path, PathBuf};
88
use unindent::unindent;
9+
use lazy_static::lazy_static;
10+
11+
12+
lazy_static! {
13+
static ref ENCODING_RE: Regex = Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)").unwrap();
14+
}
915

1016
pub trait FileSystem: Send + Sync {
1117
fn sep(&self) -> String;
@@ -81,13 +87,13 @@ impl FileSystem for RealBasicFileSystem {
8187
})?;
8288

8389
let s = String::from_utf8_lossy(&bytes);
84-
let encoding_re = Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)").unwrap();
90+
8591

8692
let mut detected_encoding: Option<String> = None;
8793

8894
// Coding specification needs to be in the first two lines, or it's ignored.
8995
for line in s.lines().take(2) {
90-
if let Some(captures) = encoding_re.captures(line)
96+
if let Some(captures) = ENCODING_RE.captures(line)
9197
&& let Some(encoding_name) = captures.get(1) {
9298
detected_encoding = Some(encoding_name.as_str().to_string());
9399
break;

rust/src/import_scanning.rs

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
/// Statically analyses some Python modules for import statements within their shared package.
2-
use rayon::prelude::*;
31
use crate::errors::GrimpResult;
42
use crate::filesystem::{FileSystem, PyFakeBasicFileSystem, PyRealBasicFileSystem};
53
use crate::import_parsing;
@@ -8,6 +6,8 @@ use itertools::Itertools;
86
use pyo3::exceptions::PyTypeError;
97
use pyo3::prelude::*;
108
use pyo3::types::{PyDict, PySet};
9+
/// Statically analyses some Python modules for import statements within their shared package.
10+
use rayon::prelude::*;
1111
use std::collections::{HashMap, HashSet};
1212
use std::io::{self, ErrorKind};
1313

@@ -85,12 +85,20 @@ pub fn scan_for_imports_no_py(
8585
) -> GrimpResult<HashMap<Module, HashSet<DirectImport>>> {
8686
let module_packages = get_modules_from_found_packages(found_packages);
8787

88+
// Assemble a lookup table so we only need to do this once.
89+
let mut found_packages_by_module = HashMap::new();
90+
for found_package in found_packages {
91+
for module_file in &found_package.module_files {
92+
found_packages_by_module.insert(&module_file.module, found_package);
93+
}
94+
}
8895
let results: GrimpResult<Vec<(Module, HashSet<DirectImport>)>> = modules
8996
.par_iter()
9097
.map(|module| {
9198
let imports = scan_for_imports_no_py_single_module(
9299
module,
93100
file_system,
101+
&found_packages_by_module,
94102
found_packages,
95103
&module_packages,
96104
include_external_packages,
@@ -107,13 +115,14 @@ pub fn scan_for_imports_no_py(
107115
fn scan_for_imports_no_py_single_module(
108116
module: &Module,
109117
file_system: &Box<dyn FileSystem + Send + Sync>,
118+
found_packages_by_module: &HashMap<&Module, &FoundPackage>,
110119
found_packages: &HashSet<FoundPackage>,
111120
all_modules: &HashSet<Module>,
112121
include_external_packages: bool,
113122
exclude_type_checking_imports: bool,
114123
) -> GrimpResult<HashSet<DirectImport>> {
115124
let mut imports: HashSet<DirectImport> = HashSet::new();
116-
let found_package_for_module = _lookup_found_package_for_module(module, found_packages);
125+
let found_package_for_module = found_packages_by_module[module];
117126
let module_filename =
118127
_determine_module_filename(module, found_package_for_module, file_system).unwrap();
119128
let module_contents = file_system.read(&module_filename).unwrap();
@@ -148,14 +157,14 @@ fn scan_for_imports_no_py_single_module(
148157
if include_external_packages
149158
&& let Some(imported_module) =
150159
_distill_external_module(&imported_object_name, found_packages)
151-
{
152-
imports.insert(DirectImport {
153-
importer: module.name.to_string(),
154-
imported: imported_module,
155-
line_number: imported_object.line_number,
156-
line_contents: imported_object.line_contents,
157-
});
158-
}
160+
{
161+
imports.insert(DirectImport {
162+
importer: module.name.to_string(),
163+
imported: imported_module,
164+
line_number: imported_object.line_number,
165+
line_contents: imported_object.line_contents,
166+
});
167+
}
159168
}
160169
}
161170
}
@@ -192,21 +201,6 @@ pub fn to_py_direct_imports<'a>(
192201
pyset
193202
}
194203

195-
fn _lookup_found_package_for_module<'b>(
196-
module: &Module,
197-
found_packages: &'b HashSet<FoundPackage>,
198-
) -> &'b FoundPackage {
199-
// TODO: it's probably inefficient to do this every time we look up a module.
200-
for found_package in found_packages {
201-
for module_file in &found_package.module_files {
202-
if module_file.module == *module {
203-
return found_package;
204-
}
205-
}
206-
}
207-
panic!("Could not lookup found package for module {module}");
208-
}
209-
210204
#[allow(clippy::borrowed_box)]
211205
fn _determine_module_filename(
212206
module: &Module,

0 commit comments

Comments
 (0)