Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@
## 2026-04-08 - [Performance: Defer Allocation during Traversal]
**Learning:** During DAG traversals, creating owned variants of identifiers (like `file.to_path_buf()`) *before* checking `visited` HashSets results in heap allocations (O(E)) for every edge instead of every visited node (O(V)). By moving the `&PathBuf` allocation strictly *after* all HashSet `contains` checks using the borrowed reference (`&Path`), we drastically reduce memory churn.
**Action:** Always check `HashSet::contains` with a borrowed reference *before* creating the owned version required by `HashSet::insert`, especially in performance-critical graph traversal paths.

## 2025-05-14 - Optimize SQL generation by avoiding intermediate Vec allocations and format! in loops
**Learning:** In D1 targets `build_upsert_stmt` and `build_delete_stmt`, strings are frequently joined inside loops resulting in unneeded `Vec` allocations and string interpolations, particularly since queries are generated dynamically at scale. Preallocating large `String`s using `with_capacity` and generating SQL queries using `write!` significantly reduces memory allocations and string allocations for edge targets mapping.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (typo): The phrase "for edge targets mapping" is grammatically awkward; consider rephrasing.

Consider rephrasing the end to something like β€œfor edge target mappings” or β€œfor edge-target mapping” to improve readability while preserving the intent.

Suggested change
**Learning:** In D1 targets `build_upsert_stmt` and `build_delete_stmt`, strings are frequently joined inside loops resulting in unneeded `Vec` allocations and string interpolations, particularly since queries are generated dynamically at scale. Preallocating large `String`s using `with_capacity` and generating SQL queries using `write!` significantly reduces memory allocations and string allocations for edge targets mapping.
**Learning:** In D1 targets `build_upsert_stmt` and `build_delete_stmt`, strings are frequently joined inside loops resulting in unneeded `Vec` allocations and string interpolations, particularly since queries are generated dynamically at scale. Preallocating large `String`s using `with_capacity` and generating SQL queries using `write!` significantly reduces memory allocations and string allocations for edge target mappings.

**Action:** Always prefer `std::fmt::Write` + `String::with_capacity` to assemble queries efficiently instead of `format!` and `Vec::join` during batch code exports.
70 changes: 40 additions & 30 deletions crates/flow/src/targets/d1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,40 +300,48 @@ impl D1ExportContext {
key: &KeyValue,
values: &FieldValues,
) -> Result<(String, Vec<serde_json::Value>), RecocoError> {
let mut columns = vec![];
let mut placeholders = vec![];
let mut params = vec![];
let mut update_clauses = vec![];
use std::fmt::Write;

let mut params = Vec::with_capacity(self.key_fields_schema.len() + self.value_fields_schema.len());
// ⚑ Bolt: Optimize SQL generation by avoiding intermediate Vec allocations and format! in loops
let mut sql = String::with_capacity(128 + (self.key_fields_schema.len() + self.value_fields_schema.len()) * 32);

// Extract key parts - KeyValue is a wrapper around Box<[KeyPart]>
write!(sql, "INSERT INTO {} (", self.table_name).unwrap();

let mut first = true;
for (idx, _key_field) in self.key_fields_schema.iter().enumerate() {
if let Some(key_part) = key.0.get(idx) {
columns.push(self.key_fields_schema[idx].name.clone());
placeholders.push("?".to_string());
if !first { sql.push_str(", "); }
sql.push_str(&self.key_fields_schema[idx].name);
params.push(key_part_to_json(key_part)?);
first = false;
}
}

// Add value fields
for (idx, value) in values.fields.iter().enumerate() {
if let Some(value_field) = self.value_fields_schema.get(idx) {
columns.push(value_field.name.clone());
placeholders.push("?".to_string());
if !first { sql.push_str(", "); }
sql.push_str(&value_field.name);
params.push(value_to_json(value)?);
update_clauses.push(format!(
"{} = excluded.{}",
value_field.name, value_field.name
));
first = false;
}
}

let sql = format!(
"INSERT INTO {} ({}) VALUES ({}) ON CONFLICT DO UPDATE SET {}",
self.table_name,
columns.join(", "),
placeholders.join(", "),
update_clauses.join(", ")
);
sql.push_str(") VALUES (");
Comment on lines 321 to +330
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (performance): Avoid iterating over values.fields twice by deriving the UPDATE clause during the first pass.

Right now the code walks values.fields once for the column list/params and again for the ON CONFLICT DO UPDATE SET clause. Instead, derive the update expressions in the same loop where you collect columns/params (e.g., track the field names in a list or build the SET clause directly). This keeps the insert and update columns in lockstep without relying on two separate passes to stay in sync.

Suggested implementation:

        }

        // Walk values.fields once, deriving both the INSERT column list and
        // the set of columns that will be used for the ON CONFLICT DO UPDATE
        // clause so we don't need a second pass later.
        let mut updatable_columns: Vec<&str> = Vec::new();

        for (idx, value) in values.fields.iter().enumerate() {
            if let Some(value_field) = self.value_fields_schema.get(idx) {
                if !first {
                    sql.push_str(", ");
                }
                sql.push_str(&value_field.name);
                params.push(value_to_json(value)?);

                // Track value columns for the UPDATE ... SET clause so we can
                // build the ON CONFLICT DO UPDATE clause without a second pass.
                updatable_columns.push(&value_field.name);
            }

To fully implement your suggestion and remove the second iteration over values.fields, you should:

  1. Locate the later code that currently walks values.fields a second time to build the ON CONFLICT DO UPDATE SET ... clause.
  2. Remove that second for (idx, value) in values.fields.iter().enumerate() loop entirely.
  3. After the INSERT column/value construction is complete (and after the ON CONFLICT (...) portion, if present), build the DO UPDATE SET clause from updatable_columns, for example:
    if !updatable_columns.is_empty() {
        sql.push_str(" DO UPDATE SET ");
        for (i, col) in updatable_columns.iter().enumerate() {
            if i > 0 {
                sql.push_str(", ");
            }
            // Use the appropriate expression for your dialect, e.g. `excluded` or similar
            write!(sql, "{} = excluded.{}", col, col).unwrap();
        }
    }
  4. Ensure updatable_columns is in scope where you build the DO UPDATE SET clause (you may need to move the declaration slightly up/down depending on surrounding code).

Once these additional changes are applied, the values.fields slice will only be iterated once, and the INSERT and UPDATE column sets will stay in lockstep by construction.

for i in 0..params.len() {
if i > 0 { sql.push_str(", "); }
sql.push('?');
}

sql.push_str(") ON CONFLICT DO UPDATE SET ");
let mut first_update = true;
for (idx, _value) in values.fields.iter().enumerate() {
if let Some(value_field) = self.value_fields_schema.get(idx) {
if !first_update { sql.push_str(", "); }
write!(sql, "{} = excluded.{}", value_field.name, value_field.name).unwrap();
first_update = false;
}
Comment on lines +336 to +343
}

Ok((sql, params))
}
Expand All @@ -342,22 +350,24 @@ impl D1ExportContext {
&self,
key: &KeyValue,
) -> Result<(String, Vec<serde_json::Value>), RecocoError> {
let mut where_clauses = vec![];
let mut params = vec![];
use std::fmt::Write;

let mut params = Vec::with_capacity(self.key_fields_schema.len());
// ⚑ Bolt: Pre-allocate capacity and use write! instead of intermediate String joining
let mut sql = String::with_capacity(64 + self.key_fields_schema.len() * 32);

write!(sql, "DELETE FROM {} WHERE ", self.table_name).unwrap();

let mut first = true;
for (idx, _key_field) in self.key_fields_schema.iter().enumerate() {
if let Some(key_part) = key.0.get(idx) {
where_clauses.push(format!("{} = ?", self.key_fields_schema[idx].name));
if !first { sql.push_str(" AND "); }
write!(sql, "{} = ?", self.key_fields_schema[idx].name).unwrap();
params.push(key_part_to_json(key_part)?);
first = false;
}
}

let sql = format!(
"DELETE FROM {} WHERE {}",
self.table_name,
where_clauses.join(" AND ")
);

Ok((sql, params))
}

Expand Down