Skip to content

Commit 5174bc2

Browse files
ofriw and claude committed
Add deterministic line-breaking module for presentations
Implements language-aware line breaking with a 60-character limit:

- Break patterns by priority (sentence boundaries, operators, etc.)
- Language-specific rules (TypeScript, bash, JSON, markdown, text)
- Preserves semantic meaning and indentation
- Returns statistics on lines processed and shortened

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent b16e9da commit 5174bc2

File tree

1 file changed

+322
-0
lines changed

1 file changed

+322
-0
lines changed

scripts/lib/line-breaker.js

Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
/**
2+
* Deterministic Line Breaking for Presentation Code Blocks
3+
*
4+
* This module provides language-aware line breaking to ensure all code blocks
5+
* in presentations fit within the 60-character limit for readability.
6+
*
7+
* Strategy:
8+
* - Target line length: 50-55 chars (optimal readability)
9+
* - Maximum line length: 60 chars (hard limit)
10+
* - Language-specific break points (prioritized)
11+
* - Preserve semantic meaning (no mid-word breaks)
12+
*/
13+
14+
/** Hard upper bound, in characters, for any line in a presentation code block. */
const MAX_LINE_LENGTH = 60;

/** Preferred break column; among equal-priority break points the one closest to it wins. */
const TARGET_LINE_LENGTH = 50;

/** Prefix prepended to every wrapped (continuation) line: two spaces. */
const INDENT = '  ';
17+
18+
/**
 * Language-specific break patterns with priorities.
 *
 * Each entry describes one family of break points:
 * - `regex`: global pattern; the break is placed immediately AFTER each match,
 *   so patterns that should break *before* a token match only the whitespace
 *   in front of it and use a lookahead for the token itself.
 * - `priority`: higher value = preferred break point.
 * - `preserveDelimiter`: descriptive metadata only; nothing in this file
 *   reads it.
 * - `addContinuation`: when true, a bash line broken here needs a trailing
 *   backslash so the command keeps running onto the next line.
 * - `description`: human-readable label reported with each break point.
 *
 * Every pattern must consume at least one character (no purely zero-width
 * patterns), and every list ends with a priority-1 whitespace fallback.
 *
 * Known limitation: patterns are purely lexical and are not aware of string
 * literals, so a break may still land inside a quoted string.
 */

// JavaScript and TypeScript share one rule set (kept in a single place so the
// two languages cannot drift apart).
const SCRIPT_PATTERNS = [
  // Method chains: only dots followed by an identifier start, so decimal
  // literals such as 3.14 are never split.
  { regex: /\.(?=[A-Za-z_$])/g, priority: 10, preserveDelimiter: true, description: 'Method chain' },
  // After commas in parameter lists
  { regex: /,\s+/g, priority: 9, preserveDelimiter: false, description: 'After comma' },
  // After logical operators
  { regex: /\s+(?:&&|\|\|)\s+/g, priority: 8, preserveDelimiter: false, description: 'Logical operator' },
  // After assignment operators
  { regex: /\s*=\s*/g, priority: 7, preserveDelimiter: false, description: 'Assignment' },
  // After opening braces/parens
  { regex: /[({]\s*/g, priority: 6, preserveDelimiter: true, description: 'After opening' },
  // At spaces (fallback)
  { regex: /\s+/g, priority: 1, preserveDelimiter: false, description: 'Whitespace' },
];

const BREAK_PATTERNS = {
  text: [
    // Sentence boundaries (highest priority for readability)
    { regex: /\.\s+(?=[A-Z])/g, priority: 10, preserveDelimiter: true, description: 'Sentence boundary' },
    // After colons that introduce content
    { regex: /:\s+/g, priority: 9, preserveDelimiter: false, description: 'After colon' },
    // After conjunctions with commas
    { regex: /,\s+(?:and|or|but)\s+/g, priority: 8, preserveDelimiter: false, description: 'Conjunction' },
    // After commas
    { regex: /,\s+/g, priority: 7, preserveDelimiter: false, description: 'After comma' },
    // Before bullet points: match only the whitespace in front of the dash so
    // the dash starts the next line instead of dangling at the end of this one.
    { regex: /\s+(?=-\s+)/g, priority: 6, preserveDelimiter: true, description: 'Before bullet' },
    // After "that", "which", "where" as whole words (natural pauses)
    { regex: /\b(?:that|which|where)\s+/g, priority: 5, preserveDelimiter: false, description: 'Relative clause' },
    // At any space (fallback)
    { regex: /\s+/g, priority: 1, preserveDelimiter: false, description: 'Whitespace' },
  ],

  markdown: [
    // After headers (can only match when the input still contains a newline)
    { regex: /#\s+[^\n]+\n/g, priority: 10, preserveDelimiter: true, description: 'After header' },
    // After bullet points (can only match when the input still contains a newline)
    { regex: /\n[-*]\s+/g, priority: 9, preserveDelimiter: true, description: 'After bullet' },
    // After sentences
    { regex: /\.\s+/g, priority: 8, preserveDelimiter: false, description: 'After sentence' },
    // After commas
    { regex: /,\s+/g, priority: 5, preserveDelimiter: false, description: 'After comma' },
    // At spaces (fallback)
    { regex: /\s+/g, priority: 1, preserveDelimiter: false, description: 'Whitespace' },
  ],

  bash: [
    // After pipes (preserve pipe, break before next command)
    { regex: /\|\s+/g, priority: 10, preserveDelimiter: false, addContinuation: true, description: 'After pipe' },
    // After logical operators
    { regex: /&&\s+/g, priority: 9, preserveDelimiter: false, addContinuation: true, description: 'After AND' },
    { regex: /\|\|\s+/g, priority: 9, preserveDelimiter: false, addContinuation: true, description: 'After OR' },
    // After semicolons (a complete command, so no continuation is needed)
    { regex: /;\s*/g, priority: 8, preserveDelimiter: false, description: 'After semicolon' },
    // After a flag that is followed by whitespace. A flag written as
    // `--name=value` is deliberately NOT a break point: breaking after `=`
    // would turn one argument into two.
    { regex: /\s+--?[a-zA-Z-]+\s+/g, priority: 7, preserveDelimiter: false, addContinuation: true, description: 'After flag' },
    // At spaces (fallback)
    { regex: /\s+/g, priority: 1, preserveDelimiter: false, addContinuation: true, description: 'Whitespace' },
  ],

  typescript: SCRIPT_PATTERNS,

  // Same as TypeScript
  javascript: SCRIPT_PATTERNS,

  json: [
    // After commas in objects/arrays
    { regex: /,\s*/g, priority: 10, preserveDelimiter: false, description: 'After comma' },
    // After colons
    { regex: /:\s*/g, priority: 9, preserveDelimiter: false, description: 'After colon' },
    // After opening braces/brackets
    { regex: /[{[]\s*/g, priority: 8, preserveDelimiter: true, description: 'After opening' },
    // At spaces (fallback)
    { regex: /\s+/g, priority: 1, preserveDelimiter: false, description: 'Whitespace' },
  ],
};

// Fallback for unknown languages
const DEFAULT_PATTERNS = BREAK_PATTERNS.text;
106+
107+
/**
 * Find all potential break points in a line.
 *
 * A break point is the position immediately after a pattern match, so the
 * matched delimiter stays with the preceding text.
 *
 * @param {string} line - The line to analyze
 * @param {Array} patterns - Break patterns ({regex, priority, description, addContinuation?})
 * @param {number} maxLength - Largest break index that is still acceptable
 * @returns {Array} Array of {index, priority, pattern, addContinuation} objects,
 *   sorted by priority (descending) and then by distance to TARGET_LINE_LENGTH
 *   (ascending)
 */
function findBreakPoints(line, patterns, maxLength) {
  const breakPoints = [];

  for (const pattern of patterns) {
    // Work on a private copy so the shared pattern's lastIndex is never
    // touched, and force the global flag: matchAll requires it, and a
    // non-global regex would otherwise match the same spot over and over.
    const { source, flags } = pattern.regex;
    const regex = new RegExp(source, flags.includes('g') ? flags : `${flags}g`);

    // matchAll also steps past zero-length matches on its own, so no pattern
    // can stall the scan.
    for (const match of line.matchAll(regex)) {
      // Always break after the delimiter to keep it with preceding text
      const index = match.index + match[0].length;

      // Only consider break points within the acceptable range
      if (index <= 0 || index > maxLength) {
        continue;
      }

      // A break with nothing but whitespace in front of it (the line's own
      // leading indentation) would yield an empty first piece.
      if (line.slice(0, index).trim() === '') {
        continue;
      }

      breakPoints.push({
        index,
        priority: pattern.priority,
        pattern: pattern.description,
        addContinuation: pattern.addContinuation || false,
      });
    }
  }

  // Sort by priority (descending), then by distance to target (ascending)
  return breakPoints.sort((a, b) => {
    if (a.priority !== b.priority) {
      return b.priority - a.priority; // Higher priority first
    }
    // Among same priority, prefer closest to target
    const aDist = Math.abs(a.index - TARGET_LINE_LENGTH);
    const bDist = Math.abs(b.index - TARGET_LINE_LENGTH);
    return aDist - bDist;
  });
}
148+
149+
/**
 * Break a single long line into multiple lines.
 *
 * The first output line keeps the input's own leading whitespace; every
 * following line is prefixed with INDENT. No output line is longer than
 * MAX_LINE_LENGTH, including that prefix and, for bash, the trailing
 * line-continuation backslash.
 *
 * @param {string} line - The line to break (expected to contain no newline)
 * @param {string} language - Programming/markup language; unknown values use
 *   the default (text) patterns
 * @returns {string} Line with \n breaks inserted
 */
function breakLongLine(line, language = 'text') {
  // If line is already short enough, return as-is
  if (line.length <= MAX_LINE_LENGTH) {
    return line;
  }

  // Own-property lookup so keys such as 'constructor' fall back to the
  // default patterns instead of resolving to Object.prototype members.
  const isKnownLanguage = Object.prototype.hasOwnProperty.call(BREAK_PATTERNS, language);
  const patterns = isKnownLanguage ? BREAK_PATTERNS[language] : DEFAULT_PATTERNS;

  // Only bash needs an explicit marker that the command continues.
  const continuation = language === 'bash' ? ' \\' : '';

  const lines = [];
  let remaining = line;
  let prefix = ''; // becomes INDENT after the first emitted line

  // The prefix counts toward the limit, so it is part of the loop condition.
  while (prefix.length + remaining.length > MAX_LINE_LENGTH) {
    const room = MAX_LINE_LENGTH - prefix.length;
    let head = null;
    let tail = '';
    let suffix = '';

    // Candidates arrive best-first; take the first one whose finished line
    // (prefix + text + continuation) really fits and is not blank.
    for (const candidate of findBreakPoints(remaining, patterns, MAX_LINE_LENGTH)) {
      const candidateHead = remaining.substring(0, candidate.index).trimEnd();
      const candidateTail = remaining.substring(candidate.index).trimStart();
      // Never leave a dangling backslash when nothing follows the break.
      const candidateSuffix = candidate.addContinuation && candidateTail !== '' ? continuation : '';

      if (candidateHead.trim() !== '' && candidateHead.length + candidateSuffix.length <= room) {
        head = candidateHead;
        tail = candidateTail;
        suffix = candidateSuffix;
        break;
      }
    }

    if (head === null) {
      // No usable pattern break: fall back to the last space that still
      // leaves room for the continuation marker.
      const lastSpace = remaining.lastIndexOf(' ', room - continuation.length);
      const spaceHead = lastSpace > 0 ? remaining.substring(0, lastSpace).trimEnd() : '';

      if (spaceHead.trim() !== '') {
        head = spaceHead;
        tail = remaining.substring(lastSpace + 1).trimStart();
        suffix = tail !== '' ? continuation : '';
      } else {
        // No space at all: hard break mid-token as a last resort. No
        // continuation is added because the token itself has been split.
        head = remaining.substring(0, room);
        tail = remaining.substring(room);
      }
    }

    lines.push(prefix + head + suffix);
    remaining = tail;
    prefix = INDENT;
  }

  // Add remaining text (indented if not first line)
  if (remaining.length > 0) {
    lines.push(prefix + remaining);
  }

  return lines.join('\n');
}
206+
207+
/**
 * Wrap every over-long line of a code block.
 *
 * The block is split on "\n", each piece is handed to breakLongLine with the
 * given language, and the results are joined back with "\n".
 *
 * @param {string} code - Multi-line code string (with \n)
 * @param {string} language - Programming/markup language
 * @returns {string} Processed code with line breaks; an empty or non-string
 *   `code` is returned unchanged
 */
function processCodeBlock(code, language = 'text') {
  const isNonEmptyString = typeof code === 'string' && code.length > 0;
  if (!isNonEmptyString) {
    return code;
  }

  const wrapped = [];
  for (const sourceLine of code.split('\n')) {
    wrapped.push(breakLongLine(sourceLine, language));
  }

  return wrapped.join('\n');
}
223+
224+
/**
 * Add the lines of one original (pre-wrapping) code string to the statistics.
 *
 * @param {string} code - Original code, possibly multi-line
 * @param {{linesProcessed: number, linesShortened: number, maxReduction: number}} stats -
 *   Running totals, updated in place
 */
function tallyOverlongLines(code, stats) {
  const sourceLines = code.split('\n');
  stats.linesProcessed += sourceLines.length;

  for (const sourceLine of sourceLines) {
    const excess = sourceLine.length - MAX_LINE_LENGTH;
    if (excess > 0) {
      stats.linesShortened += 1;
      stats.maxReduction = Math.max(stats.maxReduction, excess);
    }
  }
}

/**
 * Wrap one code-bearing property of `holder` in place and record statistics.
 * Does nothing unless the property is a non-empty string.
 *
 * @param {Object} holder - Object owning the code string (slide, side, step)
 * @param {string} key - Property name on `holder`
 * @param {string} language - Language passed to processCodeBlock
 * @param {Object} stats - Running totals, updated in place
 */
function wrapCodeField(holder, key, language, stats) {
  const original = holder[key];
  if (typeof original !== 'string' || original === '') {
    return;
  }

  tallyOverlongLines(original, stats);
  holder[key] = processCodeBlock(original, language);
}

/**
 * Process all code blocks in a presentation JSON.
 *
 * Handles `code` slides (`slide.code`), `codeComparison` slides
 * (`leftCode.code` / `rightCode.code`) and `codeExecution` slides
 * (`steps[].line`, always treated as plain text). The presentation object is
 * modified in place.
 *
 * @param {Object} presentation - Presentation JSON object
 * @returns {{presentation: Object, stats: {linesProcessed: number, linesShortened: number, maxReduction: number}}}
 *   The same presentation object plus statistics. `maxReduction` is the
 *   largest number of characters by which an original line exceeded
 *   MAX_LINE_LENGTH. The same shape is returned when there is nothing to
 *   process, with all statistics at zero.
 */
function processPresentation(presentation) {
  const stats = { linesProcessed: 0, linesShortened: 0, maxReduction: 0 };

  // Nothing to walk: still answer with the documented {presentation, stats}
  // shape so callers can destructure the result unconditionally.
  if (!presentation || !Array.isArray(presentation.slides)) {
    return { presentation, stats };
  }

  // Process each slide
  for (const slide of presentation.slides) {
    if (!slide) {
      continue;
    }

    // Process code slides
    if (slide.type === 'code') {
      wrapCodeField(slide, 'code', slide.language || 'text', stats);
    }

    // Process codeComparison slides: each side uses its own language and only
    // borrows the other side's when it declares none.
    if (slide.type === 'codeComparison') {
      const { leftCode, rightCode } = slide;
      const leftLanguage = leftCode?.language || rightCode?.language || 'text';
      const rightLanguage = rightCode?.language || leftCode?.language || 'text';

      if (leftCode) {
        wrapCodeField(leftCode, 'code', leftLanguage, stats);
      }
      if (rightCode) {
        wrapCodeField(rightCode, 'code', rightLanguage, stats);
      }
    }

    // Process codeExecution slides (steps with .line field)
    if (slide.type === 'codeExecution' && Array.isArray(slide.steps)) {
      for (const step of slide.steps) {
        if (step) {
          wrapCodeField(step, 'line', 'text', stats);
        }
      }
    }
  }

  return { presentation, stats };
}
315+
316+
export {
317+
processPresentation,
318+
processCodeBlock,
319+
breakLongLine,
320+
MAX_LINE_LENGTH,
321+
TARGET_LINE_LENGTH,
322+
};

0 commit comments

Comments
 (0)