|
| 1 | +/** |
| 2 | + * Deterministic Line Breaking for Presentation Code Blocks |
| 3 | + * |
| 4 | + * This module provides language-aware line breaking to ensure all code blocks |
| 5 | + * in presentations fit within the 60-character limit for readability. |
| 6 | + * |
| 7 | + * Strategy: |
| 8 | + * - Target line length: 50-55 chars (optimal readability) |
| 9 | + * - Maximum line length: 60 chars (hard limit) |
| 10 | + * - Language-specific break points (prioritized) |
| 11 | + * - Preserve semantic meaning (no mid-word breaks) |
| 12 | + */ |
| 13 | + |
/** Hard limit: no line longer than this is returned unbroken. */
const MAX_LINE_LENGTH = 60;

/** Preferred break column; equal-priority break points closest to it win. */
const TARGET_LINE_LENGTH = 50;

/** Prefix for every wrapped (continuation) line: exactly two spaces. */
const INDENT = '  ';
| 17 | + |
/**
 * Language-specific break patterns.
 *
 * Each entry describes one kind of break opportunity:
 * - regex: global pattern; the break is placed immediately AFTER each match
 * - priority: higher values are preferred over lower ones
 * - addContinuation: when breaking bash, end the broken line with " \"
 * - description: human-readable label reported with each break point
 * - preserveDelimiter: NOTE(review): not read by any code visible in this
 *   file; kept as metadata only - confirm before relying on it
 *
 * Patterns must never split a token in a way that changes its meaning
 * (numbers, spread operators, `--flag=value` arguments).
 */

// JavaScript and TypeScript use the same break rules, so they share one list.
const SCRIPT_PATTERNS = [
  // Method chains: a dot that starts a property access. The lookahead skips
  // decimals ("3.14") and the lookbehind skips spread/rest ("...args").
  { regex: /(?<!\.)\.(?=[A-Za-z_$])/g, priority: 10, preserveDelimiter: true, description: 'Method chain' },
  // After commas in parameter lists
  { regex: /,\s+/g, priority: 9, preserveDelimiter: false, description: 'After comma' },
  // After logical operators
  { regex: /\s+(?:&&|\|\|)\s+/g, priority: 8, preserveDelimiter: false, description: 'Logical operator' },
  // After assignment operators
  { regex: /\s*=\s*/g, priority: 7, preserveDelimiter: false, description: 'Assignment' },
  // After opening braces/parens
  { regex: /[({]\s*/g, priority: 6, preserveDelimiter: true, description: 'After opening' },
  // At spaces (fallback)
  { regex: /\s+/g, priority: 1, preserveDelimiter: false, description: 'Whitespace' },
];

const BREAK_PATTERNS = {
  text: [
    // Sentence boundaries (highest priority for readability)
    { regex: /\.\s+(?=[A-Z])/g, priority: 10, preserveDelimiter: true, description: 'Sentence boundary' },
    // After colons that introduce content
    { regex: /:\s+/g, priority: 9, preserveDelimiter: false, description: 'After colon' },
    // After conjunctions with commas
    { regex: /,\s+(?:and|or|but)\s+/g, priority: 8, preserveDelimiter: false, description: 'Conjunction' },
    // After commas
    { regex: /,\s+/g, priority: 7, preserveDelimiter: false, description: 'After comma' },
    // Before bullet points: only the whitespace is consumed, so the dash
    // starts the next line instead of dangling at the end of this one.
    { regex: /\s+(?=-\s+)/g, priority: 6, preserveDelimiter: true, description: 'Before bullet' },
    // After "that", "which", "where" as whole words (natural pauses);
    // \b keeps words such as "elsewhere" from matching.
    { regex: /\b(?:that|which|where)\s+/g, priority: 5, preserveDelimiter: false, description: 'Relative clause' },
    // At any space (fallback)
    { regex: /\s+/g, priority: 1, preserveDelimiter: false, description: 'Whitespace' },
  ],

  markdown: [
    // NOTE(review): the two patterns below require a newline in the input.
    // processCodeBlock splits on "\n" before breaking, so they look
    // unreachable through that entry point - confirm intent.
    // After headers
    { regex: /#\s+[^\n]+\n/g, priority: 10, preserveDelimiter: true, description: 'After header' },
    // After bullet points
    { regex: /\n[-*]\s+/g, priority: 9, preserveDelimiter: true, description: 'After bullet' },
    // After sentences
    { regex: /\.\s+/g, priority: 8, preserveDelimiter: false, description: 'After sentence' },
    // After commas
    { regex: /,\s+/g, priority: 5, preserveDelimiter: false, description: 'After comma' },
    // At spaces (fallback)
    { regex: /\s+/g, priority: 1, preserveDelimiter: false, description: 'Whitespace' },
  ],

  bash: [
    // After a single pipe; the lookbehind keeps the second bar of "||"
    // from being classified as a pipe.
    { regex: /(?<!\|)\|\s+/g, priority: 10, preserveDelimiter: false, addContinuation: true, description: 'After pipe' },
    // After logical operators
    { regex: /&&\s+/g, priority: 9, preserveDelimiter: false, addContinuation: true, description: 'After AND' },
    { regex: /\|\|\s+/g, priority: 9, preserveDelimiter: false, addContinuation: true, description: 'After OR' },
    // After semicolons (the command is complete, so no continuation)
    { regex: /;\s*/g, priority: 8, preserveDelimiter: false, description: 'After semicolon' },
    // After a flag that is followed by whitespace. "--flag=value" is never
    // split: a break after "=" would turn one argument into two.
    { regex: /\s+--?[a-zA-Z-]+\s+/g, priority: 7, preserveDelimiter: false, addContinuation: true, description: 'After flag' },
    // At spaces (fallback)
    { regex: /\s+/g, priority: 1, preserveDelimiter: false, addContinuation: true, description: 'Whitespace' },
  ],

  typescript: SCRIPT_PATTERNS,

  javascript: SCRIPT_PATTERNS,

  json: [
    // After commas in objects/arrays
    { regex: /,\s*/g, priority: 10, preserveDelimiter: false, description: 'After comma' },
    // After colons
    { regex: /:\s*/g, priority: 9, preserveDelimiter: false, description: 'After colon' },
    // After opening braces/brackets
    { regex: /[{[]\s*/g, priority: 8, preserveDelimiter: true, description: 'After opening' },
    // At spaces (fallback)
    { regex: /\s+/g, priority: 1, preserveDelimiter: false, description: 'Whitespace' },
  ],
};

// Fallback for unknown languages
const DEFAULT_PATTERNS = BREAK_PATTERNS.text;
| 106 | + |
/**
 * Find all potential break points in a line.
 *
 * A break point is the position immediately after a pattern match, so the
 * matched delimiter stays with the preceding text.
 *
 * @param {string} line - The line to analyze
 * @param {Array} patterns - Break patterns ({regex, priority, description, addContinuation?})
 * @param {number} maxLength - Largest break index that may be returned
 * @param {number} [targetLength=TARGET_LINE_LENGTH] - Preferred break index,
 *   used to order break points that share a priority
 * @returns {Array} Array of {index, priority, pattern, addContinuation}
 *   objects, best candidate first
 */
function findBreakPoints(line, patterns, maxLength, targetLength = TARGET_LINE_LENGTH) {
  const breakPoints = [];

  for (const pattern of patterns) {
    // Scan with a private, always-global copy: the shared pattern's lastIndex
    // is never touched, and a pattern declared without /g cannot cause an
    // endless loop.
    const { source, flags } = pattern.regex;
    const scanner = new RegExp(source, flags.includes('g') ? flags : `${flags}g`);

    // matchAll steps past zero-width matches on its own.
    for (const match of line.matchAll(scanner)) {
      // Matches are produced left to right; once one starts beyond the limit
      // no later match can end inside it.
      if (match.index > maxLength) {
        break;
      }

      // Always break after the delimiter to keep it with preceding text
      const index = match.index + match[0].length;

      // Only consider break points within the acceptable range
      if (index > 0 && index <= maxLength) {
        breakPoints.push({
          index,
          priority: pattern.priority,
          pattern: pattern.description,
          addContinuation: Boolean(pattern.addContinuation),
        });
      }
    }
  }

  // Sort by priority (descending), then by distance to target (ascending)
  return breakPoints.sort((a, b) => {
    if (a.priority !== b.priority) {
      return b.priority - a.priority;
    }
    return Math.abs(a.index - targetLength) - Math.abs(b.index - targetLength);
  });
}
| 148 | + |
/**
 * Break a single long line into multiple lines.
 *
 * Lines that already fit within MAX_LINE_LENGTH are returned unchanged.
 * Otherwise the line is split at the best break point for the language;
 * every line after the first is prefixed with INDENT, and bash lines that
 * continue onto the next line end with " \".
 *
 * Every emitted line, including its INDENT prefix and any bash continuation
 * suffix, is at most MAX_LINE_LENGTH characters long.
 *
 * @param {string} line - The line to break (must not contain "\n")
 * @param {string} language - Programming/markup language; unknown values
 *   fall back to the default (text) patterns
 * @returns {string} Line with \n breaks inserted
 */
function breakLongLine(line, language = 'text') {
  // If line is already short enough, return as-is
  if (line.length <= MAX_LINE_LENGTH) {
    return line;
  }

  // Own-property lookup only: names such as "constructor" or "toString"
  // must fall back to the defaults instead of resolving to Object.prototype.
  const isKnownLanguage = Object.prototype.hasOwnProperty.call(BREAK_PATTERNS, language);
  const patterns = isKnownLanguage ? BREAK_PATTERNS[language] : DEFAULT_PATTERNS;

  const isBash = language === 'bash';
  const continuation = ' \\';
  // Room kept free on every broken bash line for the continuation suffix.
  const reserved = isBash ? continuation.length : 0;

  const lines = [];
  let remaining = line;
  let prefix = ''; // becomes INDENT once the first line has been emitted

  while (remaining.length > MAX_LINE_LENGTH - prefix.length) {
    // Budget for the text itself once prefix and suffix are accounted for.
    const limit = MAX_LINE_LENGTH - prefix.length - reserved;
    let head;
    let tail;
    let continued = false;

    // Best-ranked break point that leaves visible text on the current line;
    // a break inside leading indentation would only emit a blank line.
    const best = findBreakPoints(remaining, patterns, limit).find(
      (candidate) => remaining.substring(0, candidate.index).trimEnd().length > 0,
    );

    if (best) {
      head = remaining.substring(0, best.index).trimEnd();
      tail = remaining.substring(best.index).trimStart();
      continued = isBash && best.addContinuation;
    } else {
      // No usable break point - fall back to the last space within the limit
      const lastSpace = remaining.lastIndexOf(' ', limit);
      const spaceHead = lastSpace > 0 ? remaining.substring(0, lastSpace).trimEnd() : '';

      if (spaceHead.length > 0) {
        head = spaceHead;
        tail = remaining.substring(lastSpace + 1).trim();
        continued = isBash;
      } else {
        // No space at all - hard break, but never between the two halves of
        // a surrogate pair.
        let cut = limit;
        const lastUnit = remaining.charCodeAt(cut - 1);
        if (cut > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
          cut -= 1;
        }
        head = remaining.substring(0, cut);
        tail = remaining.substring(cut);
      }
    }

    // A continuation marker is only meaningful when something follows it.
    const suffix = continued && tail.length > 0 ? continuation : '';
    lines.push(prefix + head + suffix);

    remaining = tail;
    prefix = INDENT;
  }

  // Add remaining text (always a continuation line at this point)
  if (remaining.length > 0) {
    lines.push(prefix + remaining);
  }

  return lines.join('\n');
}
| 206 | + |
/**
 * Apply line breaking to every line of a code block.
 *
 * The block is split on "\n", each line is passed through breakLongLine,
 * and the results are joined with "\n" again. Empty strings and non-string
 * values are handed back untouched.
 *
 * @param {string} code - Multi-line code string (with \n)
 * @param {string} language - Programming/markup language
 * @returns {string} Processed code with line breaks
 */
function processCodeBlock(code, language = 'text') {
  const isNonEmptyString = typeof code === 'string' && code.length > 0;
  if (!isNonEmptyString) {
    return code;
  }

  const wrapped = [];
  for (const rawLine of code.split('\n')) {
    wrapped.push(breakLongLine(rawLine, language));
  }

  return wrapped.join('\n');
}
| 223 | + |
/**
 * Line-break one string property in place and record statistics for it.
 *
 * Non-string and empty values are left alone and not counted.
 *
 * @param {Object} holder - Object owning the property (slide, code pane, step)
 * @param {string} field - Name of the property holding the code text
 * @param {string} language - Language passed to processCodeBlock
 * @param {Object} stats - Running {linesProcessed, linesShortened, maxReduction}
 */
function applyLineBreaks(holder, field, language, stats) {
  const original = holder[field];
  if (typeof original !== 'string' || original.length === 0) {
    return;
  }

  for (const line of original.split('\n')) {
    stats.linesProcessed += 1;
    if (line.length > MAX_LINE_LENGTH) {
      stats.linesShortened += 1;
      stats.maxReduction = Math.max(stats.maxReduction, line.length - MAX_LINE_LENGTH);
    }
  }

  holder[field] = processCodeBlock(original, language);
}

/**
 * Process all code blocks in a presentation JSON.
 *
 * Handles three slide types:
 * - "code": slide.code, using slide.language
 * - "codeComparison": leftCode.code and rightCode.code, each with its own
 *   language (falling back to the other side's language, then "text")
 * - "codeExecution": the string .line of every step, processed as "text"
 *
 * The presentation is modified in place.
 *
 * @param {Object} presentation - Presentation JSON object
 * @returns {{presentation: Object, stats: {linesProcessed: number,
 *   linesShortened: number, maxReduction: number}}} The same presentation
 *   object plus statistics: input lines seen, input lines longer than
 *   MAX_LINE_LENGTH, and the largest overshoot past MAX_LINE_LENGTH. The same
 *   shape (with zeroed stats) is returned when there are no slides to process.
 */
function processPresentation(presentation) {
  const stats = { linesProcessed: 0, linesShortened: 0, maxReduction: 0 };

  if (!presentation || !Array.isArray(presentation.slides)) {
    return { presentation, stats };
  }

  for (const slide of presentation.slides) {
    if (!slide) {
      continue;
    }

    if (slide.type === 'code') {
      applyLineBreaks(slide, 'code', slide.language || 'text', stats);
    }

    if (slide.type === 'codeComparison') {
      const { leftCode, rightCode } = slide;

      // Each pane is broken with its own language so that, for example, bash
      // continuations never end up in a JSON pane.
      if (leftCode) {
        const language = leftCode.language || rightCode?.language || 'text';
        applyLineBreaks(leftCode, 'code', language, stats);
      }
      if (rightCode) {
        const language = rightCode.language || leftCode?.language || 'text';
        applyLineBreaks(rightCode, 'code', language, stats);
      }
    }

    // codeExecution steps carry their text in a .line field
    if (slide.type === 'codeExecution' && Array.isArray(slide.steps)) {
      for (const step of slide.steps) {
        if (step) {
          applyLineBreaks(step, 'line', 'text', stats);
        }
      }
    }
  }

  return { presentation, stats };
}
| 315 | + |
| 316 | +export { |
| 317 | + processPresentation, |
| 318 | + processCodeBlock, |
| 319 | + breakLongLine, |
| 320 | + MAX_LINE_LENGTH, |
| 321 | + TARGET_LINE_LENGTH, |
| 322 | +}; |
0 commit comments