33 // 1. Get Extension ID (Wait for content.js to set it)
// Reads the extension ID from the root <html> element's dataset
// (i.e. the `data-sentience-extension-id` attribute). Evaluates to
// undefined while that attribute has not been set yet.
44 const getExtensionId = ( ) => document . documentElement . dataset . sentienceExtensionId ;
55 let extId = getExtensionId ( ) ;
6-
6+
77 // Safety poller for async loading race conditions
88 if ( ! extId ) {
99 await new Promise ( resolve => {
3939 return NodeFilter . FILTER_ACCEPT ;
4040 }
4141 } ;
42-
42+
4343 const walker = document . createTreeWalker ( root , NodeFilter . SHOW_ELEMENT , filter ) ;
4444 while ( walker . nextNode ( ) ) {
4545 const node = walker . currentNode ;
100100 // Fast center-point check
101101 const cx = rect . x + rect . width / 2 ;
102102 const cy = rect . y + rect . height / 2 ;
103-
103+
104104 // If point is off-screen, elementFromPoint returns null, assume NOT occluded for safety
105105 if ( cx < 0 || cx > window . innerWidth || cy < 0 || cy > window . innerHeight ) return false ;
106106
107107 const topEl = document . elementFromPoint ( cx , cy ) ;
108108 if ( ! topEl ) return false ;
109-
109+
110110 // It's visible if the top element is us, or contains us, or we contain it
111111 return ! ( el === topEl || el . contains ( topEl ) || topEl . contains ( el ) ) ;
112112 }
131131 function getRawHTML ( root ) {
132132 const sourceRoot = root || document . body ;
133133 const clone = sourceRoot . cloneNode ( true ) ;
134-
134+
135135 // Remove unwanted elements by tag name (simple and reliable)
136136 const unwantedTags = [ 'nav' , 'footer' , 'header' , 'script' , 'style' , 'noscript' , 'iframe' , 'svg' ] ;
137137 unwantedTags . forEach ( tag => {
157157 while ( node = walker . nextNode ( ) ) {
158158 const tag = node . tagName . toLowerCase ( ) ;
159159 if ( tag === 'head' || tag === 'title' ) continue ;
160-
160+
161161 const style = window . getComputedStyle ( node ) ;
162162 if ( style . display === 'none' || style . visibility === 'hidden' ||
163163 ( node . offsetWidth === 0 && node . offsetHeight === 0 ) ) {
222222 function convertToMarkdown ( root ) {
223223 // Get cleaned HTML first
224224 const rawHTML = getRawHTML ( root ) ;
225-
225+
226226 // Create a temporary container to parse the HTML
227227 const tempDiv = document . createElement ( 'div' ) ;
228228 tempDiv . innerHTML = rawHTML ;
229-
229+
230230 let markdown = '' ;
231231 let insideLink = false ; // Track if we're inside an <a> tag
232232
279279 }
280280
281281 walk ( tempDiv ) ;
282-
282+
283283 // Cleanup: remove excessive newlines
284284 return markdown . replace ( / \n { 3 , } / g, '\n\n' ) . trim ( ) ;
285285 }
299299
300300 const style = window . getComputedStyle ( node ) ;
301301 if ( style . display === 'none' || style . visibility === 'hidden' ) return ;
302-
302+
303303 // Block-level elements are padded: a space before their children, a newline after
304304 const isBlock = style . display === 'block' || style . display === 'flex' || node . tagName === 'P' || node . tagName === 'DIV' ;
305305 if ( isBlock ) text += ' ' ;
306-
306+
307307 if ( node . shadowRoot ) {
308308 Array . from ( node . shadowRoot . childNodes ) . forEach ( walk ) ;
309309 } else {
310310 node . childNodes . forEach ( walk ) ;
311311 }
312-
312+
313313 if ( isBlock ) text += '\n' ;
314314 }
315315 }
331331 } ;
332332 await module . default ( undefined , imports ) ;
333333 wasmModule = module ;
334-
334+
335335 // Verify functions are available
336336 if ( ! wasmModule . analyze_page ) {
337337 console . error ( '[SentienceAPI.com] available' ) ;
354354 const rawData = [ ] ;
355355 // Remove textMap as we include text in rawData
356356 window . sentience_registry = [ ] ;
357-
357+
358358 const nodes = getAllElements ( ) ;
359-
359+
360360 nodes . forEach ( ( el , idx ) => {
361361 if ( ! el . getBoundingClientRect ) return ;
362362 const rect = el . getBoundingClientRect ( ) ;
363363 if ( rect . width < 5 || rect . height < 5 ) return ;
364364
365365 window . sentience_registry [ idx ] = el ;
366-
366+
367367 // Calculate properties for Fat Payload
368368 const textVal = getText ( el ) ;
369369 const inView = isInViewport ( rect ) ;
453453 // Prune raw elements using WASM before sending to API
454454 // This prevents 413 errors on large sites (Amazon: 5000+ -> ~200-400)
455455 const prunedRawData = wasmModule . prune_for_api ( rawData ) ;
456-
456+
457457 // Clean up null/undefined fields in raw_elements as well
458458 const cleanedRawElements = cleanElement ( prunedRawData ) ;
459459
469469 read : ( options = { } ) => {
470470 const format = options . format || 'raw' ; // 'raw', 'text', or 'markdown'
471471 let content ;
472-
472+
473473 if ( format === 'raw' ) {
474474 // Return raw HTML suitable for Turndown or other Node.js libraries
475475 content = getRawHTML ( document . body ) ;
480480 // Default to text
481481 content = convertToText ( document . body ) ;
482482 }
483-
483+
484484 return {
485485 status : "success" ,
486486 url : window . location . href ,
497497 return false ;
498498 }
499499 } ;
500- } ) ( ) ;
500+ } ) ( ) ;
0 commit comments