@@ -380,15 +380,11 @@ fn process_node_to_markdown(
380380 // Tables
381381 "table" => {
382382 output. push_str ( "\n \n " ) ;
383- process_node_to_markdown (
383+ output . push_str ( & render_table_markdown (
384384 element_ref,
385- output,
386- list_depth,
387- in_pre,
388- in_code,
389385 no_images,
390386 no_links,
391- ) ;
387+ ) ) ;
392388 output. push_str ( "\n \n " ) ;
393389 }
394390 "thead" | "tbody" | "tfoot" => {
@@ -448,6 +444,89 @@ fn process_node_to_markdown(
448444 }
449445}
450446
447+ fn render_table_markdown ( table : scraper:: ElementRef , no_images : bool , no_links : bool ) -> String {
448+ let mut rows = Vec :: new ( ) ;
449+ collect_table_rows ( table, & mut rows, no_images, no_links) ;
450+
451+ if rows. is_empty ( ) {
452+ return String :: new ( ) ;
453+ }
454+
455+ let mut output = String :: new ( ) ;
456+ for ( row_index, row) in rows. iter ( ) . enumerate ( ) {
457+ output. push_str ( "| " ) ;
458+ output. push_str ( & row. join ( " | " ) ) ;
459+ output. push_str ( " |\n " ) ;
460+
461+ if row_index == 0 {
462+ output. push_str ( "| " ) ;
463+ output. push_str ( & vec ! [ "---" ; row. len( ) . max( 1 ) ] . join ( " | " ) ) ;
464+ output. push_str ( " |\n " ) ;
465+ }
466+ }
467+
468+ output
469+ }
470+
471+ fn collect_table_rows (
472+ node : scraper:: ElementRef ,
473+ rows : & mut Vec < Vec < String > > ,
474+ no_images : bool ,
475+ no_links : bool ,
476+ ) {
477+ for child in node. children ( ) {
478+ if let Some ( element_ref) = scraper:: ElementRef :: wrap ( child) {
479+ match element_ref. value ( ) . name . local . as_ref ( ) {
480+ "tr" => rows. push ( collect_table_cells ( element_ref, no_images, no_links) ) ,
481+ "thead" | "tbody" | "tfoot" => {
482+ collect_table_rows ( element_ref, rows, no_images, no_links) ;
483+ }
484+ _ => { }
485+ }
486+ }
487+ }
488+ }
489+
490+ fn collect_table_cells ( row : scraper:: ElementRef , no_images : bool , no_links : bool ) -> Vec < String > {
491+ let mut cells = Vec :: new ( ) ;
492+
493+ for child in row. children ( ) {
494+ if let Some ( element_ref) = scraper:: ElementRef :: wrap ( child)
495+ && matches ! ( element_ref. value( ) . name. local. as_ref( ) , "th" | "td" )
496+ {
497+ cells. push ( render_table_cell ( element_ref, no_images, no_links) ) ;
498+ }
499+ }
500+
501+ cells
502+ }
503+
504+ fn render_table_cell ( cell : scraper:: ElementRef , no_images : bool , no_links : bool ) -> String {
505+ let mut output = String :: new ( ) ;
506+ let mut list_depth = 0 ;
507+ let mut in_pre = false ;
508+ let mut in_code = false ;
509+
510+ process_node_to_markdown (
511+ cell,
512+ & mut output,
513+ & mut list_depth,
514+ & mut in_pre,
515+ & mut in_code,
516+ no_images,
517+ no_links,
518+ ) ;
519+
520+ clean_table_cell ( & output)
521+ }
522+
523+ fn clean_table_cell ( cell : & str ) -> String {
524+ normalize_whitespace ( & cell. replace ( '\n' , " " ) )
525+ . replace ( '|' , "\\ |" )
526+ . trim ( )
527+ . to_string ( )
528+ }
529+
451530/// Convert HTML to plain text.
452531pub fn html_to_text ( html : & str ) -> String {
453532 let cleaned = remove_unwanted_elements ( html) ;
0 commit comments