File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 33#[ cfg( test) ]
44mod tests {
55 use crate :: agent_cmd:: cli:: { CopyArgs , ExportArgs } ;
6- use crate :: agent_cmd:: loader:: {
7- load_builtin_agents, parse_frontmatter, read_file_with_encoding,
8- } ;
6+ use crate :: agent_cmd:: loader:: { load_builtin_agents, parse_frontmatter} ;
97 use crate :: agent_cmd:: types:: AgentMode ;
8+ use crate :: utils:: file:: read_file_with_encoding;
109
1110 #[ test]
1211 fn test_read_file_with_utf8 ( ) {
Original file line number Diff line number Diff line change @@ -311,7 +311,8 @@ fn process_node_to_markdown(
311311 }
312312 "li" => {
313313 let indent = " " . repeat ( list_depth. saturating_sub ( 1 ) ) ;
314- output. push_str ( & format ! ( "\n {indent}- " ) ) ;
314+ let marker = list_item_marker ( element_ref) ;
315+ output. push_str ( & format ! ( "\n {indent}{marker}" ) ) ;
315316 process_node_to_markdown (
316317 element_ref,
317318 output,
@@ -448,6 +449,28 @@ fn process_node_to_markdown(
448449 }
449450}
450451
452+ fn list_item_marker ( element_ref : scraper:: ElementRef ) -> String {
453+ let Some ( parent) = element_ref. parent ( ) . and_then ( scraper:: ElementRef :: wrap) else {
454+ return "- " . to_string ( ) ;
455+ } ;
456+
457+ if parent. value ( ) . name ( ) != "ol" {
458+ return "- " . to_string ( ) ;
459+ }
460+
461+ let start = parent
462+ . attr ( "start" )
463+ . and_then ( |value| value. parse :: < usize > ( ) . ok ( ) )
464+ . unwrap_or ( 1 ) ;
465+ let previous_items = element_ref
466+ . prev_siblings ( )
467+ . filter_map ( scraper:: ElementRef :: wrap)
468+ . filter ( |sibling| sibling. value ( ) . name ( ) == "li" )
469+ . count ( ) ;
470+
471+ format ! ( "{}. " , start + previous_items)
472+ }
473+
451474/// Convert HTML to plain text.
452475pub fn html_to_text ( html : & str ) -> String {
453476 let cleaned = remove_unwanted_elements ( html) ;
Original file line number Diff line number Diff line change @@ -41,6 +41,28 @@ mod tests {
4141 assert ! ( !md_no_images. contains( "![" ) ) ;
4242 }
4343
// Checks how `html_to_markdown` renders ordered lists: items of an `<ol>` are expected to
// carry numeric markers, a `<ul>` nested inside an `<ol>` item keeps the "- " marker, and
// a second `<ol>` with `start="4"` is expected to begin numbering at 4.
44+ #[ test]
45+ fn test_html_to_markdown_ordered_lists ( ) {
46+ let html = r#"
47+ <ol>
48+ <li>Install Rust</li>
49+ <li>Run cortex</li>
50+ <li>Check output<ul><li>Keep nested unordered item</li></ul></li>
51+ </ol>
52+ <ol start="4">
53+ <li>Continue numbering</li>
54+ </ol>
55+ "# ;
56+ let md = html_to_markdown ( html, false , false ) ;
57+
// Each assertion prints the full Markdown output on failure to ease debugging.
// First list: no `start` attribute, so items are expected to be numbered 1, 2, 3.
58+ assert ! ( md. contains( "1. Install Rust" ) , "got: {md}" ) ;
59+ assert ! ( md. contains( "2. Run cortex" ) , "got: {md}" ) ;
60+ assert ! ( md. contains( "3. Check output" ) , "got: {md}" ) ;
// The nested `<ul>` item must still use the unordered marker.
61+ assert ! ( md. contains( " - Keep nested unordered item" ) , "got: {md}" ) ;
// Second list: `start="4"` sets the number of its first item.
62+ assert ! ( md. contains( "4. Continue numbering" ) , "got: {md}" ) ;
// Guard against ordered items being rendered with the unordered marker.
63+ assert ! ( !md. contains( "- Install Rust" ) , "got: {md}" ) ;
64+ }
65+
4466 #[ test]
4567 fn test_html_to_text ( ) {
4668 let html = "<h1>Title</h1><p>Hello <strong>world</strong>!</p>" ;
You can’t perform that action at this time.
0 commit comments