Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions test/migrate_unittest/test-group1.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#include <iostream>
#include <gtest/gtest.h>
#include <string>
#include "html.hpp"

using namespace std;
using namespace html;


// test1: input that ends before a tag is closed produces no attribute value and no text.
TEST(test, dropsUnterminatedTag){
    parser parse;

    // A single tag that is never closed with '>'.
    const string unclosedTag = "<p";
    node_ptr doc = parse.parse(unclosedTag);
    ASSERT_EQ(0, doc->get_attr("p").size());
    ASSERT_EQ("", doc->to_text());

    // Two unterminated tags back to back; the same parser object is reused.
    const string unclosedPair = "<div id=1<p id='2'";
    doc = parse.parse(unclosedPair);
    ASSERT_EQ("", doc->to_text());
}

// test2: the selector "div table[class~=x|y]" finds exactly one result whose text is "Hello".
TEST(test, testByAttributeRegexCombined){
    parser parse;
    node_ptr doc = parse.parse("<div><table class=x><td>Hello</td></table></div>");

    node_ptr matches = doc->select("div table[class~=x|y]");
    ASSERT_EQ(1, matches->size());
    ASSERT_EQ("Hello", matches->to_text());
}

// test3: the attribute selector [class*=first] returns three results for this markup.
TEST(test, testAllWithClass){
    parser parse;
    const string markup = "<p class=first></p>One<p class=first>Two<p>Three";
    node_ptr doc = parse.parse(markup);

    node_ptr hits = doc->select("[class*=first]");
    // Original author's note: the last text node has no class attribute of its
    // own, but it is still selected because its parent <p> has a class value
    // that includes "first".
    ASSERT_EQ(3, hits->size());
}

// test4: placeholder for case-insensitive tag selection.
// select is case sensitive, so the mixed-case lookups below stay disabled;
// with them commented out, this test only runs the parse call.
TEST(test, caseInsensitive){
    parser parse;
    const string markup = "<div tItle=bAr></div>";
    node_ptr doc = parse.parse(markup);
    // ASSERT_EQ(1, doc->select("DiV")->size());
    // ASSERT_EQ(1, doc->select("dIv")->size());
}

// test5: "li#1 + li#3" must select nothing, since li#2 sits between them in the markup.
TEST(test, notAdjacent){
    const string markup = "<ol><li id=1>One<li id=2>Two<li id=3>Three</ol>";
    parser parse;
    node_ptr doc = parse.parse(markup);

    node_ptr adjacent = doc->select("li#1 + li#3");
    ASSERT_EQ(0, adjacent->size());
}

// test6: two <div> elements with identical content are both returned by the "div" selector.
TEST(test, selectSameElements){
    const string markup = "<div>one</div><div>one</div>";
    parser parse;
    node_ptr doc = parse.parse(markup);

    node_ptr divs = doc->select("div");
    ASSERT_EQ(2, divs->size());

    // :contains is not supported, so the follow-up sub-selection stays disabled.
    // node_ptr subSelect = divs->select(":contains(one)");
    // ASSERT_EQ(2, subSelect->size());
}

// test7: a selector that uses ":matchText" is expected to return no results.
TEST(test, matchTextAttributes){
    parser parse;
    node_ptr doc = parse.parse("<div><p class=one>One<br>Two<p class=two>Three<br>Four");

    // Author's note: ":matchText" is not a standard selector in HTML.
    node_ptr matched = doc->select("p.two:matchText:last-child");

    ASSERT_EQ(0, matched->size());
    // ASSERT_EQ("Four", matched->to_text());
}

// test8: the prefix selector [href^=' mailto'] matches an href value that begins with a space.
TEST(test, startsWithBeginsWithSpace){
    parser parse;
    node_ptr doc = parse.parse("<small><a href=\" mailto:abc@def.net\">(abc@def.net)</a></small>");

    node_ptr anchors = doc->select("a[href^=' mailto']");
    ASSERT_EQ(1, anchors->size());
}

// test9: the suffix selector [href$='.net '] matches an href value that ends with a space.
TEST(test, endsWithEndsWithSpaces){
    parser parse;
    node_ptr doc = parse.parse("<small><a href=\" mailto:abc@def.net \">(abc@def.net)</a></small>");

    node_ptr anchors = doc->select("a[href$='.net ']");
    ASSERT_EQ(1, anchors->size());
}

// test10: malformed attribute syntax is kept and serialized with empty attribute values.
TEST(test, parsesQuiteRoughAttributes){
    parser parse;
    const string rough = "<p =a>One<a <p>Something</p>Else";
    node_ptr doc = parse.parse(rough);

    // Expected serialization (escaped form of the literal below):
    //   <p =a=\"\">One<a <p=\"\">Something</a></p>\nElse
    ASSERT_EQ("<p =a=\"\">One<a <p=\"\">Something</a></p>\nElse", doc->to_html());

    // A run of dots in attribute position; the same parser object is reused.
    doc = parse.parse("<p .....>");
    ASSERT_EQ("<p .....=\"\"></p>", doc->to_html());
}

// Test-runner entry point: passes the command line to GoogleTest, runs every
// registered test, and returns the result of RUN_ALL_TESTS() as the exit status.
GTEST_API_ int main(int argc, char ** argv) {
    testing::InitGoogleTest(&argc, argv);
    const int status = RUN_ALL_TESTS();
    return status;
}

99 changes: 99 additions & 0 deletions test/migrate_unittest/test-group2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include <iostream>
#include <gtest/gtest.h>
#include <string>
#include "html.hpp"

using namespace std;
using namespace html;

// test11: a tag whose quoted attribute value is never terminated yields empty text.
TEST(test, dropsUnterminatedAttribute){
    parser parse;
    const string unclosedAttr = "<p id=\"foo";
    node_ptr doc = parse.parse(unclosedAttr);

    ASSERT_EQ("", doc->to_text());
}

// test12: spaces before the closing '>' of start and end tags do not show up in the serialized output.
TEST(test, testSpaceAfterTag){
    parser parse;
    node_ptr doc = parse.parse("<div > <a name=\"top\"></a ><p id=1 >Hello</p></div>");

    const string expected = "<div><a name=\"top\"></a>\n\t<p id=\"1\">Hello</p>\n</div>";
    ASSERT_EQ(expected, doc->to_html());
}

// test13: a bare body snippet (text, inline element, text) yields its concatenated text.
TEST(test, createsStructureFromBodySnippet){
    parser parse;
    const string snippet = "foo <b>bar</b> baz";
    node_ptr doc = parse.parse(snippet);

    ASSERT_EQ("foo bar baz", doc->to_text());
}

// test14: text that follows a <script> element stays inside <body> in the serialized output.
TEST(test, handlesTextAfterData){
    parser parse;
    const string markup = "<html><body>pre <script>inner</script> aft</body></html>";
    node_ptr doc = parse.parse(markup);

    // Reference output noted by the author:
    //   <html><head></head><body>pre <script>inner</script> aft</body></html>
    // The expectation here contains no <head> element.
    const string expected = "<html>\n\t<body>pre \n\t\t<script>inner</script>\n\t\t aft\n\t</body>\n</html>";
    ASSERT_EQ(expected, doc->to_html());
}

// test15: the content of a <textarea> is returned as the text of the selected element.
TEST(test, handlesTextArea){
    parser parse;
    node_ptr doc = parse.parse("<textarea>Hello</textarea>");

    node_ptr areas = doc->select("textarea");
    ASSERT_EQ("Hello", areas->to_text());
}

// test16: unclosed <td> and <p> tags outside a table are serialized as nested elements.
TEST(test, discardsNakedTds){
    parser parse;
    const string markup = "<td>Hello<td><p>There<p>now";
    node_ptr doc = parse.parse(markup);

    const string expected = "<td>Hello\n\t<td>\n\t\t<p>There\n\t\t\t<p>now</p>\n\t\t</p>\n\t</td>\n</td>";
    ASSERT_EQ(expected, doc->to_html());
}

// test17: nested tables with missing and stray row tags serialize without synthesized <tbody> elements.
TEST(test, handlesNestedImplicitTable){
    parser parse;
    node_ptr doc = parse.parse("<table><td>1</td></tr> <td>2</td></tr> <td> <table><td>3</td> <td>4</td></table> <tr><td>5</table>");

    // Reference output noted by the author:
    //   <table><tbody><tr><td>1</td></tr><tr><td>2</td></tr><tr><td><table><tbody><tr><td>3</td><td>4</td></tr></tbody></table></td></tr><tr><td>5</td></tr></tbody></table>
    // The expectation here contains no <tbody>.
    const string expected = "<table>\n\t<td>1</td>\n\t<td>2</td>\n\t<td>\n\t\t<table>\n\t\t\t<td>3</td>\n\t\t\t<td>4</td>\n\t\t</table>\n\t\t<tr>\n\t\t\t<td>5</td>\n\t\t</tr>\n\t</td>\n</table>";
    ASSERT_EQ(expected, doc->to_html());
}

// test18: an unclosed <caption> followed by unclosed <td> tags serializes with the cells nested inside the caption.
TEST(test, handlesImplicitCaptionClose){
    parser parse;
    node_ptr doc = parse.parse("<table><caption>A caption<td>One<td>Two");

    // Reference output noted by the author:
    //   <table><caption>A caption</caption><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>
    const string expected = "<table>\n\t<caption>A caption\n\t\t<td>One\n\t\t\t<td>Two</td>\n\t\t</td>\n\t</caption>\n</table>";
    ASSERT_EQ(expected, doc->to_html());
}

// test19: a CDATA section left unclosed at end of input still yields exactly one child node.
TEST(test, handlesUnclosedCdataAtEOF){
    parser parse;
    const string unclosedCdata = "<![CDATA[]]";
    node_ptr doc = parse.parse(unclosedCdata);

    ASSERT_EQ(1, doc->get_children().size());
}

// test20: stray '<' characters that do not start a valid tag are kept as text.
TEST(test, handlesInvalidStartTags){
    parser parse;
    const string markup = "<div>Hello < There <&amp;></div>";
    node_ptr doc = parse.parse(markup);

    // Author's note: "amp;" cannot be removed, so the entity appears unchanged
    // in the expected text.
    ASSERT_EQ("Hello < There <&amp;>", doc->select("div")->get_children()[0]->to_text());
}


// Test-runner entry point: passes the command line to GoogleTest, runs every
// registered test, and returns the result of RUN_ALL_TESTS() as the exit status.
GTEST_API_ int main(int argc, char ** argv) {
    testing::InitGoogleTest(&argc, argv);
    const int status = RUN_ALL_TESTS();
    return status;
}
109 changes: 109 additions & 0 deletions test/migrate_unittest/test-group3.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#include <iostream>
#include <gtest/gtest.h>
#include <string>
#include "html.hpp"

using namespace std;
using namespace html;

// test21: tags with namespace-style names (prefix:name) are parsed and serialized like other tags.
TEST(test, handlesUnknownNamespaceTags){
    parser parse;
    const string markup = "<foo:bar id='1' /><abc:def id=2>Foo<p>Hello</p></abc:def><foo:bar>There</foo:bar>";
    node_ptr doc = parse.parse(markup);

    const string expected = "<foo:bar id=\"1\" />\n<abc:def id=\"2\">Foo\n\t<p>Hello</p>\n</abc:def>\n<foo:bar>There</foo:bar>";
    ASSERT_EQ(expected, doc->to_html());
}

// test22: self-closed elements stay self-closed in the output, and bare <img> / <hr> are serialized self-closed.
TEST(test, handlesKnownEmptyBlocks){
    parser parse;
    const string markup = "<div id='1' /><script src='/foo' /><div id=2><img /><img></div><a id=3 /><i /><foo /><foo>One</foo> <hr /> hr text <hr> hr text two";
    node_ptr doc = parse.parse(markup);

    const string expected = "<div id=\"1\" />\n<script src=\"/foo\" />\n<div id=\"2\"><img /><img /></div>\n<a id=\"3\" /><i />\n<foo />\n<foo>One</foo>\n<hr />\n hr text \n<hr />\n hr text two";
    ASSERT_EQ(expected, doc->to_html());
}

// test23: a self-closed <noframes /> in <head> is followed by <meta> as a sibling.
TEST(test, handlesKnownEmptyNoFrames){
    parser parse;
    const string markup = "<html><head><noframes /><meta name=foo></head><body>One</body></html>";
    node_ptr doc = parse.parse(markup);

    // Expected output, laid out for readability:
    //   <html>
    //       <head>
    //           <noframes />
    //           <meta name=\"foo\" />
    //       </head>
    //       <body>One</body>
    //   </html>
    const string expected = "<html>\n\t<head>\n\t\t<noframes />\n\t\t<meta name=\"foo\" />\n\t</head>\n\t<body>One</body>\n</html>";
    ASSERT_EQ(expected, doc->to_html());
}

// test24: a self-closed <style /> in <head> is followed by <meta> as a sibling.
TEST(test, handlesKnownEmptyStyle){
    parser parse;
    const string markup = "<html><head><style /><meta name=foo></head><body>One</body></html>";
    node_ptr doc = parse.parse(markup);

    const string expected = "<html>\n\t<head>\n\t\t<style />\n\t\t<meta name=\"foo\" />\n\t</head>\n\t<body>One</body>\n</html>";
    ASSERT_EQ(expected, doc->to_html());
}

// test25: a self-closed <title /> in <head> is followed by <meta> as a sibling.
// Known issue noted by the author: title is not being converted to HTML format,
// so the expectation keeps the self-closed <title /> form.
TEST(test, handlesKnownEmptyTitle){
    parser parse;
    const string markup = "<html><head><title /><meta name=foo></head><body>One</body></html>";
    node_ptr doc = parse.parse(markup);

    // Reference output noted by the author:
    //   <html><head><title></title><meta name=\"foo\"></head><body>One</body></html>
    const string expected = "<html>\n\t<head>\n\t\t<title />\n\t\t<meta name=\"foo\" />\n\t</head>\n\t<body>One</body>\n</html>";
    ASSERT_EQ(expected, doc->to_html());
}

// test26: a self-closed <iframe /> between paragraphs does not swallow the paragraph after it.
// Known issue noted by the author: iframe is not being converted to HTML format,
// so the expectation keeps the self-closed <iframe /> form.
TEST(test, handlesKnownEmptyIframe){
    parser parse;
    const string markup = "<p>One</p><iframe id=1 /><p>Two";
    node_ptr doc = parse.parse(markup);

    const string expected = "<p>One</p>\n<iframe id=\"1\" />\n<p>Two</p>";
    ASSERT_EQ(expected, doc->to_html());
}

// test27: a '/' that ends an unquoted attribute value belongs to the value.
TEST(test, handlesSolidusAtAttributeEnd){
    // [<a href=/>link</a>] must come out as [<a href="/">link</a>],
    // not as [<a href="" /><a>link</a>].
    parser parse;
    const string markup = "<a href=/>link</a>";
    node_ptr doc = parse.parse(markup);

    ASSERT_EQ("<a href=\"/\">link</a>", doc->to_html());
}

// test28: content after a <frameset> is kept in the serialized output.
TEST(test, ignoresContentAfterFrameset){
    parser parse;
    const string markup = "<html><head><title>One</title></head><frameset><frame /><frame /></frameset><table></table></html>";
    node_ptr doc = parse.parse(markup);

    // Author's note: this parser implementation supports only a subset of HTML
    // elements and attributes and does not support elements such as the frame
    // tag, so the trailing <table> cannot be removed.
    const string expected = "<html>\n\t<head>\n\t\t<title>One</title>\n\t</head>\n\t<frameset>\n\t\t<frame />\n\t\t<frame />\n\t</frameset>\n\t<table></table>\n</html>";
    ASSERT_EQ(expected, doc->to_html());
}

// test29: text scattered around html/head/body keeps its position; the doctype is serialized as a comment.
TEST(test, normalisesDocument){
    parser parse;
    const string markup = "<!doctype html>One<html>Two<head>Three<link></head>Four<body>Five </body>Six </html>Seven ";
    node_ptr doc = parse.parse(markup);

    const string expected = "<!--doctype html-->One\n<html>Two\n\t<head>Three\n\t\t<link />\n\t</head>\n\tFour\n\t<body>Five </body>\n\tSix \n</html>\nSeven ";
    ASSERT_EQ(expected, doc->to_html());
}

// test30: parsing an empty string serializes back to an empty string.
TEST(test, normalisesEmptyDocument){
    parser parse;
    node_ptr emptyDoc = parse.parse("");

    ASSERT_EQ("", emptyDoc->to_html());
}


// Test-runner entry point: passes the command line to GoogleTest, runs every
// registered test, and returns the result of RUN_ALL_TESTS() as the exit status.
GTEST_API_ int main(int argc, char ** argv) {
    testing::InitGoogleTest(&argc, argv);
    const int status = RUN_ALL_TESTS();
    return status;
}
Loading