Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions Readability.js
Original file line number Diff line number Diff line change
Expand Up @@ -1475,7 +1475,7 @@ Readability.prototype = {
} else {
var contentBonus = 0;

// Give a bonus if sibling nodes and top candidates have the example same classname
// Give a bonus if sibling nodes and top candidates have the same classname
if (
sibling.className === topCandidate.className &&
topCandidate.className !== ""
Expand Down Expand Up @@ -1508,24 +1508,13 @@ Readability.prototype = {
}

if (append) {
this.log("Appending node:", sibling);

if (!this.ALTER_TO_DIV_EXCEPTIONS.includes(sibling.nodeName)) {
// We have a node that isn't a common block level element, like a form or td tag.
// Turn it into a div so it doesn't get filtered out later by accident.
this.log("Altering sibling:", sibling, "to div.");

sibling = this._setNodeTag(sibling, "DIV");
}

articleContent.appendChild(sibling);
// Fetch children again to make it compatible
// with DOM parsers without live collection support.
// Siblings array is live, so re-grab it and adjust index
siblings = parentOfTopCandidate.children;
// siblings is a reference to the children array, and
// sibling is removed from the array when we call appendChild().
// As a result, we must revisit this index since the nodes
// have been shifted.
s -= 1;
sl -= 1;
}
Expand Down
9 changes: 9 additions & 0 deletions test/test-pages/bbc-reader-bug/expected-metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"title": "Motorhead guitarist Phil Campbell honoured",
"byline": null,
"dir": null,
"lang": null,
"excerpt": "Earlier paragraph 1 (should NOT be skipped)",
"siteName": null,
"readerable": false
}
9 changes: 9 additions & 0 deletions test/test-pages/bbc-reader-bug/expected.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<div id="readability-page-1" class="page">
<div>
<p>Motorhead paid tribute to Campbell...</p>
<p>Paragraph 2 with more text to score well, adding commas like this, and this, for points.</p>
<p>Paragraph 3 with more text to score well, adding commas like this, and this, for points.</p>
<p>Paragraph 4 with more text to score well, adding commas like this, and this, for points.</p>
<p>Paragraph 5 with more text to score well, adding commas like this, and this, for points.</p>
</div>
</div>
24 changes: 24 additions & 0 deletions test/test-pages/bbc-reader-bug/source.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<html lang="en">
<head>
<title>Motorhead guitarist Phil Campbell honoured</title>
<meta name="description" content="Earlier paragraph 1 (should NOT be skipped)"/>
</head>
<body>
<div id="main-content">
<div class="intro-block">
<p>Earlier paragraph 1 (should NOT be skipped)</p>
<p>Earlier paragraph 2</p>
</div>

<div class="article-body-content">
<div class="story-body__inner">
<p>Motorhead paid tribute to Campbell...</p>
<p>Paragraph 2 with more text to score well, adding commas like this, and this, for points.</p>
<p>Paragraph 3 with more text to score well, adding commas like this, and this, for points.</p>
<p>Paragraph 4 with more text to score well, adding commas like this, and this, for points.</p>
<p>Paragraph 5 with more text to score well, adding commas like this, and this, for points.</p>
</div>
</div>
</div>
</body>
</html>