66use fivefilters \Readability \Nodes \DOM \DOMElement ;
77use fivefilters \Readability \Nodes \DOM \DOMNode ;
88use fivefilters \Readability \Nodes \DOM \DOMText ;
9- use DOMNodeList ;
109
1110/**
1211 * @property ?DOMNode $firstChild
@@ -19,29 +18,20 @@ trait NodeTrait
1918{
2019 /**
2120 * Content score of the node. Used to determine the value of the content.
22- *
23- * @var int
2421 */
25- public $ contentScore = 0 ;
22+ public float $ contentScore = 0. 0 ;
2623
2724 /**
2825 * Flag for initialized status.
29- *
30- * @var bool
3126 */
32- private $ initialized = false ;
27+ private bool $ initialized = false ;
3328
3429 /**
35- * Flag data tables.
36- *
37- * @var bool
30+ * Flag for data tables.
3831 */
39- private $ readabilityDataTable = false ;
32+ private bool $ readabilityDataTable = false ;
4033
41- /**
42- * @var array
43- */
44- private $ divToPElements = [
34+ private array $ divToPElements = [
4535 'blockquote ' ,
4636 'dl ' ,
4737 'div ' ,
@@ -56,10 +46,8 @@ trait NodeTrait
5646 /**
5747 * The commented out elements qualify as phrasing content but tend to be
5848 * removed by readability when put into paragraphs, so we ignore them here.
59- *
60- * @var array
6149 */
62- private $ phrasing_elems = [
50+ private array $ phrasing_elems = [
6351 // 'CANVAS', 'IFRAME', 'SVG', 'VIDEO',
6452 'abbr ' , 'audio ' , 'b ' , 'bdo ' , 'br ' , 'button ' , 'cite ' , 'code ' , 'data ' ,
6553 'datalist ' , 'dfn ' , 'em ' , 'embed ' , 'i ' , 'img ' , 'input ' , 'kbd ' , 'label ' ,
@@ -69,7 +57,7 @@ trait NodeTrait
6957 ];
7058
7159 /**
72- * initialized getter.
60+ * Is initialized getter.
7361 */
7462 public function isInitialized (): bool
7563 {
@@ -444,18 +432,18 @@ public function isPhrasingContent(): bool
444432 */
445433 public function isProbablyVisible (): bool
446434 {
447- return !preg_match ('/display:( )?none/i ' , $ this ->getAttribute ('style ' )) &&
435+ return !preg_match ('/display:( )?none/i ' , $ this ->getAttribute ('style ' )) &&
448436 !$ this ->hasAttribute ('hidden ' ) &&
449437 //check for "fallback-image" so that wikimedia math images are displayed
450- (!$ this ->hasAttribute ('aria-hidden ' ) || $ this ->getAttribute ('aria-hidden ' ) !== 'true ' || ($ this ->hasAttribute ( ' class ' ) && strpos ( $ this -> getAttribute ('class ' ), 'fallback-image ' ) !== false ));
438+ (!$ this ->hasAttribute ('aria-hidden ' ) || $ this ->getAttribute ('aria-hidden ' ) !== 'true ' || str_contains ($ this ->getAttribute ('class ' ), 'fallback-image ' ));
451439 }
452440
453441 /**
454442 * Check if node is whitespace.
455443 */
456444 public function isWhitespace (): bool
457445 {
458- return ($ this ->nodeType === XML_TEXT_NODE && mb_strlen ( trim ( $ this ->textContent )) === 0 ) ||
446+ return ($ this ->nodeType === XML_TEXT_NODE && $ this ->isWhitespaceInElementContent () ) ||
459447 ($ this ->nodeType === XML_ELEMENT_NODE && $ this ->nodeName === 'br ' );
460448 }
461449
@@ -497,17 +485,12 @@ public function shiftingAwareGetElementsByTagName(string $tag): \Generator
497485 }
498486
499487 /**
500- * Mimics JS's firstElementChild property. PHP only has firstChild which could be any type of DOMNode. Use this
501- * function to get the first one that is an DOMElement node.
488+ * Get first element child or null
502489 */
503490 public function getFirstElementChild (): ?DOMElement
504491 {
505- if ($ this ->childNodes instanceof \Traversable) {
506- foreach ($ this ->childNodes as $ node ) {
507- if ($ node instanceof DOMElement) {
508- return $ node ;
509- }
510- }
492+ if ($ this ->nodeType === XML_ELEMENT_NODE || $ this ->nodeType === XML_DOCUMENT_NODE ) {
493+ return $ this ->firstElementChild ;
511494 }
512495
513496 return null ;
0 commit comments