diff --git a/composer.json b/composer.json index 0827ad7..5489971 100644 --- a/composer.json +++ b/composer.json @@ -20,7 +20,7 @@ "require-dev": { "phpunit/phpunit": "^8.0 || ^9.0", "php-coveralls/php-coveralls": "^2.0", - "vimeo/psalm": "6.12.0" + "vimeo/psalm": "6.13.1" }, "autoload": { "psr-4": { diff --git a/psalm-baseline.xml b/psalm-baseline.xml index 17211f7..22b1150 100644 --- a/psalm-baseline.xml +++ b/psalm-baseline.xml @@ -1,36 +1,30 @@ - + + + + + + + - - - - - - - - - - - + textContent]]> + + + + - $this->readJson($node->textContent, $url)]]> + $this->readJson($node->textContent, $url)]]> - - - - - - - parentNode; @@ -44,9 +38,57 @@ } }]]> + + textContent]]> + textContent]]> + textContent]]> + textContent]]> + + textContent]]> + + textContent]]> + + + + + + + + + + + + + + + + + + + + textContent]]> + textContent]]> + textContent]]> + textContent]]> + textContent]]> + textContent]]> + textContent]]> + attributes]]> + attributes->getNamedItem('itemprop')->textContent]]> + parentNode]]> + textContent]]> + + + textContent]]> + textContent]]> + textContent]]> + - $this->nodeToItem($node, $xpath, $url)]]> - + + + $this->nodeToItem($node, $xpath, $url)]]> + parentNode; @@ -59,25 +101,29 @@ } } }]]> - - - textContent)]]> - + - attributes->getNamedItem('itemprop')->textContent]]> + attributes]]> + parentNode]]> - - - + + attributes->getNamedItem('itemscope')]]> + isSameNode($node)]]> + + + attributes]]> + attributes]]> + attributes]]> + - parentNode; @@ -89,14 +135,53 @@ return false; } } - - // Unreachable, but makes static analysis happy - return false; }]]> + + textContent]]> + textContent]]> + + textContent]]> + + + textContent]]> + textContent]]> + textContent]]> + + + + + + + + + + + + + + + + + textContent]]> + textContent]]> + textContent]]> + attributes]]> + parentNode]]> + attributes->getNamedItem('property')->textContent]]> + textContent]]> + textContent]]> + textContent]]> + + + textContent]]> + - $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null)]]> - + + + $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null)]]> + parentNode; @@ -108,27 +193,26 @@ return false; } } - - // Unreachable, but makes static analysis happy - return false; }]]> - - - - textContent]]> textContent)]]> - + - attributes->getNamedItem('property')->textContent]]> - textContent]]> + attributes]]> + parentNode]]> - - - - + + attributes->getNamedItem('typeof')]]> + isSameNode($node)]]> + + + attributes]]> + attributes]]> + attributes]]> + attributes]]> + diff --git a/src/DOMBuilder.php b/src/DOMBuilder.php index 8ca4210..8cb23c9 100644 --- a/src/DOMBuilder.php +++ b/src/DOMBuilder.php @@ -4,18 +4,27 @@ namespace Brick\StructuredData; +use Dom\Document; +use Dom\HTMLDocument; use DOMDocument; +use function class_exists; + +use const Dom\HTML_NO_DEFAULT_NS; use const LIBXML_NOERROR; use const LIBXML_NOWARNING; final class DOMBuilder { /** - * Builds a DOMDocument from an HTML string. + * Builds a (DOM)Document from an HTML string. */ - public static function fromHTML(string $html): DOMDocument + public static function fromHTML(string $html): Document|DOMDocument { + if (class_exists(HTMLDocument::class)) { + return HTMLDocument::createFromString($html, LIBXML_NOERROR | HTML_NO_DEFAULT_NS); + } + $document = new DOMDocument(); $document->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR); @@ -23,10 +32,14 @@ public static function fromHTML(string $html): DOMDocument } /** - * Builds a DOMDocument from an HTML file. + * Builds a (DOM)Document from an HTML file. */ - public static function fromHTMLFile(string $file): DOMDocument + public static function fromHTMLFile(string $file): Document|DOMDocument { + if (class_exists(HTMLDocument::class)) { + return HTMLDocument::createFromFile($file, LIBXML_NOERROR | HTML_NO_DEFAULT_NS); + } + $document = new DOMDocument(); $document->loadHTMLFile($file, LIBXML_NOWARNING | LIBXML_NOERROR); diff --git a/src/Reader.php b/src/Reader.php index c37c713..5942ad4 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -4,6 +4,7 @@ namespace Brick\StructuredData; +use Dom\Document; use DOMDocument; /** @@ -14,11 +15,11 @@ interface Reader /** * Reads the items contained in the given document. * - * @param DOMDocument $document The DOM document to read. - * @param string $url The URL the document was retrieved from. This will be used only to resolve relative - * URLs in property values. The implementation must not attempt to connect to this URL. + * @param Document|DOMDocument $document The (DOM)Document to read. + * @param string $url The URL the document was retrieved from. This will be used only to resolve relative + * URLs in property values. The implementation must not attempt to connect to this URL. * * @return Item[] The top-level items. */ - public function read(DOMDocument $document, string $url): array; + public function read(Document|DOMDocument $document, string $url): array; } diff --git a/src/Reader/JsonLdReader.php b/src/Reader/JsonLdReader.php index 3f5672b..ec9d02e 100644 --- a/src/Reader/JsonLdReader.php +++ b/src/Reader/JsonLdReader.php @@ -6,18 +6,22 @@ use Brick\StructuredData\Item; use Brick\StructuredData\Reader; +use Dom\Document; +use Dom\Node; +use Dom\XPath; use DOMDocument; use DOMNode; use DOMXPath; use Override; use Sabre\Uri\InvalidUriException; -use stdClass; use function array_filter; use function array_map; use function array_merge; use function array_values; use function array_walk_recursive; +use function assert; +use function class_exists; use function in_array; use function is_array; use function is_bool; @@ -66,9 +70,14 @@ public function __construct(array $iriProperties = []) } #[Override] - public function read(DOMDocument $document, string $url): array + public function read(Document|DOMDocument $document, string $url): array { - $xpath = new DOMXPath($document); + if ($document instanceof Document) { + assert(class_exists(XPath::class)); + $xpath = new XPath($document); + } else { + $xpath = new DOMXPath($document); + } $nodes = $xpath->query('//script[@type="application/ld+json"]'); $nodes = iterator_to_array($nodes); @@ -78,7 +87,7 @@ public function read(DOMDocument $document, string $url): array } $items = array_map( - fn (DOMNode $node) => $this->readJson($node->textContent, $url), + fn (DOMNode|Node $node) => $this->readJson($node->textContent, $url), $nodes, ); @@ -115,7 +124,7 @@ private function readJson(string $json, string $url): array if (is_array($data)) { $items = array_map( - fn ($item) => is_object($item) ? $this->readItem($item, $url, null) : null, + fn (mixed $item) => is_object($item) ? $this->readItem($item, $url, null) : null, $data, ); @@ -131,11 +140,11 @@ private function readJson(string $json, string $url): array /** * Reads a single item. * - * @param stdClass $item A decoded JSON object representing an item, or null if invalid. + * @param object $item A decoded JSON object representing an item, or null if invalid. * @param string $url The URL the document was retrieved from, for relative URL resolution. * @param string|null $vocabulary The currently vocabulary URL, if any. */ - private function readItem(stdClass $item, string $url, ?string $vocabulary): Item + private function readItem(object $item, string $url, ?string $vocabulary): Item { if (isset($item->{'@context'}) && is_string($item->{'@context'})) { $vocabulary = $this->checkVocabularyUrl($item->{'@context'}); // ugh @@ -162,7 +171,7 @@ private function readItem(stdClass $item, string $url, ?string $vocabulary): Ite } elseif (is_array($type)) { $types = array_map( fn ($type) => is_string($type) ? $this->resolveTerm($type, $vocabulary) : null, - $types, + $type, ); $types = array_filter($types); @@ -213,7 +222,7 @@ private function flattenArray(array $array): array { $result = []; - array_walk_recursive($array, function ($a) use (&$result): void { + array_walk_recursive($array, function (mixed $a) use (&$result): void { $result[] = $a; }); diff --git a/src/Reader/MicrodataReader.php b/src/Reader/MicrodataReader.php index 8878c65..50a858e 100644 --- a/src/Reader/MicrodataReader.php +++ b/src/Reader/MicrodataReader.php @@ -6,6 +6,9 @@ use Brick\StructuredData\Item; use Brick\StructuredData\Reader; +use Dom\Document; +use Dom\Node; +use Dom\XPath; use DOMDocument; use DOMNode; use DOMXPath; @@ -15,6 +18,8 @@ use function array_filter; use function array_map; use function array_values; +use function assert; +use function class_exists; use function explode; use function in_array; use function iterator_to_array; @@ -36,9 +41,14 @@ final class MicrodataReader implements Reader { #[Override] - public function read(DOMDocument $document, string $url): array + public function read(Document|DOMDocument $document, string $url): array { - $xpath = new DOMXPath($document); + if ($document instanceof Document) { + assert(class_exists(XPath::class)); + $xpath = new XPath($document); + } else { + $xpath = new DOMXPath($document); + } /** * An item is a top-level Microdata item if its element does not have an itemprop attribute. @@ -49,19 +59,19 @@ public function read(DOMDocument $document, string $url): array $nodes = iterator_to_array($nodes); return array_map( - fn (DOMNode $node) => $this->nodeToItem($node, $xpath, $url), + fn (DOMNode|Node $node) => $this->nodeToItem($node, $xpath, $url), $nodes, ); } /** - * Extracts information from a DOMNode into an Item. + * Extracts information from a (DOM)Node into an Item. * - * @param DOMNode $node A DOMNode representing an element with the itemscope attribute. - * @param DOMXPath $xpath A DOMXPath object created from the node's document element. - * @param string $url The URL the document was retrieved from, for relative URL resolution. + * @param DOMNode|Node $node A (DOM)Node representing an element with the itemscope attribute. + * @param DOMXPath|XPath $xpath A (DOM)XPath object created from the node's document element. + * @param string $url The URL the document was retrieved from, for relative URL resolution. */ - private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item + private function nodeToItem(DOMNode|Node $node, DOMXPath|XPath $xpath, string $url): Item { $itemid = $node->attributes->getNamedItem('itemid'); @@ -106,7 +116,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item // Exclude properties that are inside a nested item; XPath does not seem to provide a way to do this. // See: https://stackoverflow.com/q/26365495/759866 - $itemprops = array_filter($itemprops, function (DOMNode $itemprop) use ($node, $xpath) { + $itemprops = array_filter($itemprops, function (DOMNode|Node $itemprop) use ($node) { for (; ;) { $itemprop = $itemprop->parentNode; @@ -122,7 +132,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item $vocabularyIdentifier = $this->getVocabularyIdentifier($types); - /** @var DOMNode[] $itemprops */ + /** @var array $itemprops */ foreach ($itemprops as $itemprop) { /** * An element introducing a property can introduce multiple properties at once, to avoid duplication when @@ -159,11 +169,11 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item /** * @see https://www.w3.org/TR/microdata/#values * - * @param DOMNode $node A DOMNode representing an element with the itemprop attribute. - * @param DOMXPath $xpath A DOMXPath object created from the node's document element. - * @param string $url The URL the document was retrieved from, for relative URL resolution. + * @param DOMNode|Node $node A (DOM)Node representing an element with the itemprop attribute. + * @param DOMXPath|XPath $xpath A (DOM)XPath object created from the node's document element. + * @param string $url The URL the document was retrieved from, for relative URL resolution. */ - private function getPropertyValue(DOMNode $node, DOMXPath $xpath, string $url): Item|string + private function getPropertyValue(DOMNode|Node $node, DOMXPath|XPath $xpath, string $url): Item|string { /** * If the element also has an itemscope attribute: the value is the item created by the element. diff --git a/src/Reader/RdfaLiteReader.php b/src/Reader/RdfaLiteReader.php index 70037e7..abb9b78 100644 --- a/src/Reader/RdfaLiteReader.php +++ b/src/Reader/RdfaLiteReader.php @@ -6,6 +6,10 @@ use Brick\StructuredData\Item; use Brick\StructuredData\Reader; +use Dom\Document; +use Dom\Element; +use Dom\Node; +use Dom\XPath; use DOMDocument; use DOMNode; use DOMXPath; @@ -15,6 +19,8 @@ use function array_filter; use function array_map; use function array_values; +use function assert; +use function class_exists; use function count; use function explode; use function iterator_to_array; @@ -93,9 +99,14 @@ final class RdfaLiteReader implements Reader ]; #[Override] - public function read(DOMDocument $document, string $url): array + public function read(Document|DOMDocument $document, string $url): array { - $xpath = new DOMXPath($document); + if ($document instanceof Document) { + assert(class_exists(XPath::class)); + $xpath = new XPath($document); + } else { + $xpath = new DOMXPath($document); + } /** * Top-level item has a typeof attribute and no property attribute. @@ -104,7 +115,7 @@ public function read(DOMDocument $document, string $url): array $nodes = iterator_to_array($nodes); return array_map( - fn (DOMNode $node) => $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null), + fn (DOMNode|Node $node) => $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null), $nodes, ); } @@ -112,14 +123,14 @@ public function read(DOMDocument $document, string $url): array /** * Extracts information from a DOMNode into an Item. * - * @param DOMNode $node A DOMNode representing an element with the typeof attribute. - * @param DOMXPath $xpath A DOMXPath object created from the node's document element. - * @param string $url The URL the document was retrieved from, for relative URL resolution. - * @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL. - * @param string|null $vocabulary The URL of the vocabulary in use, if any. - * This is the content of the vocab attribute of the closest item ancestor. + * @param DOMNode|Node $node A (DOM)Node representing an element with the typeof attribute. + * @param DOMXPath|XPath $xpath A (DOM)XPath object created from the node's document element. + * @param string $url The URL the document was retrieved from, for relative URL resolution. + * @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL. + * @param string|null $vocabulary The URL of the vocabulary in use, if any. + * This is the content of the vocab attribute of the closest item ancestor. */ - private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item + private function nodeToItem(DOMNode|Node $node, DOMXPath|XPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item { $vocabulary = $this->updateVocabulary($node, $vocabulary); @@ -165,7 +176,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $ // Exclude properties that are inside a nested item; XPath does not seem to provide a way to do this. // See: https://stackoverflow.com/q/26365495/759866 - $properties = array_filter($properties, function (DOMNode $itemprop) use ($node, $xpath) { + $properties = array_filter($properties, function (DOMNode|Node $itemprop) use ($node) { for (; ;) { $itemprop = $itemprop->parentNode; @@ -177,12 +188,9 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $ return false; } } - - // Unreachable, but makes static analysis happy - return false; }); - /** @var DOMNode[] $properties */ + /** @var array $properties */ foreach ($properties as $property) { $names = $property->attributes->getNamedItem('property')->textContent; @@ -261,12 +269,12 @@ private function isValidAbsoluteURL(string $url): bool /** * Replaces the current vocabulary with the one from the vocab attribute of the current node, if set. * - * @param DOMNode $node The DOMNode that may contain a vocab attribute. - * @param string|null $vocabulary The URL of the vocabulary in use, if any. + * @param DOMNode|Node $node The (DOM)Node that may contain a vocab attribute. + * @param string|null $vocabulary The URL of the vocabulary in use, if any. * * @return string|null The updated vocabulary URL, if any. */ - private function updateVocabulary(DOMNode $node, ?string $vocabulary): ?string + private function updateVocabulary(DOMNode|Node $node, ?string $vocabulary): ?string { $vocab = $node->attributes->getNamedItem('vocab'); @@ -310,13 +318,13 @@ private function checkVocabularyUrl(string $url): ?string /** * @see https://www.w3.org/TR/microdata/#values * - * @param DOMNode $node A DOMNode representing an element with the property attribute. - * @param DOMXPath $xpath A DOMXPath object created from the node's document element. - * @param string $url The URL the document was retrieved from, for relative URL resolution. - * @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL. - * @param string|null $vocabulary The URL of the vocabulary in use, if any. + * @param DOMNode|Node $node A (DOM)Node representing an element with the property attribute. + * @param DOMXPath|XPath $xpath A (DOM)XPath object created from the node's document element. + * @param string $url The URL the document was retrieved from, for relative URL resolution. + * @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL. + * @param string|null $vocabulary The URL of the vocabulary in use, if any. */ - private function getPropertyValue(DOMNode $node, DOMXPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item|string + private function getPropertyValue(DOMNode|Node $node, DOMXPath|XPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item|string { // If the element also has an typeof attribute, create an item from the element $attr = $node->attributes->getNamedItem('typeof'); diff --git a/src/Reader/ReaderChain.php b/src/Reader/ReaderChain.php index e1c606c..cd322b5 100644 --- a/src/Reader/ReaderChain.php +++ b/src/Reader/ReaderChain.php @@ -5,6 +5,7 @@ namespace Brick\StructuredData\Reader; use Brick\StructuredData\Reader; +use Dom\Document; use DOMDocument; use Override; @@ -29,7 +30,7 @@ public function __construct(Reader ...$readers) } #[Override] - public function read(DOMDocument $document, string $url): array + public function read(Document|DOMDocument $document, string $url): array { if (! $this->readers) { return []; diff --git a/tests/ReaderTest.php b/tests/ReaderTest.php index 6c33b71..7120b45 100644 --- a/tests/ReaderTest.php +++ b/tests/ReaderTest.php @@ -59,7 +59,7 @@ public function providerHtmlToJson(): iterable $jsonFile = preg_replace('/\-in\.html$/', '-out.json', $htmlFile); $expectedJson = rtrim(file_get_contents($jsonFile)); - yield [$htmlFile, $expectedJson]; + yield $htmlFile => [$htmlFile, $expectedJson]; } } }