From 8514222443c9bf366362cc85e8dbbb11f6e9222c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Tue, 27 May 2025 01:19:05 +0200 Subject: [PATCH 1/6] XMLProcessor: Support namespaces --- .../EntityReader/EPubEntityReader.php | 8 +- .../EntityReader/WXREntityReader.php | 12 +- components/XML/Tests/XMLProcessorTest.php | 138 ++++----- components/XML/XMLProcessor.php | 286 +++++++++++++++--- 4 files changed, 325 insertions(+), 119 deletions(-) diff --git a/components/DataLiberation/EntityReader/EPubEntityReader.php b/components/DataLiberation/EntityReader/EPubEntityReader.php index e77f433f..2fe8ad03 100644 --- a/components/DataLiberation/EntityReader/EPubEntityReader.php +++ b/components/DataLiberation/EntityReader/EPubEntityReader.php @@ -141,7 +141,7 @@ private function parse_manifest() { return false; } - $full_path = $xml->get_attribute( 'full-path' ); + $full_path = $xml->get_attribute_by_qualified_name( 'full-path' ); if ( ! $full_path ) { return false; } @@ -161,13 +161,13 @@ private function parse_manifest() { ); while ( $xml->next_tag() ) { $parsed_entry = array(); - $keys = $xml->get_attribute_names_with_prefix( '' ); + $keys = $xml->get_attribute_qualified_names_with_prefix( '' ); foreach ( $keys as $key ) { - $parsed_entry[ $key ] = $xml->get_attribute( $key ); + $parsed_entry[ $key ] = $xml->get_attribute_by_qualified_name( $key ); } if ( $xml->matches_breadcrumbs( array( 'metadata', '*' ) ) ) { $parsed['metadata'][] = array( - 'tag' => $xml->get_tag(), + 'tag' => $xml->get_qualified_tag(), 'attributes' => $parsed_entry, ); } elseif ( $xml->matches_breadcrumbs( array( 'manifest', 'item' ) ) ) { diff --git a/components/DataLiberation/EntityReader/WXREntityReader.php b/components/DataLiberation/EntityReader/WXREntityReader.php index 6b545456..b4908849 100644 --- a/components/DataLiberation/EntityReader/WXREntityReader.php +++ b/components/DataLiberation/EntityReader/WXREntityReader.php @@ -659,7 +659,7 @@ private function 
read_next_entity() { $this->last_xml_cursor_outside_of_entity = $this->xml->get_reentrancy_cursor(); } - $tag = $this->xml->get_tag(); + $tag = $this->xml->get_qualified_tag(); /** * Custom adjustment: the Accessibility WXR file uses a non-standard * wp:wp_author tag. @@ -732,16 +732,16 @@ private function read_next_entity() { */ if ( $this->xml->is_tag_opener() ) { $this->last_opener_attributes = array(); - $names = $this->xml->get_attribute_names_with_prefix( '' ); + $names = $this->xml->get_attribute_qualified_names_with_prefix( '' ); foreach ( $names as $name ) { - $this->last_opener_attributes[ $name ] = $this->xml->get_attribute( $name ); + $this->last_opener_attributes[ $name ] = $this->xml->get_attribute_by_qualified_name( $name ); } $this->text_buffer = ''; $is_site_option_opener = ( count( $this->xml->get_breadcrumbs() ) === 3 && $this->xml->matches_breadcrumbs( array( 'rss', 'channel', '*' ) ) && - array_key_exists( $this->xml->get_tag(), static::KNOWN_SITE_OPTIONS ) + array_key_exists( $this->xml->get_qualified_tag(), static::KNOWN_SITE_OPTIONS ) ); if ( $is_site_option_opener ) { $this->last_xml_byte_offset_outside_of_entity = $this->xml->get_token_byte_offset_in_the_input_stream(); @@ -848,13 +848,13 @@ private function read_next_entity() { * @return bool Whether a site_option entity was emitted. */ private function parse_site_option() { - if ( ! array_key_exists( $this->xml->get_tag(), static::KNOWN_SITE_OPTIONS ) ) { + if ( ! 
array_key_exists( $this->xml->get_qualified_tag(), static::KNOWN_SITE_OPTIONS ) ) { return false; } $this->entity_type = 'site_option'; $this->entity_data = array( - 'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_tag() ], + 'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_qualified_tag() ], 'option_value' => $this->text_buffer, ); $this->emit_entity(); diff --git a/components/XML/Tests/XMLProcessorTest.php b/components/XML/Tests/XMLProcessorTest.php index 10965cfe..b03144fe 100644 --- a/components/XML/Tests/XMLProcessorTest.php +++ b/components/XML/Tests/XMLProcessorTest.php @@ -26,36 +26,36 @@ public function beforeEach() { /** * @ticket 61365 * - * @covers XMLProcessor::get_tag + * @covers XMLProcessor::get_qualified_tag */ public function test_get_tag_returns_null_before_finding_tags() { $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertNull( $processor->get_tag(), 'Calling get_tag() without selecting a tag did not return null' ); + $this->assertNull( $processor->get_qualified_tag(), 'Calling get_tag() without selecting a tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_tag + * @covers XMLProcessor::get_qualified_tag */ public function test_get_tag_returns_null_when_not_in_open_tag() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); - $this->assertNull( $processor->get_tag(), 'Accessing a non-existing tag did not return null' ); + $this->assertNull( $processor->get_qualified_tag(), 'Accessing a non-existing tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_tag + * @covers XMLProcessor::get_qualified_tag */ public function test_get_tag_returns_open_tag_name() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertSame( 
'wp:content', $processor->get_tag(), 'Accessing an existing tag name did not return "div"' ); + $this->assertSame( 'wp:content', $processor->get_qualified_tag(), 'Accessing an existing tag name did not return "div"' ); } /** @@ -107,20 +107,20 @@ public static function data_is_empty_element() { /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_get_attribute_returns_null_when_not_in_open_tag() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); - $this->assertNull( $processor->get_attribute( 'wp:post-type' ), + $this->assertNull( $processor->get_attribute_by_qualified_name( 'wp:post-type' ), 'Accessing an attribute of a non-existing tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_get_attribute_returns_null_when_in_closing_tag() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -128,26 +128,26 @@ public function test_get_attribute_returns_null_when_in_closing_tag() { $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); $this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' ); $this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' ); - $this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' ); + $this->assertNull( $processor->get_attribute_by_qualified_name( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function 
test_get_attribute_returns_null_when_attribute_missing() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertNull( $processor->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' ); + $this->assertNull( $processor->get_attribute_by_qualified_name( 'test-id' ), 'Accessing a non-existing attribute did not return null' ); } /** * @ticket 61365 * * @expectedIncorrectUsage XMLProcessor::base_class_next_token - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_attributes_are_rejected_in_tag_closers() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -160,13 +160,13 @@ public function test_attributes_are_rejected_in_tag_closers() { /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_get_attribute_returns_attribute_value() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertSame( 'test', $processor->get_attribute( 'wp:post-type' ), + $this->assertSame( 'test', $processor->get_attribute_by_qualified_name( 'wp:post-type' ), 'Accessing a wp:post-type="test" attribute value did not return "test"' ); } @@ -174,7 +174,7 @@ public function test_get_attribute_returns_attribute_value() { * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_parsing_stops_on_malformed_attribute_value_no_value() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -186,7 +186,7 @@ public function test_parsing_stops_on_malformed_attribute_value_no_value() { * @ticket 61365 * 
@expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_parsing_stops_on_malformed_attribute_value_no_quotes() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -198,33 +198,33 @@ public function test_parsing_stops_on_malformed_attribute_value_no_quotes() { * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::get_attribute * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_malformed_attribute_value_containing_ampersand_is_treated_as_plaintext() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' ); - $this->assertEquals( 'WordPress & WordPress', $processor->get_attribute( 'enabled' ) ); + $this->assertEquals( 'WordPress & WordPress', $processor->get_attribute_by_qualified_name( 'enabled' ) ); } /** * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::get_attribute * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_malformed_attribute_value_containing_entity_without_semicolon_is_treated_as_plaintext() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' ); - $this->assertEquals( '”', $processor->get_attribute( 'enabled' ) ); + $this->assertEquals( '”', $processor->get_attribute_by_qualified_name( 'enabled' ) ); } /** * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_parsing_stops_on_malformed_attribute_value_contains_lt_character() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -236,7 +236,7 @@ public function 
test_parsing_stops_on_malformed_attribute_value_contains_lt_char * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_parsing_stops_on_malformed_tags_duplicate_attributes() { $processor = XMLProcessor::create_from_string( 'Text' ); @@ -248,7 +248,7 @@ public function test_parsing_stops_on_malformed_tags_duplicate_attributes() { * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_parsing_stops_on_malformed_attribute_name_contains_slash() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -259,7 +259,7 @@ public function test_parsing_stops_on_malformed_attribute_name_contains_slash() /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_get_modifiable_text_returns_a_decoded_value() { $processor = XMLProcessor::create_from_string( '“😄”' ); @@ -277,7 +277,7 @@ public function test_get_modifiable_text_returns_a_decoded_value() { /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_get_attribute_returns_a_decoded_value() { $processor = XMLProcessor::create_from_string( '' ); @@ -285,7 +285,7 @@ public function test_get_attribute_returns_a_decoded_value() { $this->assertTrue( $processor->next_tag( 'root' ), 'Querying a tag did not return true' ); $this->assertEquals( '“😄”', - $processor->get_attribute( 'encoded-data' ), + $processor->get_attribute_by_qualified_name( 'encoded-data' ), 'Reading an encoded attribute did not decode it.' 
); } @@ -293,7 +293,7 @@ public function test_get_attribute_returns_a_decoded_value() { /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name * * @param string $attribute_name Name of data-enabled attribute with case variations. */ @@ -303,12 +303,12 @@ public function test_get_attribute_is_case_sensitive() { $this->assertEquals( 'true', - $processor->get_attribute( 'DATA-enabled' ), + $processor->get_attribute_by_qualified_name( 'DATA-enabled' ), 'Accessing an attribute by a same-cased name did return not its value' ); $this->assertNull( - $processor->get_attribute( 'data-enabled' ), + $processor->get_attribute_by_qualified_name( 'data-enabled' ), 'Accessing an attribute by a differently-cased name did return its value' ); } @@ -350,12 +350,12 @@ public function test_set_attribute_is_case_sensitive() { /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertNull( - $processor->get_attribute_names_with_prefix( 'data-' ), + $processor->get_attribute_qualified_names_with_prefix( 'data-' ), 'Accessing attributes by their prefix did not return null when no tag was selected' ); } @@ -363,46 +363,46 @@ public function test_get_attribute_names_with_prefix_returns_null_before_finding /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() { $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag( 'p' ); - $this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), + $this->assertNull( $processor->get_attribute_qualified_names_with_prefix( 
'data-' ), 'Accessing attributes of a non-existing tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() { $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag( 'wp:content' ); $processor->next_tag( array( 'tag_closers' => 'visit' ) ); - $this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), + $this->assertNull( $processor->get_attribute_qualified_names_with_prefix( 'data-' ), 'Accessing attributes of a closing tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() { $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag( 'wp:content' ); - $this->assertSame( array(), $processor->get_attribute_names_with_prefix( 'data-' ), + $this->assertSame( array(), $processor->get_attribute_qualified_names_with_prefix( 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_original_case() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -410,7 +410,7 @@ public function test_get_attribute_names_with_prefix_returns_matching_attribute_ $this->assertSame( array( 'data-test-ID' ), - $processor->get_attribute_names_with_prefix( 'data-' ), + $processor->get_attribute_qualified_names_with_prefix( 'data-' ), 'Accessing attributes by their prefix did not return their lowercase names' ); } @@ -418,7 
+418,7 @@ public function test_get_attribute_names_with_prefix_returns_matching_attribute_ /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_names_with_prefix + * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -432,7 +432,7 @@ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_ ); $this->assertSame( array( 'data-test-id', 'data-foo' ), - $processor->get_attribute_names_with_prefix( 'data-' ), + $processor->get_attribute_qualified_names_with_prefix( 'data-' ), "Accessing attribute names doesn't find attribute added via set_attribute" ); } @@ -753,7 +753,7 @@ public function test_internal_pointer_returns_to_original_spot_after_inserting_c $tags->seek( 'here' ); $this->assertSame( 'outside
inside
', $tags->get_updated_xml() ); - $this->assertSame( 'section', $tags->get_tag() ); + $this->assertSame( 'section', $tags->get_qualified_tag() ); $this->assertFalse( $tags->is_tag_closer() ); } @@ -821,7 +821,7 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr ); $this->assertSame( 'test-value', - $processor->get_attribute( 'test-attribute' ), + $processor->get_attribute_by_qualified_name( 'test-attribute' ), 'get_attribute() (called after get_updated_xml()) did not return attribute added via set_attribute()' ); } @@ -829,7 +829,7 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_get_attribute_returns_updated_values_before_they_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -838,7 +838,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie $this->assertSame( 'test-value', - $processor->get_attribute( 'test-attribute' ), + $processor->get_attribute_by_qualified_name( 'test-attribute' ), 'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()' ); $this->assertSame( @@ -851,7 +851,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -860,7 +860,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie $this->assertSame( 'test-value', - $processor->get_attribute( 'test-ATTribute' ), + $processor->get_attribute_by_qualified_name( 'test-ATTribute' ), 'get_attribute() (called before 
get_updated_xml()) did not return attribute added via set_attribute()' ); $this->assertSame( @@ -874,7 +874,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -882,7 +882,7 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_appli $processor->remove_attribute( 'id' ); $this->assertNull( - $processor->get_attribute( 'id' ), + $processor->get_attribute_by_qualified_name( 'id' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was removed by remove_attribute()' ); $this->assertSame( @@ -895,7 +895,7 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_appli /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -904,7 +904,7 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut $processor->remove_attribute( 'test-attribute' ); $this->assertNull( - $processor->get_attribute( 'test-attribute' ), + $processor->get_attribute_by_qualified_name( 'test-attribute' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was added via set_attribute() and then removed by remove_attribute()' ); $this->assertSame( @@ -917,7 +917,7 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute + * @covers XMLProcessor::get_attribute_by_qualified_name */ public function 
test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -926,7 +926,7 @@ public function test_get_attribute_reflects_setting_and_then_removing_an_existin $processor->remove_attribute( 'id' ); $this->assertNull( - $processor->get_attribute( 'id' ), + $processor->get_attribute_by_qualified_name( 'id' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()' ); $this->assertSame( @@ -1106,7 +1106,7 @@ public function test_documents_may_end_with_unclosed_comment( $xml_ending_before $this->assertFalse( $processor->next_tag(), - "Should not have found any tag, but found {$processor->get_tag()}." + "Should not have found any tag, but found {$processor->get_qualified_tag()}." ); $this->assertTrue( @@ -1144,7 +1144,7 @@ public function test_partial_syntax_triggers_parse_error_when_streaming_is_not_u $this->assertFalse( $processor->next_tag(), - "Should not have found any tag, but found {$processor->get_tag()}." + "Should not have found any tag, but found {$processor->get_qualified_tag()}." ); $this->assertFalse( @@ -1189,7 +1189,7 @@ public function test_next_tag_returns_false_for_incomplete_syntax_elements( $inc $processor->next_tag(); $this->assertFalse( $processor->next_tag(), - "Shouldn't have found any tags but found {$processor->get_tag()}." + "Shouldn't have found any tags but found {$processor->get_qualified_tag()}." ); $this->assertTrue( @@ -1379,8 +1379,8 @@ public function test_xml_declaration() { $processor->get_token_type(), 'The XML declaration was not correctly identified.' ); - $this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' ); - $this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' 
); + $this->assertEquals( '1.0', $processor->get_attribute_by_qualified_name( 'version' ), 'The version attribute was not correctly captured.' ); + $this->assertEquals( 'UTF-8', $processor->get_attribute_by_qualified_name( 'encoding' ), 'The encoding attribute was not correctly captured.' ); } /** @@ -1395,8 +1395,8 @@ public function test_xml_declaration_with_single_quotes() { $processor->get_token_type(), 'The XML declaration was not correctly identified.' ); - $this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' ); - $this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' ); + $this->assertEquals( '1.0', $processor->get_attribute_by_qualified_name( 'version' ), 'The version attribute was not correctly captured.' ); + $this->assertEquals( 'UTF-8', $processor->get_attribute_by_qualified_name( 'encoding' ), 'The encoding attribute was not correctly captured.' ); } /** @@ -1456,7 +1456,7 @@ public function insert_after( $new_xml ) { $subclass->next_tag(); $this->assertSame( 'p', - $subclass->get_tag(), + $subclass->get_qualified_tag(), 'Should have matched inserted XML as next tag.' ); @@ -1544,7 +1544,7 @@ public function test_next_tag_by_breadcrumbs() { ) ); - $this->assertEquals( 'image', $processor->get_tag(), 'Did not find the expected tag' ); + $this->assertEquals( 'image', $processor->get_qualified_tag(), 'Did not find the expected tag' ); } /** @@ -1623,7 +1623,7 @@ public function test_mixed_misc_grammar_allowed_after_root_element() { $processor = XMLProcessor::create_from_string( ' ' ); $processor->next_tag(); - $this->assertEquals( 'root', $processor->get_tag(), 'Did not find a tag.' ); + $this->assertEquals( 'root', $processor->get_qualified_tag(), 'Did not find a tag.' 
); $processor->next_tag(); $this->assertNull( $processor->get_last_error(), 'Did not run into a parse error after the root element' ); @@ -1758,7 +1758,7 @@ public function test_pause_and_resume() { $processor = XMLProcessor::create_for_streaming( $xml ); $processor->next_tag(); $processor->next_tag(); - $this->assertEquals( 'first_child', $processor->get_tag(), 'Did not find a tag.' ); + $this->assertEquals( 'first_child', $processor->get_qualified_tag(), 'Did not find a tag.' ); $entity_offset = $processor->get_token_byte_offset_in_the_input_stream(); $cursor = $processor->get_reentrancy_cursor(); @@ -1768,7 +1768,7 @@ public function test_pause_and_resume() { $cursor ); $resumed->next_tag(); - $this->assertEquals( 'first_child', $resumed->get_tag(), 'Did not find a tag.' ); + $this->assertEquals( 'first_child', $resumed->get_qualified_tag(), 'Did not find a tag.' ); $resumed->next_token(); $this->assertEquals( 'Hello there', $resumed->get_modifiable_text(), 'Did not find the expected text.' 
); } @@ -1786,7 +1786,7 @@ public function test_doctype_parsing() { $this->assertTrue( $processor->next_token(), 'Did not find DOCTYPE node' ); $this->assertEquals( '#doctype', $processor->get_token_type(), 'Did not find DOCTYPE node' ); $this->assertTrue( $processor->next_token(), 'Did not find root tag' ); - $this->assertEquals( 'root', $processor->get_tag(), 'Did not find root tag' ); + $this->assertEquals( 'root', $processor->get_qualified_tag(), 'Did not find root tag' ); } /** @@ -1806,7 +1806,7 @@ public function test_xhtml_doctype_parsing() { $this->assertEquals( 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd', $processor->get_system_literal(), 'Did not find system literal' ); $this->assertTrue( $processor->next_token(), 'Did not find root tag' ); - $this->assertEquals( 'root', $processor->get_tag(), 'Did not find root tag' ); + $this->assertEquals( 'root', $processor->get_qualified_tag(), 'Did not find root tag' ); } /** @@ -1826,7 +1826,7 @@ public function test_system_doctype_parsing() { $this->assertEquals( 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd', $processor->get_system_literal(), 'Did not find system literal' ); $this->assertTrue( $processor->next_token(), 'Did not find root tag' ); - $this->assertEquals( 'root', $processor->get_tag(), 'Did not find root tag' ); + $this->assertEquals( 'root', $processor->get_qualified_tag(), 'Did not find root tag' ); } /** diff --git a/components/XML/XMLProcessor.php b/components/XML/XMLProcessor.php index 2eea43ef..bea1dfd4 100644 --- a/components/XML/XMLProcessor.php +++ b/components/XML/XMLProcessor.php @@ -130,15 +130,15 @@ * $remaining_count = 5; * while ( $remaining_count > 0 && $tags->next_tag() ) { * if ( - * ( 'wp:musician' === $tags->get_tag() || 'wp:actor' === $tags->get_tag() ) && - * 'jazzy' === $tags->get_attribute( 'data-style' ) + * ( 'wp:musician' === $tags->get_qualified_tag() || 'wp:actor' === $tags->get_qualified_tag() ) && + * 'jazzy' === $tags->get_attribute_by_qualified_name( 
'data-style' ) * ) { * $tags->set_attribute( 'wp:theme-style', 'theme-style-everest-jazz' ); * $remaining_count--; * } * } * - * `get_attribute()` will return `null` if the attribute wasn't present + * `get_attribute_by_qualified_name()` will return `null` if the attribute wasn't present * on the tag when it was called. It may return `""` (the empty string) * in cases where the attribute was present but its value was empty. * For boolean attributes, those whose name is present but no value is @@ -225,7 +225,7 @@ * while ( $p->next_tag( array( 'tag_name' => 'wp:todo-list' ) ) ) { * $p->set_bookmark( 'list-start' ); * while ( $p->next_tag( array( 'tag_closers' => 'visit' ) ) ) { - * if ( 'wp:todo' === $p->get_tag() && $p->is_tag_closer() ) { + * if ( 'wp:todo' === $p->get_qualified_tag() && $p->is_tag_closer() ) { * $p->set_bookmark( 'list-end' ); * $p->seek( 'list-start' ); * $p->set_attribute( 'data-contained-todos', (string) $total_todos ); @@ -234,7 +234,7 @@ * break; * } * - * if ( 'wp:todo-item' === $p->get_tag() && ! $p->is_tag_closer() ) { + * if ( 'wp:todo-item' === $p->get_qualified_tag() && ! $p->is_tag_closer() ) { * $total_todos++; * } * } @@ -348,6 +348,23 @@ class XMLProcessor { */ const MAX_SEEK_OPS = 1000; + const DEFAULT_NAMESPACE_PREFIX = ''; + + /** + * One stack frame per element, each a prefix ⇒ URI map. + * Frame 0 contains the two pre-declared namespaces and the initially empty default namespace. + * + * @since WP_VERSION + * @var array<array<string, string>> + */ + private $namespace_stack = array( + array( + 'xml' => 'http://www.w3.org/XML/1998/namespace', // Predefined, cannot be unbound or changed + 'xmlns' => 'http://www.w3.org/2000/xmlns/', // Reserved for xmlns attributes, not a real namespace for elements/attributes + self::DEFAULT_NAMESPACE_PREFIX => '', // Default namespace is initially empty (no namespace) + ), + ); + /** * The XML document to parse.
* @@ -709,9 +726,6 @@ class XMLProcessor { */ public $stack_of_open_elements = array(); - /** - * - */ public static function create_from_string( $xml, $cursor = null, $known_definite_encoding = 'UTF-8' ) { $processor = static::create_for_streaming( $xml, $cursor, $known_definite_encoding ); if ( null === $processor ) { @@ -757,6 +771,7 @@ public function get_reentrancy_cursor() { 'parser_context' => $this->parser_context, 'stack_of_open_elements' => $this->stack_of_open_elements, 'expecting_more_input' => $this->expecting_more_input, + 'namespace_stack' => $this->namespace_stack, ) ) ); @@ -804,6 +819,7 @@ protected function initialize_from_cursor( $cursor ) { $this->stack_of_open_elements = $cursor['stack_of_open_elements']; $this->parser_context = $cursor['parser_context']; $this->expecting_more_input = $cursor['expecting_more_input']; + $this->namespace_stack = $cursor['namespace_stack']; return true; } @@ -1035,6 +1051,19 @@ protected function parse_next_token() { $this->bytes_already_parsed = $tag_ends_at + 1; $this->token_length = $this->bytes_already_parsed - $this->token_starts_at; + /** + * Confirm the tag name is valid with respect to XML namespaces. + * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#Conformance + */ + $tag_name = $this->get_qualified_tag(); + if ( substr_count( $tag_name, ':' ) > 1 ) { + $this->bail( + sprintf('Invalid tag name "%s" – more than one ":" in tag name. Every tag name must contain either zero or one colon.', $tag_name), + self::ERROR_SYNTAX + ); + return false; + } + /* * If we are in a PCData element, everything until the closer * is considered text. @@ -1054,7 +1083,7 @@ protected function parse_next_token() { $tag_ends_at = $this->token_starts_at + $this->token_length; $attributes = $this->attributes; - $found_closer = $this->skip_pcdata( $this->get_tag() ); + $found_closer = $this->skip_pcdata( $this->get_qualified_tag() ); // Closer not found, the document is incomplete. 
if ( false === $found_closer ) { @@ -1146,7 +1175,7 @@ public function is_finished(): bool { * $p = new XMLProcessor( $xml ); * $in_list = false; * while ( $p->next_tag( array( 'tag_closers' => $in_list ? 'visit' : 'skip' ) ) ) { - * if ( 'UL' === $p->get_tag() ) { + * if ( 'UL' === $p->get_qualified_tag() ) { * if ( $p->is_tag_closer() ) { * $in_list = false; * $p->set_bookmark( 'resume' ); @@ -1161,7 +1190,7 @@ public function is_finished(): bool { * } * } * - * if ( 'LI' === $p->get_tag() ) { + * if ( 'LI' === $p->get_qualified_tag() ) { * $p->set_bookmark( 'last-li' ); * } * } @@ -1395,7 +1424,7 @@ public function declare_element_as_pcdata( $element_name ) { * */ public function is_pcdata_element() { - return array_key_exists( $this->get_tag(), $this->pcdata_elements ); + return array_key_exists( $this->get_qualified_tag(), $this->pcdata_elements ); } @@ -1537,7 +1566,6 @@ private function parse_next_tag() { if ( $at + 1 >= $doc_length ) { $this->mark_incomplete_input(); - return false; } @@ -1842,7 +1870,7 @@ private function parse_next_tag() { 'm' === $xml[ $at + 3 ] && 'l' === $xml[ $at + 4 ] ) { - // Setting the parser state early for the get_attribute() calls later in this + // Setting the parser state early for the get_attribute_by_qualified_name() calls later in this // branch. $this->parser_state = self::STATE_XML_DECLARATION; @@ -1879,7 +1907,7 @@ private function parse_next_tag() { } } - if ( '1.0' !== $this->get_attribute( 'version' ) ) { + if ( '1.0' !== $this->get_attribute_by_qualified_name( 'version' ) ) { $this->bail( 'Unsupported XML version declared', self::ERROR_UNSUPPORTED ); } @@ -1889,13 +1917,13 @@ private function parse_next_tag() { * * See https://www.w3.org/TR/xml/#sec-predefined-ent. 
*/ - if ( null !== $this->get_attribute( 'encoding' ) - && 'UTF-8' !== strtoupper( $this->get_attribute( 'encoding' ) ) + if ( null !== $this->get_attribute_by_qualified_name( 'encoding' ) + && 'UTF-8' !== strtoupper( $this->get_attribute_by_qualified_name( 'encoding' ) ) ) { $this->bail( 'Unsupported XML encoding declared, only UTF-8 is supported.', self::ERROR_UNSUPPORTED ); } - if ( null !== $this->get_attribute( 'standalone' ) - && 'YES' !== strtoupper( $this->get_attribute( 'standalone' ) ) + if ( null !== $this->get_attribute_by_qualified_name( 'standalone' ) + && 'YES' !== strtoupper( $this->get_attribute_by_qualified_name( 'standalone' ) ) ) { $this->bail( 'Standalone XML documents are not supported.', self::ERROR_UNSUPPORTED ); } @@ -2095,6 +2123,18 @@ private function parse_next_attribute() { $this->bail( 'Duplicate attribute found in an XML tag.', self::ERROR_SYNTAX ); } + /** + * Confirm the tag name is valid with respect to XML namespaces. + * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#Conformance + */ + if ( substr_count( $attribute_name, ':' ) > 1 ) { + $this->bail( + sprintf('Invalid attribute name "%s" – more than one ":" in attribute name. 
Every attribute name must contain either zero or one colon.', $attribute_name), + self::ERROR_SYNTAX + ); + return false; + } + $this->attributes[ $attribute_name ] = new WP_HTML_Attribute_Token( $attribute_name, $value_start, @@ -2573,22 +2613,22 @@ private function get_enqueued_attribute_value( $comparable_name ) { * * Example: * - * $p = new XMLProcessor( 'Test' ); + * $p = new XMLProcessor( 'Test' ); * $p->next_tag( array( 'class_name' => 'test' ) ) === true; - * $p->get_attribute( 'data-test-id' ) === '14'; - * $p->get_attribute( 'enabled' ) === true; - * $p->get_attribute( 'aria-label' ) === null; + * $p->get_attribute_by_qualified_name( 'data-test-id' ) === '14'; + * $p->get_attribute_by_qualified_name( 'enabled' ) === true; + * $p->get_attribute_by_qualified_name( 'aria-label' ) === null; * * $p->next_tag() === false; - * $p->get_attribute( 'class' ) === null; + * $p->get_attribute_by_qualified_name( 'class' ) === null; * - * @param string $name Name of attribute whose value is requested. + * @param string $qualified_name Qualified name of attribute whose value is requested, e.g. wp:data-test-id * * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`. * @since WP_VERSION * */ - public function get_attribute( $name ) { + public function get_attribute_by_qualified_name( $qualified_name ) { if ( self::STATE_MATCHED_TAG !== $this->parser_state && self::STATE_XML_DECLARATION !== $this->parser_state @@ -2597,16 +2637,16 @@ public function get_attribute( $name ) { } // Return any enqueued attribute value updates if they exist. - $enqueued_value = $this->get_enqueued_attribute_value( $name ); + $enqueued_value = $this->get_enqueued_attribute_value( $qualified_name ); if ( false !== $enqueued_value ) { return $enqueued_value; } - if ( ! isset( $this->attributes[ $name ] ) ) { + if ( ! 
isset( $this->attributes[ $qualified_name ] ) ) { return null; } - $attribute = $this->attributes[ $name ]; + $attribute = $this->attributes[ $qualified_name ]; $raw_value = substr( $this->xml, $attribute->value_starts_at, $attribute->value_length ); $decoded = XMLDecoder::decode( $raw_value ); @@ -2631,7 +2671,31 @@ public function get_attribute( $name ) { } /** - * Gets names of all attributes matching a given prefix in the current tag. + * Returns the value of an attribute scoped to a given fully-qualified namespace name. + * + * Example: + * + * $p = new XMLProcessor( 'Test' ); + * $p->get_attribute_by_expanded_name( 'urn:ISBN:0-395-36341-6', 'test' ) === '123'; + * + * @param $namespace_name Fully-qualified namespace name, e.g. urn:ISBN:0-395-36341-6 + * @param $local_name Local name of the attribute, e.g. test + * + * @return string|null Value of the attribute, or null if not found. + */ + public function get_attribute_by_expanded_name( $namespace_name, $local_name ) { + // Find a local prefix of the fully-qualified namespace name + $namespaces = $this->namespace_stack[count($this->namespace_stack) - 1]; + $prefix = array_search($namespace_name, $namespaces); + if(false === $prefix) { + return null; + } + // Found! Create a qualified name and return the attribute value + return $this->get_attribute_by_qualified_name($prefix . ':' . $local_name); + } + + /** + * Gets qualified names of all attributes matching a given prefix in the current tag. * * Note that matching is case-sensitive. This is in accordance with the spec. 
* @@ -2639,9 +2703,9 @@ public function get_attribute( $name ) { * * $p = new XMLProcessor( 'Test' ); * $p->next_tag( array( 'class_name' => 'test' ) ) === true; - * $p->get_attribute_names_with_prefix( 'data-' ) === array( 'data-ENABLED' ); - * $p->get_attribute_names_with_prefix( 'DATA-' ) === array( 'DATA-test-id' ); - * $p->get_attribute_names_with_prefix( 'DAta-' ) === array(); + * $p->get_attribute_qualified_names_with_prefix( 'data-' ) === array( 'data-ENABLED' ); + * $p->get_attribute_qualified_names_with_prefix( 'DATA-' ) === array( 'DATA-test-id' ); + * $p->get_attribute_qualified_names_with_prefix( 'DAta-' ) === array(); * * @param string $prefix Prefix of requested attribute names. * @@ -2649,7 +2713,7 @@ public function get_attribute( $name ) { * @since WP_VERSION * */ - public function get_attribute_names_with_prefix( $prefix ) { + public function get_attribute_qualified_names_with_prefix( $prefix ) { if ( self::STATE_MATCHED_TAG !== $this->parser_state || $this->is_closing_tag @@ -2674,16 +2738,16 @@ public function get_attribute_names_with_prefix( $prefix ) { * * $p = new XMLProcessor( 'Test' ); * $p->next_tag() === true; - * $p->get_tag() === 'DIV'; + * $p->get_qualified_tag() === 'DIV'; * * $p->next_tag() === false; - * $p->get_tag() === null; + * $p->get_qualified_tag() === null; * * @return string|null Name of currently matched tag in input XML, or `null` if none found. * @since WP_VERSION * */ - public function get_tag() { + public function get_qualified_tag() { if ( null === $this->tag_name_starts_at ) { return null; } @@ -2697,6 +2761,59 @@ public function get_tag() { return null; } + /** + * Returns the namespace prefix of the matched tag. + * + * Example: + * + * $p = new XMLProcessor( 'Test' ); + * $p->next_tag() === true; + * $p->get_namespace_prefix() === 'wp'; + * + * @return string|null The namespace prefix of the matched tag, or null if not available. 
+ */ + public function get_namespace_prefix() { + $tag_name = $this->get_qualified_tag(); + // Only tags have a namespace prefix + if (null === $tag_name) { + return null; + } + $prefix_length = strcspn($tag_name, ':'); + if (0 === $prefix_length || $prefix_length === strlen($tag_name)) { + return self::DEFAULT_NAMESPACE_PREFIX; + } + return substr($tag_name, 0, $prefix_length); + } + + /** + * Returns the namespace reference of the matched tag. + * + * Example: + * + * $p = new XMLProcessor( 'Test' ); + * $p->next_tag() === true; + * $p->next_tag() === true; + * $p->get_namespace_reference() === 'http://www.w3.org/1999/xhtml'; + * + * @return string|null The namespace reference of the matched tag, or null if not available. + */ + public function get_namespace_reference() { + $namespace_prefix = $this->get_namespace_prefix(); + if(null === $namespace_prefix) { + return null; + } + /** + * Look up the namespace reference in the last element of the namespace stack – + * it reflects all the declared, inherited, and unset namespaces that are in effect + * for the current element. + */ + $namespaces = $this->namespace_stack[count($this->namespace_stack) - 1]; + if (isset($namespaces[$namespace_prefix])) { + return $namespaces[$namespace_prefix]; + } + return null; + } + /** * Returns the name from the DOCTYPE declaration. 
* @@ -2901,7 +3018,7 @@ public function get_token_type() { public function get_token_name() { switch ( $this->parser_state ) { case self::STATE_MATCHED_TAG: - return $this->get_tag(); + return $this->get_qualified_tag(); case self::STATE_TEXT_NODE: return '#text'; @@ -3415,7 +3532,7 @@ private function step_in_element( $node_to_process = self::PROCESS_NEXT_NODE ) { return true; case '#tag': // Update the stack of open elements - $tag_name = $this->get_tag(); + $tag_name = $this->get_qualified_tag(); if ( $this->is_tag_closer() ) { $popped = $this->pop_open_element(); if ( $popped !== $tag_name ) { @@ -3556,7 +3673,7 @@ public function matches_breadcrumbs( $breadcrumbs ) { if ( '#tag' === $this->get_token_type() && '*' !== $crumb && - $this->get_tag() !== $crumb + $this->get_qualified_tag() !== $crumb ) { return false; } @@ -3606,14 +3723,103 @@ public function get_current_depth() { } private function pop_open_element() { + array_pop($this->namespace_stack); return array_pop( $this->stack_of_open_elements ); } private function push_open_element( $tag_name ) { + // Track open elements array_push( $this->stack_of_open_elements, $tag_name ); + + /** + * By default, inherit all namespaces from the parent element. + */ + $namespaces = $this->namespace_stack[count($this->namespace_stack) - 1]; + + // Override parent namespaces with the current element's declarations. 
+ foreach($this->attributes as $attribute) { + /** + * xmlns attribute is the default namespace + * xmlns: declares a namespace prefix scoped to the current element and its descendants + * + * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#ns-decl + */ + if( 'xmlns' === $attribute->name) { + $namespaces[self::DEFAULT_NAMESPACE_PREFIX] = $this->get_attribute_by_qualified_name($attribute->name); + continue; + } + + if (substr($attribute->name, 0, 6) === 'xmlns:') { + $prefix = substr($attribute->name, 6); + if (self::DEFAULT_NAMESPACE_PREFIX === $prefix) { + $this->bail( sprintf('Invalid namespace prefix: %s', $attribute->name), self::ERROR_SYNTAX ); + return false; + } + $ns_reference = $this->get_attribute_by_qualified_name($attribute->name); + /** + * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#xmlReserved + */ + if('xml' === $prefix && 'http://www.w3.org/XML/1998/namespace' !== $ns_reference) { + $this->bail( 'The `xml` namespace prefix is by definition bound to the namespace name http://www.w3.org/XML/1998/namespace and must not be overridden.', self::ERROR_SYNTAX ); + return false; + } + /** + * The attribute value in a namespace declaration for a prefix MAY be empty. + * This has the effect, within the scope of the declaration, of removing any + * association of the prefix with a namespace name. Further declarations MAY + * re-declare the prefix again. + */ + if('' === $ns_reference) { + unset($namespaces[$prefix]); + continue; + } + $namespaces[$prefix] = $ns_reference; + continue; + } + } + array_push($this->namespace_stack, $namespaces); + + /** + * Now that we know the namespaces associated with the current element, + * assert that no two attributes have the same (name, namespace) pair. + * + * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#uniqAttrs + */ + $seen = array(); + foreach ( $this->attributes as $attribute ) { + $attr_name = $attribute->name; + // Split into prefix and local name if a colon exists. 
+ $colon_pos = strpos( $attr_name, ':' ); + if ( false === $colon_pos ) { + // Unprefixed attributes do not have a default namespace + // and were already checked for uniqueness in parse_next_attribute() + continue; + } + $prefix = substr($attr_name, 0, $colon_pos); + $local_name = substr($attr_name, $colon_pos + 1); + $namespace_uri = $namespaces[ $prefix ] ?? self::DEFAULT_NAMESPACE_PREFIX; + + /** + * It looks supicious but it's safe – $local_name is guaranteed to not contain + * a colon at this point. + */ + $key = $namespace_uri . ':' . $local_name; + if ( isset( $seen[ $key ] ) ) { + $this->bail( + sprintf( + 'Duplicate attribute "%s" with namespace "%s" found in the same element.', + $local_name, + $namespace_uri + ), + self::ERROR_SYNTAX + ); + return false; + } + $seen[ $key ] = true; + } } private function mark_incomplete_input( From 8c0dcdca32e19e28b5e4fd35409e9db9207d65ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Tue, 27 May 2025 13:44:37 +0200 Subject: [PATCH 2/6] Introduce XMLStackOfOpenElements --- .../EntityReader/EPubEntityReader.php | 6 +- .../EntityReader/WXREntityReader.php | 10 +- components/XML/Tests/XMLProcessorTest.php | 114 ++--- components/XML/XMLAttributeToken.php | 115 +++++ components/XML/XMLElement.php | 72 +++ components/XML/XMLProcessor.php | 435 +++++++++++------- components/XML/XMLStackOfOpenElements.php | 84 ++++ 7 files changed, 612 insertions(+), 224 deletions(-) create mode 100644 components/XML/XMLAttributeToken.php create mode 100644 components/XML/XMLElement.php create mode 100644 components/XML/XMLStackOfOpenElements.php diff --git a/components/DataLiberation/EntityReader/EPubEntityReader.php b/components/DataLiberation/EntityReader/EPubEntityReader.php index 2fe8ad03..d3528560 100644 --- a/components/DataLiberation/EntityReader/EPubEntityReader.php +++ b/components/DataLiberation/EntityReader/EPubEntityReader.php @@ -141,7 +141,7 @@ private function parse_manifest() { return false; } - 
$full_path = $xml->get_attribute_by_qualified_name( 'full-path' ); + $full_path = $xml->get_attribute( 'full-path' ); if ( ! $full_path ) { return false; } @@ -163,11 +163,11 @@ private function parse_manifest() { $parsed_entry = array(); $keys = $xml->get_attribute_qualified_names_with_prefix( '' ); foreach ( $keys as $key ) { - $parsed_entry[ $key ] = $xml->get_attribute_by_qualified_name( $key ); + $parsed_entry[ $key ] = $xml->get_attribute( $key ); } if ( $xml->matches_breadcrumbs( array( 'metadata', '*' ) ) ) { $parsed['metadata'][] = array( - 'tag' => $xml->get_qualified_tag(), + 'tag' => $xml->get_local_tag_name(), 'attributes' => $parsed_entry, ); } elseif ( $xml->matches_breadcrumbs( array( 'manifest', 'item' ) ) ) { diff --git a/components/DataLiberation/EntityReader/WXREntityReader.php b/components/DataLiberation/EntityReader/WXREntityReader.php index b4908849..3cc4edd9 100644 --- a/components/DataLiberation/EntityReader/WXREntityReader.php +++ b/components/DataLiberation/EntityReader/WXREntityReader.php @@ -659,7 +659,7 @@ private function read_next_entity() { $this->last_xml_cursor_outside_of_entity = $this->xml->get_reentrancy_cursor(); } - $tag = $this->xml->get_qualified_tag(); + $tag = $this->xml->get_local_tag_name(); /** * Custom adjustment: the Accessibility WXR file uses a non-standard * wp:wp_author tag. 
@@ -734,14 +734,14 @@ private function read_next_entity() { $this->last_opener_attributes = array(); $names = $this->xml->get_attribute_qualified_names_with_prefix( '' ); foreach ( $names as $name ) { - $this->last_opener_attributes[ $name ] = $this->xml->get_attribute_by_qualified_name( $name ); + $this->last_opener_attributes[ $name ] = $this->xml->get_attribute( $name ); } $this->text_buffer = ''; $is_site_option_opener = ( count( $this->xml->get_breadcrumbs() ) === 3 && $this->xml->matches_breadcrumbs( array( 'rss', 'channel', '*' ) ) && - array_key_exists( $this->xml->get_qualified_tag(), static::KNOWN_SITE_OPTIONS ) + array_key_exists( $this->xml->get_local_tag_name(), static::KNOWN_SITE_OPTIONS ) ); if ( $is_site_option_opener ) { $this->last_xml_byte_offset_outside_of_entity = $this->xml->get_token_byte_offset_in_the_input_stream(); @@ -848,13 +848,13 @@ private function read_next_entity() { * @return bool Whether a site_option entity was emitted. */ private function parse_site_option() { - if ( ! array_key_exists( $this->xml->get_qualified_tag(), static::KNOWN_SITE_OPTIONS ) ) { + if ( ! 
array_key_exists( $this->xml->get_local_tag_name(), static::KNOWN_SITE_OPTIONS ) ) { return false; } $this->entity_type = 'site_option'; $this->entity_data = array( - 'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_qualified_tag() ], + 'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_local_tag_name() ], 'option_value' => $this->text_buffer, ); $this->emit_entity(); diff --git a/components/XML/Tests/XMLProcessorTest.php b/components/XML/Tests/XMLProcessorTest.php index b03144fe..5a2bd814 100644 --- a/components/XML/Tests/XMLProcessorTest.php +++ b/components/XML/Tests/XMLProcessorTest.php @@ -26,36 +26,36 @@ public function beforeEach() { /** * @ticket 61365 * - * @covers XMLProcessor::get_qualified_tag + * @covers XMLProcessor::get_local_tag_name */ public function test_get_tag_returns_null_before_finding_tags() { $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertNull( $processor->get_qualified_tag(), 'Calling get_tag() without selecting a tag did not return null' ); + $this->assertNull( $processor->get_local_tag_name(), 'Calling get_tag() without selecting a tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_qualified_tag + * @covers XMLProcessor::get_local_tag_name */ public function test_get_tag_returns_null_when_not_in_open_tag() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); - $this->assertNull( $processor->get_qualified_tag(), 'Accessing a non-existing tag did not return null' ); + $this->assertNull( $processor->get_local_tag_name(), 'Accessing a non-existing tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_qualified_tag + * @covers XMLProcessor::get_local_tag_name */ public function test_get_tag_returns_open_tag_name() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag( 'wp:content' ), 
'Querying an existing tag did not return true' ); - $this->assertSame( 'wp:content', $processor->get_qualified_tag(), 'Accessing an existing tag name did not return "div"' ); + $this->assertSame( 'wp:content', $processor->get_local_tag_name(), 'Accessing an existing tag name did not return "div"' ); } /** @@ -107,20 +107,20 @@ public static function data_is_empty_element() { /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_null_when_not_in_open_tag() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); - $this->assertNull( $processor->get_attribute_by_qualified_name( 'wp:post-type' ), + $this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a non-existing tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_null_when_in_closing_tag() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -128,26 +128,26 @@ public function test_get_attribute_returns_null_when_in_closing_tag() { $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); $this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' ); $this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' ); - $this->assertNull( $processor->get_attribute_by_qualified_name( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' ); + $this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers 
XMLProcessor::get_attribute */ public function test_get_attribute_returns_null_when_attribute_missing() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertNull( $processor->get_attribute_by_qualified_name( 'test-id' ), 'Accessing a non-existing attribute did not return null' ); + $this->assertNull( $processor->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' ); } /** * @ticket 61365 * * @expectedIncorrectUsage XMLProcessor::base_class_next_token - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_attributes_are_rejected_in_tag_closers() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -160,13 +160,13 @@ public function test_attributes_are_rejected_in_tag_closers() { /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_attribute_value() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertSame( 'test', $processor->get_attribute_by_qualified_name( 'wp:post-type' ), + $this->assertSame( 'test', $processor->get_attribute( 'wp:post-type' ), 'Accessing a wp:post-type="test" attribute value did not return "test"' ); } @@ -174,7 +174,7 @@ public function test_get_attribute_returns_attribute_value() { * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_parsing_stops_on_malformed_attribute_value_no_value() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -186,7 +186,7 @@ public function 
test_parsing_stops_on_malformed_attribute_value_no_value() { * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_parsing_stops_on_malformed_attribute_value_no_quotes() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -198,33 +198,33 @@ public function test_parsing_stops_on_malformed_attribute_value_no_quotes() { * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::get_attribute * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_malformed_attribute_value_containing_ampersand_is_treated_as_plaintext() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' ); - $this->assertEquals( 'WordPress & WordPress', $processor->get_attribute_by_qualified_name( 'enabled' ) ); + $this->assertEquals( 'WordPress & WordPress', $processor->get_attribute( 'enabled' ) ); } /** * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::get_attribute * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_malformed_attribute_value_containing_entity_without_semicolon_is_treated_as_plaintext() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' ); - $this->assertEquals( '”', $processor->get_attribute_by_qualified_name( 'enabled' ) ); + $this->assertEquals( '”', $processor->get_attribute( 'enabled' ) ); } /** * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_parsing_stops_on_malformed_attribute_value_contains_lt_character() { $processor = XMLProcessor::create_from_string( 
'Test' ); @@ -236,7 +236,7 @@ public function test_parsing_stops_on_malformed_attribute_value_contains_lt_char * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_parsing_stops_on_malformed_tags_duplicate_attributes() { $processor = XMLProcessor::create_from_string( 'Text' ); @@ -248,7 +248,7 @@ public function test_parsing_stops_on_malformed_tags_duplicate_attributes() { * @ticket 61365 * @expectedIncorrectUsage XMLProcessor::parse_next_attribute * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_parsing_stops_on_malformed_attribute_name_contains_slash() { $processor = XMLProcessor::create_from_string( 'Test' ); @@ -259,7 +259,7 @@ public function test_parsing_stops_on_malformed_attribute_name_contains_slash() /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_get_modifiable_text_returns_a_decoded_value() { $processor = XMLProcessor::create_from_string( '“😄”' ); @@ -277,7 +277,7 @@ public function test_get_modifiable_text_returns_a_decoded_value() { /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_a_decoded_value() { $processor = XMLProcessor::create_from_string( '' ); @@ -285,7 +285,7 @@ public function test_get_attribute_returns_a_decoded_value() { $this->assertTrue( $processor->next_tag( 'root' ), 'Querying a tag did not return true' ); $this->assertEquals( '“😄”', - $processor->get_attribute_by_qualified_name( 'encoded-data' ), + $processor->get_attribute( 'encoded-data' ), 'Reading an encoded attribute did not decode it.' 
); } @@ -293,7 +293,7 @@ public function test_get_attribute_returns_a_decoded_value() { /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute * * @param string $attribute_name Name of data-enabled attribute with case variations. */ @@ -303,12 +303,12 @@ public function test_get_attribute_is_case_sensitive() { $this->assertEquals( 'true', - $processor->get_attribute_by_qualified_name( 'DATA-enabled' ), + $processor->get_attribute( 'DATA-enabled' ), 'Accessing an attribute by a same-cased name did return not its value' ); $this->assertNull( - $processor->get_attribute_by_qualified_name( 'data-enabled' ), + $processor->get_attribute( 'data-enabled' ), 'Accessing an attribute by a differently-cased name did return its value' ); } @@ -753,7 +753,7 @@ public function test_internal_pointer_returns_to_original_spot_after_inserting_c $tags->seek( 'here' ); $this->assertSame( 'outside
inside
', $tags->get_updated_xml() ); - $this->assertSame( 'section', $tags->get_qualified_tag() ); + $this->assertSame( 'section', $tags->get_local_tag_name() ); $this->assertFalse( $tags->is_tag_closer() ); } @@ -821,7 +821,7 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr ); $this->assertSame( 'test-value', - $processor->get_attribute_by_qualified_name( 'test-attribute' ), + $processor->get_attribute( 'test-attribute' ), 'get_attribute() (called after get_updated_xml()) did not return attribute added via set_attribute()' ); } @@ -829,7 +829,7 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_updated_values_before_they_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -838,7 +838,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie $this->assertSame( 'test-value', - $processor->get_attribute_by_qualified_name( 'test-attribute' ), + $processor->get_attribute( 'test-attribute' ), 'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()' ); $this->assertSame( @@ -851,7 +851,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -860,7 +860,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie $this->assertSame( 'test-value', - $processor->get_attribute_by_qualified_name( 'test-ATTribute' ), + $processor->get_attribute( 'test-ATTribute' ), 'get_attribute() (called 
before get_updated_xml()) did not return attribute added via set_attribute()' ); $this->assertSame( @@ -874,7 +874,7 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -882,7 +882,7 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_appli $processor->remove_attribute( 'id' ); $this->assertNull( - $processor->get_attribute_by_qualified_name( 'id' ), + $processor->get_attribute( 'id' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was removed by remove_attribute()' ); $this->assertSame( @@ -895,7 +895,7 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_appli /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -904,7 +904,7 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut $processor->remove_attribute( 'test-attribute' ); $this->assertNull( - $processor->get_attribute_by_qualified_name( 'test-attribute' ), + $processor->get_attribute( 'test-attribute' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was added via set_attribute() and then removed by remove_attribute()' ); $this->assertSame( @@ -917,7 +917,7 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut /** * @ticket 61365 * - * @covers XMLProcessor::get_attribute_by_qualified_name + * @covers XMLProcessor::get_attribute */ public function 
test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); @@ -926,7 +926,7 @@ public function test_get_attribute_reflects_setting_and_then_removing_an_existin $processor->remove_attribute( 'id' ); $this->assertNull( - $processor->get_attribute_by_qualified_name( 'id' ), + $processor->get_attribute( 'id' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()' ); $this->assertSame( @@ -1106,7 +1106,7 @@ public function test_documents_may_end_with_unclosed_comment( $xml_ending_before $this->assertFalse( $processor->next_tag(), - "Should not have found any tag, but found {$processor->get_qualified_tag()}." + "Should not have found any tag, but found {$processor->get_local_tag_name()}." ); $this->assertTrue( @@ -1144,7 +1144,7 @@ public function test_partial_syntax_triggers_parse_error_when_streaming_is_not_u $this->assertFalse( $processor->next_tag(), - "Should not have found any tag, but found {$processor->get_qualified_tag()}." + "Should not have found any tag, but found {$processor->get_local_tag_name()}." ); $this->assertFalse( @@ -1189,7 +1189,7 @@ public function test_next_tag_returns_false_for_incomplete_syntax_elements( $inc $processor->next_tag(); $this->assertFalse( $processor->next_tag(), - "Shouldn't have found any tags but found {$processor->get_qualified_tag()}." + "Shouldn't have found any tags but found {$processor->get_local_tag_name()}." ); $this->assertTrue( @@ -1379,8 +1379,8 @@ public function test_xml_declaration() { $processor->get_token_type(), 'The XML declaration was not correctly identified.' ); - $this->assertEquals( '1.0', $processor->get_attribute_by_qualified_name( 'version' ), 'The version attribute was not correctly captured.' 
); - $this->assertEquals( 'UTF-8', $processor->get_attribute_by_qualified_name( 'encoding' ), 'The encoding attribute was not correctly captured.' ); + $this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' ); + $this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' ); } /** @@ -1395,8 +1395,8 @@ public function test_xml_declaration_with_single_quotes() { $processor->get_token_type(), 'The XML declaration was not correctly identified.' ); - $this->assertEquals( '1.0', $processor->get_attribute_by_qualified_name( 'version' ), 'The version attribute was not correctly captured.' ); - $this->assertEquals( 'UTF-8', $processor->get_attribute_by_qualified_name( 'encoding' ), 'The encoding attribute was not correctly captured.' ); + $this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' ); + $this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' ); } /** @@ -1456,7 +1456,7 @@ public function insert_after( $new_xml ) { $subclass->next_tag(); $this->assertSame( 'p', - $subclass->get_qualified_tag(), + $subclass->get_local_tag_name(), 'Should have matched inserted XML as next tag.' ); @@ -1544,7 +1544,7 @@ public function test_next_tag_by_breadcrumbs() { ) ); - $this->assertEquals( 'image', $processor->get_qualified_tag(), 'Did not find the expected tag' ); + $this->assertEquals( 'image', $processor->get_local_tag_name(), 'Did not find the expected tag' ); } /** @@ -1623,7 +1623,7 @@ public function test_mixed_misc_grammar_allowed_after_root_element() { $processor = XMLProcessor::create_from_string( ' ' ); $processor->next_tag(); - $this->assertEquals( 'root', $processor->get_qualified_tag(), 'Did not find a tag.' ); + $this->assertEquals( 'root', $processor->get_local_tag_name(), 'Did not find a tag.' 
); $processor->next_tag(); $this->assertNull( $processor->get_last_error(), 'Did not run into a parse error after the root element' ); @@ -1758,7 +1758,7 @@ public function test_pause_and_resume() { $processor = XMLProcessor::create_for_streaming( $xml ); $processor->next_tag(); $processor->next_tag(); - $this->assertEquals( 'first_child', $processor->get_qualified_tag(), 'Did not find a tag.' ); + $this->assertEquals( 'first_child', $processor->get_local_tag_name(), 'Did not find a tag.' ); $entity_offset = $processor->get_token_byte_offset_in_the_input_stream(); $cursor = $processor->get_reentrancy_cursor(); @@ -1768,7 +1768,7 @@ public function test_pause_and_resume() { $cursor ); $resumed->next_tag(); - $this->assertEquals( 'first_child', $resumed->get_qualified_tag(), 'Did not find a tag.' ); + $this->assertEquals( 'first_child', $resumed->get_local_tag_name(), 'Did not find a tag.' ); $resumed->next_token(); $this->assertEquals( 'Hello there', $resumed->get_modifiable_text(), 'Did not find the expected text.' 
); } @@ -1786,7 +1786,7 @@ public function test_doctype_parsing() { $this->assertTrue( $processor->next_token(), 'Did not find DOCTYPE node' ); $this->assertEquals( '#doctype', $processor->get_token_type(), 'Did not find DOCTYPE node' ); $this->assertTrue( $processor->next_token(), 'Did not find root tag' ); - $this->assertEquals( 'root', $processor->get_qualified_tag(), 'Did not find root tag' ); + $this->assertEquals( 'root', $processor->get_local_tag_name(), 'Did not find root tag' ); } /** @@ -1806,7 +1806,7 @@ public function test_xhtml_doctype_parsing() { $this->assertEquals( 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd', $processor->get_system_literal(), 'Did not find system literal' ); $this->assertTrue( $processor->next_token(), 'Did not find root tag' ); - $this->assertEquals( 'root', $processor->get_qualified_tag(), 'Did not find root tag' ); + $this->assertEquals( 'root', $processor->get_local_tag_name(), 'Did not find root tag' ); } /** @@ -1826,7 +1826,7 @@ public function test_system_doctype_parsing() { $this->assertEquals( 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd', $processor->get_system_literal(), 'Did not find system literal' ); $this->assertTrue( $processor->next_token(), 'Did not find root tag' ); - $this->assertEquals( 'root', $processor->get_qualified_tag(), 'Did not find root tag' ); + $this->assertEquals( 'root', $processor->get_local_tag_name(), 'Did not find root tag' ); } /** diff --git a/components/XML/XMLAttributeToken.php b/components/XML/XMLAttributeToken.php new file mode 100644 index 00000000..0f114c0e --- /dev/null +++ b/components/XML/XMLAttributeToken.php @@ -0,0 +1,115 @@ +name = $name; + $this->value_starts_at = $value_start; + $this->value_length = $value_length; + $this->start = $start; + $this->length = $length; + $this->namespace_prefix = $namespace_prefix; + $this->local_name = $local_name; + $this->namespace = $namespace; + } +} diff --git a/components/XML/XMLElement.php b/components/XML/XMLElement.php new file 
mode 100644 index 00000000..f0dd9952 --- /dev/null +++ b/components/XML/XMLElement.php @@ -0,0 +1,72 @@ + + */ + public $namespaces_in_scope; + + /** + * Qualified name. + * + * @var string + */ + public $qualified_name; + + /** + * Constructor. + * + * @param string $local_name Local name. + * @param string $namespace_prefix Namespace prefix. + * @param string $namespace Full XML namespace name. + * @param array $namespaces_in_scope Namespaces in current element's scope. + */ + public function __construct( $local_name, $namespace_prefix, $namespace, $namespaces_in_scope ) { + $this->local_name = $local_name; + $this->namespace_prefix = $namespace_prefix; + $this->namespace = $namespace; + $this->namespaces_in_scope = $namespaces_in_scope; + $this->qualified_name = $namespace_prefix ? $namespace_prefix . ':' . $local_name : $local_name; + } + +} diff --git a/components/XML/XMLProcessor.php b/components/XML/XMLProcessor.php index bea1dfd4..e4c9718a 100644 --- a/components/XML/XMLProcessor.php +++ b/components/XML/XMLProcessor.php @@ -2,7 +2,6 @@ namespace WordPress\XML; -use WP_HTML_Attribute_Token; use WP_HTML_Span; use WP_HTML_Text_Replacement; @@ -350,21 +349,6 @@ class XMLProcessor { const DEFAULT_NAMESPACE_PREFIX = ''; - /** - * One stack frame per element, each a prefix ⇒ URI map. - * Frame 0 contains the two pre-declared namespaces and initial empty default. - * - * @since WP_VERSION - * @var array> - */ - private $namespace_stack = array( - array( - 'xml' => 'http://www.w3.org/XML/1998/namespace', // Predefined, cannot be unbound or changed - 'xmlns' => 'http://www.w3.org/2000/xmlns/', // Reserved for xmlns attributes, not a real namespace for elements/attributes - self::DEFAULT_NAMESPACE_PREFIX => '', // Default namespace is initially empty (no namespace) - ), - ); - /** * The XML document to parse. * @@ -466,6 +450,13 @@ class XMLProcessor { */ private $token_length; + /** + * Currently matched XML element object. 
+ * + * @var XMLElement|null + */ + private $element; + /** * Byte offset in input document where current tag name starts. * @@ -551,18 +542,18 @@ class XMLProcessor { * // * // ^ parsing will continue from this point. * $this->attributes = array( - * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ) + * 'id' => new XMLAttributeToken( 'id', 9, 6, 5, 11, '', 'id', '' ) * ); * * // When picking up parsing again, or when asking to find the * // `class` attribute we will continue and add to this array. * $this->attributes = array( - * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ), - * 'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 13, false ) + * 'id' => new XMLAttributeToken( 'id', 9, 6, 5, 11, '', 'id', '' ), + * 'class' => new XMLAttributeToken( 'class', 23, 7, 17, 13, '', 'class', '' ) * ); * * @since WP_VERSION - * @var WP_HTML_Attribute_Token[] + * @var XMLAttributeToken[] */ private $attributes = array(); @@ -722,9 +713,9 @@ class XMLProcessor { * * @since WP_VERSION * - * @var string[] + * @var XMLStackOfOpenElements */ - public $stack_of_open_elements = array(); + private $stack_of_open_elements; public static function create_from_string( $xml, $cursor = null, $known_definite_encoding = 'UTF-8' ) { $processor = static::create_for_streaming( $xml, $cursor, $known_definite_encoding ); @@ -763,15 +754,24 @@ public static function create_for_streaming( $xml = '', $cursor = null, $known_d * `set_bookmark()` and `seek()`. 
*/ public function get_reentrancy_cursor() { + $stack_of_open_elements = []; + foreach ( $this->stack_of_open_elements->get_items() as $element ) { + $stack_of_open_elements[] = [ + 'local_name' => $element->local_name, + 'namespace_prefix' => $element->namespace_prefix, + 'namespace' => $element->namespace, + 'namespaces_in_scope' => $element->namespaces_in_scope, + ]; + } + return base64_encode( json_encode( array( 'is_finished' => $this->is_finished(), 'upstream_bytes_forgotten' => $this->upstream_bytes_forgotten, 'parser_context' => $this->parser_context, - 'stack_of_open_elements' => $this->stack_of_open_elements, + 'stack_of_open_elements' => $stack_of_open_elements, 'expecting_more_input' => $this->expecting_more_input, - 'namespace_stack' => $this->namespace_stack, ) ) ); @@ -816,10 +816,13 @@ protected function initialize_from_cursor( $cursor ) { // Assume the input stream will start from the last known byte offset. $this->bytes_already_parsed = 0; $this->upstream_bytes_forgotten = $cursor['upstream_bytes_forgotten']; - $this->stack_of_open_elements = $cursor['stack_of_open_elements']; - $this->parser_context = $cursor['parser_context']; - $this->expecting_more_input = $cursor['expecting_more_input']; - $this->namespace_stack = $cursor['namespace_stack']; + $this->stack_of_open_elements = new XMLStackOfOpenElements(); + foreach ( $cursor['stack_of_open_elements'] as $element ) { + $this->stack_of_open_elements->push( new XMLElement( $element['local_name'], $element['namespace_prefix'], + $element['namespace'], $element['namespaces_in_scope'] ) ); + } + $this->parser_context = $cursor['parser_context']; + $this->expecting_more_input = $cursor['expecting_more_input']; return true; } @@ -852,7 +855,8 @@ protected function __construct( $xml, $use_the_static_create_methods_instead = n '6.4.0' ); } - $this->xml = $xml; + $this->xml = $xml; + $this->stack_of_open_elements = new XMLStackOfOpenElements(); } /** @@ -1055,12 +1059,8 @@ protected function 
parse_next_token() { * Confirm the tag name is valid with respect to XML namespaces. * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#Conformance */ - $tag_name = $this->get_qualified_tag(); - if ( substr_count( $tag_name, ':' ) > 1 ) { - $this->bail( - sprintf('Invalid tag name "%s" – more than one ":" in tag name. Every tag name must contain either zero or one colon.', $tag_name), - self::ERROR_SYNTAX - ); + $tag_name = $this->get_local_tag_name(); + if ( false === $this->validate_qualified_name( $tag_name ) ) { return false; } @@ -1083,7 +1083,7 @@ protected function parse_next_token() { $tag_ends_at = $this->token_starts_at + $this->token_length; $attributes = $this->attributes; - $found_closer = $this->skip_pcdata( $this->get_qualified_tag() ); + $found_closer = $this->skip_pcdata( $this->get_local_tag_name() ); // Closer not found, the document is incomplete. if ( false === $found_closer ) { @@ -1424,7 +1424,7 @@ public function declare_element_as_pcdata( $element_name ) { * */ public function is_pcdata_element() { - return array_key_exists( $this->get_qualified_tag(), $this->pcdata_elements ); + return array_key_exists( $this->get_local_tag_name(), $this->pcdata_elements ); } @@ -1566,6 +1566,7 @@ private function parse_next_tag() { if ( $at + 1 >= $doc_length ) { $this->mark_incomplete_input(); + return false; } @@ -1904,11 +1905,13 @@ private function parse_next_tag() { foreach ( $this->attributes as $name => $attribute ) { if ( 'version' !== $name && 'encoding' !== $name && 'standalone' !== $name ) { $this->bail( 'Invalid attribute found in XML declaration.', self::ERROR_SYNTAX ); + return false; } } - if ( '1.0' !== $this->get_attribute_by_qualified_name( 'version' ) ) { + if ( '1.0' !== $this->get_attribute( '', 'version' ) ) { $this->bail( 'Unsupported XML version declared', self::ERROR_UNSUPPORTED ); + return false; } /** @@ -1917,15 +1920,18 @@ private function parse_next_tag() { * * See https://www.w3.org/TR/xml/#sec-predefined-ent. 
*/ - if ( null !== $this->get_attribute_by_qualified_name( 'encoding' ) - && 'UTF-8' !== strtoupper( $this->get_attribute_by_qualified_name( 'encoding' ) ) + if ( null !== $this->get_attribute( '', 'encoding' ) + && 'UTF-8' !== strtoupper( $this->get_attribute( '', 'encoding' ) ) ) { $this->bail( 'Unsupported XML encoding declared, only UTF-8 is supported.', self::ERROR_UNSUPPORTED ); + return false; } - if ( null !== $this->get_attribute_by_qualified_name( 'standalone' ) - && 'YES' !== strtoupper( $this->get_attribute_by_qualified_name( 'standalone' ) ) + + if ( null !== $this->get_attribute( '', 'standalone' ) + && 'YES' !== strtoupper( $this->get_attribute( '', 'standalone' ) ) ) { $this->bail( 'Standalone XML documents are not supported.', self::ERROR_UNSUPPORTED ); + return false; } $at = $this->bytes_already_parsed; @@ -1940,6 +1946,7 @@ private function parse_next_tag() { '>' === $xml[ $at + 1 ] ) ) { $this->bail( 'XML declaration closer not found.', self::ERROR_SYNTAX ); + return false; } $this->token_length = $at + 2 - $this->token_starts_at; @@ -2127,21 +2134,31 @@ private function parse_next_attribute() { * Confirm the tag name is valid with respect to XML namespaces. * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#Conformance */ - if ( substr_count( $attribute_name, ':' ) > 1 ) { - $this->bail( - sprintf('Invalid attribute name "%s" – more than one ":" in attribute name. Every attribute name must contain either zero or one colon.', $attribute_name), - self::ERROR_SYNTAX - ); + if ( false === $this->validate_qualified_name( $attribute_name ) ) { return false; } - $this->attributes[ $attribute_name ] = new WP_HTML_Attribute_Token( + /** + * We must compute the namespace prefix and local name for each attribute + * to assert there are no duplicate (local name, namespace) pairs in any + * element. Note we must still keep track of string indices to support + * replacements. 
+ */ + list( $namespace_prefix, $local_name ) = $this->parse_qualified_name( $attribute_name ); + + $this->attributes[ $attribute_name ] = new XMLAttributeToken( $attribute_name, $value_start, $value_length, $attribute_start, $attribute_end - $attribute_start, - false + $namespace_prefix, + $local_name + /** + * The full namespace is resolved in push_open_element() once + * we know all xmlns declarations in the current element's + * scope. + */ ); return true; @@ -2333,6 +2350,7 @@ private function after_tag() { unset( $this->lexical_updates[ $name ] ); } + $this->element = null; $this->token_starts_at = null; $this->token_length = null; $this->tag_name_starts_at = null; @@ -2622,13 +2640,13 @@ private function get_enqueued_attribute_value( $comparable_name ) { * $p->next_tag() === false; * $p->get_attribute_by_qualified_name( 'class' ) === null; * - * @param string $qualified_name Qualified name of attribute whose value is requested, e.g. wp:data-test-id + * @param string $local_name Qualified name of attribute whose value is requested, e.g. wp:data-test-id * * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`. * @since WP_VERSION * */ - public function get_attribute_by_qualified_name( $qualified_name ) { + public function get_attribute( $namespace_reference, $local_name ) { if ( self::STATE_MATCHED_TAG !== $this->parser_state && self::STATE_XML_DECLARATION !== $this->parser_state @@ -2637,16 +2655,16 @@ public function get_attribute_by_qualified_name( $qualified_name ) { } // Return any enqueued attribute value updates if they exist. - $enqueued_value = $this->get_enqueued_attribute_value( $qualified_name ); + $enqueued_value = $this->get_enqueued_attribute_value( $local_name ); if ( false !== $enqueued_value ) { return $enqueued_value; } - if ( ! isset( $this->attributes[ $qualified_name ] ) ) { + if ( ! 
isset( $this->attributes[ $local_name ] ) ) { return null; } - $attribute = $this->attributes[ $qualified_name ]; + $attribute = $this->attributes[ $local_name ]; $raw_value = substr( $this->xml, $attribute->value_starts_at, $attribute->value_length ); $decoded = XMLDecoder::decode( $raw_value ); @@ -2670,28 +2688,34 @@ public function get_attribute_by_qualified_name( $qualified_name ) { return $decoded; } - /** - * Returns the value of an attribute scoped to a given fully-qualified namespace name. - * - * Example: - * - * $p = new XMLProcessor( 'Test' ); - * $p->get_attribute_by_expanded_name( 'urn:ISBN:0-395-36341-6', 'test' ) === '123'; - * - * @param $namespace_name Fully-qualified namespace name, e.g. urn:ISBN:0-395-36341-6 - * @param $local_name Local name of the attribute, e.g. test - * - * @return string|null Value of the attribute, or null if not found. - */ - public function get_attribute_by_expanded_name( $namespace_name, $local_name ) { - // Find a local prefix of the fully-qualified namespace name - $namespaces = $this->namespace_stack[count($this->namespace_stack) - 1]; - $prefix = array_search($namespace_name, $namespaces); - if(false === $prefix) { - return null; + private function get_attribute_value( XMLAttributeToken $attribute ) { + // Return any enqueued attribute value updates if they exist. + // $enqueued_value = $this->get_enqueued_attribute_value( $local_name ); + // if ( false !== $enqueued_value ) { + // return $enqueued_value; + // } + + $raw_value = substr( $this->xml, $attribute->value_starts_at, $attribute->value_length ); + + $decoded = XMLDecoder::decode( $raw_value ); + if ( ! isset( $decoded ) ) { + /** + * If the attribute contained an invalid value, it's + * a fatal error. + * + * @see WP_XML_Decoder::decode() + */ + $this->last_error = self::ERROR_SYNTAX; + _doing_it_wrong( + __METHOD__, + __( 'Invalid attribute value encountered.' ), + 'WP_VERSION' + ); + + return false; } - // Found! 
Create a qualified name and return the attribute value - return $this->get_attribute_by_qualified_name($prefix . ':' . $local_name); + + return $decoded; } /** @@ -2747,18 +2771,38 @@ public function get_attribute_qualified_names_with_prefix( $prefix ) { * @since WP_VERSION * */ - public function get_qualified_tag() { + public function get_local_tag_name() { + if ( null !== $this->element ) { + // Return cached name if we already have it. + return $this->element->local_name; + } + + $qualified_tag_name = $this->get_qualified_tag_name(); + if ( null === $qualified_tag_name ) { + return null; + } + + list( $_, $local_name ) = $this->parse_qualified_name( $qualified_tag_name ); + + return $local_name; + } + + public function get_qualified_tag_name() { + if ( null !== $this->element ) { + // Return cached name if we already have it. + return $this->element->qualified_name; + } + if ( null === $this->tag_name_starts_at ) { return null; } $tag_name = substr( $this->xml, $this->tag_name_starts_at, $this->tag_name_length ); - - if ( self::STATE_MATCHED_TAG === $this->parser_state ) { - return $tag_name; + if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { + return null; } - return null; + return $tag_name; } /** @@ -2773,16 +2817,11 @@ public function get_qualified_tag() { * @return string|null The namespace prefix of the matched tag, or null if not available. 
*/ public function get_namespace_prefix() { - $tag_name = $this->get_qualified_tag(); - // Only tags have a namespace prefix - if (null === $tag_name) { + if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { return null; } - $prefix_length = strcspn($tag_name, ':'); - if (0 === $prefix_length || $prefix_length === strlen($tag_name)) { - return self::DEFAULT_NAMESPACE_PREFIX; - } - return substr($tag_name, 0, $prefix_length); + + return $this->element->namespace_prefix; } /** @@ -2797,21 +2836,12 @@ public function get_namespace_prefix() { * * @return string|null The namespace reference of the matched tag, or null if not available. */ - public function get_namespace_reference() { - $namespace_prefix = $this->get_namespace_prefix(); - if(null === $namespace_prefix) { + public function get_namespace() { + if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { return null; } - /** - * Look up the namespace reference in the last element of the namespace stack – - * it reflects all the declared, inherited, and unset namespaces that are in effect - * for the current element. 
- */ - $namespaces = $this->namespace_stack[count($this->namespace_stack) - 1]; - if (isset($namespaces[$namespace_prefix])) { - return $namespaces[$namespace_prefix]; - } - return null; + + return $this->element->namespace; } /** @@ -3018,7 +3048,7 @@ public function get_token_type() { public function get_token_name() { switch ( $this->parser_state ) { case self::STATE_MATCHED_TAG: - return $this->get_qualified_tag(); + return $this->get_local_tag_name(); case self::STATE_TEXT_NODE: return '#text'; @@ -3532,25 +3562,26 @@ private function step_in_element( $node_to_process = self::PROCESS_NEXT_NODE ) { return true; case '#tag': // Update the stack of open elements - $tag_name = $this->get_qualified_tag(); + $tag_qname = $this->get_qualified_tag_name(); if ( $this->is_tag_closer() ) { - $popped = $this->pop_open_element(); - if ( $popped !== $tag_name ) { + $popped_qname = $this->pop_open_element()->qualified_name; + if ( $popped_qname !== $tag_qname ) { $this->bail( sprintf( // translators: %1$s is the name of the closing HTML tag, %2$s is the name of the opening HTML tag. __( 'The closing tag "%1$s" did not match the opening tag "%2$s".' 
), - $tag_name, - $popped + $tag_qname, + $popped_qname ), self::ERROR_SYNTAX ); } - if ( count( $this->stack_of_open_elements ) === 0 ) { + if ( $this->stack_of_open_elements->count() === 0 ) { $this->parser_context = self::IN_MISC_CONTEXT; } } else { - $this->push_open_element( $tag_name ); + $this->push_open_element( $tag_qname ); + $this->element = $this->stack_of_open_elements->top(); } return true; @@ -3632,7 +3663,7 @@ private function step_in_misc( $node_to_process = self::PROCESS_NEXT_NODE ) { * */ public function get_breadcrumbs() { - return $this->stack_of_open_elements; + return $this->stack_of_open_elements->get_items(); } /** @@ -3673,13 +3704,14 @@ public function matches_breadcrumbs( $breadcrumbs ) { if ( '#tag' === $this->get_token_type() && '*' !== $crumb && - $this->get_qualified_tag() !== $crumb + $this->get_local_tag_name() !== $crumb ) { return false; } - for ( $i = count( $this->stack_of_open_elements ) - 1; $i >= 0; $i -- ) { - $tag_name = $this->stack_of_open_elements[ $i ]; + // @TODO: Match namespaces! 
+ for ( $i = $this->stack_of_open_elements->count() - 1; $i >= 0; $i -- ) { + $tag_name = $this->stack_of_open_elements->get_items()[ $i ]->local_name; $crumb = current( $breadcrumbs ); if ( '*' !== $crumb && $tag_name !== $crumb ) { @@ -3719,51 +3751,55 @@ public function matches_breadcrumbs( $breadcrumbs ) { * */ public function get_current_depth() { - return count( $this->stack_of_open_elements ); + return $this->stack_of_open_elements->count(); } private function pop_open_element() { - array_pop($this->namespace_stack); - return array_pop( $this->stack_of_open_elements ); + return $this->stack_of_open_elements->pop(); } - private function push_open_element( $tag_name ) { - // Track open elements - array_push( - $this->stack_of_open_elements, - $tag_name - ); + private function push_open_element( $qualified_name ) { + /** + * Before we push the element, we need to compute its: + * + * - local name + * - namespace prefix + * - namespace URI + * - namespaces in current element's scope + */ + + // All qualified names are validated at this point. + list( $tag_namespace_prefix, $tag_local_name ) = $this->parse_qualified_name( $qualified_name ); + + // Resolve namespaces: /** * By default, inherit all namespaces from the parent element. */ - $namespaces = $this->namespace_stack[count($this->namespace_stack) - 1]; - - // Override parent namespaces with the current element's declarations. 
- foreach($this->attributes as $attribute) { + $namespaces = $this->stack_of_open_elements->get_namespaces_in_scope(); + foreach ( $this->attributes as $attribute ) { /** * xmlns attribute is the default namespace * xmlns: declares a namespace prefix scoped to the current element and its descendants * * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#ns-decl */ - if( 'xmlns' === $attribute->name) { - $namespaces[self::DEFAULT_NAMESPACE_PREFIX] = $this->get_attribute_by_qualified_name($attribute->name); + if ( 'xmlns' === $attribute->name ) { + $value = $this->get_attribute_value( $attribute ); + $namespaces[ self::DEFAULT_NAMESPACE_PREFIX ] = $value; continue; } - if (substr($attribute->name, 0, 6) === 'xmlns:') { - $prefix = substr($attribute->name, 6); - if (self::DEFAULT_NAMESPACE_PREFIX === $prefix) { - $this->bail( sprintf('Invalid namespace prefix: %s', $attribute->name), self::ERROR_SYNTAX ); - return false; - } - $ns_reference = $this->get_attribute_by_qualified_name($attribute->name); + if ( 'xmlns' === $attribute->namespace_prefix ) { + $value = $this->get_attribute_value( $attribute ); + /** * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#xmlReserved */ - if('xml' === $prefix && 'http://www.w3.org/XML/1998/namespace' !== $ns_reference) { - $this->bail( 'The `xml` namespace prefix is by definition bound to the namespace name http://www.w3.org/XML/1998/namespace and must not be overridden.', self::ERROR_SYNTAX ); + if ( 'xml' === $attribute->namespace_prefix && 'http://www.w3.org/XML/1998/namespace' !== $value ) { + $this->bail( 'The `xml` namespace prefix is by definition bound to the namespace name http://www.w3.org/XML/1998/namespace and must not be overridden.', + self::ERROR_SYNTAX ); + return false; } /** @@ -3772,54 +3808,135 @@ private function push_open_element( $tag_name ) { * association of the prefix with a namespace name. Further declarations MAY * re-declare the prefix again. 
*/ - if('' === $ns_reference) { - unset($namespaces[$prefix]); + if ( '' === $value ) { + unset( $namespaces[ $attribute->namespace_prefix ] ); continue; } - $namespaces[$prefix] = $ns_reference; + + $namespaces[ $attribute->local_name ] = $value; continue; } } - array_push($this->namespace_stack, $namespaces); + + // Validate namespaces for the element and its attributes: + + /** + * Validate the element namespace. + */ + if ( ! array_key_exists( $tag_namespace_prefix, $namespaces ) ) { + $this->bail( + sprintf( + 'Namespace prefix "%s" does not resolve to any namespace in the current element\'s scope.', + $tag_namespace_prefix + ), + self::ERROR_SYNTAX + ); + } /** - * Now that we know the namespaces associated with the current element, - * assert that no two attributes have the same (name, namespace) pair. + * Let's assert: + * + * * All attributes have valid namespaces. + * * No two attributes have the same (local name, namespace) pair. * * @see https://www.w3.org/TR/2006/REC-xml-names11-20060816/#uniqAttrs */ - $seen = array(); + $namespaced_attributes = array(); foreach ( $this->attributes as $attribute ) { - $attr_name = $attribute->name; - // Split into prefix and local name if a colon exists. - $colon_pos = strpos( $attr_name, ':' ); - if ( false === $colon_pos ) { - // Unprefixed attributes do not have a default namespace - // and were already checked for uniqueness in parse_next_attribute() - continue; + list( $attribute_namespace_prefix, $attribute_local_name ) = $this->parse_qualified_name( $attribute->name ); + if ( ! array_key_exists( $attribute_namespace_prefix, $namespaces ) ) { + $this->bail( + sprintf( + 'Attribute "%s" has an invalid namespace prefix "%s".', + $attribute->name, + $attribute_namespace_prefix + ), + self::ERROR_SYNTAX + ); + + return false; } - $prefix = substr($attr_name, 0, $colon_pos); - $local_name = substr($attr_name, $colon_pos + 1); - $namespace_uri = $namespaces[ $prefix ] ?? 
self::DEFAULT_NAMESPACE_PREFIX; + $namespace_uri = $namespaces[ $attribute_namespace_prefix ]; /** * It looks supicious but it's safe – $local_name is guaranteed to not contain - * a colon at this point. + * curly braces at this point. */ - $key = $namespace_uri . ':' . $local_name; - if ( isset( $seen[ $key ] ) ) { + $attribute_full_key = '{' . $namespace_uri . '}' . $attribute_local_name; + if ( isset( $namespaced_attributes[ $attribute_full_key ] ) ) { $this->bail( sprintf( 'Duplicate attribute "%s" with namespace "%s" found in the same element.', - $local_name, + $attribute_local_name, $namespace_uri ), self::ERROR_SYNTAX ); + return false; } - $seen[ $key ] = true; + $namespaced_attributes[ $attribute_full_key ] = true; + } + + // Store attributes with their namespaces. + $this->attributes = $namespaced_attributes; + + // Finally, push the element onto the stack. + $this->stack_of_open_elements->push( + new XMLElement( $tag_local_name, $tag_namespace_prefix, $namespaces[ $tag_namespace_prefix ], $namespaces ) + ); + + return true; + } + + /** + * Parses a qualified name into a namespace prefix and local name. + * + * Example: + * + * $processor = new XMLProcessor( '' ); + * $processor->parse_qualified_name( 'wp:post' ); // Returns array( 'wp', 'post' ) + * $processor->parse_qualified_name( 'image' ); // Returns array( '', 'image' ) + * + * @param string $qualified_name The qualified name to parse. + * + * @return array The namespace prefix and local name. 
+ */ + private function parse_qualified_name( $qualified_name ) { + $namespace_prefix = self::DEFAULT_NAMESPACE_PREFIX; + $local_name = $qualified_name; + + $prefix_length = strcspn( $qualified_name, ':' ); + if ( null !== $prefix_length && $prefix_length !== strlen( $qualified_name ) ) { + $namespace_prefix = substr( $qualified_name, 0, $prefix_length ); + $local_name = substr( $qualified_name, $prefix_length + 1 ); } + + return array( $namespace_prefix, $local_name ); + } + + private function validate_qualified_name( $qualified_name ) { + if ( substr_count( $qualified_name, ':' ) > 1 ) { + $this->bail( + sprintf( 'Invalid identifier "%s" – more than one ":" in tag name. Every tag name must contain either zero or one colon.', + $qualified_name ), + self::ERROR_SYNTAX + ); + + return false; + } + + $prefix_length = strcspn( $qualified_name, ':' ); + if ( $prefix_length === 0 && strlen( $qualified_name ) > 0 ) { + $this->bail( + sprintf( 'Invalid identifier "%s" – namespace qualifier must not have zero length.', $qualified_name ), + self::ERROR_SYNTAX + ); + + return false; + } + + return true; } private function mark_incomplete_input( diff --git a/components/XML/XMLStackOfOpenElements.php b/components/XML/XMLStackOfOpenElements.php new file mode 100644 index 00000000..d9cc7597 --- /dev/null +++ b/components/XML/XMLStackOfOpenElements.php @@ -0,0 +1,84 @@ +stack[] = $element; + } + + /** + * Pops the top XMLElement from the stack. + * + * @return XMLElement|null Returns the popped element, or null if stack is empty. + */ + public function pop() { + if ( empty( $this->stack ) ) { + return null; + } + return array_pop( $this->stack ); + } + + /** + * Returns the top XMLElement on the stack without removing it. + * + * @return XMLElement|null Returns the top element, or null if stack is empty. 
+ */ + public function top() { + if ( empty( $this->stack ) ) { + return null; + } + return $this->stack[ count( $this->stack ) - 1 ]; + } + + /** + * Returns the number of elements in the stack. + * + * @return int + */ + public function count() { + return count( $this->stack ); + } + + public function get_items() { + return $this->stack; + } + + /** + * Returns the namespaces in scope for the top element. + * + * @return array|null Namespaces in scope, or null if stack is empty. + */ + public function get_namespaces_in_scope() { + $top = $this->top(); + if ( null === $top ) { + // Namespaces defined by default in every XML document. + return array( + 'xml' => 'http://www.w3.org/XML/1998/namespace', // Predefined, cannot be unbound or changed + 'xmlns' => 'http://www.w3.org/2000/xmlns/', // Reserved for xmlns attributes, not a real namespace for elements/attributes + XMLProcessor::DEFAULT_NAMESPACE_PREFIX => '', // Default namespace is initially empty (no namespace) + ); + } + return $top->namespaces_in_scope; + } + +} From 44cf981209f92951cea1d0fd24fbac9ab76e0c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Tue, 27 May 2025 16:19:20 +0200 Subject: [PATCH 3/6] Support namespaces in all methods --- .../EntityReader/EPubEntityReader.php | 2 +- .../EntityReader/WXREntityReader.php | 8 +- components/XML/Tests/XMLProcessorTest.php | 403 +++++++------- components/XML/XMLAttributeToken.php | 6 +- components/XML/XMLElement.php | 4 + components/XML/XMLProcessor.php | 492 ++++++++++-------- components/XML/XMLStackOfOpenElements.php | 10 + phpunit.xml | 2 +- 8 files changed, 501 insertions(+), 426 deletions(-) diff --git a/components/DataLiberation/EntityReader/EPubEntityReader.php b/components/DataLiberation/EntityReader/EPubEntityReader.php index d3528560..806276f9 100644 --- a/components/DataLiberation/EntityReader/EPubEntityReader.php +++ b/components/DataLiberation/EntityReader/EPubEntityReader.php @@ -167,7 +167,7 @@ private function 
parse_manifest() { } if ( $xml->matches_breadcrumbs( array( 'metadata', '*' ) ) ) { $parsed['metadata'][] = array( - 'tag' => $xml->get_local_tag_name(), + 'tag' => $xml->get_tag_local_name(), 'attributes' => $parsed_entry, ); } elseif ( $xml->matches_breadcrumbs( array( 'manifest', 'item' ) ) ) { diff --git a/components/DataLiberation/EntityReader/WXREntityReader.php b/components/DataLiberation/EntityReader/WXREntityReader.php index 3cc4edd9..56b5682c 100644 --- a/components/DataLiberation/EntityReader/WXREntityReader.php +++ b/components/DataLiberation/EntityReader/WXREntityReader.php @@ -659,7 +659,7 @@ private function read_next_entity() { $this->last_xml_cursor_outside_of_entity = $this->xml->get_reentrancy_cursor(); } - $tag = $this->xml->get_local_tag_name(); + $tag = $this->xml->get_tag_local_name(); /** * Custom adjustment: the Accessibility WXR file uses a non-standard * wp:wp_author tag. @@ -741,7 +741,7 @@ private function read_next_entity() { $is_site_option_opener = ( count( $this->xml->get_breadcrumbs() ) === 3 && $this->xml->matches_breadcrumbs( array( 'rss', 'channel', '*' ) ) && - array_key_exists( $this->xml->get_local_tag_name(), static::KNOWN_SITE_OPTIONS ) + array_key_exists( $this->xml->get_tag_local_name(), static::KNOWN_SITE_OPTIONS ) ); if ( $is_site_option_opener ) { $this->last_xml_byte_offset_outside_of_entity = $this->xml->get_token_byte_offset_in_the_input_stream(); @@ -848,13 +848,13 @@ private function read_next_entity() { * @return bool Whether a site_option entity was emitted. */ private function parse_site_option() { - if ( ! array_key_exists( $this->xml->get_local_tag_name(), static::KNOWN_SITE_OPTIONS ) ) { + if ( ! 
array_key_exists( $this->xml->get_tag_local_name(), static::KNOWN_SITE_OPTIONS ) ) { return false; } $this->entity_type = 'site_option'; $this->entity_data = array( - 'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_local_tag_name() ], + 'option_name' => static::KNOWN_SITE_OPTIONS[ $this->xml->get_tag_local_name() ], 'option_value' => $this->text_buffer, ); $this->emit_entity(); diff --git a/components/XML/Tests/XMLProcessorTest.php b/components/XML/Tests/XMLProcessorTest.php index 5a2bd814..acec3341 100644 --- a/components/XML/Tests/XMLProcessorTest.php +++ b/components/XML/Tests/XMLProcessorTest.php @@ -15,9 +15,9 @@ * @coversDefaultClass XMLProcessor */ class XMLProcessorTest extends TestCase { - const XML_SIMPLE = 'Text'; - const XML_WITH_CLASSES = 'Text'; - const XML_MALFORMED = 'Back to notifications'; + const XML_SIMPLE = 'Text'; + const XML_WITH_CLASSES = 'Text'; + const XML_MALFORMED = 'Back to notifications'; public function beforeEach() { $GLOBALS['_doing_it_wrong_messages'] = array(); @@ -26,36 +26,36 @@ public function beforeEach() { /** * @ticket 61365 * - * @covers XMLProcessor::get_local_tag_name + * @covers XMLProcessor::get_tag_local_name */ public function test_get_tag_returns_null_before_finding_tags() { $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertNull( $processor->get_local_tag_name(), 'Calling get_tag() without selecting a tag did not return null' ); + $this->assertNull( $processor->get_tag_local_name(), 'Calling get_tag() without selecting a tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_local_tag_name + * @covers XMLProcessor::get_tag_local_name */ public function test_get_tag_returns_null_when_not_in_open_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); - $this->assertNull( 
$processor->get_local_tag_name(), 'Accessing a non-existing tag did not return null' ); + $this->assertFalse( $processor->next_tag( array( '', 'p') ), 'Querying a non-existing tag did not return false' ); + $this->assertNull( $processor->get_tag_local_name(), 'Accessing a non-existing tag did not return null' ); } /** * @ticket 61365 * - * @covers XMLProcessor::get_local_tag_name + * @covers XMLProcessor::get_tag_local_name */ public function test_get_tag_returns_open_tag_name() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertSame( 'wp:content', $processor->get_local_tag_name(), 'Accessing an existing tag name did not return "div"' ); + $this->assertTrue( $processor->next_tag( 'content' ), 'Querying an existing tag did not return true' ); + $this->assertSame( 'content', $processor->get_tag_local_name(), 'Accessing an existing tag name did not return "div"' ); } /** @@ -110,10 +110,10 @@ public static function data_is_empty_element() { * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_null_when_not_in_open_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); - $this->assertNull( $processor->get_attribute( 'wp:post-type' ), + $this->assertNull( $processor->get_attribute( '', 'wp:post-type' ), 'Accessing an attribute of a non-existing tag did not return null' ); } @@ -123,12 +123,12 @@ public function test_get_attribute_returns_null_when_not_in_open_tag() { * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_null_when_in_closing_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = 
XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); + $this->assertTrue( $processor->next_tag( array( 'w.org', 'content' ) ), 'Querying an existing tag did not return true' ); $this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' ); $this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' ); - $this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' ); + $this->assertNull( $processor->get_attribute( 'w.org', 'post-type' ), 'Accessing an attribute of a closing tag did not return null' ); } /** @@ -137,10 +137,10 @@ public function test_get_attribute_returns_null_when_in_closing_tag() { * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_null_when_attribute_missing() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertNull( $processor->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' ); + $this->assertTrue( $processor->next_tag( array( 'w.org', 'content' ) ), 'Querying an existing tag did not return true' ); + $this->assertNull( $processor->get_attribute( '', 'test-id' ), 'Accessing a non-existing attribute did not return null' ); } /** @@ -150,9 +150,9 @@ public function test_get_attribute_returns_null_when_attribute_missing() { * @covers XMLProcessor::get_attribute */ public function test_attributes_are_rejected_in_tag_closers() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); + 
$this->assertTrue( $processor->next_tag( 'content' ), 'Querying an existing tag did not return true' ); $this->assertTrue( $processor->next_token(), 'Querying a text node did not return true.' ); $this->assertFalse( $processor->next_token(), 'Querying an existing but invalid closing tag did not return false.' ); } @@ -163,10 +163,10 @@ public function test_attributes_are_rejected_in_tag_closers() { * @covers XMLProcessor::get_attribute */ public function test_get_attribute_returns_attribute_value() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); - $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' ); - $this->assertSame( 'test', $processor->get_attribute( 'wp:post-type' ), + $this->assertTrue( $processor->next_tag( array( 'breadcrumbs' => array( array( 'w.org', 'content' ) ) ) ), 'Querying an existing tag did not return true' ); + $this->assertSame( 'test', $processor->get_attribute( 'w.org', 'post-type' ), 'Accessing a wp:post-type="test" attribute value did not return "test"' ); } @@ -201,10 +201,10 @@ public function test_parsing_stops_on_malformed_attribute_value_no_quotes() { * @covers XMLProcessor::get_attribute */ public function test_malformed_attribute_value_containing_ampersand_is_treated_as_plaintext() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' ); - $this->assertEquals( 'WordPress & WordPress', $processor->get_attribute( 'enabled' ) ); + $this->assertEquals( 'WordPress & WordPress', $processor->get_attribute( '', 'enabled' ) ); } /** @@ -214,10 +214,10 @@ public function test_malformed_attribute_value_containing_ampersand_is_treated_a * @covers XMLProcessor::get_attribute */ public function 
test_malformed_attribute_value_containing_entity_without_semicolon_is_treated_as_plaintext() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' ); - $this->assertEquals( '”', $processor->get_attribute( 'enabled' ) ); + $this->assertEquals( '”', $processor->get_attribute( '', 'enabled' ) ); } /** @@ -262,7 +262,7 @@ public function test_parsing_stops_on_malformed_attribute_name_contains_slash() * @covers XMLProcessor::get_attribute */ public function test_get_modifiable_text_returns_a_decoded_value() { - $processor = XMLProcessor::create_from_string( '“😄”' ); + $processor = XMLProcessor::create_from_string( '“😄”' ); $processor->next_tag( 'root' ); $processor->next_token(); @@ -285,7 +285,7 @@ public function test_get_attribute_returns_a_decoded_value() { $this->assertTrue( $processor->next_tag( 'root' ), 'Querying a tag did not return true' ); $this->assertEquals( '“😄”', - $processor->get_attribute( 'encoded-data' ), + $processor->get_attribute( '', 'encoded-data' ), 'Reading an encoded attribute did not decode it.' ); } @@ -298,17 +298,17 @@ public function test_get_attribute_returns_a_decoded_value() { * @param string $attribute_name Name of data-enabled attribute with case variations. 
*/ public function test_get_attribute_is_case_sensitive() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); $this->assertEquals( 'true', - $processor->get_attribute( 'DATA-enabled' ), + $processor->get_attribute( '', 'DATA-enabled' ), 'Accessing an attribute by a same-cased name did return not its value' ); $this->assertNull( - $processor->get_attribute( 'data-enabled' ), + $processor->get_attribute( '', 'data-enabled' ), 'Accessing an attribute by a differently-cased name did return its value' ); } @@ -322,14 +322,14 @@ public function test_get_attribute_is_case_sensitive() { public function test_remove_attribute_is_case_sensitive() { $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); - $processor->remove_attribute( 'data-enabled' ); + $processor->remove_attribute( '', 'data-enabled' ); $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did remove the attribute' ); - $processor->remove_attribute( 'DATA-enabled' ); + $processor->remove_attribute( '', 'DATA-enabled' ); - $this->assertSame( 'Test', $processor->get_updated_xml(), + $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did not remove the attribute' ); } @@ -339,11 +339,11 @@ public function test_remove_attribute_is_case_sensitive() { * @covers XMLProcessor::set_attribute */ public function test_set_attribute_is_case_sensitive() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); - $processor->set_attribute( 'data-enabled', 'abc' ); + $processor->set_attribute( '', 'data-enabled', 'abc' ); - $this->assertSame( 'Test', $processor->get_updated_xml(), + $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-insensitive set_attribute call did not update the existing attribute' ); } @@ -355,7 
+355,7 @@ public function test_set_attribute_is_case_sensitive() { public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() { $processor = XMLProcessor::create_from_string( 'Test' ); $this->assertNull( - $processor->get_attribute_qualified_names_with_prefix( 'data-' ), + $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing attributes by their prefix did not return null when no tag was selected' ); } @@ -366,9 +366,9 @@ public function test_get_attribute_names_with_prefix_returns_null_before_finding * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); - $processor->next_tag( 'p' ); - $this->assertNull( $processor->get_attribute_qualified_names_with_prefix( 'data-' ), + $processor = XMLProcessor::create_from_string( 'Test' ); + $processor->next_tag( 'w.org', 'content' ); + $this->assertNull( $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing attributes of a non-existing tag did not return null' ); } @@ -378,11 +378,11 @@ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_op * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); - $processor->next_tag( 'wp:content' ); + $processor = XMLProcessor::create_from_string( 'Test' ); + $processor->next_tag( 'w.org', 'content' ); $processor->next_tag( array( 'tag_closers' => 'visit' ) ); - $this->assertNull( $processor->get_attribute_qualified_names_with_prefix( 'data-' ), + $this->assertNull( $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing attributes of a closing tag did not return null' ); } @@ -395,7 +395,7 @@ public function 
test_get_attribute_names_with_prefix_returns_empty_array_when_no $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag( 'wp:content' ); - $this->assertSame( array(), $processor->get_attribute_qualified_names_with_prefix( 'data-' ), + $this->assertSame( array(), $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' ); } @@ -405,12 +405,12 @@ public function test_get_attribute_names_with_prefix_returns_empty_array_when_no * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_original_case() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); $this->assertSame( - array( 'data-test-ID' ), - $processor->get_attribute_qualified_names_with_prefix( 'data-' ), + array( array( '', 'data-test-ID' ) ), + $processor->get_attribute_names_with_prefix( '', 'data-' ), 'Accessing attributes by their prefix did not return their lowercase names' ); } @@ -421,18 +421,18 @@ public function test_get_attribute_names_with_prefix_returns_matching_attribute_ * @covers XMLProcessor::get_attribute_qualified_names_with_prefix */ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); - $processor->set_attribute( 'data-test-id', '14' ); + $processor->set_attribute( '', 'data-test-id', '14' ); $this->assertSame( - 'Test', + 'Test', $processor->get_updated_xml(), "Updated XML doesn't include attribute added via set_attribute" ); $this->assertSame( - array( 'data-test-id', 'data-foo' ), - $processor->get_attribute_qualified_names_with_prefix( 'data-' ), + array( array( '', 'data-test-id' ), array( '', 'data-foo' ) ), + 
$processor->get_attribute_names_with_prefix( '', 'data-' ), "Accessing attribute names doesn't find attribute added via set_attribute" ); } @@ -443,12 +443,13 @@ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_ * @covers XMLProcessor::__toString */ public function test_to_string_returns_updated_xml() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); + $processor->next_tag(); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $processor->next_tag(); - $processor->set_attribute( 'id', 'wp:content-id-1' ); + $processor->set_attribute( '', 'id', 'wp:content-id-1' ); $this->assertSame( $processor->get_updated_xml(), @@ -463,32 +464,32 @@ public function test_to_string_returns_updated_xml() { * @covers XMLProcessor::get_updated_xml */ public function test_get_updated_xml_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() { - $processor = XMLProcessor::create_from_string( 'Test' ); + $processor = XMLProcessor::create_from_string( 'Test' ); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $processor->next_tag(); - $processor->set_attribute( 'id', 'wp:content-id-1' ); + $processor->set_attribute( '', 'id', 'content-id-1' ); $this->assertSame( - 'Test', + 'Test', $processor->get_updated_xml(), 'Calling get_updated_xml after updating the attributes of the second tag returned different XML than expected' ); - $processor->set_attribute( 'id', 'wp:content-id-2' ); + $processor->set_attribute( '', 'id', 'content-id-2' ); $this->assertSame( - 'Test', + 'Test', $processor->get_updated_xml(), 'Calling get_updated_xml after updating the attributes of the second tag for the second time returned different XML than expected' ); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $this->assertSame( 
- 'Test', + 'Test', $processor->get_updated_xml(), 'Calling get_updated_xml after removing the id attribute of the third tag returned different XML than expected' ); @@ -517,17 +518,17 @@ public function test_get_updated_xml_without_updating_any_attributes_returns_the * @expectedIncorrectUsage XMLProcessor::parse_next_attribute */ public function test_get_updated_xml_applies_updates_to_content_after_seeking_to_before_parsed_bytes() { - $processor = XMLProcessor::create_from_string( '' ); + $processor = XMLProcessor::create_from_string( '' ); $processor->next_tag(); - $processor->set_attribute( 'wonky', 'true' ); + $processor->set_attribute( '', 'wonky', 'true' ); $processor->next_tag(); $processor->set_bookmark( 'here' ); $processor->next_tag( array( 'tag_closers' => 'visit' ) ); $processor->seek( 'here' ); - $this->assertSame( '', $processor->get_updated_xml() ); + $this->assertSame( '', $processor->get_updated_xml() ); } public function test_declare_element_as_pcdata() { @@ -545,7 +546,7 @@ public function test_declare_element_as_pcdata() { But! It is all treated as text. '; $processor = XMLProcessor::create_from_string( - "$text" + "$text" ); $processor->declare_element_as_pcdata( 'my-pcdata' ); $processor->next_tag( 'my-pcdata' ); @@ -622,34 +623,34 @@ public function get_raw_token() { public static function data_xml_nth_token_substring() { return array( // Tags. 
- 'DIV start tag' => array( '', 1, '' ), + 'DIV start tag' => array( '', 1, '' ), 'DIV start tag with attributes' => array( - '', + '', 1, - '', + '', ), - 'Nested DIV' => array( '', 2, '' ), - 'Sibling DIV' => array( '', 3, '' ), - 'DIV before text' => array( ' text', 1, '' ), - 'DIV after comment' => array( '', 3, '' ), - 'DIV before comment' => array( ' ', 1, '' ), - 'Start "self-closing" tag' => array( '', 1, '' ), + 'Nested DIV' => array( '', 2, '' ), + 'Sibling DIV' => array( '', 3, '' ), + 'DIV before text' => array( ' text', 1, '' ), + 'DIV after comment' => array( '', 3, '' ), + 'DIV before comment' => array( ' ', 1, '' ), + 'Start "self-closing" tag' => array( '', 1, '' ), 'Void tag' => array( '', 1, '' ), 'Void tag w/self-closing flag' => array( '', 1, '' ), - 'Void tag inside DIV' => array( '', 2, '' ), + 'Void tag inside DIV' => array( '', 2, '' ), // Text. 'Text' => array( 'Just text', 1, 'Just text' ), - 'Text in DIV' => array( 'Text', 2, 'Text' ), - 'Text before DIV' => array( 'Text', 1, 'Text' ), + 'Text in DIV' => array( 'Text', 2, 'Text' ), + 'Text before DIV' => array( 'Text', 1, 'Text' ), 'Text after comment' => array( 'Text', 2, 'Text' ), 'Text before comment' => array( 'Text ', 1, 'Text' ), // Comments. 
'Comment' => array( '', 1, '' ), - 'Comment in DIV' => array( '', 2, '' ), - 'Comment before DIV' => array( '', 1, '' ), - 'Comment after DIV' => array( '', 3, '' ), + 'Comment in DIV' => array( '', 2, '' ), + 'Comment before DIV' => array( '', 1, '' ), + 'Comment after DIV' => array( '', 3, '' ), 'Comment after comment' => array( '', 2, '' ), 'Comment before comment' => array( ' ', 1, '' ), 'Empty comment' => array( '', 1, '' ), @@ -685,7 +686,7 @@ public function test_next_tag_should_return_false_for_a_non_existing_tag() { */ public function test_normalizes_carriage_returns_in_text_nodes() { $processor = XMLProcessor::create_from_string( - "We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns" + "We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns" ); $processor->next_tag(); $processor->next_token(); @@ -703,7 +704,7 @@ public function test_normalizes_carriage_returns_in_text_nodes() { */ public function test_normalizes_carriage_returns_in_cdata() { $processor = XMLProcessor::create_from_string( - "" + "" ); $processor->next_tag(); $processor->next_token(); @@ -722,10 +723,10 @@ public function test_normalizes_carriage_returns_in_cdata() { * @covers XMLProcessor::is_tag_closer */ public function test_next_tag_should_not_stop_on_closers() { - $processor = XMLProcessor::create_from_string( '' ); + $processor = XMLProcessor::create_from_string( '' ); - $this->assertTrue( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), 'Did not find desired tag opener' ); - $this->assertFalse( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), + $this->assertTrue( $processor->next_tag( array( 'breadcrumbs' => array( array( 'w.org', 'content' ) ) ) ), 'Did not find desired tag opener' ); + $this->assertFalse( $processor->next_tag( array( 'breadcrumbs' => array( array( 'w.org', 'content' ) ) ) ), 'Visited an unwanted tag, a tag closer' ); } @@ -738,11 +739,11 @@ public function test_next_tag_should_not_stop_on_closers() { * @covers 
XMLProcessor::get_updated_xml */ public function test_internal_pointer_returns_to_original_spot_after_inserting_content_before_cursor() { - $tags = XMLProcessor::create_from_string( 'outside
inside
' ); + $tags = XMLProcessor::create_from_string( 'outside
inside
' ); $tags->next_tag(); $tags->next_tag(); - $tags->set_attribute( 'wp:post-type', 'foo' ); + $tags->set_attribute( '', 'wp:post-type', 'foo' ); $tags->next_tag( 'section' ); // Return to this spot after moving ahead. @@ -751,9 +752,9 @@ public function test_internal_pointer_returns_to_original_spot_after_inserting_c // Move ahead. $tags->next_tag( 'photo' ); $tags->seek( 'here' ); - $this->assertSame( 'outside
inside
', + $this->assertSame( 'outside
inside
', $tags->get_updated_xml() ); - $this->assertSame( 'section', $tags->get_local_tag_name() ); + $this->assertSame( 'section', $tags->get_tag_local_name() ); $this->assertFalse( $tags->is_tag_closer() ); } @@ -768,7 +769,7 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); $this->assertFalse( $processor->next_tag( 'wp:content' ), 'Querying a non-existing tag did not return false' ); - $processor->set_attribute( 'id', 'primary' ); + $processor->set_attribute( '', 'id', 'primary' ); $this->assertSame( self::XML_SIMPLE, @@ -786,18 +787,18 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar * @covers XMLProcessor::remove_class */ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { - $processor = XMLProcessor::create_from_string( '' ); + $processor = XMLProcessor::create_from_string( '' ); $processor->next_token(); $this->assertFalse( $processor->is_tag_closer(), 'Skipped tag opener' ); $processor->next_token(); $this->assertTrue( $processor->is_tag_closer(), 'Skipped tag closer' ); - $this->assertFalse( $processor->set_attribute( 'id', 'test' ), + $this->assertFalse( $processor->set_attribute( '', 'id', 'test' ), "Allowed setting an attribute on a tag closer when it shouldn't have" ); - $this->assertFalse( $processor->remove_attribute( 'invalid-id' ), + $this->assertFalse( $processor->remove_attribute( '', 'invalid-id' ), "Allowed removing an attribute on a tag closer when it shouldn't have" ); $this->assertSame( - '', + '', $processor->get_updated_xml(), 'Calling get_updated_xml after updating a non-existing tag returned an XML that was different from the original XML' ); @@ -812,16 +813,16 @@ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() { public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() { 
$processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'test-attribute', 'test-value' ); + $processor->set_attribute( '', 'test-attribute', 'test-value' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML does not include attribute added via set_attribute()' ); $this->assertSame( 'test-value', - $processor->get_attribute( 'test-attribute' ), + $processor->get_attribute( '', 'test-attribute' ), 'get_attribute() (called after get_updated_xml()) did not return attribute added via set_attribute()' ); } @@ -834,15 +835,15 @@ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attr public function test_get_attribute_returns_updated_values_before_they_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'test-attribute', 'test-value' ); + $processor->set_attribute( '', 'test-attribute', 'test-value' ); $this->assertSame( 'test-value', - $processor->get_attribute( 'test-attribute' ), + $processor->get_attribute( '', 'test-attribute' ), 'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML does not include attribute added via set_attribute()' ); @@ -856,15 +857,15 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'test-ATTribute', 'test-value' ); + $processor->set_attribute( '', 'test-ATTribute', 'test-value' ); $this->assertSame( 'test-value', - $processor->get_attribute( 'test-ATTribute' ), + $processor->get_attribute( '', 'test-ATTribute' ), 'get_attribute() (called before 
get_updated_xml()) did not return attribute added via set_attribute()' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML does not include attribute added via set_attribute()' ); @@ -879,14 +880,14 @@ public function test_get_attribute_returns_updated_values_before_they_are_applie public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $this->assertNull( - $processor->get_attribute( 'id' ), + $processor->get_attribute( '', 'id' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was removed by remove_attribute()' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML includes attribute that was removed by remove_attribute()' ); @@ -900,11 +901,11 @@ public function test_get_attribute_reflects_removed_attribute_before_it_is_appli public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'test-attribute', 'test-value' ); - $processor->remove_attribute( 'test-attribute' ); + $processor->set_attribute( '', 'test-attribute', 'test-value' ); + $processor->remove_attribute( '', 'test-attribute' ); $this->assertNull( - $processor->get_attribute( 'test-attribute' ), + $processor->get_attribute( '', 'test-attribute' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was added via set_attribute() and then removed by remove_attribute()' ); $this->assertSame( @@ -922,15 +923,15 @@ public function test_get_attribute_reflects_adding_and_then_removing_an_attribut public function 
test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'id', 'test-value' ); - $processor->remove_attribute( 'id' ); + $processor->set_attribute( '', 'id', 'test-value' ); + $processor->remove_attribute( '', 'id' ); $this->assertNull( - $processor->get_attribute( 'id' ), + $processor->get_attribute( '', 'id' ), 'get_attribute() (called before get_updated_xml()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Updated XML includes attribute that was overwritten by set_attribute() and then removed by remove_attribute()' ); @@ -944,9 +945,9 @@ public function test_get_attribute_reflects_setting_and_then_removing_an_existin public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->set_attribute( 'id', 'new-id' ); + $processor->set_attribute( '', 'id', 'new-id' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Existing attribute was not updated' ); @@ -961,13 +962,13 @@ public function test_set_attribute_with_an_existing_attribute_name_updates_its_v * @covers XMLProcessor::set_attribute */ public function test_set_attribute_with_case_variants_updates_only_the_original_first_copy() { - $processor = XMLProcessor::create_from_string( '' ); + $processor = XMLProcessor::create_from_string( '' ); $processor->next_tag(); - $processor->set_attribute( 'data-enabled', 'canary1' ); - $processor->set_attribute( 'data-enabled', 'canary2' ); - $processor->set_attribute( 'data-enabled', 'canary3' ); + $processor->set_attribute( '', 'data-enabled', 'canary1' ); + $processor->set_attribute( '', 'data-enabled', 'canary2' 
); + $processor->set_attribute( '', 'data-enabled', 'canary3' ); - $this->assertSame( '', strtolower( $processor->get_updated_xml() ) ); + $this->assertSame( '', strtolower( $processor->get_updated_xml() ) ); } /** @@ -979,11 +980,11 @@ public function test_set_attribute_with_case_variants_updates_only_the_original_ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); while ( $processor->next_tag() ) { - $processor->set_attribute( 'data-foo', 'bar' ); + $processor->set_attribute( '', 'data-foo', 'bar' ); } $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Not all tags were updated when looping with next_tag() and set_attribute()' ); @@ -997,10 +998,10 @@ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Attribute was not removed' ); @@ -1014,7 +1015,7 @@ public function test_remove_attribute_with_an_existing_attribute_name_removes_it public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() { $processor = XMLProcessor::create_from_string( self::XML_SIMPLE ); $processor->next_tag(); - $processor->remove_attribute( 'no-such-attribute' ); + $processor->remove_attribute( '', 'no-such-attribute' ); $this->assertSame( self::XML_SIMPLE, @@ -1030,24 +1031,24 @@ public function test_remove_attribute_with_a_non_existing_attribute_name_does_no */ public function test_correctly_parses_xml_attributes_wrapped_in_single_quotation_marks() { $processor = XMLProcessor::create_from_string( - 'Text' + 'Text' ); $processor->next_tag( array( - 
'breadcrumbs' => array( 'wp:content' ), + 'breadcrumbs' => array( array( 'w.org', 'content' ) ), 'id' => 'first', ) ); - $processor->remove_attribute( 'id' ); + $processor->remove_attribute( '', 'id' ); $processor->next_tag( array( - 'breadcrumbs' => array( 'wp:text' ), + 'breadcrumbs' => array( array( 'w.org', 'text' ) ), 'id' => 'second', ) ); - $processor->set_attribute( 'id', 'single-quote' ); + $processor->set_attribute( '', 'id', 'single-quote' ); $this->assertSame( - 'Text', + 'Text', $processor->get_updated_xml(), 'Did not remove single-quoted attribute' ); @@ -1066,7 +1067,7 @@ public function test_setting_an_attribute_to_false_is_rejected() { ); $processor->next_tag( 'input' ); $this->assertFalse( - $processor->set_attribute( 'checked', false ), + $processor->set_attribute( '', 'checked', false ), 'Accepted a boolean attribute name.' ); } @@ -1081,7 +1082,7 @@ public function test_setting_a_missing_attribute_to_false_does_not_change_the_ma $xml_input = '
'; $processor = XMLProcessor::create_from_string( $xml_input ); $processor->next_tag( 'input' ); - $processor->set_attribute( 'checked', false ); + $processor->set_attribute( '', 'checked', false ); $this->assertSame( $xml_input, $processor->get_updated_xml(), @@ -1106,7 +1107,7 @@ public function test_documents_may_end_with_unclosed_comment( $xml_ending_before $this->assertFalse( $processor->next_tag(), - "Should not have found any tag, but found {$processor->get_local_tag_name()}." + "Should not have found any tag, but found {$processor->get_tag_local_name()}." ); $this->assertTrue( @@ -1144,7 +1145,7 @@ public function test_partial_syntax_triggers_parse_error_when_streaming_is_not_u $this->assertFalse( $processor->next_tag(), - "Should not have found any tag, but found {$processor->get_local_tag_name()}." + "Should not have found any tag, but found {$processor->get_tag_local_name()}." ); $this->assertFalse( @@ -1189,7 +1190,7 @@ public function test_next_tag_returns_false_for_incomplete_syntax_elements( $inc $processor->next_tag(); $this->assertFalse( $processor->next_tag(), - "Shouldn't have found any tags but found {$processor->get_local_tag_name()}." + "Shouldn't have found any tags but found {$processor->get_tag_local_name()}." ); $this->assertTrue( @@ -1205,19 +1206,19 @@ public function test_next_tag_returns_false_for_incomplete_syntax_elements( $inc */ public static function data_incomplete_syntax_elements() { return array( - 'Incomplete tag name' => array( ' array( ' array( ' array( '