Page MenuHomePhorge

No OneTemporary

Size
5 KB
Referenced Files
None
Subscribers
None
diff --git a/includes/OutputTransform/Stages/DeduplicateStyles.php b/includes/OutputTransform/Stages/DeduplicateStyles.php
index f80e70f1896..94adc71031f 100644
--- a/includes/OutputTransform/Stages/DeduplicateStyles.php
+++ b/includes/OutputTransform/Stages/DeduplicateStyles.php
@@ -21,32 +21,63 @@ class DeduplicateStyles extends ContentTextTransformStage {
protected function transformText( string $text, ParserOutput $po, ?ParserOptions $popts, array &$options ): string {
+ // Tracks the data-mw-deduplicate keys already emitted, so that only the first
+ // <style> for each key is kept and later ones are replaced by a <link>.
$seen = [];
- return HtmlHelper::modifyElements(
- $text,
- static function ( SerializerNode $node ): bool {
- return $node->name === 'style' &&
- ( $node->attrs['data-mw-deduplicate'] ?? '' ) !== '';
- },
- static function ( SerializerNode $node ) use ( &$seen ): SerializerNode {
- $key = $node->attrs['data-mw-deduplicate'];
- if ( !isset( $seen[$key] ) ) {
- $seen[$key] = true;
+ $isParsoidContent = $options['isParsoidContent'] ?? false;
+
+ // Deduplicates the matching <style> elements found in one HTML fragment.
+ // $seen is captured by reference so that keys are remembered across fragments.
+ $transform = static function ( string $fragment ) use ( &$seen, $isParsoidContent ): string {
+ return HtmlHelper::modifyElements(
+ $fragment,
+ static function ( SerializerNode $node ): bool {
+ return $node->name === 'style' &&
+ ( $node->attrs['data-mw-deduplicate'] ?? '' ) !== '';
+ },
+ static function ( SerializerNode $node ) use ( &$seen ): SerializerNode {
+ $key = $node->attrs['data-mw-deduplicate'];
+ if ( !isset( $seen[$key] ) ) {
+ $seen[$key] = true;
+ return $node;
+ }
+ // We were going to use an empty <style> here, but there
+ // was concern that would be too much overhead for browsers.
+ // So let's hope a <link> with a non-standard rel and href isn't
+ // going to be misinterpreted or mangled by any subsequent processing.
+ $node->name = 'link';
+ $node->attrs = new PlainAttributes( [
+ 'rel' => 'mw-deduplicated-inline-style',
+ 'href' => "mw-data:" . wfUrlencode( $key ),
+ ] );
+ $node->children = [];
+ $node->void = true;
return $node;
- }
- // We were going to use an empty <style> here, but there
- // was concern that would be too much overhead for browsers.
- // So let's hope a <link> with a non-standard rel and href isn't
- // going to be misinterpreted or mangled by any subsequent processing.
- $node->name = 'link';
- $node->attrs = new PlainAttributes( [
- 'rel' => 'mw-deduplicated-inline-style',
- 'href' => "mw-data:" . wfUrlencode( $key ),
- ] );
- $node->children = [];
- $node->void = true;
- return $node;
- },
- $options['isParsoidContent'] ?? false
- );
+ },
+ $isParsoidContent
+ );
+ };
+
+ if ( !$isParsoidContent ) {
+ // Optimization: Only transform possible style nodes to avoid having to tokenize the entire output,
+ // which is expensive for large pages (T394059).
+ // This is unsafe to do for Parsoid content, since the naïve regex below might match encoded style
+ // tags within data-parsoid attribute values, so only apply it to legacy parser output.
+ // Parsoid content transformations will be further optimized in T394005.
+ $result = preg_replace_callback(
+ '#<style\s+([^>]*data-mw-deduplicate\s*=[\'"][^>]*)>.*?</style>#s',
+ static fn ( array $matches ) => $transform( $matches[0] ),
+ $text
+ );
+ if ( $result !== null ) {
+ return $result;
+ }
+ // preg_replace_callback() returns null on a PCRE error (e.g. the backtrack limit being hit
+ // on a very large page), which would violate the string return type. Fall back to
+ // transforming the whole text; callbacks may already have run for earlier matches, so
+ // reset the seen keys first to keep the first occurrence of each style intact.
+ $seen = [];
+ }
+
+ return $transform( $text );
}
}
diff --git a/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php b/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php
index eae21cbbedc..45c5a4d629e 100644
--- a/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php
+++ b/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php
@@ -35,8 +35,12 @@ class DeduplicateStylesTest extends OutputTransformStageTestBase {
] );
}
- public function provideTransform(): array {
- $dedup = <<<EOF
+ public function provideTransform(): iterable {
+ $testCases = [
+ 'legacy parser output' => [
+ TestUtils::TEST_TO_DEDUP,
+ [],
+ <<<EOF
<p>This is a test document.</p>
<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
@@ -47,13 +51,33 @@ class DeduplicateStylesTest extends OutputTransformStageTestBase {
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style>
<style>.Duplicate1 {}</style>
-EOF;
-
- $po = new ParserOutput( TestUtils::TEST_TO_DEDUP );
- $expected = new ParserOutput( $dedup );
- $opts = [];
- return [
- [ $po, null, $opts, $expected ]
+EOF
+ ],
+ 'parsoid content with encoded style tags in data-mw attribute' => [
+ <<<EOF
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+<span data-mw="{&quot;name&quot;:&quot;ref&quot;,&quot;attrs&quot;:{&quot;name&quot;:&quot;blank&quot;},
+&quot;body&quot;:{&quot;html&quot;:&quot;<style data-mw-deduplicate=\&quot;duplicate1\&quot;>.Duplicate1 {}</style>&quot;}"></span>
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+EOF
+ ,
+ [ 'isParsoidContent' => true ],
+ <<<EOF
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+<span data-mw="{&quot;name&quot;:&quot;ref&quot;,&quot;attrs&quot;:{&quot;name&quot;:&quot;blank&quot;},
+&quot;body&quot;:{&quot;html&quot;:&quot;<style data-mw-deduplicate=\&quot;duplicate1\&quot;>.Duplicate1 {}</style>&quot;}"></span>
+<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
+EOF
+ ]
];
+
+ foreach ( $testCases as $name => [ $input, $options, $expected ] ) {
+ yield $name => [
+ new ParserOutput( $input ),
+ null,
+ $options,
+ new ParserOutput( $expected )
+ ];
+ }
}
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jul 5, 5:32 AM (11 h, 53 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
227613
Default Alt Text
(5 KB)

Event Timeline