Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F585201
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
5 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/includes/OutputTransform/Stages/DeduplicateStyles.php b/includes/OutputTransform/Stages/DeduplicateStyles.php
index f80e70f1896..94adc71031f 100644
--- a/includes/OutputTransform/Stages/DeduplicateStyles.php
+++ b/includes/OutputTransform/Stages/DeduplicateStyles.php
@@ -21,32 +21,51 @@ class DeduplicateStyles extends ContentTextTransformStage {
protected function transformText( string $text, ParserOutput $po, ?ParserOptions $popts, array &$options ): string {
$seen = [];
- return HtmlHelper::modifyElements(
- $text,
- static function ( SerializerNode $node ): bool {
- return $node->name === 'style' &&
- ( $node->attrs['data-mw-deduplicate'] ?? '' ) !== '';
- },
- static function ( SerializerNode $node ) use ( &$seen ): SerializerNode {
- $key = $node->attrs['data-mw-deduplicate'];
- if ( !isset( $seen[$key] ) ) {
- $seen[$key] = true;
+ $isParsoidContent = $options['isParsoidContent'] ?? false;
+
+ $transform = static function ( $fragment ) use ( &$seen, $isParsoidContent ) {
+ return HtmlHelper::modifyElements(
+ $fragment,
+ static function ( SerializerNode $node ): bool {
+ return $node->name === 'style' &&
+ ( $node->attrs['data-mw-deduplicate'] ?? '' ) !== '';
+ },
+ static function ( SerializerNode $node ) use ( &$seen ): SerializerNode {
+ $key = $node->attrs['data-mw-deduplicate'];
+ if ( !isset( $seen[$key] ) ) {
+ $seen[$key] = true;
+ return $node;
+ }
+ // We were going to use an empty <style> here, but there
+ // was concern that would be too much overhead for browsers.
+ // So let's hope a <link> with a non-standard rel and href isn't
+ // going to be misinterpreted or mangled by any subsequent processing.
+ $node->name = 'link';
+ $node->attrs = new PlainAttributes( [
+ 'rel' => 'mw-deduplicated-inline-style',
+ 'href' => "mw-data:" . wfUrlencode( $key ),
+ ] );
+ $node->children = [];
+ $node->void = true;
return $node;
- }
- // We were going to use an empty <style> here, but there
- // was concern that would be too much overhead for browsers.
- // So let's hope a <link> with a non-standard rel and href isn't
- // going to be misinterpreted or mangled by any subsequent processing.
- $node->name = 'link';
- $node->attrs = new PlainAttributes( [
- 'rel' => 'mw-deduplicated-inline-style',
- 'href' => "mw-data:" . wfUrlencode( $key ),
- ] );
- $node->children = [];
- $node->void = true;
- return $node;
- },
- $options['isParsoidContent'] ?? false
- );
+ },
+ $isParsoidContent
+ );
+ };
+
+ if ( !$isParsoidContent ) {
+ // Optimization: Only transform possible style nodes to avoid having to tokenize the entire output,
+ // which is expensive for large pages (T394059).
+ // This is unsafe to do for Parsoid content, since the naïve regex below might match encoded style
+ // tags within data-parsoid attribute values, so only apply it to legacy parser output.
+ // Parsoid content transformations will be further optimized in T394005.
+ return preg_replace_callback(
+ '#<style\s+([^>]*data-mw-deduplicate\s*=[\'"][^>]*)>.*?</style>#s',
+ static fn ( array $matches ) => $transform( $matches[0] ),
+ $text
+ );
+ }
+
+ return $transform( $text );
}
}
diff --git a/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php b/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php
index eae21cbbedc..45c5a4d629e 100644
--- a/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php
+++ b/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php
@@ -35,8 +35,12 @@ class DeduplicateStylesTest extends OutputTransformStageTestBase {
] );
}
- public function provideTransform(): array {
- $dedup = <<<EOF
+ public function provideTransform(): iterable {
+ $testCases = [
+ 'legacy parser output' => [
+ TestUtils::TEST_TO_DEDUP,
+ [],
+ <<<EOF
<p>This is a test document.</p>
<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
@@ -47,13 +51,33 @@ class DeduplicateStylesTest extends OutputTransformStageTestBase {
<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" />
<style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style>
<style>.Duplicate1 {}</style>
-EOF;
-
- $po = new ParserOutput( TestUtils::TEST_TO_DEDUP );
- $expected = new ParserOutput( $dedup );
- $opts = [];
- return [
- [ $po, null, $opts, $expected ]
+EOF
+ ],
+ 'parsoid content with encoded style tags in data-mw attribute' => [
+ <<<EOF
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+<span data-mw="{&quot;name&quot;:&quot;ref&quot;,&quot;attrs&quot;:{&quot;name&quot;:&quot;blank&quot;},
+&quot;body&quot;:{&quot;html&quot;:&quot;<style data-mw-deduplicate=\&quot;duplicate1\&quot;>.Duplicate1 {}</style>&quot;}"></span>
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+EOF
+,
+[ 'isParsoidContent' => true ],
+<<<EOF
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+<span data-mw="{&quot;name&quot;:&quot;ref&quot;,&quot;attrs&quot;:{&quot;name&quot;:&quot;blank&quot;},
+&quot;body&quot;:{&quot;html&quot;:&quot;<style data-mw-deduplicate=\&quot;duplicate1\&quot;>.Duplicate1 {}</style>&quot;}"></span>
+<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1">
+EOF
+ ]
];
+
+ foreach ( $testCases as $name => [ $input, $options, $expected ] ) {
+ yield $name => [
+ new ParserOutput( $input ),
+ null,
+ $options,
+ new ParserOutput( $expected )
+ ];
+ }
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jul 5, 5:32 AM (11 h, 53 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
227613
Default Alt Text
(5 KB)
Attached To
Mode
rMW mediawiki
Attached
Detach File
Event Timeline
Log In to Comment