-
Notifications
You must be signed in to change notification settings - Fork 0
Add HTML sanitizer for translated message resources #4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feature-html-sanitizer-baseline
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -17,15 +17,22 @@ | |||||||||||||||
| package org.keycloak.themeverifier; | ||||||||||||||||
|
|
||||||||||||||||
| import org.apache.maven.plugin.MojoExecutionException; | ||||||||||||||||
| import org.owasp.html.PolicyFactory; | ||||||||||||||||
|
|
||||||||||||||||
| import java.io.BufferedReader; | ||||||||||||||||
| import java.io.File; | ||||||||||||||||
| import java.io.FileInputStream; | ||||||||||||||||
| import java.io.IOException; | ||||||||||||||||
| import java.io.StringReader; | ||||||||||||||||
| import java.nio.file.Files; | ||||||||||||||||
| import java.util.ArrayList; | ||||||||||||||||
| import java.util.HashSet; | ||||||||||||||||
| import java.util.List; | ||||||||||||||||
| import java.util.MissingResourceException; | ||||||||||||||||
| import java.util.Objects; | ||||||||||||||||
| import java.util.PropertyResourceBundle; | ||||||||||||||||
| import java.util.regex.Matcher; | ||||||||||||||||
| import java.util.regex.Pattern; | ||||||||||||||||
|
|
||||||||||||||||
| public class VerifyMessageProperties { | ||||||||||||||||
|
|
||||||||||||||||
|
|
@@ -41,12 +48,129 @@ public List<String> verify() throws MojoExecutionException { | |||||||||||||||
| try { | ||||||||||||||||
| String contents = Files.readString(file.toPath()); | ||||||||||||||||
| verifyNoDuplicateKeys(contents); | ||||||||||||||||
| verifySafeHtml(); | ||||||||||||||||
| } catch (IOException e) { | ||||||||||||||||
| throw new MojoExecutionException("Can not read file " + file, e); | ||||||||||||||||
| } | ||||||||||||||||
| return messages; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| PolicyFactory POLICY_SOME_HTML = new org.owasp.html.HtmlPolicyBuilder() | ||||||||||||||||
| .allowElements( | ||||||||||||||||
| "br", "p", "strong", "b" | ||||||||||||||||
| ).toFactory(); | ||||||||||||||||
|
|
||||||||||||||||
| PolicyFactory POLICY_NO_HTML = new org.owasp.html.HtmlPolicyBuilder().toFactory(); | ||||||||||||||||
|
|
||||||||||||||||
| /** | ||||||||||||||||
| * Checks every message in {@code file} for HTML that the sanitizer policy would not let through. | ||||||||||||||||
| * <p> | ||||||||||||||||
| * The English counterpart is located by replacing "resources-community" with "resources" and the locale | ||||||||||||||||
| * suffix with "_en.properties" in the file's absolute path. For each key, both values are normalized, anchors | ||||||||||||||||
| * that match the English value are removed, and the remainder is sanitized with {@code POLICY_SOME_HTML} when | ||||||||||||||||
| * the English value contains HTML, otherwise with {@code POLICY_NO_HTML}. If sanitizing changes the value, | ||||||||||||||||
| * an "Illegal HTML" entry showing only the differing middle part is added to {@code messages}. | ||||||||||||||||
| * | ||||||||||||||||
| * @throws RuntimeException if {@code file} or the derived English file cannot be read | ||||||||||||||||
| */ | ||||||||||||||||
| private void verifySafeHtml() { | ||||||||||||||||
| PropertyResourceBundle bundle; | ||||||||||||||||
| try (FileInputStream fis = new FileInputStream(file)) { | ||||||||||||||||
| bundle = new PropertyResourceBundle(fis); | ||||||||||||||||
| } catch (IOException e) { | ||||||||||||||||
| throw new RuntimeException("unable to read file " + file, e); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| PropertyResourceBundle bundleEnglish; | ||||||||||||||||
| String englishFile = file.getAbsolutePath().replaceAll("resources-community", "resources") | ||||||||||||||||
| .replaceAll("_[a-zA-Z-_]*\\.properties", "_en.properties"); | ||||||||||||||||
| try (FileInputStream fis = new FileInputStream(englishFile)) { | ||||||||||||||||
| bundleEnglish = new PropertyResourceBundle(fis); | ||||||||||||||||
| } catch (IOException e) { | ||||||||||||||||
| throw new RuntimeException("unable to read file " + englishFile, e); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| bundle.getKeys().asIterator().forEachRemaining(key -> { | ||||||||||||||||
| String value = bundle.getString(key); | ||||||||||||||||
| value = normalizeValue(key, value); | ||||||||||||||||
| String englishValue = getEnglishValue(key, bundleEnglish); | ||||||||||||||||
| englishValue = normalizeValue(key, englishValue); | ||||||||||||||||
|
|
||||||||||||||||
| value = santizeAnchors(key, value, englishValue); | ||||||||||||||||
|
|
||||||||||||||||
| // Only if the English source string contains HTML we also allow HTML in the translation | ||||||||||||||||
| PolicyFactory policy = containsHtml(englishValue) ? POLICY_SOME_HTML : POLICY_NO_HTML; | ||||||||||||||||
| String sanitized = policy.sanitize(value); | ||||||||||||||||
|
|
||||||||||||||||
| // Sanitizer will escape HTML entities for quotes and also for numeric tags like '<1>' | ||||||||||||||||
| sanitized = org.apache.commons.text.StringEscapeUtils.unescapeHtml4(sanitized); | ||||||||||||||||
| // Sanitizer will add them when there are double curly braces | ||||||||||||||||
| sanitized = sanitized.replace("<!-- -->", ""); | ||||||||||||||||
|
|
||||||||||||||||
| if (!Objects.equals(sanitized, value)) { | ||||||||||||||||
|
|
||||||||||||||||
| // Strip identical characters from the beginning and the end to show where the difference is | ||||||||||||||||
| int start = 0; | ||||||||||||||||
| while (start < sanitized.length() && start < value.length() && value.charAt(start) == sanitized.charAt(start)) { | ||||||||||||||||
| start++; | ||||||||||||||||
| } | ||||||||||||||||
| // The common suffix must not run into the common prefix: without the "- start" bounds the two can | ||||||||||||||||
| // overlap (e.g. "aa" vs. "a") and the substring calls below throw StringIndexOutOfBoundsException. | ||||||||||||||||
| int end = 0; | ||||||||||||||||
| while (end < sanitized.length() - start && end < value.length() - start && value.charAt(value.length() - end - 1) == sanitized.charAt(sanitized.length() - end - 1)) { | ||||||||||||||||
| end++; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||
| messages.add("Illegal HTML in key " + key + " for file " + file + ": '" + value.substring(start, value.length() - end) + "' vs. '" + sanitized.substring(start, sanitized.length() - end) + "'"); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| }); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| /** | ||||||||||||||||
| * Prepares a message value for sanitizing by removing known, accepted constructs that would otherwise be | ||||||||||||||||
| * reported, and by unescaping HTML entities. | ||||||||||||||||
| * | ||||||||||||||||
| * @param key the message key; a few keys get special treatment | ||||||||||||||||
| * @param value the raw message text | ||||||||||||||||
| * @return the normalized text | ||||||||||||||||
| */ | ||||||||||||||||
| private String normalizeValue(String key, String value) { | ||||||||||||||||
| String normalized = value; | ||||||||||||||||
| if (key.equals("templateHelp")) { | ||||||||||||||||
| // "CLAIM.<NAME>" is an accepted placeholder for this key | ||||||||||||||||
| normalized = normalized.replaceAll("CLAIM\\.<[A-Z]*>", ""); | ||||||||||||||||
| } else if (key.equals("optimizeLookupHelp")) { | ||||||||||||||||
| // "<Extensions>" is an accepted literal for this key | ||||||||||||||||
| normalized = normalized.replace("<Extensions>", ""); | ||||||||||||||||
| } else if (key.equals("error-invalid-multivalued-size") || key.startsWith("linkExpirationFormatter.timePeriodUnit")) { | ||||||||||||||||
| // Choice formats contain a "<" that is not HTML; collapse the whole choice expression | ||||||||||||||||
| normalized = normalized.replaceAll("\\{[0-9]+,choice,[^}]*}", "..."); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // The sanitized value is unescaped later as well, so unescape here to compare like with like | ||||||||||||||||
| normalized = org.apache.commons.text.StringEscapeUtils.unescapeHtml4(normalized); | ||||||||||||||||
|
|
||||||||||||||||
| // TODO: move the RTL information for emails | ||||||||||||||||
| boolean pathMentionsEmail = file.getAbsolutePath().contains("email"); | ||||||||||||||||
| return pathMentionsEmail ? normalized.replace(" style=\"direction: rtl;\"", "") : normalized; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| Pattern HTML_TAGS = Pattern.compile("<[a-z]+[^>]*>"); | ||||||||||||||||
|
|
||||||||||||||||
| private boolean containsHtml(String englishValue) { | ||||||||||||||||
| return HTML_TAGS.matcher(englishValue).find(); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // Matches opening and closing tags that begin with "a", including any attributes up to the next '>'. | ||||||||||||||||
| // NOTE(review): since "a" is followed directly by "[^>]*", tags such as <abbr> also match; confirm this is intended. | ||||||||||||||||
| private static final Pattern ANCHOR_PATTERN = Pattern.compile("</?a[^>]*>"); | ||||||||||||||||
|
|
||||||||||||||||
| /** | ||||||||||||||||
| * Allow only those anchor tags from the source key to also appear in the target key. | ||||||||||||||||
| */ | ||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🔷 Medium: The method reassigns 'value' inside a loop driven by a Matcher that was created from the original 'value'. The Matcher keeps scanning the original string while replaceFirst searches the already-modified copy, so anchors are removed by text search rather than by match position, which is fragile when a message contains multiple anchors. Use the Matcher itself to perform the removals via appendReplacement/appendTail so that matching and replacement stay in sync.
||||||||||||||||
| private String santizeAnchors(String key, String value, String englishValue) { | ||||||||||||||||
| // Anchors of the translation and of the English text are compared pairwise, in order of appearance. | ||||||||||||||||
| Matcher matcher = ANCHOR_PATTERN.matcher(value); | ||||||||||||||||
| Matcher englishMatcher = ANCHOR_PATTERN.matcher(englishValue); | ||||||||||||||||
| // The result is assembled from the matcher's own positions, so each removed anchor is exactly the one | ||||||||||||||||
| // that was just compared, rather than the first textual occurrence in a modified copy. | ||||||||||||||||
| StringBuilder stripped = new StringBuilder(); | ||||||||||||||||
| while (matcher.find()) { | ||||||||||||||||
| if (englishMatcher.find() && Objects.equals(matcher.group(), englishMatcher.group())) { | ||||||||||||||||
| // Copies the text in front of this anchor and drops the anchor itself | ||||||||||||||||
| matcher.appendReplacement(stripped, ""); | ||||||||||||||||
| } else { | ||||||||||||||||
| // Name the key and file so the offending entry can be located from the report | ||||||||||||||||
| messages.add("Didn't find anchor tag " + matcher.group() + " in original string (key " + key + ", file " + file + ")"); | ||||||||||||||||
| break; | ||||||||||||||||
| } | ||||||||||||||||
| } | ||||||||||||||||
| // Appends the text after the last removed anchor; a rejected anchor stays in the value for the sanitizer | ||||||||||||||||
| matcher.appendTail(stripped); | ||||||||||||||||
| return stripped.toString(); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| /** | ||||||||||||||||
| * Looks up the English text for a message key. | ||||||||||||||||
| * | ||||||||||||||||
| * @param key the message key to look up | ||||||||||||||||
| * @param bundleEnglish the bundle holding the English messages | ||||||||||||||||
| * @return the English text, or an empty string when the bundle has no entry for the key | ||||||||||||||||
| */ | ||||||||||||||||
| private static String getEnglishValue(String key, PropertyResourceBundle bundleEnglish) { | ||||||||||||||||
| try { | ||||||||||||||||
| return bundleEnglish.getString(key); | ||||||||||||||||
| } catch (MissingResourceException missing) { | ||||||||||||||||
| // A key without an English counterpart is treated as having empty English text | ||||||||||||||||
| return ""; | ||||||||||||||||
| } | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| private void verifyNoDuplicateKeys(String contents) throws IOException { | ||||||||||||||||
| BufferedReader bufferedReader = new BufferedReader(new StringReader(contents)); | ||||||||||||||||
| String line; | ||||||||||||||||
|
|
||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| # | ||
| # Copyright 2025 Red Hat, Inc. and/or its affiliates | ||
| # and other contributors as indicated by the @author tags. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # | ||
| key=Some <a href="http://malicious.com">link</a> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| # | ||
| # Copyright 2025 Red Hat, Inc. and/or its affiliates | ||
| # and other contributors as indicated by the @author tags. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # | ||
| key=Some <a href="http://example.com">link</a> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| # | ||
| # Copyright 2025 Red Hat, Inc. and/or its affiliates | ||
| # and other contributors as indicated by the @author tags. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # | ||
| key=Some <div>tag</div |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| # | ||
| # Copyright 2025 Red Hat, Inc. and/or its affiliates | ||
| # and other contributors as indicated by the @author tags. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # | ||
| key=Some <b>HTML</b> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| # | ||
| # Copyright 2025 Red Hat, Inc. and/or its affiliates | ||
| # and other contributors as indicated by the @author tags. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| # | ||
| key=No HTML |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -98,7 +98,7 @@ revoke=Atšaukti įgaliojimą | |||||
|
|
||||||
| configureAuthenticators=Sukonfigūruotas autentifikatorius | ||||||
| mobile=Mobilus | ||||||
| totpStep1=Įdiekite <a href="https://freeotp.github.io/" target="_blank">FreeOTP</a> arba Google Authenticator savo įrenginyje. Programėlės prieinamos <a href="https://play.google.com">Google Play</a> ir Apple App Store. | ||||||
| totpStep1=Installa una delle seguenti applicazioni sul tuo cellulare: | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| totpStep2=Atidarykite programėlę ir nuskenuokite barkodą arba įveskite kodą. | ||||||
| totpStep3=Įveskite programėlėje sugeneruotą vieną kartą galiojantį kodą ir paspauskite Saugoti norėdami prisijungti. | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -109,7 +109,7 @@ revoke=收回授权 | |||||
|
|
||||||
| configureAuthenticators=配置的认证者 | ||||||
| mobile=手机 | ||||||
| totpStep1=在你的设备上安装 <a href="https://fedorahosted.org/freeotp/" target="_blank">FreeOTP</a> 或者 Google Authenticator.两个应用可以从 <a href="https://play.google.com">Google Play</a> 和 Apple App Store下载。 | ||||||
| totpStep1=在您的手機上安裝以下應用程式之一: | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🔷 Medium: This is Traditional Chinese text in a Simplified Chinese file (zh_CN). Use Simplified characters to avoid inconsistency and confusion.
Suggested change
|
||||||
| totpStep2=打开应用扫描二维码输入验证码 | ||||||
| totpStep3=输入应用提供的一次性验证码单击保存 | ||||||
|
|
||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
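🔷 Medium: The character class "[a-zA-Z-_]" is easy to misread as containing a range from Z to _. java.util.regex treats a '-' that directly follows a completed range (here A-Z) as a literal hyphen, so the class matches letters, '-' and '_'; moving the '-' to the end ("[a-zA-Z_-]") makes that intent explicit. Also, anchoring the pattern to the end of the string with "$" avoids accidental replacements earlier in the path.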