This commit is contained in:
Vitor Pamplona
2024-05-13 10:42:05 -04:00

View File

@@ -130,7 +130,7 @@ object MetaTagsParser {
// - commonly used character references in attribute values are resolved // - commonly used character references in attribute values are resolved
private class Attrs { private class Attrs {
companion object { companion object {
val RE_CHAR_REF = Regex("""&(\w+)(;?)""") val RE_CHAR_REF = Regex("""&(#?)(\w+)(;?)""")
val BASE_CHAR_REFS = val BASE_CHAR_REFS =
mapOf( mapOf(
"amp" to "&", "amp" to "&",
@@ -141,6 +141,8 @@ object MetaTagsParser {
"LT" to "<", "LT" to "<",
"gt" to ">", "gt" to ">",
"GT" to ">", "GT" to ">",
"nbsp" to " ",
"NBSP" to " ",
) )
val CHAR_REFS = val CHAR_REFS =
mapOf( mapOf(
@@ -148,16 +150,26 @@ object MetaTagsParser {
"equals" to "=", "equals" to "=",
"grave" to "`", "grave" to "`",
"DiacriticalGrave" to "`", "DiacriticalGrave" to "`",
"039" to "'",
"8217" to "’",
"8216" to "‘",
"39" to "'",
"ldquo" to "“",
"rdquo" to "”",
"mdash" to "—",
"hellip" to "…",
"x27" to "'",
"nbsp" to " ",
) )
fun replaceCharRefs(match: MatchResult): String { fun replaceCharRefs(match: MatchResult): String {
val bcr = BASE_CHAR_REFS[match.groupValues[1]] val bcr = BASE_CHAR_REFS[match.groupValues[2]]
if (bcr != null) { if (bcr != null) {
return bcr return bcr
} }
// non-base char refs must be terminated by ';' // non-base char refs must be terminated by ';'
if (match.groupValues[2].isNotEmpty()) { if (match.groupValues[3].isNotEmpty()) {
val cr = CHAR_REFS[match.groupValues[1]] val cr = CHAR_REFS[match.groupValues[2]]
if (cr != null) { if (cr != null) {
return cr return cr
} }