Compare commits

...

1 Commits

Author SHA1 Message Date
Eve
810439244a fix(server/comment): remove HTML sanitizer that was corrupting Markdown
The bluemonday HTML sanitizer applied to comment content (added in #679)
treats Markdown source as HTML, entity-encoding syntactically meaningful
characters and normalizing whitespace. This corrupts user input:

  - "> quote"   -> "> quote"     (blockquote lost, see #1303)
  - '"foo"'     -> '"foo"'    (literal entities visible)
  - "\n\n2." -> " 2."             (ordered list items merged into prose)

Comment content is stored as Markdown source. XSS is already handled at
two layers:

  - Render: rehype-sanitize in packages/ui/markdown and
    packages/views/editor/readonly-content (mention:// allowlist,
    data-href restricted to http(s), class restricted to
    code/div/span/pre).
  - Edit: @tiptap/markdown is configured with html:false, so Markdown
    source containing raw HTML tags is treated as plain text.

Removing the server-side sanitizer therefore does not lower the security
boundary, and restores faithful Markdown round-tripping.

The PR #1342 workaround in the editor serializer can be dropped once
this lands.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-20 18:30:06 +08:00
5 changed files with 8 additions and 229 deletions

View File

@@ -11,10 +11,12 @@ require (
github.com/go-chi/chi/v5 v5.2.5
github.com/go-chi/cors v1.2.2
github.com/golang-jwt/jwt/v5 v5.3.1
github.com/google/uuid v1.6.0
github.com/gorilla/websocket v1.5.3
github.com/jackc/pgx/v5 v5.8.0
github.com/lmittmann/tint v1.1.3
github.com/resend/resend-go/v2 v2.28.0
github.com/robfig/cron/v3 v3.0.1
github.com/spf13/cobra v1.10.2
)
@@ -34,17 +36,11 @@ require (
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.18 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 // indirect
github.com/aws/smithy-go v1.24.2 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/css v1.0.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/puddle/v2 v2.2.2 // indirect
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/spf13/pflag v1.0.9 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/text v0.35.0 // indirect
)

View File

@@ -38,8 +38,6 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 h1:p8ogvvLugcR/zLBXTXrTkj0RYBU
github.com/aws/aws-sdk-go-v2/service/sts v1.41.10/go.mod h1:60dv0eZJfeVXfbT1tFJinbHrDfSJ2GZl4Q//OSSNAVw=
github.com/aws/smithy-go v1.24.2 h1:FzA3bu/nt/vDvmnkg+R8Xl46gmzEDam6mZ1hzmwXFng=
github.com/aws/smithy-go v1.24.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@@ -52,8 +50,6 @@ github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63Y
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -68,8 +64,6 @@ github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/lmittmann/tint v1.1.3 h1:Hv4EaHWXQr+GTFnOU4VKf8UvAtZgn0VuKT+G0wFlO3I=
github.com/lmittmann/tint v1.1.3/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE=
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/resend/resend-go/v2 v2.28.0 h1:ttM1/VZR4fApBv3xI1TneSKi1pbfFsVrq7fXFlHKtj4=
@@ -87,8 +81,6 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=

View File

@@ -12,7 +12,6 @@ import (
"github.com/jackc/pgx/v5/pgtype"
"github.com/multica-ai/multica/server/internal/logger"
"github.com/multica-ai/multica/server/internal/mention"
"github.com/multica-ai/multica/server/internal/sanitize"
"github.com/multica-ai/multica/server/internal/util"
db "github.com/multica-ai/multica/server/pkg/db/generated"
"github.com/multica-ai/multica/server/pkg/protocol"
@@ -242,8 +241,11 @@ func (h *Handler) CreateComment(w http.ResponseWriter, r *http.Request) {
// Expand bare issue identifiers (e.g. MUL-117) into mention links.
req.Content = mention.ExpandIssueIdentifiers(r.Context(), h.Queries, issue.WorkspaceID, req.Content)
// Sanitize HTML to prevent stored XSS.
req.Content = sanitize.HTML(req.Content)
// NOTE: Comment content is stored as Markdown source. XSS is handled at the
// rendering layer (rehype-sanitize) and at the editor layer
// (@tiptap/markdown with html:false). Running an HTML sanitizer here would
// entity-encode Markdown syntax characters (>, ", &, <) and corrupt the
// source. See issue #1303 / discussion in MUL-1119, MUL-1125.
comment, err := h.Queries.CreateComment(r.Context(), db.CreateCommentParams{
IssueID: issue.ID,
@@ -496,8 +498,7 @@ func (h *Handler) UpdateComment(w http.ResponseWriter, r *http.Request) {
return
}
// Sanitize HTML to prevent stored XSS.
req.Content = sanitize.HTML(req.Content)
// NOTE: See CreateComment — Markdown is sanitized at render/edit time, not here.
comment, err := h.Queries.UpdateComment(r.Context(), db.UpdateCommentParams{
ID: parseUUID(commentId),

View File

@@ -1,71 +0,0 @@
package sanitize
import (
"fmt"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday"
)
// httpURL matches only http:// and https:// URLs — blocks javascript:, data:, etc.
var httpURL = regexp.MustCompile(`^https?://`)
// policy is a shared bluemonday policy that allows safe Markdown HTML while
// stripping dangerous elements (script, iframe, object, embed, style, on*).
var policy *bluemonday.Policy
func init() {
policy = bluemonday.UGCPolicy()
policy.AllowElements("div", "span")
// Allow file-card data attributes, but restrict data-href to http(s) only
// to prevent javascript: and other dangerous URL schemes.
policy.AllowAttrs("data-type", "data-filename").OnElements("div")
policy.AllowAttrs("data-href").Matching(httpURL).OnElements("div")
policy.AllowAttrs("class").OnElements("code", "div", "span", "pre")
}
// fencedCodeBlock matches ``` or ~~~ fenced code blocks (with optional language tag).
var fencedCodeBlock = regexp.MustCompile("(?m)^(```|~~~)[^\n]*\n[\\s\\S]*?\n(```|~~~)[ \t]*$")
// inlineCode matches backtick-delimited inline code spans.
// Ordered longest-delimiter-first so triple backticks match before doubles/singles.
var inlineCode = regexp.MustCompile("```[^`]+```|``[^`]+``|`[^`]+`")
// HTML sanitizes user-provided HTML/Markdown content, stripping dangerous
// tags (script, iframe, object, embed, etc.) and event-handler attributes.
//
// Code blocks and inline code spans are preserved verbatim so that bluemonday
// does not HTML-escape their contents (e.g. && → &amp;&amp;).
func HTML(input string) string {
// 1. Extract fenced code blocks, replacing with unique placeholders.
var blocks []string
placeholder := func(i int) string { return fmt.Sprintf("\x00CODEBLOCK_%d\x00", i) }
result := fencedCodeBlock.ReplaceAllStringFunc(input, func(m string) string {
idx := len(blocks)
blocks = append(blocks, m)
return placeholder(idx)
})
// 2. Extract inline code spans.
var inlines []string
inlinePH := func(i int) string { return fmt.Sprintf("\x00INLINE_%d\x00", i) }
result = inlineCode.ReplaceAllStringFunc(result, func(m string) string {
idx := len(inlines)
inlines = append(inlines, m)
return inlinePH(idx)
})
// 3. Sanitize the non-code portions.
result = policy.Sanitize(result)
// 4. Restore inline code spans, then fenced code blocks.
for i, code := range inlines {
result = strings.Replace(result, inlinePH(i), code, 1)
}
for i, block := range blocks {
result = strings.Replace(result, placeholder(i), block, 1)
}
return result
}

View File

@@ -1,139 +0,0 @@
package sanitize
import (
"testing"
)
func TestHTML(t *testing.T) {
tests := []struct {
name string
input string
want string
}{
{
name: "plain text",
input: "hello world",
want: "hello world",
},
{
name: "safe markdown",
input: "**bold** and *italic*",
want: "**bold** and *italic*",
},
{
name: "script tag stripped",
input: `<script>alert(1)</script>`,
want: "",
},
{
name: "iframe stripped",
input: `<iframe srcdoc="<script>parent.__xss=1</script>"></iframe>`,
want: "",
},
{
name: "img with onerror stripped",
input: `<img src=x onerror="alert(1)">`,
want: `<img src="x">`,
},
{
name: "safe link preserved",
input: `<a href="https://example.com">link</a>`,
want: `<a href="https://example.com" rel="nofollow">link</a>`,
},
{
name: "file card div preserved",
input: `<div data-type="fileCard" data-href="https://cdn.example.com/file.pdf" data-filename="report.pdf"></div>`,
want: `<div data-type="fileCard" data-href="https://cdn.example.com/file.pdf" data-filename="report.pdf"></div>`,
},
{
name: "object tag stripped",
input: `<object data="evil.swf"></object>`,
want: "",
},
{
name: "embed tag stripped",
input: `<embed src="evil.swf">`,
want: "",
},
{
name: "style tag stripped",
input: `<style>body{display:none}</style>`,
want: "",
},
{
name: "mention link preserved",
input: `[@User](mention://member/abc-123)`,
want: `[@User](mention://member/abc-123)`,
},
{
name: "file card with javascript href stripped",
input: `<div data-type="fileCard" data-href="javascript:alert(1)" data-filename="evil.pdf"></div>`,
want: `<div data-type="fileCard" data-filename="evil.pdf"></div>`,
},
{
name: "file card with data URI stripped",
input: `<div data-type="fileCard" data-href="data:text/html,<script>alert(1)</script>" data-filename="x.html"></div>`,
want: `<div data-type="fileCard" data-filename="x.html"></div>`,
},
{
name: "file card with http href preserved",
input: `<div data-type="fileCard" data-href="http://example.com/file.pdf" data-filename="file.pdf"></div>`,
want: `<div data-type="fileCard" data-href="http://example.com/file.pdf" data-filename="file.pdf"></div>`,
},
// Code block preservation — entities must NOT be escaped inside code.
{
name: "fenced code block preserves ampersands",
input: "```\na && b\n```",
want: "```\na && b\n```",
},
{
name: "fenced code block preserves angle brackets",
input: "```html\n<div class=\"x\">hello</div>\n```",
want: "```html\n<div class=\"x\">hello</div>\n```",
},
{
name: "inline code preserves ampersands",
input: "run `a && b` in shell",
want: "run `a && b` in shell",
},
{
name: "inline code preserves angle brackets",
input: "use `x < y && y > z`",
want: "use `x < y && y > z`",
},
{
name: "double backtick inline code preserved",
input: "use ``a && b`` here",
want: "use ``a && b`` here",
},
{
name: "script in fenced code block preserved",
input: "```\n<script>alert(1)</script>\n```",
want: "```\n<script>alert(1)</script>\n```",
},
{
name: "script outside code block still stripped",
input: "hello <script>alert(1)</script> world",
want: "hello world",
},
{
name: "mixed code and non-code",
input: "text `a && b` more <script>x</script> end",
want: "text `a && b` more end",
},
{
name: "tilde fenced code block preserves content",
input: "~~~\na && b\n~~~",
want: "~~~\na && b\n~~~",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := HTML(tt.input)
if got != tt.want {
t.Errorf("HTML(%q) = %q, want %q", tt.input, got, tt.want)
}
})
}
}