mirror of
https://github.com/multica-ai/multica.git
synced 2026-06-17 03:38:32 +02:00
fix(integrations/lark): union_id-first self mention strip + token-aware scan + local whitespace cleanup (MUL-2671)
Three review blockers on the mention rewrite from PR review: 1. isBotMention now mirrors containsMention's union_id-first policy. When the installation row knows our union_id, we trust it exclusively (open_id is structurally inverted in multi-bot groups — matching on it would re-introduce the routing bug we fixed two commits ago). open_id fallback fires only when union_id is absent. New tests: @-ing both bots in one message correctly strips only self and renders the sibling as @<name>; open_id-matches-but-union_id-differs does NOT strip. 2. resolveMentions no longer collapses or trims whitespace globally. Indentation, tabs, code blocks, tables — all preserved verbatim. When the self mention is removed we eat exactly one adjacent horizontal space (the one after the placeholder, or, when the mention sits at end-of-input, a single space already emitted right before it). New test exercises a multi-line indented + tabbed body and asserts the whole shape survives. 3. Prefix-collision-safe replacement. A chat with 11+ participants exposes both `@_user_1` and `@_user_10`; naive ReplaceAll for `@_user_1` would mangle the substring of `@_user_10`. The resolver now does a single-pass token scan with the mention list sorted longest-key-first, so the longer placeholder always wins at any scan position. New test covers the @_user_1 / @_user_10 case explicitly. Also drops the temporary INFO-level diag logging the previous commit added — root cause was confirmed (union_id swap in the manual backfill; not a decoder bug). Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
db "github.com/multica-ai/multica/server/pkg/db/generated"
|
||||
@@ -153,68 +154,93 @@ type larkMention struct {
|
||||
// when name is empty (defensive — Lark always populates it in
|
||||
// practice).
|
||||
//
|
||||
// Whitespace cleanup: runs of horizontal whitespace introduced by
|
||||
// stripping the bot mention are collapsed to a single space, and
|
||||
// leading/trailing horizontal whitespace per line is trimmed. Line
|
||||
// breaks in the original message are preserved.
|
||||
// Replacement is a single-pass token scan, not naive ReplaceAll. Two
|
||||
// reasons:
|
||||
//
|
||||
// - Prefix collision: a chat with eleven @-mentions exposes keys
|
||||
// `@_user_1` and `@_user_10`; ReplaceAll for `@_user_1` would
|
||||
// mangle the substring of `@_user_10`. We sort keys by length
|
||||
// DESC and try the longest match at each scan position so the
|
||||
// longer placeholder always wins.
|
||||
//
|
||||
// - Whitespace fidelity: when we strip the bot mention we only
|
||||
// touch a single space immediately adjacent to it — either the
|
||||
// space after the placeholder, or, if there is none, a single
|
||||
// trailing space already in the output. Tabs, indentation, code
|
||||
// blocks, table pipes, and any other intentional whitespace in
|
||||
// the user's message are preserved verbatim.
|
||||
func resolveMentions(text string, mentions []larkMention, botOpenID, botUnionID string) string {
|
||||
if text == "" || len(mentions) == 0 {
|
||||
return text
|
||||
}
|
||||
// Filter empty keys and sort longest first so `@_user_10` is
|
||||
// matched before `@_user_1` at any scan position.
|
||||
sorted := make([]larkMention, 0, len(mentions))
|
||||
for _, m := range mentions {
|
||||
if m.Key == "" {
|
||||
continue
|
||||
if m.Key != "" {
|
||||
sorted = append(sorted, m)
|
||||
}
|
||||
var rep string
|
||||
switch {
|
||||
case isBotMention(m, botOpenID, botUnionID):
|
||||
rep = ""
|
||||
case m.Name != "":
|
||||
rep = "@" + m.Name
|
||||
default:
|
||||
continue
|
||||
}
|
||||
text = strings.ReplaceAll(text, m.Key, rep)
|
||||
}
|
||||
return tidyMentionWhitespace(text)
|
||||
}
|
||||
sort.SliceStable(sorted, func(i, j int) bool {
|
||||
return len(sorted[i].Key) > len(sorted[j].Key)
|
||||
})
|
||||
|
||||
func isBotMention(m larkMention, botOpenID, botUnionID string) bool {
|
||||
if botUnionID != "" && m.ID.UnionID == botUnionID {
|
||||
return true
|
||||
}
|
||||
if botOpenID != "" && m.ID.OpenID == botOpenID {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// tidyMentionWhitespace collapses runs of spaces/tabs and trims
|
||||
// horizontal whitespace at the start/end of each line. Newlines are
|
||||
// preserved so multi-line user messages survive intact.
|
||||
func tidyMentionWhitespace(s string) string {
|
||||
if s == "" {
|
||||
return s
|
||||
}
|
||||
lines := strings.Split(s, "\n")
|
||||
for i, line := range lines {
|
||||
var b strings.Builder
|
||||
b.Grow(len(line))
|
||||
prevSpace := false
|
||||
for _, r := range line {
|
||||
if r == ' ' || r == '\t' {
|
||||
if !prevSpace {
|
||||
b.WriteByte(' ')
|
||||
}
|
||||
prevSpace = true
|
||||
continue
|
||||
out := make([]byte, 0, len(text))
|
||||
i := 0
|
||||
for i < len(text) {
|
||||
var matched *larkMention
|
||||
for idx := range sorted {
|
||||
if strings.HasPrefix(text[i:], sorted[idx].Key) {
|
||||
matched = &sorted[idx]
|
||||
break
|
||||
}
|
||||
prevSpace = false
|
||||
b.WriteRune(r)
|
||||
}
|
||||
lines[i] = strings.TrimSpace(b.String())
|
||||
if matched == nil {
|
||||
out = append(out, text[i])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
end := i + len(matched.Key)
|
||||
switch {
|
||||
case isBotMention(*matched, botOpenID, botUnionID):
|
||||
// Strip: eat one adjacent space (after the placeholder
|
||||
// preferred; else backtrack one space we already emitted)
|
||||
// so the seam is not left with a double space or a
|
||||
// dangling leading space. Tabs / newlines / other chars
|
||||
// are untouched.
|
||||
if end < len(text) && text[end] == ' ' {
|
||||
end++
|
||||
} else if n := len(out); n > 0 && out[n-1] == ' ' {
|
||||
out = out[:n-1]
|
||||
}
|
||||
case matched.Name != "":
|
||||
out = append(out, '@')
|
||||
out = append(out, matched.Name...)
|
||||
default:
|
||||
// Unknown mention — leave the placeholder intact so the
|
||||
// agent at least sees a stable token.
|
||||
out = append(out, matched.Key...)
|
||||
}
|
||||
i = end
|
||||
}
|
||||
return strings.Join(lines, "\n")
|
||||
return string(out)
|
||||
}
|
||||
|
||||
// isBotMention identifies whether a payload mention refers to THIS
|
||||
// bot. Stays in lockstep with containsMention: when union_id is
|
||||
// known we trust it exclusively (open_id is structurally inverted
|
||||
// in multi-bot groups — matching on it would re-introduce the
|
||||
// MUL-2671 routing bug). Only when union_id is missing do we fall
|
||||
// back to open_id, which is correct in single-bot installs and the
|
||||
// best we can do in pre-backfill rows.
|
||||
func isBotMention(m larkMention, botOpenID, botUnionID string) bool {
|
||||
if botUnionID != "" {
|
||||
return m.ID.UnionID == botUnionID
|
||||
}
|
||||
if botOpenID == "" {
|
||||
return false
|
||||
}
|
||||
return m.ID.OpenID == botOpenID
|
||||
}
|
||||
|
||||
func extractTextBody(content string) string {
|
||||
|
||||
@@ -248,7 +248,11 @@ func TestLarkJSONFrameDecoderMentionPlaceholderRewrite(t *testing.T) {
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("preserves newlines and collapses spaces from stripped mention", func(t *testing.T) {
|
||||
t.Run("preserves newlines after stripped mention", func(t *testing.T) {
|
||||
// Strip the bot mention + one adjacent space; the newline that
|
||||
// follows stays put so the rest of the message keeps its
|
||||
// shape. User-typed extra spaces (the double space here) are
|
||||
// preserved verbatim — we do not globally collapse whitespace.
|
||||
inst := db.LarkInstallation{
|
||||
BotOpenID: "ou_bot",
|
||||
BotUnionID: pgText("on_bot"),
|
||||
@@ -258,8 +262,8 @@ func TestLarkJSONFrameDecoderMentionPlaceholderRewrite(t *testing.T) {
|
||||
if err != nil || !ok {
|
||||
t.Fatalf("ok=%v err=%v", ok, err)
|
||||
}
|
||||
if msg.Body != "first line\nsecond line" {
|
||||
t.Errorf("Body = %q; want %q", msg.Body, "first line\nsecond line")
|
||||
if msg.Body != " first line\nsecond line" {
|
||||
t.Errorf("Body = %q; want %q", msg.Body, " first line\nsecond line")
|
||||
}
|
||||
})
|
||||
|
||||
@@ -276,6 +280,97 @@ func TestLarkJSONFrameDecoderMentionPlaceholderRewrite(t *testing.T) {
|
||||
t.Errorf("Body = %q; want %q", msg.Body, "just a normal message")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("preserves indentation and tabs around stripped mention", func(t *testing.T) {
|
||||
// Code-block / indented messages: stripping the bot mention
|
||||
// must not eat the surrounding indent, tabs, or any internal
|
||||
// whitespace the user intentionally typed. We only consume a
|
||||
// single space directly adjacent to the placeholder.
|
||||
inst := db.LarkInstallation{
|
||||
BotOpenID: "ou_bot",
|
||||
BotUnionID: pgText("on_bot"),
|
||||
}
|
||||
mentions := `[{"key":"@_user_1","name":"My Bot","id":{"open_id":"ou_bot_wire","union_id":"on_bot"}}]`
|
||||
raw := " @_user_1 review this snippet:\n\tfunc add(a, b int) int {\n\t\treturn a + b\n\t}"
|
||||
want := " review this snippet:\n\tfunc add(a, b int) int {\n\t\treturn a + b\n\t}"
|
||||
msg, ok, err := d.Decode(mkRaw(raw, mentions), inst)
|
||||
if err != nil || !ok {
|
||||
t.Fatalf("ok=%v err=%v", ok, err)
|
||||
}
|
||||
if msg.Body != want {
|
||||
t.Errorf("Body = %q; want %q", msg.Body, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("avoids @_user_1 / @_user_10 prefix collision", func(t *testing.T) {
|
||||
// Lark assigns mention keys positionally; a chat with eleven+
|
||||
// participants exposes both `@_user_1` and `@_user_10`. Naive
|
||||
// ReplaceAll for `@_user_1` would mangle `@_user_10`, so we
|
||||
// match longest-first.
|
||||
inst := db.LarkInstallation{
|
||||
BotOpenID: "ou_bot",
|
||||
BotUnionID: pgText("on_bot"),
|
||||
}
|
||||
mentions := `[
|
||||
{"key":"@_user_1","name":"My Bot","id":{"open_id":"ou_bot_wire","union_id":"on_bot"}},
|
||||
{"key":"@_user_10","name":"Alice","id":{"open_id":"ou_alice","union_id":"on_alice"}}
|
||||
]`
|
||||
raw := "@_user_1 forward this to @_user_10 please"
|
||||
want := "forward this to @Alice please"
|
||||
msg, ok, err := d.Decode(mkRaw(raw, mentions), inst)
|
||||
if err != nil || !ok {
|
||||
t.Fatalf("ok=%v err=%v", ok, err)
|
||||
}
|
||||
if msg.Body != want {
|
||||
t.Errorf("Body = %q; want %q", msg.Body, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("@-ing both bots in one message strips only self, renders other by name", func(t *testing.T) {
|
||||
// Multi-bot group chat where the user @-mentions BOTH bots in
|
||||
// the same message. From this WS's perspective only the self
|
||||
// mention should be stripped; the sibling bot renders as
|
||||
// @<displayName> so the agent receives a faithful transcript
|
||||
// of the user intent.
|
||||
inst := db.LarkInstallation{
|
||||
BotOpenID: "ou_self_canonical",
|
||||
BotUnionID: pgText("on_self_union"),
|
||||
}
|
||||
mentions := `[
|
||||
{"key":"@_user_1","name":"Self Bot","id":{"open_id":"ou_self_wire","union_id":"on_self_union"}},
|
||||
{"key":"@_user_2","name":"Sibling Bot","id":{"open_id":"ou_sibling_wire","union_id":"on_sibling_union"}}
|
||||
]`
|
||||
raw := "@_user_1 @_user_2 please coordinate"
|
||||
want := "@Sibling Bot please coordinate"
|
||||
msg, ok, err := d.Decode(mkRaw(raw, mentions), inst)
|
||||
if err != nil || !ok {
|
||||
t.Fatalf("ok=%v err=%v", ok, err)
|
||||
}
|
||||
if msg.Body != want {
|
||||
t.Errorf("Body = %q; want %q", msg.Body, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("open_id match does NOT strip when union_id known but differs", func(t *testing.T) {
|
||||
// Mirror of containsMention's union_id-first rule: when we
|
||||
// know our union_id, an open_id-only match means the mention
|
||||
// is for the OTHER bot (the inverse-mapping quirk), so we
|
||||
// must render it as @<name>, not strip it.
|
||||
inst := db.LarkInstallation{
|
||||
BotOpenID: "ou_self_canonical",
|
||||
BotUnionID: pgText("on_self_union"),
|
||||
}
|
||||
mentions := `[{"key":"@_user_1","name":"Sibling Bot","id":{"open_id":"ou_self_canonical","union_id":"on_sibling_union"}}]`
|
||||
raw := "@_user_1 hi"
|
||||
want := "@Sibling Bot hi"
|
||||
msg, ok, err := d.Decode(mkRaw(raw, mentions), inst)
|
||||
if err != nil || !ok {
|
||||
t.Fatalf("ok=%v err=%v", ok, err)
|
||||
}
|
||||
if msg.Body != want {
|
||||
t.Errorf("Body = %q; want %q", msg.Body, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestLarkJSONFrameDecoderDropsHeartbeat(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user