mirror of
https://github.com/multica-ai/multica.git
synced 2026-06-17 03:38:32 +02:00
* feat(server/auth): cache PAT lookups in Redis with 60s TTL
Personal access tokens used to hit Postgres on every request: a SELECT
to resolve token_hash → user_id, plus a fire-and-forget UPDATE of
last_used_at. For a CLI / daemon making many requests per second this
is wasted DB load — the token is the same and the answer hasn't changed.
Add a Redis-backed cache (auth.PATCache) keyed by token hash, TTL 60s:
- On cache hit, the auth middleware skips both the SELECT and the
last_used_at UPDATE. last_used_at is now refreshed at most once per
TTL window per token, not per request.
- On cache miss the middleware falls back to today's behavior: query
Postgres, populate the cache, async-update last_used_at.
- On revoke, the handler invalidates the cache entry so revocation
takes effect immediately rather than waiting for the TTL to expire.
This required changing RevokePersonalAccessToken from :exec to :one
RETURNING token_hash.
The cache is nil-safe: when REDIS_URL isn't configured, NewPATCache
returns nil and the middleware degrades to today's always-hit-DB
behavior. JWT validation is untouched (already DB-free).
Tested with REDIS_TEST_URL — same gating pattern the rest of the
suite uses for Redis-backed tests. New tests cover nil-safety, set/
get/invalidate, TTL, and the middleware short-circuit on cache hit.
* fix(server/auth): clamp PAT cache TTL to token's remaining lifetime
GPT-Boy review caught: a PAT expiring in <60s would still be cached
for the full PATCacheTTL window, so the token could continue passing
auth on cache hit for up to ~60s after its expires_at. The DB query
filters expired tokens (revoked = FALSE AND expires_at > now()), but
that filter never ran on a cache hit.
Make Set take an explicit ttl, and add TTLForExpiry to compute it:
- no expires_at → full PATCacheTTL
- expires_at far → full PATCacheTTL
- expires_at <60s → time until expiry
- already expired → 0, Set skips caching (TOCTOU defense between
the SELECT and the Set, since the SELECT
already filters expired rows)
Regression test pins the clamp behavior end-to-end against Redis.
* feat(server/auth): cache daemon-token + PAT lookups in DaemonAuth, bump TTL to 10m
Daemon /api/daemon/* requests (heartbeat, claim task) hit DaemonAuth
which previously did its own GetDaemonTokenByHash on every request and
*also* duplicated the PAT lookup on the mul_ fallback — bypassing the
cache added in 1cdd674c. Today's daemons authenticate via mul_ PATs
(mdt_ minting isn't wired up yet), so the duplicate PAT path is the one
that actually matters for hot-path DB load.
Three changes:
1. New auth.DaemonTokenCache mirrors PATCache for the mdt_ path
(key = mul:auth:daemon:<sha256>, JSON value = {workspace_id, daemon_id}).
Forward-looking infrastructure for when daemon tokens get minted; the
middleware short-circuits the DB SELECT on cache hit. TTL clamped to
the token's expires_at via the shared TTLForExpiry helper.
2. DaemonAuth now also consults PATCache on its mul_ fallback, sharing
the same cache as the regular Auth middleware. A daemon making 4 hb/min
collapses from 4 GetPersonalAccessTokenByHash + 4 last_used_at writes
per minute to ~1 of each per AuthCacheTTL window (~10 minutes).
3. Rename PATCacheTTL → AuthCacheTTL and bump from 60s to 10 minutes.
The constant is now shared between PAT and daemon caches; 10m matches
the user-requested longer TTL for further DB write reduction. Revoke
latency on the happy path is still instant via active invalidation;
the worst-case (Redis Del miss / direct-DB revoke) grows from ~60s to
~10m.
Tests cover nil-safety, set/get/invalidate, TTL, clamped TTL on near-
expiry tokens, and the middleware short-circuit for both cache paths
(mdt_ via DaemonTokenCache, mul_ fallback via PATCache).
* feat(server/auth): cache PAT lookups on the WebSocket auth path
The third place a PAT is resolved — patResolver.ResolveToken used by
realtime.HandleWebSocket — was still hitting Postgres on every /ws
auth and firing an unconditional last_used_at UPDATE, bypassing the
cache added in 1cdd674c. Wire it through the same shared PATCache so
revoking a token through any path (Auth middleware, DaemonAuth PAT
fallback, or WS auth) hits all three caches with one Invalidate.
Also leaves a comment on DeleteDaemonTokensByWorkspaceAndDaemon —
the query has no caller today, but a future deregister/rotate flow
must remember to call DaemonTokenCache.Invalidate(hash) for each
deleted row, otherwise deleted daemon tokens stay valid until TTL.
168 lines
4.8 KiB
Go
168 lines
4.8 KiB
Go
package auth
|
|
|
|
import (
|
|
"context"
|
|
"os"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/redis/go-redis/v9"
|
|
)
|
|
|
|
// newRedisTestClient mirrors the helper in the handler package: connect to
|
|
// REDIS_TEST_URL, flush, and skip when unset so `go test ./...` works on a
|
|
// stock laptop without a Redis instance running.
|
|
func newRedisTestClient(t *testing.T) *redis.Client {
|
|
t.Helper()
|
|
url := os.Getenv("REDIS_TEST_URL")
|
|
if url == "" {
|
|
t.Skip("REDIS_TEST_URL not set")
|
|
}
|
|
opts, err := redis.ParseURL(url)
|
|
if err != nil {
|
|
t.Fatalf("parse REDIS_TEST_URL: %v", err)
|
|
}
|
|
rdb := redis.NewClient(opts)
|
|
ctx := context.Background()
|
|
if err := rdb.Ping(ctx).Err(); err != nil {
|
|
t.Skipf("REDIS_TEST_URL unreachable: %v", err)
|
|
}
|
|
if err := rdb.FlushDB(ctx).Err(); err != nil {
|
|
t.Fatalf("flushdb: %v", err)
|
|
}
|
|
t.Cleanup(func() {
|
|
rdb.FlushDB(context.Background())
|
|
rdb.Close()
|
|
})
|
|
return rdb
|
|
}
|
|
|
|
func TestPATCache_NilSafe(t *testing.T) {
|
|
var c *PATCache // nil
|
|
ctx := context.Background()
|
|
|
|
if v, ok := c.Get(ctx, "any-hash"); ok || v != "" {
|
|
t.Fatalf("nil cache must miss; got (%q, %v)", v, ok)
|
|
}
|
|
c.Set(ctx, "any-hash", "user-1", AuthCacheTTL) // no panic
|
|
c.Invalidate(ctx, "any-hash") // no panic
|
|
}
|
|
|
|
func TestNewPATCache_NilRedisReturnsNil(t *testing.T) {
|
|
if c := NewPATCache(nil); c != nil {
|
|
t.Fatalf("NewPATCache(nil) must return nil, got %#v", c)
|
|
}
|
|
}
|
|
|
|
func TestPATCache_SetGetInvalidate(t *testing.T) {
|
|
rdb := newRedisTestClient(t)
|
|
c := NewPATCache(rdb)
|
|
if c == nil {
|
|
t.Fatal("NewPATCache returned nil")
|
|
}
|
|
ctx := context.Background()
|
|
|
|
if _, ok := c.Get(ctx, "missing"); ok {
|
|
t.Fatal("expected miss before set")
|
|
}
|
|
|
|
c.Set(ctx, "hash-A", "user-A", AuthCacheTTL)
|
|
if v, ok := c.Get(ctx, "hash-A"); !ok || v != "user-A" {
|
|
t.Fatalf("expected hit user-A, got (%q, %v)", v, ok)
|
|
}
|
|
|
|
c.Invalidate(ctx, "hash-A")
|
|
if v, ok := c.Get(ctx, "hash-A"); ok {
|
|
t.Fatalf("expected miss after invalidate, got (%q, %v)", v, ok)
|
|
}
|
|
}
|
|
|
|
// TestPATCache_TTL pins the contract that entries expire on AuthCacheTTL so
|
|
// the auth middleware refreshes last_used_at at most once per window.
|
|
//
|
|
// We don't sleep AuthCacheTTL (60s); instead we assert the TTL is what the
|
|
// constructor set, which is the property the middleware actually depends
|
|
// on.
|
|
func TestPATCache_TTL(t *testing.T) {
|
|
rdb := newRedisTestClient(t)
|
|
c := NewPATCache(rdb)
|
|
if c == nil {
|
|
t.Fatal("NewPATCache returned nil")
|
|
}
|
|
ctx := context.Background()
|
|
|
|
c.Set(ctx, "hash-T", "user-T", AuthCacheTTL)
|
|
ttl, err := rdb.TTL(ctx, patCacheKey("hash-T")).Result()
|
|
if err != nil {
|
|
t.Fatalf("TTL: %v", err)
|
|
}
|
|
// Redis returns the remaining TTL; allow a small skew for rounding.
|
|
if ttl <= 0 || ttl > AuthCacheTTL+time.Second {
|
|
t.Fatalf("unexpected TTL %v (want ~%v)", ttl, AuthCacheTTL)
|
|
}
|
|
}
|
|
|
|
func TestTTLForExpiry(t *testing.T) {
|
|
now := time.Date(2026, 4, 29, 12, 0, 0, 0, time.UTC)
|
|
|
|
// No expiry set → full AuthCacheTTL.
|
|
if got := TTLForExpiry(now, time.Time{}); got != AuthCacheTTL {
|
|
t.Fatalf("zero expires_at: got %v, want %v", got, AuthCacheTTL)
|
|
}
|
|
|
|
// Far-future expiry → full AuthCacheTTL.
|
|
far := now.Add(24 * time.Hour)
|
|
if got := TTLForExpiry(now, far); got != AuthCacheTTL {
|
|
t.Fatalf("far-future expires_at: got %v, want %v", got, AuthCacheTTL)
|
|
}
|
|
|
|
// Sooner-than-TTL expiry → clamped to remaining lifetime.
|
|
soon := now.Add(10 * time.Second)
|
|
if got := TTLForExpiry(now, soon); got != 10*time.Second {
|
|
t.Fatalf("sooner expires_at: got %v, want 10s", got)
|
|
}
|
|
|
|
// Already expired (or exactly now) → 0, caller skips caching.
|
|
if got := TTLForExpiry(now, now); got != 0 {
|
|
t.Fatalf("expires_at == now: got %v, want 0", got)
|
|
}
|
|
if got := TTLForExpiry(now, now.Add(-time.Second)); got != 0 {
|
|
t.Fatalf("past expires_at: got %v, want 0", got)
|
|
}
|
|
}
|
|
|
|
// TestPATCache_Set_RespectsClampedTTL is the regression test for the
|
|
// review finding: a PAT expiring in <AuthCacheTTL must NOT be cached for
|
|
// the full AuthCacheTTL window, otherwise it would continue passing auth
|
|
// on cache hit after expires_at.
|
|
func TestPATCache_Set_RespectsClampedTTL(t *testing.T) {
|
|
rdb := newRedisTestClient(t)
|
|
c := NewPATCache(rdb)
|
|
if c == nil {
|
|
t.Fatal("NewPATCache returned nil")
|
|
}
|
|
ctx := context.Background()
|
|
|
|
// Cache with a 5s TTL — what TTLForExpiry would return for a token
|
|
// expiring 5s from now.
|
|
c.Set(ctx, "hash-short", "user-short", 5*time.Second)
|
|
ttl, err := rdb.TTL(ctx, patCacheKey("hash-short")).Result()
|
|
if err != nil {
|
|
t.Fatalf("TTL: %v", err)
|
|
}
|
|
if ttl <= 0 || ttl > 5*time.Second+time.Second {
|
|
t.Fatalf("expected clamped TTL ~5s, got %v", ttl)
|
|
}
|
|
|
|
// Zero / negative TTL must skip caching entirely (already-expired
|
|
// token's TOCTOU-safe path).
|
|
c.Set(ctx, "hash-zero", "user-zero", 0)
|
|
if _, ok := c.Get(ctx, "hash-zero"); ok {
|
|
t.Fatal("zero-TTL Set must not cache")
|
|
}
|
|
c.Set(ctx, "hash-neg", "user-neg", -time.Second)
|
|
if _, ok := c.Get(ctx, "hash-neg"); ok {
|
|
t.Fatal("negative-TTL Set must not cache")
|
|
}
|
|
}
|