mirror of
https://github.com/multica-ai/multica.git
synced 2026-07-05 13:29:44 +02:00
* feat(auth): support mcn_ Cloud Node PATs verified via Fleet
Adds a new token kind, mcn_ (multica cloud node), recognized in both
the regular Auth and DaemonAuth middlewares. mcn_ tokens are minted
and owned by Multica Cloud (not the local personal_access_tokens
table); the server validates them by POSTing to the Fleet's
/api/v1/pat/verify endpoint and uses the returned owner_id as
X-User-ID for downstream handlers.
Cloud is the authoritative owner of token status, so this is a
verifier-only path with no DB fallback:
* Fleet says valid:false -> 401 (token genuinely bad)
* Fleet unreachable / 5xx -> 503 (transient, retry)
* No MULTICA_CLOUD_FLEET_URL configured -> 401 (fail closed)
Verification results are cached in Redis for 60s under
mul:auth:mcn:<sha256> to bound the per-request load on Fleet without
extending the revocation window beyond what the Cloud doc allows.
Negative results are NOT cached, so a freshly minted token doesn't
get locked out by a stale 'token_not_found'.
Reuses MULTICA_CLOUD_FLEET_URL (the same env the cloud-runtime proxy
already uses) so deployments don't need a second config knob.
Tests cover the happy path, every documented invalid reason, 4xx/5xx
mapping, network error, decode error, ctx cancellation, the
fail-closed valid:true-without-owner_id case, trailing-slash URL
normalization, and the Redis cache short-circuit + negative
no-cache contract. Middleware tests pin the four 401/503/200 outcomes
in both Auth and DaemonAuth.
* auth(mcn): require owner_id to map to a real local user; drop X-User-PAT plumbing
Two related changes:
1. Cloud-verified owner_id is now checked against our local users table.
The Cloud owner_id and our users.id share the same UUID space by
contract; a missing local user means either the row was deleted
under an active node or something is forging owner_ids — either
way, fail closed.
CloudPATVerifier.Verify takes a new OwnerLookupFunc:
- returns (true, nil) -> success, cache + return
- returns (false, nil) -> ErrCloudPATInvalid (reason='owner_unknown'),
NOT cached (so a freshly-created user
doesn't get locked out for a TTL window)
- returns (_, error) -> ErrCloudPATUnavailable (transient,
middleware emits 503)
Both Auth and DaemonAuth wire ownerLookupFor(queries), a new shared
helper that wraps queries.GetUser, mapping pgx.ErrNoRows / unparseable
UUIDs to (false, nil) and other errors to a real Go error.
2. Removed all X-User-PAT plumbing. Cloud now mints node-scoped mcn_
PATs itself during /api/v1/nodes (see multica-cloud
docs/api/node-pat.md) and ships them into the EC2 instance via SSM,
so multica-api no longer needs to forward the caller's mul_ PAT.
Propagating a long-lived user PAT into a remote machine widened
the blast radius of any node compromise; that's gone now.
Removed:
- cloud_runtime.go: withUserPAT option, cloudRuntimeUserPAT,
generateCloudRuntimePAT, revokeGeneratedPAT
- cloudruntime/Request.UserPAT field + X-User-PAT header
- X-User-PAT from CORS allowed headers
- obsolete handler tests:
TestCreateCloudRuntimeNodeForwardsValidatedPAT
TestCreateCloudRuntimeNodeRejectsUnownedPAT
TestCreateCloudRuntimeNodeRejectsExpiredPAT
TestCreateCloudRuntimeNodeAutoGeneratesPAT
replaced with TestCreateCloudRuntimeNodeForwardsBody
- X-User-PAT references in packages/core/api/client.test.ts
Tests:
* 3 new verifier-level tests (owner_unknown not cached, lookup error
-> Unavailable, success path is cached for both fleet AND lookup)
* 5 new owner_lookup_test.go tests (nil queries, existing user,
missing user, malformed UUID, DB error)
* 1 new end-to-end DaemonAuth test (cloud says valid, no local user
-> 401)
* Existing X-User-PAT TS assertions removed; full vitest run passes.
* go test ./... and go vet ./... clean on the server module.
441 lines
18 KiB
Go
441 lines
18 KiB
Go
package auth
|
||
|
||
import (
|
||
"bytes"
|
||
"context"
|
||
"encoding/json"
|
||
"errors"
|
||
"fmt"
|
||
"io"
|
||
"log/slog"
|
||
"net/http"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/redis/go-redis/v9"
|
||
)
|
||
|
||
// CloudPATPrefix is the literal token prefix that identifies an mcn_
|
||
// (Multica Cloud Node) PAT. Tokens with this prefix are validated by
|
||
// calling the Multica Cloud Fleet service rather than by hitting our
|
||
// local personal_access_tokens table — the cloud is the authoritative
|
||
// owner of the token's lifecycle, status, and (owner_id, instance_id)
|
||
// binding.
|
||
const CloudPATPrefix = "mcn_"
|
||
|
||
// cloudPATCachePrefix namespaces cloud-PAT cache keys away from
|
||
// mul_/mdt_ caches so the three token kinds can't accidentally share
|
||
// keys. The trailing slash mirrors the existing patCachePrefix /
|
||
// daemonTokenCachePrefix conventions.
|
||
const cloudPATCachePrefix = "mul:auth:mcn:"
|
||
|
||
// cloudPATCacheTTL bounds how long a verified mcn_ token stays cached
|
||
// before we re-ask Fleet. The Cloud doc explicitly recommends 30–60s
|
||
// "if upstream really needs to cache" — anything longer widens the
|
||
// revocation window beyond what Cloud is comfortable with. We pick the
|
||
// upper bound: short enough that a revoked / instance-terminated PAT
|
||
// stops working within ~1 minute, long enough that a busy daemon /
|
||
// CLI on a node collapses to one verify call per minute per token.
|
||
//
|
||
// We deliberately do NOT reuse AuthCacheTTL (10m) — that's tuned for
|
||
// our own DB-backed PAT/daemon-token paths where revocation
|
||
// invalidates the cache key directly. We have no way to invalidate
|
||
// when Cloud revokes a PAT, so the TTL itself IS the revocation
|
||
// latency bound.
|
||
const cloudPATCacheTTL = 60 * time.Second
|
||
|
||
// cloudPATVerifyPath is the Fleet endpoint we POST to. The Cloud doc
|
||
// places this under /api/v1/pat/verify; the upstream runs no app-layer
|
||
// auth on it (network-level VPC isolation is the access control), so
|
||
// we just send the token plaintext and let Fleet decide.
|
||
const cloudPATVerifyPath = "/api/v1/pat/verify"
|
||
|
||
// cloudPATVerifyRequestMaxBytes is the documented Fleet hard cap on
|
||
// request bodies (4 KiB). Our marshalled requests are well under that
|
||
// — this constant exists to make the intent visible if anyone adds
|
||
// optional fields later.
|
||
const cloudPATVerifyRequestMaxBytes = 4 * 1024
|
||
|
||
// cloudPATVerifyResponseMaxBytes bounds how much of a Fleet response
|
||
// we'll read. The success payload is well under 1 KiB; we pick 64 KiB
|
||
// to stay well clear of pathological responses without enabling a
|
||
// memory-exhaustion vector via a misbehaving Fleet.
|
||
const cloudPATVerifyResponseMaxBytes = 64 * 1024
|
||
|
||
// cloudPATDefaultTimeout is the per-request HTTP timeout for verify
|
||
// calls when the caller doesn't supply an *http.Client. Auth must
|
||
// stay snappy: Fleet should answer in tens of milliseconds, and a
|
||
// hung verify would block every incoming request behind it. Tighter
|
||
// than cloudruntime's 35s because that one proxies arbitrary user
|
||
// traffic; this one only ever sees a small JSON exchange.
|
||
const cloudPATDefaultTimeout = 5 * time.Second
|
||
|
||
// Verifier sentinel errors. Callers (the Auth / DaemonAuth middlewares)
|
||
// branch on these to map cloud outcomes onto HTTP status codes:
|
||
//
|
||
// - ErrCloudPATInvalid → 401 (Fleet says token is bad)
|
||
// - ErrCloudPATUnavailable → 503 (Fleet unreachable / 5xx)
|
||
// - ErrCloudPATNotConfigured → 401 (server has no Fleet URL set; we
|
||
// don't reveal that mcn_ is "supported but disabled" — failing
|
||
// closed avoids treating misconfigured prod the same as enabled)
|
||
var (
|
||
ErrCloudPATInvalid = errors.New("cloud pat invalid")
|
||
ErrCloudPATUnavailable = errors.New("cloud pat verifier unavailable")
|
||
ErrCloudPATNotConfigured = errors.New("cloud pat verifier not configured")
|
||
)
|
||
|
||
// CloudPATIdentity is what a successful verify resolves to. We keep
|
||
// only the fields the auth path actually needs:
|
||
//
|
||
// - OwnerID is the user whose request this is (mapped to X-User-ID).
|
||
// - InstanceID / InstanceRecordID are recorded so downstream code can
|
||
// correlate the request with a specific cloud node; they are not
|
||
// used for authorization today, but stashing them now keeps the
|
||
// wire shape stable for callers that later want to assert a
|
||
// particular instance binding.
|
||
//
|
||
// We deliberately drop token_last4, status, issued_at, etc. — those
|
||
// are diagnostic fields that don't belong in cached auth state.
|
||
type CloudPATIdentity struct {
|
||
OwnerID string `json:"o"`
|
||
InstanceID string `json:"i"`
|
||
InstanceRecordID string `json:"r"`
|
||
}
|
||
|
||
// CloudPATInvalidError carries the Fleet-reported reason for a
|
||
// valid=false response. The middleware uses this to log why an mcn_
|
||
// token was rejected without exposing the reason in the 401 body —
|
||
// per the Cloud doc, callers shouldn't differentiate token_not_found
|
||
// vs token_revoked for security decisions.
|
||
//
|
||
// The "owner_unknown" reason is also produced locally by Verify when
|
||
// Cloud accepted the token but the returned owner_id does not map to
|
||
// a real user in our DB. Treating that as a Cloud-style "invalid"
|
||
// keeps the middleware's response shape uniform — the result is the
|
||
// same: 401, drop the token.
|
||
type CloudPATInvalidError struct {
|
||
Reason string
|
||
}
|
||
|
||
func (e *CloudPATInvalidError) Error() string {
|
||
if e == nil || e.Reason == "" {
|
||
return "cloud pat invalid"
|
||
}
|
||
return "cloud pat invalid: " + e.Reason
|
||
}
|
||
|
||
// Is lets errors.Is(err, ErrCloudPATInvalid) match any
|
||
// CloudPATInvalidError, so callers can branch on the category without
|
||
// caring about the exact reason string.
|
||
func (e *CloudPATInvalidError) Is(target error) bool {
|
||
return target == ErrCloudPATInvalid
|
||
}
|
||
|
||
// CloudPATInvalidReasonOwnerUnknown is the synthetic reason emitted
|
||
// when Cloud verified the token but the returned owner_id was not
|
||
// found in the local users table. The Cloud `owner_id` and our
|
||
// `users.id` share the same UUID space by contract; a mismatch means
|
||
// either the user has been deleted on our side after the node was
|
||
// minted, or (worse) something is impersonating Cloud and trying to
|
||
// surface a forged owner_id. Either way the request must be rejected.
|
||
const CloudPATInvalidReasonOwnerUnknown = "owner_unknown"
|
||
|
||
// OwnerLookupFunc is the user-existence check Verify runs against
|
||
// Cloud's owner_id before caching / returning the identity. The
|
||
// caller (typically the middleware closure) wires it to a
|
||
// queries.GetUser call.
|
||
//
|
||
// Return semantics:
|
||
// - (true, nil) → owner_id is a valid local user; Verify returns success.
|
||
// - (false, nil) → owner_id does not exist locally; Verify returns
|
||
// ErrCloudPATInvalid with reason="owner_unknown" and does NOT
|
||
// cache (a missing user can be re-created later, and we don't
|
||
// want to lock that retry out for a TTL window).
|
||
// - (_, err) → infrastructure error (DB unreachable, etc.);
|
||
// Verify wraps as ErrCloudPATUnavailable so the caller emits 503,
|
||
// not 401.
|
||
type OwnerLookupFunc func(ctx context.Context, ownerID string) (bool, error)
|
||
|
||
// CloudPATVerifier resolves mcn_ PATs by calling
|
||
// POST <fleetURL>/api/v1/pat/verify and caches verified results in
|
||
// Redis for cloudPATCacheTTL.
|
||
//
|
||
// A nil *CloudPATVerifier is safe — Verify returns
|
||
// ErrCloudPATNotConfigured. The Auth/DaemonAuth middlewares treat
|
||
// "verifier nil" the same as "fleet URL empty", so a server with no
|
||
// MULTICA_CLOUD_FLEET_URL configured simply rejects mcn_ tokens at
|
||
// the prefix branch instead of nil-derefing.
|
||
type CloudPATVerifier struct {
|
||
baseURL string
|
||
http *http.Client
|
||
rdb *redis.Client // may be nil — disables caching
|
||
}
|
||
|
||
// CloudPATVerifierConfig assembles the dependencies for
|
||
// NewCloudPATVerifier. Keeping this a struct (vs positional args)
|
||
// leaves room for future knobs (custom TTL, expected_owner_id binding)
|
||
// without churning every call site.
|
||
type CloudPATVerifierConfig struct {
|
||
// FleetBaseURL is the Cloud Fleet base URL (e.g.
|
||
// https://fleet.multica.cloud). Trailing slashes are trimmed.
|
||
// Empty disables the verifier — NewCloudPATVerifier returns nil.
|
||
FleetBaseURL string
|
||
|
||
// HTTPClient is the client used for verify calls. Optional —
|
||
// when nil, a client with cloudPATDefaultTimeout is created.
|
||
// Pass a shared client when you want connection pooling /
|
||
// per-deployment transport tuning.
|
||
HTTPClient *http.Client
|
||
|
||
// Redis backs the positive-result cache. Nil disables caching —
|
||
// every Verify call hits Fleet. Same nil-safe contract as
|
||
// PATCache / DaemonTokenCache.
|
||
Redis *redis.Client
|
||
}
|
||
|
||
// NewCloudPATVerifier returns a verifier for cfg.FleetBaseURL. If the
|
||
// URL is empty after trimming, returns nil — callers (router /
|
||
// middleware) treat nil as "mcn_ not supported on this deployment".
|
||
func NewCloudPATVerifier(cfg CloudPATVerifierConfig) *CloudPATVerifier {
|
||
base := strings.TrimRight(strings.TrimSpace(cfg.FleetBaseURL), "/")
|
||
if base == "" {
|
||
return nil
|
||
}
|
||
client := cfg.HTTPClient
|
||
if client == nil {
|
||
client = &http.Client{Timeout: cloudPATDefaultTimeout}
|
||
}
|
||
return &CloudPATVerifier{
|
||
baseURL: base,
|
||
http: client,
|
||
rdb: cfg.Redis,
|
||
}
|
||
}
|
||
|
||
// Configured reports whether the verifier has a Fleet URL. Convenience
|
||
// for telemetry — a nil receiver also returns false. The middleware
|
||
// uses ordinary nil checks instead of this in hot paths.
|
||
func (v *CloudPATVerifier) Configured() bool {
|
||
return v != nil && v.baseURL != ""
|
||
}
|
||
|
||
// Verify resolves token to a CloudPATIdentity by consulting (in order):
|
||
//
|
||
// 1. Redis cache, keyed by sha256(token). Hit → return cached
|
||
// identity, no Fleet round-trip and no DB lookup. The cache only
|
||
// ever contains identities that have already passed both Cloud's
|
||
// verify AND the local owner-existence check, so a cache hit is
|
||
// a fully-validated decision.
|
||
// 2. Fleet POST /api/v1/pat/verify. The response distinguishes:
|
||
// - HTTP 200 + valid=true → continues to step 3
|
||
// - HTTP 200 + valid=false → CloudPATInvalidError{Reason:...}
|
||
// (also wraps as ErrCloudPATInvalid via Is)
|
||
// - HTTP 4xx/5xx, network, timeout, decode failure
|
||
// → ErrCloudPATUnavailable
|
||
// 3. Local owner-existence check via lookup(owner_id):
|
||
// - exists → cache + return success
|
||
// - missing → ErrCloudPATInvalid (reason="owner_unknown"), NOT cached
|
||
// - error → ErrCloudPATUnavailable
|
||
//
|
||
// `lookup` may be nil — Verify then skips step 3. Production callers
|
||
// (Auth / DaemonAuth) always supply one; nil mode is for unit tests
|
||
// that exercise the verifier in isolation from the DB.
|
||
//
|
||
// We deliberately do NOT cache valid=false responses or
|
||
// owner_unknown rejections — per the Cloud doc, negative results can
|
||
// flip back to positive (lazy-revoke reconciliation, owner created
|
||
// later in our DB), and a stale negative would permanently lock out
|
||
// a freshly minted token within the TTL window.
|
||
//
|
||
// On a nil receiver returns ErrCloudPATNotConfigured so the
|
||
// middleware can map mcn_ tokens to 401 cleanly when the deployment
|
||
// has no Fleet URL.
|
||
func (v *CloudPATVerifier) Verify(ctx context.Context, token string, lookup OwnerLookupFunc) (CloudPATIdentity, error) {
|
||
if v == nil || v.baseURL == "" {
|
||
return CloudPATIdentity{}, ErrCloudPATNotConfigured
|
||
}
|
||
if token == "" {
|
||
return CloudPATIdentity{}, ErrCloudPATInvalid
|
||
}
|
||
|
||
hash := HashToken(token)
|
||
if id, ok := v.cacheGet(ctx, hash); ok {
|
||
return id, nil
|
||
}
|
||
|
||
id, err := v.fetch(ctx, token)
|
||
if err != nil {
|
||
return CloudPATIdentity{}, err
|
||
}
|
||
|
||
if lookup != nil {
|
||
exists, lookupErr := lookup(ctx, id.OwnerID)
|
||
if lookupErr != nil {
|
||
// Treat a DB / infrastructure error the same as a Cloud
|
||
// outage: surface 503 so the caller retries instead of
|
||
// throwing out a still-valid token. The cache is NOT
|
||
// populated, so a transient blip resolves on the next
|
||
// request.
|
||
slog.Warn("cloud_pat: owner lookup failed; treating as unavailable", "error", lookupErr)
|
||
return CloudPATIdentity{}, ErrCloudPATUnavailable
|
||
}
|
||
if !exists {
|
||
// Cloud accepted the token, but the owner_id it returned
|
||
// is not a user we know. Reject without caching — if the
|
||
// user is created later, the next request must succeed
|
||
// without waiting for the TTL.
|
||
slog.Warn("cloud_pat: cloud-verified owner_id has no local user", "owner_id", id.OwnerID)
|
||
return CloudPATIdentity{}, &CloudPATInvalidError{Reason: CloudPATInvalidReasonOwnerUnknown}
|
||
}
|
||
}
|
||
|
||
v.cacheSet(ctx, hash, id)
|
||
return id, nil
|
||
}
|
||
|
||
// fleetVerifyRequest mirrors the Cloud doc's request schema. We only
|
||
// send `token` today — `expected_owner_id` / `expected_instance_id`
|
||
// would let the verifier fail a token bound to a different user than
|
||
// the request claims, but at this layer we don't yet know the
|
||
// "claimed" user. Wiring those in is a future hardening step.
|
||
type fleetVerifyRequest struct {
|
||
Token string `json:"token"`
|
||
}
|
||
|
||
// fleetVerifyResponse is the union response shape. `Valid` discriminates
|
||
// the two arms; on `valid:false` only `Reason` is meaningful (per the
|
||
// Cloud doc, mismatch responses deliberately omit binding info to
|
||
// avoid serving as a probing oracle).
|
||
type fleetVerifyResponse struct {
|
||
Valid bool `json:"valid"`
|
||
Reason string `json:"reason,omitempty"`
|
||
OwnerID string `json:"owner_id,omitempty"`
|
||
InstanceID string `json:"instance_id,omitempty"`
|
||
InstanceRecordID string `json:"instance_record_id,omitempty"`
|
||
}
|
||
|
||
func (v *CloudPATVerifier) fetch(ctx context.Context, token string) (CloudPATIdentity, error) {
|
||
body, err := json.Marshal(fleetVerifyRequest{Token: token})
|
||
if err != nil {
|
||
// json.Marshal of a fixed struct with a string field cannot
|
||
// realistically fail; surfacing as Unavailable keeps callers
|
||
// on the "treat as cloud error" branch.
|
||
return CloudPATIdentity{}, fmt.Errorf("%w: marshal request: %v", ErrCloudPATUnavailable, err)
|
||
}
|
||
if len(body) > cloudPATVerifyRequestMaxBytes {
|
||
// Defense in depth: would only fire if a future change adds
|
||
// expected_* fields and someone passes pathological input.
|
||
return CloudPATIdentity{}, fmt.Errorf("%w: request body exceeds %d bytes", ErrCloudPATUnavailable, cloudPATVerifyRequestMaxBytes)
|
||
}
|
||
|
||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, v.baseURL+cloudPATVerifyPath, bytes.NewReader(body))
|
||
if err != nil {
|
||
return CloudPATIdentity{}, fmt.Errorf("%w: build request: %v", ErrCloudPATUnavailable, err)
|
||
}
|
||
req.Header.Set("Content-Type", "application/json")
|
||
req.Header.Set("Accept", "application/json")
|
||
|
||
resp, err := v.http.Do(req)
|
||
if err != nil {
|
||
// Network error / timeout / DNS / dial failure. We deliberately
|
||
// don't expose `err.Error()` to the HTTP response — let the
|
||
// middleware emit a generic 503. Local logs still see the
|
||
// real cause via the slog at the call site.
|
||
slog.Warn("cloud_pat: verify request failed", "error", err)
|
||
return CloudPATIdentity{}, ErrCloudPATUnavailable
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
if resp.StatusCode != http.StatusOK {
|
||
// Per the Cloud doc, 400 means our request was malformed and
|
||
// 500 means Fleet itself is broken. Either way the right move
|
||
// upstream is "treat the token as un-verifiable right now,
|
||
// return 503". Read a small chunk of the body for log
|
||
// context only.
|
||
var snippet string
|
||
if buf, _ := io.ReadAll(io.LimitReader(resp.Body, 512)); len(buf) > 0 {
|
||
snippet = strings.TrimSpace(string(buf))
|
||
}
|
||
slog.Warn("cloud_pat: verify returned non-200", "status", resp.StatusCode, "body", snippet)
|
||
return CloudPATIdentity{}, ErrCloudPATUnavailable
|
||
}
|
||
|
||
raw, err := io.ReadAll(io.LimitReader(resp.Body, cloudPATVerifyResponseMaxBytes+1))
|
||
if err != nil {
|
||
slog.Warn("cloud_pat: read response failed", "error", err)
|
||
return CloudPATIdentity{}, ErrCloudPATUnavailable
|
||
}
|
||
if len(raw) > cloudPATVerifyResponseMaxBytes {
|
||
slog.Warn("cloud_pat: verify response too large", "limit", cloudPATVerifyResponseMaxBytes)
|
||
return CloudPATIdentity{}, ErrCloudPATUnavailable
|
||
}
|
||
|
||
var parsed fleetVerifyResponse
|
||
if err := json.Unmarshal(raw, &parsed); err != nil {
|
||
slog.Warn("cloud_pat: decode response failed", "error", err)
|
||
return CloudPATIdentity{}, ErrCloudPATUnavailable
|
||
}
|
||
|
||
if !parsed.Valid {
|
||
// Surface the reason in the error so the middleware can log
|
||
// "why" while still returning a generic 401 to the client.
|
||
return CloudPATIdentity{}, &CloudPATInvalidError{Reason: parsed.Reason}
|
||
}
|
||
|
||
if parsed.OwnerID == "" {
|
||
// Defense against a Fleet response that claims valid:true
|
||
// but omits owner_id — without owner_id we have nothing to
|
||
// put in X-User-ID, so it's effectively unusable. Fail
|
||
// closed rather than passing an empty user id downstream.
|
||
slog.Warn("cloud_pat: verify returned valid=true with empty owner_id")
|
||
return CloudPATIdentity{}, ErrCloudPATUnavailable
|
||
}
|
||
|
||
return CloudPATIdentity{
|
||
OwnerID: parsed.OwnerID,
|
||
InstanceID: parsed.InstanceID,
|
||
InstanceRecordID: parsed.InstanceRecordID,
|
||
}, nil
|
||
}
|
||
|
||
func cloudPATCacheKey(hash string) string { return cloudPATCachePrefix + hash }
|
||
|
||
func (v *CloudPATVerifier) cacheGet(ctx context.Context, hash string) (CloudPATIdentity, bool) {
|
||
if v == nil || v.rdb == nil {
|
||
return CloudPATIdentity{}, false
|
||
}
|
||
raw, err := v.rdb.Get(ctx, cloudPATCacheKey(hash)).Bytes()
|
||
if err != nil {
|
||
if !errors.Is(err, redis.Nil) {
|
||
slog.Warn("cloud_pat: cache get failed; falling back to fleet", "error", err)
|
||
}
|
||
return CloudPATIdentity{}, false
|
||
}
|
||
var id CloudPATIdentity
|
||
if err := json.Unmarshal(raw, &id); err != nil {
|
||
slog.Warn("cloud_pat: cache entry malformed; falling back to fleet", "error", err)
|
||
return CloudPATIdentity{}, false
|
||
}
|
||
if id.OwnerID == "" {
|
||
// Safety net: a malformed cache entry (e.g. left over from a
|
||
// prior schema) without an owner_id must not be treated as a
|
||
// hit, otherwise the middleware would set X-User-ID to "".
|
||
return CloudPATIdentity{}, false
|
||
}
|
||
return id, true
|
||
}
|
||
|
||
func (v *CloudPATVerifier) cacheSet(ctx context.Context, hash string, id CloudPATIdentity) {
|
||
if v == nil || v.rdb == nil {
|
||
return
|
||
}
|
||
raw, err := json.Marshal(id)
|
||
if err != nil {
|
||
slog.Warn("cloud_pat: cache marshal failed", "error", err)
|
||
return
|
||
}
|
||
if err := v.rdb.Set(ctx, cloudPATCacheKey(hash), raw, cloudPATCacheTTL).Err(); err != nil {
|
||
slog.Warn("cloud_pat: cache set failed", "error", err)
|
||
}
|
||
}
|