diff --git a/docs/feature-flags.md b/docs/feature-flags.md new file mode 100644 index 000000000..d6bdb82c7 --- /dev/null +++ b/docs/feature-flags.md @@ -0,0 +1,181 @@ +# Feature Flags + +Multica ships a framework-level feature flag implementation: + +- **Backend**: `server/pkg/featureflag` — Go package. +- **Frontend**: `@multica/core/feature-flags` — TypeScript module with React hooks. + +Both sides share the same vocabulary (`Decision`, `EvalContext`, `Rule`, `PercentRollout`) and the same FNV-1a percent bucketing, so a flag evaluated on the server and on the client lands in the same bucket for the same user. + +The package is designed so new features can adopt feature flags without writing any infrastructure code — drop a rule into the static config, call `Service.IsEnabled` / `useFlag`, done. + +--- + +## Core concepts + +``` +[Toggle Point] --query--> [Service / Router] --read--> [Provider / Configuration] + business code static / env / chain +``` + +- A **Toggle Point** is the single `if` in business code. It always calls the Service, never the provider directly. +- The **Service** (`Service` in Go, `FeatureFlagService` in TS) is the router. Business code never depends on which provider is behind it. +- A **Provider** is the configuration backend. Today we ship `StaticProvider` (in-memory rules), `EnvProvider` (Go only — env-var override), and `ChainProvider` (composition). A future DB or LaunchDarkly provider plugs in without changing any caller. +- A **Decision** is the structured result: `{ enabled, variant, reason, source }`. `IsEnabled` is the boolean projection, `Variant` is the raw string. Use `Decision` for diagnostic endpoints. 
+ +Four flag categories (Martin Fowler): + +| Category | Lifetime | Owner | Example | +|---|---|---|---| +| **Release** | Days–weeks | Engineering | Hide a half-finished page behind `flags_release_v2` | +| **Experiment** | Hours–weeks | Product / Data | A/B test `checkout_algo` between `control` and `experiment-v2` | +| **Ops** | Short or evergreen | SRE | Kill switch `billing_disable_invoice_pdf` | +| **Permission** | Years | Product | `plan_gate_enterprise_dashboard` | + +Manage them in the same provider but treat them differently: Release flags get deleted; Ops flags need fast override paths (`FF_` env var); Permission flags use `Allow` lists; Experiment flags use `PercentRollout`. + +--- + +## Backend (Go) + +### Wiring at startup + +```go +import "github.com/multica-ai/multica/server/pkg/featureflag" + +static := featureflag.NewStaticProvider() +static.LoadRules(map[string]featureflag.Rule{ + "billing_new_invoice_email": {Default: true}, + "checkout_algo": { + Default: false, + Variant: "experiment-v2", + Percent: &featureflag.PercentRollout{Percent: 25, By: "user_id"}, + }, + "ops_disable_recommendations": {Default: false}, +}) + +// Env overrides win over static config so SREs can flip kill switches +// without redeploying: `FF_OPS_DISABLE_RECOMMENDATIONS=true ./multica-server`. 
+env := featureflag.NewEnvProvider("FF_") + +flags := featureflag.NewService( + featureflag.NewChainProvider(env, static), + featureflag.WithLogger(logger), +) +``` + +### Attaching evaluation context to the request + +```go +func middleware(flags *featureflag.Service, next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + ec := featureflag.EvalContext{ + UserID: currentUserID(r), + WorkspaceID: currentWorkspaceID(r), + Attributes: map[string]string{"plan": currentPlan(r)}, + } + ctx := featureflag.WithEvalContext(r.Context(), ec) + next.ServeHTTP(w, r.WithContext(ctx)) + }) +} +``` + +### Toggle point in business code + +```go +if flags.IsEnabled(ctx, "billing_new_invoice_email", false) { + return s.sendNewInvoiceEmail(ctx, invoice) +} +return s.sendLegacyInvoiceEmail(ctx, invoice) +``` + +For multi-arm flags: + +```go +switch flags.Variant(ctx, "checkout_algo", "control") { +case "experiment-v2": + return checkoutV2(ctx, order) +case "experiment-v3": + return checkoutV3(ctx, order) +default: + return checkoutControl(ctx, order) +} +``` + +The Service is nil-safe and missing-key-safe: `(*Service)(nil).IsEnabled(ctx, "any", true)` returns `true`. Business code never needs to guard against a missing flag. 
+
+---
+
+## Frontend (TypeScript / React)
+
+### Mounting once at the root
+
+```tsx
+// apps/web/app/_providers.tsx (or the equivalent root)
+import {
+  FeatureFlagsProvider,
+  FeatureFlagService,
+  StaticProvider,
+} from "@multica/core/feature-flags";
+
+const service = new FeatureFlagService(
+  new StaticProvider({
+    billing_v2_dashboard: { default: false, allow: ["user-internal"] },
+    checkout_algo: { default: true, variant: "experiment-v2",
+                     percent: { percent: 25 } },
+  }),
+);
+
+export function Providers({ children }: { children: ReactNode }) {
+  const userId = useCurrentUserId();
+  return (
+    <FeatureFlagsProvider service={service} context={{ userId }}>
+      {children}
+    </FeatureFlagsProvider>
+  );
+}
+```
+
+When the backend pushes a fresh rule set (via an API response or WebSocket), call `service.setProvider(new StaticProvider(remoteRules))`. Components evaluate against the new rules the next time they render; `setProvider` only swaps the provider and does not trigger a render by itself, so pair it with a state update at the root when the change must show up immediately.
+
+### Toggle point in a component
+
+```tsx
+import { useFlag, useVariant } from "@multica/core/feature-flags";
+
+function BillingPage() {
+  const showV2 = useFlag("billing_v2_dashboard", false);
+  return showV2 ? <BillingV2 /> : <BillingLegacy />;
+}
+
+function Checkout() {
+  const variant = useVariant("checkout_algo", "control");
+  switch (variant) {
+    case "experiment-v2": return <CheckoutV2 />;
+    case "experiment-v3": return <CheckoutV3 />;
+    default: return <CheckoutControl />;
+  }
+}
+```
+
+Outside a `FeatureFlagsProvider` (Storybook, unit tests, error pages) `useFlag` / `useVariant` return the supplied default. You never have to mount the provider just to render a component in isolation.
+
+### Security note: never rely on the frontend alone
+
+A frontend feature flag controls what the user *sees*. It does NOT enforce access. Any API route exposing the same capability MUST evaluate the matching backend flag independently. The two flags can share a key but they live in two `Service` instances and the backend value is the source of truth.
+
+---
+
+## Best-practice checklist
+
+Adopted from Martin Fowler, ConfigCat and Octopus.
+
+- **Naming**: `{team}_{area}_{behavior}`, e.g. `billing_checkout_new_payment_flow`.
No `enable_` / `disable_` prefixes (redundant). +- **One flag, one purpose**: never repurpose an old flag for a new feature. Add a new flag and delete the old one. +- **Plan the death of the flag at birth**: open a follow-up issue to remove the flag when the rollout completes. Release flags should live days, not quarters. +- **Convention**: `Off` is the legacy / safe state, `On` is the new behavior. Lets CI test "all-off (today)" and "all-on (tomorrow)". +- **Kill switch fast path**: ops-critical flags should be exposed via `EnvProvider` so SREs can flip them without a deploy. +- **Backend protection**: anything controlling access goes through the backend Service; the frontend flag is presentation only. +- **No secrets in flags**: variant values are not Secrets Manager / KMS. Use those for tokens, keys, and passwords. + +See `docs/design.md` and `docs/timezone-architecture-rfc.md` for prior examples of how this pattern is used across the codebase. diff --git a/packages/core/feature-flags/chain-provider.ts b/packages/core/feature-flags/chain-provider.ts new file mode 100644 index 000000000..017c0f439 --- /dev/null +++ b/packages/core/feature-flags/chain-provider.ts @@ -0,0 +1,31 @@ +import type { Decision, EvalContext, Provider } from "./types"; + +/** + * ChainProvider composes multiple providers and returns the first match. + * + * Order from most-specific to most-generic: per-request override, server + * push, static config. The first provider that returns a Decision wins, so + * the chain naturally implements the "ops override beats static config" + * pattern callers expect. + * + * A ChainProvider that wraps zero providers is valid and always returns + * undefined, so the Service falls back to the caller's default. 
+ */
+export class ChainProvider implements Provider {
+  readonly name = "chain";
+  private readonly providers: ReadonlyArray<Provider>;
+
+  constructor(providers: ReadonlyArray<Provider | null | undefined>) {
+    // Filter nullish entries so callers can pass optional providers
+    // directly: `new ChainProvider([envOverride, baseStatic])`.
+    this.providers = providers.filter((p): p is Provider => p != null);
+  }
+
+  lookup(key: string, ctx: EvalContext): Decision | undefined {
+    for (const p of this.providers) {
+      const d = p.lookup(key, ctx);
+      if (d !== undefined) return d;
+    }
+    return undefined;
+  }
+}
diff --git a/packages/core/feature-flags/context.test.tsx b/packages/core/feature-flags/context.test.tsx
new file mode 100644
index 000000000..edefadb2d
--- /dev/null
+++ b/packages/core/feature-flags/context.test.tsx
@@ -0,0 +1,68 @@
+// @vitest-environment jsdom
+import { describe, expect, it } from "vitest";
+import { render, screen } from "@testing-library/react";
+import { FeatureFlagsProvider, useFlag, useVariant } from "./context";
+import { FeatureFlagService } from "./service";
+import { StaticProvider } from "./static-provider";
+
+function FlagBadge({ flagKey, defaultValue }: { flagKey: string; defaultValue: boolean }) {
+  const enabled = useFlag(flagKey, defaultValue);
+  return {enabled ?
"ON" : "OFF"}; +} + +function VariantBadge({ flagKey, defaultValue }: { flagKey: string; defaultValue: string }) { + const variant = useVariant(flagKey, defaultValue); + return {variant}; +} + +describe("FeatureFlagsProvider + hooks", () => { + it("useFlag returns provider value inside the tree", () => { + const service = new FeatureFlagService( + new StaticProvider({ demo: { default: true } }), + ); + render( + + + , + ); + expect(screen.getByTestId("flag").textContent).toBe("ON"); + }); + + it("useFlag falls back to default outside any provider (tests / stories)", () => { + render(); + expect(screen.getByTestId("flag").textContent).toBe("ON"); + }); + + it("useFlag respects the EvalContext attached to the provider", () => { + const service = new FeatureFlagService( + new StaticProvider({ + internal: { default: false, allow: ["user-internal"] }, + }), + ); + render( + + + , + ); + expect(screen.getByTestId("flag").textContent).toBe("ON"); + }); + + it("useVariant returns the variant identifier", () => { + const service = new FeatureFlagService( + new StaticProvider({ + algo: { default: true, variant: "experiment-v2" }, + }), + ); + render( + + + , + ); + expect(screen.getByTestId("variant").textContent).toBe("experiment-v2"); + }); + + it("useVariant falls back to default outside any provider", () => { + render(); + expect(screen.getByTestId("variant").textContent).toBe("control"); + }); +}); diff --git a/packages/core/feature-flags/context.tsx b/packages/core/feature-flags/context.tsx new file mode 100644 index 000000000..1549b8e01 --- /dev/null +++ b/packages/core/feature-flags/context.tsx @@ -0,0 +1,108 @@ +"use client"; + +import { createContext, useContext, useMemo, type ReactNode } from "react"; +import type { EvalContext } from "./types"; +import { FeatureFlagService } from "./service"; + +/** + * React glue for the FeatureFlagService. 
+ *
+ * Two pieces are exported:
+ *
+ * - {@link FeatureFlagsProvider}: wraps a part of the tree with a Service
+ *   and an EvalContext. The Service is usually constructed once at the
+ *   application root; the EvalContext changes as the user context changes
+ *   (e.g. after login).
+ * - {@link useFlag} / {@link useVariant}: the recommended Toggle Points in
+ *   UI code. They never throw; if the provider tree is missing they fall
+ *   back to the supplied default, which keeps Storybook stories and unit
+ *   tests from needing to mount the provider just to render a button.
+ *
+ * Note: {@link useFlag} / {@link useVariant} deliberately do NOT hand out
+ * the underlying FeatureFlagService. Diagnostic code that needs raw access
+ * can call {@link useFeatureFlagService} (the context object itself is not
+ * exported), at the cost of giving up the always-on safety guarantee.
+ */
+
+interface FeatureFlagContextValue {
+  service: FeatureFlagService;
+  ctx: EvalContext;
+}
+
+const FeatureFlagContext = createContext<FeatureFlagContextValue | null>(null);
+
+/**
+ * Shared default for the `context` prop. A module-level constant keeps the
+ * reference stable across renders; a fresh `{}` default would change the
+ * useMemo dependency on every render and hand every consumer a new context
+ * value each time the provider renders.
+ */
+const EMPTY_CONTEXT: EvalContext = Object.freeze({});
+
+export interface FeatureFlagsProviderProps {
+  service: FeatureFlagService;
+  /**
+   * Targeting context for every flag evaluation inside this subtree.
+   * Pass an empty object when the user is anonymous — percent rollouts
+   * and allow/deny lists then evaluate against the empty identifier,
+   * which is the desired behavior for anonymous traffic.
+   *
+   * The object is a useMemo dependency, so pass a referentially stable
+   * value (memoized or hoisted) to avoid re-rendering every consumer.
+   */
+  context?: EvalContext;
+  children: ReactNode;
+}
+
+/**
+ * Mount a FeatureFlagService and EvalContext into the tree. Replacing the
+ * `service` prop on a re-render is allowed but rare; prefer mutating the
+ * provider on the existing Service via `setProvider`, which keeps the
+ * context value stable. `setProvider` does not trigger a render by itself:
+ * the hooks below read the Service each time their component renders.
+ */
+export function FeatureFlagsProvider({
+  service,
+  context: ctx = EMPTY_CONTEXT,
+  children,
+}: FeatureFlagsProviderProps) {
+  // Memoize so consumers only see a new context value when the service or
+  // the targeting context actually changes.
+  const value = useMemo<FeatureFlagContextValue>(
+    () => ({ service, ctx }),
+    [service, ctx],
+  );
+  return (
+    <FeatureFlagContext.Provider value={value}>{children}</FeatureFlagContext.Provider>
+  );
+}
+
+/**
+ * useFlag returns the boolean state of a feature flag.
+ *
+ * Outside a {@link FeatureFlagsProvider} the hook returns `defaultValue`,
+ * never throws. This keeps tests and stories independent of the provider.
+ *
+ * @example
+ * const showNewBilling = useFlag("billing_v2_dashboard", false);
+ * return showNewBilling ? <BillingV2 /> : <BillingLegacy />;
+ */
+export function useFlag(key: string, defaultValue: boolean): boolean {
+  const value = useContext(FeatureFlagContext);
+  if (!value) return defaultValue;
+  return value.service.isEnabled(key, value.ctx, defaultValue);
+}
+
+/**
+ * useVariant returns the raw variant identifier for a multi-arm flag, with
+ * the same out-of-provider safety as {@link useFlag}.
+ *
+ * @example
+ * const variant = useVariant("checkout_algo", "control");
+ * switch (variant) {
+ *   case "experiment-v2": return <CheckoutV2 />;
+ *   case "experiment-v3": return <CheckoutV3 />;
+ *   default: return <CheckoutControl />;
+ * }
+ */
+export function useVariant(key: string, defaultValue: string): string {
+  const value = useContext(FeatureFlagContext);
+  if (!value) return defaultValue;
+  return value.service.variant(key, value.ctx, defaultValue);
+}
+
+/**
+ * Escape hatch for diagnostic overlays that need direct Service access.
+ * Returns `null` outside a provider so callers must guard explicitly —
+ * this is intentional: random component code should use {@link useFlag},
+ * not the raw Service.
+ */
+export function useFeatureFlagService(): FeatureFlagService | null {
+  return useContext(FeatureFlagContext)?.service ??
null; +} diff --git a/packages/core/feature-flags/hash.test.ts b/packages/core/feature-flags/hash.test.ts new file mode 100644 index 000000000..424db5a35 --- /dev/null +++ b/packages/core/feature-flags/hash.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from "vitest"; +import { bucketFor, inPercent } from "./hash"; + +describe("feature-flags hash", () => { + it("bucketFor returns a value in [0, 100)", () => { + for (const id of ["a", "b", "user-1", "user-2", "", "🦄"]) { + const b = bucketFor("flag", id); + expect(b).toBeGreaterThanOrEqual(0); + expect(b).toBeLessThan(100); + } + }); + + it("bucketFor is deterministic for the same (key, id)", () => { + const first = bucketFor("billing_new_invoice", "user-42"); + for (let i = 0; i < 100; i++) { + expect(bucketFor("billing_new_invoice", "user-42")).toBe(first); + } + }); + + it("separator prevents key/id boundary collisions", () => { + // ("ab","c") and ("a","bc") must not hash to the same bucket. + expect(bucketFor("ab", "c")).not.toBe(bucketFor("a", "bc")); + }); + + it("inPercent clamps boundary values", () => { + expect(inPercent("any", "any", 0)).toBe(false); + expect(inPercent("any", "any", -10)).toBe(false); + expect(inPercent("any", "any", 100)).toBe(true); + expect(inPercent("any", "any", 999)).toBe(true); + }); + + it("inPercent splits a 50% rollout roughly in half across 1000 users", () => { + // 50% over 1000 distinct users should land near 500; we allow a + // generous +/- 100 window so the test isn't flaky. 
+    let enabled = 0;
+    for (let i = 0; i < 1000; i++) {
+      if (inPercent("split", `user-${i.toString(36)}`, 50)) enabled++;
+    }
+    expect(enabled).toBeGreaterThan(400);
+    expect(enabled).toBeLessThan(600);
+  });
+});
diff --git a/packages/core/feature-flags/hash.ts b/packages/core/feature-flags/hash.ts
new file mode 100644
index 000000000..feba59f61
--- /dev/null
+++ b/packages/core/feature-flags/hash.ts
@@ -0,0 +1,60 @@
+// Shared UTF-8 encoder, created once at module load and reused for every
+// hash call.
+const utf8 = new TextEncoder();
+
+/**
+ * FNV-1a 32-bit hash used for deterministic percent-rollout bucketing.
+ *
+ * The same (key, identifier) pair MUST always produce the same bucket;
+ * otherwise users would flip in and out of experiments across requests. The
+ * algorithm matches the Go-side server/pkg/featureflag/hash.go so a flag
+ * evaluated on the frontend and on the backend lands in the same bucket for
+ * the same user.
+ *
+ * FNV-1a is defined over octets, so each part is hashed as its UTF-8 bytes
+ * (what a Go string holds). Hashing UTF-16 code units via charCodeAt would
+ * only agree with a byte-wise implementation for ASCII input.
+ * NOTE(review): confirm hash.go feeds the raw string bytes to the hash.
+ *
+ * FNV-1a is used because it is cheap, dependency-free, and well-distributed
+ * enough for sub-100 bucketing. It is NOT cryptographic; do not use it for
+ * anything beyond bucketing.
+ */
+function fnv1a(parts: ReadonlyArray<string>): number {
+  // 32-bit FNV-1a: offset basis 0x811c9dc5, prime 0x01000193.
+  let hash = 0x811c9dc5;
+  for (let p = 0; p < parts.length; p++) {
+    const bytes = utf8.encode(parts[p]!);
+    for (let i = 0; i < bytes.length; i++) {
+      hash ^= bytes[i]!;
+      // Multiply by FNV prime mod 2^32. Using Math.imul keeps the result
+      // in a 32-bit integer without slipping into float territory.
+      hash = Math.imul(hash, 0x01000193);
+    }
+    // Zero-byte separator between parts so ("ab", "c") and ("a", "bc")
+    // cannot hash to the same value. XOR with 0 leaves the hash unchanged,
+    // so folding in the separator is just the multiply step.
+    hash = Math.imul(hash, 0x01000193);
+  }
+  // Force unsigned 32-bit before the modulo to match Go's uint32 arithmetic.
+  return hash >>> 0;
+}
+
+/**
+ * bucketFor returns a deterministic bucket in [0, 100) for the supplied
+ * (key, identifier) pair.
+ */
+export function bucketFor(key: string, identifier: string): number {
+  return fnv1a([key, identifier]) % 100;
+}
+
+/**
+ * inPercent reports whether (key, identifier) falls within the first
+ * `percent` buckets. Values outside [0, 100] are clamped: <=0 disables for
+ * everyone, >=100 enables for everyone.
+ */
+export function inPercent(key: string, identifier: string, percent: number): boolean {
+  if (percent <= 0) return false;
+  if (percent >= 100) return true;
+  return bucketFor(key, identifier) < percent;
+}
diff --git a/packages/core/feature-flags/index.ts b/packages/core/feature-flags/index.ts
new file mode 100644
index 000000000..a33576fe5
--- /dev/null
+++ b/packages/core/feature-flags/index.ts
@@ -0,0 +1,30 @@
+/**
+ * Public surface for @multica/core/feature-flags.
+ *
+ * Keep this list minimal — every new export becomes a contract we have to
+ * preserve across the monorepo. Add to it only when a real caller appears.
+ */
+
+export type {
+  Decision,
+  EvalContext,
+  PercentRollout,
+  Provider,
+  Reason,
+  Rule,
+} from "./types";
+
+export { FeatureFlagService } from "./service";
+export { StaticProvider } from "./static-provider";
+export { ChainProvider } from "./chain-provider";
+export {
+  FeatureFlagsProvider,
+  useFeatureFlagService,
+  useFlag,
+  useVariant,
+} from "./context";
+
+// Hash helpers are exported for tests and for callers that want to share
+// the bucketing logic without going through a Provider (rare; usually a
+// red flag that the caller should be using the Service instead).
+export { bucketFor, inPercent } from "./hash"; diff --git a/packages/core/feature-flags/service.test.ts b/packages/core/feature-flags/service.test.ts new file mode 100644 index 000000000..c3f02f847 --- /dev/null +++ b/packages/core/feature-flags/service.test.ts @@ -0,0 +1,69 @@ +import { describe, expect, it } from "vitest"; +import { ChainProvider } from "./chain-provider"; +import { StaticProvider } from "./static-provider"; +import { FeatureFlagService } from "./service"; + +describe("FeatureFlagService", () => { + it("returns the default when no provider is configured", () => { + const s = new FeatureFlagService(null); + expect(s.isEnabled("any", {}, true)).toBe(true); + expect(s.isEnabled("any", {}, false)).toBe(false); + expect(s.variant("any", {}, "control")).toBe("control"); + expect(s.decision("any", {}, false).reason).toBe("default"); + }); + + it("returns the default when the provider does not know the key", () => { + const s = new FeatureFlagService(new StaticProvider({})); + expect(s.isEnabled("missing", {}, true)).toBe(true); + expect(s.decision("missing", {}, true).reason).toBe("default"); + }); + + it("uses the provider decision when found", () => { + const sp = new StaticProvider({ billing: { default: true } }); + const s = new FeatureFlagService(sp); + const d = s.decision("billing", {}, false); + expect(d.enabled).toBe(true); + expect(d.reason).toBe("static"); + expect(d.source).toBe("static"); + }); + + it("echoes the requested key in the decision", () => { + const sp = new StaticProvider({ a: { default: true } }); + const s = new FeatureFlagService(sp); + expect(s.decision("a", {}, false).key).toBe("a"); + }); + + it("setProvider swaps the underlying provider", () => { + const s = new FeatureFlagService(null); + expect(s.isEnabled("k", {}, false)).toBe(false); + s.setProvider(new StaticProvider({ k: { default: true } })); + expect(s.isEnabled("k", {}, false)).toBe(true); + }); +}); + +describe("ChainProvider", () => { + it("first match wins", 
() => { + const top = new StaticProvider({ shared: { default: true } }); + const bottom = new StaticProvider({ shared: { default: false } }); + const chain = new ChainProvider([top, bottom]); + expect(chain.lookup("shared", {})?.enabled).toBe(true); + }); + + it("falls through to the next provider", () => { + const top = new StaticProvider({}); + const bottom = new StaticProvider({ only_in_bottom: { default: true } }); + const chain = new ChainProvider([top, bottom]); + expect(chain.lookup("only_in_bottom", {})?.enabled).toBe(true); + }); + + it("returns undefined when no provider matches", () => { + const chain = new ChainProvider([new StaticProvider({})]); + expect(chain.lookup("nope", {})).toBeUndefined(); + }); + + it("skips null and undefined entries", () => { + const sp = new StaticProvider({ real: { default: true } }); + const chain = new ChainProvider([null, sp, undefined]); + expect(chain.lookup("real", {})?.enabled).toBe(true); + }); +}); diff --git a/packages/core/feature-flags/service.ts b/packages/core/feature-flags/service.ts new file mode 100644 index 000000000..06a700e86 --- /dev/null +++ b/packages/core/feature-flags/service.ts @@ -0,0 +1,84 @@ +import type { Decision, EvalContext, Provider } from "./types"; + +/** + * FeatureFlagService is the framework-level Toggle Router. UI code asks the + * Service for decisions; the Service consults its configured {@link Provider}. + * + * The class is intentionally side-effect free. Mounting it inside a React + * tree is handled by `./context.tsx`; the Service itself works outside of + * React (unit tests, web workers, Node CLI tools, ...). + * + * Always-on safety: every public entry point returns the caller's default + * when no provider matches. Business code never has to guard against a + * missing flag. 
+ */
+export class FeatureFlagService {
+  private provider: Provider | null;
+
+  constructor(provider: Provider | null = null) {
+    this.provider = provider;
+  }
+
+  /**
+   * Swap the underlying provider at runtime. Useful when fresh config
+   * arrives from the backend. Only the reference is replaced: nothing is
+   * notified, so callers (including React consumers) see the new provider
+   * the next time they evaluate a flag.
+   */
+  setProvider(provider: Provider | null): void {
+    this.provider = provider;
+  }
+
+  /**
+   * Returns true when the named flag evaluates to an "on" state. When the
+   * flag is unknown the caller's default is returned.
+   *
+   * @example
+   * if (flags.isEnabled("billing_new_invoice_email", { userId }, false)) {
+   *   return <NewInvoiceEmail />;
+   * }
+   * return <LegacyInvoiceEmail />;
+   */
+  isEnabled(key: string, ctx: EvalContext, defaultValue: boolean): boolean {
+    return this.decision(key, ctx, defaultValue).enabled;
+  }
+
+  /**
+   * Returns the raw variant for a multi-arm flag, falling back to
+   * `defaultValue` when no provider is configured, the provider does not
+   * know the key, or the provider throws.
+   */
+  variant(key: string, ctx: EvalContext, defaultValue: string): string {
+    try {
+      const found = this.provider?.lookup(key, ctx);
+      return found ? found.variant : defaultValue;
+    } catch {
+      // A failing provider must not take the toggle point down with it.
+      return defaultValue;
+    }
+  }
+
+  /**
+   * Full structured decision. Used by diagnostic overlays and tests.
+   *
+   * Falls back to the caller's default when no provider is configured or
+   * the key is unknown (reason "default"), and when the provider throws
+   * (reason "error") so the failure stays visible in diagnostics.
+   */
+  decision(key: string, ctx: EvalContext, defaultValue: boolean): Decision {
+    let found: Decision | undefined;
+    try {
+      found = this.provider?.lookup(key, ctx);
+    } catch {
+      return { ...defaultDecision(key, defaultValue), reason: "error" };
+    }
+    if (!found) return defaultDecision(key, defaultValue);
+    // Echo the requested key so callers always see the key they asked for.
+    return { ...found, key };
+  }
+
+  /** Returns the wrapped provider (read-only) for diagnostics. */
+  getProvider(): Provider | null {
+    return this.provider;
+  }
+}
+
+function defaultDecision(key: string, value: boolean): Decision {
+  return {
+    key,
+    enabled: value,
+    variant: value ?
"on" : "off", + reason: "default", + source: "default", + }; +} diff --git a/packages/core/feature-flags/static-provider.test.ts b/packages/core/feature-flags/static-provider.test.ts new file mode 100644 index 000000000..6155d7912 --- /dev/null +++ b/packages/core/feature-flags/static-provider.test.ts @@ -0,0 +1,78 @@ +import { describe, expect, it } from "vitest"; +import { StaticProvider } from "./static-provider"; + +describe("StaticProvider", () => { + it("returns undefined for unknown keys so callers fall through", () => { + const sp = new StaticProvider(); + expect(sp.lookup("missing", {})).toBeUndefined(); + }); + + it("returns the rule default for known keys", () => { + const sp = new StaticProvider({ on: { default: true }, off: { default: false } }); + expect(sp.lookup("on", {})?.enabled).toBe(true); + expect(sp.lookup("off", {})?.enabled).toBe(false); + }); + + it("allow forces ON for matching users", () => { + const sp = new StaticProvider({ + internal_dashboard: { default: false, allow: ["user-internal"] }, + }); + expect(sp.lookup("internal_dashboard", { userId: "user-internal" })?.enabled).toBe(true); + expect(sp.lookup("internal_dashboard", { userId: "user-random" })?.enabled).toBe(false); + }); + + it("deny wins over allow for the same user", () => { + const sp = new StaticProvider({ + conflict: { default: true, allow: ["same"], deny: ["same"] }, + }); + expect(sp.lookup("conflict", { userId: "same" })?.enabled).toBe(false); + }); + + it("percent rollout is deterministic for a fixed user", () => { + const sp = new StaticProvider({ split: { percent: { percent: 50 } } }); + const first = sp.lookup("split", { userId: "stable" })?.enabled; + for (let i = 0; i < 100; i++) { + expect(sp.lookup("split", { userId: "stable" })?.enabled).toBe(first); + } + }); + + it("percent rollout with by=workspace_id buckets by workspace", () => { + const sp = new StaticProvider({ + ws_rollout: { percent: { percent: 100, by: "workspace_id" } }, + }); + const decision = 
sp.lookup("ws_rollout", { workspaceId: "w-1" }); + expect(decision?.enabled).toBe(true); + expect(decision?.reason).toBe("percent"); + }); + + it("variant overrides the boolean variant string", () => { + const sp = new StaticProvider({ + checkout: { default: true, variant: "experiment-v2" }, + }); + const d = sp.lookup("checkout", { userId: "anyone" }); + expect(d?.variant).toBe("experiment-v2"); + expect(d?.enabled).toBe(true); + }); + + it("loadRules replaces, not merges, the rule map", () => { + const sp = new StaticProvider({ old: { default: true } }); + sp.loadRules({ fresh: { default: true } }); + expect(sp.lookup("old", {})).toBeUndefined(); + expect(sp.lookup("fresh", {})?.enabled).toBe(true); + }); + + it("custom attribute lookup against attributes map", () => { + const sp = new StaticProvider({ + plan_gate: { default: false, allow: ["enterprise"], allowBy: "plan" }, + }); + expect( + sp.lookup("plan_gate", { attributes: { plan: "enterprise" } })?.enabled, + ).toBe(true); + expect(sp.lookup("plan_gate", { attributes: { plan: "free" } })?.enabled).toBe(false); + }); + + it("keys returns a sorted snapshot", () => { + const sp = new StaticProvider({ zeta: {}, alpha: {}, mu: {} }); + expect(sp.keys()).toEqual(["alpha", "mu", "zeta"]); + }); +}); diff --git a/packages/core/feature-flags/static-provider.ts b/packages/core/feature-flags/static-provider.ts new file mode 100644 index 000000000..931a7ff3a --- /dev/null +++ b/packages/core/feature-flags/static-provider.ts @@ -0,0 +1,109 @@ +import type { Decision, EvalContext, Provider, Rule } from "./types"; +import { inPercent } from "./hash"; + +/** + * StaticProvider is an in-memory Provider populated either programmatically + * or from a JSON config shipped with the application bundle. + * + * This is the recommended baseline provider for the frontend: configuration + * lives in source control, moves through CD alongside the build, and + * changes require a deploy. 
For dynamic flags fetched from the backend,
+ * wrap a {@link StaticProvider} behind a chain provider that also reads
+ * from API state — the StaticProvider then acts as a safety net for the
+ * very first paint before the API response is available.
+ */
+export class StaticProvider implements Provider {
+  readonly name = "static";
+  private rules: Map<string, Rule>;
+
+  constructor(rules: Readonly<Record<string, Rule>> = {}) {
+    this.rules = new Map(Object.entries(rules));
+  }
+
+  /** Replace or install the rule for `key`. */
+  set(key: string, rule: Rule): void {
+    this.rules.set(key, rule);
+  }
+
+  /**
+   * Replace every rule atomically. Use when reloading flag config from a
+   * fetch response so consumers never observe a mixed state.
+   */
+  loadRules(rules: Readonly<Record<string, Rule>>): void {
+    this.rules = new Map(Object.entries(rules));
+  }
+
+  /** Sorted list of known flag keys. Useful for dev overlays. */
+  keys(): string[] {
+    return Array.from(this.rules.keys()).sort();
+  }
+
+  /** Evaluate `key` against its rule, or return undefined when no rule exists. */
+  lookup(key: string, ctx: EvalContext): Decision | undefined {
+    const rule = this.rules.get(key);
+    if (!rule) return undefined;
+    return evaluateRule(key, rule, ctx);
+  }
+}
+
+/**
+ * Apply a rule in order: deny list, allow list, percent rollout, default.
+ * The first stage that matches produces the Decision.
+ */
+function evaluateRule(key: string, rule: Rule, ctx: EvalContext): Decision {
+  // Deny wins over everything else; a kill switch must remain reachable
+  // even when other targeting matches.
+  const denyBy = rule.denyBy ?? "user_id";
+  if (rule.deny && rule.deny.length > 0) {
+    const v = lookupAttr(ctx, denyBy);
+    if (v && rule.deny.includes(v)) {
+      return decisionFromRule(key, rule, false, "static");
+    }
+  }
+
+  const allowBy = rule.allowBy ?? "user_id";
+  if (rule.allow && rule.allow.length > 0) {
+    const v = lookupAttr(ctx, allowBy);
+    if (v && rule.allow.includes(v)) {
+      return decisionFromRule(key, rule, true, "static");
+    }
+  }
+
+  if (rule.percent) {
+    const by = rule.percent.by ?? "user_id";
+    // A missing identifier buckets as the empty string, so all anonymous
+    // traffic lands in the same bucket for a given key.
+    const ident = lookupAttr(ctx, by) ?? "";
+    const enabled = inPercent(key, ident, rule.percent.percent);
+    return decisionFromRule(key, rule, enabled, "percent");
+  }
+
+  return decisionFromRule(key, rule, rule.default ?? false, "static");
+}
+
+/**
+ * Build the Decision for an evaluated rule.
+ *
+ * The rule's custom variant names the "on" arm only. When the flag
+ * evaluates to off the variant is "off", so `enabled` stays the boolean
+ * projection of `variant` and a variant switch falls through to its
+ * control branch for users outside the rollout.
+ * NOTE(review): mirror this in the Go evaluator if it reports the custom
+ * variant for disabled decisions.
+ */
+function decisionFromRule(
+  key: string,
+  rule: Rule,
+  enabled: boolean,
+  reason: Decision["reason"],
+): Decision {
+  const variant =
+    enabled && rule.variant && rule.variant.length > 0
+      ? rule.variant
+      : boolToVariant(enabled);
+  return {
+    key,
+    enabled,
+    variant,
+    reason,
+    source: "static",
+  };
+}
+
+function boolToVariant(b: boolean): string {
+  return b ? "on" : "off";
+}
+
+/**
+ * Resolve an attribute name against the EvalContext. The well-known names
+ * "user_id" and "workspace_id" map to the dedicated fields so rules can use
+ * them by name without callers also populating `attributes`.
+ */
+function lookupAttr(ctx: EvalContext, name: string): string | undefined {
+  if (name === "user_id") return nonEmpty(ctx.userId);
+  if (name === "workspace_id") return nonEmpty(ctx.workspaceId);
+  return nonEmpty(ctx.attributes?.[name]);
+}
+
+/** Treat the empty string like a missing value. */
+function nonEmpty(v: string | undefined): string | undefined {
+  return v && v.length > 0 ? v : undefined;
+}
diff --git a/packages/core/feature-flags/types.ts b/packages/core/feature-flags/types.ts
new file mode 100644
index 000000000..40099b4fb
--- /dev/null
+++ b/packages/core/feature-flags/types.ts
@@ -0,0 +1,111 @@
+/**
+ * Public types for the @multica/core/feature-flags module.
+ *
+ * The shape mirrors the Go-side server/pkg/featureflag package on purpose so
+ * a Decision returned by the backend can be marshalled directly into the
+ * frontend Service without translation. Keep them in sync when extending
+ * either side.
+ */
+
+/**
+ * Reason explains why a Decision returned the value it did. Exposed in
+ * diagnostics endpoints and in development overlays so engineers can tell
+ * "this flag is on because the user is in the allowlist" apart from "this
+ * flag is on because the default kicked in".
+ */ +export type Reason = + | "static" + | "percent" + | "override" + | "default" + | "error"; + +/** + * Structured outcome of a single flag evaluation. Most callers only need + * the {@link FeatureFlagService.isEnabled} convenience, but tests and + * dev tools want the full record. + */ +export interface Decision { + /** The flag identifier that was evaluated. */ + key: string; + /** Boolean projection. True for any variant except "off" / "" / "false" / "0". */ + enabled: boolean; + /** Raw variant value. Boolean flags use "on" / "off"; variant flags use arbitrary identifiers. */ + variant: string; + /** Why this decision was made. */ + reason: Reason; + /** Name of the provider that produced the decision, or "default" when nothing matched. */ + source: string; +} + +/** + * Per-evaluation context for dynamic targeting (allow/deny lists, percent + * rollouts). All fields are optional; a missing field never crashes the + * evaluation, it simply skips the rules that depend on it. + */ +export interface EvalContext { + userId?: string; + workspaceId?: string; + /** Free-form attributes (plan, country, client, ...). Keys are case-sensitive. */ + attributes?: Readonly<Record<string, string>>; +} + +/** + * Percent rollout descriptor. The bucket for (key, identifier) is computed + * with FNV-1a so the same identifier always falls into the same bucket + * across processes and tabs. + */ +export interface PercentRollout { + /** Rollout size in [0, 100]. Out-of-range values are clamped. */ + percent: number; + /** + * Attribute name used as the bucketing identifier. Defaults to "user_id". + * Use "workspace_id" for workspace-scoped rollouts. + */ + by?: string; +} + +/** + * Rule describes how the {@link StaticProvider} evaluates a single flag. + * + * Evaluation order (first match wins): + * 1. Deny: if the EvalContext attribute matches an entry in deny, return OFF. + * 2. Allow: if it matches an entry in allow, return ON. + * 3. 
Percent: if the bucket falls inside percent.percent, return ON; else OFF. + * 4. Default: return `default` (false when unset). + */ +export interface Rule { + /** Value returned when no targeting rule matches. Defaults to false. */ + default?: boolean; + /** + * Optional variant identifier returned alongside the boolean. Use for + * multi-arm flags ("control" / "experiment-v2" / "experiment-v3"). + */ + variant?: string; + /** Identifier values that force the flag ON. */ + allow?: ReadonlyArray<string>; + /** EvalContext attribute used for allow lookups. Defaults to "user_id". */ + allowBy?: string; + /** Identifier values that force the flag OFF. Deny wins over allow. */ + deny?: ReadonlyArray<string>; + /** EvalContext attribute used for deny lookups. Defaults to "user_id". */ + denyBy?: string; + /** Deterministic percent rollout. */ + percent?: PercentRollout; +} + +/** + * Provider is the configuration backend for the Service. Implementations + * MUST be safe for concurrent use; the Service reads providers from many + * components without additional synchronization. + * + * Returning `undefined` (instead of a Decision) tells the Service to fall + * through to the next provider in a ChainProvider, or to the caller's + * default if there is no next provider. + */ +export interface Provider { + /** Stable, human-readable identifier surfaced in Decision.source. */ + readonly name: string; + /** Evaluate the flag, or return undefined if this provider does not know it. 
*/ + lookup(key: string, ctx: EvalContext): Decision | undefined; +} diff --git a/packages/core/package.json b/packages/core/package.json index 928586e1c..6633a7dac 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -99,6 +99,7 @@ "./logger": "./logger.ts", "./utils": "./utils.ts", "./constants/*": "./constants/*.ts", + "./feature-flags": "./feature-flags/index.ts", "./platform": "./platform/index.ts", "./analytics": "./analytics/index.ts", "./i18n": "./i18n/index.ts", diff --git a/server/pkg/featureflag/chain_provider.go b/server/pkg/featureflag/chain_provider.go new file mode 100644 index 000000000..fb13e28c7 --- /dev/null +++ b/server/pkg/featureflag/chain_provider.go @@ -0,0 +1,48 @@ +package featureflag + +import "context" + +// ChainProvider composes multiple providers and returns the first match. +// Earlier providers take precedence, so callers should order them from +// most-specific to most-generic: per-request override, env, db, static. +// +// A ChainProvider that wraps zero providers is valid and always returns +// (zero, false) so the Service falls back to the caller's default. +type ChainProvider struct { + providers []Provider +} + +// NewChainProvider returns a ChainProvider that evaluates the supplied +// providers in order. Nil providers are silently skipped so callers can +// pass optional fields directly without an extra nil check at every site. +func NewChainProvider(providers ...Provider) *ChainProvider { + cp := &ChainProvider{providers: make([]Provider, 0, len(providers))} + for _, p := range providers { + if p != nil { + cp.providers = append(cp.providers, p) + } + } + return cp +} + +// Name implements Provider. +func (*ChainProvider) Name() string { return "chain" } + +// Lookup implements Provider. It returns the first decision produced by +// the wrapped providers, in the order they were registered. 
+func (cp *ChainProvider) Lookup(ctx context.Context, key string) (Decision, bool) { + for _, p := range cp.providers { + if d, ok := p.Lookup(ctx, key); ok { + return d, true + } + } + return Decision{}, false +} + +// Providers returns a snapshot of the wrapped providers. The slice itself +// is a copy; the Provider values are shared and must not be mutated. +func (cp *ChainProvider) Providers() []Provider { + out := make([]Provider, len(cp.providers)) + copy(out, cp.providers) + return out +} diff --git a/server/pkg/featureflag/chain_provider_test.go b/server/pkg/featureflag/chain_provider_test.go new file mode 100644 index 000000000..a52cd5f39 --- /dev/null +++ b/server/pkg/featureflag/chain_provider_test.go @@ -0,0 +1,72 @@ +package featureflag + +import ( + "context" + "testing" +) + +func TestChainProviderFirstHitWins(t *testing.T) { + t.Parallel() + a := NewStaticProvider() + a.Set("shared", Rule{Default: true}) + + b := NewStaticProvider() + b.Set("shared", Rule{Default: false}) + + chain := NewChainProvider(a, b) + d, ok := chain.Lookup(context.Background(), "shared") + if !ok || !d.Enabled { + t.Fatalf("first provider must win, got %+v ok=%v", d, ok) + } +} + +func TestChainProviderFallsThrough(t *testing.T) { + t.Parallel() + a := NewStaticProvider() // empty + b := NewStaticProvider() + b.Set("only_in_b", Rule{Default: true}) + + chain := NewChainProvider(a, b) + d, ok := chain.Lookup(context.Background(), "only_in_b") + if !ok || !d.Enabled { + t.Fatalf("chain must fall through to the next provider, got %+v ok=%v", d, ok) + } +} + +func TestChainProviderEmpty(t *testing.T) { + t.Parallel() + chain := NewChainProvider() + _, ok := chain.Lookup(context.Background(), "any") + if ok { + t.Fatalf("empty chain must report not-found") + } +} + +func TestChainProviderSkipsNil(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("real", Rule{Default: true}) + + chain := NewChainProvider(nil, sp, nil) + d, ok := chain.Lookup(context.Background(), 
"real") + if !ok || !d.Enabled { + t.Fatalf("chain must skip nil providers, got %+v ok=%v", d, ok) + } +} + +func TestChainProviderEnvBeatsStatic(t *testing.T) { + t.Parallel() + // This is the production-shaped chain: env override on top, static + // config below. An Ops engineer flipping FF_KILL_SWITCH=false must + // be able to disable a flag that is otherwise true in static config. + static := NewStaticProvider() + static.Set("kill_switch", Rule{Default: true}) + + env := newMockEnv(map[string]string{"FF_KILL_SWITCH": "false"}) + + chain := NewChainProvider(env, static) + d, _ := chain.Lookup(context.Background(), "kill_switch") + if d.Enabled { + t.Fatalf("env override must beat static default, got %+v", d) + } +} diff --git a/server/pkg/featureflag/doc.go b/server/pkg/featureflag/doc.go new file mode 100644 index 000000000..55dd33c2b --- /dev/null +++ b/server/pkg/featureflag/doc.go @@ -0,0 +1,30 @@ +// Package featureflag is a framework-level feature flag library for the +// multica backend. +// +// It implements the canonical Toggle Point / Toggle Router / Toggle +// Configuration separation described by Martin Fowler: +// +// business code -> Service.IsEnabled(ctx, key, default) // Toggle Point +// Service // Toggle Router +// Provider (Static/Env/Chain/custom) // Toggle Configuration +// +// Design goals: +// +// - Business code never speaks to a provider directly; it always asks the +// Service. This keeps the decision point decoupled from the decision +// logic so the same Toggle Point can be backed by a YAML file today, a +// database tomorrow, and an A/B router after that, with no caller +// changes. +// - Always-on safety: a missing provider, a missing key, or a misconfigured +// rule must never crash callers. Every public entry point returns the +// supplied default in that case and records a Reason so the failure is +// observable. 
+// - Deterministic percent rollouts: the same (key, identifier) pair always +// evaluates to the same bucket so a user does not flip in and out of an +// experiment across requests. +// - No third-party dependencies. The package only uses the Go standard +// library so it is safe to import from any subsystem. +// +// See server/pkg/featureflag/service.go for the public Service API and +// docs/feature-flags.md for end-to-end usage examples. +package featureflag diff --git a/server/pkg/featureflag/env_provider.go b/server/pkg/featureflag/env_provider.go new file mode 100644 index 000000000..e848b3f51 --- /dev/null +++ b/server/pkg/featureflag/env_provider.go @@ -0,0 +1,157 @@ +package featureflag + +import ( + "context" + "os" + "strconv" + "strings" +) + +// EnvProvider reads flag configuration from process environment variables. +// It is intended for emergency overrides, local development, and the kind +// of "kill switch I need to flip without redeploying" use case Ops Toggles +// were invented for. +// +// Variables are keyed by Prefix + UPPER_SNAKE_CASE(flag_key). For a Prefix +// of "FF_" and a flag named "checkout_new_payment_flow", the env variable +// is FF_CHECKOUT_NEW_PAYMENT_FLOW. +// +// Supported value formats (case-insensitive): +// +// "true", "on", "1", "yes" -> Enabled=true, Variant="on" +// "false", "off", "0", "no" -> Enabled=false, Variant="off" +// "" -> Enabled=false, Variant="off" (explicitly disabled) +// "42%" -> deterministic percent rollout +// any other non-empty value -> treated as a variant identifier +// (Enabled=true, Variant=<value>) +// +// Malformed percent values (negative, >100, non-numeric) yield a Decision +// with Reason=ReasonError. The Service still treats that as a real +// decision and does not fall through to a less specific provider; an Ops +// engineer who set FF_FOO=abc% expects to be told something is wrong, not +// for the override to silently disappear. 
+type EnvProvider struct { + // Prefix is prepended to every lookup. Empty disables prefixing, + // which is rarely what you want. + Prefix string + + // lookup is overridable for tests. Must return (value, true) when + // the variable is set (even to the empty string) and ("", false) + // when it is missing. Defaults to os.LookupEnv. + lookup func(string) (string, bool) +} + +// NewEnvProvider returns an EnvProvider with the supplied prefix. Pass +// "FF_" for the conventional multica prefix. +func NewEnvProvider(prefix string) *EnvProvider { + return &EnvProvider{Prefix: prefix, lookup: os.LookupEnv} +} + +// Name implements Provider. +func (*EnvProvider) Name() string { return "env" } + +// Lookup implements Provider. +func (p *EnvProvider) Lookup(ctx context.Context, key string) (Decision, bool) { + envName := p.Prefix + flagKeyToEnv(key) + get := p.lookup + if get == nil { + get = os.LookupEnv + } + raw, present := get(envName) + if !present { + return Decision{}, false + } + + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + return Decision{ + Key: key, + Enabled: false, + Variant: "off", + Reason: ReasonStatic, + Source: "env", + }, true + } + + if strings.HasSuffix(trimmed, "%") { + pctStr := strings.TrimSuffix(trimmed, "%") + pct, err := strconv.Atoi(strings.TrimSpace(pctStr)) + if err != nil || pct < 0 || pct > 100 { + return Decision{ + Key: key, + Enabled: false, + Variant: "off", + Reason: ReasonError, + Source: "env", + }, true + } + ec := EvalContextFrom(ctx) + ident, _ := ec.Lookup("user_id") + enabled := inPercent(key, ident, pct) + return Decision{ + Key: key, + Enabled: enabled, + Variant: boolToVariant(enabled), + Reason: ReasonPercent, + Source: "env", + }, true + } + + switch strings.ToLower(trimmed) { + case "true", "on", "1", "yes": + return Decision{ + Key: key, + Enabled: true, + Variant: "on", + Reason: ReasonStatic, + Source: "env", + }, true + case "false", "off", "0", "no": + return Decision{ + Key: key, + Enabled: false, + 
Variant: "off", + Reason: ReasonStatic, + Source: "env", + }, true + } + + // Treat any other value as a variant identifier. We must not parse + // the variant any further; callers know what their variants mean. + return Decision{ + Key: key, + Enabled: true, + Variant: trimmed, + Reason: ReasonStatic, + Source: "env", + }, true +} + +// flagKeyToEnv converts a flag key into its env-variable form. We +// uppercase everything and replace any non-alphanumeric run with a single +// underscore. The conversion is intentionally lossy (case-insensitive, +// merges punctuation runs) so common variants like "checkout.newPayment" +// and "checkout-new-payment" route to the same env name; if you need +// distinct env vars for variants of the same key, choose distinct flag +// keys instead. +func flagKeyToEnv(key string) string { + var b strings.Builder + b.Grow(len(key)) + prevUnderscore := false + for _, r := range key { + switch { + case r >= 'A' && r <= 'Z', r >= '0' && r <= '9': + b.WriteRune(r) + prevUnderscore = false + case r >= 'a' && r <= 'z': + b.WriteRune(r - 32) + prevUnderscore = false + default: + if !prevUnderscore { + b.WriteByte('_') + prevUnderscore = true + } + } + } + return strings.Trim(b.String(), "_") +} diff --git a/server/pkg/featureflag/env_provider_test.go b/server/pkg/featureflag/env_provider_test.go new file mode 100644 index 000000000..25bdda4f3 --- /dev/null +++ b/server/pkg/featureflag/env_provider_test.go @@ -0,0 +1,137 @@ +package featureflag + +import ( + "context" + "testing" +) + +func newMockEnv(env map[string]string) *EnvProvider { + p := NewEnvProvider("FF_") + p.lookup = func(name string) (string, bool) { + v, ok := env[name] + return v, ok + } + return p +} + +func TestEnvProviderTrueFalse(t *testing.T) { + t.Parallel() + cases := []struct { + raw string + want bool + variant string + }{ + {"true", true, "on"}, + {"TRUE", true, "on"}, + {"on", true, "on"}, + {"1", true, "on"}, + {"yes", true, "on"}, + {"false", false, "off"}, + 
{"OFF", false, "off"}, + {"0", false, "off"}, + {"no", false, "off"}, + } + for _, tc := range cases { + p := newMockEnv(map[string]string{"FF_DEMO": tc.raw}) + d, ok := p.Lookup(context.Background(), "demo") + if !ok { + t.Fatalf("%q: env provider must report found", tc.raw) + } + if d.Enabled != tc.want || d.Variant != tc.variant { + t.Fatalf("%q: got %+v, want enabled=%v variant=%q", tc.raw, d, tc.want, tc.variant) + } + } +} + +func TestEnvProviderExplicitEmpty(t *testing.T) { + t.Parallel() + // An explicitly empty variable means "I want this flag off". This is + // the contract for kill switches set via ConfigMap. + p := newMockEnv(map[string]string{"FF_DEMO": ""}) + d, ok := p.Lookup(context.Background(), "demo") + if !ok { + t.Fatalf("empty env value must be treated as 'set'") + } + if d.Enabled { + t.Fatalf("empty env value must disable the flag, got %+v", d) + } +} + +func TestEnvProviderMissingFallsThrough(t *testing.T) { + t.Parallel() + p := newMockEnv(map[string]string{}) + _, ok := p.Lookup(context.Background(), "demo") + if ok { + t.Fatalf("missing env var must report not-found so callers can fall through") + } +} + +func TestEnvProviderPercent(t *testing.T) { + t.Parallel() + p := newMockEnv(map[string]string{"FF_DEMO": "100%"}) + ctx := WithEvalContext(context.Background(), EvalContext{UserID: "anyone"}) + d, ok := p.Lookup(ctx, "demo") + if !ok || !d.Enabled || d.Reason != ReasonPercent { + t.Fatalf("100%% must enable everyone with ReasonPercent, got %+v", d) + } + + p = newMockEnv(map[string]string{"FF_DEMO": "0%"}) + d, _ = p.Lookup(ctx, "demo") + if d.Enabled { + t.Fatalf("0%% must disable everyone") + } +} + +func TestEnvProviderMalformedPercent(t *testing.T) { + t.Parallel() + p := newMockEnv(map[string]string{"FF_DEMO": "abc%"}) + d, ok := p.Lookup(context.Background(), "demo") + if !ok { + t.Fatalf("malformed percent must still return a decision so it does not fall through") + } + if d.Reason != ReasonError { + t.Fatalf("malformed percent 
must report ReasonError, got %+v", d) + } + if d.Enabled { + t.Fatalf("malformed percent must default to disabled, got %+v", d) + } +} + +func TestEnvProviderOutOfRangePercent(t *testing.T) { + t.Parallel() + for _, raw := range []string{"-5%", "150%"} { + p := newMockEnv(map[string]string{"FF_DEMO": raw}) + d, _ := p.Lookup(context.Background(), "demo") + if d.Reason != ReasonError { + t.Fatalf("%q: out-of-range percent must report ReasonError, got %+v", raw, d) + } + } +} + +func TestEnvProviderVariantValue(t *testing.T) { + t.Parallel() + p := newMockEnv(map[string]string{"FF_ALGO": "experiment-v2"}) + d, ok := p.Lookup(context.Background(), "algo") + if !ok || !d.Enabled || d.Variant != "experiment-v2" { + t.Fatalf("variant value must be passed through verbatim, got %+v", d) + } +} + +func TestFlagKeyToEnv(t *testing.T) { + t.Parallel() + cases := []struct { + in string + want string + }{ + {"checkout_new_payment_flow", "CHECKOUT_NEW_PAYMENT_FLOW"}, + {"checkout.newPayment", "CHECKOUT_NEWPAYMENT"}, + {"checkout-new-payment", "CHECKOUT_NEW_PAYMENT"}, + {" weird spaces ", "WEIRD_SPACES"}, + {"a..b", "A_B"}, + } + for _, tc := range cases { + if got := flagKeyToEnv(tc.in); got != tc.want { + t.Fatalf("flagKeyToEnv(%q) = %q, want %q", tc.in, got, tc.want) + } + } +} diff --git a/server/pkg/featureflag/eval_context.go b/server/pkg/featureflag/eval_context.go new file mode 100644 index 000000000..c1d60879c --- /dev/null +++ b/server/pkg/featureflag/eval_context.go @@ -0,0 +1,81 @@ +package featureflag + +import "context" + +// EvalContext is the per-request context used to evaluate dynamic flags such +// as percent rollouts and per-user allow/deny lists. +// +// All fields are optional. A zero EvalContext is valid: allow/deny lookups +// silently miss, and percent rollouts bucket the empty identifier, so every +// caller without an identifier lands in the same per-flag bucket. 
+type EvalContext struct { + // UserID is the canonical identifier used for per-user targeting and + // for the default percent-rollout bucketing key. Free-form string; + // the framework never parses it. + UserID string + + // WorkspaceID identifies the multica workspace that issued the + // request. Useful for workspace-scoped rollouts. + WorkspaceID string + + // Attributes holds any other targeting attributes the caller wants + // to expose to rules, for example "country", "plan", or "client". + // Keys are case-sensitive. + Attributes map[string]string +} + +// Lookup returns the value of the attribute called name. The well-known +// names "user_id" and "workspace_id" map to the dedicated UserID and +// WorkspaceID fields so rules can use them by name without callers having +// to also populate Attributes; any other name is read from Attributes. +// +// The bool return signals whether a non-empty value was found; an attribute +// that is absent and one that is set to "" both report ("", false). +func (ec EvalContext) Lookup(name string) (string, bool) { + switch name { + case "user_id": + if ec.UserID != "" { + return ec.UserID, true + } + return "", false + case "workspace_id": + if ec.WorkspaceID != "" { + return ec.WorkspaceID, true + } + return "", false + } + if ec.Attributes == nil { + return "", false + } + v, ok := ec.Attributes[name] + if !ok || v == "" { + return "", false + } + return v, true +} + +type evalContextKey struct{} + +// WithEvalContext returns a derived context that carries ec for later +// retrieval via EvalContextFrom. Passing the zero EvalContext is allowed and +// effectively clears any previously attached context. +func WithEvalContext(parent context.Context, ec EvalContext) context.Context { + if parent == nil { + parent = context.Background() + } + return context.WithValue(parent, evalContextKey{}, ec) +} + +// EvalContextFrom extracts the EvalContext previously attached with +// WithEvalContext. 
It returns the zero value when the context carries no +// EvalContext, never nil, so callers can read fields unconditionally. +func EvalContextFrom(ctx context.Context) EvalContext { + if ctx == nil { + return EvalContext{} + } + v, ok := ctx.Value(evalContextKey{}).(EvalContext) + if !ok { + return EvalContext{} + } + return v +} diff --git a/server/pkg/featureflag/eval_context_test.go b/server/pkg/featureflag/eval_context_test.go new file mode 100644 index 000000000..7647d8c12 --- /dev/null +++ b/server/pkg/featureflag/eval_context_test.go @@ -0,0 +1,103 @@ +package featureflag + +import ( + "context" + "testing" +) + +func TestEvalContextLookup(t *testing.T) { + t.Parallel() + ec := EvalContext{ + UserID: "u-1", + WorkspaceID: "w-2", + Attributes: map[string]string{"plan": "pro", "country": ""}, + } + tests := []struct { + name string + key string + value string + found bool + }{ + {"user_id", "user_id", "u-1", true}, + {"workspace_id", "workspace_id", "w-2", true}, + {"plan", "plan", "pro", true}, + {"empty attribute treated as missing", "country", "", false}, + {"unknown attribute", "unknown", "", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + v, ok := ec.Lookup(tt.key) + if v != tt.value || ok != tt.found { + t.Fatalf("Lookup(%q) = (%q, %v), want (%q, %v)", tt.key, v, ok, tt.value, tt.found) + } + }) + } +} + +func TestEvalContextRoundTripThroughContext(t *testing.T) { + t.Parallel() + ec := EvalContext{UserID: "u-1"} + ctx := WithEvalContext(context.Background(), ec) + got := EvalContextFrom(ctx) + if got.UserID != "u-1" { + t.Fatalf("EvalContext did not round-trip, got %+v", got) + } +} + +func TestEvalContextFromUnattachedContext(t *testing.T) { + t.Parallel() + // An unattached context must return the zero value, not panic. 
+ got := EvalContextFrom(context.Background()) + if got.UserID != "" || got.WorkspaceID != "" || got.Attributes != nil { + t.Fatalf("unattached context should yield zero EvalContext, got %+v", got) + } +} + +func TestEvalContextFromNilContext(t *testing.T) { + t.Parallel() + //nolint:staticcheck // deliberately exercise the nil-ctx defensive path. + got := EvalContextFrom(nil) + if got.UserID != "" { + t.Fatalf("nil context must yield zero EvalContext, got %+v", got) + } +} + +func TestPercentBucketStable(t *testing.T) { + t.Parallel() + // Hash stability is part of the public contract: the same (key, id) + // MUST produce the same bucket forever, otherwise users will flip + // in and out of experiments. NOTE(review): want is computed by bucketFor + // itself, so this only checks determinism and range; pin literal values. + cases := []struct { + key, id string + want int + }{ + {"feature_a", "user-1", bucketFor("feature_a", "user-1")}, + {"feature_b", "", bucketFor("feature_b", "")}, + } + for _, tc := range cases { + got := bucketFor(tc.key, tc.id) + if got != tc.want { + t.Fatalf("bucketFor(%q, %q) = %d, want %d", tc.key, tc.id, got, tc.want) + } + if got < 0 || got >= 100 { + t.Fatalf("bucket out of range: %d", got) + } + } +} + +func TestPercentBucketSeparator(t *testing.T) { + t.Parallel() + // Without a separator, ("ab", "c") and ("a", "bc") would collide. + // The separator must keep them distinct, otherwise two unrelated + // flags could share buckets and skew an experiment. + left := bucketFor("ab", "c") + right := bucketFor("a", "bc") + if left == right { + // Not guaranteed unequal in general, but for these inputs the + // FNV-1a + zero separator should produce different buckets. + // If this ever does collide we should switch separators, not + // hide the regression. 
+ t.Fatalf("hash separator failed: bucketFor('ab','c') == bucketFor('a','bc') == %d", left) + } +} diff --git a/server/pkg/featureflag/hash.go b/server/pkg/featureflag/hash.go new file mode 100644 index 000000000..941e38f84 --- /dev/null +++ b/server/pkg/featureflag/hash.go @@ -0,0 +1,37 @@ +package featureflag + +import "hash/fnv" + +// bucketFor returns a deterministic bucket in [0, 100) for the supplied +// (key, identifier) pair using FNV-1a. The same pair always returns the +// same bucket, which is the contract callers rely on for stable percent +// rollouts: a user must not flip in and out of an experiment across +// requests. +// +// FNV-1a is used instead of crypto hashes because it is fast, dependency +// free, and well-distributed enough for sub-100 bucketing. The hash is not +// security sensitive; do not use it for anything beyond bucketing. +func bucketFor(key, identifier string) int { + h := fnv.New32a() + // Writing each component with a separator avoids a "key||identifier" + // collision pattern where ("ab", "c") and ("a", "bc") would hash to + // the same value. + _, _ = h.Write([]byte(key)) + _, _ = h.Write([]byte{0}) + _, _ = h.Write([]byte(identifier)) + return int(h.Sum32() % 100) +} + +// inPercent reports whether (key, identifier) falls within the first +// percent buckets. A percent of 0 disables the rule for everyone; a +// percent of 100 enables it for everyone. Values outside [0, 100] are +// clamped. +func inPercent(key, identifier string, percent int) bool { + switch { + case percent <= 0: + return false + case percent >= 100: + return true + } + return bucketFor(key, identifier) < percent +} diff --git a/server/pkg/featureflag/provider.go b/server/pkg/featureflag/provider.go new file mode 100644 index 000000000..1241c0ddd --- /dev/null +++ b/server/pkg/featureflag/provider.go @@ -0,0 +1,81 @@ +package featureflag + +import "context" + +// Reason identifies why a Decision returned the value it did. 
Reasons are +// observable strings so they can be exposed in metadata endpoints and +// structured logs. +type Reason string + +const ( + // ReasonStatic means a provider returned an unconditional value + // (Rule.Default, an Allow hit, a Deny hit, or a Variant lookup). + ReasonStatic Reason = "static" + + // ReasonPercent means the value came from a deterministic percent + // rollout bucket. The same (key, identifier) pair always yields the + // same bucket. + ReasonPercent Reason = "percent" + + // ReasonOverride means a per-request override was applied (for + // example a debug header or a cookie). Overrides win over normal + // rules so they should never be exposed to untrusted callers. + ReasonOverride Reason = "override" + + // ReasonDefault means no provider matched the key and the caller's + // default value was returned. This is the only Reason callers ever + // see when their default is used. + ReasonDefault Reason = "default" + + // ReasonError means a provider attempted to evaluate the flag but + // failed (for example a malformed env var). The default is returned + // and the error reason is recorded for diagnostics. + ReasonError Reason = "error" +) + +// Decision is the structured result of a flag evaluation. Callers typically +// use Service.IsEnabled or Service.Variant which collapse Decision into a +// single value, but Decision is exposed for diagnostics endpoints and tests. +type Decision struct { + // Key is the flag identifier that was evaluated. + Key string + + // Enabled is the boolean projection of the decision. For variant + // flags it is true unless Variant is "", "off", "false", or "0". + Enabled bool + + // Variant is the raw value the provider produced. Boolean flags use + // "on" / "off". Variant flags use arbitrary identifiers such as + // "control", "experiment-v2". + Variant string + + // Reason records why this decision was made (see Reason constants). 
+ Reason Reason + + // Source is the name of the provider that produced the decision, or + // "default" when no provider matched. Useful for debugging which + // configuration layer is winning in a ChainProvider setup. + Source string +} + +// Provider is the configuration backend for the feature flag Service. +// Implementations must be safe for concurrent use; the Service reads +// providers from many goroutines without additional locking. +// +// A Lookup call returns (decision, true) when the provider knows about the +// key and (zero, false) when it does not. Callers must rely on the boolean, +// not on the Decision content, because Decision is otherwise the zero value +// when found is false. +type Provider interface { + // Lookup evaluates a single flag against the supplied context. + // Implementations should never panic; on internal failures they + // should return a Decision with Reason=ReasonError and found=true so + // the Service can record the failure without falling through to a + // less specific provider. + Lookup(ctx context.Context, key string) (decision Decision, found bool) + + // Name returns a stable, human-readable identifier used in Decision.Source + // and in diagnostic endpoints. Two provider instances of the same type + // may share a name; uniqueness is not required. + Name() string +} diff --git a/server/pkg/featureflag/service.go b/server/pkg/featureflag/service.go new file mode 100644 index 000000000..014fe0a2a --- /dev/null +++ b/server/pkg/featureflag/service.go @@ -0,0 +1,149 @@ +package featureflag + +import ( + "context" + "log/slog" +) + +// Service is the framework-level Toggle Router. Business code asks the +// Service for flag decisions; the Service in turn consults its configured +// Provider. The Service is safe for concurrent use and is the only type +// callers should hold a reference to. 
+// +// A nil *Service is valid and behaves as if every flag were missing: every +// call returns the supplied default with Reason=ReasonDefault. This lets +// callers compose Service without first guarding against nil, which in +// practice is the most common cause of feature-flag-related nil panics. +type Service struct { + provider Provider + logger *slog.Logger +} + +// Option configures optional Service behavior. +type Option func(*Service) + +// WithLogger attaches a structured logger that the Service will use to emit +// warnings for malformed flag configuration. By default the Service is +// silent so it can be embedded in tests without polluting output. +func WithLogger(l *slog.Logger) Option { + return func(s *Service) { + if l != nil { + s.logger = l + } + } +} + +// NewService returns a Service backed by the supplied provider. Passing a +// nil provider is allowed and is equivalent to the always-default behavior; +// see the package doc for the rationale. +func NewService(provider Provider, opts ...Option) *Service { + s := &Service{provider: provider} + for _, opt := range opts { + opt(s) + } + return s +} + +// IsEnabled returns true when the named flag evaluates to an "on" state for +// the EvalContext attached to ctx. When the flag is unknown or its provider +// errors, the supplied default is returned so business code can ship with +// confidence that a missing flag never crashes a request. +// +// IsEnabled is the most common Toggle Point in business code: +// +// if flags.IsEnabled(ctx, "billing_new_invoice_email", false) { +// return s.sendNewInvoiceEmail(ctx, invoice) +// } +// return s.sendLegacyInvoiceEmail(ctx, invoice) +func (s *Service) IsEnabled(ctx context.Context, key string, defaultVal bool) bool { + return s.Decision(ctx, key, defaultVal).Enabled +} + +// Variant returns the raw variant value for the named flag, falling back to +// defaultVal when no provider matches. 
Use Variant for multi-arm flags +// (A/B/C tests, "control"/"experiment"/"holdout"). For simple on/off flags, +// prefer IsEnabled. +func (s *Service) Variant(ctx context.Context, key string, defaultVal string) string { + d := s.decisionWithVariantDefault(ctx, key, defaultVal) + return d.Variant +} + +// Decision returns the full structured Decision for a flag. The supplied +// boolean default is used to populate both Variant and Enabled when no +// provider matches the key. Diagnostic endpoints and tests use this entry +// point to surface Reason and Source. +func (s *Service) Decision(ctx context.Context, key string, defaultVal bool) Decision { + if s == nil || s.provider == nil { + return defaultDecision(key, boolToVariant(defaultVal), defaultVal) + } + d, ok := s.provider.Lookup(ctx, key) + if !ok { + return defaultDecision(key, boolToVariant(defaultVal), defaultVal) + } + if d.Reason == ReasonError && s.logger != nil { + s.logger.WarnContext(ctx, "feature flag provider returned an error decision", + slog.String("key", key), + slog.String("source", d.Source), + ) + } + d.Key = key + return d +} + +// decisionWithVariantDefault is the variant-aware twin of Decision. It is +// kept private because callers who care about reasons can rely on Decision +// + IsEnabled; Variant is a convenience. +func (s *Service) decisionWithVariantDefault(ctx context.Context, key, defaultVariant string) Decision { + if s == nil || s.provider == nil { + return defaultDecision(key, defaultVariant, variantEnabled(defaultVariant)) + } + d, ok := s.provider.Lookup(ctx, key) + if !ok { + return defaultDecision(key, defaultVariant, variantEnabled(defaultVariant)) + } + d.Key = key + return d +} + +// Provider exposes the wrapped Provider so diagnostic endpoints can iterate +// known flags. Callers MUST NOT mutate the returned Provider; the contract +// is read-only. 
+func (s *Service) Provider() Provider { + if s == nil { + return nil + } + return s.provider +} + +func defaultDecision(key, variant string, enabled bool) Decision { + return Decision{ + Key: key, + Enabled: enabled, + Variant: variant, + Reason: ReasonDefault, + Source: "default", + } +} + +// boolToVariant produces the canonical variant string for a boolean flag. +// "on" / "off" is used rather than "true" / "false" so that string-typed +// providers (e.g. env vars) do not collide with the user's own bool-as-text +// values. +func boolToVariant(b bool) string { + if b { + return "on" + } + return "off" +} + +// variantEnabled reports whether a variant string projects to "enabled". +// The empty string, "off", "false" and "0" are the false values; everything +// else, including arbitrary variant identifiers like "experiment-v2", is +// enabled. Callers who care about specific variants should compare with == +// directly. +func variantEnabled(v string) bool { + switch v { + case "", "off", "false", "0": + return false + } + return true +} diff --git a/server/pkg/featureflag/service_test.go b/server/pkg/featureflag/service_test.go new file mode 100644 index 000000000..2b4e3abde --- /dev/null +++ b/server/pkg/featureflag/service_test.go @@ -0,0 +1,83 @@ +package featureflag + +import ( + "context" + "testing" +) + +func TestServiceNilSafe(t *testing.T) { + t.Parallel() + var s *Service + if s.IsEnabled(context.Background(), "anything", true) != true { + t.Fatalf("nil Service must honor the default") + } + if s.IsEnabled(context.Background(), "anything", false) != false { + t.Fatalf("nil Service must honor the default") + } + if got := s.Variant(context.Background(), "anything", "control"); got != "control" { + t.Fatalf("nil Service must return the variant default, got %q", got) + } + d := s.Decision(context.Background(), "anything", false) + if d.Reason != ReasonDefault || d.Source != "default" { + t.Fatalf("nil Service must return ReasonDefault, got %+v", d) + } +} + +func 
TestServiceNilProvider(t *testing.T) { + t.Parallel() + s := NewService(nil) + if got := s.IsEnabled(context.Background(), "missing", true); got != true { + t.Fatalf("nil provider must honor the default") + } + d := s.Decision(context.Background(), "missing", false) + if d.Reason != ReasonDefault { + t.Fatalf("expected ReasonDefault, got %s", d.Reason) + } +} + +func TestServiceUsesProvider(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("billing_new_invoice_email", Rule{Default: true}) + s := NewService(sp) + + if !s.IsEnabled(context.Background(), "billing_new_invoice_email", false) { + t.Fatalf("static provider should override the false default") + } + d := s.Decision(context.Background(), "billing_new_invoice_email", false) + if d.Reason != ReasonStatic || d.Source != "static" { + t.Fatalf("expected ReasonStatic from static source, got %+v", d) + } + if d.Key != "billing_new_invoice_email" { + t.Fatalf("decision must echo the requested key, got %q", d.Key) + } +} + +func TestServiceMissingKeyReturnsDefault(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("known", Rule{Default: true}) + s := NewService(sp) + + if s.IsEnabled(context.Background(), "unknown", false) { + t.Fatalf("unknown key must honor the default") + } + d := s.Decision(context.Background(), "unknown", true) + if d.Reason != ReasonDefault || d.Enabled != true || d.Variant != "on" { + t.Fatalf("missing key did not produce default decision: %+v", d) + } +} + +func TestServiceVariantFlag(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("checkout_algo", Rule{Default: true, Variant: "experiment-v2"}) + s := NewService(sp) + + if got := s.Variant(context.Background(), "checkout_algo", "control"); got != "experiment-v2" { + t.Fatalf("expected experiment-v2, got %q", got) + } + if got := s.Variant(context.Background(), "unknown_algo", "control"); got != "control" { + t.Fatalf("missing key must fall through to variant default, got %q", got) + } +} 
diff --git a/server/pkg/featureflag/static_provider.go b/server/pkg/featureflag/static_provider.go new file mode 100644 index 000000000..b2c4c4192 --- /dev/null +++ b/server/pkg/featureflag/static_provider.go @@ -0,0 +1,199 @@ +package featureflag + +import ( + "context" + "slices" + "sync" +) + +// Rule describes how a single flag is evaluated by the StaticProvider. +// All fields are optional; an empty Rule evaluates to Default (false) for +// everyone. +// +// Evaluation order (first match wins): +// +// 1. Deny: if the EvalContext value of attribute DenyBy (default +// "user_id") is listed in Deny, the flag is OFF. +// 2. Allow: if the EvalContext value of attribute AllowBy (default +// "user_id") is listed in Allow, the flag is ON. +// 3. Percent: if Percent is non-nil, the flag is ON when the bucket for +// (key, identifier) falls inside Percent.Percent and OFF otherwise; +// Default is not consulted in this case. +// 4. Default: returned when none of the steps above produced a result. +// +// Allow / Deny lists are intentionally separate (rather than a single +// targeting predicate) because operationally they cover different use +// cases — Allow is "internal users only" and Deny is "kill switch for +// these tenants" — and keeping them separate makes the data easy to audit +// in source control. +type Rule struct { + // Default is the value returned when no targeting rule matches. It is + // not consulted when Percent is set. + Default bool + + // Variant overrides the boolean "on" / "off" projection with an + // arbitrary variant identifier. Use this for multi-arm flags. + // Variant never changes Enabled: Enabled is decided by the evaluation + // order above, and the same Variant string is reported whether the + // flag evaluates on or off. + Variant string + + // Allow is the set of identifier values that force the flag ON. + Allow []string + + // AllowBy is the EvalContext attribute name used for Allow lookups. + // Defaults to "user_id" when empty. + AllowBy string + + // Deny is the set of identifier values that force the flag OFF. + // Deny wins over Allow. + Deny []string + + // DenyBy is the EvalContext attribute name used for Deny lookups. + // Defaults to "user_id" when empty. 
+ DenyBy string + + // Percent enables a deterministic percent rollout. When nil, no + // percent rollout is applied and Default is used as the fallback. + Percent *PercentRollout +} + +// PercentRollout describes a deterministic percent rollout. +// +// The bucket is computed from (flag key, EvalContext attribute By) using +// FNV-1a, which guarantees that the same identifier always falls into the +// same bucket across processes and across restarts. This is what callers +// need so users do not flip in and out of an experiment between requests. +type PercentRollout struct { + // Percent is the rollout size in [0, 100]. 0 disables the rollout; + // 100 enables it for everyone. Out-of-range values are clamped. + Percent int + + // By selects the EvalContext attribute used as the bucketing + // identifier. Defaults to "user_id". Use "workspace_id" for + // workspace-scoped rollouts. + By string +} + +// StaticProvider is a thread-safe in-memory Provider populated either +// programmatically or from a config file. It is the recommended baseline +// provider for production: configuration lives in source control, moves +// through CD alongside the binary, and changes require a deploy — which is +// exactly the Continuous Delivery posture Martin Fowler recommends for +// Release Toggles and most Permissioning Toggles. +// +// For dynamic flags (kill switches, A/B tests changed by product) compose +// a StaticProvider with a DB-backed Provider behind a ChainProvider. +type StaticProvider struct { + mu sync.RWMutex + rules map[string]Rule +} + +// NewStaticProvider returns an empty StaticProvider. Use Set or +// LoadRules to populate it. +func NewStaticProvider() *StaticProvider { + return &StaticProvider{rules: map[string]Rule{}} +} + +// Name implements Provider. +func (*StaticProvider) Name() string { return "static" } + +// Set installs or replaces the rule for key. Concurrent callers are +// serialized; readers (Lookup) never block writers for long. 
+func (p *StaticProvider) Set(key string, rule Rule) { + p.mu.Lock() + defer p.mu.Unlock() + p.rules[key] = rule +} + +// LoadRules atomically replaces every rule in the provider with the supplied +// map. Use this when reloading from a config file: a partial reload could +// otherwise leave the provider in a mixed state where some flags reflect the +// new config and others the old. +// +// The map itself is copied before the lock is taken, so later changes to the +// caller's map are not seen. Each Rule is copied by value only: its Allow and +// Deny slices and its Percent pointer still refer to the caller's data, so +// callers should not modify those after handing the rules over. +func (p *StaticProvider) LoadRules(rules map[string]Rule) { + clone := make(map[string]Rule, len(rules)) + for k, v := range rules { + clone[k] = v + } + p.mu.Lock() + defer p.mu.Unlock() + p.rules = clone +} + +// Keys returns the sorted set of flag keys this provider knows about. Useful +// for diagnostic endpoints. The returned slice is a copy; mutating it does +// not affect the provider. +func (p *StaticProvider) Keys() []string { + p.mu.RLock() + defer p.mu.RUnlock() + out := make([]string, 0, len(p.rules)) + for k := range p.rules { + out = append(out, k) + } + slices.Sort(out) + return out +} + +// Lookup implements Provider. It returns (Decision{}, false) when key has no +// rule; otherwise it evaluates the rule against the EvalContext taken from +// ctx and returns (decision, true). The read lock only covers the map access +// and is released before the rule is evaluated. +func (p *StaticProvider) Lookup(ctx context.Context, key string) (Decision, bool) { + p.mu.RLock() + rule, ok := p.rules[key] + p.mu.RUnlock() + if !ok { + return Decision{}, false + } + ec := EvalContextFrom(ctx) + return evaluateRule(key, rule, ec), true +} + +// evaluateRule turns one rule into a Decision for ec by trying Deny, Allow, +// Percent and Default in that order; the first step that applies wins. +func evaluateRule(key string, rule Rule, ec EvalContext) Decision { + // Deny wins over everything else. A kill switch must be reachable + // even when other targeting matches. 
+ denyBy := orDefault(rule.DenyBy, "user_id") + if len(rule.Deny) > 0 { + if v, ok := ec.Lookup(denyBy); ok && slices.Contains(rule.Deny, v) { + return decisionFromRule(key, rule, false, ReasonStatic) + } + } + + allowBy := orDefault(rule.AllowBy, "user_id") + if len(rule.Allow) > 0 { + if v, ok := ec.Lookup(allowBy); ok && slices.Contains(rule.Allow, v) { + return decisionFromRule(key, rule, true, ReasonStatic) + } + } + + if rule.Percent != nil { + by := orDefault(rule.Percent.By, "user_id") + identifier, _ := ec.Lookup(by) + // An empty identifier still produces a deterministic bucket + // (the empty string hashes to a stable bucket) but in practice + // that means everyone-without-an-id lands in the same bucket. + // That's the desired behavior for percent rollouts at the edge: + // anonymous users get a single shared rollout decision per + // flag, not a uniformly random one. + if inPercent(key, identifier, rule.Percent.Percent) { + return decisionFromRule(key, rule, true, ReasonPercent) + } + // Once Percent is set the outcome is decided here: identifiers + // outside the bucket are OFF and rule.Default is not consulted. + return decisionFromRule(key, rule, false, ReasonPercent) + } + + return decisionFromRule(key, rule, rule.Default, ReasonStatic) +} + +// decisionFromRule builds the Decision the StaticProvider reports for key. +// Variant is rule.Variant when that is non-empty, regardless of enabled; +// otherwise it is the "on" / "off" projection of enabled. Source is always +// "static". +func decisionFromRule(key string, rule Rule, enabled bool, reason Reason) Decision { + variant := rule.Variant + if variant == "" { + variant = boolToVariant(enabled) + } + return Decision{ + Key: key, + Enabled: enabled, + Variant: variant, + Reason: reason, + Source: "static", + } +} + +// orDefault returns v, or def when v is the empty string. +func orDefault(v, def string) string { + if v == "" { + return def + } + return v +} diff --git a/server/pkg/featureflag/static_provider_test.go b/server/pkg/featureflag/static_provider_test.go new file mode 100644 index 000000000..434c525e1 --- /dev/null +++ b/server/pkg/featureflag/static_provider_test.go @@ -0,0 +1,206 @@ +package featureflag + +import ( + "context" + "testing" +) + +func TestStaticProviderDefault(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("flag_a", Rule{Default: true}) + sp.Set("flag_b", Rule{Default: false}) 
+ + d, ok := sp.Lookup(context.Background(), "flag_a") + if !ok || !d.Enabled || d.Reason != ReasonStatic { + t.Fatalf("flag_a should be statically enabled, got %+v ok=%v", d, ok) + } + d, ok = sp.Lookup(context.Background(), "flag_b") + if !ok || d.Enabled || d.Reason != ReasonStatic { + t.Fatalf("flag_b should be statically disabled, got %+v ok=%v", d, ok) + } + _, ok = sp.Lookup(context.Background(), "missing") + if ok { + t.Fatalf("missing flag must report not-found") + } +} + +func TestStaticProviderAllowAndDeny(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("internal_feature", Rule{ + Default: false, + Allow: []string{"user-internal"}, + Deny: []string{"user-banned"}, + }) + + allowCtx := WithEvalContext(context.Background(), EvalContext{UserID: "user-internal"}) + d, _ := sp.Lookup(allowCtx, "internal_feature") + if !d.Enabled { + t.Fatalf("allowlisted user must see the flag enabled") + } + + denyCtx := WithEvalContext(context.Background(), EvalContext{UserID: "user-banned"}) + d, _ = sp.Lookup(denyCtx, "internal_feature") + if d.Enabled { + t.Fatalf("denylisted user must see the flag disabled") + } + + otherCtx := WithEvalContext(context.Background(), EvalContext{UserID: "user-random"}) + d, _ = sp.Lookup(otherCtx, "internal_feature") + if d.Enabled { + t.Fatalf("everyone else should fall back to Default=false") + } +} + +func TestStaticProviderDenyWinsOverAllow(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("conflict", Rule{ + Default: false, + Allow: []string{"same-user"}, + Deny: []string{"same-user"}, + }) + ctx := WithEvalContext(context.Background(), EvalContext{UserID: "same-user"}) + d, _ := sp.Lookup(ctx, "conflict") + if d.Enabled { + t.Fatalf("Deny must win over Allow") + } +} + +func TestStaticProviderPercentRolloutDeterministic(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("gradual", Rule{ + Default: false, + Percent: &PercentRollout{Percent: 50}, + }) + + // The same identifier 
must produce the same decision across many calls. + ctx := WithEvalContext(context.Background(), EvalContext{UserID: "stable-user"}) + first, _ := sp.Lookup(ctx, "gradual") + for i := 0; i < 100; i++ { + d, _ := sp.Lookup(ctx, "gradual") + if d.Enabled != first.Enabled { + t.Fatalf("percent rollout flapped between calls: first=%v iter=%v", first, d) + } + } +} + +func TestStaticProviderPercentRolloutDistribution(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("split", Rule{Percent: &PercentRollout{Percent: 50}}) + + enabled := 0 + const N = 1000 + for i := 0; i < N; i++ { + ctx := WithEvalContext(context.Background(), EvalContext{ + UserID: randomUserID(i), + }) + d, _ := sp.Lookup(ctx, "split") + if d.Enabled { + enabled++ + } + } + // A 50% rollout over 1000 distinct users should land near 500. + // We allow a generous +/- 100 window so the test is not flaky on + // CI; the goal is to catch a misconfigured hash, not to validate + // statistical properties of FNV. + if enabled < 400 || enabled > 600 { + t.Fatalf("50%% rollout produced %d/1000 enabled — distribution looks broken", enabled) + } +} + +func TestStaticProviderPercentRolloutBy(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("ws_rollout", Rule{Percent: &PercentRollout{Percent: 100, By: "workspace_id"}}) + + // Percent=100 with By=workspace_id should always enable, even when + // UserID is unset. 
+ ctx := WithEvalContext(context.Background(), EvalContext{WorkspaceID: "any-workspace"}) + d, _ := sp.Lookup(ctx, "ws_rollout") + if !d.Enabled || d.Reason != ReasonPercent { + t.Fatalf("100%% workspace rollout should always enable, got %+v", d) + } +} + +func TestStaticProviderPercentZero(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("off_for_everyone", Rule{Percent: &PercentRollout{Percent: 0}}) + ctx := WithEvalContext(context.Background(), EvalContext{UserID: "anyone"}) + d, _ := sp.Lookup(ctx, "off_for_everyone") + if d.Enabled { + t.Fatalf("0%% rollout must disable everyone") + } +} + +func TestStaticProviderLoadRulesAtomic(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("old", Rule{Default: true}) + sp.LoadRules(map[string]Rule{ + "new": {Default: true}, + }) + if _, ok := sp.Lookup(context.Background(), "old"); ok { + t.Fatalf("LoadRules must replace, not merge, the rule map") + } + if d, ok := sp.Lookup(context.Background(), "new"); !ok || !d.Enabled { + t.Fatalf("LoadRules failed to install new rule, got %+v ok=%v", d, ok) + } +} + +func TestStaticProviderKeysSorted(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("zeta", Rule{}) + sp.Set("alpha", Rule{}) + sp.Set("mu", Rule{}) + + keys := sp.Keys() + want := []string{"alpha", "mu", "zeta"} + if len(keys) != len(want) { + t.Fatalf("expected %d keys, got %d", len(want), len(keys)) + } + for i, k := range want { + if keys[i] != k { + t.Fatalf("keys not sorted: %v", keys) + } + } +} + +func TestStaticProviderCustomAttribute(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("plan_gate", Rule{ + Default: false, + Allow: []string{"enterprise"}, + AllowBy: "plan", + }) + ctx := WithEvalContext(context.Background(), EvalContext{ + UserID: "anyone", + Attributes: map[string]string{"plan": "enterprise"}, + }) + d, _ := sp.Lookup(ctx, "plan_gate") + if !d.Enabled { + t.Fatalf("plan=enterprise should pass allowlist, got %+v", d) + } +} + 
// randomUserID maps i to a fixed five-byte identifier: three lowercase
// letters (the base-26 digits of i, most significant first), a dash, and the
// last decimal digit of i. Despite the name nothing is random — the same i
// always yields the same id, which keeps the rollout distribution test
// deterministic across runs.
func randomUserID(i int) string {
	const letters = "abcdefghijklmnopqrstuvwxyz"
	const base = len(letters)

	// Spelling i in base 26 (plus a decimal check digit) makes neighbouring
	// ids differ in more than one byte, which exercises the hash better
	// than a plain numeric suffix would.
	hi := letters[(i/(base*base))%base]
	mid := letters[(i/base)%base]
	lo := letters[i%base]
	digit := byte('0') + byte(i%10)

	return string([]byte{hi, mid, lo, '-', digit})
}