diff --git a/docs/feature-flags.md b/docs/feature-flags.md index d6bdb82c7..88ee0bad6 100644 --- a/docs/feature-flags.md +++ b/docs/feature-flags.md @@ -102,6 +102,8 @@ default: } ``` +`Rule.Variant` is the **on-variant**: it is only returned when the rule evaluates to enabled=true (allow hit, percent hit, default-on). When the rule evaluates to disabled (deny hit, percent miss, default-off) the Service returns `"off"` so callers branching on `Variant()` cannot route control users into the experiment arm. This is exercised by `TestStaticProviderVariantOnlyWhenEnabled` and is the same on the TS side. + The Service is nil-safe and missing-key-safe: `(*Service)(nil).IsEnabled(ctx, "any", true)` returns `true`. Business code never needs to guard against a missing flag. --- diff --git a/packages/core/feature-flags/hash.test.ts b/packages/core/feature-flags/hash.test.ts index 424db5a35..adbe70180 100644 --- a/packages/core/feature-flags/hash.test.ts +++ b/packages/core/feature-flags/hash.test.ts @@ -22,6 +22,24 @@ describe("feature-flags hash", () => { expect(bucketFor("ab", "c")).not.toBe(bucketFor("a", "bc")); }); + // Pinned (key, identifier) -> bucket values that MUST agree with the + // Go-side server/pkg/featureflag/hash_test.go::TestPercentBucketCrossLanguageGolden. + // The shared golden table is the single source of truth for "same user, + // same bucket" across backend and frontend; if either side drifts, both + // tests fail and one must be brought back in sync. + it("cross-language golden: bucket values match the Go side exactly", () => { + const cases: ReadonlyArray<[string, string, number]> = [ + ["billing_new_invoice", "user-42", 97], + ["feature_a", "user-1", 50], + ["checkout_algo", "u-7f8a", 11], + ["ws_rollout", "workspace-1", 62], + ["empty_id_flag", "", 83], + ]; + for (const [key, id, want] of cases) { + expect(bucketFor(key, id)).toBe(want); + } + }); + it("inPercent clamps boundary values", () => { expect(inPercent("any", "any", 0)).toBe(false); expect(inPercent("any", "any", -10)).toBe(false); diff --git a/packages/core/feature-flags/hash.ts b/packages/core/feature-flags/hash.ts index feba59f61..6035fcbf6 100644 --- a/packages/core/feature-flags/hash.ts +++ b/packages/core/feature-flags/hash.ts @@ -3,9 +3,10 @@ * * The same (key, identifier) pair MUST always produce the same bucket; * otherwise users would flip in and out of experiments across requests. The - * algorithm matches the Go-side server/pkg/featureflag/hash.go so a flag - * evaluated on the frontend and on the backend lands in the same bucket for - * the same user. + * algorithm matches the Go-side server/pkg/featureflag/hash.go byte-for-byte + * so a flag evaluated on the frontend and on the backend lands in the same + * bucket for the same user. Cross-language equality is exercised by golden + * tests on both sides; see hash.test.ts and hash_test.go. * * FNV-1a is used because it is cheap, dependency-free, and well-distributed * enough for sub-100 bucketing. It is NOT cryptographic; do not use it for @@ -15,17 +16,22 @@ function fnv1a(parts: ReadonlyArray): number { // 32-bit FNV-1a: offset basis 0x811c9dc5, prime 0x01000193. let hash = 0x811c9dc5; for (let p = 0; p < parts.length; p++) { + if (p > 0) { + // Zero-byte separator BETWEEN parts (not after the last one). This + // matches what the Go side writes via h.Write([]byte{0}) between + // key and identifier and is what prevents ("ab", "c") and + // ("a", "bc") from colliding. A trailing separator would diverge + // from Go and silently break cross-tier bucket parity. + hash ^= 0; + hash = Math.imul(hash, 0x01000193); + } const s = parts[p]!; for (let i = 0; i < s.length; i++) { hash ^= s.charCodeAt(i); - // Multiply by FNV prime mod 2^32. Using Math.imul keeps the result - // in a 32-bit integer without slipping into float territory. + // Multiply by FNV prime mod 2^32. Math.imul keeps the result in a + // 32-bit integer without slipping into float territory. hash = Math.imul(hash, 0x01000193); } - // Zero separator between parts so ("ab", "c") and ("a", "bc") cannot - // hash to the same value. - hash ^= 0; - hash = Math.imul(hash, 0x01000193); } // Force unsigned 32-bit before the modulo to match Go's uint32 arithmetic. return hash >>> 0; @@ -33,7 +39,8 @@ function fnv1a(parts: ReadonlyArray): number { /** * bucketFor returns a deterministic bucket in [0, 100) for the supplied - * (key, identifier) pair. + * (key, identifier) pair. Identical to the Go bucketFor in + * server/pkg/featureflag/hash.go. */ export function bucketFor(key: string, identifier: string): number { return fnv1a([key, identifier]) % 100; diff --git a/packages/core/feature-flags/static-provider.test.ts b/packages/core/feature-flags/static-provider.test.ts index 6155d7912..df123fbed 100644 --- a/packages/core/feature-flags/static-provider.test.ts +++ b/packages/core/feature-flags/static-provider.test.ts @@ -54,6 +54,36 @@ describe("StaticProvider", () => { expect(d?.enabled).toBe(true); }); + // Regression test for the MUL-3615 review: when a rule sets `variant` + // but the rule itself evaluates to enabled=false (deny match, percent + // miss, default-off), the decision MUST report variant="off", never + // the on-variant. Otherwise a switch on `useVariant()` would route + // non-rolled-in users into the experiment arm. + it("variant: returns 'off' when the rule evaluates to disabled", () => { + const sp = new StaticProvider({ + exp: { + default: false, + variant: "experiment-v2", + deny: ["banned-user"], + percent: { percent: 0 }, + }, + }); + for (const userId of ["banned-user", "random-user", ""]) { + const d = sp.lookup("exp", { userId }); + expect(d?.enabled).toBe(false); + expect(d?.variant).toBe("off"); + } + }); + + it("variant: returns the on-variant when the rule evaluates to enabled", () => { + const sp = new StaticProvider({ + exp: { default: false, variant: "experiment-v2", allow: ["rolled-in"] }, + }); + const d = sp.lookup("exp", { userId: "rolled-in" }); + expect(d?.enabled).toBe(true); + expect(d?.variant).toBe("experiment-v2"); + }); + it("loadRules replaces, not merges, the rule map", () => { const sp = new StaticProvider({ old: { default: true } }); sp.loadRules({ fresh: { default: true } }); diff --git a/packages/core/feature-flags/static-provider.ts b/packages/core/feature-flags/static-provider.ts index 931a7ff3a..db3fecccf 100644 --- a/packages/core/feature-flags/static-provider.ts +++ b/packages/core/feature-flags/static-provider.ts @@ -80,10 +80,18 @@ function decisionFromRule( enabled: boolean, reason: Decision["reason"], ): Decision { + // Variant policy: rule.variant is the ON-variant. When the rule + // evaluates to false we return the canonical "off" so a caller + // branching on the variant cannot accidentally enter the experiment + // arm for a user that did not roll in. + let variant = boolToVariant(enabled); + if (enabled && rule.variant && rule.variant.length > 0) { + variant = rule.variant; + } return { key, enabled, - variant: rule.variant && rule.variant.length > 0 ? rule.variant : boolToVariant(enabled), + variant, reason, source: "static", }; diff --git a/packages/core/feature-flags/types.ts b/packages/core/feature-flags/types.ts index 40099b4fb..8dd565b12 100644 --- a/packages/core/feature-flags/types.ts +++ b/packages/core/feature-flags/types.ts @@ -78,8 +78,11 @@ export interface Rule { /** Value returned when no targeting rule matches. Defaults to false. */ default?: boolean; /** - * Optional variant identifier returned alongside the boolean. Use for - * multi-arm flags ("control" / "experiment-v2" / "experiment-v3"). + * Variant identifier returned WHEN the rule evaluates to enabled=true. + * Use for multi-arm experiments (e.g. "experiment-v2"). When the rule + * evaluates to enabled=false the Decision's variant is always "off", + * so callers branching on `Variant()` cannot accidentally enter the + * experiment arm for users that did not roll in. */ variant?: string; /** Identifier values that force the flag ON. */ diff --git a/server/pkg/featureflag/eval_context_test.go b/server/pkg/featureflag/eval_context_test.go index 7647d8c12..fede14053 100644 --- a/server/pkg/featureflag/eval_context_test.go +++ b/server/pkg/featureflag/eval_context_test.go @@ -101,3 +101,33 @@ func TestPercentBucketSeparator(t *testing.T) { t.Fatalf("hash separator failed: bucketFor('ab','c') == bucketFor('a','bc') == %d", left) } } + +// TestPercentBucketCrossLanguageGolden pins concrete (key, identifier) -> +// bucket values that the Go side MUST agree on with the TS side. The same +// values are duplicated in packages/core/feature-flags/hash.test.ts; if +// either side drifts, both tests fail and one must be brought back in +// sync. This is the single source of truth for "same user, same bucket" +// across the backend and the frontend. +func TestPercentBucketCrossLanguageGolden(t *testing.T) { + t.Parallel() + cases := []struct { + key, id string + want int + }{ + {"billing_new_invoice", "user-42", 97}, + {"feature_a", "user-1", 50}, + {"checkout_algo", "u-7f8a", 11}, + {"ws_rollout", "workspace-1", 62}, + {"empty_id_flag", "", 83}, + } + for _, tc := range cases { + got := bucketFor(tc.key, tc.id) + if got != tc.want { + t.Fatalf( + "cross-language golden mismatch: bucketFor(%q, %q) = %d, want %d. "+ + "If you changed the hash you MUST also update hash.test.ts.", + tc.key, tc.id, got, tc.want, + ) + } + } +} diff --git a/server/pkg/featureflag/static_provider.go b/server/pkg/featureflag/static_provider.go index b2c4c4192..9977883da 100644 --- a/server/pkg/featureflag/static_provider.go +++ b/server/pkg/featureflag/static_provider.go @@ -29,9 +29,17 @@ type Rule struct { // Default is the value returned when no targeting rule matches. Default bool - // Variant overrides the boolean "on" / "off" projection with an - // arbitrary variant identifier. Use this for multi-arm flags. - // When Variant != "", the boolean Default still controls Enabled. + // Variant is the variant identifier returned WHEN the rule evaluates + // to enabled=true. For multi-arm experiments, set Variant to the + // experiment-arm identifier (e.g. "experiment-v2"); for plain on/off + // flags leave it empty. + // + // When the rule evaluates to enabled=false (default-off, deny hit, + // percent miss, ...) the resulting Decision's Variant is always the + // canonical "off". This is deliberate: a caller that branches on + // Variant("checkout_algo", "control") would otherwise be routed into + // the experiment arm even though the user did not roll into the + // experiment cohort. Variant string // Allow is the set of identifier values that force the flag ON. @@ -178,9 +186,13 @@ func evaluateRule(key string, rule Rule, ec EvalContext) Decision { } func decisionFromRule(key string, rule Rule, enabled bool, reason Reason) Decision { - variant := rule.Variant - if variant == "" { - variant = boolToVariant(enabled) + // Variant policy: rule.Variant is the ON-variant. When the rule + // evaluates to false we return the canonical "off" so a caller + // branching on Variant() cannot accidentally enter the experiment + // arm for a user that did not roll in. + variant := boolToVariant(enabled) + if enabled && rule.Variant != "" { + variant = rule.Variant } return Decision{ Key: key, diff --git a/server/pkg/featureflag/static_provider_test.go b/server/pkg/featureflag/static_provider_test.go index 434c525e1..ce58ef0d6 100644 --- a/server/pkg/featureflag/static_provider_test.go +++ b/server/pkg/featureflag/static_provider_test.go @@ -189,6 +189,49 @@ func TestStaticProviderCustomAttribute(t *testing.T) { } } +// TestStaticProviderVariantOnlyWhenEnabled is the regression test for the +// review feedback from MUL-3615: a Rule with Variant="experiment-v2" but +// enabled=false (deny match, percent miss, default-off) MUST surface +// Variant="off", not the on-variant. Otherwise a caller branching on +// Variant() would route control users into the experiment arm. +func TestStaticProviderVariantOnlyWhenEnabled(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("exp", Rule{ + Default: false, + Variant: "experiment-v2", + Deny: []string{"banned-user"}, + Percent: &PercentRollout{Percent: 0}, // 0% rollout: nobody is in. + }) + + for _, userID := range []string{"banned-user", "random-user", ""} { + ctx := WithEvalContext(context.Background(), EvalContext{UserID: userID}) + d, _ := sp.Lookup(ctx, "exp") + if d.Enabled { + t.Fatalf("user=%q must be disabled at 0%% rollout, got %+v", userID, d) + } + if d.Variant != "off" { + t.Fatalf("user=%q got Variant=%q, want %q — on-variant must not leak when disabled", + userID, d.Variant, "off") + } + } +} + +func TestStaticProviderVariantWhenEnabled(t *testing.T) { + t.Parallel() + sp := NewStaticProvider() + sp.Set("exp", Rule{ + Default: false, + Variant: "experiment-v2", + Allow: []string{"rolled-in-user"}, + }) + ctx := WithEvalContext(context.Background(), EvalContext{UserID: "rolled-in-user"}) + d, _ := sp.Lookup(ctx, "exp") + if !d.Enabled || d.Variant != "experiment-v2" { + t.Fatalf("enabled user should see the on-variant, got %+v", d) + } +} + // randomUserID returns a stable user identifier derived from i. It exists // so the rollout distribution test is deterministic across runs (no rand). func randomUserID(i int) string {