diff --git a/.env.example b/.env.example index 92c7ee816..583f5a2f7 100644 --- a/.env.example +++ b/.env.example @@ -71,6 +71,28 @@ MULTICA_CODEX_MODEL= MULTICA_CODEX_WORKDIR= MULTICA_CODEX_TIMEOUT=20m +# Feature flags +# Optional path to a YAML file declaring feature flag rules. When unset, +# every flag falls through to the caller's default, which lets the server +# boot before any flag config is authored. When set, the file is read once +# at startup and a parse / IO error fails fast — same loud-failure shape as +# DATABASE_URL or JWT_SECRET misconfig. See docs/feature-flags.md for the +# full schema; the minimum example is: +# +# billing_new_invoice_email: +# default: true +# checkout_algo: +# default: false +# variant: experiment-v2 +# percent: { percent: 25, by: user_id } +# +# Individual flags can also be overridden without touching the YAML by +# setting FF_ env vars (FF_BILLING_NEW_INVOICE_EMAIL=false, 25%, +# or any variant string). The env override beats the YAML, which is the +# Ops kill-switch path — flip a flag without redeploying by restarting the +# process with the env var set. +MULTICA_FEATURE_FLAGS_FILE= + # Self-host image channel # Default stable release channel. Pin to an exact release like v0.2.4 if you # want to stay on a specific version. If the selected tag has not been diff --git a/docs/feature-flags.md b/docs/feature-flags.md index 88ee0bad6..c4783846c 100644 --- a/docs/feature-flags.md +++ b/docs/feature-flags.md @@ -40,30 +40,47 @@ Manage them in the same provider but treat them differently: Release flags get d ### Wiring at startup +The server constructs a `featureflag.Service` once in `cmd/server/main.go` via the standard helper: + ```go -import "github.com/multica-ai/multica/server/pkg/featureflag" - -static := featureflag.NewStaticProvider() -static.LoadRules(map[string]featureflag.Rule{ - "billing_new_invoice_email": {Default: true}, - "checkout_algo": { - Default: false, - Variant: "experiment-v2", - Percent: &featureflag.PercentRollout{Percent: 25, By: "user_id"}, - }, - "ops_disable_recommendations": {Default: false}, -}) - -// Env overrides win over static config so SREs can flip kill switches -// without redeploying: `FF_OPS_DISABLE_RECOMMENDATIONS=true ./multica-server`. -env := featureflag.NewEnvProvider("FF_") - -flags := featureflag.NewService( - featureflag.NewChainProvider(env, static), - featureflag.WithLogger(logger), -) +flags, err := featureflag.NewServiceFromEnv(featureflag.WithLogger(slog.Default())) +if err != nil { + slog.Error("feature flag configuration failed to load", "error", err) + os.Exit(1) +} ``` +`NewServiceFromEnv` reads two env vars — both follow the same `MULTICA_*_FILE` / `FF_*` conventions documented in `.env.example`: + +| Env var | Role | +|---|---| +| `MULTICA_FEATURE_FLAGS_FILE` | Path to the YAML rule set (optional; absent = no static rules). | +| `FF_` | Per-flag runtime override. `FF_BILLING_NEW_INVOICE_EMAIL=false` / `25%` / `experiment-v2`. Beats the YAML, no redeploy. | + +The provider chain is `EnvProvider → YAML StaticProvider`. The server can boot with zero flag config — every `IsEnabled` call falls back to the caller's default until someone authors a rule. + +### YAML schema + +```yaml +# /etc/multica/feature-flags.yaml +billing_new_invoice_email: + default: true + +checkout_algo: + default: false + variant: experiment-v2 + percent: + percent: 25 + by: user_id + +ops_disable_recommendations: + default: false + allow: ["user-internal-1", "user-internal-2"] + allow_by: user_id +``` + +Every field except `default` is optional. `variant` is the on-variant — see the multi-arm note below. An empty file is a valid "no flags yet" state. Malformed YAML fails startup the same way `DATABASE_URL` parse errors do, so misconfig surfaces loudly. + ### Attaching evaluation context to the request ```go diff --git a/server/cmd/server/main.go b/server/cmd/server/main.go index ac95c9eb4..43716df61 100644 --- a/server/cmd/server/main.go +++ b/server/cmd/server/main.go @@ -22,6 +22,7 @@ import ( "github.com/multica-ai/multica/server/internal/scheduler" "github.com/multica-ai/multica/server/internal/service" db "github.com/multica-ai/multica/server/pkg/db/generated" + "github.com/multica-ai/multica/server/pkg/featureflag" "github.com/redis/go-redis/v9" ) @@ -141,6 +142,23 @@ func main() { port = "8080" } + // Feature flags: loaded once at startup from MULTICA_FEATURE_FLAGS_FILE + // (a YAML rule set) with FF_ env overrides layered on top. + // See docs/feature-flags.md for the schema and lifecycle rules. + // + // Booting the server without any flag config is intentional: when the + // env var is unset, every IsEnabled call falls through to the caller's + // default, so existing code paths are unchanged until someone adds a + // rule. A misconfigured (malformed / missing) file surfaces as a hard + // error so operators see misconfig the same way they do for any other + // MULTICA_*_FILE knob. + flags, err := featureflag.NewServiceFromEnv(featureflag.WithLogger(slog.Default())) + if err != nil { + slog.Error("feature flag configuration failed to load", "error", err) + os.Exit(1) + } + _ = flags // wired into handlers/services as call sites adopt flags; see docs/feature-flags.md + dbURL := os.Getenv("DATABASE_URL") if dbURL == "" { dbURL = "postgres://multica:multica@localhost:5432/multica?sslmode=disable" diff --git a/server/pkg/featureflag/config.go b/server/pkg/featureflag/config.go new file mode 100644 index 000000000..aca1f4c7c --- /dev/null +++ b/server/pkg/featureflag/config.go @@ -0,0 +1,162 @@ +package featureflag + +import ( + "fmt" + "log/slog" + "os" + "strings" + + "gopkg.in/yaml.v3" +) + +// EnvFlagFile is the environment variable consulted by NewServiceFromEnv to +// locate a YAML rule file. It follows the same convention as the other +// MULTICA_*_CONFIG / MULTICA_*_FILE knobs documented in .env.example. +const EnvFlagFile = "MULTICA_FEATURE_FLAGS_FILE" + +// EnvOverridePrefix is the prefix EnvProvider uses when NewServiceFromEnv +// composes the standard provider chain. Individual flags can be overridden +// at runtime with `FF_=true|false|42%|` env vars without +// touching the YAML file — the env override beats the file value. +const EnvOverridePrefix = "FF_" + +// ruleConfig is the wire format used by the YAML / JSON loader. It mirrors +// Rule but uses snake_case keys (for YAML ergonomics) and pointer types so +// we can tell "unset" from "explicit zero". +// +// Keeping the wire shape separate from runtime Rule means the config format +// can evolve (add fields, deprecate names) without forcing every business +// caller of Rule to recompile against the new shape. +type ruleConfig struct { + Default *bool `yaml:"default,omitempty"` + Variant string `yaml:"variant,omitempty"` + Allow []string `yaml:"allow,omitempty"` + AllowBy string `yaml:"allow_by,omitempty"` + Deny []string `yaml:"deny,omitempty"` + DenyBy string `yaml:"deny_by,omitempty"` + Percent *percentConfig `yaml:"percent,omitempty"` +} + +type percentConfig struct { + Percent int `yaml:"percent"` + By string `yaml:"by,omitempty"` +} + +// toRule converts the wire shape to a runtime Rule, applying defaults for +// fields that the YAML omitted. +func (rc ruleConfig) toRule() Rule { + r := Rule{ + Variant: rc.Variant, + Allow: rc.Allow, + AllowBy: rc.AllowBy, + Deny: rc.Deny, + DenyBy: rc.DenyBy, + } + if rc.Default != nil { + r.Default = *rc.Default + } + if rc.Percent != nil { + r.Percent = &PercentRollout{ + Percent: rc.Percent.Percent, + By: rc.Percent.By, + } + } + return r +} + +// LoadRulesFromYAMLFile reads a YAML file mapping flag keys to rule +// definitions and returns the parsed map ready to be installed on a +// StaticProvider via LoadRules. +// +// Schema (every field except `default` is optional): +// +// billing_new_invoice_email: +// default: true +// +// checkout_algo: +// default: false +// variant: experiment-v2 +// percent: +// percent: 25 +// by: user_id +// +// ops_disable_recommendations: +// default: false +// allow: ["user-internal-1", "user-internal-2"] +// +// An empty or whitespace-only file returns an empty map with no error, so +// operators can drop a flags file in place before authoring any flag +// without breaking server startup. +func LoadRulesFromYAMLFile(path string) (map[string]Rule, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("featureflag: read %s: %w", path, err) + } + return parseRulesYAML(data) +} + +// parseRulesYAML is the file-format-aware core of LoadRulesFromYAMLFile. +// Exposed unexported so tests can exercise the parser without touching the +// filesystem. +func parseRulesYAML(data []byte) (map[string]Rule, error) { + // An empty body is a valid "no flags defined yet" state. yaml.Unmarshal + // on `nil` leaves the destination nil, so handle this explicitly to + // return an empty (non-nil) map for the convenience of callers. + if len(strings.TrimSpace(string(data))) == 0 { + return map[string]Rule{}, nil + } + var raw map[string]ruleConfig + if err := yaml.Unmarshal(data, &raw); err != nil { + return nil, fmt.Errorf("featureflag: parse: %w", err) + } + out := make(map[string]Rule, len(raw)) + for key, rc := range raw { + out[key] = rc.toRule() + } + return out, nil +} + +// NewServiceFromEnv constructs a Service wired with the standard multica +// config sources, in order of decreasing precedence: +// +// 1. EnvProvider (FF_ overrides — Ops kill switches, fastest path). +// 2. StaticProvider loaded from the YAML file at MULTICA_FEATURE_FLAGS_FILE +// (when the env var is set and the file exists). +// +// When MULTICA_FEATURE_FLAGS_FILE is unset, the Service still works — the +// EnvProvider is the sole layer, and IsEnabled falls through to the +// caller's default for any flag without an FF_ override. The server +// can therefore boot before any flag config is authored. +// +// When the file path is set but the file is malformed, this returns an +// error rather than silently dropping the configuration — operators +// expect feature-flag misconfig to fail loudly the way every other +// config knob does (DATABASE_URL parse errors, JWT_SECRET missing in +// production, etc.). +func NewServiceFromEnv(opts ...Option) (*Service, error) { + var providers []Provider + providers = append(providers, NewEnvProvider(EnvOverridePrefix)) + + path := strings.TrimSpace(os.Getenv(EnvFlagFile)) + var loadedCount int + if path != "" { + rules, err := LoadRulesFromYAMLFile(path) + if err != nil { + return nil, err + } + sp := NewStaticProvider() + sp.LoadRules(rules) + providers = append(providers, sp) + loadedCount = len(rules) + } + + svc := NewService(NewChainProvider(providers...), opts...) + if svc.logger != nil { + svc.logger.Info("feature flags initialised", + slog.String("file", path), + slog.Int("rules", loadedCount), + slog.String("env_prefix", EnvOverridePrefix), + ) + } + return svc, nil +} diff --git a/server/pkg/featureflag/config_test.go b/server/pkg/featureflag/config_test.go new file mode 100644 index 000000000..7d488aa67 --- /dev/null +++ b/server/pkg/featureflag/config_test.go @@ -0,0 +1,177 @@ +package featureflag + +import ( + "context" + "os" + "path/filepath" + "testing" +) + +func writeTempFile(t *testing.T, name, body string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte(body), 0o600); err != nil { + t.Fatalf("write temp file: %v", err) + } + return path +} + +func TestLoadRulesFromYAMLFileSimple(t *testing.T) { + t.Parallel() + path := writeTempFile(t, "flags.yaml", ` +billing_new_invoice_email: + default: true +ops_disable_recommendations: + default: false +`) + rules, err := LoadRulesFromYAMLFile(path) + if err != nil { + t.Fatalf("LoadRulesFromYAMLFile: %v", err) + } + if got := rules["billing_new_invoice_email"].Default; got != true { + t.Fatalf("billing_new_invoice_email Default = %v, want true", got) + } + if got := rules["ops_disable_recommendations"].Default; got != false { + t.Fatalf("ops_disable_recommendations Default = %v, want false", got) + } +} + +func TestLoadRulesFromYAMLFileFullShape(t *testing.T) { + t.Parallel() + path := writeTempFile(t, "flags.yaml", ` +checkout_algo: + default: false + variant: experiment-v2 + allow: ["user-internal"] + allow_by: user_id + deny: ["banned-tenant"] + deny_by: workspace_id + percent: + percent: 25 + by: user_id +`) + rules, err := LoadRulesFromYAMLFile(path) + if err != nil { + t.Fatalf("LoadRulesFromYAMLFile: %v", err) + } + r := rules["checkout_algo"] + if r.Default != false { + t.Fatalf("Default = %v, want false", r.Default) + } + if r.Variant != "experiment-v2" { + t.Fatalf("Variant = %q, want experiment-v2", r.Variant) + } + if len(r.Allow) != 1 || r.Allow[0] != "user-internal" { + t.Fatalf("Allow = %#v", r.Allow) + } + if r.AllowBy != "user_id" { + t.Fatalf("AllowBy = %q", r.AllowBy) + } + if len(r.Deny) != 1 || r.Deny[0] != "banned-tenant" { + t.Fatalf("Deny = %#v", r.Deny) + } + if r.DenyBy != "workspace_id" { + t.Fatalf("DenyBy = %q", r.DenyBy) + } + if r.Percent == nil || r.Percent.Percent != 25 || r.Percent.By != "user_id" { + t.Fatalf("Percent = %#v", r.Percent) + } +} + +func TestLoadRulesFromYAMLFileEmpty(t *testing.T) { + t.Parallel() + // An empty file is a valid "no flags yet" state — server must still + // boot. Same for a whitespace-only file. + for _, body := range []string{"", " \n\n "} { + path := writeTempFile(t, "flags.yaml", body) + rules, err := LoadRulesFromYAMLFile(path) + if err != nil { + t.Fatalf("empty file should not error, got %v", err) + } + if rules == nil { + t.Fatalf("empty file should return non-nil empty map") + } + if len(rules) != 0 { + t.Fatalf("empty file should return empty map, got %d entries", len(rules)) + } + } +} + +func TestLoadRulesFromYAMLFileMissing(t *testing.T) { + t.Parallel() + _, err := LoadRulesFromYAMLFile("/no/such/path/flags.yaml") + if err == nil { + t.Fatalf("missing file must error") + } +} + +func TestLoadRulesFromYAMLFileMalformed(t *testing.T) { + t.Parallel() + // Invalid YAML (unmatched bracket) — must surface a parse error so + // operators see the misconfig instead of silently losing the file. + path := writeTempFile(t, "flags.yaml", "billing: { default: true") + _, err := LoadRulesFromYAMLFile(path) + if err == nil { + t.Fatalf("malformed YAML must error") + } +} + +func TestNewServiceFromEnvNoFile(t *testing.T) { + // Service must still work when the file env var is unset; that's the + // "framework adopted but no flags yet" path. Use t.Setenv so the + // state is restored after the test. + t.Setenv(EnvFlagFile, "") + svc, err := NewServiceFromEnv() + if err != nil { + t.Fatalf("NewServiceFromEnv: %v", err) + } + if svc == nil { + t.Fatalf("expected non-nil Service") + } + // No file, no env override → default flows through. + if !svc.IsEnabled(context.Background(), "any_flag", true) { + t.Fatalf("no provider config must honor the caller default") + } +} + +func TestNewServiceFromEnvWithFile(t *testing.T) { + path := writeTempFile(t, "flags.yaml", ` +demo_flag: + default: true +`) + t.Setenv(EnvFlagFile, path) + svc, err := NewServiceFromEnv() + if err != nil { + t.Fatalf("NewServiceFromEnv: %v", err) + } + if !svc.IsEnabled(context.Background(), "demo_flag", false) { + t.Fatalf("file rule must override the false default") + } +} + +func TestNewServiceFromEnvEnvBeatsFile(t *testing.T) { + // The chain is `env -> file`, so FF_ must win over the YAML. + // This is the Ops kill-switch path documented in .env.example. + path := writeTempFile(t, "flags.yaml", ` +demo_flag: + default: true +`) + t.Setenv(EnvFlagFile, path) + t.Setenv("FF_DEMO_FLAG", "false") + svc, err := NewServiceFromEnv() + if err != nil { + t.Fatalf("NewServiceFromEnv: %v", err) + } + if svc.IsEnabled(context.Background(), "demo_flag", true) { + t.Fatalf("env override must beat the YAML file (file=true, env=false)") + } +} + +func TestNewServiceFromEnvBadFileSurfacesError(t *testing.T) { + t.Setenv(EnvFlagFile, "/no/such/file.yaml") + _, err := NewServiceFromEnv() + if err == nil { + t.Fatalf("missing file must surface as an error so operators see misconfig") + } +} diff --git a/server/pkg/featureflag/doc.go b/server/pkg/featureflag/doc.go index 55dd33c2b..f84bb09e1 100644 --- a/server/pkg/featureflag/doc.go +++ b/server/pkg/featureflag/doc.go @@ -22,8 +22,15 @@ // - Deterministic percent rollouts: the same (key, identifier) pair always // evaluates to the same bucket so a user does not flip in and out of an // experiment across requests. -// - No third-party dependencies. The package only uses the Go standard -// library so it is safe to import from any subsystem. +// +// Wiring: +// +// The standard way to construct the Service inside the multica server is +// featureflag.NewServiceFromEnv, which reads MULTICA_FEATURE_FLAGS_FILE for +// the YAML rule set and layers an EnvProvider on top so individual flags +// can be overridden at runtime via FF_ env vars. The core types only +// depend on the standard library; the YAML loader pulls in gopkg.in/yaml.v3 +// which is already a server-level dependency. // // See server/pkg/featureflag/service.go for the public Service API and // docs/feature-flags.md for end-to-end usage examples.