From 81ed5ed4e12657b4672c7d0be2a024933ecd352a Mon Sep 17 00:00:00 2001 From: Eve Date: Wed, 24 Jun 2026 13:45:35 +0800 Subject: [PATCH] =?UTF-8?q?feat(featureflag):=20wire=20into=20main=20app?= =?UTF-8?q?=20config=20=E2=80=94=20YAML=20file=20+=20env=20override=20(MUL?= =?UTF-8?q?-3615)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up requested by Yushen on PR #4496: make the feature flag framework configurable through the existing main-program config system instead of requiring Go code edits. multica's main app is purely env-var driven (see .env.example) with optional MULTICA_*_FILE knobs for richer config; feature flags now follow the same pattern. server/pkg/featureflag/config.go - LoadRulesFromYAMLFile(path) parses a YAML rule set into runtime Rule structs. Empty files are a valid "no flags yet" state; missing or malformed files surface a hard error so operators see misconfig the same way DATABASE_URL parse errors do. - NewServiceFromEnv composes the standard provider chain: 1. EnvProvider("FF_") (runtime kill-switch path) 2. StaticProvider from YAML file (declarative rule set) When MULTICA_FEATURE_FLAGS_FILE is unset, only the env layer is active and every IsEnabled call falls through to the caller's default, so the server can boot before any flag is authored. server/cmd/server/main.go - Construct the Service once at startup right after env-var warnings, fail loudly on malformed YAML, log the loaded rule count via the Service logger. The Service is held in a local `flags` variable ready to be threaded into handler.Handler / service constructors when the first flag user lands. Threading is deferred to the PR that adds the first business consumer so this PR stays a pure framework + config layer. .env.example - New "Feature flags" section documents MULTICA_FEATURE_FLAGS_FILE and the FF_ override convention, with a minimal YAML schema example inline. docs/feature-flags.md - Replace the "build a provider manually" example with the NewServiceFromEnv pattern that now matches what main.go actually does. Show the YAML schema in one place. Note the on-variant / off semantics from the previous review round. server/pkg/featureflag/doc.go - Update package doc to mention the gopkg.in/yaml.v3 dependency (already a server-level dep) instead of the now-inaccurate "no third-party dependencies" claim. Tests: - go test -race -count=1 ./pkg/featureflag/... all green; new config_test.go covers: simple YAML, full-shape YAML, empty file, missing file, malformed YAML, no env var, file-only, env-beats-file, bad file surfaces error. - go test -race -count=1 -run TestHealth ./cmd/server/... sanity check that the main.go boot path with the new wiring still passes. - go vet ./... clean. Co-authored-by: multica-agent --- .env.example | 22 ++++ docs/feature-flags.md | 59 ++++++--- server/cmd/server/main.go | 18 +++ server/pkg/featureflag/config.go | 162 +++++++++++++++++++++++ server/pkg/featureflag/config_test.go | 177 ++++++++++++++++++++++++++ server/pkg/featureflag/doc.go | 11 +- 6 files changed, 426 insertions(+), 23 deletions(-) create mode 100644 server/pkg/featureflag/config.go create mode 100644 server/pkg/featureflag/config_test.go diff --git a/.env.example b/.env.example index 92c7ee816..583f5a2f7 100644 --- a/.env.example +++ b/.env.example @@ -71,6 +71,28 @@ MULTICA_CODEX_MODEL= MULTICA_CODEX_WORKDIR= MULTICA_CODEX_TIMEOUT=20m +# Feature flags +# Optional path to a YAML file declaring feature flag rules. When unset, +# every flag falls through to the caller's default, which lets the server +# boot before any flag config is authored. When set, the file is read once +# at startup and a parse / IO error fails fast — same loud-failure shape as +# DATABASE_URL or JWT_SECRET misconfig. See docs/feature-flags.md for the +# full schema; the minimum example is: +# +# billing_new_invoice_email: +# default: true +# checkout_algo: +# default: false +# variant: experiment-v2 +# percent: { percent: 25, by: user_id } +# +# Individual flags can also be overridden without touching the YAML by +# setting FF_ env vars (FF_BILLING_NEW_INVOICE_EMAIL=false, 25%, +# or any variant string). The env override beats the YAML, which is the +# Ops kill-switch path — flip a flag without redeploying by restarting the +# process with the env var set. +MULTICA_FEATURE_FLAGS_FILE= + # Self-host image channel # Default stable release channel. Pin to an exact release like v0.2.4 if you # want to stay on a specific version. If the selected tag has not been diff --git a/docs/feature-flags.md b/docs/feature-flags.md index 88ee0bad6..c4783846c 100644 --- a/docs/feature-flags.md +++ b/docs/feature-flags.md @@ -40,30 +40,47 @@ Manage them in the same provider but treat them differently: Release flags get d ### Wiring at startup +The server constructs a `featureflag.Service` once in `cmd/server/main.go` via the standard helper: + ```go -import "github.com/multica-ai/multica/server/pkg/featureflag" - -static := featureflag.NewStaticProvider() -static.LoadRules(map[string]featureflag.Rule{ - "billing_new_invoice_email": {Default: true}, - "checkout_algo": { - Default: false, - Variant: "experiment-v2", - Percent: &featureflag.PercentRollout{Percent: 25, By: "user_id"}, - }, - "ops_disable_recommendations": {Default: false}, -}) - -// Env overrides win over static config so SREs can flip kill switches -// without redeploying: `FF_OPS_DISABLE_RECOMMENDATIONS=true ./multica-server`. -env := featureflag.NewEnvProvider("FF_") - -flags := featureflag.NewService( - featureflag.NewChainProvider(env, static), - featureflag.WithLogger(logger), -) +flags, err := featureflag.NewServiceFromEnv(featureflag.WithLogger(slog.Default())) +if err != nil { + slog.Error("feature flag configuration failed to load", "error", err) + os.Exit(1) +} ``` +`NewServiceFromEnv` reads two env vars — both follow the same `MULTICA_*_FILE` / `FF_*` conventions documented in `.env.example`: + +| Env var | Role | +|---|---| +| `MULTICA_FEATURE_FLAGS_FILE` | Path to the YAML rule set (optional; absent = no static rules). | +| `FF_` | Per-flag runtime override. `FF_BILLING_NEW_INVOICE_EMAIL=false` / `25%` / `experiment-v2`. Beats the YAML, no redeploy. | + +The provider chain is `EnvProvider → YAML StaticProvider`. The server can boot with zero flag config — every `IsEnabled` call falls back to the caller's default until someone authors a rule. + +### YAML schema + +```yaml +# /etc/multica/feature-flags.yaml +billing_new_invoice_email: + default: true + +checkout_algo: + default: false + variant: experiment-v2 + percent: + percent: 25 + by: user_id + +ops_disable_recommendations: + default: false + allow: ["user-internal-1", "user-internal-2"] + allow_by: user_id +``` + +Every field except `default` is optional. `variant` is the on-variant — see the multi-arm note below. An empty file is a valid "no flags yet" state. Malformed YAML fails startup the same way `DATABASE_URL` parse errors do, so misconfig surfaces loudly. + ### Attaching evaluation context to the request ```go diff --git a/server/cmd/server/main.go b/server/cmd/server/main.go index ac95c9eb4..43716df61 100644 --- a/server/cmd/server/main.go +++ b/server/cmd/server/main.go @@ -22,6 +22,7 @@ import ( "github.com/multica-ai/multica/server/internal/scheduler" "github.com/multica-ai/multica/server/internal/service" db "github.com/multica-ai/multica/server/pkg/db/generated" + "github.com/multica-ai/multica/server/pkg/featureflag" "github.com/redis/go-redis/v9" ) @@ -141,6 +142,23 @@ func main() { port = "8080" } + // Feature flags: loaded once at startup from MULTICA_FEATURE_FLAGS_FILE + // (a YAML rule set) with FF_ env overrides layered on top. + // See docs/feature-flags.md for the schema and lifecycle rules. + // + // Booting the server without any flag config is intentional: when the + // env var is unset, every IsEnabled call falls through to the caller's + // default, so existing code paths are unchanged until someone adds a + // rule. A misconfigured (malformed / missing) file surfaces as a hard + // error so operators see misconfig the same way they do for any other + // MULTICA_*_FILE knob. + flags, err := featureflag.NewServiceFromEnv(featureflag.WithLogger(slog.Default())) + if err != nil { + slog.Error("feature flag configuration failed to load", "error", err) + os.Exit(1) + } + _ = flags // wired into handlers/services as call sites adopt flags; see docs/feature-flags.md + dbURL := os.Getenv("DATABASE_URL") if dbURL == "" { dbURL = "postgres://multica:multica@localhost:5432/multica?sslmode=disable" diff --git a/server/pkg/featureflag/config.go b/server/pkg/featureflag/config.go new file mode 100644 index 000000000..aca1f4c7c --- /dev/null +++ b/server/pkg/featureflag/config.go @@ -0,0 +1,162 @@ +package featureflag + +import ( + "fmt" + "log/slog" + "os" + "strings" + + "gopkg.in/yaml.v3" +) + +// EnvFlagFile is the environment variable consulted by NewServiceFromEnv to +// locate a YAML rule file. It follows the same convention as the other +// MULTICA_*_CONFIG / MULTICA_*_FILE knobs documented in .env.example. +const EnvFlagFile = "MULTICA_FEATURE_FLAGS_FILE" + +// EnvOverridePrefix is the prefix EnvProvider uses when NewServiceFromEnv +// composes the standard provider chain. Individual flags can be overridden +// at runtime with `FF_=true|false|42%|` env vars without +// touching the YAML file — the env override beats the file value. +const EnvOverridePrefix = "FF_" + +// ruleConfig is the wire format used by the YAML / JSON loader. It mirrors +// Rule but uses snake_case keys (for YAML ergonomics) and pointer types so +// we can tell "unset" from "explicit zero". +// +// Keeping the wire shape separate from runtime Rule means the config format +// can evolve (add fields, deprecate names) without forcing every business +// caller of Rule to recompile against the new shape. +type ruleConfig struct { + Default *bool `yaml:"default,omitempty"` + Variant string `yaml:"variant,omitempty"` + Allow []string `yaml:"allow,omitempty"` + AllowBy string `yaml:"allow_by,omitempty"` + Deny []string `yaml:"deny,omitempty"` + DenyBy string `yaml:"deny_by,omitempty"` + Percent *percentConfig `yaml:"percent,omitempty"` +} + +type percentConfig struct { + Percent int `yaml:"percent"` + By string `yaml:"by,omitempty"` +} + +// toRule converts the wire shape to a runtime Rule, applying defaults for +// fields that the YAML omitted. +func (rc ruleConfig) toRule() Rule { + r := Rule{ + Variant: rc.Variant, + Allow: rc.Allow, + AllowBy: rc.AllowBy, + Deny: rc.Deny, + DenyBy: rc.DenyBy, + } + if rc.Default != nil { + r.Default = *rc.Default + } + if rc.Percent != nil { + r.Percent = &PercentRollout{ + Percent: rc.Percent.Percent, + By: rc.Percent.By, + } + } + return r +} + +// LoadRulesFromYAMLFile reads a YAML file mapping flag keys to rule +// definitions and returns the parsed map ready to be installed on a +// StaticProvider via LoadRules. +// +// Schema (every field except `default` is optional): +// +// billing_new_invoice_email: +// default: true +// +// checkout_algo: +// default: false +// variant: experiment-v2 +// percent: +// percent: 25 +// by: user_id +// +// ops_disable_recommendations: +// default: false +// allow: ["user-internal-1", "user-internal-2"] +// +// An empty or whitespace-only file returns an empty map with no error, so +// operators can drop a flags file in place before authoring any flag +// without breaking server startup. +func LoadRulesFromYAMLFile(path string) (map[string]Rule, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("featureflag: read %s: %w", path, err) + } + return parseRulesYAML(data) +} + +// parseRulesYAML is the file-format-aware core of LoadRulesFromYAMLFile. +// Exposed unexported so tests can exercise the parser without touching the +// filesystem. +func parseRulesYAML(data []byte) (map[string]Rule, error) { + // An empty body is a valid "no flags defined yet" state. yaml.Unmarshal + // on `nil` leaves the destination nil, so handle this explicitly to + // return an empty (non-nil) map for the convenience of callers. + if len(strings.TrimSpace(string(data))) == 0 { + return map[string]Rule{}, nil + } + var raw map[string]ruleConfig + if err := yaml.Unmarshal(data, &raw); err != nil { + return nil, fmt.Errorf("featureflag: parse: %w", err) + } + out := make(map[string]Rule, len(raw)) + for key, rc := range raw { + out[key] = rc.toRule() + } + return out, nil +} + +// NewServiceFromEnv constructs a Service wired with the standard multica +// config sources, in order of decreasing precedence: +// +// 1. EnvProvider (FF_ overrides — Ops kill switches, fastest path). +// 2. StaticProvider loaded from the YAML file at MULTICA_FEATURE_FLAGS_FILE +// (when the env var is set and the file exists). +// +// When MULTICA_FEATURE_FLAGS_FILE is unset, the Service still works — the +// EnvProvider is the sole layer, and IsEnabled falls through to the +// caller's default for any flag without an FF_ override. The server +// can therefore boot before any flag config is authored. +// +// When the file path is set but the file is malformed, this returns an +// error rather than silently dropping the configuration — operators +// expect feature-flag misconfig to fail loudly the way every other +// config knob does (DATABASE_URL parse errors, JWT_SECRET missing in +// production, etc.). +func NewServiceFromEnv(opts ...Option) (*Service, error) { + var providers []Provider + providers = append(providers, NewEnvProvider(EnvOverridePrefix)) + + path := strings.TrimSpace(os.Getenv(EnvFlagFile)) + var loadedCount int + if path != "" { + rules, err := LoadRulesFromYAMLFile(path) + if err != nil { + return nil, err + } + sp := NewStaticProvider() + sp.LoadRules(rules) + providers = append(providers, sp) + loadedCount = len(rules) + } + + svc := NewService(NewChainProvider(providers...), opts...) + if svc.logger != nil { + svc.logger.Info("feature flags initialised", + slog.String("file", path), + slog.Int("rules", loadedCount), + slog.String("env_prefix", EnvOverridePrefix), + ) + } + return svc, nil +} diff --git a/server/pkg/featureflag/config_test.go b/server/pkg/featureflag/config_test.go new file mode 100644 index 000000000..7d488aa67 --- /dev/null +++ b/server/pkg/featureflag/config_test.go @@ -0,0 +1,177 @@ +package featureflag + +import ( + "context" + "os" + "path/filepath" + "testing" +) + +func writeTempFile(t *testing.T, name, body string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte(body), 0o600); err != nil { + t.Fatalf("write temp file: %v", err) + } + return path +} + +func TestLoadRulesFromYAMLFileSimple(t *testing.T) { + t.Parallel() + path := writeTempFile(t, "flags.yaml", ` +billing_new_invoice_email: + default: true +ops_disable_recommendations: + default: false +`) + rules, err := LoadRulesFromYAMLFile(path) + if err != nil { + t.Fatalf("LoadRulesFromYAMLFile: %v", err) + } + if got := rules["billing_new_invoice_email"].Default; got != true { + t.Fatalf("billing_new_invoice_email Default = %v, want true", got) + } + if got := rules["ops_disable_recommendations"].Default; got != false { + t.Fatalf("ops_disable_recommendations Default = %v, want false", got) + } +} + +func TestLoadRulesFromYAMLFileFullShape(t *testing.T) { + t.Parallel() + path := writeTempFile(t, "flags.yaml", ` +checkout_algo: + default: false + variant: experiment-v2 + allow: ["user-internal"] + allow_by: user_id + deny: ["banned-tenant"] + deny_by: workspace_id + percent: + percent: 25 + by: user_id +`) + rules, err := LoadRulesFromYAMLFile(path) + if err != nil { + t.Fatalf("LoadRulesFromYAMLFile: %v", err) + } + r := rules["checkout_algo"] + if r.Default != false { + t.Fatalf("Default = %v, want false", r.Default) + } + if r.Variant != "experiment-v2" { + t.Fatalf("Variant = %q, want experiment-v2", r.Variant) + } + if len(r.Allow) != 1 || r.Allow[0] != "user-internal" { + t.Fatalf("Allow = %#v", r.Allow) + } + if r.AllowBy != "user_id" { + t.Fatalf("AllowBy = %q", r.AllowBy) + } + if len(r.Deny) != 1 || r.Deny[0] != "banned-tenant" { + t.Fatalf("Deny = %#v", r.Deny) + } + if r.DenyBy != "workspace_id" { + t.Fatalf("DenyBy = %q", r.DenyBy) + } + if r.Percent == nil || r.Percent.Percent != 25 || r.Percent.By != "user_id" { + t.Fatalf("Percent = %#v", r.Percent) + } +} + +func TestLoadRulesFromYAMLFileEmpty(t *testing.T) { + t.Parallel() + // An empty file is a valid "no flags yet" state — server must still + // boot. Same for a whitespace-only file. + for _, body := range []string{"", " \n\n "} { + path := writeTempFile(t, "flags.yaml", body) + rules, err := LoadRulesFromYAMLFile(path) + if err != nil { + t.Fatalf("empty file should not error, got %v", err) + } + if rules == nil { + t.Fatalf("empty file should return non-nil empty map") + } + if len(rules) != 0 { + t.Fatalf("empty file should return empty map, got %d entries", len(rules)) + } + } +} + +func TestLoadRulesFromYAMLFileMissing(t *testing.T) { + t.Parallel() + _, err := LoadRulesFromYAMLFile("/no/such/path/flags.yaml") + if err == nil { + t.Fatalf("missing file must error") + } +} + +func TestLoadRulesFromYAMLFileMalformed(t *testing.T) { + t.Parallel() + // Invalid YAML (unmatched bracket) — must surface a parse error so + // operators see the misconfig instead of silently losing the file. + path := writeTempFile(t, "flags.yaml", "billing: { default: true") + _, err := LoadRulesFromYAMLFile(path) + if err == nil { + t.Fatalf("malformed YAML must error") + } +} + +func TestNewServiceFromEnvNoFile(t *testing.T) { + // Service must still work when the file env var is unset; that's the + // "framework adopted but no flags yet" path. Use t.Setenv so the + // state is restored after the test. + t.Setenv(EnvFlagFile, "") + svc, err := NewServiceFromEnv() + if err != nil { + t.Fatalf("NewServiceFromEnv: %v", err) + } + if svc == nil { + t.Fatalf("expected non-nil Service") + } + // No file, no env override → default flows through. + if !svc.IsEnabled(context.Background(), "any_flag", true) { + t.Fatalf("no provider config must honor the caller default") + } +} + +func TestNewServiceFromEnvWithFile(t *testing.T) { + path := writeTempFile(t, "flags.yaml", ` +demo_flag: + default: true +`) + t.Setenv(EnvFlagFile, path) + svc, err := NewServiceFromEnv() + if err != nil { + t.Fatalf("NewServiceFromEnv: %v", err) + } + if !svc.IsEnabled(context.Background(), "demo_flag", false) { + t.Fatalf("file rule must override the false default") + } +} + +func TestNewServiceFromEnvEnvBeatsFile(t *testing.T) { + // The chain is `env -> file`, so FF_ must win over the YAML. + // This is the Ops kill-switch path documented in .env.example. + path := writeTempFile(t, "flags.yaml", ` +demo_flag: + default: true +`) + t.Setenv(EnvFlagFile, path) + t.Setenv("FF_DEMO_FLAG", "false") + svc, err := NewServiceFromEnv() + if err != nil { + t.Fatalf("NewServiceFromEnv: %v", err) + } + if svc.IsEnabled(context.Background(), "demo_flag", true) { + t.Fatalf("env override must beat the YAML file (file=true, env=false)") + } +} + +func TestNewServiceFromEnvBadFileSurfacesError(t *testing.T) { + t.Setenv(EnvFlagFile, "/no/such/file.yaml") + _, err := NewServiceFromEnv() + if err == nil { + t.Fatalf("missing file must surface as an error so operators see misconfig") + } +} diff --git a/server/pkg/featureflag/doc.go b/server/pkg/featureflag/doc.go index 55dd33c2b..f84bb09e1 100644 --- a/server/pkg/featureflag/doc.go +++ b/server/pkg/featureflag/doc.go @@ -22,8 +22,15 @@ // - Deterministic percent rollouts: the same (key, identifier) pair always // evaluates to the same bucket so a user does not flip in and out of an // experiment across requests. -// - No third-party dependencies. The package only uses the Go standard -// library so it is safe to import from any subsystem. +// +// Wiring: +// +// The standard way to construct the Service inside the multica server is +// featureflag.NewServiceFromEnv, which reads MULTICA_FEATURE_FLAGS_FILE for +// the YAML rule set and layers an EnvProvider on top so individual flags +// can be overridden at runtime via FF_ env vars. The core types only +// depend on the standard library; the YAML loader pulls in gopkg.in/yaml.v3 +// which is already a server-level dependency. // // See server/pkg/featureflag/service.go for the public Service API and // docs/feature-flags.md for end-to-end usage examples.