配置管理与特性开关:构建灵活可控的运行时配置系统

深入讲解现代应用的配置管理最佳实践,涵盖环境变量、配置文件、配置中心的设计,详解特性开关(Feature Flags)的实现模式、灰度发布策略与A/B测试集成。

引言

配置管理是应用架构的基础设施,而特性开关(Feature Flags)让团队能够安全地发布新功能、进行灰度发布和A/B测试。本文将系统介绍配置管理与特性开关的设计与实现。

配置管理最佳实践

配置分层

// Config holds the application's runtime settings.
//
// Configuration priority, from highest to lowest: command-line flags,
// environment variables, configuration file, default values.
type Config struct {
    // 1. Command-line flag (highest priority).
    ServerPort int `env:"SERVER_PORT" flag:"port"`
    
    // 2. Environment variable.
    DatabaseURL string `env:"DATABASE_URL"`
    
    // 3. Configuration file.
    ConfigFile string `env:"CONFIG_FILE" default:"config.yaml"`
    
    // 4. Default value.
    LogLevel string `env:"LOG_LEVEL" default:"info"`
}

// LoadConfig builds a Config by layering sources in order: the YAML file
// named by the CONFIG_FILE environment variable (falling back to
// "config.yaml"), then environment variables, then command-line flags.
//
// It returns an error when the file cannot be read, when its contents cannot
// be decoded, or when the environment overrides cannot be applied. No
// partially populated Config is returned on error.
func LoadConfig() (*Config, error) {
    configFile := os.Getenv("CONFIG_FILE")
    if configFile == "" {
        configFile = "config.yaml"
    }

    // 1. Configuration file.
    data, err := os.ReadFile(configFile)
    if err != nil {
        return nil, fmt.Errorf("reading config file %q: %w", configFile, err)
    }

    cfg := &Config{}
    if err := yaml.Unmarshal(data, cfg); err != nil {
        return nil, fmt.Errorf("parsing config file %q: %w", configFile, err)
    }

    // 2. Environment variables override values read from the file.
    if err := env.Parse(cfg); err != nil {
        return nil, fmt.Errorf("applying environment overrides: %w", err)
    }

    // 3. Command-line flags.
    // NOTE(review): nothing in this function binds a flag to cfg, so this
    // only affects flags registered elsewhere — confirm.
    flag.Parse()

    return cfg, nil
}
# Example config.yaml
server:
  port: 8080
  host: 0.0.0.0
  timeout: 30s

database:
  host: localhost
  port: 5432
  name: myapp
  max_conns: 100

cache:
  redis:
    host: localhost
    port: 6379
    ttl: 600

features:
  new_checkout: false
  recommendation_v2: false

配置热更新

// HotConfig holds the current *Config behind a read/write mutex, together
// with the channels that are sent each replacement configuration.
type HotConfig struct {
    mu     sync.RWMutex // locked by Get, Update and Watch
    config *Config // current configuration
    
    watchers []chan *Config // one channel per Watch call
}

// Get returns the configuration currently held, reading it under the read
// lock.
func (h *HotConfig) Get() *Config {
    h.mu.RLock()
    current := h.config
    h.mu.RUnlock()

    return current
}

// Update replaces the current configuration and notifies every watcher.
//
// Sends never block. When a watcher has not yet consumed an earlier
// notification, that pending value is replaced, so the value the watcher
// eventually reads is the most recent configuration rather than a stale one.
func (h *HotConfig) Update(newConfig *Config) {
    h.mu.Lock()
    defer h.mu.Unlock()

    h.config = newConfig

    for _, watcher := range h.watchers {
        select {
        case watcher <- newConfig:
            continue
        default:
        }

        // The channel is full: drop the stale pending value and retry once.
        // Both steps stay non-blocking because the watcher may be receiving
        // at the same time.
        select {
        case <-watcher:
        default:
        }
        select {
        case watcher <- newConfig:
        default:
        }
    }
}

// Watch registers a new watcher and returns the receive side of its channel.
// The channel has a buffer of one element.
func (h *HotConfig) Watch() <-chan *Config {
    updates := make(chan *Config, 1)

    h.mu.Lock()
    defer h.mu.Unlock()
    h.watchers = append(h.watchers, updates)

    return updates
}

// StartFileWatcher hot-reloads the configuration when configFile changes.
//
// It watches configFile and, for every file-system event, reloads the file
// with LoadConfigFile and publishes the result through Update. A failed
// reload is logged and the previous configuration stays in effect.
//
// The signature has no error result, so a failure to create the watcher or to
// register the file is logged and the function returns without starting the
// background goroutine.
func (h *HotConfig) StartFileWatcher(configFile string) {
    watcher, err := fsnotify.NewWatcher()
    if err != nil {
        log.Errorf("Failed to create config watcher: %v", err)
        return
    }
    if err := watcher.Add(configFile); err != nil {
        log.Errorf("Failed to watch config file %s: %v", configFile, err)
        _ = watcher.Close() // nothing more to do with a close error here
        return
    }

    go func() {
        defer watcher.Close()

        for {
            select {
            case _, ok := <-watcher.Events:
                if !ok {
                    return // Events closed
                }

                // Reload the configuration.
                newConfig, err := LoadConfigFile(configFile)
                if err != nil {
                    log.Errorf("Failed to reload config: %v", err)
                    continue
                }

                h.Update(newConfig)
                log.Info("Configuration reloaded successfully")

            case err, ok := <-watcher.Errors:
                if !ok {
                    return // Errors closed
                }
                // Receive from Errors as well so watcher errors are logged
                // instead of going unread.
                log.Errorf("Config watcher error: %v", err)
            }
        }
    }()
}

配置中心(Apollo/Nacos)

// ApolloConfig integrates the Apollo configuration center: it pairs an Apollo
// client with a HotConfig cache.
type ApolloConfig struct {
    client *apollo.Client
    cache  *HotConfig
}

// NewApolloConfig creates an Apollo client for appID and cluster, taking the
// config service URL from the APOLLO_CONFIG_SERVICE environment variable, and
// subscribes to changes of the "application" namespace. Each change event is
// logged key by key and then triggers reloadConfig.
//
// The returned error is always nil in the current implementation.
func NewApolloConfig(appID, cluster string) (*ApolloConfig, error) {
    opts := &apollo.Options{
        AppID:            appID,
        Cluster:          cluster,
        ConfigServiceURL: os.Getenv("APOLLO_CONFIG_SERVICE"),
    }

    ac := &ApolloConfig{
        client: apollo.NewClient(opts),
        cache:  NewHotConfig(),
    }

    // Log every changed key, then reload the configuration.
    onChange := func(event *apollo.ChangeEvent) {
        for key, change := range event.Changes {
            log.Infof("Config changed: %s = %v -> %v", key, change.OldValue, change.NewValue)
        }
        ac.reloadConfig()
    }
    ac.client.Subscribe("application", onChange)

    return ac, nil
}

// Get returns whatever the Apollo client's GetValue yields for key, passing
// defaultValue through to the client.
func (ac *ApolloConfig) Get(key string, defaultValue interface{}) interface{} {
    value := ac.client.GetValue(key, defaultValue)
    return value
}

// GetInt returns whatever the Apollo client's GetIntValue yields for key,
// passing defaultValue through to the client.
func (ac *ApolloConfig) GetInt(key string, defaultValue int) int {
    value := ac.client.GetIntValue(key, defaultValue)
    return value
}

// GetBool returns whatever the Apollo client's GetBoolValue yields for key,
// passing defaultValue through to the client.
func (ac *ApolloConfig) GetBool(key string, defaultValue bool) bool {
    value := ac.client.GetBoolValue(key, defaultValue)
    return value
}

特性开关(Feature Flags)

基础实现

// FeatureFlags is an in-memory registry of feature flags keyed by flag name.
type FeatureFlags struct {
    mu    sync.RWMutex // locked around accesses to flags
    flags map[string]*FeatureFlag
}

// FeatureFlag describes a single feature switch and its rollout settings.
type FeatureFlag struct {
    Name        string
    Enabled     bool
    Description string
    RolloutPercentage int  // 0-100
    TargetUsers []string   // allow-listed users
    CreatedAt   time.Time
    UpdatedAt   time.Time
}

// IsEnabled reports the Enabled field of the named flag. An unknown flag name
// yields false.
func (f *FeatureFlags) IsEnabled(featureName string) bool {
    f.mu.RLock()
    defer f.mu.RUnlock()

    if flag, ok := f.flags[featureName]; ok {
        return flag.Enabled
    }
    return false
}

// IsEnabledForUser reports whether the named feature is on for userID.
//
// Evaluation order:
//  1. A user listed in TargetUsers gets the feature, even when the flag is
//     disabled.
//  2. A disabled flag is off for everyone else.
//  3. Otherwise the user is placed in one of 100 buckets by the CRC-32 of
//     userID and the feature is on when the bucket is below
//     RolloutPercentage. The same user always lands in the same bucket.
//
// An unknown flag name yields false. A RolloutPercentage of 0 or less turns
// the feature off for non-listed users; 100 or more turns it on for all.
func (f *FeatureFlags) IsEnabledForUser(featureName string, userID string) bool {
    f.mu.RLock()
    defer f.mu.RUnlock()

    flag, exists := f.flags[featureName]
    if !exists {
        return false
    }

    // 1. Allow list.
    for _, targetUser := range flag.TargetUsers {
        if targetUser == userID {
            return true
        }
    }

    // 2. Master switch.
    if !flag.Enabled {
        return false
    }

    // 3. Percentage rollout.
    switch {
    case flag.RolloutPercentage >= 100:
        return true
    case flag.RolloutPercentage <= 0:
        // Checked before the uint32 conversion below: a negative value would
        // wrap to a huge number and enable the feature for every user.
        return false
    }

    bucket := crc32.ChecksumIEEE([]byte(userID)) % 100
    return bucket < uint32(flag.RolloutPercentage)
}

// CreateOrder is a usage example: it routes the request to the new pricing
// path when the "new_pricing_algorithm" flag is on for req.UserID, and to the
// old pricing path otherwise.
func (s *OrderService) CreateOrder(ctx context.Context, req *CreateOrderRequest) (*Order, error) {
    useNewPricing := s.featureFlags.IsEnabledForUser("new_pricing_algorithm", req.UserID)
    if !useNewPricing {
        // Old algorithm.
        return s.createOrderWithOldPricing(ctx, req)
    }

    // New algorithm.
    return s.createOrderWithNewPricing(ctx, req)
}

高级特性开关模式

// AdvancedFeatureFlag is a flag with targeting rules and named variants.
type AdvancedFeatureFlag struct {
    Name              string
    Enabled           bool
    
    // Targeting rules.
    Rules []TargetingRule
    
    // Variants (supports multiple versions).
    Variants []FlagVariant
    DefaultVariant string
}

// TargetingRule names the variant to use when its conditions match. Priority
// is the key rules are ordered by when a flag is evaluated.
type TargetingRule struct {
    Name       string
    Conditions []Condition
    Variant    string
    Priority   int
}

// Condition compares one attribute of the evaluation context against Value
// using Operator.
type Condition struct {
    Attribute string      // user.country, user.age, request.path
    Operator  string      // in, not_in, contains, gt, lt
    Value     interface{}
}

// FlagVariant is one named value a flag can take.
type FlagVariant struct {
    Name  string
    Value interface{}  // may be of any type
}

// Evaluate returns the value of the variant selected for ctx.
//
// A disabled flag yields the default variant. Otherwise rules are tried from
// highest to lowest Priority and the first rule that matches ctx decides the
// variant; rules with equal priority are tried in declaration order. When no
// rule matches, the default variant is used.
//
// f.Rules is not modified: the ordering is done on a copy, so concurrent
// Evaluate calls do not write to shared state. The returned error is always
// nil in the current implementation.
func (f *AdvancedFeatureFlag) Evaluate(ctx *EvaluationContext) (interface{}, error) {
    if !f.Enabled {
        return f.getVariantValue(f.DefaultVariant), nil
    }

    // Sort a private copy; a stable sort keeps declaration order among rules
    // that share a priority.
    ordered := make([]TargetingRule, len(f.Rules))
    copy(ordered, f.Rules)
    sort.SliceStable(ordered, func(i, j int) bool {
        return ordered[i].Priority > ordered[j].Priority
    })

    for i := range ordered {
        if ordered[i].matches(ctx) {
            return f.getVariantValue(ordered[i].Variant), nil
        }
    }

    return f.getVariantValue(f.DefaultVariant), nil
}

// matches reports whether every condition of the rule holds for ctx. A rule
// without conditions matches.
func (r *TargetingRule) matches(ctx *EvaluationContext) bool {
    matched := true
    for _, cond := range r.Conditions {
        if matched = cond.matches(ctx); !matched {
            break
        }
    }
    return matched
}

// matches evaluates the condition against ctx. It is false when the attribute
// is absent from ctx or when Operator is not one of "in", "not_in",
// "contains", "gt" or "lt".
func (c *Condition) matches(ctx *EvaluationContext) bool {
    actual, found := ctx.GetAttribute(c.Attribute)
    if !found {
        return false
    }

    var result bool
    switch c.Operator {
    case "in":
        result = c.valueIn(actual)
    case "not_in":
        result = !c.valueIn(actual)
    case "contains":
        result = c.containsValue(actual)
    case "gt":
        result = c.greaterThan(actual)
    case "lt":
        result = c.lessThan(actual)
    default:
        result = false
    }
    return result
}

// EvaluationContext is the evaluation context: the user and request
// attributes that conditions are checked against.
type EvaluationContext struct {
    UserID      string
    UserEmail   string
    UserCountry string
    UserAge     int
    RequestPath string
    CustomAttrs map[string]interface{}
}

// GetAttribute looks up an attribute by name. The built-in names "user.id",
// "user.email", "user.country", "user.age" and "request.path" map to the
// corresponding struct fields and are always reported as present. Any other
// name is looked up in CustomAttrs, and the second result tells whether it
// was found there.
func (ctx *EvaluationContext) GetAttribute(attr string) (interface{}, bool) {
    var builtin interface{}

    switch attr {
    case "user.id":
        builtin = ctx.UserID
    case "user.email":
        builtin = ctx.UserEmail
    case "user.country":
        builtin = ctx.UserCountry
    case "user.age":
        builtin = ctx.UserAge
    case "request.path":
        builtin = ctx.RequestPath
    default:
        custom, found := ctx.CustomAttrs[attr]
        return custom, found
    }

    return builtin, true
}

特性开关与A/B测试集成

// ABTestManager assigns users to A/B test variants and reports exposure and
// conversion events to an analytics client.
type ABTestManager struct {
    flags     *FeatureFlags
    analytics *AnalyticsClient
}

// ABTest describes one experiment: its variants, its time window and its goal.
type ABTest struct {
    Name       string
    Variants   []ABTestVariant
    StartTime  time.Time
    EndTime    time.Time
    Goal       string  // conversion goal
}

// ABTestVariant is one arm of an experiment.
type ABTestVariant struct {
    Name       string
    Percentage int  // percentage of traffic allocated to this variant
}

// GetVariant returns the variant of testName assigned to userID.
//
// It returns the error from getTest when the test cannot be looked up.
// Outside the test's [StartTime, EndTime] window it returns "control" without
// recording anything. Inside the window it assigns the user with
// consistentAssignment and records an "ab_test_exposure" analytics event.
func (m *ABTestManager) GetVariant(testName string, userID string) (string, error) {
    test, err := m.getTest(testName)
    if err != nil {
        return "", err
    }

    // Only assign while the test is running.
    now := time.Now()
    running := !now.Before(test.StartTime) && !now.After(test.EndTime)
    if !running {
        return "control", nil // control group
    }

    // Consistent assignment: the same user always sees the same variant.
    variant := m.consistentAssignment(testName, userID, test.Variants)

    // Record the exposure event.
    exposure := map[string]interface{}{
        "test_name": testName,
        "variant":   variant,
        "user_id":   userID,
        "timestamp": now,
    }
    m.analytics.Track("ab_test_exposure", exposure)

    return variant, nil
}

// consistentAssignment picks a variant for userID in testName.
//
// The CRC-32 of "testName:userID" selects one of 100 buckets, so the same
// user always gets the same variant of a given test. Variants claim
// consecutive bucket ranges sized by their Percentage; when the percentages
// add up to less than the user's bucket, the last variant is returned.
//
// An empty variant list yields "control", the same fallback GetVariant uses
// outside the test window. A variant whose running total is not positive
// claims no buckets.
func (m *ABTestManager) consistentAssignment(testName, userID string, variants []ABTestVariant) string {
    if len(variants) == 0 {
        return "control"
    }

    // Hash for consistency. The bucket is kept as an int so that a negative
    // running total cannot wrap around in an unsigned comparison.
    bucket := int(crc32.ChecksumIEEE([]byte(testName+":"+userID)) % 100)

    cumulative := 0
    for _, variant := range variants {
        cumulative += variant.Percentage
        if bucket < cumulative {
            return variant.Name
        }
    }

    return variants[len(variants)-1].Name
}

// RecordConversion records a conversion event: it sends an
// "ab_test_conversion" analytics event carrying the test name, the user ID,
// the conversion type and the current time.
func (m *ABTestManager) RecordConversion(testName, userID, conversionType string) {
    conversion := map[string]interface{}{
        "test_name":       testName,
        "user_id":         userID,
        "conversion_type": conversionType,
        "timestamp":       time.Now(),
    }
    m.analytics.Track("ab_test_conversion", conversion)
}

特性开关管理后台

// FeatureFlagAPI is the REST API for feature flags. It writes flags to a
// persistent store and to the in-memory FeatureFlags cache.
type FeatureFlagAPI struct {
    flags *FeatureFlags
    store FeatureFlagStore
}

// CreateFlag builds a flag from req, persists it, adds it to the in-memory
// cache and publishes a "flag.created" event.
//
// CreatedAt and UpdatedAt are set to the same instant. When the store rejects
// the flag, its error is returned and neither the cache nor the event stream
// is touched. ctx is currently unused.
func (api *FeatureFlagAPI) CreateFlag(ctx context.Context, req CreateFlagRequest) (*FeatureFlag, error) {
    // One timestamp for both fields so a new flag never looks already
    // modified.
    now := time.Now()
    flag := &FeatureFlag{
        Name:              req.Name,
        Enabled:           req.Enabled,
        Description:       req.Description,
        RolloutPercentage: req.RolloutPercentage,
        TargetUsers:       req.TargetUsers,
        CreatedAt:         now,
        UpdatedAt:         now,
    }

    // Persist first.
    if err := api.store.Create(flag); err != nil {
        return nil, err
    }

    // Update the in-memory cache.
    api.flags.mu.Lock()
    if api.flags.flags == nil {
        // Writing to a nil map panics; initialise the cache lazily.
        api.flags.flags = make(map[string]*FeatureFlag)
    }
    api.flags.flags[flag.Name] = flag
    api.flags.mu.Unlock()

    // Publish the change event.
    api.publishEvent("flag.created", flag)

    return flag, nil
}

// UpdateFlag applies the non-nil fields of req to the named flag, persists
// the result, refreshes the in-memory cache and publishes a "flag.updated"
// event that carries the previous Enabled and RolloutPercentage values.
//
// The changes are made on a copy of the stored flag, so the object returned
// by the store is never modified in place. When loading or saving fails, the
// error is returned and neither the cache nor the event stream is touched.
// ctx is currently unused.
//
// NOTE(review): this assumes the store's Update returns an error, as Create
// does — confirm against the FeatureFlagStore definition.
func (api *FeatureFlagAPI) UpdateFlag(ctx context.Context, name string, req UpdateFlagRequest) (*FeatureFlag, error) {
    current, err := api.store.Get(name)
    if err != nil {
        return nil, err
    }

    // Remember the previous values for the change event.
    oldEnabled := current.Enabled
    oldPercentage := current.RolloutPercentage

    // Work on a copy so a failed save leaves the existing flag untouched.
    updated := *current
    if req.Enabled != nil {
        updated.Enabled = *req.Enabled
    }
    if req.RolloutPercentage != nil {
        updated.RolloutPercentage = *req.RolloutPercentage
    }
    if req.TargetUsers != nil {
        updated.TargetUsers = req.TargetUsers
    }
    updated.UpdatedAt = time.Now()
    flag := &updated

    // Persist before exposing the change to readers.
    if err := api.store.Update(flag); err != nil {
        return nil, err
    }

    api.flags.mu.Lock()
    if api.flags.flags == nil {
        // Writing to a nil map panics; initialise the cache lazily.
        api.flags.flags = make(map[string]*FeatureFlag)
    }
    api.flags.flags[name] = flag
    api.flags.mu.Unlock()

    // Publish the change event.
    api.publishEvent("flag.updated", map[string]interface{}{
        "flag":           flag,
        "old_enabled":    oldEnabled,
        "old_percentage": oldPercentage,
    })

    return flag, nil
}

// GetFlagHistory returns the change history the store holds for the named
// flag, together with any error the store reports. ctx is currently unused.
func (api *FeatureFlagAPI) GetFlagHistory(ctx context.Context, name string) ([]FlagChangeEvent, error) {
    history, err := api.store.GetHistory(name)
    return history, err
}

灰度发布策略

// GradualRollout drives a staged rollout of a feature through its flags.
type GradualRollout struct {
    flags *FeatureFlags
}

// ExecuteRolloutPlan walks featureName through the stages of plan.
//
// For each stage it enables the flag at the stage's percentage, sleeps for
// the stage's Duration, and then checks health metrics. A failed check
// triggers rollback and returns an error naming the stage. After the last
// stage the flag is enabled at 100%.
//
// The flag is enabled from the first stage on: a percentage on a disabled
// flag reaches no users, so the health checks would otherwise observe
// nothing. The call blocks for the sum of all stage durations.
//
// NOTE(review): the result of UpdateFlag, if any, is not inspected because
// its signature is not visible here — confirm and handle a returned error.
func (g *GradualRollout) ExecuteRolloutPlan(featureName string, plan RolloutPlan) error {
    for _, stage := range plan.Stages {
        log.Infof("Starting rollout stage: %s (%d%%)", stage.Name, stage.Percentage)

        // Update the feature flag. Fresh locals per stage, so the request
        // never points at the loop variable.
        on := true
        percentage := stage.Percentage
        g.flags.UpdateFlag(featureName, UpdateFlagRequest{
            Enabled:           &on,
            RolloutPercentage: &percentage,
        })

        // Observation period.
        time.Sleep(stage.Duration)

        // Check that the metrics are healthy.
        if !g.checkHealthMetrics(featureName) {
            log.Errorf("Health check failed, rolling back")
            g.rollback(featureName)
            return fmt.Errorf("rollout failed at stage %s", stage.Name)
        }

        log.Infof("Stage %s completed successfully", stage.Name)
    }

    // Fully enable.
    enabled := true
    percentage := 100
    g.flags.UpdateFlag(featureName, UpdateFlagRequest{
        Enabled:           &enabled,
        RolloutPercentage: &percentage,
    })

    return nil
}

// RolloutPlan is an ordered list of rollout stages.
type RolloutPlan struct {
    Stages []RolloutStage
}

// RolloutStage is one step of a rollout plan: a name, a rollout percentage,
// and a duration.
type RolloutStage struct {
    Name       string
    Percentage int
    Duration   time.Duration
}

// DefaultRolloutPlan is an example five-stage gradual rollout.
var DefaultRolloutPlan = RolloutPlan{
    Stages: []RolloutStage{
        {Name: "内部测试", Percentage: 1, Duration: 24 * time.Hour},
        {Name: "小流量", Percentage: 5, Duration: 24 * time.Hour},
        {Name: "中流量", Percentage: 20, Duration: 48 * time.Hour},
        {Name: "大流量", Percentage: 50, Duration: 48 * time.Hour},
        {Name: "全量", Percentage: 100, Duration: 0},
    },
}

总结

配置管理与特性开关是现代应用的基础设施:

配置管理

  • 分层配置:命令行 > 环境变量 > 配置文件 > 默认值
  • 热更新:支持运行时动态更新配置
  • 配置中心:集中管理多环境配置

特性开关

  • 基础开关:简单的布尔开关
  • 高级规则:基于用户属性的条件判断
  • A/B测试:支持多版本对比实验
  • 灰度发布:渐进式发布新功能

关键原则:

  • 配置与代码分离
  • 特性开关要有清理机制,避免技术债务
  • 灰度发布要监控核心指标,及时回滚

延伸阅读

继续阅读

探索更多技术文章

浏览归档,发现更多关于系统设计、工具链和工程实践的内容。

全部文章 返回首页