|
| 1 | +package metrics |
| 2 | + |
| 3 | +import ( |
| 4 | + "bytes" |
| 5 | + "fmt" |
| 6 | + "net/http" |
| 7 | + "sort" |
| 8 | + "strings" |
| 9 | + "sync" |
| 10 | + "sync/atomic" |
| 11 | + "time" |
| 12 | +) |
| 13 | + |
| 14 | +const ( |
| 15 | + DefaultServiceName = "kite-mcp" |
| 16 | + DefaultHistoricalDays = 7 |
| 17 | + DefaultCleanupRetentionDays = 30 |
| 18 | + DefaultCleanupHour = 3 // 3 AM |
| 19 | + DefaultCleanupDay = 6 // Saturday (0=Sunday, 6=Saturday) |
| 20 | + |
| 21 | + PrometheusContentType = "text/plain; version=0.0.4; charset=utf-8" |
| 22 | + AdminPathPrefix = "/admin/" |
| 23 | + MetricsPathSuffix = "/metrics" |
| 24 | +) |
| 25 | + |
| 26 | +// Config holds configuration for creating a metrics manager |
| 27 | +type Config struct { |
| 28 | + ServiceName string // defaults to DefaultServiceName |
| 29 | + AdminSecretPath string // required for admin endpoint, empty = disabled |
| 30 | + HistoricalDays int // defaults to DefaultHistoricalDays |
| 31 | + CleanupRetentionDays int // defaults to DefaultCleanupRetentionDays |
| 32 | + AutoCleanup bool // defaults to true |
| 33 | +} |
| 34 | + |
| 35 | +// Manager handles metrics collection and export |
| 36 | +type Manager struct { |
| 37 | + serviceName string |
| 38 | + adminSecretPath string |
| 39 | + historicalDays int |
| 40 | + cleanupRetentionDays int |
| 41 | + |
| 42 | + counters sync.Map // map[string]*int64 |
| 43 | + dailyUsers sync.Map // map[string]*userSet |
| 44 | + |
| 45 | + cleanupStop chan struct{} |
| 46 | + cleanupOnce sync.Once |
| 47 | +} |
| 48 | + |
| 49 | +// userSet holds unique users for a day with count |
| 50 | +type userSet struct { |
| 51 | + users sync.Map // map[string]bool |
| 52 | + count int64 // atomic counter |
| 53 | +} |
| 54 | + |
| 55 | +// New creates a new metrics manager with the given configuration |
| 56 | +func New(cfg Config) *Manager { |
| 57 | + if cfg.ServiceName == "" { |
| 58 | + cfg.ServiceName = DefaultServiceName |
| 59 | + } |
| 60 | + if cfg.HistoricalDays == 0 { |
| 61 | + cfg.HistoricalDays = DefaultHistoricalDays |
| 62 | + } |
| 63 | + if cfg.CleanupRetentionDays == 0 { |
| 64 | + cfg.CleanupRetentionDays = DefaultCleanupRetentionDays |
| 65 | + } |
| 66 | + |
| 67 | + m := &Manager{ |
| 68 | + serviceName: cfg.ServiceName, |
| 69 | + adminSecretPath: cfg.AdminSecretPath, |
| 70 | + historicalDays: cfg.HistoricalDays, |
| 71 | + cleanupRetentionDays: cfg.CleanupRetentionDays, |
| 72 | + cleanupStop: make(chan struct{}), |
| 73 | + } |
| 74 | + |
| 75 | + if cfg.AutoCleanup { |
| 76 | + m.startCleanupRoutine() |
| 77 | + } |
| 78 | + |
| 79 | + return m |
| 80 | +} |
| 81 | + |
| 82 | +// Increment atomically increments a counter |
| 83 | +func (m *Manager) Increment(key string) { |
| 84 | + m.IncrementBy(key, 1) |
| 85 | +} |
| 86 | + |
| 87 | +// IncrementBy atomically increments a counter by n |
| 88 | +func (m *Manager) IncrementBy(key string, n int64) { |
| 89 | + val, _ := m.counters.LoadOrStore(key, new(int64)) |
| 90 | + atomic.AddInt64(val.(*int64), n) |
| 91 | +} |
| 92 | + |
| 93 | +// TrackDailyUser tracks a unique user login for today |
| 94 | +func (m *Manager) TrackDailyUser(userID string) { |
| 95 | + if userID == "" { |
| 96 | + return |
| 97 | + } |
| 98 | + |
| 99 | + today := time.Now().UTC().Format("2006-01-02") |
| 100 | + |
| 101 | + dayUsersInterface, _ := m.dailyUsers.LoadOrStore(today, &userSet{}) |
| 102 | + dayUsers, ok := dayUsersInterface.(*userSet) |
| 103 | + if !ok { |
| 104 | + return // Skip if type assertion fails |
| 105 | + } |
| 106 | + |
| 107 | + if _, exists := dayUsers.users.LoadOrStore(userID, true); !exists { |
| 108 | + atomic.AddInt64(&dayUsers.count, 1) |
| 109 | + } |
| 110 | +} |
| 111 | + |
| 112 | +// GetCounterValue returns the current value of a counter |
| 113 | +func (m *Manager) GetCounterValue(key string) int64 { |
| 114 | + if val, ok := m.counters.Load(key); ok { |
| 115 | + return atomic.LoadInt64(val.(*int64)) |
| 116 | + } |
| 117 | + return 0 |
| 118 | +} |
| 119 | + |
| 120 | +// GetDailyUserCount returns unique user count for a specific date |
| 121 | +func (m *Manager) GetDailyUserCount(date string) int64 { |
| 122 | + if dayUsersInterface, ok := m.dailyUsers.Load(date); ok { |
| 123 | + if dayUsers, ok := dayUsersInterface.(*userSet); ok { |
| 124 | + return atomic.LoadInt64(&dayUsers.count) |
| 125 | + } |
| 126 | + } |
| 127 | + return 0 |
| 128 | +} |
| 129 | + |
| 130 | +// GetTodayUserCount returns today's unique user count |
| 131 | +func (m *Manager) GetTodayUserCount() int64 { |
| 132 | + today := time.Now().UTC().Format("2006-01-02") |
| 133 | + return m.GetDailyUserCount(today) |
| 134 | +} |
| 135 | + |
| 136 | +// CleanupOldData removes user data older than the configured retention period |
| 137 | +func (m *Manager) CleanupOldData() error { |
| 138 | + cutoff := time.Now().UTC().AddDate(0, 0, -m.cleanupRetentionDays) |
| 139 | + |
| 140 | + var keysToDelete []string |
| 141 | + m.dailyUsers.Range(func(key, _ interface{}) bool { |
| 142 | + dateStr, ok := key.(string) |
| 143 | + if !ok { |
| 144 | + return true |
| 145 | + } |
| 146 | + |
| 147 | + if date, err := time.Parse("2006-01-02", dateStr); err == nil && date.Before(cutoff) { |
| 148 | + keysToDelete = append(keysToDelete, dateStr) |
| 149 | + } |
| 150 | + return true |
| 151 | + }) |
| 152 | + |
| 153 | + for _, key := range keysToDelete { |
| 154 | + m.dailyUsers.Delete(key) |
| 155 | + } |
| 156 | + |
| 157 | + return nil |
| 158 | +} |
| 159 | + |
| 160 | +// startCleanupRoutine starts automatic cleanup every Saturday at 3 AM UTC |
| 161 | +func (m *Manager) startCleanupRoutine() { |
| 162 | + go func() { |
| 163 | + for { |
| 164 | + now := time.Now().UTC() |
| 165 | + next := getNextCleanupTime(now) |
| 166 | + delay := next.Sub(now) |
| 167 | + |
| 168 | + select { |
| 169 | + case <-time.After(delay): |
| 170 | + _ = m.CleanupOldData() |
| 171 | + case <-m.cleanupStop: |
| 172 | + return |
| 173 | + } |
| 174 | + } |
| 175 | + }() |
| 176 | +} |
| 177 | + |
| 178 | +// getNextCleanupTime calculates the next Saturday at 3 AM UTC |
| 179 | +func getNextCleanupTime(now time.Time) time.Time { |
| 180 | + // Find next Saturday at 3 AM |
| 181 | + daysUntilSaturday := (DefaultCleanupDay - int(now.Weekday()) + 7) % 7 |
| 182 | + if daysUntilSaturday == 0 && (now.Hour() >= DefaultCleanupHour) { |
| 183 | + daysUntilSaturday = 7 // Next Saturday if we're past 3 AM today |
| 184 | + } |
| 185 | + |
| 186 | + next := now.AddDate(0, 0, daysUntilSaturday) |
| 187 | + return time.Date(next.Year(), next.Month(), next.Day(), DefaultCleanupHour, 0, 0, 0, time.UTC) |
| 188 | +} |
| 189 | + |
| 190 | +// Shutdown stops the cleanup routine |
| 191 | +func (m *Manager) Shutdown() { |
| 192 | + m.cleanupOnce.Do(func() { |
| 193 | + close(m.cleanupStop) |
| 194 | + }) |
| 195 | +} |
| 196 | + |
| 197 | +// formatMetric formats a single metric in Prometheus format |
| 198 | +func (m *Manager) formatMetric(buf *bytes.Buffer, name string, labels map[string]string, value float64) { |
| 199 | + if labels == nil { |
| 200 | + labels = make(map[string]string) |
| 201 | + } |
| 202 | + labels["service"] = m.serviceName |
| 203 | + |
| 204 | + var labelPairs []string |
| 205 | + for k, v := range labels { |
| 206 | + labelPairs = append(labelPairs, fmt.Sprintf(`%s="%s"`, k, v)) |
| 207 | + } |
| 208 | + sort.Strings(labelPairs) |
| 209 | + |
| 210 | + fmt.Fprintf(buf, "%s{%s} %g\n", name, strings.Join(labelPairs, ","), value) |
| 211 | +} |
| 212 | + |
| 213 | +// WritePrometheus writes all metrics in Prometheus format |
| 214 | +func (m *Manager) WritePrometheus(buf *bytes.Buffer) { |
| 215 | + now := time.Now().UTC() |
| 216 | + today := now.Format("2006-01-02") |
| 217 | + |
| 218 | + // Write counter metrics |
| 219 | + m.counters.Range(func(key, val interface{}) bool { |
| 220 | + name, ok := key.(string) |
| 221 | + if !ok { |
| 222 | + return true |
| 223 | + } |
| 224 | + value := atomic.LoadInt64(val.(*int64)) |
| 225 | + m.formatMetric(buf, fmt.Sprintf("%s_total", name), nil, float64(value)) |
| 226 | + return true |
| 227 | + }) |
| 228 | + |
| 229 | + // Write current daily user count |
| 230 | + todayCount := m.GetDailyUserCount(today) |
| 231 | + m.formatMetric(buf, "daily_unique_users_total", map[string]string{"date": today}, float64(todayCount)) |
| 232 | + |
| 233 | + // Write historical daily user counts |
| 234 | + for i := 1; i <= m.historicalDays; i++ { |
| 235 | + date := now.AddDate(0, 0, -i).Format("2006-01-02") |
| 236 | + count := m.GetDailyUserCount(date) |
| 237 | + if count > 0 { |
| 238 | + m.formatMetric(buf, "daily_unique_users_total", map[string]string{"date": date}, float64(count)) |
| 239 | + } |
| 240 | + } |
| 241 | +} |
| 242 | + |
| 243 | +// HTTPHandler returns an HTTP handler for the metrics endpoint |
| 244 | +func (m *Manager) HTTPHandler() http.HandlerFunc { |
| 245 | + return func(w http.ResponseWriter, r *http.Request) { |
| 246 | + if r.Method != http.MethodGet { |
| 247 | + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) |
| 248 | + return |
| 249 | + } |
| 250 | + |
| 251 | + buf := new(bytes.Buffer) |
| 252 | + m.WritePrometheus(buf) |
| 253 | + |
| 254 | + w.Header().Set("Content-Type", PrometheusContentType) |
| 255 | + w.WriteHeader(http.StatusOK) |
| 256 | + if _, err := w.Write(buf.Bytes()); err != nil { |
| 257 | + // Log error but don't panic |
| 258 | + return |
| 259 | + } |
| 260 | + } |
| 261 | +} |
| 262 | + |
| 263 | +// AdminHTTPHandler returns an HTTP handler with admin path protection |
| 264 | +func (m *Manager) AdminHTTPHandler() http.HandlerFunc { |
| 265 | + if m.adminSecretPath == "" { |
| 266 | + return m.disabledHandler() |
| 267 | + } |
| 268 | + |
| 269 | + expectedPath := AdminPathPrefix + m.adminSecretPath + MetricsPathSuffix |
| 270 | + |
| 271 | + return func(w http.ResponseWriter, r *http.Request) { |
| 272 | + if !m.isValidAdminPath(r.URL.Path, expectedPath) { |
| 273 | + http.Error(w, "Not found", http.StatusNotFound) |
| 274 | + return |
| 275 | + } |
| 276 | + |
| 277 | + m.HTTPHandler()(w, r) |
| 278 | + } |
| 279 | +} |
| 280 | + |
| 281 | +// disabledHandler returns a handler that always returns 404 |
| 282 | +func (m *Manager) disabledHandler() http.HandlerFunc { |
| 283 | + return func(w http.ResponseWriter, r *http.Request) { |
| 284 | + http.Error(w, "Admin endpoint disabled", http.StatusNotFound) |
| 285 | + } |
| 286 | +} |
| 287 | + |
| 288 | +// isValidAdminPath checks if the request path matches the expected admin path |
| 289 | +func (m *Manager) isValidAdminPath(requestPath, expectedPath string) bool { |
| 290 | + return requestPath == expectedPath |
| 291 | +} |
0 commit comments