Skip to content

Commit 6a89d20

Browse files
committed
fix-user-oauth
1 parent cc998c6 commit 6a89d20

File tree

3 files changed

+318
-86
lines changed

3 files changed

+318
-86
lines changed

mcpproxy/mcp_profile.go

Lines changed: 116 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,21 @@ func saveMcpProfile(profile *McpProfile) error {
6464
return fmt.Errorf("failed to create config directory %q: %w", dir, err)
6565
}
6666

67+
// log.Printf("saveMcpProfile: Before marshaling - RefreshToken length=%d, RefreshToken empty=%v",
68+
// len(profile.MCPOAuthRefreshToken), profile.MCPOAuthRefreshToken == "")
69+
6770
tempFile := mcpConfigPath + ".tmp"
6871

6972
bytes, err := json.MarshalIndent(profile, "", "\t")
7073
if err != nil {
7174
return fmt.Errorf("failed to marshal profile: %w", err)
7275
}
7376

77+
// jsonStr := string(bytes)
78+
// hasRefreshToken := strings.Contains(jsonStr, "mcp_oauth_refresh_token")
79+
// log.Printf("saveMcpProfile: After marshaling - JSON contains 'mcp_oauth_refresh_token'=%v, JSON length=%d",
80+
// hasRefreshToken, len(jsonStr))
81+
7482
if err := os.WriteFile(tempFile, bytes, 0600); err != nil {
7583
return fmt.Errorf("failed to write temp file %q: %w", tempFile, err)
7684
}
@@ -80,64 +88,121 @@ func saveMcpProfile(profile *McpProfile) error {
8088
_ = os.Remove(tempFile)
8189
return fmt.Errorf("failed to rename temp file to %q: %w", mcpConfigPath, err)
8290
}
91+
92+
log.Printf("saveMcpProfile: Successfully saved MCP profile")
8393
return nil
8494
}
8595

96+
// loadExistingMCPProfile loads the MCP profile stored at getMCPConfigPath() and
// validates it; when it is still usable it is returned so the caller can avoid
// launching the OAuth flow again.
//
// It returns nil (never an error) when the local profile should be ignored:
//   - the config file cannot be read or parsed (no log is emitted for these two cases);
//   - the saved site type differs from opts.RegionType;
//   - the saved app name differs from desiredAppName;
//   - the saved AppId or refresh token is empty;
//   - the saved refresh-token expiry is not later than the current Unix time;
//   - the OAuth application cannot be found by its AppId, or the lookup fails;
//   - validateOAuthApplication rejects the application for opts.Scope/opts.Host/opts.Port.
//
// On success the profile's app name, app id and token validity fields are
// overwritten with the values from the remote application. The profile is only
// modified in memory here; this function does not write it back to disk.
97+
func loadExistingMCPProfile(ctx *cli.Context, profile config.Profile, opts ProxyConfig, desiredAppName string) *McpProfile {
98+
mcpConfigPath := getMCPConfigPath()
99+
bytes, err := os.ReadFile(mcpConfigPath)
100+
if err != nil {
101+
return nil
102+
}
103+
mcpProfile, err := NewMcpProfileFromBytes(bytes)
104+
if err != nil {
105+
return nil
106+
}
107+
108+
if mcpProfile.MCPOAuthSiteType != string(opts.RegionType) {
109+
log.Printf("Region type mismatch: saved=%s, requested=%s, ignoring local profile", mcpProfile.MCPOAuthSiteType, string(opts.RegionType))
110+
return nil
111+
}
112+
113+
if mcpProfile.MCPOAuthAppName != desiredAppName {
114+
log.Printf("App name mismatch: saved=%s, requested=%s, ignoring local profile", mcpProfile.MCPOAuthAppName, desiredAppName)
115+
return nil
116+
}
117+
118+
if mcpProfile.MCPOAuthAppId == "" {
119+
log.Printf("MCP profile with AppId is empty, ignoring local profile")
120+
return nil
121+
}
122+
123+
if mcpProfile.MCPOAuthRefreshToken == "" {
124+
log.Printf("MCP profile with RefreshToken is empty, ignoring local profile")
125+
return nil
126+
}
127+
128+
if mcpProfile.MCPOAuthRefreshTokenExpire <= util.GetCurrentUnixTime() {
129+
log.Printf("MCP profile with RefreshTokenExpire is expired, ignoring local profile")
130+
return nil
131+
}
132+
133+
app, err := findOAuthApplicationById(ctx, profile, mcpProfile.MCPOAuthAppId, opts.RegionType)
134+
if err != nil {
135+
log.Printf("Failed to reuse existing MCP profile (app: %s): %v, ignoring local profile", mcpProfile.MCPOAuthAppName, err)
136+
return nil
137+
}
138+
if app == nil {
139+
log.Printf("OAuth application with AppId '%s' not found, ignoring local profile", mcpProfile.MCPOAuthAppId)
140+
return nil
141+
}
142+
143+
if err := validateOAuthApplication(app, opts.Scope, opts.Host, opts.Port); err != nil {
144+
log.Printf("Reused existing MCP profile validation failed: %v, ignoring local profile", err)
145+
return nil
146+
}
147+
148+
// Refresh the app-related fields of the MCP profile from the remote application; all other fields (such as the tokens) are left unchanged.
149+
mcpProfile.MCPOAuthAppName = app.AppName
150+
mcpProfile.MCPOAuthAppId = app.ApplicationId
151+
mcpProfile.MCPOAuthAccessTokenValidity = app.AccessTokenValidity
152+
mcpProfile.MCPOAuthRefreshTokenValidity = app.RefreshTokenValidity
153+
154+
log.Printf("Reused existing MCP profile with app '%s' (AppId: %s)", app.AppName, app.ApplicationId)
155+
156+
return mcpProfile
157+
}
158+
86159
func getOrCreateMCPProfile(ctx *cli.Context, opts ProxyConfig) (*McpProfile, error) {
87160
profile, err := config.LoadProfileWithContext(ctx)
88161
if err != nil {
89162
return nil, fmt.Errorf("failed to load profile: %w", err)
90163
}
91164

92-
// 如果传入了 oauth-app-name,先验证该应用是否存在且合法
93-
// 如果已经验证过 oauth-app-name,直接使用验证过的 app;否则查找或创建默认的 OAuth 应用
94-
var validatedApp *OAuthApplication
95-
if opts.OAuthAppName != "" {
96-
app, err := findOAuthApplicationByName(ctx, profile, opts.RegionType, opts.OAuthAppName)
97-
if err != nil {
98-
return nil, fmt.Errorf("failed to find OAuth application '%s': %w", opts.OAuthAppName, err)
99-
}
100-
if app == nil {
101-
return nil, fmt.Errorf("OAuth application '%s' not found", opts.OAuthAppName)
102-
}
165+
// 如果未显式指定 app name,则使用默认的 MCPOAuthAppName,便于复用本地 profile
166+
desiredAppName := opts.OAuthAppName
167+
if desiredAppName == "" {
168+
desiredAppName = MCPOAuthAppName
169+
}
103170

104-
// 验证 Scopes 和 Callback URI
105-
requiredRedirectURI := buildRedirectUri(opts.Host, opts.Port)
106-
if err := validateOAuthApplication(app, opts.Scope, requiredRedirectURI); err != nil {
107-
return nil, fmt.Errorf("OAuth application validation failed: %w", err)
171+
existingMcpProfile := loadExistingMCPProfile(ctx, profile, opts, desiredAppName)
172+
if existingMcpProfile != nil {
173+
// The loaded MCP profile may have been updated from the remote app, so save it again to persist the latest state.
174+
if err := saveMcpProfile(existingMcpProfile); err != nil {
175+
return nil, fmt.Errorf("failed to save mcp profile: %w", err)
108176
}
177+
return existingMcpProfile, nil
178+
}
109179

110-
validatedApp = app
111-
cli.Printf(ctx.Stdout(), "Using existing OAuth application '%s' (AppId: %s)\n", app.AppName, app.ApplicationId)
112-
} else {
113-
// 查找或创建默认的 OAuth 应用
114-
mcpConfigPath := getMCPConfigPath()
115-
if bytes, err := os.ReadFile(mcpConfigPath); err == nil {
116-
if mcpProfile, err := NewMcpProfileFromBytes(bytes); err == nil {
117-
log.Println("MCP Profile loaded from file", mcpProfile.Name, "app id", mcpProfile.MCPOAuthAppId)
118-
119-
// 检查 region type 是否匹配,因为国内和国际站的 OAuth 地址不同, Region type 不匹配则重新创建 profile
120-
if mcpProfile.MCPOAuthSiteType != string(opts.RegionType) {
121-
log.Printf("Region type mismatch: saved=%s, requested=%s, recreating profile", mcpProfile.MCPOAuthSiteType, string(opts.RegionType))
122-
} else {
123-
err = findOAuthApplicationById(ctx, profile, mcpProfile, opts.RegionType)
124-
if err == nil {
125-
return mcpProfile, nil
126-
} else {
127-
log.Println("Failed to find existing OAuth application", err.Error())
128-
}
129-
}
130-
}
180+
app, err := findOAuthApplicationByName(ctx, profile, opts.RegionType, desiredAppName)
181+
if err != nil {
182+
return nil, fmt.Errorf("failed to find OAuth application '%s': %w", desiredAppName, err)
183+
}
184+
185+
if app == nil {
186+
if opts.OAuthAppName != "" {
187+
// If the user provided an app name but no such application was found, return an error.
188+
return nil, fmt.Errorf("OAuth application '%s' not found", opts.OAuthAppName)
131189
}
132-
app, err := getOrCreateMCPOAuthApplication(ctx, profile, opts.RegionType, opts.Host, opts.Port, opts.Scope)
190+
cli.Printf(ctx.Stdout(), "Creating new default MCP profile '%s'...\n", DefaultMcpProfileName)
191+
app, err = createDefaultMCPOauthApplication(ctx, profile, opts.RegionType, opts.Host, opts.Port, opts.Scope)
133192
if err != nil {
134-
return nil, fmt.Errorf("failed to get or create OAuth application: %w", err)
193+
return nil, fmt.Errorf("failed to create default OAuth application: %w", err)
135194
}
136-
validatedApp = app
195+
cli.Printf(ctx.Stdout(), "Created new default OAuth application '%s' (AppId: %s)\n", app.AppName, app.ApplicationId)
196+
} else {
197+
cli.Printf(ctx.Stdout(), "Using existing OAuth application '%s' (AppId: %s)\n", app.AppName, app.ApplicationId)
137198
}
138199

139-
cli.Printf(ctx.Stdout(), "Setting up MCPOAuth profile '%s'...\n", DefaultMcpProfileName)
200+
if err := validateOAuthApplication(app, opts.Scope, opts.Host, opts.Port); err != nil {
201+
return nil, fmt.Errorf("OAuth application validation failed: %w", err)
202+
}
203+
validatedApp := app
140204

205+
cli.Printf(ctx.Stdout(), "Setting up MCPOAuth profile '%s'...\n", DefaultMcpProfileName)
141206
mcpProfile := NewMcpProfile(DefaultMcpProfileName)
142207
mcpProfile.MCPOAuthSiteType = string(opts.RegionType)
143208
mcpProfile.MCPOAuthAppId = validatedApp.ApplicationId
@@ -153,9 +218,20 @@ func getOrCreateMCPProfile(ctx *cli.Context, opts ProxyConfig) (*McpProfile, err
153218
if err != nil {
154219
return nil, fmt.Errorf("OAuth login failed: %w", err)
155220
}
221+
222+
log.Printf("OAuth flow completed: AccessToken length=%d, RefreshToken length=%d, AccessTokenExpire=%d",
223+
len(tokenResult.AccessToken), len(tokenResult.RefreshToken), tokenResult.AccessTokenExpire)
224+
if tokenResult.RefreshToken == "" {
225+
return nil, fmt.Errorf("OAuth flow returned empty RefreshToken (Region=%s, AppId=%s). "+
226+
"Please delete this application and let the system create a new NativeApp, or manually create a NativeApp",
227+
opts.RegionType, mcpProfile.MCPOAuthAppId)
228+
}
229+
156230
mcpProfile.MCPOAuthAccessToken = tokenResult.AccessToken
157231
mcpProfile.MCPOAuthRefreshToken = tokenResult.RefreshToken
158232
mcpProfile.MCPOAuthAccessTokenExpire = tokenResult.AccessTokenExpire
233+
// The refresh token is updated each time the access token is refreshed;
234+
// however, its validity and expiration time stay the same as when the OAuth flow originally finished.
159235
mcpProfile.MCPOAuthRefreshTokenExpire = currentTime + int64(validatedApp.RefreshTokenValidity)
160236

161237
if err = saveMcpProfile(mcpProfile); err != nil {

mcpproxy/mcp_server.go

Lines changed: 98 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,11 @@ type RuntimeStats struct {
8989
ErrorRequests int64
9090
ActiveRequests int64
9191

92-
TokenRefreshes int64
93-
TokenRefreshErrors int64
94-
LastTokenRefresh int64
92+
TokenRefreshes int64
93+
TokenRefreshErrors int64
94+
LastTokenRefresh int64
95+
HealthCheckCounter int64 // 用于定期输出健康检查日志
96+
LastHealthCheckTime int64
9597

9698
// 启动时的内存状态
9799
InitialMemStats runtime.MemStats
@@ -691,6 +693,8 @@ func (r *TokenRefresher) Stop() {
691693
func (r *TokenRefresher) checkAndRefresh() {
692694
r.mu.RLock()
693695
currentTime := util.GetCurrentUnixTime()
696+
accessTokenRemaining := r.profile.MCPOAuthAccessTokenExpire - currentTime
697+
refreshTokenRemaining := r.profile.MCPOAuthRefreshTokenExpire - currentTime
694698
needRefresh := false
695699
needReauth := false
696700

@@ -704,14 +708,32 @@ func (r *TokenRefresher) checkAndRefresh() {
704708
}
705709
r.mu.RUnlock()
706710

711+
// 定期输出健康检查日志(每 120 次检查,即每小时)
712+
checkCount := atomic.AddInt64(&r.stats.HealthCheckCounter, 1)
713+
if checkCount%120 == 0 {
714+
log.Printf("Token health check #%d: access_token_remaining=%dm (%.1fh), refresh_token_remaining=%dh (%.1fd), need_refresh=%v, need_reauth=%v",
715+
checkCount,
716+
accessTokenRemaining/60,
717+
float64(accessTokenRemaining)/3600,
718+
refreshTokenRemaining/3600,
719+
float64(refreshTokenRemaining)/86400,
720+
needRefresh,
721+
needReauth)
722+
atomic.StoreInt64(&r.stats.LastHealthCheckTime, currentTime)
723+
}
724+
707725
if needReauth {
726+
log.Printf("Token check: refresh token expiring soon (remaining: %dm), triggering re-authorization", refreshTokenRemaining/60)
708727
if err := r.reauthorizeWithProxy(); err != nil {
709-
r.reportFatalError(fmt.Errorf("re-authorization failed: %v. Please restart aliyun mcp-proxy", err))
728+
log.Printf("MCP Proxy token refresher re-authorization failed: %v", err)
729+
r.reportFatalError(fmt.Errorf("re-authorization failed: %w", err))
710730
return
711731
}
712732
} else if needRefresh {
733+
log.Printf("Token check: access token expiring soon (remaining: %dm), triggering refresh access token", accessTokenRemaining/60)
713734
if err := r.refreshAccessToken(); err != nil {
714-
r.reportFatalError(fmt.Errorf("refresh access token failed. Please restart aliyun mcp-proxy"))
735+
log.Printf("MCP Proxy token refresher refresh access token failed: %v", err)
736+
r.reportFatalError(fmt.Errorf("refresh access token failed: %w", err))
715737
return
716738
}
717739
}
@@ -736,28 +758,89 @@ func (r *TokenRefresher) refreshAccessToken() error {
736758
endpoint := EndpointMap[r.regionType].OAuth
737759
clientId := r.profile.MCPOAuthAppId
738760
refreshToken := r.profile.MCPOAuthRefreshToken
761+
accessTokenExpire := r.profile.MCPOAuthAccessTokenExpire
739762
r.mu.Unlock()
740763

741764
// 执行网络请求(不持有锁,避免阻塞)
742765
data := url.Values{}
743766
data.Set("grant_type", "refresh_token")
744767
data.Set("client_id", clientId)
745768
data.Set("refresh_token", refreshToken)
746-
// fmt.Println("refresh access token data", data.Encode())
747-
// fmt.Println("refresh access token endpoint", endpoint)
748-
// fmt.Println("refresh access token clientId", clientId)
749-
// fmt.Println("refresh access token refreshToken", refreshToken)
750769

751-
newTokens, err := oauthRefresh(endpoint, data)
770+
// 重试逻辑:最多重试 3 次,使用指数退避,避免因为临时网络问题导致服务直接关闭
771+
var newTokens *OAuthTokenResponse
772+
var err error
773+
maxRetries := 3
774+
successAttempt := 0
775+
776+
for attempt := 1; attempt <= maxRetries; attempt++ {
777+
if attempt > 1 {
778+
backoffDuration := time.Duration(1<<uint(attempt-1)) * time.Second
779+
log.Printf("OAuth refresh retry attempt %d/%d after %v backoff", attempt, maxRetries, backoffDuration)
780+
time.Sleep(backoffDuration)
781+
}
782+
783+
newTokens, err = oauthRefresh(endpoint, data)
784+
if err == nil {
785+
successAttempt = attempt
786+
break
787+
}
788+
789+
log.Printf("OAuth refresh attempt %d/%d failed: %v", attempt, maxRetries, err)
790+
791+
// 检查是否是永久性错误(不可重试)
792+
if IsPermanentError(err) {
793+
log.Printf("Detected permanent OAuth error, stopping retry immediately")
794+
break
795+
}
796+
797+
// 如果还没达到最大重试次数,记录日志并继续
798+
if attempt < maxRetries {
799+
currentTime := util.GetCurrentUnixTime()
800+
accessTimeRemaining := accessTokenExpire - currentTime
801+
log.Printf("Temporary error, will retry (access_token_remaining: %ds)", accessTimeRemaining)
802+
}
803+
}
804+
752805
if err != nil {
753806
r.mu.Lock()
754807
r.refreshing = false
808+
currentAccessTokenExpire := r.profile.MCPOAuthAccessTokenExpire
809+
currentRefreshTokenExpire := r.profile.MCPOAuthRefreshTokenExpire
755810
r.mu.Unlock()
811+
756812
atomic.AddInt64(&r.stats.TokenRefreshErrors, 1)
757-
return fmt.Errorf("oauth refresh failed: %w", err)
813+
814+
currentTime := util.GetCurrentUnixTime()
815+
accessTimeRemaining := currentAccessTokenExpire - currentTime
816+
refreshTimeRemaining := currentRefreshTokenExpire - currentTime
817+
818+
log.Printf("OAuth refresh failed after %d attempts at %s: %v (access_token_remaining: %ds, refresh_token_remaining: %dh)",
819+
maxRetries, time.Now().Format(time.RFC3339), err,
820+
accessTimeRemaining, refreshTimeRemaining/3600)
821+
822+
// 检查是否是永久性错误(如 refresh token 失效、OAuth 应用被删除等)
823+
if IsPermanentError(err) {
824+
log.Printf("Detected permanent OAuth error (invalid_grant/invalid_client), refresh token is no longer valid")
825+
log.Printf("This typically means: 1) OAuth app was deleted, 2) Refresh token was revoked, 3) App credentials changed")
826+
log.Printf("Reporting fatal error - service requires re-authorization")
827+
return fmt.Errorf("oauth permanent error, re-authorization required: %w", err)
828+
}
829+
830+
// 如果 access token 还有效(提前刷新失败),继续使用当前 token
831+
if accessTimeRemaining > 0 {
832+
log.Printf("Temporary refresh failure, access token still valid for %ds, continuing to use current token (will retry on next check in 30s)", accessTimeRemaining)
833+
return nil
834+
}
835+
836+
// Access token 已经失效,报告致命错误
837+
log.Printf("Access token expired and refresh failed, service is unavailable")
838+
log.Printf("Reporting fatal error - service requires restart or re-authorization")
839+
return fmt.Errorf("oauth refresh failed and access token expired: %w", err)
758840
}
759841

760-
log.Println("Access token refresh request successfully")
842+
log.Printf("Access token refresh request successfully after %d attempt(s), access token length=%d, refresh token length=%d, new access token expires in %d seconds",
843+
successAttempt, len(newTokens.AccessToken), len(newTokens.RefreshToken), newTokens.ExpiresIn)
761844

762845
r.mu.Lock()
763846
currentTime := util.GetCurrentUnixTime()
@@ -935,9 +1018,11 @@ func (r *TokenRefresher) reauthorizeWithProxy() error {
9351018
r.reauthorizing = false
9361019
r.mu.Unlock()
9371020
atomic.AddInt64(&r.stats.TokenRefreshErrors, 1)
1021+
log.Printf("OAuth re-authorization request failed: %v", err)
9381022
return err
9391023
}
940-
log.Println("OAuth re-authorization request successfully")
1024+
log.Printf("OAuth re-authorization request successfully: AccessToken length=%d, RefreshToken length=%d, ExpiresIn=%d",
1025+
len(tokenResult.AccessToken), len(tokenResult.RefreshToken), refreshTokenValidity)
9411026

9421027
r.mu.Lock()
9431028
currentTime := util.GetCurrentUnixTime()

0 commit comments

Comments
 (0)