-
Notifications
You must be signed in to change notification settings - Fork 140
CBG-5006: Split ServerContext mutexes #7889
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
e8ffc70
ce2cd12
ca69b40
4d6cdd4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2989,7 +2989,7 @@ func TestCreateDBWithXattrsDisabled(t *testing.T) { | |
| resp = rt.CreateDatabase(dbName, dbConfig) | ||
| RequireStatus(t, resp, http.StatusCreated) | ||
|
|
||
| rt.RestTesterServerContext.dbConfigs[dbName].DatabaseConfig.EnableXattrs = base.Ptr(false) | ||
| rt.RestTesterServerContext._dbConfigs[dbName].DatabaseConfig.EnableXattrs = base.Ptr(false) | ||
|
|
||
| _, err := rt.RestTesterServerContext.ReloadDatabase(t.Context(), dbName, false) | ||
| assert.Error(t, err, errResp) | ||
|
|
@@ -3482,7 +3482,7 @@ func TestAllowConflictsConfig(t *testing.T) { | |
| bucketName := rt.GetDatabase().Bucket.GetName() | ||
|
|
||
| // Attempt to set the `AllowConflicts` property to true in the database configuration. | ||
| rt.RestTesterServerContext.dbConfigs[dbName].DatabaseConfig.DbConfig.AllowConflicts = base.Ptr(true) | ||
| rt.RestTesterServerContext._dbConfigs[dbName].DatabaseConfig.DbConfig.AllowConflicts = base.Ptr(true) | ||
|
||
|
|
||
| // Reload the database configuration and verify that an error is returned. | ||
| _, err := rt.RestTesterServerContext.ReloadDatabase(ctx, dbName, false) | ||
|
|
@@ -3525,7 +3525,7 @@ func TestDisableAllowStarChannel(t *testing.T) { | |
| RequireStatus(t, resp, http.StatusCreated) | ||
|
|
||
| // Attempting to disable `enable_star_channel` | ||
| rt.ServerContext().dbConfigs[dbName].DatabaseConfig.CacheConfig.ChannelCacheConfig.EnableStarChannel = base.Ptr(false) | ||
| rt.ServerContext()._dbConfigs[dbName].DatabaseConfig.CacheConfig.ChannelCacheConfig.EnableStarChannel = base.Ptr(false) | ||
|
||
|
|
||
| // Reloading the database after updating the config | ||
| _, err := rt.ServerContext().ReloadDatabase(ctx, dbName, false) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1657,21 +1657,21 @@ func (sc *ServerContext) fetchAndLoadConfigs(ctx context.Context, isInitialStart | |
| // we don't need to do this two-stage lock on initial startup as the REST APIs aren't even online yet. | ||
| var deletedDatabases []string | ||
| if !isInitialStartup { | ||
| sc.lock.RLock() | ||
| for dbName, _ := range sc.dbRegistry { | ||
| sc._databasesLock.RLock() | ||
| for dbName, _ := range sc._dbRegistry { | ||
| if _, foundMatchingDb := fetchedConfigs[dbName]; !foundMatchingDb { | ||
| deletedDatabases = append(deletedDatabases, dbName) | ||
| delete(fetchedConfigs, dbName) | ||
| } | ||
| } | ||
| for dbName, fetchedConfig := range fetchedConfigs { | ||
| if dbConfig, ok := sc.dbConfigs[dbName]; ok && dbConfig.cfgCas >= fetchedConfig.cfgCas { | ||
| if dbConfig, ok := sc._dbConfigs[dbName]; ok && dbConfig.cfgCas >= fetchedConfig.cfgCas { | ||
| sc.invalidDatabaseConfigTracking.remove(dbName) | ||
| base.DebugfCtx(ctx, base.KeyConfig, "Database %q bucket %q config has not changed since last update", fetchedConfig.Name, *fetchedConfig.Bucket) | ||
| delete(fetchedConfigs, dbName) | ||
| } | ||
| } | ||
| sc.lock.RUnlock() | ||
| sc._databasesLock.RUnlock() | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I want to file a separate ticket for this: if there was a panic between this line and the readLock, then I want this to unlock. If the panic happens in config polling, I believe that this will take down Sync Gateway. However, this can get called from any db scoped REST API handler:
Since this is only serverless, this is low priority. For the issue that spawned this ticket (see https://jira.issues.couchbase.com/browse/CBG-5010 for the full logs), something locked the serverContext.lock mutex but didn't unlock it. I don't think it's this line but this is one of the only cases where this is called outside a defer. I think it would be fine to do this as part of a separate defensive ticket.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is already a ticket to review usage of the lock. This ticket is simply splitting out the http server lock. I tested this by running Will review with the other larger |
||
|
|
||
| // nothing to do, we can bail out without needing the write lock | ||
| if len(deletedDatabases) == 0 && len(fetchedConfigs) == 0 { | ||
|
|
@@ -1681,10 +1681,10 @@ func (sc *ServerContext) fetchAndLoadConfigs(ctx context.Context, isInitialStart | |
| } | ||
|
|
||
| // we have databases to update/remove | ||
| sc.lock.Lock() | ||
| defer sc.lock.Unlock() | ||
| sc._databasesLock.Lock() | ||
| defer sc._databasesLock.Unlock() | ||
| for _, dbName := range deletedDatabases { | ||
| dbc, ok := sc.databases_[dbName] | ||
| dbc, ok := sc._databases[dbName] | ||
| if !ok { | ||
| base.DebugfCtx(ctx, base.KeyConfig, "Database %q already removed from server context after acquiring write lock - do not need to remove not removing database", base.MD(dbName)) | ||
| continue | ||
|
|
@@ -1723,8 +1723,8 @@ func (sc *ServerContext) fetchAndLoadDatabaseSince(ctx context.Context, dbName s | |
| } | ||
|
|
||
| func (sc *ServerContext) fetchAndLoadDatabase(nonContextStruct base.NonCancellableContext, dbName string, forceReload bool) (found bool, err error) { | ||
| sc.lock.Lock() | ||
| defer sc.lock.Unlock() | ||
| sc._databasesLock.Lock() | ||
| defer sc._databasesLock.Unlock() | ||
| return sc._fetchAndLoadDatabase(nonContextStruct, dbName, forceReload) | ||
| } | ||
|
|
||
|
|
@@ -1809,8 +1809,8 @@ func (sc *ServerContext) findBucketWithCallback(ctx context.Context, callback fu | |
|
|
||
| func (sc *ServerContext) fetchDatabase(ctx context.Context, dbName string) (found bool, dbConfig *DatabaseConfig, err error) { | ||
| // fetch will update the databases | ||
| sc.lock.Lock() | ||
| defer sc.lock.Unlock() | ||
| sc._databasesLock.Lock() | ||
| defer sc._databasesLock.Unlock() | ||
| return sc._fetchDatabase(ctx, dbName) | ||
| } | ||
|
|
||
|
|
@@ -1880,8 +1880,8 @@ func (sc *ServerContext) _fetchDatabase(ctx context.Context, dbName string) (fou | |
| } | ||
|
|
||
| func (sc *ServerContext) handleInvalidDatabaseConfig(ctx context.Context, bucket string, cnf DatabaseConfig) { | ||
| sc.lock.Lock() | ||
| defer sc.lock.Unlock() | ||
| sc._databasesLock.Lock() | ||
| defer sc._databasesLock.Unlock() | ||
| sc._handleInvalidDatabaseConfig(ctx, bucket, cnf, nil) | ||
| } | ||
|
|
||
|
|
@@ -1895,9 +1895,9 @@ func (sc *ServerContext) _handleInvalidDatabaseConfig(ctx context.Context, bucke | |
| func (sc *ServerContext) bucketNameFromDbName(ctx context.Context, dbName string) (bucketName string, found bool) { | ||
| // Minimal representation of config struct to be tolerant of invalid database configurations where we still need to find a database name | ||
| // see if we find the database in-memory first, otherwise fall back to scanning buckets for db configs | ||
| sc.lock.RLock() | ||
| dbc, ok := sc.databases_[dbName] | ||
| sc.lock.RUnlock() | ||
| sc._databasesLock.RLock() | ||
| dbc, ok := sc._databases[dbName] | ||
| sc._databasesLock.RUnlock() | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is so unlikely that this would panic, but due to excessive paranoia I am almost tempted to wrap this in a function in case it panics. Feel free to disregard.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will review with the other larger |
||
|
|
||
| if ok { | ||
| return dbc.Bucket.GetName(), true | ||
|
|
@@ -1948,7 +1948,7 @@ func (sc *ServerContext) fetchConfigsSince(ctx context.Context, refreshInterval | |
| sc.fetchConfigsLastUpdate = time.Now() | ||
| } | ||
|
|
||
| return sc.dbConfigs, nil | ||
| return sc._dbConfigs, nil | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function is weird because it returns a value that would require you to hold a lock. This behavior should be noted in the docstring and considered. I didn't look through the implications of this.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I agree this is weird and seems wrong - will review with the other larger |
||
| } | ||
|
|
||
| // GetBucketNames returns a slice of the bucket names associated with the server context | ||
|
|
@@ -2064,8 +2064,8 @@ func (sc *ServerContext) _applyConfigs(ctx context.Context, dbNameConfigs map[st | |
| } | ||
|
|
||
| func (sc *ServerContext) applyConfigs(ctx context.Context, dbNameConfigs map[string]DatabaseConfig) (count int) { | ||
| sc.lock.Lock() | ||
| defer sc.lock.Unlock() | ||
| sc._databasesLock.Lock() | ||
| defer sc._databasesLock.Unlock() | ||
| return sc._applyConfigs(ctx, dbNameConfigs, false, false) | ||
| } | ||
|
|
||
|
|
@@ -2104,13 +2104,13 @@ func (sc *ServerContext) _applyConfig(nonContextStruct base.NonCancellableContex | |
| } | ||
|
|
||
| // skip if we already have this config loaded, and we've got a cas value to compare with | ||
| _, exists := sc.dbRegistry[cnf.Name] | ||
| _, exists := sc._dbRegistry[cnf.Name] | ||
| if exists { | ||
| if cnf.cfgCas == 0 { | ||
| // force an update when the new config's cas was set to zero prior to load | ||
| base.InfofCtx(ctx, base.KeyConfig, "Forcing update of config for database %q bucket %q", cnf.Name, *cnf.Bucket) | ||
| } else { | ||
| if sc.dbConfigs[cnf.Name].cfgCas >= cnf.cfgCas { | ||
| if sc._dbConfigs[cnf.Name].cfgCas >= cnf.cfgCas { | ||
| base.DebugfCtx(ctx, base.KeyConfig, "Database %q bucket %q config has not changed since last update", cnf.Name, *cnf.Bucket) | ||
| return false, nil | ||
| } | ||
|
|
@@ -2211,8 +2211,8 @@ func StartServer(ctx context.Context, config *StartupConfig, sc *ServerContext) | |
| } | ||
|
|
||
| func sharedBucketDatabaseCheck(ctx context.Context, sc *ServerContext) (errors error) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we lock this function or rename to _sharedBucketDatabaseCheck and note which lock you are required to hold?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will review with the other larger |
||
| bucketUUIDToDBContext := make(map[string][]*db.DatabaseContext, len(sc.databases_)) | ||
| for _, dbContext := range sc.databases_ { | ||
| bucketUUIDToDBContext := make(map[string][]*db.DatabaseContext, len(sc._databases)) | ||
| for _, dbContext := range sc._databases { | ||
| if uuid, err := dbContext.Bucket.UUID(); err == nil { | ||
| bucketUUIDToDBContext[uuid] = append(bucketUUIDToDBContext[uuid], dbContext) | ||
| } | ||
|
|
@@ -2272,15 +2272,15 @@ func (sc *ServerContext) _findDuplicateCollections(cnf DatabaseConfig) []string | |
| // If scopes aren't defined, check the default collection | ||
| if cnf.Scopes == nil { | ||
| defaultFQName := base.FullyQualifiedCollectionName(*cnf.Bucket, base.DefaultScope, base.DefaultCollection) | ||
| existingDbName, ok := sc.collectionRegistry[defaultFQName] | ||
| existingDbName, ok := sc._collectionRegistry[defaultFQName] | ||
| if ok && existingDbName != cnf.Name { | ||
| duplicatedCollections = append(duplicatedCollections, defaultFQName) | ||
| } | ||
| } else { | ||
| for scopeName, scope := range cnf.Scopes { | ||
| for collectionName, _ := range scope.Collections { | ||
| fqName := base.FullyQualifiedCollectionName(*cnf.Bucket, scopeName, collectionName) | ||
| existingDbName, ok := sc.collectionRegistry[fqName] | ||
| existingDbName, ok := sc._collectionRegistry[fqName] | ||
| if ok && existingDbName != cnf.Name { | ||
| duplicatedCollections = append(duplicatedCollections, fqName) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This line should be a lock, but actually I'd just change the test so it makes a different REST API call.
I don't want to accidentally have the race detector pick up this error because we are doing something naughty in the tests and config polling runs simultaneously.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test case isn't quite a regular
`/db/_config` operation - it's effectively doing a persistent config load of an old configuration - bypassing the upfront validation that `/db/_config` would've run. Either way, I can move it to a function to wrap in a read mutex (the read lock is required to read the dbName entry, but nothing more to update the underlying configuration)