diff --git a/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift b/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift
index c18d64004b..64e5e728f1 100644
--- a/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift
+++ b/Sources/SwiftDocC/Infrastructure/ConvertActionConverter.swift
@@ -29,14 +29,13 @@ package enum ConvertActionConverter {
     ///   - sourceRepository: The source repository where the documentation's sources are hosted.
     ///   - emitDigest: Whether the conversion should pass additional metadata output––such as linkable entities information, indexing information, or asset references by asset type––to the consumer.
     ///   - documentationCoverageOptions: The level of experimental documentation coverage information that the conversion should pass to the consumer.
-    /// - Returns: A list of problems that occurred during the conversion (excluding the problems that the context already encountered).
     package static func convert(
         context: DocumentationContext,
         outputConsumer: some ConvertOutputConsumer & ExternalNodeConsumer,
         sourceRepository: SourceRepository?,
         emitDigest: Bool,
         documentationCoverageOptions: DocumentationCoverageOptions
-    ) throws -> [Problem] {
+    ) async throws {
         let signposter = Self.signposter
         
         defer {
@@ -54,7 +53,7 @@ package enum ConvertActionConverter {
             if emitDigest {
                 try (_Deprecated(outputConsumer) as (any _DeprecatedConsumeProblemsAccess))._consume(problems: context.problems)
             }
-            return []
+            return
         }
         
         // Precompute the render context
@@ -72,98 +71,66 @@ package enum ConvertActionConverter {
             sourceRepository: sourceRepository
         )
         
-        // Arrays to gather additional metadata if `emitDigest` is `true`.
-        var indexingRecords = [IndexingRecord]()
-        var linkSummaries = [LinkDestinationSummary]()
-        var assets = [RenderReferenceType : [any RenderReference]]()
-        var coverageInfo = [CoverageDataEntry]()
-        let coverageFilterClosure = documentationCoverageOptions.generateFilterClosure()
-        
-        // An inner function to gather problems for errors encountered during the conversion.
-        //
-        // These problems only represent unexpected thrown errors and aren't particularly user-facing.
-        // For now we emit them as diagnostics because `DocumentationConverter.convert(outputConsumer:)` (which this replaced) used to do that.
-        //
-        // FIXME: In the future we could simplify this control flow by not catching these errors and turning them into diagnostics.
-        // Since both error-level diagnostics and thrown errors fail the documentation build,
-        // the only practical different this would have is that we stop on the first unexpected error instead of processing all pages and gathering all unexpected errors.
-        func recordProblem(from error: any Swift.Error, in problems: inout [Problem], withIdentifier identifier: String) {
-            let problem = Problem(diagnostic: Diagnostic(
-                severity: .error,
-                identifier: "org.swift.docc.documentation-converter.\(identifier)",
-                summary: error.localizedDescription
-            ), possibleSolutions: [])
-            
-            context.diagnosticEngine.emit(problem)
-            problems.append(problem)
-        }
-        
-        let resultsSyncQueue = DispatchQueue(label: "Convert Serial Queue", qos: .unspecified, attributes: [])
-        let resultsGroup = DispatchGroup()
-        
         let renderSignpostHandle = signposter.beginInterval("Render", id: signposter.makeSignpostID(), "Render \(context.knownPages.count) pages")
-        var conversionProblems: [Problem] = context.knownPages.concurrentPerform { identifier, results in
-            // If cancelled skip all concurrent conversion work in this block.
-            guard !Task.isCancelled else { return }
-            
-            // Wrap JSON encoding in an autorelease pool to avoid retaining the autoreleased ObjC objects returned by `JSONSerialization`
-            autoreleasepool {
-                do {
-                    let entity = try context.entity(with: identifier)
-                    
-                    guard let renderNode = converter.renderNode(for: entity) else {
-                        // No render node was produced for this entity, so just skip it.
-                        return
-                    }
-                    
-                    try outputConsumer.consume(renderNode: renderNode)
+        // Render all pages and gather their supplementary "digest" information if enabled.
+        let coverageFilterClosure = documentationCoverageOptions.generateFilterClosure()
+        let supplementaryRenderInfo = try await context.knownPages._concurrentPerform(
+            taskName: "Render",
+            batchWork: { slice in
+                var supplementaryRenderInfo = SupplementaryRenderInformation()
+                
+                for identifier in slice {
+                    try autoreleasepool {
+                        let entity = try context.entity(with: identifier)
                         
-                    switch documentationCoverageOptions.level {
-                    case .detailed, .brief:
-                        let coverageEntry = try CoverageDataEntry(
-                            documentationNode: entity,
-                            renderNode: renderNode,
-                            context: context
-                        )
-                        if coverageFilterClosure(coverageEntry) {
-                            resultsGroup.async(queue: resultsSyncQueue) {
-                                coverageInfo.append(coverageEntry)
-                            }
+                        guard let renderNode = converter.renderNode(for: entity) else {
+                            // No render node was produced for this entity, so just skip it.
+                            return
                         }
-                    case .none:
-                        break
-                    }
-                    
-                    if emitDigest {
-                        let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: true)
-                        let nodeIndexingRecords = try renderNode.indexingRecords(onPage: identifier)
-                        resultsGroup.async(queue: resultsSyncQueue) {
-                            assets.merge(renderNode.assetReferences, uniquingKeysWith: +)
-                            linkSummaries.append(contentsOf: nodeLinkSummaries)
-                            indexingRecords.append(contentsOf: nodeIndexingRecords)
+                        try outputConsumer.consume(renderNode: renderNode)
+                        
+                        switch documentationCoverageOptions.level {
+                        case .detailed, .brief:
+                            let coverageEntry = try CoverageDataEntry(documentationNode: entity, renderNode: renderNode, context: context)
+                            if coverageFilterClosure(coverageEntry) {
+                                supplementaryRenderInfo.coverageInfo.append(coverageEntry)
+                            }
+                        case .none:
+                            break
                         }
-                    } else if FeatureFlags.current.isExperimentalLinkHierarchySerializationEnabled {
-                        let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: false)
-                        resultsGroup.async(queue: resultsSyncQueue) {
-                            linkSummaries.append(contentsOf: nodeLinkSummaries)
+                        if emitDigest {
+                            let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: true)
+                            let nodeIndexingRecords = try renderNode.indexingRecords(onPage: identifier)
+                            
+                            supplementaryRenderInfo.assets.merge(renderNode.assetReferences, uniquingKeysWith: +)
+                            supplementaryRenderInfo.linkSummaries.append(contentsOf: nodeLinkSummaries)
+                            supplementaryRenderInfo.indexingRecords.append(contentsOf: nodeIndexingRecords)
+                        } else if FeatureFlags.current.isExperimentalLinkHierarchySerializationEnabled {
+                            let nodeLinkSummaries = entity.externallyLinkableElementSummaries(context: context, renderNode: renderNode, includeTaskGroups: false)
+                            
+                            supplementaryRenderInfo.linkSummaries.append(contentsOf: nodeLinkSummaries)
                         }
                     }
-                } catch {
-                    recordProblem(from: error, in: &results, withIdentifier: "render-node")
                 }
+                
+                return supplementaryRenderInfo
+            },
+            initialResult: SupplementaryRenderInformation(),
+            combineResults: { accumulated, partialResult in
+                accumulated.assets.merge(partialResult.assets, uniquingKeysWith: +)
+                accumulated.linkSummaries.append(contentsOf: partialResult.linkSummaries)
+                accumulated.indexingRecords.append(contentsOf: partialResult.indexingRecords)
+                accumulated.coverageInfo.append(contentsOf: partialResult.coverageInfo)
             }
-        }
-        
-        // Wait for any concurrent updates to complete.
-        resultsGroup.wait()
+        )
         
         signposter.endInterval("Render", renderSignpostHandle)
         
-        guard !Task.isCancelled else { return [] }
-        
+        guard !Task.isCancelled else { return }
+        
         // Consumes all external links and adds them into the sidebar.
         // This consumes all external links referenced across all content, and indexes them so they're available for reference in the navigator.
         // This is not ideal as it means that links outside of the Topics section can impact the content of the navigator.
@@ -186,49 +153,33 @@ package enum ConvertActionConverter {
         
         // Write various metadata
         if emitDigest {
-            signposter.withIntervalSignpost("Emit digest", id: signposter.makeSignpostID()) {
-                do {
-                    try outputConsumer.consume(linkableElementSummaries: linkSummaries)
-                    try outputConsumer.consume(indexingRecords: indexingRecords)
-                    try outputConsumer.consume(assets: assets)
-                } catch {
-                    recordProblem(from: error, in: &conversionProblems, withIdentifier: "metadata")
-                }
+            try signposter.withIntervalSignpost("Emit digest", id: signposter.makeSignpostID()) {
+                try outputConsumer.consume(linkableElementSummaries: supplementaryRenderInfo.linkSummaries)
+                try outputConsumer.consume(indexingRecords: supplementaryRenderInfo.indexingRecords)
+                try outputConsumer.consume(assets: supplementaryRenderInfo.assets)
             }
         }
         
         if FeatureFlags.current.isExperimentalLinkHierarchySerializationEnabled {
-            signposter.withIntervalSignpost("Serialize link hierarchy", id: signposter.makeSignpostID()) {
-                do {
-                    let serializableLinkInformation = try context.linkResolver.localResolver.prepareForSerialization(bundleID: context.inputs.id)
-                    try outputConsumer.consume(linkResolutionInformation: serializableLinkInformation)
-                    
-                    if !emitDigest {
-                        try outputConsumer.consume(linkableElementSummaries: linkSummaries)
-                    }
-                } catch {
-                    recordProblem(from: error, in: &conversionProblems, withIdentifier: "link-resolver")
+            try signposter.withIntervalSignpost("Serialize link hierarchy", id: signposter.makeSignpostID()) {
+                let serializableLinkInformation = try context.linkResolver.localResolver.prepareForSerialization(bundleID: context.inputs.id)
+                try outputConsumer.consume(linkResolutionInformation: serializableLinkInformation)
+                
+                if !emitDigest {
+                    try outputConsumer.consume(linkableElementSummaries: supplementaryRenderInfo.linkSummaries)
                 }
             }
         }
         
         if emitDigest {
-            signposter.withIntervalSignpost("Emit digest", id: signposter.makeSignpostID()) {
-                do {
-                    try (_Deprecated(outputConsumer) as (any _DeprecatedConsumeProblemsAccess))._consume(problems: context.problems + conversionProblems)
-                } catch {
-                    recordProblem(from: error, in: &conversionProblems, withIdentifier: "problems")
-                }
+            try signposter.withIntervalSignpost("Emit digest", id: signposter.makeSignpostID()) {
+                try (_Deprecated(outputConsumer) as (any _DeprecatedConsumeProblemsAccess))._consume(problems: context.problems)
             }
         }
         
         switch documentationCoverageOptions.level {
         case .detailed, .brief:
-            do {
-                try outputConsumer.consume(documentationCoverageInfo: coverageInfo)
-            } catch {
-                recordProblem(from: error, in: &conversionProblems, withIdentifier: "coverage")
-            }
+            try outputConsumer.consume(documentationCoverageInfo: supplementaryRenderInfo.coverageInfo)
         case .none:
             break
         }
@@ -243,7 +194,12 @@ package enum ConvertActionConverter {
         benchmark(add: Benchmark.ExternalTopicsHash(context: context))
         // Log the peak memory.
         benchmark(add: Benchmark.PeakMemory())
-        
-        return conversionProblems
     }
 }
+
+private struct SupplementaryRenderInformation {
+    var indexingRecords = [IndexingRecord]()
+    var linkSummaries = [LinkDestinationSummary]()
+    var assets = [RenderReferenceType : [any RenderReference]]()
+    var coverageInfo = [CoverageDataEntry]()
+}
diff --git a/Sources/SwiftDocC/Infrastructure/DocumentationContext.swift b/Sources/SwiftDocC/Infrastructure/DocumentationContext.swift
index b40d8dd7f6..426e7b4015 100644
--- a/Sources/SwiftDocC/Infrastructure/DocumentationContext.swift
+++ b/Sources/SwiftDocC/Infrastructure/DocumentationContext.swift
@@ -218,7 +218,7 @@ public class DocumentationContext {
         self.linkResolver = LinkResolver(dataProvider: dataProvider)
         ResolvedTopicReference.enableReferenceCaching(for: inputs.id)
         
-        try register()
+        try await register()
     }
     
     /// Perform semantic analysis on a given `document` at a given `source` location and append any problems found to `problems`.
@@ -1950,7 +1950,7 @@ public class DocumentationContext {
     /**
      Register a documentation bundle with this context.
      */
-    private func register() throws {
+    private func register() async throws {
         try shouldContinueRegistration()
         
         let currentFeatureFlags: FeatureFlags?
@@ -1982,111 +1982,61 @@ public class DocumentationContext {
             }
         }
         
-        // Note: Each bundle is registered and processed separately.
-        // Documents and symbols may both reference each other so the bundle is registered in 4 steps
-        
-        // In the bundle discovery phase all tasks run in parallel as they don't depend on each other.
-        let discoveryGroup = DispatchGroup()
-        let discoveryQueue = DispatchQueue(label: "org.swift.docc.Discovery", qos: .unspecified, attributes: .concurrent, autoreleaseFrequency: .workItem)
-        
-        let discoveryError = Synchronized<(any Error)?>(nil)
+        // Documents and symbols may both reference each other so the inputs is registered in 4 steps
         
-        // Load all bundle symbol graphs into the loader.
-        var symbolGraphLoader: SymbolGraphLoader!
-        var hierarchyBasedResolver: PathHierarchyBasedLinkResolver!
-        
-        discoveryGroup.async(queue: discoveryQueue) { [unowned self] in
-            symbolGraphLoader = SymbolGraphLoader(
+        // Load symbol information and construct data structures that only rely on symbol information.
+        async let loadSymbols = { [signposter, inputs, dataProvider, configuration] in
+            var symbolGraphLoader = SymbolGraphLoader(
                 bundle: inputs,
                 dataProvider: dataProvider,
                 symbolGraphTransformer: configuration.convertServiceConfiguration.symbolGraphTransformer
             )
-            do {
-                try signposter.withIntervalSignpost("Load symbols", id: signposter.makeSignpostID()) {
+            try signposter.withIntervalSignpost("Load symbols", id: signposter.makeSignpostID()) {
+                try autoreleasepool {
                     try symbolGraphLoader.loadAll()
                 }
-                hierarchyBasedResolver = signposter.withIntervalSignpost("Build PathHierarchy", id: signposter.makeSignpostID()) {
+            }
+            try shouldContinueRegistration()
+            let hierarchyBasedResolver = signposter.withIntervalSignpost("Build PathHierarchy", id: signposter.makeSignpostID()) {
+                autoreleasepool {
                     PathHierarchyBasedLinkResolver(pathHierarchy: PathHierarchy(
                         symbolGraphLoader: symbolGraphLoader,
                         bundleName: urlReadablePath(inputs.displayName),
                         knownDisambiguatedPathComponents: configuration.convertServiceConfiguration.knownDisambiguatedSymbolPathComponents
                     ))
                 }
-                
-                self.snippetResolver = SnippetResolver(symbolGraphLoader: symbolGraphLoader)
-            } catch {
-                // Pipe the error out of the dispatch queue.
-                discoveryError.sync({
-                    if $0 == nil { $0 = error }
-                })
             }
-        }
+            
+            let snippetResolver = SnippetResolver(symbolGraphLoader: symbolGraphLoader)
+            
+            return (symbolGraphLoader, hierarchyBasedResolver, snippetResolver)
+        }()
         
-        // First, all the resources are added since they don't reference anything else.
-        discoveryGroup.async(queue: discoveryQueue) { [unowned self] in
-            do {
-                try signposter.withIntervalSignpost("Load resources", id: signposter.makeSignpostID()) {
-                    try self.registerMiscResources()
-                }
-            } catch {
-                // Pipe the error out of the dispatch queue.
-                discoveryError.sync({
-                    if $0 == nil { $0 = error }
-                })
+        // Load resources like images and videos
+        async let loadResources: Void = try signposter.withIntervalSignpost("Load resources", id: signposter.makeSignpostID()) {
+            try autoreleasepool {
+                try self.registerMiscResources()
             }
         }
         
-        // Second, all the documents and symbols are added.
-        //
-        // Note: Documents and symbols may look up resources at this point but shouldn't lookup other documents or
-        // symbols or attempt to resolve links/references since the topic graph may not contain all documents
-        // or all symbols yet.
-        var result: (
-            tutorialTableOfContentsResults: [SemanticResult<TutorialTableOfContents>],
-            tutorials: [SemanticResult<Tutorial>],
-            tutorialArticles: [SemanticResult<TutorialArticle>],
-            articles: [SemanticResult<Article>],
-            documentationExtensions: [SemanticResult<Article>]
-        )!
-        
-        discoveryGroup.async(queue: discoveryQueue) { [unowned self] in
-            do {
-                result = try signposter.withIntervalSignpost("Load documents", id: signposter.makeSignpostID()) {
-                    try self.registerDocuments()
-                }
-            } catch {
-                // Pipe the error out of the dispatch queue.
-                discoveryError.sync({
-                    if $0 == nil { $0 = error }
-                })
+        // Load documents
+        async let loadDocuments = try signposter.withIntervalSignpost("Load documents", id: signposter.makeSignpostID()) {
+            try autoreleasepool {
+                try self.registerDocuments()
             }
         }
         
-        discoveryGroup.async(queue: discoveryQueue) { [unowned self] in
-            do {
-                try signposter.withIntervalSignpost("Load external resolvers", id: signposter.makeSignpostID()) {
-                    try linkResolver.loadExternalResolvers(dependencyArchives: configuration.externalDocumentationConfiguration.dependencyArchives)
-                }
-            } catch {
-                // Pipe the error out of the dispatch queue.
-                discoveryError.sync({
-                    if $0 == nil { $0 = error }
-                })
+        // Load any external resolvers
+        async let loadExternalResolvers: Void = try signposter.withIntervalSignpost("Load external resolvers", id: signposter.makeSignpostID()) {
+            try autoreleasepool {
+                try linkResolver.loadExternalResolvers(dependencyArchives: configuration.externalDocumentationConfiguration.dependencyArchives)
            }
         }
         
-        discoveryGroup.wait()
-        
-        try shouldContinueRegistration()
-        
-        // Re-throw discovery errors
-        if let encounteredError = discoveryError.sync({ $0 }) {
-            throw encounteredError
-        }
-        
-        // All discovery went well, process the inputs.
-        let (tutorialTableOfContentsResults, tutorials, tutorialArticles, allArticles, documentationExtensions) = result
+        let (tutorialTableOfContentsResults, tutorials, tutorialArticles, allArticles, documentationExtensions) = try await loadDocuments
+        try shouldContinueRegistration()
         
         var (otherArticles, rootPageArticles) = splitArticles(allArticles)
         
         let globalOptions = (allArticles + documentationExtensions).compactMap { article in
@@ -2126,7 +2076,10 @@
             options = globalOptions.first
         }
         
+        let (symbolGraphLoader, hierarchyBasedResolver, snippetResolver) = try await loadSymbols
+        try shouldContinueRegistration()
         self.linkResolver.localResolver = hierarchyBasedResolver
+        self.snippetResolver = snippetResolver
         hierarchyBasedResolver.addMappingForRoots(bundle: inputs)
         for tutorial in tutorials {
             hierarchyBasedResolver.addTutorial(tutorial)
@@ -2139,9 +2092,10 @@
         }
         
         registerRootPages(from: rootPageArticles)
+        
         try registerSymbols(symbolGraphLoader: symbolGraphLoader, documentationExtensions: documentationExtensions)
         // We don't need to keep the loader in memory after we've registered all symbols.
-        symbolGraphLoader = nil
+        _ = consume symbolGraphLoader
         
         try shouldContinueRegistration()
         
@@ -2168,12 +2122,17 @@ public class DocumentationContext {
             try shouldContinueRegistration()
         }
         
+        _ = try await loadExternalResolvers
+        
         // Third, any processing that relies on resolving other content is done, mainly resolving links.
         preResolveExternalLinks(semanticObjects: tutorialTableOfContentsResults.map(referencedSemanticObject) + tutorials.map(referencedSemanticObject) + tutorialArticles.map(referencedSemanticObject))
         
+        // References to resources aren't used until the links are resolved
+        _ = try await loadResources
+        
         resolveLinks(
             tutorialTableOfContents: tutorialTableOfContentsResults,
             tutorials: tutorials,
diff --git a/Sources/SwiftDocC/Utility/Collection+ConcurrentPerform.swift b/Sources/SwiftDocC/Utility/Collection+ConcurrentPerform.swift
index 37a0a5fe33..bec134687d 100644
--- a/Sources/SwiftDocC/Utility/Collection+ConcurrentPerform.swift
+++ b/Sources/SwiftDocC/Utility/Collection+ConcurrentPerform.swift
@@ -144,3 +144,103 @@ extension Collection where Index == Int, Self: SendableMetatype {
         return allResults.sync({ $0 })
     }
 }
+
+extension Collection {
+    /// Concurrently performs work on slices of the collection's elements, combining the partial results into a final result.
+    ///
+    /// This method is intended as a building block that other higher-level `concurrent...` methods can be built upon.
+    /// That said, calling code can opt to use this method directly as opposed to writing overly specific single-use helper methods.
+    ///
+    /// - Parameters:
+    ///   - taskName: A human readable name of the tasks that the collection uses to perform this work.
+    ///   - batchWork: The concurrent work to perform on each slice of the collection's elements.
+    ///   - initialResult: The initial result to accumulate the partial results into.
+    ///   - combineResults: A closure that updates the accumulated result with a partial result from performing the work over one slice of the collection's elements.
+    /// - Returns: The final result of accumulating all partial results, out of order, into the initial result.
+    func _concurrentPerform<Result, PartialResult>(
+        taskName: String? = nil,
+        batchWork: (consuming SubSequence) throws -> PartialResult,
+        initialResult: Result,
+        combineResults: (inout Result, consuming PartialResult) -> Void
+    ) async throws -> Result {
+        try await withThrowingTaskGroup(of: PartialResult.self, returning: Result.self) { taskGroup in
+            try await withoutActuallyEscaping(batchWork) { work in
+                try await withoutActuallyEscaping(combineResults) { combineResults in
+                    var remaining = self[...]
+                    
+                    // Don't run more tasks in parallel than there are cores to run them
+                    let maxParallelTasks: Int = ProcessInfo.processInfo.processorCount
+                    // Finding the right number of tasks is a balancing act.
+                    // If the tasks are too small, then there's increased overhead from scheduling a lot of tasks and accumulating their results.
+                    // If the tasks are too large, then there's a risk that some tasks take longer to complete than others, increasing the amount of idle time.
+                    //
+                    // Here, we aim to schedule at most 10 tasks per core but create fewer tasks if the collection is fairly small to avoid some concurrent overhead.
+                    // The table below shows the approximate number of tasks per CPU core and the number of elements per task, within parenthesis,
+                    // for different collection sizes and number of CPU cores, given a minimum task size of 20 elements:
+                    //
+                    //           |    500     |    1000    |    2500    |    5000    |    10000    |    25000
+                    // ----------|------------|------------|------------|------------|-------------|-------------
+                    // 8 cores   | ~3,2 (20)  | ~6,3 (20)  | ~9,8 (32)  | ~9,9 (63)  | ~9,9 (126)  | ~9,9 (313)
+                    // 12 cores  | ~2,1 (20)  | ~4,2 (20)  | ~10,0 (21) | ~10,0 (42) | ~10,0 (84)  | ~10,0 (209)
+                    // 16 cores  | ~1,6 (20)  | ~3,2 (20)  | ~7,9 (20)  | ~9,8 (32)  | ~9,9 (63)   | ~10,0 (157)
+                    // 32 cores  | ~0,8 (20)  | ~1,6 (20)  | ~4,0 (20)  | ~7,9 (20)  | ~9,8 (32)   | ~9,9 (79)
+                    //
+                    let numberOfElementsPerTask: Int = Swift.max(
+                        Int(Double(remaining.count) / Double(maxParallelTasks * 10) + 1),
+                        20 // (this is a completely arbitrary task size threshold)
+                    )
+                    
+                    // Start the first round of work.
+                    // If the collection is big, this will add one task per core.
+                    // If the collection is small, this will only add a few tasks.
+                    for _ in 0..
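
A minimal, self-contained sketch of the batch-and-combine pattern that the new `_concurrentPerform(taskName:batchWork:initialResult:combineResults:)` building block is based on: slice a collection into roughly per-core batches, compute a partial result per batch in a task group, then fold the out-of-order partials into one value (the same shape the converter uses to accumulate `SupplementaryRenderInformation`). The `concurrentSum` helper and its batch size below are illustrative assumptions, not APIs from this patch.

import Foundation

// Sketch of the batch-and-combine idea, not the DocC implementation.
func concurrentSum(of numbers: [Int]) async -> Int {
    let coreCount = max(ProcessInfo.processInfo.processorCount, 1)
    // Roughly one batch per core, with a floor so tiny inputs don't spawn many tasks.
    let batchSize = max(numbers.count / coreCount, 20)
    
    return await withTaskGroup(of: Int.self) { group in
        var start = numbers.startIndex
        while start < numbers.endIndex {
            let end = min(start + batchSize, numbers.endIndex)
            let slice = numbers[start..<end]
            // Each task produces a partial result for its slice.
            group.addTask { slice.reduce(0, +) }
            start = end
        }
        // Combine the partial results, out of order, into the final result.
        var total = 0
        for await partial in group {
            total += partial
        }
        return total
    }
}

// Example: `await concurrentSum(of: Array(1...10_000))` returns 50_005_000.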