From ca2943e844452f052a9bb98f34e6c05c2babb7a1 Mon Sep 17 00:00:00 2001 From: Jamie Magee Date: Fri, 14 Nov 2025 16:12:33 -0800 Subject: [PATCH] Initial RPM database detector --- Directory.Packages.props | 1 + .../LinuxDistribution.cs | 94 ++++++ .../SystemPackageDetector.cs | 269 ++++++++++++++++ .../DetectorClass.cs | 3 + .../TypedComponent/ComponentType.cs | 3 + .../TypedComponent/RpmComponent.cs | 125 ++++++++ ...rosoft.ComponentDetection.Detectors.csproj | 1 + .../rpm/RpmDbDetector.cs | 187 +++++++++++ .../rpm/RpmHeaderParser.cs | 298 ++++++++++++++++++ .../rpm/RpmPackageInfo.cs | 32 ++ .../Extensions/ServiceCollectionExtensions.cs | 4 + 11 files changed, 1017 insertions(+) create mode 100644 src/Microsoft.ComponentDetection.Common/LinuxDistribution.cs create mode 100644 src/Microsoft.ComponentDetection.Common/SystemPackageDetector.cs create mode 100644 src/Microsoft.ComponentDetection.Contracts/TypedComponent/RpmComponent.cs create mode 100644 src/Microsoft.ComponentDetection.Detectors/rpm/RpmDbDetector.cs create mode 100644 src/Microsoft.ComponentDetection.Detectors/rpm/RpmHeaderParser.cs create mode 100644 src/Microsoft.ComponentDetection.Detectors/rpm/RpmPackageInfo.cs diff --git a/Directory.Packages.props b/Directory.Packages.props index 69144e02b..1439109f6 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -18,6 +18,7 @@ + diff --git a/src/Microsoft.ComponentDetection.Common/LinuxDistribution.cs b/src/Microsoft.ComponentDetection.Common/LinuxDistribution.cs new file mode 100644 index 000000000..52f4d58ca --- /dev/null +++ b/src/Microsoft.ComponentDetection.Common/LinuxDistribution.cs @@ -0,0 +1,94 @@ +namespace Microsoft.ComponentDetection.Common; + +using System; +using System.Collections.Generic; + +/// +/// Represents Linux distribution information parsed from /etc/os-release or /usr/lib/os-release. 
+///
+public sealed class LinuxDistribution
+{
+    /// <summary>
+    /// Gets the operating system identifier read from the <c>ID</c> field (e.g., "ubuntu", "rhel", "fedora").
+    /// The value is stored as written in the file; no case normalization is applied here.
+    /// </summary>
+    public string Id { get; init; }
+
+    /// <summary>
+    /// Gets the operating system version number or identifier (<c>VERSION_ID</c>), or null when absent.
+    /// </summary>
+    public string VersionId { get; init; }
+
+    /// <summary>
+    /// Gets the operating system name without version information (<c>NAME</c>), or null when absent.
+    /// </summary>
+    public string Name { get; init; }
+
+    /// <summary>
+    /// Gets a human-readable operating system name with version (<c>PRETTY_NAME</c>), or null when absent.
+    /// </summary>
+    public string PrettyName { get; init; }
+
+    /// <summary>
+    /// Parses an os-release file content and returns a LinuxDistribution object.
+    /// The os-release format is defined at https://www.freedesktop.org/software/systemd/man/os-release.html.
+    /// </summary>
+    /// <param name="content">The content of the os-release file.</param>
+    /// <returns>A LinuxDistribution object, or null when the content is blank or has no non-blank ID field.</returns>
+    public static LinuxDistribution ParseOsRelease(string content)
+    {
+        if (string.IsNullOrWhiteSpace(content))
+        {
+            return null;
+        }
+
+        // Keys are matched case-insensitively; a later duplicate key overwrites an earlier one.
+        var values = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
+
+        foreach (var line in content.Split(['\r', '\n'], StringSplitOptions.RemoveEmptyEntries))
+        {
+            var trimmedLine = line.Trim();
+
+            // Skip comments and empty lines
+            if (trimmedLine.Length == 0 || trimmedLine.StartsWith('#'))
+            {
+                continue;
+            }
+
+            // Split on the first '=' only so values may themselves contain '='.
+            var parts = trimmedLine.Split('=', 2);
+            if (parts.Length != 2)
+            {
+                continue;
+            }
+
+            var key = parts[0].Trim();
+            var value = parts[1].Trim();
+
+            // Remove one pair of matching surrounding quotes (double or single) if present
+            if (value.Length >= 2 && (value[0] is '"' or '\'') && value[^1] == value[0])
+            {
+                value = value[1..^1];
+            }
+
+            values[key] = value;
+        }
+
+        // At minimum, we need a usable ID field; an empty "ID=" is treated the same as a missing one.
+        if (!values.TryGetValue("ID", out var id) || string.IsNullOrWhiteSpace(id))
+        {
+            return null;
+        }
+
+        return new LinuxDistribution
+        {
+            Id = id,
+            VersionId = values.GetValueOrDefault("VERSION_ID"),
+            Name = values.GetValueOrDefault("NAME"),
+            PrettyName = values.GetValueOrDefault("PRETTY_NAME"),
+        };
+    }
+}
diff --git a/src/Microsoft.ComponentDetection.Common/SystemPackageDetector.cs b/src/Microsoft.ComponentDetection.Common/SystemPackageDetector.cs
new file mode 100644
index 000000000..7c63b8948
--- /dev/null
+++ b/src/Microsoft.ComponentDetection.Common/SystemPackageDetector.cs
@@ -0,0 +1,269 @@
+namespace Microsoft.ComponentDetection.Common;
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Runtime.InteropServices;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.ComponentDetection.Contracts;
+using Microsoft.ComponentDetection.Contracts.Internal;
+using Microsoft.ComponentDetection.Contracts.TypedComponent;
+using Microsoft.Extensions.Logging;
+
+/// <summary>
+/// Abstract base class for system package detectors (RPM, APK, DPKG, etc.).
+/// </summary>
+public abstract class SystemPackageDetector : FileComponentDetector
+{
+    /// <inheritdoc />
+    protected override async Task OnFileFoundAsync(
+        ProcessRequest processRequest,
+        IDictionary<string, string> detectorArgs,
+        CancellationToken cancellationToken = default
+    )
+    {
+        // Only run on Linux
+        if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+        {
+            this.Logger.LogDebug("Skipping {DetectorId} - not running on Linux", this.Id);
+            return;
+        }
+
+        var file = processRequest.ComponentStream;
+        var recorder = processRequest.SingleFileComponentRecorder;
+
+        try
+        {
+            // Find the Linux distribution
+            var distro = await this.FindDistributionAsync().ConfigureAwait(false);
+
+            if (distro == null)
+            {
+                // Detection continues with a null distribution; subclasses must tolerate it.
+                this.Logger.LogWarning(
+                    "Could not determine Linux distribution for {FilePath}, using 'linux' as default namespace",
+                    file.Location
+                );
+            }
+
+            // Parse packages from the database
+            var packages = await this.ParsePackagesAsync(file.Stream, file.Location, distro)
+                .ConfigureAwait(false);
+
+            if (packages.Count == 0)
+            {
+                this.Logger.LogDebug("No packages found in {FilePath}", file.Location);
+                return;
+            }
+
+            // Build dependency graph and register components
+            this.BuildDependencyGraph(packages, recorder, distro);
+        }
+        catch (Exception ex)
+        {
+            // Log with the file location for context, then let the caller see the original failure.
+            this.Logger.LogError(
+                ex,
+                "Error processing system package database at {FilePath}",
+                file.Location
+            );
+            throw;
+        }
+    }
+
+    /// <summary>
+    /// Parses packages from the system package database.
+    /// </summary>
+    /// <param name="dbStream">The database file stream.</param>
+    /// <param name="location">The location of the database file.</param>
+    /// <param name="distro">The detected Linux distribution; may be null when none was found.</param>
+    /// <returns>A list of parsed package information.</returns>
+    protected abstract Task<List<SystemPackageInfo>> ParsePackagesAsync(
+        Stream dbStream,
+        string location,
+        LinuxDistribution distro
+    );
+
+    /// <summary>
+    /// Creates a TypedComponent from system package information.
+    /// </summary>
+    /// <param name="package">The package information.</param>
+    /// <param name="distro">The Linux distribution; may be null when none was found.</param>
+    /// <returns>A TypedComponent representing the package.</returns>
+    protected abstract TypedComponent CreateComponent(
+        SystemPackageInfo package,
+        LinuxDistribution distro
+    );
+
+    /// <summary>
+    /// Finds the Linux distribution by reading <c>/etc/os-release</c> and then <c>/usr/lib/os-release</c>.
+    /// Both paths are absolute, so this describes the machine running the scan; it is not resolved
+    /// relative to the package database that was found.
+    /// </summary>
+    /// <returns>The first successfully parsed LinuxDistribution, or null if neither file yields one.</returns>
+    protected virtual async Task<LinuxDistribution> FindDistributionAsync()
+    {
+        var possiblePaths = new[] { "/etc/os-release", "/usr/lib/os-release" };
+
+        foreach (var path in possiblePaths)
+        {
+            try
+            {
+                if (!File.Exists(path))
+                {
+                    continue;
+                }
+
+                var content = await File.ReadAllTextAsync(path).ConfigureAwait(false);
+                var distro = LinuxDistribution.ParseOsRelease(content);
+                if (distro is not null)
+                {
+                    this.Logger.LogDebug(
+                        "Found Linux distribution: {Id} {VersionId} at {Path}",
+                        distro.Id,
+                        distro.VersionId,
+                        path
+                    );
+                    return distro;
+                }
+            }
+            catch (Exception ex)
+            {
+                // Best effort: an unreadable file just means we try the next candidate.
+                this.Logger.LogTrace(ex, "Failed to read os-release file at {Path}", path);
+            }
+        }
+
+        return null;
+    }
+
+    /// <summary>
+    /// Builds the dependency graph from package information using Provides/Requires relationships.
+    /// </summary>
+    /// <param name="packages">The list of packages to process.</param>
+    /// <param name="recorder">The component recorder.</param>
+    /// <param name="distro">The Linux distribution.</param>
+    protected virtual void BuildDependencyGraph(
+        List<SystemPackageInfo> packages,
+        ISingleFileComponentRecorder recorder,
+        LinuxDistribution distro
+    )
+    {
+        // Create a provides index: capability -> list of packages that provide it
+        var providesIndex = new Dictionary<string, List<SystemPackageInfo>>(packages.Count);
+
+        void AddProvider(string capability, SystemPackageInfo provider)
+        {
+            if (!providesIndex.TryGetValue(capability, out var providers))
+            {
+                providers = [];
+                providesIndex[capability] = providers;
+            }
+
+            // A package commonly lists its own name in Provides; avoid indexing it twice in a row.
+            if (providers.Count == 0 || !ReferenceEquals(providers[^1], provider))
+            {
+                providers.Add(provider);
+            }
+        }
+
+        // Index all packages by what they provide
+        foreach (var pkg in packages)
+        {
+            // Package name is always a "provides"
+            AddProvider(pkg.Name, pkg);
+
+            if (pkg.Provides is null)
+            {
+                continue;
+            }
+
+            foreach (var capability in pkg.Provides)
+            {
+                if (!string.IsNullOrWhiteSpace(capability))
+                {
+                    AddProvider(capability, pkg);
+                }
+            }
+        }
+
+        // Create components and track them by package name.
+        // NOTE(review): packages sharing a name (e.g. several installed versions) collapse to the
+        // last one here, so their edges attach to that entry - confirm this is acceptable.
+        var componentsByPackageName = new Dictionary<string, DetectedComponent>(packages.Count);
+
+        // First pass: register all components as root dependencies
+        foreach (var pkg in packages)
+        {
+            var component = new DetectedComponent(this.CreateComponent(pkg, distro));
+            recorder.RegisterUsage(component, isExplicitReferencedDependency: true);
+            componentsByPackageName[pkg.Name] = component;
+        }
+
+        // Second pass: add dependency relationships
+        foreach (var pkg in packages)
+        {
+            if (
+                pkg.Requires is null
+                || !componentsByPackageName.TryGetValue(pkg.Name, out var dependentComponent)
+            )
+            {
+                continue;
+            }
+
+            foreach (var require in pkg.Requires)
+            {
+                // Skip blanks and boolean expressions such as "(a or b)" (not supported)
+                if (string.IsNullOrWhiteSpace(require) || require.TrimStart().StartsWith('('))
+                {
+                    continue;
+                }
+
+                // Find packages that provide this requirement
+                if (!providesIndex.TryGetValue(require, out var providers))
+                {
+                    continue;
+                }
+
+                foreach (var provider in providers)
+                {
+                    // Skip self-references
+                    if (provider.Name == pkg.Name)
+                    {
+                        continue;
+                    }
+
+                    if (
+                        componentsByPackageName.TryGetValue(
+                            provider.Name,
+                            out var dependencyComponent
+                        )
+                    )
+                    {
+                        // pkg requires something the provider offers, so the provider is the
+                        // dependency and pkg is its parent in the graph.
+                        recorder.RegisterUsage(
+                            dependencyComponent,
+                            isExplicitReferencedDependency: false,
+                            parentComponentId: dependentComponent.Component.Id
+                        );
+                    }
+                }
+            }
+        }
+
+        this.Logger.LogInformation(
+            "Registered {PackageCount} packages with dependency relationships",
+            packages.Count
+        );
+    }
+
+    /// <summary>
+    /// Represents package information extracted from a system package database.
+    /// </summary>
+    protected class SystemPackageInfo
+    {
+        /// <summary>Gets the package name; also used as an implicit provided capability.</summary>
+        public required string Name { get; init; }
+
+        /// <summary>Gets the package version.</summary>
+        public required string Version { get; init; }
+
+        /// <summary>Gets the capabilities this package provides.</summary>
+        public List<string> Provides { get; init; } = [];
+
+        /// <summary>Gets the capabilities this package requires.</summary>
+        public List<string> Requires { get; init; } = [];
+
+        /// <summary>Gets detector-specific data carried alongside the package; its type is chosen by the subclass.</summary>
+        public object Metadata { get; init; }
+    }
+}
diff --git a/src/Microsoft.ComponentDetection.Contracts/DetectorClass.cs b/src/Microsoft.ComponentDetection.Contracts/DetectorClass.cs
index becfa4f9f..eee1af300 100644
--- a/src/Microsoft.ComponentDetection.Contracts/DetectorClass.cs
+++ b/src/Microsoft.ComponentDetection.Contracts/DetectorClass.cs
@@ -47,4 +47,7 @@ public enum DetectorClass
     /// Indicates a detector applies to Swift packages.
     Swift,
+
+    /// Indicates a detector applies to system packages (RPM, APK, DPKG, etc.).
+    SystemPackages,
 }
diff --git a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/ComponentType.cs b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/ComponentType.cs
index 31b245d59..a30ddb655 100644
--- a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/ComponentType.cs
+++ b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/ComponentType.cs
@@ -62,4 +62,7 @@ public enum ComponentType : byte
     [EnumMember]
     DotNet = 19,
+
+    [EnumMember]
+    Rpm = 20,
 }
diff --git a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/RpmComponent.cs b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/RpmComponent.cs
new file mode 100644
index 000000000..2b60e069c
--- /dev/null
+++ b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/RpmComponent.cs
@@ -0,0 +1,125 @@
+namespace Microsoft.ComponentDetection.Contracts.TypedComponent;
+
+using System.Collections.Generic;
+using System.Globalization;
+using PackageUrl;
+
+/// <summary>
+/// Represents an RPM package component.
+/// </summary>
+public class RpmComponent : TypedComponent
+{
+    private RpmComponent()
+    {
+        // Reserved for deserialization
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="RpmComponent"/> class.
+    /// Name, version, arch and release are passed through <c>ValidateRequiredInput</c>; the rest are optional.
+    /// </summary>
+    /// <param name="name">The package name.</param>
+    /// <param name="version">The package version (without release).</param>
+    /// <param name="arch">The package architecture.</param>
+    /// <param name="release">The package release string.</param>
+    /// <param name="epoch">The optional package epoch.</param>
+    /// <param name="sourceRpm">The optional source RPM filename.</param>
+    /// <param name="vendor">The optional vendor.</param>
+    /// <param name="provides">Capabilities provided; null is stored as an empty array.</param>
+    /// <param name="requires">Capabilities required; null is stored as an empty array.</param>
+    public RpmComponent(
+        string name,
+        string version,
+        string arch,
+        string release,
+        int? epoch = null,
+        string sourceRpm = null,
+        string vendor = null,
+        string[] provides = null,
+        string[] requires = null
+    )
+    {
+        const string componentType = nameof(ComponentType.Rpm);
+
+        this.Name = this.ValidateRequiredInput(name, nameof(this.Name), componentType);
+        this.Version = this.ValidateRequiredInput(version, nameof(this.Version), componentType);
+        this.Arch = this.ValidateRequiredInput(arch, nameof(this.Arch), componentType);
+        this.Release = this.ValidateRequiredInput(release, nameof(this.Release), componentType);
+        this.Epoch = epoch;
+        this.SourceRpm = sourceRpm;
+        this.Vendor = vendor;
+        this.Provides = provides ?? [];
+        this.Requires = requires ?? [];
+    }
+
+    /// <summary>
+    /// Gets or sets the package name.
+    /// </summary>
+    public string Name { get; set; }
+
+    /// <summary>
+    /// Gets or sets the package version.
+    /// </summary>
+    public string Version { get; set; }
+
+    /// <summary>
+    /// Gets or sets the package architecture (e.g., x86_64, aarch64, noarch).
+    /// </summary>
+    public string Arch { get; set; }
+
+    /// <summary>
+    /// Gets or sets the package release string.
+    /// </summary>
+    public string Release { get; set; }
+
+    /// <summary>
+    /// Gets or sets the package epoch (used for version comparison).
+    /// </summary>
+    public int? Epoch { get; set; }
+
+    /// <summary>
+    /// Gets or sets the source RPM filename this package was built from.
+    /// </summary>
+    public string SourceRpm { get; set; }
+
+    /// <summary>
+    /// Gets or sets the vendor or organization that packaged this component.
+    /// </summary>
+    public string Vendor { get; set; }
+
+    /// <summary>
+    /// Gets or sets the list of capabilities this package provides.
+    /// </summary>
+    public string[] Provides { get; set; }
+
+    /// <summary>
+    /// Gets or sets the list of capabilities this package requires.
+    /// </summary>
+    public string[] Requires { get; set; }
+
+    /// <inheritdoc />
+    public override ComponentType Type => ComponentType.Rpm;
+
+    /// <inheritdoc />
+    public override PackageURL PackageUrl
+    {
+        get
+        {
+            var qualifiers = new SortedDictionary<string, string> { ["arch"] = this.Arch };
+
+            if (this.Epoch is { } epoch)
+            {
+                // Identifiers must not vary with the machine's culture settings.
+                qualifiers["epoch"] = epoch.ToString(CultureInfo.InvariantCulture);
+            }
+
+            if (!string.IsNullOrEmpty(this.SourceRpm))
+            {
+                qualifiers["upstream"] = this.SourceRpm;
+            }
+
+            // Note: Namespace should be set by the detector based on distribution ID
+            // For now, we'll use null and let the detector override if needed
+            var version = $"{this.Version}-{this.Release}";
+            return new PackageURL("rpm", null, this.Name, version, qualifiers, null);
+        }
+    }
+
+    /// <inheritdoc />
+    protected override string ComputeId()
+    {
+        // Format: name@[epoch:]version-release/arch - Type
+        var epochStr = this.Epoch is { } epoch
+            ? $"{epoch.ToString(CultureInfo.InvariantCulture)}:"
+            : string.Empty;
+        return $"{this.Name}@{epochStr}{this.Version}-{this.Release}/{this.Arch} - {this.Type}";
+    }
+}
diff --git a/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj b/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj
index a0fb3f5e4..dc50b2927 100644
--- a/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj
+++ b/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj
@@ -2,6 +2,7 @@
+
diff --git a/src/Microsoft.ComponentDetection.Detectors/rpm/RpmDbDetector.cs b/src/Microsoft.ComponentDetection.Detectors/rpm/RpmDbDetector.cs
new file mode 100644
index 000000000..c103c00d7
--- /dev/null
+++ b/src/Microsoft.ComponentDetection.Detectors/rpm/RpmDbDetector.cs
@@ -0,0 +1,187 @@
+namespace Microsoft.ComponentDetection.Detectors.Rpm;
+
+using System;
+using System.Buffers;
+using System.Collections.Generic;
+using System.IO;
+using System.Threading.Tasks;
+using Microsoft.ComponentDetection.Common;
+using Microsoft.ComponentDetection.Contracts;
+using Microsoft.ComponentDetection.Contracts.TypedComponent;
+using Microsoft.Data.Sqlite;
+using Microsoft.Extensions.Logging;
+
+///
+/// Detector for RPM packages from SQLite-format RPM databases.
+/// Supports Azure Linux, Fedora 33+, RHEL 9+, and other modern RPM-based distributions.
+///
+public sealed class RpmDbDetector : SystemPackageDetector
+{
+    /// <summary>
+    /// Initializes a new instance of the <see cref="RpmDbDetector"/> class.
+    /// </summary>
+    /// <param name="componentStreamEnumerableFactory">Factory assigned to <c>ComponentStreamEnumerableFactory</c>.</param>
+    /// <param name="walkerFactory">Directory walker factory assigned to <c>Scanner</c>.</param>
+    /// <param name="logger">Logger assigned to <c>Logger</c>.</param>
+    public RpmDbDetector(
+        IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
+        IObservableDirectoryWalkerFactory walkerFactory,
+        ILogger<RpmDbDetector> logger
+    )
+    {
+        this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory;
+        this.Scanner = walkerFactory;
+        this.Logger = logger;
+    }
+
+    /// <inheritdoc />
+    public override string Id => "RpmDb";
+
+    /// <inheritdoc />
+    public override IEnumerable<string> Categories =>
+        [nameof(DetectorClass.SystemPackages), nameof(DetectorClass.Linux)];
+
+    /// <inheritdoc />
+    public override IEnumerable<ComponentType> SupportedComponentTypes => [ComponentType.Rpm];
+
+    /// <inheritdoc />
+    public override int Version => 1;
+
+    /// <inheritdoc />
+    public override IList<string> SearchPatterns => ["rpmdb.sqlite"];
+
+    /// <inheritdoc />
+    protected override async Task<List<SystemPackageInfo>> ParsePackagesAsync(
+        Stream dbStream,
+        string location,
+        LinuxDistribution distro
+    )
+    {
+        var packages = new List<SystemPackageInfo>();
+
+        // SQLite requires a file path, so copy the stream to a temporary file
+        var tempFile = Path.GetTempFileName();
+        try
+        {
+            await using (var fileStream = File.Create(tempFile))
+            {
+                await dbStream.CopyToAsync(fileStream).ConfigureAwait(false);
+            }
+
+            // Build the connection string with the builder so unusual characters in the temp path
+            // are escaped, and disable pooling so no handle to the temp file outlives this method.
+            var connectionString = new SqliteConnectionStringBuilder
+            {
+                DataSource = tempFile,
+                Mode = SqliteOpenMode.ReadOnly,
+                Pooling = false,
+            }.ToString();
+
+            using var connection = new SqliteConnection(connectionString);
+            await connection.OpenAsync().ConfigureAwait(false);
+
+            // Modern RPM SQLite databases store package data as BLOBs
+            // Schema: Packages(hnum INTEGER PRIMARY KEY, blob BLOB)
+            using var command = connection.CreateCommand();
+            command.CommandText = "SELECT blob FROM Packages";
+
+            using var reader = await command.ExecuteReaderAsync().ConfigureAwait(false);
+            while (await reader.ReadAsync().ConfigureAwait(false))
+            {
+                // A NULL blob carries no header; skip it instead of failing the whole scan.
+                if (reader.IsDBNull(0))
+                {
+                    this.Logger.LogDebug("Skipping package row with NULL header BLOB");
+                    continue;
+                }
+
+                // Calling GetBytes with a null buffer returns the BLOB length.
+                var blobSize = (int)reader.GetBytes(0, 0, null, 0, 0);
+                var blob = ArrayPool<byte>.Shared.Rent(blobSize);
+
+                try
+                {
+                    reader.GetBytes(0, 0, blob, 0, blobSize);
+
+                    // Parse the RPM header from the BLOB (pass only the actual data, not the entire rented array)
+                    var pkgInfo = RpmHeaderParser.ParseHeader(blob.AsSpan(0, blobSize));
+
+                    // RpmComponent validates name, version, release and arch as required inputs, so a
+                    // header lacking any of them cannot become a component; skip it rather than let
+                    // component creation throw later.
+                    if (
+                        string.IsNullOrEmpty(pkgInfo.Name)
+                        || string.IsNullOrEmpty(pkgInfo.Version)
+                        || string.IsNullOrEmpty(pkgInfo.Release)
+                        || string.IsNullOrEmpty(pkgInfo.Arch)
+                    )
+                    {
+                        this.Logger.LogDebug(
+                            "Skipping package with missing name, version, release or architecture"
+                        );
+                        continue;
+                    }
+
+                    packages.Add(
+                        new SystemPackageInfo
+                        {
+                            Name = pkgInfo.Name,
+                            Version = pkgInfo.Version,
+                            Provides =
+                                pkgInfo.Provides.Count > 0 ? pkgInfo.Provides : [pkgInfo.Name],
+                            Requires = pkgInfo.Requires,
+                            Metadata = new RpmMetadata
+                            {
+                                Epoch = pkgInfo.Epoch,
+                                Arch = pkgInfo.Arch,
+                                Release = pkgInfo.Release,
+                                SourceRpm = pkgInfo.SourceRpm,
+                                Vendor = pkgInfo.Vendor,
+                            },
+                        }
+                    );
+                }
+                catch (Exception ex)
+                {
+                    // Best effort per row: one bad header must not hide the other packages.
+                    this.Logger.LogWarning(ex, "Failed to parse RPM header BLOB, skipping package");
+                }
+                finally
+                {
+                    ArrayPool<byte>.Shared.Return(blob);
+                }
+            }
+
+            this.Logger.LogInformation(
+                "Parsed {PackageCount} RPM packages from {Location}",
+                packages.Count,
+                location
+            );
+        }
+        catch (SqliteException ex)
+        {
+            this.Logger.LogError(ex, "Failed to parse RPM database at {Location}", location);
+            throw;
+        }
+        finally
+        {
+            try
+            {
+                if (File.Exists(tempFile))
+                {
+                    File.Delete(tempFile);
+                }
+            }
+            catch (Exception ex)
+            {
+                this.Logger.LogTrace(ex, "Failed to delete temporary file {TempFile}", tempFile);
+            }
+        }
+
+        return packages;
+    }
+
+    /// <inheritdoc />
+    protected override TypedComponent CreateComponent(
+        SystemPackageInfo package,
+        LinuxDistribution distro
+    )
+    {
+        // Metadata is always the RpmMetadata instance attached in ParsePackagesAsync.
+        var metadata = (RpmMetadata)package.Metadata;
+
+        return new RpmComponent(
+            name: package.Name,
+            version: package.Version,
+            arch: metadata.Arch,
+            release: metadata.Release,
+            epoch: metadata.Epoch,
+            sourceRpm: metadata.SourceRpm,
+            vendor: metadata.Vendor,
+            provides: [.. package.Provides],
+            requires: [.. package.Requires]
+        );
+    }
+
+    /// <summary>
+    /// RPM-specific fields carried through <c>SystemPackageInfo.Metadata</c> to <c>CreateComponent</c>.
+    /// </summary>
+    private sealed class RpmMetadata
+    {
+        public int? Epoch { get; init; }
+
+        public string Arch { get; init; }
+
+        public string Release { get; init; }
+
+        public string SourceRpm { get; init; }
+
+        public string Vendor { get; init; }
+    }
+}
diff --git a/src/Microsoft.ComponentDetection.Detectors/rpm/RpmHeaderParser.cs b/src/Microsoft.ComponentDetection.Detectors/rpm/RpmHeaderParser.cs
new file mode 100644
index 000000000..989bb5348
--- /dev/null
+++ b/src/Microsoft.ComponentDetection.Detectors/rpm/RpmHeaderParser.cs
@@ -0,0 +1,298 @@
+namespace Microsoft.ComponentDetection.Detectors.Rpm;
+
+using System;
+using System.Buffers.Binary;
+using System.Collections.Generic;
+using System.Text;
+
+/// <summary>
+/// Parses RPM package headers from binary BLOB data.
+/// </summary>
+internal static class RpmHeaderParser
+{
+    // RPM tag constants - https://github.com/rpm-software-management/rpm/blob/master/lib/header.cc
+    private const int RPMTAG_NAME = 1000;
+    private const int RPMTAG_VERSION = 1001;
+    private const int RPMTAG_RELEASE = 1002;
+    private const int RPMTAG_EPOCH = 1003;
+    private const int RPMTAG_ARCH = 1022;
+    private const int RPMTAG_SOURCERPM = 1044;
+    private const int RPMTAG_PROVIDENAME = 1047;
+    private const int RPMTAG_REQUIRENAME = 1049;
+    private const int RPMTAG_VENDOR = 1011;
+
+    // RPM Type constants
+    private const int RPM_STRING_TYPE = 6;
+    private const int RPM_INT32_TYPE = 4;
+    private const int RPM_STRING_ARRAY_TYPE = 8;
+
+    /// <summary>
+    /// Parses an RPM header BLOB and extracts package information.
+    /// </summary>
+    /// <param name="headerBlob">The binary RPM header data.</param>
+    /// <returns>Package information extracted from the header.</returns>
+    public static RpmPackageInfo ParseHeader(ReadOnlySpan<byte> headerBlob)
+    {
+        // Layout: 4-byte index count, 4-byte store size, then 16 bytes per index entry, then the store.
+        const int preambleSize = 8;
+        const int indexEntrySize = 16;
+
+        if (headerBlob.Length < preambleSize)
+        {
+            throw new ArgumentException("Invalid RPM header: too short", nameof(headerBlob));
+        }
+
+        // SQLite format: starts directly with index count and store size
+        var indexCount = BinaryPrimitives.ReadInt32BigEndian(headerBlob[..4]);
+        var storeSize = BinaryPrimitives.ReadInt32BigEndian(headerBlob[4..8]);
+
+        // Both values come from the file and may be hostile; a negative count would otherwise
+        // reach stackalloc and the slice arithmetic below.
+        if (indexCount < 0 || storeSize < 0)
+        {
+            throw new ArgumentException(
+                "Invalid RPM header: negative index count or store size",
+                nameof(headerBlob)
+            );
+        }
+
+        // 64-bit arithmetic so a large index count cannot wrap around and pass the bounds check.
+        var dataStoreOffset = preambleSize + ((long)indexCount * indexEntrySize);
+        if (dataStoreOffset + storeSize > headerBlob.Length)
+        {
+            throw new ArgumentException(
+                "Invalid RPM header: data store extends beyond buffer",
+                nameof(headerBlob)
+            );
+        }
+
+        var dataStore = headerBlob.Slice((int)dataStoreOffset, storeSize);
+
+        // Small index tables live on the stack; larger ones fall back to a heap array.
+        Span<IndexEntry> indexEntries =
+            indexCount <= 64 ? stackalloc IndexEntry[indexCount] : new IndexEntry[indexCount];
+
+        var indexSpan = headerBlob.Slice(preambleSize, indexCount * indexEntrySize);
+        for (var i = 0; i < indexCount; i++)
+        {
+            var raw = indexSpan.Slice(i * indexEntrySize, indexEntrySize);
+            indexEntries[i] = new IndexEntry
+            {
+                Tag = BinaryPrimitives.ReadInt32BigEndian(raw[..4]),
+                Type = BinaryPrimitives.ReadInt32BigEndian(raw[4..8]),
+                Offset = BinaryPrimitives.ReadInt32BigEndian(raw[8..12]),
+                Count = BinaryPrimitives.ReadInt32BigEndian(raw[12..16]),
+            };
+        }
+
+        // Parse package info from index entries
+        return ParsePackageInfo(indexEntries, dataStore);
+    }
+
+    // Walks the index and copies the tags we care about into an RpmPackageInfo.
+    // Entries with an unexpected type or no extractable data are ignored.
+    private static RpmPackageInfo ParsePackageInfo(
+        ReadOnlySpan<IndexEntry> indexEntries,
+        ReadOnlySpan<byte> dataStore
+    )
+    {
+        var packageInfo = new RpmPackageInfo();
+
+        foreach (var entry in indexEntries)
+        {
+            // ExtractData returns an empty span for malformed entries instead of throwing.
+            var data = ExtractData(entry, dataStore);
+
+            switch (entry.Tag)
+            {
+                case RPMTAG_NAME when entry.Type == RPM_STRING_TYPE && !data.IsEmpty:
+                    packageInfo.Name = ParseString(data);
+                    break;
+
+                case RPMTAG_VERSION when entry.Type == RPM_STRING_TYPE && !data.IsEmpty:
+                    packageInfo.Version = ParseString(data);
+                    break;
+
+                case RPMTAG_RELEASE when entry.Type == RPM_STRING_TYPE && !data.IsEmpty:
+                    packageInfo.Release = ParseString(data);
+                    break;
+
+                case RPMTAG_EPOCH when entry.Type == RPM_INT32_TYPE && data.Length >= 4:
+                    packageInfo.Epoch = BinaryPrimitives.ReadInt32BigEndian(data);
+                    break;
+
+                case RPMTAG_ARCH when entry.Type == RPM_STRING_TYPE && !data.IsEmpty:
+                    packageInfo.Arch = ParseString(data);
+                    break;
+
+                case RPMTAG_SOURCERPM when entry.Type == RPM_STRING_TYPE && !data.IsEmpty:
+                    // "(none)" is stored as a literal placeholder; treat it as absent.
+                    packageInfo.SourceRpm = ParseString(data) is var sourceRpm && sourceRpm == "(none)"
+                        ? null
+                        : sourceRpm;
+                    break;
+
+                case RPMTAG_VENDOR when entry.Type == RPM_STRING_TYPE && !data.IsEmpty:
+                    packageInfo.Vendor = ParseString(data) is var vendor && vendor == "(none)"
+                        ? null
+                        : vendor;
+                    break;
+
+                case RPMTAG_PROVIDENAME when entry.Type == RPM_STRING_ARRAY_TYPE:
+                    packageInfo.Provides = ParseStringArray(data);
+                    break;
+
+                case RPMTAG_REQUIRENAME when entry.Type == RPM_STRING_ARRAY_TYPE:
+                    packageInfo.Requires = ParseStringArray(data);
+                    break;
+            }
+        }
+
+        return packageInfo;
+    }
+
+    // Returns the bytes belonging to one index entry, clamped to the data store.
+    // Returns an empty span when the offset or count is out of range.
+    private static ReadOnlySpan<byte> ExtractData(IndexEntry entry, ReadOnlySpan<byte> dataStore)
+    {
+        if (entry.Offset < 0 || entry.Offset >= dataStore.Length || entry.Count < 0)
+        {
+            return [];
+        }
+
+        var remaining = dataStore[entry.Offset..];
+
+        // long so that 4 * Count cannot overflow before it is clamped.
+        long dataLength;
+        switch (entry.Type)
+        {
+            case RPM_STRING_TYPE:
+            {
+                // One NUL-terminated string; include the terminator when present.
+                var terminator = remaining.IndexOf((byte)0);
+                dataLength = terminator < 0 ? remaining.Length : terminator + 1;
+                break;
+            }
+
+            case RPM_INT32_TYPE:
+                dataLength = 4L * entry.Count;
+                break;
+
+            case RPM_STRING_ARRAY_TYPE:
+            {
+                // A string array is Count consecutive NUL-terminated strings. Scanning for a
+                // double NUL instead would run into the next entry's data, or stop early at an
+                // empty string inside the array.
+                var consumed = 0;
+                for (var seen = 0; seen < entry.Count && consumed < remaining.Length; seen++)
+                {
+                    var terminator = remaining[consumed..].IndexOf((byte)0);
+                    if (terminator < 0)
+                    {
+                        // Unterminated tail: take what is left and stop.
+                        consumed = remaining.Length;
+                        break;
+                    }
+
+                    consumed += terminator + 1;
+                }
+
+                dataLength = consumed;
+                break;
+            }
+
+            default:
+                // Unknown type, try to read count bytes
+                dataLength = entry.Count;
+                break;
+        }
+
+        if (dataLength > remaining.Length)
+        {
+            dataLength = remaining.Length;
+        }
+
+        return remaining[..(int)dataLength];
+    }
+
+    // Decodes the bytes up to the first NUL (or the whole span if there is none) as UTF-8.
+    private static string ParseString(ReadOnlySpan<byte> data)
+    {
+        if (data.IsEmpty)
+        {
+            return string.Empty;
+        }
+
+        var length = data.IndexOf((byte)0);
+        if (length < 0)
+        {
+            length = data.Length;
+        }
+
+        return Encoding.UTF8.GetString(data[..length]);
+    }
+
+    // Splits the span on NUL bytes and decodes each non-empty, terminated piece as UTF-8.
+    // Bytes after the last NUL (an unterminated tail) are not returned.
+    private static List<string> ParseStringArray(ReadOnlySpan<byte> data)
+    {
+        var result = new List<string>();
+        var start = 0;
+
+        for (var i = 0; i < data.Length; i++)
+        {
+            if (data[i] != 0)
+            {
+                continue;
+            }
+
+            if (i > start)
+            {
+                result.Add(Encoding.UTF8.GetString(data[start..i]));
+            }
+
+            start = i + 1;
+        }
+
+        return result;
+    }
+
+    // One 16-byte index record: tag id, value type, offset into the data store, and element count.
+    private struct IndexEntry
+    {
+        public int Tag { get; set; }
+
+        public int Type { get; set; }
+
+        public int Offset { get; set; }
+
+        public int Count { get; set; }
+    }
+}
diff --git a/src/Microsoft.ComponentDetection.Detectors/rpm/RpmPackageInfo.cs b/src/Microsoft.ComponentDetection.Detectors/rpm/RpmPackageInfo.cs
new file mode 100644
index 000000000..85ebcf01f
--- /dev/null
+++ b/src/Microsoft.ComponentDetection.Detectors/rpm/RpmPackageInfo.cs
@@ -0,0 +1,32 @@
+namespace Microsoft.ComponentDetection.Detectors.Rpm;
+
+using System;
+using System.Buffers.Binary;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+///
+/// Represents package information extracted from an RPM header.
+///
+internal sealed class RpmPackageInfo
+{
+    /// <summary>Gets or sets the package name.</summary>
+    public string Name { get; set; }
+
+    /// <summary>Gets or sets the package version.</summary>
+    public string Version { get; set; }
+
+    /// <summary>Gets or sets the package release string.</summary>
+    public string Release { get; set; }
+
+    /// <summary>Gets or sets the package epoch; null when not set.</summary>
+    public int? Epoch { get; set; }
+
+    /// <summary>Gets or sets the package architecture.</summary>
+    public string Arch { get; set; }
+
+    /// <summary>Gets or sets the source RPM filename.</summary>
+    public string SourceRpm { get; set; }
+
+    /// <summary>Gets or sets the package vendor.</summary>
+    public string Vendor { get; set; }
+
+    /// <summary>Gets or sets the capabilities the package provides; defaults to an empty list.</summary>
+    public List<string> Provides { get; set; } = [];
+
+    /// <summary>Gets or sets the capabilities the package requires; defaults to an empty list.</summary>
+    public List<string> Requires { get; set; } = [];
+}
diff --git a/src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs b/src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs
index 1ee0f776b..11f5544e3 100644
--- a/src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs
+++ b/src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs
@@ -17,6 +17,7 @@ namespace Microsoft.ComponentDetection.Orchestrator.Extensions;
 using Microsoft.ComponentDetection.Detectors.Pip;
 using Microsoft.ComponentDetection.Detectors.Pnpm;
 using Microsoft.ComponentDetection.Detectors.Poetry;
+using Microsoft.ComponentDetection.Detectors.Rpm;
 using Microsoft.ComponentDetection.Detectors.Ruby;
 using Microsoft.ComponentDetection.Detectors.Rust;
 using Microsoft.ComponentDetection.Detectors.Spdx;
@@ -101,6 +102,9 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s
         services.AddSingleton();
         services.AddSingleton();
 
+        // RPM
+        services.AddSingleton();
+
         // Maven
         services.AddSingleton();
         services.AddSingleton();