Skip to content
This repository was archived by the owner on May 7, 2024. It is now read-only.

Commit 531a061

Browse files
committed
Skipping subtrees that are not relevant for deletion for much better performance on exact file deletion
1 parent d1d4fdb commit 531a061

File tree

5 files changed

+66
-7
lines changed

5 files changed

+66
-7
lines changed

GitRewrite/Delete/DeleteObjects.cs

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,28 @@ public static void Run(string repositoryPath, IEnumerable<string> filesToDelete,
1414
{
1515
var fileDeleteStrategies = new FileDeletionStrategies(filesToDelete);
1616
var folderDeleteStrategies = new FolderDeletionStrategies(foldersToDelete);
17-
var rewrittenCommits = RemoveObjectsFromTree(repositoryPath, fileDeleteStrategies, folderDeleteStrategies);
17+
18+
var relevantPathes =
19+
fileDeleteStrategies.RelevantPaths.Union(folderDeleteStrategies.RelevantPaths).ToList();
20+
21+
var rewrittenCommits = RemoveObjectsFromTree(repositoryPath, fileDeleteStrategies, folderDeleteStrategies,
22+
relevantPathes);
1823
if (rewrittenCommits.Any())
1924
Refs.Update(repositoryPath, rewrittenCommits);
2025
}
2126

2227
public static Dictionary<ObjectHash, ObjectHash> RemoveObjectsFromTree(string vcsPath,
23-
FileDeletionStrategies filesToDelete, FolderDeletionStrategies foldersToDelete)
28+
FileDeletionStrategies filesToDelete, FolderDeletionStrategies foldersToDelete,
29+
List<byte[]> relevantPaths)
2430
{
2531
var rewrittenCommits = new Dictionary<ObjectHash, ObjectHash>();
2632
var rewrittenTrees = new ConcurrentDictionary<ObjectHash, ObjectHash>();
2733

2834
foreach (var commit in CommitWalker
2935
.CommitsInOrder(vcsPath))
3036
{
31-
var newTreeHash = RemoveObjectFromTree(vcsPath, commit.TreeHash, filesToDelete, foldersToDelete, rewrittenTrees, new byte[0]);
37+
var newTreeHash = RemoveObjectFromTree(vcsPath, commit.TreeHash, filesToDelete, foldersToDelete,
38+
rewrittenTrees, new byte[0], relevantPaths);
3239
if (newTreeHash != commit.TreeHash)
3340
{
3441
var newCommit = Commit.GetSerializedCommitWithChangedTreeAndParents(commit, newTreeHash,
@@ -51,17 +58,50 @@ public static Dictionary<ObjectHash, ObjectHash> RemoveObjectsFromTree(string vc
5158
private static readonly ArrayPool<byte> FilePathPool = ArrayPool<byte>.Shared;
5259
private static readonly TreeLineByHashComparer TreeLineByHashComparer = new TreeLineByHashComparer();
5360

61+
/// <summary>
/// Decides whether the tree at <paramref name="currentPath"/> needs to be inspected,
/// by testing whether <paramref name="currentPath"/> is a byte-wise prefix of at least
/// one entry in <paramref name="relevantPathes"/>.
/// </summary>
/// <param name="currentPath">Bytes of the path being visited; an empty span is always treated as relevant.</param>
/// <param name="relevantPathes">Candidate paths to compare against; an empty list makes every path relevant.</param>
/// <returns>
/// <c>true</c> if <paramref name="currentPath"/> is empty, the list is empty, or some entry
/// starts with the bytes of <paramref name="currentPath"/>; otherwise <c>false</c>.
/// </returns>
// NOTE(review): the comparison is purely byte-wise and does not check that the match ends
// on a '/' boundary, so a path that is a byte prefix of an entry (e.g. "/pa" vs "/path")
// is also reported as relevant. That only costs extra work, not correctness - confirm.
// NOTE(review): the list appears to be filled only from exact ('/'-prefixed) patterns; if
// wildcard or simple-name patterns are combined with exact ones, subtrees rejected here
// would presumably never be checked against them - verify against the callers.
private static bool IsPathRelevant(in ReadOnlySpan<byte> currentPath, List<byte[]> relevantPathes)
62+
{
63+
// Empty current path or no restricting paths at all: nothing can be skipped.
if (currentPath.Length == 0 || !relevantPathes.Any())
64+
return true;
65+
66+
for (int i = relevantPathes.Count - 1; i >= 0; i--)
67+
{
68+
var path = relevantPathes[i];
69+
70+
// A current path longer than the candidate cannot be a prefix of it.
if (currentPath.Length > path.Length)
71+
continue;
72+
73+
var isRelevant = true;
74+
// Compare the first currentPath.Length bytes, walking from the end towards the start.
for (int j = currentPath.Length - 1; j >= 0; j--)
75+
{
76+
if (currentPath[j] != path[j])
77+
{
78+
isRelevant = false;
79+
break;
80+
}
81+
}
82+
83+
// One matching candidate is enough.
if (isRelevant)
84+
return true;
85+
}
86+
87+
return false;
88+
}
89+
5490
private static ObjectHash RemoveObjectFromTree(
5591
string vcsPath,
5692
ObjectHash treeHash,
5793
FileDeletionStrategies filesToRemove,
5894
FolderDeletionStrategies foldersToRemove,
5995
ConcurrentDictionary<ObjectHash, ObjectHash> rewrittenTrees,
60-
ReadOnlySpan<byte> currentPath)
96+
in ReadOnlySpan<byte> currentPath,
97+
List<byte[]> relevantPathes)
6198
{
6299
if (rewrittenTrees.TryGetValue(treeHash, out var rewrittenHash))
63100
return rewrittenHash;
64101

102+
if (!IsPathRelevant(currentPath, relevantPathes))
103+
return treeHash;
104+
65105
var tree = GitObjectFactory.ReadTree(vcsPath, treeHash);
66106
var resultingLines = new List<Tree.TreeLine>();
67107
foreach (var line in tree.Lines)
@@ -89,7 +129,8 @@ private static ObjectHash RemoveObjectFromTree(
89129
filesToRemove,
90130
foldersToRemove,
91131
rewrittenTrees,
92-
path);
132+
path,
133+
relevantPathes);
93134

94135
FilePathPool.Return(rentedPathBytes);
95136

GitRewrite/Delete/FileDeletionStrategies.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Text;
34

45
namespace GitRewrite.Delete
56
{
@@ -15,14 +16,23 @@ public FileDeletionStrategies(IEnumerable<string> filePatterns)
1516
if (objectPattern[0] == '*')
1617
_strategies.Add(new FileEndsWithDeletionStrategy(objectPattern));
1718
else if (objectPattern[0] == '/')
19+
{
1820
_strategies.Add(new FileExactDeletionStrategy(objectPattern));
21+
22+
var indexToCut = objectPattern.LastIndexOf('/');
23+
var pathString = objectPattern.Substring(0, indexToCut);
24+
var bytes = Encoding.UTF8.GetBytes(pathString);
25+
RelevantPaths.Add(bytes);
26+
}
1927
else if (objectPattern[objectPattern.Length - 1] == '*')
2028
_strategies.Add(new FileStartsWithDeletionStrategy(objectPattern));
2129
else
2230
_strategies.Add(new FileSimpleDeleteStrategy(objectPattern));
2331
}
2432
}
2533

34+
public List<byte[]> RelevantPaths { get; } = new List<byte[]>();
35+
2636
public bool DeleteObject(in ReadOnlySpan<byte> fileName, ReadOnlySpan<byte> currentPath)
2737
{
2838
foreach (var strategy in _strategies)

GitRewrite/Delete/FolderDeletionStrategies.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Text;
34

45
namespace GitRewrite.Delete
56
{
@@ -15,14 +16,20 @@ public FolderDeletionStrategies(IEnumerable<string> patterns)
1516
if (objectPattern[0] == '*')
1617
_strategies.Add(new FolderEndsWithDeletionStrategy(objectPattern));
1718
else if (objectPattern[0] == '/')
18-
_strategies.Add(new FolderExactDeletionStrategy(objectPattern));
19+
{
20+
var bytes = Encoding.UTF8.GetBytes(objectPattern);
21+
_strategies.Add(new FolderExactDeletionStrategy(bytes));
22+
RelevantPaths.Add(bytes);
23+
}
1924
else if (objectPattern[objectPattern.Length - 1] == '*')
2025
_strategies.Add(new FolderStartsWithDeletionStrategy(objectPattern));
2126
else
2227
_strategies.Add(new FolderSimpleDeleteStrategy(objectPattern));
2328
}
2429
}
2530

31+
public List<byte[]> RelevantPaths { get; } = new List<byte[]>();
32+
2633
public bool DeleteObject(ReadOnlySpan<byte> currentPath)
2734
{
2835
foreach (var strategy in _strategies)

GitRewrite/Delete/FolderExactDeletionStrategy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ class FolderExactDeletionStrategy : IFolderDeletionStrategy
88
{
99
private readonly Memory<byte> _folderName;
1010

11-
public FolderExactDeletionStrategy(string fileName) => _folderName = Encoding.UTF8.GetBytes(fileName);
11+
public FolderExactDeletionStrategy(byte[] fileName) => _folderName = fileName;
1212

1313
public bool DeleteObject(in ReadOnlySpan<byte> currentPath) => _folderName.Span.SpanEquals(currentPath);
1414
}

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Deleting should be pretty fast, in my tests it even outperformed the bfg repo cl
2020
Simple wildcards for the beginning and the end of the filename are supported, like &ast;.zip.
2121
It also lets you specify the complete path to the file instead of only a file name.
2222
For this, the path has to be prefixed with a forward slash, and the path separator is also a forward slash: /path/to/file.txt
23+
Specifying only files with a complete path results in much better performance, as not all subtrees have to be checked.
2324

2425
### Deleting folders
2526
```

0 commit comments

Comments
 (0)