Skip to content

Commit 722ff87

Browse files
committed
Implement compacting for the shared source table
1 parent 06d187d commit 722ff87

File tree

2 files changed

+157
-32
lines changed

2 files changed

+157
-32
lines changed

src/profile-logic/profile-compacting.ts

Lines changed: 104 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,34 +18,40 @@ import type {
1818
export type CompactedProfileWithTranslationMaps = {
1919
profile: Profile;
2020
oldStringToNewStringPlusOne: Int32Array;
21+
oldSourceToNewSourcePlusOne: Int32Array;
2122
};
2223

2324
/**
24-
* Returns a new profile with all unreferenced strings removed.
25+
* Returns a new profile with all unreferenced strings and sources removed.
2526
*
26-
* Since the string table is shared between all threads, if the user asks for a
27-
* thread to be removed during sanitization, by default we'd keep the strings
28-
* from the removed threads in the profile.
27+
* Since the string table and source table are shared between all threads, if
28+
* the user asks for a thread to be removed during sanitization, by default
29+
* we'd keep the strings and sources from the removed threads in the profile.
2930
*
30-
* By calling this function, you can get a profile with an adjusted string table
31-
* where those unused strings from the removed threads have been removed.
31+
* By calling this function, you can get a profile with adjusted string and
32+
* source tables where those unused strings and sources from the removed
33+
* threads have been removed.
3234
*/
3335
export function computeCompactedProfile(
3436
profile: Profile
3537
): CompactedProfileWithTranslationMaps {
3638
const stringIndexMarkerFieldsByDataType =
3739
computeStringIndexMarkerFieldsByDataType(profile.meta.markerSchema);
3840

39-
// Step 1: Gather all references.
41+
// Step 1: Gather all references of strings.
4042
const referencedStrings = _gatherStringReferencesInProfile(
4143
profile,
4244
stringIndexMarkerFieldsByDataType
4345
);
4446

45-
// Step 2: Adjust all tables to use new string indexes.
46-
return _createProfileWithTranslatedStringIndexes(
47+
// Step 2: Gather all references of sources.
48+
const referencedSources = _gatherSourceReferencesInProfile(profile);
49+
50+
// Step 3: Adjust all tables to use new string and source indexes.
51+
return _createProfileWithTranslatedIndexes(
4752
profile,
4853
referencedStrings,
54+
referencedSources,
4955
stringIndexMarkerFieldsByDataType
5056
);
5157
}
@@ -68,36 +74,53 @@ function _gatherStringReferencesInProfile(
6874
return referencedStrings;
6975
}
7076

71-
function _createProfileWithTranslatedStringIndexes(
77+
/**
 * Builds the "is this source referenced?" flag array for a whole profile.
 *
 * The result has one slot per entry of `profile.shared.sources`. Every slot
 * starts at 0, and each thread of the profile is then handed to
 * `_gatherSourceReferencesInThread`, which records that thread's references
 * into the same array.
 *
 * @param profile - The profile whose threads are scanned.
 * @returns The flag array, indexed by the old (pre-compaction) source index.
 */
function _gatherSourceReferencesInProfile(profile: Profile): Uint8Array {
  const sourceCount = profile.shared.sources.length;
  const referencedSources = new Uint8Array(sourceCount);

  const { threads } = profile;
  for (let threadIndex = 0; threadIndex < threads.length; threadIndex++) {
    _gatherSourceReferencesInThread(threads[threadIndex], referencedSources);
  }

  return referencedSources;
}
86+
87+
/**
 * Records which sources are referenced by one thread's funcTable.
 *
 * The first `funcTable.length` entries of the `funcTable.source` column are
 * inspected. Every non-null entry is used as an index into
 * `referencedSources`, and that slot is set to 1. Null entries are skipped.
 *
 * @param thread - The thread whose funcTable is scanned.
 * @param referencedSources - Flag array that is mutated in place.
 */
function _gatherSourceReferencesInThread(
  thread: RawThread,
  referencedSources: Uint8Array
) {
  const { funcTable } = thread;
  for (let funcIndex = 0; funcIndex < funcTable.length; funcIndex++) {
    const referencedSource = funcTable.source[funcIndex];
    if (referencedSource === null) {
      // This function has no associated source.
      continue;
    }
    referencedSources[referencedSource] = 1;
  }
}
98+
99+
function _createProfileWithTranslatedIndexes(
72100
profile: Profile,
73101
referencedStrings: Uint8Array,
102+
referencedSources: Uint8Array,
74103
stringIndexMarkerFieldsByDataType: Map<string, string[]>
75104
): CompactedProfileWithTranslationMaps {
76105
const { newStringArray, oldStringToNewStringPlusOne } =
77106
_createCompactedStringArray(profile.shared.stringArray, referencedStrings);
78107

108+
const { newSources, oldSourceToNewSourcePlusOne } =
109+
_createCompactedSourceTable(
110+
profile.shared.sources,
111+
referencedSources,
112+
oldStringToNewStringPlusOne
113+
);
114+
79115
const newThreads = profile.threads.map((thread) =>
80-
_createThreadWithTranslatedStringIndexes(
116+
_createThreadWithTranslatedIndexes(
81117
thread,
82118
oldStringToNewStringPlusOne,
119+
oldSourceToNewSourcePlusOne,
83120
stringIndexMarkerFieldsByDataType
84121
)
85122
);
86123

87-
// Update sources table with translated string indexes
88-
const newSources = {
89-
...profile.shared.sources,
90-
filename: profile.shared.sources.filename.map((oldUrlIndex) => {
91-
const newIndexPlusOne = oldStringToNewStringPlusOne[oldUrlIndex];
92-
if (newIndexPlusOne === 0) {
93-
throw new Error(
94-
`String index ${oldUrlIndex} was not found in the translation map`
95-
);
96-
}
97-
return newIndexPlusOne - 1;
98-
}),
99-
};
100-
101124
const newShared: RawProfileSharedData = {
102125
stringArray: newStringArray,
103126
sources: newSources,
@@ -112,6 +135,7 @@ function _createProfileWithTranslatedStringIndexes(
112135
return {
113136
profile: newProfile,
114137
oldStringToNewStringPlusOne,
138+
oldSourceToNewSourcePlusOne,
115139
};
116140
}
117141

@@ -132,9 +156,10 @@ function _gatherStringReferencesInThread(
132156
_gatherReferencesInNativeSymbols(thread.nativeSymbols, referencedStrings);
133157
}
134158

135-
function _createThreadWithTranslatedStringIndexes(
159+
function _createThreadWithTranslatedIndexes(
136160
thread: RawThread,
137161
oldStringToNewStringPlusOne: Int32Array,
162+
oldSourceToNewSourcePlusOne: Int32Array,
138163
stringIndexMarkerFieldsByDataType: Map<string, string[]>
139164
): RawThread {
140165
const newNativeSymbols = _createNativeSymbolsWithTranslatedStringIndexes(
@@ -145,9 +170,10 @@ function _createThreadWithTranslatedStringIndexes(
145170
thread.resourceTable,
146171
oldStringToNewStringPlusOne
147172
);
148-
const newFuncTable = _createFuncTableWithTranslatedStringIndexes(
173+
const newFuncTable = _createFuncTableWithTranslatedIndexes(
149174
thread.funcTable,
150-
oldStringToNewStringPlusOne
175+
oldStringToNewStringPlusOne,
176+
oldSourceToNewSourcePlusOne
151177
);
152178
const newMarkers = _createMarkersWithTranslatedStringIndexes(
153179
thread.markers,
@@ -253,18 +279,23 @@ function _gatherReferencesInFuncTable(
253279
}
254280
}
255281

256-
function _createFuncTableWithTranslatedStringIndexes(
282+
function _createFuncTableWithTranslatedIndexes(
257283
funcTable: FuncTable,
258-
oldStringToNewStringPlusOne: Int32Array
284+
oldStringToNewStringPlusOne: Int32Array,
285+
oldSourceToNewSourcePlusOne: Int32Array
259286
): FuncTable {
260287
const newFuncTableNameCol = funcTable.name.slice();
261288
const newFuncTableSourceCol = funcTable.source.slice();
262289
for (let i = 0; i < funcTable.length; i++) {
263290
const name = funcTable.name[i];
264291
newFuncTableNameCol[i] = oldStringToNewStringPlusOne[name] - 1;
265292

266-
// Note: source indexes don't need translation as they point to sources table, not strings
267-
// Source table will be handled separately in _createSourcesTableWithTranslatedStringIndexes
293+
// Translate source indexes to new compacted source table.
294+
const sourceIndex = funcTable.source[i];
295+
if (sourceIndex !== null) {
296+
const newSourceIndexPlusOne = oldSourceToNewSourcePlusOne[sourceIndex];
297+
newFuncTableSourceCol[i] = newSourceIndexPlusOne - 1;
298+
}
268299
}
269300

270301
const newFuncTable = {
@@ -357,3 +388,44 @@ function _createCompactedStringArray(
357388

358389
return { newStringArray, oldStringToNewStringPlusOne };
359390
}
391+
392+
/**
 * Creates a source table that only contains the sources flagged in
 * `referencedSources`, together with a map from old to new source indexes.
 *
 * Kept sources preserve their relative order. The filename column holds
 * string indexes, so each kept filename is translated through
 * `oldStringToNewStringPlusOne` on the way.
 *
 * @param sourceTable - The original source table.
 * @param referencedSources - One flag per old source index; sources whose
 *   flag is 0 are dropped.
 * @param oldStringToNewStringPlusOne - Maps an old string index to the new
 *   string index plus one; 0 means the string has no new index.
 * @returns `newSources`, the compacted table, and
 *   `oldSourceToNewSourcePlusOne`, which maps an old source index to the new
 *   source index plus one (0 for dropped sources).
 * @throws Error if the filename of a kept source maps to 0 in
 *   `oldStringToNewStringPlusOne`.
 */
function _createCompactedSourceTable(
  sourceTable: SourceTable,
  referencedSources: Uint8Array,
  oldStringToNewStringPlusOne: Int32Array
): { newSources: SourceTable; oldSourceToNewSourcePlusOne: Int32Array } {
  const keptUuids = [];
  const keptFilenames = [];
  const oldSourceToNewSourcePlusOne = new Int32Array(sourceTable.length);

  for (let oldIndex = 0; oldIndex < sourceTable.length; oldIndex++) {
    if (referencedSources[oldIndex] !== 0) {
      // The filename is a string index and has to follow the string compaction.
      const oldFilenameIndex = sourceTable.filename[oldIndex];
      const translatedPlusOne = oldStringToNewStringPlusOne[oldFilenameIndex];
      if (translatedPlusOne === 0) {
        throw new Error(
          `String index ${oldFilenameIndex} was not found in the translation map`
        );
      }

      keptUuids.push(sourceTable.uuid[oldIndex]);
      keptFilenames.push(translatedPlusOne - 1);

      // After the push, the column length equals "new index + 1".
      oldSourceToNewSourcePlusOne[oldIndex] = keptUuids.length;
    }
  }

  const newSources: SourceTable = {
    length: keptUuids.length,
    uuid: keptUuids,
    filename: keptFilenames,
  };

  return { newSources, oldSourceToNewSourcePlusOne };
}

src/test/unit/sanitize.test.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,4 +1206,57 @@ describe('sanitizePII', function () {
12061206
]);
12071207
});
12081208
});
1209+
1210+
it('should compact the source table when threads are removed', function () {
  // Three threads, each with a single function that comes from its own file,
  // so that every thread references a different entry of the shared source
  // table.
  const { profile } = getProfileFromTextSamples(
    `A[file:file1.js]`,
    `B[file:file2.js]`,
    `C[file:file3.js]`
  );

  expect(profile.shared.sources.length).toEqual(3);

  // The three threads must reference pairwise distinct sources, otherwise
  // removing two threads would not necessarily shrink the source table.
  const originalSourceIndexes = profile.threads.map(
    (thread) => thread.funcTable.source[0]
  );
  expect(new Set(originalSourceIndexes).size).toEqual(3);

  // Remove threads 0 and 2, keeping only thread 1.
  const { sanitizedProfile } = setup(
    {
      shouldRemoveThreads: new Set([0, 2]),
    },
    profile
  );

  // Only the source referenced by the remaining thread should be left, and
  // the columns have to agree with the table length.
  const { sources, stringArray } = sanitizedProfile.shared;
  expect(sources.length).toEqual(1);
  expect(sources.uuid).toHaveLength(1);
  expect(sources.filename).toHaveLength(1);

  // The remaining thread should still have a valid source reference. The
  // explicit guard narrows the type, so no non-null assertion is needed below.
  const remainingSourceIndex = sanitizedProfile.threads[0].funcTable.source[0];
  if (remainingSourceIndex === null) {
    throw new Error(
      'Expected the remaining thread to still reference a source'
    );
  }
  // A lower bound is needed too: a failed translation would produce -1,
  // which an upper-bound check alone does not catch.
  expect(remainingSourceIndex).toBeGreaterThanOrEqual(0);
  expect(remainingSourceIndex).toBeLessThan(sources.length);

  // The filename string of the remaining source is still reachable through
  // the compacted string table.
  const filenameStringIndex = sources.filename[remainingSourceIndex];
  expect(stringArray[filenameStringIndex]).toContain('file2.js');
});
12091262
});

0 commit comments

Comments
 (0)