From 6cf51d91ca671f0287f6dfa397052a4413acbf60 Mon Sep 17 00:00:00 2001 From: Jan Benz Date: Sun, 23 Nov 2025 12:33:01 +0100 Subject: [PATCH 1/2] Add pseudonymization of groups --- CHANGELOG.md | 1 + .../pseudonymization/Pseudonymization.java | 46 +++++++++++++++++++ .../PseudonymizationTest.java | 37 +++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70097501ff8..10c8a801766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - We added the possibility to configure the email provided to unpaywall. [#14340](https://github.com/JabRef/jabref/pull/14340) - We added a "Regenerate" button for the AI chat allowing the user to make the language model reformulate its response to the previous prompt. [#12191](https://github.com/JabRef/jabref/issues/12191) - We added support for transliteration of fields to English and automatic transliteration of generated citation key. 
[#11377](https://github.com/JabRef/jabref/issues/11377) +- We added pseudonymization of groups. [#14117](https://github.com/JabRef/jabref/issues/14117) ### Changed diff --git a/jablib/src/main/java/org/jabref/logic/pseudonymization/Pseudonymization.java b/jablib/src/main/java/org/jabref/logic/pseudonymization/Pseudonymization.java index abdedcc84d7..6667c65f697 100644 --- a/jablib/src/main/java/org/jabref/logic/pseudonymization/Pseudonymization.java +++ b/jablib/src/main/java/org/jabref/logic/pseudonymization/Pseudonymization.java @@ -5,11 +5,14 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import org.jabref.model.database.BibDatabase; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.Field; +import org.jabref.model.entry.field.StandardField; +import org.jabref.model.groups.GroupTreeNode; import org.jspecify.annotations.NullMarked; @@ -31,6 +34,8 @@ public Result pseudonymizeLibrary(BibDatabaseContext bibDatabaseContext) { Map> fieldToValueToIdMap = new HashMap<>(); List newEntries = pseudonymizeEntries(bibDatabaseContext, fieldToValueToIdMap); + Optional newGroups = pseudonymizeGroups(bibDatabaseContext, fieldToValueToIdMap); + Map valueMapping = new HashMap<>(); fieldToValueToIdMap.forEach((field, stringToIntMap) -> stringToIntMap.forEach((value, id) -> valueMapping.put(field.getName().toLowerCase(Locale.ROOT) + "-" + id, value))); @@ -38,6 +43,7 @@ public Result pseudonymizeLibrary(BibDatabaseContext bibDatabaseContext) { BibDatabase bibDatabase = new BibDatabase(newEntries); BibDatabaseContext result = new BibDatabaseContext(bibDatabase); result.setMode(bibDatabaseContext.getMode()); + newGroups.ifPresent(result.getMetaData()::setGroups); return new Result(result, valueMapping); } @@ -63,4 +69,44 @@ private static List pseudonymizeEntries(BibDatabaseContext bibDatabase } return newEntries; } + + /** + * Pseudonymizes the root 
group and all subgroups. + * If no groups exist, returns empty. + */ + private static Optional pseudonymizeGroups(BibDatabaseContext bibDatabaseContext, Map> fieldToValueToIdMap) { + var metadata = bibDatabaseContext.getMetaData(); + var groupsOpt = metadata.getGroups(); + + if (groupsOpt.isEmpty()) { + return Optional.empty(); + } + + var originalRoot = groupsOpt.get(); + var groupValueMap = fieldToValueToIdMap.computeIfAbsent(StandardField.GROUPS, _ -> new HashMap<>()); + + var newRoot = pseudonymizeGroupNode(originalRoot, groupValueMap); + return Optional.of(newRoot); + } + + /** + * Recursively builds a pseudonymized copy of {@code node} and its children: each group is deep-copied, the copy is renamed, and it is wrapped in a new {@link GroupTreeNode}. + * The ID for a group name is taken from {@code valueToIdMap}, or assigned as the map size + 1 when the name is first seen, so equal names share one ID; the new name is "groups-n". + */ + private static GroupTreeNode pseudonymizeGroupNode(GroupTreeNode node, Map valueToIdMap) { + var originalGroup = node.getGroup(); + var groupCopy = originalGroup.deepCopy(); + + var originalName = node.getName(); + var id = valueToIdMap.computeIfAbsent(originalName, _ -> valueToIdMap.size() + 1); + groupCopy.nameProperty().setValue(StandardField.GROUPS.getName() + "-" + id); + + var newNode = new GroupTreeNode(groupCopy); + for (GroupTreeNode child : node.getChildren()) { + var childCopy = pseudonymizeGroupNode(child, valueToIdMap); + newNode.addChild(childCopy); + } + + return newNode; + } } diff --git a/jablib/src/test/java/org/jabref/logic/pseudonymization/PseudonymizationTest.java b/jablib/src/test/java/org/jabref/logic/pseudonymization/PseudonymizationTest.java index 38443d4c19e..cab3d640970 100644 --- a/jablib/src/test/java/org/jabref/logic/pseudonymization/PseudonymizationTest.java +++ b/jablib/src/test/java/org/jabref/logic/pseudonymization/PseudonymizationTest.java @@ -21,6 +21,10 @@ import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BibEntryTypesManager; import org.jabref.model.entry.field.StandardField; +import org.jabref.model.groups.AllEntriesGroup; +import org.jabref.model.groups.ExplicitGroup; +import 
org.jabref.model.groups.GroupHierarchyType; +import org.jabref.model.groups.GroupTreeNode; import org.jabref.model.metadata.SaveOrder; import org.jabref.model.util.DummyFileUpdateMonitor; @@ -129,4 +133,37 @@ void pseudonymizeLibraryFile(@TempDir Path tempDir) throws URISyntaxException, I assertTrue(Files.exists(target)); } + + @Test + void pseudonymizeGroups() { + // given: a group tree Root -> Used -> Sub attached to a library without entries + var root = new GroupTreeNode(new AllEntriesGroup("Root")); + var used = root.addSubgroup(new ExplicitGroup("Used", GroupHierarchyType.INDEPENDENT, ',')); + used.addSubgroup(new ExplicitGroup("Sub", GroupHierarchyType.INDEPENDENT, ',')); + + var databaseContext = new BibDatabaseContext(new BibDatabase()); + databaseContext.getMetaData().setGroups(root); + + var pseudonymization = new Pseudonymization(); + + // when: the whole library is pseudonymized + Pseudonymization.Result result = pseudonymization.pseudonymizeLibrary(databaseContext); + var newRoot = result.bibDatabaseContext().getMetaData().getGroups().orElseThrow(); + + // then: nodes are renamed groups-1..groups-3 from the root downwards and the mapping resolves each ID to its original name + assertEquals("groups-1", newRoot.getName()); + assertTrue(newRoot.getFirstChild().isPresent()); + + var newUsed = newRoot.getFirstChild().orElseThrow(); + assertEquals("groups-2", newUsed.getName()); + assertTrue(newUsed.getFirstChild().isPresent()); + + var newSub = newUsed.getFirstChild().orElseThrow(); + assertEquals("groups-3", newSub.getName()); + + var mapping = result.valueMapping(); + assertEquals("Root", mapping.get("groups-1")); + assertEquals("Used", mapping.get("groups-2")); + assertEquals("Sub", mapping.get("groups-3")); + } } From 9fb6d023ebfca5a004b0813ee9b2534e011ee9db Mon Sep 17 00:00:00 2001 From: Jan Benz Date: Sun, 23 Nov 2025 12:59:05 +0100 Subject: [PATCH 2/2] Add pseudonymized groups to Chocolate-pseudnomyized.bib --- .../pseudonymization/Chocolate-pseudnomyized.bib | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/jablib/src/test/resources/org/jabref/logic/pseudonymization/Chocolate-pseudnomyized.bib 
b/jablib/src/test/resources/org/jabref/logic/pseudonymization/Chocolate-pseudnomyized.bib index b7598ce584d..4a6949b4966 100644 --- a/jablib/src/test/resources/org/jabref/logic/pseudonymization/Chocolate-pseudnomyized.bib +++ b/jablib/src/test/resources/org/jabref/logic/pseudonymization/Chocolate-pseudnomyized.bib @@ -211,3 +211,14 @@ @Article{citationkey-15 } @Comment{jabref-meta: databaseType:biblatex;} + +@Comment{jabref-meta: grouping: +0 AllEntriesGroup:; +1 SearchGroup:groups-3\;0\;groups !=~ .+\;0\;1\;1\;\;\;\;; +1 SearchGroup:groups-4\;0\;file !=~ .+\;0\;1\;1\;\;\;\;; +1 StaticGroup:groups-5\;0\;1\;\;\;\;; +1 SearchGroup:groups-6\;0\;groups !=~ .+ and readstatus !=~ .+\;0\;1\;1\;\;\;\;; +1 KeywordGroup:groups-7\;0\;readstatus\;skimmed\;0\;0\;1\;\;\;\;; +1 KeywordGroup:groups-8\;0\;readstatus\;read\;0\;0\;1\;\;\;\;; +1 StaticGroup:groups-1\;0\;1\;\;\;\;; +}