Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6cf51d9
Add pseudonymization of groups
janbnz Nov 23, 2025
9fb6d02
Add pseudonymized groups to Chocolate-pseudnomyized.bib
janbnz Nov 23, 2025
93b00c2
Replace var with explicit types
janbnz Dec 14, 2025
ade8062
Add test for entries having a group
janbnz Dec 14, 2025
5b3fd57
Merge remote-tracking branch 'upstream/main' into fix-for-issue-14117
janbnz Dec 14, 2025
9f851a3
Apply OpenRewrite cleanup
janbnz Dec 14, 2025
477deb2
Add pseudonymization of groups
janbnz Nov 23, 2025
038a80f
Add pseudonymized groups to Chocolate-pseudnomyized.bib
janbnz Nov 23, 2025
037e21a
Replace var with explicit types
janbnz Dec 14, 2025
2e4d891
Add test for entries having a group
janbnz Dec 14, 2025
2027aab
Apply OpenRewrite cleanup
janbnz Dec 14, 2025
0d7eca5
Merge remote-tracking branch 'origin/fix-for-issue-14117' into fix-fo…
janbnz Dec 14, 2025
7a4b009
Remove duplicate changelog entry
janbnz Dec 14, 2025
88d93f7
Merge branch 'main' into fix-for-issue-14117
janbnz Dec 19, 2025
e55286b
Rename "groups" prefix to "group"
janbnz Dec 20, 2025
30a0994
Copy group search syntax version from original file
janbnz Dec 20, 2025
76ee2dc
Fix Chocolate-pseudnomyized.bib
janbnz Dec 20, 2025
57e13a6
Fix group pseudonymization
janbnz Dec 20, 2025
fe698bc
Merge branch 'main' into fix-for-issue-14117
calixtus Dec 22, 2025
35e62f7
Merge remote-tracking branch 'origin/main' into fix-for-issue-14117
koppor Dec 22, 2025
7b478a2
Convert JavaDoc to Markdown
koppor Dec 22, 2025
4d3f275
Adapt test
koppor Dec 22, 2025
cf35bf7
Improve test
koppor Dec 22, 2025
69cbcc7
Fix typo
koppor Dec 22, 2025
c7ce689
Fix test
koppor Dec 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
- We added support for `html` when parsing the arXiv identifiers. [#14451](https://github.com/JabRef/jabref/issues/14451)
- We added the option to change the Git username and PAT in Network Preferences. [#14509](https://github.com/JabRef/jabref/pull/14509)
- When parsing a plain text citation, we added support for recognizing and extracting arXiv identifiers. [#14455](https://github.com/JabRef/jabref/pull/14455)
- We introduced a new "Search Engine URL Template" setting in Preferences to allow users to customize their search engine URL templates [#12268](https://github.com/JabRef/jabref/issues/12268)
- We introduced a new "Search Engine URL Template" setting in Preferences to allow users to customize their search engine URL templates. [#12268](https://github.com/JabRef/jabref/issues/12268)
- We added pseudonymization of groups. [#14117](https://github.com/JabRef/jabref/issues/14117)
- We enabled CLI parameters for customizing citation key generation in JabKit, allowing users to override citation key patterns without modifying GUI settings. [#14361](https://github.com/JabRef/jabref/issues/14361)
- We added the option to pseudonymize a library using the GUI, via the tools tab in the Main Menu. [#14118](https://github.com/JabRef/jabref/issues/14118)
- We added export options (Markdown and JSON) for AI Summary and AI Chat. [#13868](https://github.com/JabRef/jabref/issues/13868)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,27 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;

import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.groups.AbstractGroup;
import org.jabref.model.groups.GroupTreeNode;
import org.jabref.model.metadata.MetaData;

import org.jspecify.annotations.NullMarked;

/**
* This class is used to anonymize a library. It is required to make private libraries available for public use.
* <p>
* For "just" generating large .bib files, scripts/bib-file-generator.py can be used.
*/
/// This class is used to anonymize a library. It is required to make private libraries available for public use.
///
/// For "just" generating large `.bib` files, `scripts/bib-file-generator.py` can be used.
@NullMarked
public class Pseudonymization {

private static final String GROUPS_PSEUDONYM_PREFIX = "group";

/// Outcome of pseudonymizing a library.
///
/// @param bibDatabaseContext the newly created library containing the pseudonymized data
/// @param valueMapping       maps each generated pseudonym (e.g., `group-1`) to the original value it replaced
public record Result(BibDatabaseContext bibDatabaseContext, Map<String, String> valueMapping) {
}

Expand All @@ -31,13 +36,18 @@ public Result pseudonymizeLibrary(BibDatabaseContext bibDatabaseContext) {
Map<Field, Map<String, Integer>> fieldToValueToIdMap = new HashMap<>();
List<BibEntry> newEntries = pseudonymizeEntries(bibDatabaseContext, fieldToValueToIdMap);

Optional<GroupTreeNode> newGroups = pseudonymizeGroups(bibDatabaseContext, fieldToValueToIdMap);

Map<String, String> valueMapping = new HashMap<>();
fieldToValueToIdMap.forEach((field, stringToIntMap) ->
stringToIntMap.forEach((value, id) -> valueMapping.put(field.getName().toLowerCase(Locale.ROOT) + "-" + id, value)));
stringToIntMap.forEach((value, id) -> valueMapping.put(getFieldContent(field, id), value)));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method name does not match: for groups, it's NOT the entire field content.


BibDatabase bibDatabase = new BibDatabase(newEntries);
BibDatabaseContext result = new BibDatabaseContext(bibDatabase);
result.setMode(bibDatabaseContext.getMode());
newGroups.ifPresent(result.getMetaData()::setGroups);

bibDatabaseContext.getMetaData().getGroupSearchSyntaxVersion().ifPresent(result.getMetaData()::setGroupSearchSyntaxVersion);

return new Result(result, valueMapping);
}
Expand All @@ -57,10 +67,80 @@ private static List<BibEntry> pseudonymizeEntries(BibDatabaseContext bibDatabase
// TODO: Use {@link org.jabref.model.entry.field.FieldProperty} to distinguish cases.
// See {@link org.jabref.model.entry.field.StandardField} for usages.
String fieldContent = entry.getField(field).get();
Integer id = valueToIdMap.computeIfAbsent(fieldContent, k -> valueToIdMap.size() + 1);
newEntry.setField(field, field.getName() + "-" + id);

if (field == StandardField.GROUPS) {
List<String> groups = splitGroups(fieldContent);
String pseudonymizedGroups = pseudonymizeGroupValue(groups, valueToIdMap);
newEntry.setField(field, pseudonymizedGroups);
} else {
Integer id = valueToIdMap.computeIfAbsent(fieldContent, k -> valueToIdMap.size() + 1);
newEntry.setField(field, getFieldContent(field, id));
}
}
}
return newEntries;
}

/// Creates a pseudonymized copy of the library's group tree (root group and all subgroups).
///
/// Group names are numbered through the map stored for [StandardField#GROUPS] in
/// `fieldToValueToIdMap`. That map is only created when the library actually has a group tree.
///
/// @param bibDatabaseContext  the library whose groups are read from its metadata
/// @param fieldToValueToIdMap per-field numbering of original values; updated with the group names
/// @return the root of the copied tree, or empty if the library has no groups
private static Optional<GroupTreeNode> pseudonymizeGroups(BibDatabaseContext bibDatabaseContext, Map<Field, Map<String, Integer>> fieldToValueToIdMap) {
    return bibDatabaseContext.getMetaData()
                             .getGroups()
                             .map(originalRoot -> pseudonymizeGroupNode(
                                     originalRoot,
                                     fieldToValueToIdMap.computeIfAbsent(StandardField.GROUPS, _ -> new HashMap<>())));
}

/// Recursively copies a group node and its children, giving every copy a pseudonymized name.
///
/// Each distinct original name is assigned the next free number in `valueToIdMap`; the copy is
/// then named `getFieldContent(StandardField.GROUPS, id)`, i.e., original -> "group-n".
///
/// Only the name is replaced. Everything else `deepCopy()` carries over from the original group
/// is kept as is.
///
/// @param node         the original node to copy
/// @param valueToIdMap numbering of original group names; extended for names not seen before
/// @return a new node holding the renamed group copy and the pseudonymized children
private static GroupTreeNode pseudonymizeGroupNode(GroupTreeNode node, Map<String, Integer> valueToIdMap) {
    AbstractGroup renamedGroup = node.getGroup().deepCopy();

    int pseudonymId = valueToIdMap.computeIfAbsent(node.getName(), _ -> valueToIdMap.size() + 1);
    renamedGroup.nameProperty().setValue(getFieldContent(StandardField.GROUPS, pseudonymId));

    GroupTreeNode copy = new GroupTreeNode(renamedGroup);
    node.getChildren().forEach(child -> copy.addChild(pseudonymizeGroupNode(child, valueToIdMap)));
    return copy;
}

/// Splits the content of a `groups` field into the individual group names.
///
/// Names are separated by commas. Whitespace around each name is removed, and empty segments
/// (blank content, leading, trailing, or doubled commas) are dropped so that they neither show
/// up as a group nor receive a pseudonym of their own.
///
/// @param content the raw field content
/// @return the group names in order of appearance; empty if `content` contains no name
private static List<String> splitGroups(String content) {
    List<String> groupNames = new ArrayList<>();
    for (String segment : content.split(",")) {
        // strip() also removes whitespace at the very start and end of the field,
        // which a separator-only pattern would leave attached to the first/last name
        String groupName = segment.strip();
        if (!groupName.isEmpty()) {
            groupNames.add(groupName);
        }
    }
    return List.copyOf(groupNames);
}

/// Replaces each group name by its pseudonym and joins the pseudonyms with ", ".
///
/// A name not yet contained in `valueToIdMap` is assigned the next free number (current map size + 1).
///
/// @param values       the original group names, in the order they should appear in the result
/// @param valueToIdMap numbering of original group names; extended for names not seen before
/// @return the pseudonyms separated by ", " (empty string for an empty list)
private static String pseudonymizeGroupValue(List<String> values, Map<String, Integer> valueToIdMap) {
    StringBuilder joined = new StringBuilder();

    for (String groupName : values) {
        if (joined.length() > 0) {
            joined.append(", ");
        }
        Integer number = valueToIdMap.computeIfAbsent(groupName, unused -> valueToIdMap.size() + 1);
        joined.append(GROUPS_PSEUDONYM_PREFIX).append("-").append(number);
    }

    return joined.toString();
}

/// Builds the pseudonym `<prefix>-<id>` for a value of the given field.
///
/// For [StandardField#GROUPS] the prefix is `GROUPS_PSEUDONYM_PREFIX`; for every other field it
/// is the field name lower-cased with [Locale#ROOT].
///
/// @param field the field the pseudonymized value belongs to
/// @param id    the number assigned to the original value
/// @return the pseudonym for the value
private static String getFieldContent(Field field, int id) {
    if (field == StandardField.GROUPS) {
        return GROUPS_PSEUDONYM_PREFIX + "-" + id;
    }
    return field.getName().toLowerCase(Locale.ROOT) + "-" + id;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import org.jabref.logic.bibtex.FieldPreferences;
import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences;
Expand All @@ -21,6 +22,10 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BibEntryTypesManager;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.groups.AllEntriesGroup;
import org.jabref.model.groups.ExplicitGroup;
import org.jabref.model.groups.GroupHierarchyType;
import org.jabref.model.groups.GroupTreeNode;
import org.jabref.model.metadata.SaveOrder;
import org.jabref.model.util.DummyFileUpdateMonitor;

Expand All @@ -30,6 +35,7 @@
import org.mockito.Answers;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;

Expand Down Expand Up @@ -129,4 +135,93 @@ void pseudonymizeLibraryFile(@TempDir Path tempDir) throws URISyntaxException, I

assertTrue(Files.exists(target));
}

@Test
void pseudonymizeGroups() {
    // given: a three-level group tree Root -> Used -> Sub
    GroupTreeNode originalRoot = new GroupTreeNode(new AllEntriesGroup("Root"));
    originalRoot.addSubgroup(new ExplicitGroup("Used", GroupHierarchyType.INDEPENDENT, ','))
                .addSubgroup(new ExplicitGroup("Sub", GroupHierarchyType.INDEPENDENT, ','));

    BibDatabaseContext library = new BibDatabaseContext(new BibDatabase());
    library.getMetaData().setGroups(originalRoot);

    // when
    Pseudonymization.Result result = new Pseudonymization().pseudonymizeLibrary(library);

    // then: the tree shape is kept and the names are numbered from the root downwards
    GroupTreeNode pseudonymizedRoot = result.bibDatabaseContext().getMetaData().getGroups().orElseThrow();
    assertEquals("group-1", pseudonymizedRoot.getName());
    assertTrue(pseudonymizedRoot.getFirstChild().isPresent());

    GroupTreeNode pseudonymizedUsed = pseudonymizedRoot.getFirstChild().orElseThrow();
    assertEquals("group-2", pseudonymizedUsed.getName());
    assertTrue(pseudonymizedUsed.getFirstChild().isPresent());

    GroupTreeNode pseudonymizedSub = pseudonymizedUsed.getFirstChild().orElseThrow();
    assertEquals("group-3", pseudonymizedSub.getName());

    // and: every pseudonym can be traced back to the original group name
    Map<String, String> pseudonymToOriginal = result.valueMapping();
    assertEquals("Root", pseudonymToOriginal.get("group-1"));
    assertEquals("Used", pseudonymToOriginal.get("group-2"));
    assertEquals("Sub", pseudonymToOriginal.get("group-3"));
}

@Test
void pseudonymizeEntriesWithGroup() {
    // given: three entries sharing two group names
    BibEntry first = new BibEntry("first").withField(StandardField.GROUPS, "MyGroup");
    BibEntry second = new BibEntry("second").withField(StandardField.GROUPS, "MyGroup, OtherGroup");
    BibEntry third = new BibEntry("third").withField(StandardField.GROUPS, "OtherGroup");
    BibDatabaseContext library = new BibDatabaseContext(new BibDatabase(List.of(first, second, third)));

    // when
    Pseudonymization.Result result = new Pseudonymization().pseudonymizeLibrary(library);

    // then: the same group name gets the same pseudonym in every entry
    List<BibEntry> pseudonymizedEntries = result.bibDatabaseContext().getEntries();
    assertEquals(3, pseudonymizedEntries.size());

    assertEquals(Optional.of("group-1"), pseudonymizedEntries.getFirst().getField(StandardField.GROUPS));
    assertEquals(Optional.of("group-1, group-2"), pseudonymizedEntries.get(1).getField(StandardField.GROUPS));
    assertEquals(Optional.of("group-2"), pseudonymizedEntries.get(2).getField(StandardField.GROUPS));

    // and: the pseudonyms map back to the original names
    Map<String, String> pseudonymToOriginal = result.valueMapping();
    assertEquals("MyGroup", pseudonymToOriginal.get("group-1"));
    assertEquals("OtherGroup", pseudonymToOriginal.get("group-2"));
}

@Test
void pseudonymizeEntryWithMultipleGroups() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A test case covering the complete library is missing - I fear the identifier numbers are not in sync.

// given
BibDatabaseContext databaseContext = new BibDatabaseContext(new BibDatabase(List.of(
new BibEntry("first").withField(StandardField.GROUPS, "one, two, three")
)));

Pseudonymization pseudonymization = new Pseudonymization();

// when
Pseudonymization.Result result = pseudonymization.pseudonymizeLibrary(databaseContext);

// then
BibEntry pseudonymizedEntry = result.bibDatabaseContext().getEntries().getFirst();
String pseudonymizedGroups = pseudonymizedEntry.getField(StandardField.GROUPS).orElseThrow();

String[] groups = pseudonymizedGroups.split(", ");
assertEquals(3, groups.length);

assertEquals("group-1", groups[0]);
assertEquals("group-2", groups[1]);
assertEquals("group-3", groups[2]);

Map<String, String> mapping = result.valueMapping();
assertEquals("one", mapping.get(groups[0]));
assertEquals("two", mapping.get(groups[1]));
assertEquals("three", mapping.get(groups[2]));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ @Article{citationkey-6
number = {number-4},
pages = {pages-5},
volume = {volume-6},
groups = {groups-1},
groups = {group-1},
publisher = {publisher-6},
readstatus = {readstatus-2},
}
Expand All @@ -93,7 +93,7 @@ @Article{citationkey-7
pages = {pages-6},
volume = {volume-7},
file = {file-5},
groups = {groups-1},
groups = {group-1},
publisher = {publisher-7},
readstatus = {readstatus-2},
}
Expand Down Expand Up @@ -123,7 +123,7 @@ @Article{citationkey-9
number = {number-6},
pages = {pages-8},
volume = {volume-9},
groups = {groups-1},
groups = {group-1},
publisher = {publisher-6},
readstatus = {readstatus-2},
}
Expand Down Expand Up @@ -177,7 +177,7 @@ @Article{citationkey-13
number = {number-1},
pages = {pages-11},
volume = {volume-13},
groups = {groups-1},
groups = {group-1},
publisher = {publisher-4},
readstatus = {readstatus-2},
}
Expand All @@ -190,7 +190,7 @@ @Article{citationkey-14
doi = {doi-14},
issn = {issn-14},
volume = {volume-14},
groups = {groups-1},
groups = {group-1},
publisher = {publisher-10},
readstatus = {readstatus-2},
}
Expand All @@ -211,3 +211,16 @@ @Article{citationkey-15
}

@Comment{jabref-meta: databaseType:biblatex;}

@Comment{jabref-meta: grouping:
0 AllEntriesGroup:;
1 SearchGroup:group-3\;0\;groups !=~ .+\;0\;1\;1\;\;\;\;;
1 SearchGroup:group-4\;0\;file !=~ .+\;0\;1\;1\;\;\;\;;
1 StaticGroup:group-5\;0\;1\;\;\;\;;
1 SearchGroup:group-6\;0\;groups !=~ .+ and readstatus !=~ .+\;0\;1\;1\;\;\;\;;
1 KeywordGroup:group-7\;0\;readstatus\;skimmed\;0\;0\;1\;\;\;\;;
1 KeywordGroup:group-8\;0\;readstatus\;read\;0\;0\;1\;\;\;\;;
1 StaticGroup:group-1\;0\;1\;\;\;\;;
}

@Comment{jabref-meta: groups-search-syntax-version:6.0-alpha_1}
Loading