Skip to content
Open
Show file tree
Hide file tree
Changes from 43 commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
b9e664b
update
xnuohz Oct 30, 2025
4cfa0a3
update
xnuohz Oct 30, 2025
3080221
add changelog
xnuohz Oct 31, 2025
fef36f4
Merge branch 'master' into cov/llm/vit
xnuohz Nov 1, 2025
ec9c462
update
xnuohz Nov 1, 2025
beafdc4
update
xnuohz Nov 1, 2025
eca7f10
update
xnuohz Nov 1, 2025
8881879
update
xnuohz Nov 1, 2025
57c9b55
update
xnuohz Nov 1, 2025
7c9efbe
update
xnuohz Nov 1, 2025
2c97909
improve models/llm.py
xnuohz Nov 3, 2025
a1a39d7
Merge branch 'master' into cov/llm/vit
xnuohz Nov 3, 2025
93d0693
improve models/llm_judge.py
xnuohz Nov 3, 2025
94ae196
update changelog
xnuohz Nov 3, 2025
3bb82b2
improve models/molecule_gpt.py
xnuohz Nov 3, 2025
7dd529a
improve models/glem.py
xnuohz Nov 3, 2025
5680e00
update
xnuohz Nov 3, 2025
17c2210
improve models/txt2kg.py
xnuohz Nov 3, 2025
2a6ab6f
update
xnuohz Nov 4, 2025
117fe75
update
xnuohz Nov 4, 2025
9be3f11
improve utils/backend_utils.py
xnuohz Nov 4, 2025
c204e58
update
xnuohz Nov 4, 2025
b6576c5
update
xnuohz Nov 4, 2025
b2ecd18
upload cov in testing_rag.yaml
xnuohz Nov 4, 2025
7bc110c
update
xnuohz Nov 4, 2025
25b27b1
update
xnuohz Nov 5, 2025
3c19b2e
update
xnuohz Nov 5, 2025
502207e
update
xnuohz Nov 5, 2025
ebc1429
update
xnuohz Nov 5, 2025
48c984e
upload cov in testing_rag (v1)
xnuohz Nov 5, 2025
e9b2286
update
xnuohz Nov 7, 2025
c121caa
add flag when upload coverage
xnuohz Nov 7, 2025
64c960c
add flag in coverage not useful
xnuohz Nov 7, 2025
c0cef4a
fix molecule_gpt_dataset test
xnuohz Nov 8, 2025
3cb3efc
Merge branch 'master' into cov/llm/vit
xnuohz Nov 8, 2025
52b9d87
remove onlyrag in llm/utils
xnuohz Nov 8, 2025
554bc38
ignore llm test in minimal/nightly/prev test
xnuohz Nov 8, 2025
d5caa53
remove onlyrag in llm/models
xnuohz Nov 8, 2025
99aff66
update timeout minutes
xnuohz Nov 8, 2025
4e5ac17
remove onlyrag in txt2kg
xnuohz Nov 8, 2025
c978391
set onlyLinux in txt2kg test
xnuohz Nov 8, 2025
8ae6f28
cleanup
xnuohz Nov 8, 2025
58d0484
fuse rag and latest test
xnuohz Nov 8, 2025
1f9a56e
update
xnuohz Nov 11, 2025
f382f38
update
xnuohz Nov 11, 2025
a1809fc
remove onlyRAG
xnuohz Nov 11, 2025
33e9ebb
remove onlyRAG
xnuohz Nov 11, 2025
17cd41c
update
xnuohz Nov 16, 2025
42ffa92
fix ci
xnuohz Nov 16, 2025
44b780b
fix ci
xnuohz Nov 16, 2025
7b31783
fix ci
xnuohz Nov 16, 2025
00ca6b9
update
xnuohz Nov 16, 2025
ac0796c
Merge branch 'master' into cov/llm/vit
xnuohz Nov 17, 2025
da82264
Merge branch 'master' into cov/llm/vit
xnuohz Nov 22, 2025
b143673
trigger ci
xnuohz Nov 26, 2025
ffec466
Merge branch 'master' into cov/llm/vit
xnuohz Dec 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/testing_latest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ jobs:
- name: Install main package (Windows)
if: ${{ steps.changed-files-specific.outputs.only_changed != 'true' && runner.os == 'Windows' }}
run: |
uv pip install -e ".[test]"
uv pip install -e ".[test,rag]"

- name: Install main package
if: ${{ steps.changed-files-specific.outputs.only_changed != 'true' && runner.os != 'Windows' }}
run: |
uv pip install -e ".[full,test]"
uv pip install -e ".[full,test,rag]"

- name: Check installation
if: steps.changed-files-specific.outputs.only_changed != 'true'
Expand All @@ -69,7 +69,7 @@ jobs:

- name: Run tests
if: steps.changed-files-specific.outputs.only_changed != 'true'
timeout-minutes: 15
timeout-minutes: 20
run: |
uv run --no-project pytest --cov --cov-report=xml --durations 10

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/testing_minimal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,4 @@ jobs:
if: steps.changed-files-specific.outputs.only_changed != 'true'
timeout-minutes: 15
run: |
uv run --no-project pytest --durations 10
uv run --no-project pytest --ignore=test/llm --durations 10
2 changes: 1 addition & 1 deletion .github/workflows/testing_nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,4 @@ jobs:
if: steps.changed-files-specific.outputs.only_changed != 'true'
timeout-minutes: 15
run: |
uv run --no-project pytest --durations 10
uv run --no-project pytest --ignore=test/llm --durations 10
2 changes: 1 addition & 1 deletion .github/workflows/testing_prev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ jobs:
if: steps.changed-files-specific.outputs.only_changed != 'true'
timeout-minutes: 15
run: |
uv run --no-project pytest --durations 10
uv run --no-project pytest --ignore=test/llm --durations 10
52 changes: 0 additions & 52 deletions .github/workflows/testing_rag.yml

This file was deleted.

2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

### Changed

- Improved `.llm` code coverage ([#10516](https://github.com/pyg-team/pytorch_geometric/pull/10516))

### Deprecated

### Removed
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ rag=[
"sentencepiece",
"accelerate",
"torchmetrics",
"peft",
]
test=[
"onnx",
Expand Down
3 changes: 2 additions & 1 deletion test/datasets/test_protein_mpnn_dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from torch_geometric.datasets import ProteinMPNNDataset
from torch_geometric.testing import onlyOnline, withPackage
from torch_geometric.testing import onlyLinux, onlyOnline, withPackage


@onlyLinux
@onlyOnline
@withPackage('pandas')
def test_protein_mpnn_dataset():
Expand Down
14 changes: 8 additions & 6 deletions test/llm/models/test_g_retriever.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import gc

import pytest
import torch

from torch_geometric.llm.models import LLM, GRetriever
from torch_geometric.nn import GAT
from torch_geometric.testing import onlyRAG, withPackage
from torch_geometric.testing import withPackage


@onlyRAG
@withPackage('transformers', 'sentencepiece', 'accelerate')
def test_g_retriever() -> None:
@withPackage('transformers', 'sentencepiece', 'accelerate', 'peft')
@pytest.mark.parametrize('use_lora', [True, False])
def test_g_retriever(use_lora: bool) -> None:
llm = LLM(model_name='Qwen/Qwen3-0.6B', dtype=torch.float32,
sys_prompt="You're an agent, answer my questions.")

Expand All @@ -25,6 +26,7 @@ def test_g_retriever() -> None:
model = GRetriever(
llm=llm,
gnn=gnn,
use_lora=use_lora,
)
assert str(model) == ('GRetriever(\n'
' llm=LLM(Qwen/Qwen3-0.6B),\n'
Expand Down Expand Up @@ -54,8 +56,7 @@ def test_g_retriever() -> None:
torch.cuda.empty_cache()


@onlyRAG
@withPackage('transformers', 'sentencepiece', 'accelerate')
@withPackage('transformers', 'sentencepiece', 'accelerate', 'peft')
def test_g_retriever_many_tokens() -> None:
llm = LLM(model_name='Qwen/Qwen3-0.6B', dtype=torch.float32,
sys_prompt="You're an agent, answer my questions.")
Expand All @@ -73,6 +74,7 @@ def test_g_retriever_many_tokens() -> None:
llm=llm,
gnn=gnn,
mlp_out_tokens=2,
use_lora=True,
)
assert str(model) == ('GRetriever(\n'
' llm=LLM(Qwen/Qwen3-0.6B),\n'
Expand Down
15 changes: 14 additions & 1 deletion test/llm/models/test_git_mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,24 @@ def test_git_mol():
[1, 2, 3, 4, 0, 6, 7, 8, 9, 5],
])
edge_attr = torch.zeros(edge_index.size(1), 16, dtype=torch.long)
# batch size = 1
batch = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
smiles = ['CC(C)([C@H]1CC2=C(O1)C=CC3=C2OC(=O)C=C3)O']
captions = ['The molecule is the (R)-(-)-enantiomer of columbianetin.']
images = torch.randn(1, 3, 224, 224)
loss = model(x, edge_index, batch, edge_attr, smiles, images, captions)
assert loss >= 0

# Test train:
# batch size > 1
batch = torch.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
smiles = [
'CC(C)([C@H]1CC2=C(O1)C=CC3=C2OC(=O)C=C3)O',
'CCOc1ccccc1',
]
captions = [
'The molecule is the (R)-(-)-enantiomer of columbianetin.',
'Ethoxybenzene is an aromatic ether.'
]
images = torch.randn(2, 3, 224, 224)
loss = model(x, edge_index, batch, edge_attr, smiles, images, captions)
assert loss >= 0
180 changes: 180 additions & 0 deletions test/llm/models/test_glem.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import pytest
import torch

from torch_geometric.data import Data
from torch_geometric.llm.models import GLEM
from torch_geometric.llm.models.glem import deal_nan
from torch_geometric.loader import DataLoader, NeighborLoader
from torch_geometric.nn.models import GraphSAGE
from torch_geometric.testing import withPackage


def test_deal_nan_tensor_replaces_nans():
Expand Down Expand Up @@ -46,3 +51,178 @@ def test_deal_nan_is_non_mutating():
_ = deal_nan(x)
assert torch.isnan(x).any() # Original still contains NaN
assert torch.allclose(x, x_copy, equal_nan=True)


@pytest.fixture
def tiny_graph_data():
    """Build a small random graph for the GLEM tests.

    The graph has 10 nodes with 16-dimensional random features, five
    directed edges forming the chain ``0 -> 1 -> ... -> 5``, random labels
    drawn from 3 classes, a gold mask that is ``True`` for the first five
    nodes only, and node ids ``0..9``.
    """
    num_nodes, num_features, num_classes = 10, 16, 3
    sources = [0, 1, 2, 3, 4]
    targets = [1, 2, 3, 4, 5]
    # Keyword arguments are evaluated left to right, so the random features
    # are still drawn before the random labels.
    return Data(
        x=torch.randn(num_nodes, num_features),
        edge_index=torch.tensor([sources, targets], dtype=torch.long),
        y=torch.randint(0, num_classes, (num_nodes, )),
        is_gold=torch.tensor([True] * 5 + [False] * 5),
        n_id=torch.arange(num_nodes),
    )


@pytest.fixture
def dummy_text_data():
    """Return a loader over a synthetic, 10-item tokenized-text dataset.

    Every item is a dict holding random ``input_ids`` (16 values in
    ``[100, 1000)``), an all-ones ``attention_mask``, a label ``idx % 3``,
    an ``is_gold`` flag that is true for ``idx < 5``, and the node id
    ``idx``. Items are served in order in batches of two.
    """
    seq_len = 16

    class DummyDataset(torch.utils.data.Dataset):
        def __init__(self):
            self.indices = torch.arange(10)
            # Anonymous stand-in object that only exposes `num_nodes`.
            self._data = type('obj', (), {'num_nodes': 10})()

        def __len__(self):
            return 10

        def __getitem__(self, idx):
            token_ids = torch.randint(100, 1000, (seq_len, ))
            mask = torch.ones(seq_len, dtype=torch.long)
            return {
                'input': {
                    'input_ids': token_ids,
                    'attention_mask': mask,
                },
                'labels': torch.tensor(idx % 3),
                'is_gold': torch.tensor(idx < 5),
                'n_id': torch.tensor(idx),
            }

    return DataLoader(DummyDataset(), batch_size=2, shuffle=False)


@pytest.fixture
def glem_model():
    """Create a CPU GLEM model for the tests in this module.

    The model pairs the ``'prajjwal1/bert-tiny'`` language model (with
    ``lm_use_lora=True``) with a 2-layer GraphSAGE mapping 16 input
    features to 3 output classes.
    """
    num_classes = 3
    sage = GraphSAGE(
        in_channels=16,
        hidden_channels=32,
        num_layers=2,
        out_channels=num_classes,
    )
    return GLEM(
        lm_to_use='prajjwal1/bert-tiny',
        gnn_to_use=sage,
        out_channels=num_classes,
        lm_use_lora=True,
        device='cpu',
    )


@withPackage('transformers', 'sentencepiece', 'accelerate', 'peft')
def test_glem_initialization(glem_model):
    """Check that the fixture model exposes an LM, a GNN and 3 LM labels.

    NOTE(review): 'peft' is listed in `withPackage` because the
    `glem_model` fixture is built with `lm_use_lora=True`; presumably the
    LoRA path needs `peft` at construction time -- confirm against GLEM.
    """
    assert glem_model.lm is not None
    assert glem_model.gnn is not None
    assert glem_model.lm.num_labels == 3


@withPackage('transformers', 'sentencepiece', 'accelerate', 'peft')
@pytest.mark.parametrize('is_augmented', [True, False])
def test_glem_pretrain(glem_model, tiny_graph_data, dummy_text_data,
                       is_augmented):
    """Smoke-test LM and GNN pre-training, with and without augmentation.

    Calls `pre_train_lm` on the text loader and `pre_train_gnn` on a
    neighbor loader, each with `num_epochs=5`, `patience=1` and random
    external pseudo labels. The test only requires both calls to finish
    without raising.

    NOTE(review): 'peft' is listed in `withPackage` because the
    `glem_model` fixture is built with `lm_use_lora=True`; presumably the
    LoRA path needs `peft` -- confirm against GLEM.
    """
    # Test LM pretraining
    optimizer = torch.optim.Adam(glem_model.lm.parameters(), lr=1e-3)
    pseudo_labels = torch.randint(0, 3, (10, ))

    glem_model.pre_train_lm(
        train_loader=dummy_text_data,
        optimizer=optimizer,
        num_epochs=5,
        patience=1,
        ext_pseudo_labels=pseudo_labels,
        is_augmented=is_augmented,
        verbose=True,
    )

    # Test GNN pretraining
    loader = NeighborLoader(tiny_graph_data, num_neighbors=[2, 2],
                            batch_size=4, input_nodes=torch.arange(10))
    optimizer = torch.optim.Adam(glem_model.gnn.parameters(), lr=1e-3)
    pseudo_labels = torch.randint(0, 3, (10, ))

    glem_model.pre_train_gnn(
        train_loader=loader,
        optimizer=optimizer,
        num_epochs=5,
        patience=1,
        ext_pseudo_labels=pseudo_labels,
        is_augmented=is_augmented,
        verbose=True,
    )


@withPackage('transformers', 'sentencepiece', 'accelerate', 'peft')
@pytest.mark.parametrize('is_augmented', [True, False])
def test_glem_train(glem_model, tiny_graph_data, dummy_text_data,
                    is_augmented):
    """Run one `train` call per EM phase and sanity-check its outputs.

    For both the 'lm' and the 'gnn' phase, the returned accuracy and loss
    must be Python floats, with the accuracy in ``[0, 1]`` and the loss
    non-negative.

    NOTE(review): 'peft' is listed in `withPackage` because the
    `glem_model` fixture is built with `lm_use_lora=True`; presumably the
    LoRA path needs `peft` -- confirm against GLEM.
    """
    # Test LM training
    optimizer = torch.optim.Adam(glem_model.lm.parameters(), lr=1e-3)
    pseudo_labels = torch.randint(0, 3, (10, ))

    acc, loss = glem_model.train(
        em_phase='lm',
        train_loader=dummy_text_data,
        optimizer=optimizer,
        epoch=1,
        pseudo_labels=pseudo_labels,
        is_augmented=is_augmented,
        verbose=True,
    )
    assert isinstance(acc, float) and isinstance(loss, float)
    assert 0 <= acc <= 1
    assert loss >= 0

    # Test GNN training
    loader = NeighborLoader(tiny_graph_data, num_neighbors=[2, 2],
                            batch_size=4, input_nodes=torch.arange(10))
    optimizer = torch.optim.Adam(glem_model.gnn.parameters(), lr=1e-3)
    pseudo_labels = torch.randint(0, 3, (10, ))

    acc, loss = glem_model.train(
        em_phase='gnn',
        train_loader=loader,
        optimizer=optimizer,
        epoch=1,
        pseudo_labels=pseudo_labels,
        is_augmented=is_augmented,
        verbose=True,
    )
    assert isinstance(acc, float) and isinstance(loss, float)
    assert 0 <= acc <= 1
    assert loss >= 0


@withPackage('transformers', 'sentencepiece', 'accelerate', 'peft')
def test_glem_inference(glem_model, tiny_graph_data, dummy_text_data):
    """Check that LM and GNN inference return NaN-free (10, 3) predictions.

    NOTE(review): 'peft' is listed in `withPackage` because the
    `glem_model` fixture is built with `lm_use_lora=True`; presumably the
    LoRA path needs `peft` -- confirm against GLEM.
    """
    # Test LM inference
    preds = glem_model.inference('lm', dummy_text_data, verbose=True)
    assert preds.shape == (10, 3)  # 10 nodes, 3 classes
    assert not torch.isnan(preds).any()

    # Test GNN inference
    loader = NeighborLoader(tiny_graph_data, num_neighbors=[-1],
                            batch_size=10, input_nodes=torch.arange(10))

    preds = glem_model.inference('gnn', loader, verbose=True)
    assert preds.shape == (10, 3)
    assert not torch.isnan(preds).any()


@withPackage('transformers', 'sentencepiece', 'accelerate', 'peft')
def test_glem_loss_function(glem_model):
    """Compare `GLEM.loss` against hand-computed cross-entropy values.

    Without augmentation the result is expected to equal the plain
    cross-entropy over all samples. With augmentation and a weight of 0.3
    it is expected to equal ``0.3 * pseudo + 0.7 * mle``, where `mle` is
    the loss on the gold sample and `pseudo` is the loss of the non-gold
    sample against its pseudo label.

    NOTE(review): 'peft' is listed in `withPackage` because the
    `glem_model` fixture is built with `lm_use_lora=True`; presumably the
    LoRA path needs `peft` -- confirm against GLEM.
    """
    logits = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],
                          requires_grad=True)
    labels = torch.tensor([0, 1])
    is_gold = torch.tensor([True, False])
    pseudo_labels = torch.tensor([0, 2])

    loss_func = torch.nn.CrossEntropyLoss()

    # only gold
    loss1 = glem_model.loss(logits, labels, loss_func, is_gold, None, 0.5,
                            is_augmented=False)
    expected1 = loss_func(logits, labels)
    assert torch.allclose(loss1, expected1)

    # mix gold + pseudo
    loss2 = glem_model.loss(logits, labels, loss_func, is_gold,
                            pseudo_labels, 0.3, is_augmented=True)
    mle = loss_func(logits[0:1], labels[0:1])  # gold part
    pseudo = loss_func(logits[1:2], pseudo_labels[1:2])  # pseudo part
    expected2 = 0.3 * pseudo + 0.7 * mle
    assert torch.allclose(loss2, expected2, atol=1e-6)
Loading
Loading