1 change: 1 addition & 0 deletions docs/api/models.rst
@@ -8,6 +8,7 @@ We implement the following models for supporting multiple healthcare predictive
    :maxdepth: 3

    models/pyhealth.models.BaseModel
    models/pyhealth.models.BHCToAVS
    models/pyhealth.models.LogisticRegression
    models/pyhealth.models.MLP
    models/pyhealth.models.CNN
11 changes: 11 additions & 0 deletions docs/api/models/pyhealth.models.BHCToAVS.rst
@@ -0,0 +1,11 @@
pyhealth.models.bhc_to_avs
==========================

BHCToAVS
------------------------------

.. autoclass:: pyhealth.models.bhc_to_avs.BHCToAVS
    :members:
    :inherited-members:
    :show-inheritance:
    :undoc-members:
21 changes: 21 additions & 0 deletions examples/bhc_to_avs_example.py
@@ -0,0 +1,21 @@
from pyhealth.models.bhc_to_avs import BHCToAVS

# Initialize the model
model = BHCToAVS()

# Example Brief Hospital Course (BHC) text with common clinical abbreviations generated synthetically via ChatGPT 5.1
bhc = (
"Pt admitted with acute onset severe epigastric pain and hypotension. "
"Labs notable for elevated lactate, WBC 18K, mild AST/ALT elevation, and Cr 1.4 (baseline 0.9). "
"CT A/P w/ contrast demonstrated peripancreatic fat stranding c/w acute pancreatitis; "
"no necrosis or peripancreatic fluid collection. "
"Pt received aggressive IVFs, electrolyte repletion, IV analgesia, and NPO status initially. "
"Serial abd exams remained benign with no rebound or guarding. "
"BP stabilized, lactate downtrended, and pt tolerated ADAT to low-fat diet without recurrence of sx. "
"Discharged in stable condition w/ instructions for GI f/u and outpatient CMP in 1 week."
)

# Generate a patient-friendly After-Visit Summary
print(model.predict(bhc))

# Expected output: A simplified, patient-friendly summary explaining the hospital stay without medical jargon.
3 changes: 2 additions & 1 deletion pyhealth/models/__init__.py
@@ -1,6 +1,7 @@
from .adacare import AdaCare, AdaCareLayer
from .agent import Agent, AgentLayer
from .base_model import BaseModel
from .bhc_to_avs import BHCToAVS
from .cnn import CNN, CNNLayer
from .concare import ConCare, ConCareLayer
from .contrawr import ContraWR, ResBlock2D
@@ -26,4 +27,4 @@
from .transformer import Transformer, TransformerLayer
from .transformers_model import TransformersModel
from .vae import VAE
from .sdoh import SdohClassifier
from .sdoh import SdohClassifier
98 changes: 98 additions & 0 deletions pyhealth/models/bhc_to_avs.py
@@ -0,0 +1,98 @@
# Author: Charan Williams
# NetID: charanw2
# Description: Converts clinical brief hospital course (BHC) data to after visit summaries using a fine-tuned Mistral 7B model.
Comment on lines +1 to +3
Copilot AI Dec 27, 2025

The module docstring header uses "# Description:" format which is not standard Python docstring style. The description should either be a proper module-level docstring (triple-quoted string) or follow a consistent comment format without the "Description:" label.

Suggested change
# Author: Charan Williams
# NetID: charanw2
# Description: Converts clinical brief hospital course (BHC) data to after visit summaries using a fine-tuned Mistral 7B model.
"""Convert clinical brief hospital course (BHC) data to after-visit
summaries using a fine-tuned Mistral 7B model."""
# Author: Charan Williams
# NetID: charanw2


from typing import Dict, Any
Copilot AI Dec 27, 2025

Import of 'Dict' is not used.
Import of 'Any' is not used.

Suggested change
from typing import Dict, Any

from dataclasses import dataclass, field
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModelForCausalLM
from pyhealth.models.base_model import BaseModel

_PROMPT = """Summarize for the patient what happened during the hospital stay:

### Brief Hospital Course:
{bhc}

### Patient Summary:
"""

# System prompt used during inference
_SYSTEM_PROMPT = (
    "You are a clinical summarization model. Produce accurate, patient-friendly summaries "
    "using only information from the doctor's note. Do not add new details.\n\n"
)

# Prompt used during fine-tuning
_PROMPT = (
    "Summarize for the patient what happened during the hospital stay based on this doctor's note:\n"
    "{bhc}\n\n"
    "Summary for the patient:\n"
)
Comment on lines +12 to +31
Copilot AI Dec 27, 2025

The _PROMPT variable is defined twice (lines 12-18 and lines 27-31), with the second definition overwriting the first. This creates dead code and potential confusion. Only one prompt definition should be kept, or they should be renamed to reflect their different purposes (e.g., _TRAINING_PROMPT and _INFERENCE_PROMPT).
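
A minimal sketch of the renaming option, reusing the PR's own two templates verbatim; the names come from the comment's example, and the mapping of each template to a stage is an assumption since the in-code comments and the usage in predict() disagree:

# Template currently shadowed by the second definition; name is illustrative.
_TRAINING_PROMPT = """Summarize for the patient what happened during the hospital stay:

### Brief Hospital Course:
{bhc}

### Patient Summary:
"""

# Template currently formatted in predict(); assumed here to be the inference prompt.
_INFERENCE_PROMPT = (
    "Summarize for the patient what happened during the hospital stay based on this doctor's note:\n"
    "{bhc}\n\n"
    "Summary for the patient:\n"
)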


@dataclass
class BHCToAVS(BaseModel):
Comment on lines +33 to +34
Copilot AI Dec 27, 2025

The dataclass decorator on a class inheriting from BaseModel (which inherits from nn.Module) may not properly initialize the parent class. The dataclass-generated __init__ should be paired with a __post_init__ method that calls super().__init__() to ensure nn.Module is properly initialized. Without this, features like the _dummy_param used for device detection may not work correctly.
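
A minimal sketch of that fix (assuming BaseModel.__init__ can be called here without arguments, which should be checked against pyhealth's BaseModel signature):

@dataclass
class BHCToAVS(BaseModel):
    ...

    def __post_init__(self):
        # The dataclass-generated __init__ never calls the parent constructor, so
        # initialize nn.Module state explicitly before any parameters or buffers
        # (e.g. the _dummy_param used for device detection) are registered.
        super().__init__()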

    base_model_id: str = field(default="mistralai/Mistral-7B-Instruct")
    """HuggingFace repo containing the base Mistral 7B model."""

    adapter_model_id: str = field(default="williach31/mistral-7b-bhc-to-avs-lora")
    """HuggingFace repo containing only LoRA adapter weights."""

Comment on lines +33 to +40
Copilot AI Dec 27, 2025

Missing documentation for the BHCToAVS class itself. The class lacks a docstring explaining its purpose, parameters, and usage. Only the individual fields and methods have documentation.
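
One possible class docstring, with wording drawn from the module header and the example script rather than from the PR itself:

    """Generate a patient-friendly After-Visit Summary (AVS) from a clinical
    Brief Hospital Course (BHC) note using a fine-tuned (LoRA) Mistral 7B model.

    Example:
        >>> model = BHCToAVS()
        >>> summary = model.predict("Pt admitted with acute onset severe epigastric pain ...")
    """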

    def _get_pipeline(self):
        """Create and cache the text-generation pipeline."""
        if not hasattr(self, "_pipeline"):
            # Load base model
            base = AutoModelForCausalLM.from_pretrained(
                self.base_model_id,
                torch_dtype=torch.bfloat16,
                device_map="auto"
            )

            # Load LoRA adapter
            model = PeftModelForCausalLM.from_pretrained(
                base,
                self.adapter_model_id,
                torch_dtype=torch.bfloat16
            )

            tokenizer = AutoTokenizer.from_pretrained(self.base_model_id)

            # Create HF pipeline
            self._pipeline = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                device_map="auto",
Copilot AI Dec 27, 2025

The pipeline is created with device_map="auto" parameter twice: once in the AutoModelForCausalLM.from_pretrained call (line 48) and again in the pipeline constructor (line 65). The second device_map parameter in the pipeline call is redundant since the model has already been placed on devices, and may cause conflicts or unexpected behavior.

Suggested change
device_map="auto",

                model_kwargs={"torch_dtype": torch.bfloat16}
            )

        return self._pipeline

    def predict(self, bhc_text: str) -> str:
        """
        Generate an After-Visit Summary (AVS) from a Brief Hospital Course (BHC) note.

        Parameters
        ----------
        bhc_text : str
            Raw BHC text.

        Returns
        -------
        str
            Patient-friendly summary.
        """

Copilot AI Dec 27, 2025

Missing input validation for the bhc_text parameter. The method should validate that bhc_text is not None and is a non-empty string before processing to provide clearer error messages to users.

Suggested change
        # Validate input to provide clear error messages and avoid unexpected failures.
        if bhc_text is None:
            raise ValueError("bhc_text must not be None.")
        if not isinstance(bhc_text, str):
            raise TypeError(f"bhc_text must be a string, got {type(bhc_text).__name__}.")
        if not bhc_text.strip():
            raise ValueError("bhc_text must be a non-empty string.")

        prompt = _SYSTEM_PROMPT + _PROMPT.format(bhc=bhc_text)

        pipe = self._get_pipeline()
        outputs = pipe(
            prompt,
            max_new_tokens=512,
            temperature=0.0,
            eos_token_id=[pipe.tokenizer.eos_token_id],
            pad_token_id=pipe.tokenizer.eos_token_id,
Copilot AI Dec 27, 2025

The pipeline is missing the return_full_text=False parameter in the generate call. By default, Hugging Face text-generation pipelines return the full text including the input prompt. To return only the newly generated text, you should either set return_full_text=False in the pipeline call or manually strip the prompt from the output.

Suggested change
pad_token_id=pipe.tokenizer.eos_token_id,
pad_token_id=pipe.tokenizer.eos_token_id,
return_full_text=False,
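
The manual alternative mentioned above would look roughly like this inside predict(), slicing the echoed prompt off the generated text when return_full_text is left at its default:

        generated = outputs[0]["generated_text"]
        # With return_full_text=True (the pipeline default), the output begins with the prompt.
        return generated[len(prompt):].strip()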

        )

        # Output is a single text string
        return outputs[0]["generated_text"].strip()
36 changes: 36 additions & 0 deletions tests/core/test_bhc_to_avs.py
@@ -0,0 +1,36 @@
from tests.base import BaseTestCase
from pyhealth.models.bhc_to_avs import BHCToAVS


class TestBHCToAVS(BaseTestCase):
    """Unit tests for the BHCToAVS model."""

    def setUp(self):
        self.set_random_seed()

    def test_predict(self):
        """Test the predict method of BHCToAVS."""
        bhc_text = (
            "Patient admitted with abdominal pain. Imaging showed no acute findings. "
            "Pain improved with supportive care and the patient was discharged in stable condition."
        )
        model = BHCToAVS()
        try:
            summary = model.predict(bhc_text)

            # Output must be type str
            self.assertIsInstance(summary, str)

            # Output should not be empty
            self.assertGreater(len(summary.strip()), 0)

            # Output should be different from input
            self.assertNotIn(bhc_text[:40], summary)

        except OSError as e:
            # Allow the test to pass if the model download fails, e.g. on GitHub workflows
            if "gated repo" in str(e).lower() or "404" in str(e):
                pass
            else:
                raise e