Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.9", "3.10", "3.11"]
steps:
- name: Checkout repo
uses: actions/checkout@v3
Expand All @@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.9", "3.10", "3.11"]
steps:
- name: Checkout repo
uses: actions/checkout@v3
Expand All @@ -47,6 +47,7 @@ jobs:
REPLICATE_API_KEY: ${{ secrets.REPLICATE_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
HUMANLOOP_API_KEY: ${{ secrets.HUMANLOOP_API_KEY }}

publish:
needs: [compile, test]
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ poetry.toml
.ruff_cache/
.vscode
.env
tests/assets/*.jsonl
tests/assets/*.parquet
2,483 changes: 1,223 additions & 1,260 deletions poetry.lock

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ parse = ">=1"
pydantic = ">= 1.9.2"
pydantic-core = "^2.18.2"
typing_extensions = ">= 4.0.0"
chromadb = "<0.3.7"
deepdiff = {extras = ["murmur"], version = "^8.2.0"}
mmh3 = "^5.1.0"

[tool.poetry.dev-dependencies]
mypy = "1.0.1"
Expand All @@ -68,9 +69,11 @@ python-dotenv = "^1.0.1"
replicate = "^1.0.3"
ruff = "^0.5.6"
types-jsonschema = "^4.23.0.20240813"
chromadb="<0.3.5"
pandas = "<2.2.0"
onnxruntime = "<=1.19.2"
chromadb = "^0.6.3"
pandas = "^2.2.0"
pyarrow = "^19.0.0"
numpy = "<2.0.0"

[tool.pytest.ini_options]
testpaths = [ "tests" ]
Expand Down
35 changes: 7 additions & 28 deletions src/humanloop/client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from contextvars import ContextVar
import os
import typing
from typing import List, Optional, Sequence
Expand All @@ -10,8 +9,8 @@
from opentelemetry.trace import Tracer

from humanloop.core.client_wrapper import SyncClientWrapper
from humanloop.eval_utils.run import prompt_call_evaluation_aware
from humanloop.utilities.types import DecoratorPromptKernelRequestParams
from humanloop.eval_utils.context import EVALUATION_CONTEXT_VARIABLE_NAME, EvaluationContext

from humanloop.eval_utils import log_with_evaluation_context, run_eval
from humanloop.eval_utils.types import Dataset, Evaluator, EvaluatorCheck, File
Expand All @@ -38,10 +37,8 @@ def __init__(
self,
*,
client_wrapper: SyncClientWrapper,
evaluation_context_variable: ContextVar[Optional[EvaluationContext]],
):
super().__init__(client_wrapper=client_wrapper)
self._evaluation_context_variable = evaluation_context_variable

def run(
self,
Expand Down Expand Up @@ -70,7 +67,6 @@ def run(
dataset=dataset,
evaluators=evaluators,
workers=workers,
evaluation_context_variable=self._evaluation_context_variable,
)


Expand Down Expand Up @@ -118,31 +114,15 @@ def __init__(
httpx_client=httpx_client,
)

self.evaluation_context_variable: ContextVar[Optional[EvaluationContext]] = ContextVar(
EVALUATION_CONTEXT_VARIABLE_NAME
)

eval_client = ExtendedEvalsClient(
client_wrapper=self._client_wrapper,
evaluation_context_variable=self.evaluation_context_variable,
)
eval_client = ExtendedEvalsClient(client_wrapper=self._client_wrapper)
eval_client.client = self
self.evaluations = eval_client
self.prompts = ExtendedPromptsClient(client_wrapper=self._client_wrapper)

# Overload the .log method of the clients to be aware of Evaluation Context
# TODO: Overload the log for Evaluators and Tools once run_id is added
# to them.
self.prompts = log_with_evaluation_context(
client=self.prompts,
evaluation_context_variable=self.evaluation_context_variable,
)
# self.evaluators = log_with_evaluation_context(client=self.evaluators)
# self.tools = log_with_evaluation_context(client=self.tools)
self.flows = log_with_evaluation_context(
client=self.flows,
evaluation_context_variable=self.evaluation_context_variable,
)
self.prompts = log_with_evaluation_context(client=self.prompts)
self.prompts = prompt_call_evaluation_aware(client=self.prompts)
self.flows = log_with_evaluation_context(client=self.flows)

if opentelemetry_tracer_provider is not None:
self._tracer_provider = opentelemetry_tracer_provider
Expand All @@ -157,9 +137,8 @@ def __init__(
instrument_provider(provider=self._tracer_provider)
self._tracer_provider.add_span_processor(
HumanloopSpanProcessor(
exporter=HumanloopSpanExporter(
client=self,
)
client=self,
exporter=HumanloopSpanExporter(client=self),
),
)

Expand Down
103 changes: 100 additions & 3 deletions src/humanloop/eval_utils/context.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from typing import Callable, TypedDict
from contextvars import ContextVar
from dataclasses import dataclass
from typing import Any, Callable
from opentelemetry.trace import Tracer


class EvaluationContext(TypedDict):
@dataclass
class EvaluationContext:
"""Context Log to Humanloop.

Per datapoint state that is set when an Evaluation is ran.
Expand All @@ -23,4 +27,97 @@ class EvaluationContext(TypedDict):
run_id: str


EVALUATION_CONTEXT_VARIABLE_NAME = "__EVALUATION_CONTEXT"
_EVALUATION_CONTEXT_VAR: ContextVar[EvaluationContext] = ContextVar("__EVALUATION_CONTEXT")

_UnsafeContextRead = RuntimeError("Attempting to read from thread Context when variable was not set.")


def set_evaluation_context(context: EvaluationContext) -> None:
    """Bind *context* as the active EvaluationContext for the current thread/task."""
    _EVALUATION_CONTEXT_VAR.set(context)


def get_evaluation_context() -> EvaluationContext:
    """Return the active EvaluationContext.

    Raises:
        RuntimeError: if no EvaluationContext has been set in the
            current thread/task context.
    """
    try:
        context = _EVALUATION_CONTEXT_VAR.get()
    except LookupError:
        raise _UnsafeContextRead
    return context


def evaluation_context_set() -> bool:
    """Report whether an EvaluationContext is bound in the current context."""
    try:
        _EVALUATION_CONTEXT_VAR.get()
    except LookupError:
        return False
    return True


def log_belongs_to_evaluated_file(log_args: dict[str, Any]) -> bool:
    """Check whether a log payload targets the file under evaluation.

    A log matches when its "id" equals the evaluated file's id or its
    "path" equals the evaluated file's path. Returns False when no
    evaluation is in progress.
    """
    try:
        ctx = _EVALUATION_CONTEXT_VAR.get()
    except LookupError:
        # No evaluation running, so the log cannot belong to one.
        return False
    if ctx.file_id == log_args.get("id"):
        return True
    return ctx.path == log_args.get("path")


def is_evaluated_file(file_path) -> bool:
    """Check whether *file_path* is the path of the file under evaluation.

    Raises:
        RuntimeError: if no EvaluationContext is set in this context.
    """
    try:
        ctx = _EVALUATION_CONTEXT_VAR.get()
    except LookupError:
        raise _UnsafeContextRead
    return ctx.path == file_path


@dataclass
class PromptUtilityContext:
    """Per-context state tracking nested prompt-utility calls.

    Managed by set_prompt_utility_context() / unset_prompt_utility_context(),
    which adjust the nesting counter.
    """

    # Tracer associated with the prompt-utility scope.
    tracer: Tracer
    # Nesting depth; incremented on set, decremented on unset.
    _in_prompt_utility: int

    @property
    def in_prompt_utility(self) -> bool:
        """True while at least one prompt-utility scope is active."""
        return 0 < self._in_prompt_utility


_PROMPT_UTILITY_CONTEXT_VAR: ContextVar[PromptUtilityContext] = ContextVar("__PROMPT_UTILITY_CONTEXT")


def in_prompt_utility_context() -> bool:
    """Report whether the current context is inside a prompt-utility scope."""
    try:
        ctx = _PROMPT_UTILITY_CONTEXT_VAR.get()
    except LookupError:
        return False
    return ctx.in_prompt_utility


def set_prompt_utility_context(tracer: Tracer) -> None:
    """Enter a prompt-utility scope, creating the context on first entry.

    Nested calls increment a counter rather than replacing the stored
    tracer (the first tracer wins). Each call must be balanced by a
    matching unset_prompt_utility_context() call.

    :param tracer: Tracer to associate with the scope; ignored when a
        PromptUtilityContext already exists in this context.
    """
    # NOTE: the previous version declared `global _PROMPT_UTILITY_CONTEXT_VAR`,
    # but the name is never rebound (only .get()/.set() are used), so the
    # statement was dead code and has been removed.
    try:
        context = _PROMPT_UTILITY_CONTEXT_VAR.get()
    except LookupError:
        _PROMPT_UTILITY_CONTEXT_VAR.set(
            PromptUtilityContext(
                tracer=tracer,
                _in_prompt_utility=1,
            )
        )
    else:
        # Already inside a scope: bump the nesting depth in place. The
        # ContextVar already holds this object, so re-calling .set() with
        # the same instance was redundant and has been dropped.
        context._in_prompt_utility += 1


def get_prompt_utility_context() -> PromptUtilityContext:
    """Return the active PromptUtilityContext.

    Raises:
        RuntimeError: if no PromptUtilityContext is set in this context.
    """
    try:
        ctx = _PROMPT_UTILITY_CONTEXT_VAR.get()
    except LookupError:
        raise _UnsafeContextRead
    return ctx


def unset_prompt_utility_context() -> None:
    """Exit one prompt-utility scope entered via set_prompt_utility_context().

    Decrements the nesting counter on the stored PromptUtilityContext.

    Raises:
        RuntimeError: if no PromptUtilityContext is set in this context.
        ValueError: if called more times than set_prompt_utility_context().
    """
    # NOTE: the previous version declared `global _PROMPT_UTILITY_CONTEXT_VAR_TOKEN`,
    # a name that is defined nowhere and never used — leftover dead code,
    # removed here.
    try:
        context = _PROMPT_UTILITY_CONTEXT_VAR.get()
    except LookupError:
        raise _UnsafeContextRead
    if context._in_prompt_utility >= 1:
        context._in_prompt_utility -= 1
    else:
        raise ValueError("No matching unset_prompt_utility_context() call.")
Loading
Loading