Skip to content

Commit 61641aa

Browse files
GenAI Utils | Add metrics to LLMInvocations (#3891)
* add metrics to genai utils * add Instruments class for GenAI metrics and refactor metric recording * refactor: streamline metric payload handling in InvocationMetricsRecorder * fix: grammar * doc: added changelog * fix: update version for semconvs * Lint fixes * Fix Nits, remove overly defensive code * Add monotonic timing support for LLM invocation duration calculations * small cleanups to types to simplify code * cleanup test * Simplify histogram duration time calculations * Remove unused constants and helper function * Refactor histogram creation to use standalone functions instead of a class --------- Co-authored-by: aaronabbott <aaronabbott@google.com>
1 parent 0870f96 commit 61641aa

File tree

6 files changed

+346
-6
lines changed

6 files changed

+346
-6
lines changed

util/opentelemetry-util-genai/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1212
([#3943](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3943))
1313
- Add more Semconv attributes to LLMInvocation spans.
1414
([#3862](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3862))
15+
- Add metrics to LLMInvocation traces
16+
([#3891](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3891))
1517

1618
## Version 0.2b0 (2025-10-14)
1719

util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,20 +60,24 @@
6060

6161
from __future__ import annotations
6262

63+
import timeit
6364
from contextlib import contextmanager
6465
from typing import Iterator
6566

6667
from opentelemetry import context as otel_context
68+
from opentelemetry.metrics import MeterProvider, get_meter
6769
from opentelemetry.semconv._incubating.attributes import (
6870
gen_ai_attributes as GenAI,
6971
)
7072
from opentelemetry.semconv.schemas import Schemas
7173
from opentelemetry.trace import (
74+
Span,
7275
SpanKind,
7376
TracerProvider,
7477
get_tracer,
7578
set_span_in_context,
7679
)
80+
from opentelemetry.util.genai.metrics import InvocationMetricsRecorder
7781
from opentelemetry.util.genai.span_utils import (
7882
_apply_error_attributes,
7983
_apply_finish_attributes,
@@ -88,13 +92,35 @@ class TelemetryHandler:
8892
them as spans, metrics, and events.
8993
"""
9094

91-
def __init__(self, tracer_provider: TracerProvider | None = None):
95+
def __init__(
96+
self,
97+
tracer_provider: TracerProvider | None = None,
98+
meter_provider: MeterProvider | None = None,
99+
):
92100
self._tracer = get_tracer(
93101
__name__,
94102
__version__,
95103
tracer_provider,
96104
schema_url=Schemas.V1_37_0.value,
97105
)
106+
self._metrics_recorder: InvocationMetricsRecorder | None = None
107+
meter = get_meter(__name__, meter_provider=meter_provider)
108+
self._metrics_recorder = InvocationMetricsRecorder(meter)
109+
110+
def _record_llm_metrics(
111+
self,
112+
invocation: LLMInvocation,
113+
span: Span | None = None,
114+
*,
115+
error_type: str | None = None,
116+
) -> None:
117+
if self._metrics_recorder is None or span is None:
118+
return
119+
self._metrics_recorder.record(
120+
span,
121+
invocation,
122+
error_type=error_type,
123+
)
98124

99125
def start_llm(
100126
self,
@@ -106,6 +132,9 @@ def start_llm(
106132
name=f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}",
107133
kind=SpanKind.CLIENT,
108134
)
135+
# Record a monotonic start timestamp (seconds) for duration
136+
# calculation using timeit.default_timer.
137+
invocation.monotonic_start_s = timeit.default_timer()
109138
invocation.span = span
110139
invocation.context_token = otel_context.attach(
111140
set_span_in_context(span)
@@ -118,10 +147,12 @@ def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation: # pylint: disab
118147
# TODO: Provide feedback that this invocation was not started
119148
return invocation
120149

121-
_apply_finish_attributes(invocation.span, invocation)
150+
span = invocation.span
151+
_apply_finish_attributes(span, invocation)
152+
self._record_llm_metrics(invocation, span)
122153
# Detach context and end span
123154
otel_context.detach(invocation.context_token)
124-
invocation.span.end()
155+
span.end()
125156
return invocation
126157

127158
def fail_llm( # pylint: disable=no-self-use
@@ -132,11 +163,14 @@ def fail_llm( # pylint: disable=no-self-use
132163
# TODO: Provide feedback that this invocation was not started
133164
return invocation
134165

166+
span = invocation.span
135167
_apply_finish_attributes(invocation.span, invocation)
136-
_apply_error_attributes(invocation.span, error)
168+
_apply_error_attributes(span, error)
169+
error_type = getattr(error.type, "__qualname__", None)
170+
self._record_llm_metrics(invocation, span, error_type=error_type)
137171
# Detach context and end span
138172
otel_context.detach(invocation.context_token)
139-
invocation.span.end()
173+
span.end()
140174
return invocation
141175

142176
@contextmanager
@@ -166,6 +200,7 @@ def llm(
166200

167201
def get_telemetry_handler(
168202
tracer_provider: TracerProvider | None = None,
203+
meter_provider: MeterProvider | None = None,
169204
) -> TelemetryHandler:
170205
"""
171206
Returns a singleton TelemetryHandler instance.
@@ -174,6 +209,9 @@ def get_telemetry_handler(
174209
get_telemetry_handler, "_default_handler", None
175210
)
176211
if handler is None:
177-
handler = TelemetryHandler(tracer_provider=tracer_provider)
212+
handler = TelemetryHandler(
213+
tracer_provider=tracer_provider,
214+
meter_provider=meter_provider,
215+
)
178216
setattr(get_telemetry_handler, "_default_handler", handler)
179217
return handler
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
from opentelemetry.metrics import Histogram, Meter
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics

# Advisory bucket boundaries (seconds) for gen_ai.client.operation.duration:
# a geometric series doubling from 10 ms up to ~82 s.
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
    0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64,
    1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92,
]

# Advisory bucket boundaries (tokens) for gen_ai.client.token.usage:
# powers of four from 1 up to 4**13.
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS = [
    1, 4, 16, 64, 256, 1024, 4096, 16384,
    65536, 262144, 1048576, 4194304, 16777216, 67108864,
]


def create_duration_histogram(meter: Meter) -> Histogram:
    """Create the ``gen_ai.client.operation.duration`` histogram on *meter*."""
    return meter.create_histogram(
        name=gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION,
        description="Duration of GenAI client operation",
        unit="s",
        explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
    )


def create_token_histogram(meter: Meter) -> Histogram:
    """Create the ``gen_ai.client.token.usage`` histogram on *meter*."""
    return meter.create_histogram(
        name=gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE,
        description="Number of input and output tokens used by GenAI clients",
        unit="{token}",
        explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
    )
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
"""Helpers for emitting GenAI metrics from LLM invocations."""

from __future__ import annotations

import timeit
from typing import Dict, Optional

from opentelemetry.metrics import Histogram, Meter
from opentelemetry.semconv._incubating.attributes import (
    gen_ai_attributes as GenAI,
)
from opentelemetry.trace import Span, set_span_in_context
from opentelemetry.util.genai.instruments import (
    create_duration_histogram,
    create_token_histogram,
)
from opentelemetry.util.genai.types import LLMInvocation
from opentelemetry.util.types import AttributeValue


class InvocationMetricsRecorder:
    """Records duration and token usage histograms for GenAI invocations."""

    def __init__(self, meter: Meter):
        self._duration_histogram: Histogram = create_duration_histogram(meter)
        self._token_histogram: Histogram = create_token_histogram(meter)

    def record(
        self,
        span: Optional[Span],
        invocation: LLMInvocation,
        *,
        error_type: Optional[str] = None,
    ) -> None:
        """Record duration and token metrics for an invocation if possible.

        Duration is measured from ``invocation.monotonic_start_s`` (set by the
        TelemetryHandler via ``timeit.default_timer``) and is skipped when no
        start timestamp was recorded. Input/output token counts are each
        recorded with the matching ``gen_ai.token.type`` attribute. All
        measurements are exemplar-linked to *span* through its context.

        Args:
            span: Span to associate with the measurements; no-op when ``None``.
            invocation: Invocation carrying models, provider and token counts.
            error_type: Optional ``error.type`` attribute for failed calls.
        """
        if span is None:
            return

        # (count, gen_ai.token.type) pairs for whichever counts are present.
        token_counts: list[tuple[int, str]] = []
        if invocation.input_tokens is not None:
            token_counts.append(
                (
                    invocation.input_tokens,
                    GenAI.GenAiTokenTypeValues.INPUT.value,
                )
            )
        if invocation.output_tokens is not None:
            token_counts.append(
                (
                    invocation.output_tokens,
                    GenAI.GenAiTokenTypeValues.OUTPUT.value,
                )
            )

        # Shared attribute set for both histograms; omit unset fields.
        attributes: Dict[str, AttributeValue] = {
            GenAI.GEN_AI_OPERATION_NAME: GenAI.GenAiOperationNameValues.CHAT.value
        }
        if invocation.request_model:
            attributes[GenAI.GEN_AI_REQUEST_MODEL] = invocation.request_model
        if invocation.provider:
            attributes[GenAI.GEN_AI_PROVIDER_NAME] = invocation.provider
        if invocation.response_model_name:
            attributes[GenAI.GEN_AI_RESPONSE_MODEL] = (
                invocation.response_model_name
            )
        if error_type:
            attributes["error.type"] = error_type

        span_context = set_span_in_context(span)

        # Duration from the monotonic start timestamp, clamped at zero; the
        # clamp makes further range/type checks on the value unnecessary.
        if invocation.monotonic_start_s is not None:
            duration_seconds = max(
                timeit.default_timer() - invocation.monotonic_start_s, 0.0
            )
            self._duration_histogram.record(
                duration_seconds,
                attributes=attributes,
                context=span_context,
            )

        for token_count, token_type in token_counts:
            self._token_histogram.record(
                token_count,
                attributes=attributes | {GenAI.GEN_AI_TOKEN_TYPE: token_type},
                context=span_context,
            )


__all__ = ["InvocationMetricsRecorder"]

util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ class LLMInvocation:
123123
max_tokens: int | None = None
124124
stop_sequences: list[str] | None = None
125125
seed: int | None = None
126+
monotonic_start_s: float | None = None
127+
"""
128+
Monotonic start time in seconds (from timeit.default_timer) used
129+
for duration calculations to avoid mixing clock sources. This is
130+
populated by the TelemetryHandler when starting an invocation.
131+
"""
126132

127133

128134
@dataclass

0 commit comments

Comments
 (0)