From eebf58ed18e1f3bf6d4115423a907776f8d644ad Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Wed, 3 Dec 2025 17:17:37 +0800 Subject: [PATCH 1/7] Add support for emitting inference events and enrich message types Change-Id: I8fd0b896fc103a986f78c7351ce627611e545a62 Co-developed-by: Cursor --- util/opentelemetry-util-genai/CHANGELOG.md | 1 + util/opentelemetry-util-genai/README.rst | 11 +- .../src/opentelemetry/util/genai/handler.py | 24 +- .../opentelemetry/util/genai/span_utils.py | 193 ++++++++++--- .../src/opentelemetry/util/genai/types.py | 54 +++- .../tests/test_utils.py | 260 +++++++++++++++++- 6 files changed, 486 insertions(+), 57 deletions(-) diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index ca2c2d0a0e..45a2d662de 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +- Add support for emitting inference events and enrich message types. ([]()) - Minor change to check LRU cache in Completion Hook before acquiring semaphore/thread ([#3907](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3907)). - Add environment variable for genai upload hook queue size ([https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3943](#3943)) diff --git a/util/opentelemetry-util-genai/README.rst b/util/opentelemetry-util-genai/README.rst index a06b3a0fd0..c69530278a 100644 --- a/util/opentelemetry-util-genai/README.rst +++ b/util/opentelemetry-util-genai/README.rst @@ -9,7 +9,11 @@ while providing standardization for generating both types of otel, "spans and me This package relies on environment variables to configure capturing of message content. By default, message content will not be captured. Set the environment variable `OTEL_SEMCONV_STABILITY_OPT_IN` to `gen_ai_latest_experimental` to enable experimental features. -And set the environment variable `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` to `SPAN_ONLY` or `SPAN_AND_EVENT` to capture message content in spans. +And set the environment variable `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` to one of: +- `NO_CONTENT`: Do not capture message content (default). +- `SPAN_ONLY`: Capture message content in spans only. +- `EVENT_ONLY`: Capture message content in events only. +- `SPAN_AND_EVENT`: Capture message content in both spans and events. This package provides these span attributes: @@ -23,6 +27,11 @@ This package provides these span attributes: - `gen_ai.usage.output_tokens`: Int(7) - `gen_ai.input.messages`: Str('[{"role": "Human", "parts": [{"content": "hello world", "type": "text"}]}]') - `gen_ai.output.messages`: Str('[{"role": "AI", "parts": [{"content": "hello back", "type": "text"}], "finish_reason": "stop"}]') +- `gen_ai.system.instructions`: Str('[{"content": "You are a helpful assistant.", "type": "text"}]') (when system instruction is provided) + +When `EVENT_ONLY` or `SPAN_AND_EVENT` mode is enabled and a LoggerProvider is configured, +the package also emits `gen_ai.client.inference.operation.details` events with structured +message content (as dictionaries instead of JSON strings). 
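The capture modes above can be exercised end to end; the following is a minimal sketch built from the helpers in this patch's test suite, where `test-model` and the `Human`/`AI` roles are placeholder values and the environment variables must be set before the invocation finishes (the capture mode is read at that point)::

    import os

    os.environ["OTEL_SEMCONV_STABILITY_OPT_IN"] = "gen_ai_latest_experimental"
    os.environ["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"] = "SPAN_AND_EVENT"

    from opentelemetry.util.genai.handler import get_telemetry_handler
    from opentelemetry.util.genai.types import InputMessage, OutputMessage, Text

    handler = get_telemetry_handler()
    with handler.llm() as invocation:
        # Populate the invocation; span attributes (and the event, if
        # enabled) are built when the context manager exits.
        invocation.request_model = "test-model"
        invocation.input_messages = [
            InputMessage(role="Human", parts=[Text(content="hello world")])
        ]
        invocation.output_messages = [
            OutputMessage(
                role="AI",
                parts=[Text(content="hello back")],
                finish_reason="stop",
            )
        ]
    # With SPAN_AND_EVENT, the finished span carries the JSON-string message
    # attributes and a gen_ai.client.inference.operation.details event is
    # emitted carrying the structured (dict) form of the same messages.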
Installation diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index bc2f2fa350..5271840246 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -66,6 +66,10 @@ from opentelemetry import context as otel_context from opentelemetry.metrics import MeterProvider, get_meter +from opentelemetry._logs import ( + LoggerProvider, + get_logger, +) from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -80,7 +84,8 @@ from opentelemetry.util.genai.metrics import InvocationMetricsRecorder from opentelemetry.util.genai.span_utils import ( _apply_error_attributes, - _apply_finish_attributes, + _apply_llm_finish_attributes, + _maybe_emit_llm_event, ) from opentelemetry.util.genai.types import Error, LLMInvocation from opentelemetry.util.genai.version import __version__ @@ -96,6 +101,7 @@ def __init__( self, tracer_provider: TracerProvider | None = None, meter_provider: MeterProvider | None = None, + logger_provider: LoggerProvider | None = None, ): self._tracer = get_tracer( __name__, @@ -106,6 +112,12 @@ def __init__( self._metrics_recorder: InvocationMetricsRecorder | None = None meter = get_meter(__name__, meter_provider=meter_provider) self._metrics_recorder = InvocationMetricsRecorder(meter) + self._logger = get_logger( + __name__, + __version__, + logger_provider, + schema_url=Schemas.V1_37_0.value, + ) def _record_llm_metrics( self, @@ -148,8 +160,9 @@ def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation: # pylint: disab return invocation span = invocation.span - _apply_finish_attributes(span, invocation) + _apply_llm_finish_attributes(span, invocation) self._record_llm_metrics(invocation, span) + _maybe_emit_llm_event(self._logger, invocation) # Detach context and end span otel_context.detach(invocation.context_token) span.end() @@ -164,10 +177,11 @@ def fail_llm( # pylint: disable=no-self-use return invocation span = invocation.span - _apply_finish_attributes(invocation.span, invocation) - _apply_error_attributes(span, error) + _apply_llm_finish_attributes(invocation.span, invocation) + _apply_error_attributes(invocation.span, error) error_type = getattr(error.type, "__qualname__", None) self._record_llm_metrics(invocation, span, error_type=error_type) + _maybe_emit_llm_event(self._logger, invocation, error) # Detach context and end span otel_context.detach(invocation.context_token) span.end() @@ -201,6 +215,7 @@ def llm( def get_telemetry_handler( tracer_provider: TracerProvider | None = None, meter_provider: MeterProvider | None = None, + logger_provider: LoggerProvider | None = None, ) -> TelemetryHandler: """ Returns a singleton TelemetryHandler instance. 
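Passing a ``LoggerProvider`` is what routes these events to an exporter; below is a sketch of the wiring, mirroring the test setup later in this patch, where ``log_exporter`` is a hypothetical stand-in for whichever log record exporter is configured::

    from opentelemetry.sdk._logs import LoggerProvider
    from opentelemetry.sdk._logs.export import SimpleLogRecordProcessor

    from opentelemetry.util.genai.handler import get_telemetry_handler

    logger_provider = LoggerProvider()
    # log_exporter is assumed here: any LogRecordExporter works, e.g. the
    # in-memory exporter used by the tests or an OTLP log exporter.
    logger_provider.add_log_record_processor(
        SimpleLogRecordProcessor(log_exporter)
    )
    handler = get_telemetry_handler(logger_provider=logger_provider)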
@@ -212,6 +227,7 @@ def get_telemetry_handler( handler = TelemetryHandler( tracer_provider=tracer_provider, meter_provider=meter_provider, + logger_provider=logger_provider ) setattr(get_telemetry_handler, "_default_handler", handler) return handler diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index b9b8777ec2..03c0d5349b 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -17,6 +17,7 @@ from dataclasses import asdict from typing import Any +from opentelemetry._logs import Logger, LogRecord from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -31,6 +32,7 @@ Error, InputMessage, LLMInvocation, + MessagePart, OutputMessage, ) from opentelemetry.util.genai.utils import ( @@ -41,63 +43,159 @@ ) -def _apply_common_span_attributes( - span: Span, invocation: LLMInvocation -) -> None: - """Apply attributes shared by finish() and error() and compute metrics. +def _get_llm_common_attributes( + invocation: LLMInvocation, +) -> dict[str, Any]: + """Get common LLM attributes shared by finish() and error() paths. - Returns (genai_attributes) for use with metrics. + Returns a dictionary of attributes. """ - span.update_name( - f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}".strip() - ) - span.set_attribute( - GenAI.GEN_AI_OPERATION_NAME, GenAI.GenAiOperationNameValues.CHAT.value + attributes: dict[str, Any] = {} + attributes[GenAI.GEN_AI_OPERATION_NAME] = ( + GenAI.GenAiOperationNameValues.CHAT.value ) if invocation.request_model: - span.set_attribute( - GenAI.GEN_AI_REQUEST_MODEL, invocation.request_model - ) + attributes[GenAI.GEN_AI_REQUEST_MODEL] = invocation.request_model if invocation.provider is not None: # TODO: clean provider name to match GenAiProviderNameValues? - span.set_attribute(GenAI.GEN_AI_PROVIDER_NAME, invocation.provider) + attributes[GenAI.GEN_AI_PROVIDER_NAME] = invocation.provider + return attributes + - _apply_response_attributes(span, invocation) +def _get_llm_span_name(invocation: LLMInvocation) -> str: + """Get the span name for an LLM invocation.""" + return f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}".strip() -def _maybe_set_span_messages( - span: Span, +def _get_llm_messages_attributes_for_span( input_messages: list[InputMessage], output_messages: list[OutputMessage], -) -> None: + system_instruction: list[MessagePart] | None = None, +) -> dict[str, Any]: + """Get message attributes formatted for span (JSON string format). + + Returns empty dict if not in experimental mode or content capturing is disabled. 
+ """ + attributes: dict[str, Any] = {} if not is_experimental_mode() or get_content_capturing_mode() not in ( ContentCapturingMode.SPAN_ONLY, ContentCapturingMode.SPAN_AND_EVENT, ): - return + return attributes if input_messages: - span.set_attribute( - GenAI.GEN_AI_INPUT_MESSAGES, - gen_ai_json_dumps([asdict(message) for message in input_messages]), + attributes[GenAI.GEN_AI_INPUT_MESSAGES] = gen_ai_json_dumps( + [asdict(message) for message in input_messages] ) if output_messages: - span.set_attribute( - GenAI.GEN_AI_OUTPUT_MESSAGES, - gen_ai_json_dumps( - [asdict(message) for message in output_messages] - ), + attributes[GenAI.GEN_AI_OUTPUT_MESSAGES] = gen_ai_json_dumps( + [asdict(message) for message in output_messages] + ) + if system_instruction: + attributes[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = gen_ai_json_dumps( + [asdict(part) for part in system_instruction] ) + return attributes -def _apply_finish_attributes(span: Span, invocation: LLMInvocation) -> None: +def _get_llm_messages_attributes_for_event( + input_messages: list[InputMessage], + output_messages: list[OutputMessage], + system_instruction: list[MessagePart] | None = None, +) -> dict[str, Any]: + """Get message attributes formatted for event (structured format). + + Returns empty dict if not in experimental mode or content capturing is disabled. + """ + attributes: dict[str, Any] = {} + if not is_experimental_mode() or get_content_capturing_mode() not in ( + ContentCapturingMode.EVENT_ONLY, + ContentCapturingMode.SPAN_AND_EVENT, + ): + return attributes + if input_messages: + attributes[GenAI.GEN_AI_INPUT_MESSAGES] = [ + asdict(message) for message in input_messages + ] + if output_messages: + attributes[GenAI.GEN_AI_OUTPUT_MESSAGES] = [ + asdict(message) for message in output_messages + ] + if system_instruction: + attributes[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = [ + asdict(part) for part in system_instruction + ] + return attributes + + +def _maybe_emit_llm_event( + logger: Logger | None, + invocation: LLMInvocation, + error: Error | None = None, +) -> None: + """Emit a gen_ai.client.inference.operation.details event to the logger. + + This function creates a LogRecord event following the semantic convention + for gen_ai.client.inference.operation.details as specified in the GenAI + event semantic conventions. 
+ """ + if not is_experimental_mode() or get_content_capturing_mode() not in ( + ContentCapturingMode.EVENT_ONLY, + ContentCapturingMode.SPAN_AND_EVENT, + ): + return + + if logger is None: + return + + # Build event attributes by reusing the attribute getter functions + attributes: dict[str, Any] = {} + attributes.update(_get_llm_common_attributes(invocation)) + attributes.update(_get_llm_request_attributes(invocation)) + attributes.update(_get_llm_response_attributes(invocation)) + attributes.update( + _get_llm_messages_attributes_for_event( + invocation.input_messages, + invocation.output_messages, + invocation.system_instruction, + ) + ) + + # Add error.type if operation ended in error + if error is not None: + attributes[ErrorAttributes.ERROR_TYPE] = error.type.__qualname__ + + # Create and emit the event + event = LogRecord( + event_name="gen_ai.client.inference.operation.details", + attributes=attributes, + ) + logger.emit(event) + + +def _apply_llm_finish_attributes( + span: Span, invocation: LLMInvocation +) -> None: """Apply attributes/messages common to finish() paths.""" - _apply_common_span_attributes(span, invocation) - _maybe_set_span_messages( - span, invocation.input_messages, invocation.output_messages + # Update span name + span.update_name(_get_llm_span_name(invocation)) + + # Build all attributes by reusing the attribute getter functions + attributes: dict[str, Any] = {} + attributes.update(_get_llm_common_attributes(invocation)) + attributes.update(_get_llm_request_attributes(invocation)) + attributes.update(_get_llm_response_attributes(invocation)) + attributes.update( + _get_llm_messages_attributes_for_span( + invocation.input_messages, + invocation.output_messages, + invocation.system_instruction, + ) ) - _apply_request_attributes(span, invocation) - _apply_response_attributes(span, invocation) - span.set_attributes(invocation.attributes) + attributes.update(invocation.attributes) + + # Set all attributes on the span + if attributes: + span.set_attributes(attributes) def _apply_error_attributes(span: Span, error: Error) -> None: @@ -107,8 +205,10 @@ def _apply_error_attributes(span: Span, error: Error) -> None: span.set_attribute(ErrorAttributes.ERROR_TYPE, error.type.__qualname__) -def _apply_request_attributes(span: Span, invocation: LLMInvocation) -> None: - """Attach GenAI request semantic convention attributes to the span.""" +def _get_llm_request_attributes( + invocation: LLMInvocation, +) -> dict[str, Any]: + """Get GenAI request semantic convention attributes.""" attributes: dict[str, Any] = {} if invocation.temperature is not None: attributes[GenAI.GEN_AI_REQUEST_TEMPERATURE] = invocation.temperature @@ -130,12 +230,13 @@ def _apply_request_attributes(span: Span, invocation: LLMInvocation) -> None: ) if invocation.seed is not None: attributes[GenAI.GEN_AI_REQUEST_SEED] = invocation.seed - if attributes: - span.set_attributes(attributes) + return attributes -def _apply_response_attributes(span: Span, invocation: LLMInvocation) -> None: - """Attach GenAI response semantic convention attributes to the span.""" +def _get_llm_response_attributes( + invocation: LLMInvocation, +) -> dict[str, Any]: + """Get GenAI response semantic convention attributes.""" attributes: dict[str, Any] = {} finish_reasons: list[str] | None @@ -169,13 +270,15 @@ def _apply_response_attributes(span: Span, invocation: LLMInvocation) -> None: if invocation.output_tokens is not None: attributes[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS] = invocation.output_tokens - if attributes: - 
span.set_attributes(attributes) + return attributes __all__ = [ - "_apply_finish_attributes", + "_apply_llm_finish_attributes", "_apply_error_attributes", - "_apply_request_attributes", - "_apply_response_attributes", + "_get_llm_common_attributes", + "_get_llm_request_attributes", + "_get_llm_response_attributes", + "_get_llm_span_name", + "_maybe_emit_llm_event", ] diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py index 4fbb059e73..203abb1c5d 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py @@ -53,18 +53,53 @@ class ToolCallResponse: type: Literal["tool_call_response"] = "tool_call_response" -FinishReason = Literal[ - "content_filter", "error", "length", "stop", "tool_calls" -] - - @dataclass() class Text: content: str type: Literal["text"] = "text" -MessagePart = Union[Text, ToolCall, ToolCallResponse, Any] +@dataclass() +class Reasoning: + content: str + type: Literal["reasoning"] = "reasoning" + + +Modality = Literal["image", "video", "audio"] + + +@dataclass() +class Blob: + mime_type: str | None + modality: Union[Modality, str] + content: bytes + type: Literal["blob"] = "blob" + + +@dataclass() +class File: + mime_type: str | None + modality: Union[Modality, str] + file_id: str + type: Literal["file"] = "file" + + +@dataclass() +class Uri: + mime_type: str | None + modality: Union[Modality, str] + uri: str + type: Literal["uri"] = "uri" + + +MessagePart = Union[ + Text, ToolCall, ToolCallResponse, Blob, File, Uri, Reasoning, Any +] + + +FinishReason = Literal[ + "content_filter", "error", "length", "stop", "tool_calls" +] @dataclass() @@ -88,6 +123,10 @@ def _new_output_messages() -> list[OutputMessage]: return [] +def _new_system_instruction() -> list[MessagePart]: + return [] + + def _new_str_any_dict() -> dict[str, Any]: return {} @@ -109,6 +148,9 @@ class LLMInvocation: output_messages: list[OutputMessage] = field( default_factory=_new_output_messages ) + system_instruction: list[MessagePart] = field( + default_factory=_new_system_instruction + ) provider: str | None = None response_model_name: str | None = None response_id: str | None = None diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index aecb16c541..4e65c3b618 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -28,6 +28,23 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( InMemorySpanExporter, ) + +# Backward compatibility for InMemoryLogExporter -> InMemoryLogRecordExporter rename +# Changed in opentelemetry-sdk@0.60b0 +try: + from opentelemetry.sdk._logs.export import ( # pylint: disable=no-name-in-module + InMemoryLogRecordExporter, + SimpleLogRecordProcessor, + ) +except ImportError: + # Fallback to old name for compatibility with older SDK versions + from opentelemetry.sdk._logs.export import ( + InMemoryLogExporter as InMemoryLogRecordExporter, + ) + from opentelemetry.sdk._logs.export import ( + SimpleLogRecordProcessor, + ) +from opentelemetry.sdk._logs import LoggerProvider from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -42,8 +59,10 @@ from opentelemetry.util.genai.handler import get_telemetry_handler from opentelemetry.util.genai.types import ( ContentCapturingMode, + Error, InputMessage, LLMInvocation, + 
MessagePart, OutputMessage, Text, ) @@ -84,6 +103,12 @@ def _create_output_message( ) +def _create_system_instruction( + content: str = "You are a helpful assistant.", +) -> list[MessagePart]: + return [Text(content=content)] + + def _get_single_span(span_exporter: InMemorySpanExporter) -> ReadableSpan: spans = span_exporter.get_finished_spans() assert len(spans) == 1 @@ -183,13 +208,19 @@ def setUp(self): tracer_provider.add_span_processor( SimpleSpanProcessor(self.span_exporter) ) + self.log_exporter = InMemoryLogRecordExporter() + logger_provider = LoggerProvider() + logger_provider.add_log_record_processor( + SimpleLogRecordProcessor(self.log_exporter) + ) self.telemetry_handler = get_telemetry_handler( - tracer_provider=tracer_provider + tracer_provider=tracer_provider, logger_provider=logger_provider ) def tearDown(self): # Clear spans and reset the singleton telemetry handler so each test starts clean self.span_exporter.clear() + self.log_exporter.clear() if hasattr(get_telemetry_handler, "_default_handler"): delattr(get_telemetry_handler, "_default_handler") @@ -200,11 +231,13 @@ def tearDown(self): def test_llm_start_and_stop_creates_span(self): # pylint: disable=no-self-use message = _create_input_message("hello world") chat_generation = _create_output_message("hello back") + system_instruction = _create_system_instruction() with self.telemetry_handler.llm() as invocation: for attr, value in { "request_model": "test-model", "input_messages": [message], + "system_instruction": system_instruction, "provider": "test-provider", "attributes": {"custom_attr": "value"}, "temperature": 0.5, @@ -256,6 +289,15 @@ def test_llm_start_and_stop_creates_span(self): # pylint: disable=no-self-use self.assertEqual(invocation.attributes.get("custom_attr"), "value") self.assertEqual(invocation.attributes.get("extra"), "info") + # Verify system instruction is present in span as JSON string + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, span_attrs) + span_system = json.loads(span_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS]) + self.assertIsInstance(span_system, list) + self.assertEqual( + span_system[0]["content"], "You are a helpful assistant." 
+ ) + self.assertEqual(span_system[0]["type"], "text") + @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_ONLY", @@ -467,3 +509,219 @@ class BoomError(RuntimeError): GenAI.GEN_AI_USAGE_OUTPUT_TOKENS: 22, }, ) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + ) + def test_emits_llm_event(self): + message = _create_input_message("test query") + chat_generation = _create_output_message("test response") + system_instruction = _create_system_instruction() + + invocation = LLMInvocation( + request_model="event-model", + input_messages=[message], + system_instruction=system_instruction, + provider="test-provider", + temperature=0.7, + max_tokens=100, + response_model_name="response-model", + response_id="event-response-id", + input_tokens=10, + output_tokens=20, + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [chat_generation] + self.telemetry_handler.stop_llm(invocation) + + # Check that event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_data = logs[0] + log_record = log_data.log_record + + # Verify event name + self.assertEqual( + log_record.event_name, "gen_ai.client.inference.operation.details" + ) + + # Verify event attributes + attrs = log_record.attributes + self.assertIsNotNone(attrs) + self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "event-model") + self.assertEqual(attrs[GenAI.GEN_AI_PROVIDER_NAME], "test-provider") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_TEMPERATURE], 0.7) + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MAX_TOKENS], 100) + self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_MODEL], "response-model") + self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_ID], "event-response-id") + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_INPUT_TOKENS], 10) + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS], 20) + + # Verify messages are in structured format (not JSON string) + # OpenTelemetry may convert lists to tuples, so we normalize + input_messages = attrs[GenAI.GEN_AI_INPUT_MESSAGES] + input_messages_list = ( + list(input_messages) + if isinstance(input_messages, tuple) + else input_messages + ) + self.assertEqual(len(input_messages_list), 1) + input_msg = ( + dict(input_messages_list[0]) + if isinstance(input_messages_list[0], tuple) + else input_messages_list[0] + ) + self.assertEqual(input_msg["role"], "Human") + parts = ( + list(input_msg["parts"]) + if isinstance(input_msg["parts"], tuple) + else input_msg["parts"] + ) + self.assertEqual(parts[0]["content"], "test query") + + output_messages = attrs[GenAI.GEN_AI_OUTPUT_MESSAGES] + output_messages_list = ( + list(output_messages) + if isinstance(output_messages, tuple) + else output_messages + ) + self.assertEqual(len(output_messages_list), 1) + output_msg = ( + dict(output_messages_list[0]) + if isinstance(output_messages_list[0], tuple) + else output_messages_list[0] + ) + self.assertEqual(output_msg["role"], "AI") + output_parts = ( + list(output_msg["parts"]) + if isinstance(output_msg["parts"], tuple) + else output_msg["parts"] + ) + self.assertEqual(output_parts[0]["content"], "test response") + self.assertEqual(output_msg["finish_reason"], "stop") + + # Verify system instruction is present in event in structured format + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, attrs) + system_instructions = attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] + system_instructions_list = ( + 
list(system_instructions) + if isinstance(system_instructions, tuple) + else system_instructions + ) + self.assertEqual(len(system_instructions_list), 1) + sys_instr = ( + dict(system_instructions_list[0]) + if isinstance(system_instructions_list[0], tuple) + else system_instructions_list[0] + ) + self.assertEqual(sys_instr["content"], "You are a helpful assistant.") + self.assertEqual(sys_instr["type"], "text") + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="SPAN_AND_EVENT", + ) + def test_emits_llm_event_and_span(self): + message = _create_input_message("combined test") + chat_generation = _create_output_message("combined response") + system_instruction = _create_system_instruction("System prompt here") + + invocation = LLMInvocation( + request_model="combined-model", + input_messages=[message], + system_instruction=system_instruction, + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [chat_generation] + self.telemetry_handler.stop_llm(invocation) + + # Check span was created + span = _get_single_span(self.span_exporter) + span_attrs = _get_span_attributes(span) + self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, span_attrs) + + # Check event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_record = logs[0].log_record + self.assertEqual( + log_record.event_name, "gen_ai.client.inference.operation.details" + ) + self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, log_record.attributes) + # Verify system instruction in both span and event + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, span_attrs) + span_system = json.loads(span_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS]) + self.assertEqual(span_system[0]["content"], "System prompt here") + event_attrs = log_record.attributes + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, event_attrs) + event_system = event_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] + event_system_list = ( + list(event_system) + if isinstance(event_system, tuple) + else event_system + ) + event_sys_instr = ( + dict(event_system_list[0]) + if isinstance(event_system_list[0], tuple) + else event_system_list[0] + ) + self.assertEqual(event_sys_instr["content"], "System prompt here") + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + ) + def test_emits_llm_event_with_error(self): + class TestError(RuntimeError): + pass + + message = _create_input_message("error test") + invocation = LLMInvocation( + request_model="error-model", + input_messages=[message], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + error = Error(message="Test error occurred", type=TestError) + self.telemetry_handler.fail_llm(invocation, error) + + # Check event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_record = logs[0].log_record + attrs = log_record.attributes + + # Verify error attribute is present + self.assertEqual( + attrs[ErrorAttributes.ERROR_TYPE], TestError.__qualname__ + ) + self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "error-model") + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="NO_CONTENT", + ) + def test_does_not_emit_llm_event_when_no_content(self): + message = _create_input_message("no content test") + chat_generation = _create_output_message("no content response") + + invocation = LLMInvocation( + 
request_model="no-content-model", + input_messages=[message], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [chat_generation] + self.telemetry_handler.stop_llm(invocation) + + # Check no event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 0) From cd4b29d2a1cf55399a75e523e2e0c83999beb051 Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Wed, 3 Dec 2025 17:23:35 +0800 Subject: [PATCH 2/7] Add change log Change-Id: I5c4c93613e3e1084245b7298955a08cbc7c9708d Co-developed-by: Cursor --- util/opentelemetry-util-genai/CHANGELOG.md | 2 +- util/opentelemetry-util-genai/tests/test_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index 45a2d662de..0ad42baf5e 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased -- Add support for emitting inference events and enrich message types. ([]()) +- Add support for emitting inference events and enrich message types. ([#3994](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3994)) - Minor change to check LRU cache in Completion Hook before acquiring semaphore/thread ([#3907](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3907)). - Add environment variable for genai upload hook queue size ([https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3943](#3943)) diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index 4e65c3b618..1aa3a4ba57 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -32,7 +32,7 @@ # Backward compatibility for InMemoryLogExporter -> InMemoryLogRecordExporter rename # Changed in opentelemetry-sdk@0.60b0 try: - from opentelemetry.sdk._logs.export import ( # pylint: disable=no-name-in-module + from opentelemetry.sdk._logs.export import (https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3994 # pylint: disable=no-name-in-module InMemoryLogRecordExporter, SimpleLogRecordProcessor, ) From b34297838a80be0cbfb39e3777b1e6d5a3c7e6a9 Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Wed, 3 Dec 2025 17:25:55 +0800 Subject: [PATCH 3/7] Fix unit tests Change-Id: If34cfce0e7eb130db6a1e8e30a5f4be7c215285f Co-developed-by: Cursor --- util/opentelemetry-util-genai/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index 1aa3a4ba57..4e65c3b618 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -32,7 +32,7 @@ # Backward compatibility for InMemoryLogExporter -> InMemoryLogRecordExporter rename # Changed in opentelemetry-sdk@0.60b0 try: - from opentelemetry.sdk._logs.export import (https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3994 # pylint: disable=no-name-in-module + from opentelemetry.sdk._logs.export import ( # pylint: disable=no-name-in-module InMemoryLogRecordExporter, SimpleLogRecordProcessor, ) From 7f7b3cf0305c92efba19b1e02b366d98dea777ff Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Wed, 3 Dec 2025 18:00:33 +0800 Subject: [PATCH 4/7] Fix linting 
failure Change-Id: I847f75259e01729db88129a44b241afb0ea2aca4 Co-developed-by: Cursor --- .../tests/test_utils.py | 78 ++++++------------- 1 file changed, 25 insertions(+), 53 deletions(-) diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index 4e65c3b618..d7104d4311 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -165,6 +165,16 @@ def _assert_text_message( assert message.get("finish_reason") == finish_reason +def _normalize_to_list(value: Any) -> list[Any]: + """Normalize tuple or list to list for OpenTelemetry compatibility.""" + return list(value) if isinstance(value, tuple) else value + + +def _normalize_to_dict(value: Any) -> dict[str, Any]: + """Normalize tuple or dict to dict for OpenTelemetry compatibility.""" + return dict(value) if isinstance(value, tuple) else value + + class TestVersion(unittest.TestCase): @patch_env_vars( stability_mode="gen_ai_latest_experimental", @@ -515,14 +525,10 @@ class BoomError(RuntimeError): content_capturing="EVENT_ONLY", ) def test_emits_llm_event(self): - message = _create_input_message("test query") - chat_generation = _create_output_message("test response") - system_instruction = _create_system_instruction() - invocation = LLMInvocation( request_model="event-model", - input_messages=[message], - system_instruction=system_instruction, + input_messages=[_create_input_message("test query")], + system_instruction=_create_system_instruction(), provider="test-provider", temperature=0.7, max_tokens=100, @@ -533,14 +539,13 @@ def test_emits_llm_event(self): ) self.telemetry_handler.start_llm(invocation) - invocation.output_messages = [chat_generation] + invocation.output_messages = [_create_output_message("test response")] self.telemetry_handler.stop_llm(invocation) # Check that event was emitted logs = self.log_exporter.get_finished_logs() self.assertEqual(len(logs), 1) - log_data = logs[0] - log_record = log_data.log_record + log_record = logs[0].log_record # Verify event name self.assertEqual( @@ -562,60 +567,27 @@ def test_emits_llm_event(self): # Verify messages are in structured format (not JSON string) # OpenTelemetry may convert lists to tuples, so we normalize - input_messages = attrs[GenAI.GEN_AI_INPUT_MESSAGES] - input_messages_list = ( - list(input_messages) - if isinstance(input_messages, tuple) - else input_messages - ) - self.assertEqual(len(input_messages_list), 1) - input_msg = ( - dict(input_messages_list[0]) - if isinstance(input_messages_list[0], tuple) - else input_messages_list[0] + input_msg = _normalize_to_dict( + _normalize_to_list(attrs[GenAI.GEN_AI_INPUT_MESSAGES])[0] ) self.assertEqual(input_msg["role"], "Human") - parts = ( - list(input_msg["parts"]) - if isinstance(input_msg["parts"], tuple) - else input_msg["parts"] + self.assertEqual( + _normalize_to_list(input_msg["parts"])[0]["content"], "test query" ) - self.assertEqual(parts[0]["content"], "test query") - output_messages = attrs[GenAI.GEN_AI_OUTPUT_MESSAGES] - output_messages_list = ( - list(output_messages) - if isinstance(output_messages, tuple) - else output_messages - ) - self.assertEqual(len(output_messages_list), 1) - output_msg = ( - dict(output_messages_list[0]) - if isinstance(output_messages_list[0], tuple) - else output_messages_list[0] + output_msg = _normalize_to_dict( + _normalize_to_list(attrs[GenAI.GEN_AI_OUTPUT_MESSAGES])[0] ) self.assertEqual(output_msg["role"], "AI") - output_parts = ( - list(output_msg["parts"]) 
- if isinstance(output_msg["parts"], tuple) - else output_msg["parts"] + self.assertEqual( + _normalize_to_list(output_msg["parts"])[0]["content"], + "test response", ) - self.assertEqual(output_parts[0]["content"], "test response") self.assertEqual(output_msg["finish_reason"], "stop") # Verify system instruction is present in event in structured format - self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, attrs) - system_instructions = attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] - system_instructions_list = ( - list(system_instructions) - if isinstance(system_instructions, tuple) - else system_instructions - ) - self.assertEqual(len(system_instructions_list), 1) - sys_instr = ( - dict(system_instructions_list[0]) - if isinstance(system_instructions_list[0], tuple) - else system_instructions_list[0] + sys_instr = _normalize_to_dict( + _normalize_to_list(attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS])[0] ) self.assertEqual(sys_instr["content"], "You are a helpful assistant.") self.assertEqual(sys_instr["type"], "text") From 1d312c52fd46933e69692ac6e67ef910579b3ae2 Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Wed, 3 Dec 2025 18:18:45 +0800 Subject: [PATCH 5/7] Fix readme Change-Id: I818a042d275d3c8e3348647d73e34560e7d92f54 Co-developed-by: Cursor --- util/opentelemetry-util-genai/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/opentelemetry-util-genai/README.rst b/util/opentelemetry-util-genai/README.rst index c69530278a..50c869c517 100644 --- a/util/opentelemetry-util-genai/README.rst +++ b/util/opentelemetry-util-genai/README.rst @@ -27,7 +27,7 @@ This package provides these span attributes: - `gen_ai.usage.output_tokens`: Int(7) - `gen_ai.input.messages`: Str('[{"role": "Human", "parts": [{"content": "hello world", "type": "text"}]}]') - `gen_ai.output.messages`: Str('[{"role": "AI", "parts": [{"content": "hello back", "type": "text"}], "finish_reason": "stop"}]') -- `gen_ai.system.instructions`: Str('[{"content": "You are a helpful assistant.", "type": "text"}]') (when system instruction is provided) +- `gen_ai.system_instructions`: Str('[{"content": "You are a helpful assistant.", "type": "text"}]') (when system instruction is provided) When `EVENT_ONLY` or `SPAN_AND_EVENT` mode is enabled and a LoggerProvider is configured, the package also emits `gen_ai.client.inference.operation.details` events with structured From d32ec3e5d16db8eb16dc494cd0e4d189e7466dc6 Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Thu, 4 Dec 2025 13:55:36 +0800 Subject: [PATCH 6/7] Format codes Change-Id: I40b8e01bbe4fa9c182e99085a7c71d4536042247 Co-developed-by: Cursor --- .../src/opentelemetry/util/genai/handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index 5271840246..a49535a301 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -65,11 +65,11 @@ from typing import Iterator from opentelemetry import context as otel_context -from opentelemetry.metrics import MeterProvider, get_meter from opentelemetry._logs import ( LoggerProvider, get_logger, ) +from opentelemetry.metrics import MeterProvider, get_meter from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -227,7 +227,7 @@ def get_telemetry_handler( handler = TelemetryHandler( tracer_provider=tracer_provider, 
meter_provider=meter_provider, - logger_provider=logger_provider + logger_provider=logger_provider, ) setattr(get_telemetry_handler, "_default_handler", handler) return handler From e0d35b29cb9e0e29474cc2a63a02c0327be6387c Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Thu, 4 Dec 2025 14:45:07 +0800 Subject: [PATCH 7/7] Fix missing trace context in events Change-Id: Ie07c495002143fb2f0cf88033206290eb85386ad Co-developed-by: Cursor --- .../src/opentelemetry/util/genai/handler.py | 4 ++-- .../opentelemetry/util/genai/span_utils.py | 5 +++++ .../tests/test_utils.py | 22 +++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index a49535a301..0725fe0900 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -162,7 +162,7 @@ def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation: # pylint: disab span = invocation.span _apply_llm_finish_attributes(span, invocation) self._record_llm_metrics(invocation, span) - _maybe_emit_llm_event(self._logger, invocation) + _maybe_emit_llm_event(self._logger, span, invocation) # Detach context and end span otel_context.detach(invocation.context_token) span.end() @@ -181,7 +181,7 @@ def fail_llm( # pylint: disable=no-self-use _apply_error_attributes(invocation.span, error) error_type = getattr(error.type, "__qualname__", None) self._record_llm_metrics(invocation, span, error_type=error_type) - _maybe_emit_llm_event(self._logger, invocation, error) + _maybe_emit_llm_event(self._logger, span, invocation, error) # Detach context and end span otel_context.detach(invocation.context_token) span.end() diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index 03c0d5349b..02e8c3bb28 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -18,6 +18,7 @@ from typing import Any from opentelemetry._logs import Logger, LogRecord +from opentelemetry.context import get_current from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -27,6 +28,7 @@ from opentelemetry.trace import ( Span, ) +from opentelemetry.trace.propagation import set_span_in_context from opentelemetry.trace.status import Status, StatusCode from opentelemetry.util.genai.types import ( Error, @@ -129,6 +131,7 @@ def _get_llm_messages_attributes_for_event( def _maybe_emit_llm_event( logger: Logger | None, + span: Span, invocation: LLMInvocation, error: Error | None = None, ) -> None: @@ -165,9 +168,11 @@ def _maybe_emit_llm_event( attributes[ErrorAttributes.ERROR_TYPE] = error.type.__qualname__ # Create and emit the event + context = set_span_in_context(span, get_current()) event = LogRecord( event_name="gen_ai.client.inference.operation.details", attributes=attributes, + context=context, ) logger.emit(event) diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index d7104d4311..a6767ecb01 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -592,6 +592,14 @@ def test_emits_llm_event(self): self.assertEqual(sys_instr["content"], "You are a helpful 
assistant.") self.assertEqual(sys_instr["type"], "text") + # Verify event context matches span context + span = _get_single_span(self.span_exporter) + self.assertIsNotNone(log_record.trace_id) + self.assertIsNotNone(log_record.span_id) + self.assertIsNotNone(span.context) + self.assertEqual(log_record.trace_id, span.context.trace_id) + self.assertEqual(log_record.span_id, span.context.span_id) + @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_AND_EVENT", @@ -643,6 +651,13 @@ def test_emits_llm_event_and_span(self): else event_system_list[0] ) self.assertEqual(event_sys_instr["content"], "System prompt here") + # Verify event context matches span context + span = _get_single_span(self.span_exporter) + self.assertIsNotNone(log_record.trace_id) + self.assertIsNotNone(log_record.span_id) + self.assertIsNotNone(span.context) + self.assertEqual(log_record.trace_id, span.context.trace_id) + self.assertEqual(log_record.span_id, span.context.span_id) @patch_env_vars( stability_mode="gen_ai_latest_experimental", @@ -675,6 +690,13 @@ class TestError(RuntimeError): ) self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "error-model") + # Verify event context matches span context + span = _get_single_span(self.span_exporter) + self.assertIsNotNone(log_record.trace_id) + self.assertIsNotNone(log_record.span_id) + self.assertIsNotNone(span.context) + self.assertEqual(log_record.trace_id, span.context.trace_id) + self.assertEqual(log_record.span_id, span.context.span_id) @patch_env_vars( stability_mode="gen_ai_latest_experimental",