1 file changed: jupyter_ai_jupyternaut/jupyternaut (+19 -8 lines)

```diff
@@ -344,18 +344,29 @@ async def acompletion_with_retry(
344344 """Use tenacity to retry the async completion call."""
345345 retry_decorator = _create_retry_decorator (self , run_manager = run_manager )
346346
347+ # Enables ephemeral prompt caching of the last system message by
348+ # default when passed to `litellm.acompletion()`.
349+ #
350+ # See: https://docs.litellm.ai/docs/tutorials/prompt_caching
351+ cache_control_kwargs = {
352+ "cache_control_injection_points" : [
353+ { "location" : "message" , "role" : "system" }
354+ ]
355+ }
356+
357+ # Disable ephemeral prompt caching on Amazon Bedrock when the
358+ # InvokeModel API is used instead of Converse API. This is motivated by
359+ # an upstream bug in LiteLLM that has yet to be patched.
360+ #
361+ # See: github.com/BerriAI/litellm/issues/17479
362+ if self .model .startswith ("bedrock/" ) and not self .model .startswith ("bedrock/converse/" ):
363+ cache_control_kwargs = {}
364+
347365 @retry_decorator
348366 async def _completion_with_retry (** kwargs : Any ) -> Any :
349367 return await self .client .acompletion (
350368 ** kwargs ,
351- # Enables ephemeral prompt caching of the last system message by
352- # default.
353- cache_control_injection_points = [
354- {
355- "location" : "message" ,
356- "role" : "system" ,
357- }
358- ],
369+ ** cache_control_kwargs ,
359370 )
360371
361372 return await _completion_with_retry (** kwargs )
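
For context, here is a minimal standalone sketch of the call pattern this change produces, using LiteLLM's documented `cache_control_injection_points` parameter. The function name, model string handling, and messages are illustrative only; the surrounding class, tenacity retry decorator, and `self.client` from the real code are omitted.

```python
# Minimal sketch of the patched call path, outside the retrying wrapper.
# The function name and arguments are illustrative, not part of the PR.
import litellm


async def acompletion_with_system_prompt_caching(model: str, messages: list[dict]):
    # Ask LiteLLM to attach ephemeral cache_control metadata to the system
    # message, enabling provider-side prompt caching where supported.
    # See: https://docs.litellm.ai/docs/tutorials/prompt_caching
    cache_control_kwargs = {
        "cache_control_injection_points": [
            {"location": "message", "role": "system"}
        ]
    }

    # Bedrock models routed through InvokeModel (i.e. "bedrock/..." but not
    # "bedrock/converse/...") skip cache injection until the upstream LiteLLM
    # bug (github.com/BerriAI/litellm/issues/17479) is fixed.
    if model.startswith("bedrock/") and not model.startswith("bedrock/converse/"):
        cache_control_kwargs = {}

    return await litellm.acompletion(
        model=model,
        messages=messages,
        **cache_control_kwargs,
    )
```

Building `cache_control_kwargs` up front and splatting it into `acompletion()` keeps the Bedrock-specific exception out of the call site, which mirrors the shape of the diff above.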