What Model are you using?
I tried the example from https://python.useinstructor.com/concepts/error_handling/#exception-reference
import instructor
from pydantic import BaseModel
from instructor.core.exceptions import IncompleteOutputException, InstructorRetryException


class Report(BaseModel):
    # Free-form report body produced by the model.
    content: str


# JSON mode: the model is asked to emit a raw JSON object matching Report.
client = instructor.from_provider("openai/gpt-4.1-mini", mode=instructor.Mode.JSON)

try:
    response = client.create(
        response_model=Report,
        messages=[{"role": "user", "content": "Write a long report..."}],
        max_tokens=50,   # deliberately tiny so the completion is truncated (finish_reason="length")
        max_retries=0,   # NOTE(review): even with 0 retries, tenacity still wraps the failure,
                         # so InstructorRetryException (not IncompleteOutputException) reaches here
    )
except (IncompleteOutputException, InstructorRetryException) as e:
    print(f"Output truncated: {e}")
    print(f"Last completion: {e.last_completion}")
with
!pip list | grep instructor
!pip list | grep openai
instructor 1.15.1
openai 2.31.0
and I don't think it works as documented (or I am missing/misunderstanding something), because the output is this
Note: I think it would be GREAT to include the 'expected' output on the web/documentation.
Output truncated: <failed_attempts>
<generation number="1">
<exception>
The output is incomplete due to a max_tokens length limit.
</exception>
<completion>
ChatCompletion(id='chatcmpl-DUz5or9V1YeJA31yEt1CKZ7Eg09Rz', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n "content": "This report provides an in-depth analysis of the current market trends, examining both macroeconomic factors and industry-specific dynamics that influence business operations globally. We begin by exploring the overall economic climate, highlighting inflation rates, employment statistics,', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1776277052, model='gpt-4.1-mini-2025-04-14', object='chat.completion', service_tier='default', system_fingerprint='fp_283a574ac4', usage=CompletionUsage(completion_tokens=50, prompt_tokens=98, total_tokens=148, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))
</completion>
</generation>
</failed_attempts>
<last_exception>
The output is incomplete due to a max_tokens length limit.
</last_exception>
Last completion: ChatCompletion(id='chatcmpl-DUz5or9V1YeJA31yEt1CKZ7Eg09Rz', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n "content": "This report provides an in-depth analysis of the current market trends, examining both macroeconomic factors and industry-specific dynamics that influence business operations globally. We begin by exploring the overall economic climate, highlighting inflation rates, employment statistics,', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1776277052, model='gpt-4.1-mini-2025-04-14', object='chat.completion', service_tier='default', system_fingerprint='fp_283a574ac4', usage=CompletionUsage(completion_tokens=50, prompt_tokens=98, total_tokens=148, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))
but if you change this
except (IncompleteOutputException, InstructorRetryException) as e:
for
except IncompleteOutputException as e:
you get
---------------------------------------------------------------------------
IncompleteOutputException Traceback (most recent call last)
/usr/local/lib/python3.12/dist-packages/instructor/core/retry.py in retry_sync(func, response_model, args, kwargs, context, max_retries, strict, mode, hooks)
308 )
--> 309 raise e
310 except RetryError as e:
10 frames/usr/local/lib/python3.12/dist-packages/instructor/core/retry.py in retry_sync(func, response_model, args, kwargs, context, max_retries, strict, mode, hooks)
203
--> 204 return process_response( # type: ignore
205 response=response,
/usr/local/lib/python3.12/dist-packages/instructor/processing/response.py in process_response(response, response_model, stream, validation_context, strict, mode)
369
--> 370 model = response_model.from_response( # type: ignore
371 response,
/usr/local/lib/python3.12/dist-packages/instructor/processing/function_calls.py in from_response(cls, completion, validation_context, strict, mode)
225 if completion.choices[0].finish_reason == "length":
--> 226 raise IncompleteOutputException(last_completion=completion)
227
IncompleteOutputException: The output is incomplete due to a max_tokens length limit.
The above exception was the direct cause of the following exception:
RetryError Traceback (most recent call last)
/usr/local/lib/python3.12/dist-packages/instructor/core/retry.py in retry_sync(func, response_model, args, kwargs, context, max_retries, strict, mode, hooks)
192 response = None
--> 193 for attempt in max_retries:
194 with attempt:
/usr/local/lib/python3.12/dist-packages/tenacity/__init__.py in __iter__(self)
437 while True:
--> 438 do = self.iter(retry_state=retry_state)
439 if isinstance(do, DoAttempt):
/usr/local/lib/python3.12/dist-packages/tenacity/__init__.py in iter(self, retry_state)
370 for action in self.iter_state.actions:
--> 371 result = action(retry_state)
372 return result
/usr/local/lib/python3.12/dist-packages/tenacity/__init__.py in exc_check(rs)
413 raise retry_exc.reraise()
--> 414 raise retry_exc from fut.exception()
415
RetryError: RetryError[<Future at 0x7ddd7c303050 state=finished raised IncompleteOutputException>]
The above exception was the direct cause of the following exception:
InstructorRetryException Traceback (most recent call last)
/tmp/ipykernel_4190/812995971.py in <cell line: 0>()
13
14 try:
---> 15 response = client_realapi.create(
16 response_model=Report,
17 messages=[{"role": "user", "content": "Write a long report..."}],
/usr/local/lib/python3.12/dist-packages/instructor/core/client.py in create(self, response_model, messages, max_retries, validation_context, context, strict, hooks, **kwargs)
374 combined_hooks = self.hooks + hooks
375
--> 376 return self.create_fn(
377 response_model=response_model,
378 messages=messages,
/usr/local/lib/python3.12/dist-packages/instructor/core/patch.py in new_create_sync(response_model, validation_context, context, max_retries, strict, hooks, *args, **kwargs)
256 return obj # type: ignore[return-value]
257
--> 258 response = retry_sync(
259 func=func, # type: ignore
260 response_model=response_model,
/usr/local/lib/python3.12/dist-packages/instructor/core/retry.py in retry_sync(func, response_model, args, kwargs, context, max_retries, strict, mode, hooks)
310 except RetryError as e:
311 logger.debug(f"Retry error: {e}")
--> 312 raise InstructorRetryException(
313 e.last_attempt._exception,
314 last_completion=response,
InstructorRetryException: <failed_attempts>
<generation number="1">
<exception>
The output is incomplete due to a max_tokens length limit.
</exception>
<completion>
ChatCompletion(id='chatcmpl-DUz7hihgylbiAnmGyYCIn5w3tyrHr', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n "content": "This report provides a comprehensive overview of the subject matter at hand. It delves into various aspects, analyzing data and presenting insights to aid in understanding and decision-making. The report begins with an introduction that outlines the purpose and', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1776277169, model='gpt-4.1-mini-2025-04-14', object='chat.completion', service_tier='default', system_fingerprint='fp_283a574ac4', usage=CompletionUsage(completion_tokens=50, prompt_tokens=98, total_tokens=148, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))
</completion>
</generation>
</failed_attempts>
<last_exception>
The output is incomplete due to a max_tokens length limit.
</last_exception>
That is, the IncompleteOutputException is buried inside InstructorRetryException, EVEN when max_retries=0
Whether that is on purpose or not is unknown to me, but it does look cumbersome (to me).
I figured out I had to do this to be able to capture what IS indeed happening.
except InstructorRetryException as e:
# The actual exception (IncompleteOutputException) is wrapped inside InstructorRetryException
inner_error = e.args[0] if e.args else None
if isinstance(inner_error, IncompleteOutputException):
print(f"Caught expected IncompleteOutputException via InstructorRetryException!")
print(f"Output truncated: {inner_error}")
print(f"Last completion: {inner_error.last_completion}")
else:
print(f"Caught InstructorRetryException with different inner error: {type(inner_error)}")
print(e)
except IncompleteOutputException as e:
# This block might still be useful if the library behavior changes or in different modes
print(f"Caught IncompleteOutputException directly: {e}")
print(f"Last completion: {e.last_completion}")
which produced
Caught expected IncompleteOutputException via InstructorRetryException!
Output truncated: The output is incomplete due to a max_tokens length limit.
Last completion: ChatCompletion(id='chatcmpl-DUynRyJRGqwHszFrHydyojcbuZag6', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n "content":', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1776275913, model='gpt-4.1-mini-2025-04-14', object='chat.completion', service_tier='default', system_fingerprint='fp_283a574ac4', usage=CompletionUsage(completion_tokens=5, prompt_tokens=98, total_tokens=103, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=None, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=None), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))
What Model are you using?
I tried the example from https://python.useinstructor.com/concepts/error_handling/#exception-reference
with
and I don't think it works as documented (or I am missing/misunderstanding something), because the output is this
Note: I think it would be GREAT to include the 'expected' output on the web/documentation.
but if you change this
except (IncompleteOutputException, InstructorRetryException) as e:
for
except IncompleteOutputException as e:
you get
That is, the IncompleteOutputException is buried inside InstructorRetryException, EVEN when max_retries=0.
Whether that is on purpose or not is unknown to me, but it does look cumbersome (to me).
I figured out I had to do this to be able to capture what IS indeed happening.
which produced