import os
from openai import OpenAI
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OpenAI Responses API
Responses API - Input and Output
client = OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)

response = client.responses.create(
    model="gpt-4o",
    instructions="You are a coding assistant that talks like a pirate.",
    input="How do I check if a Python object is an instance of a class?",
)
Response(id='resp_686dffb6d22881969eb59ec7c4600fe30657c7ac9876bb5f',
    created_at=1752039350.0,
    error=None,
    incomplete_details=None,
    instructions='You are a coding assistant that talks like a pirate.',
    metadata={},
    model='gpt-4o-2024-08-06',
    object='response',
    output=[
        ResponseOutputMessage(id='msg_686dffb72dd081969325b99b889109960657c7ac9876bb5f',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ResponseOutputText(
                    annotations=[],
                    type='output_text',
                    logprobs=[],
                    text=(
                        "Arrr, to check if a Python object be an instance of a particular class, "
                        "ye can use the `isinstance()` function. Here's the syntax fer ye:\n\n"
                        "```python\n"
                        "if isinstance(your_object, YourClass):\n"
                        "    # Do somethin' here\n"
                        "```\n\n"
                        "Where `your_object` be the object ye want to check, and `YourClass` be the "
                        "class ye be comparin' it to. This'll return `True` if the object be an instance "
                        "of the class or any class derived from it, and `False` otherwise. Happy sailin' "
                        "the seas of code! 🏴☠️"
                    )
                )
            ]
        )
    ],
    parallel_tool_calls=True,
    temperature=1.0,
    tool_choice='auto',
    tools=[],
    top_p=1.0,
    background=False,
    max_output_tokens=None,
    previous_response_id=None,
    reasoning=Reasoning(
        effort=None,
        generate_summary=None,
        summary=None
    ),
    service_tier='default',
    status='completed',
    text=ResponseTextConfig(
        format=ResponseFormatText(type='text')
    ),
    truncation='disabled',
    usage=ResponseUsage(
        input_tokens=37,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens=130,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
        total_tokens=167
    ),
    user=None,
    max_tool_calls=None,
    store=True,
    top_logprobs=0
)
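If you only need the generated text, the SDK's Response object exposes an output_text convenience property that concatenates all output_text parts, so you can skip walking the output list yourself:

# Convenience accessor: concatenates every output_text part in response.output
print(response.output_text)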
Responses API - Image Input
import base64
# Function to encode the image
def encode_image(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")
# Path to your image
= "../nbs/RACI_Chart.png"
image_path
# Getting the Base64 string
= encode_image(image_path) base64_image
response = client.responses.create(
    model="gpt-4.1",
    input=[
        {"role": "user",
         "content": [
             {"type": "input_text", "text": "who is responsible for the activity of Planning and Analytics?"},
             {"type": "input_image",
              # The file is a PNG, so declare the matching MIME type
              "image_url": f"data:image/png;base64,{base64_image}"
             }
         ]
        }
    ]
)
Response(id='resp_686e0901f7cc819489b6f2cd3d827c2108489a268634180e',
    created_at=1752041730.0,
    error=None,
    incomplete_details=None,
    instructions=None,
    metadata={},
    model='gpt-4.1-2025-04-14',
    object='response',
    output=[
        ResponseOutputMessage(id='msg_686e0903703481949526bd12389bc9c508489a268634180e',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ResponseOutputText(
                    annotations=[],
                    text='The **Analyst** is responsible (R) for the activity of **Planning and Analytics**.',
                    type='output_text',
                    logprobs=[]
                )
            ]
        )
    ],
    parallel_tool_calls=True,
    temperature=1.0,
    tool_choice='auto',
    tools=[],
    top_p=1.0,
    background=False,
    max_output_tokens=None,
    previous_response_id=None,
    reasoning=Reasoning(
        effort=None,
        generate_summary=None,
        summary=None
    ),
    service_tier='default',
    status='completed',
    text=ResponseTextConfig(
        format=ResponseFormatText(type='text')
    ),
    truncation='disabled',
    usage=ResponseUsage(
        input_tokens=1123,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens=21,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
        total_tokens=1144
    ),
    user=None,
    max_tool_calls=None,
    store=True,
    top_logprobs=0
)
Notes:
- Create / Edit images - Use GPT Image to generate or edit images;
- Process image inputs - Use our models’ vision capabilities to analyze images;
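As a quick illustration of the first note, image generation is its own endpoint rather than a Responses API input. A minimal sketch, assuming the gpt-image-1 model; the prompt and output filename here are made up for illustration:

import base64

# Hypothetical example: generate an image with GPT Image and save it locally
img = client.images.generate(
    model="gpt-image-1",
    prompt="A RACI chart drawn as a clean whiteboard sketch",
)
with open("generated_chart.png", "wb") as f:
    f.write(base64.b64decode(img.data[0].b64_json))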
Responses API - File Input
Learn how to use PDF files as inputs to the OpenAI API.
file = client.files.create(
    file=open("../nbs/airnz-baggage-FAQ.pdf", "rb"),
    purpose="user_data"
)
response = client.responses.create(
    model="gpt-4.1",
    input=[
        {"role": "user",
         "content": [
             {"type": "input_file",
              "file_id": file.id,
             },
             {"type": "input_text",
              "text": "Can I bring more bags than my standard allowance?",
             },
         ]
        }
    ]
)
Response(id='resp_686e15d1a4588193a9f9c907f1c7245f00eb881db7107043',
    created_at=1752045010.0,
    error=None,
    incomplete_details=None,
    instructions=None,
    metadata={},
    model='gpt-4.1-2025-04-14',
    object='response',
    output=[
        ResponseOutputMessage(id='msg_686e15d271d481939f71ab0f293634f900eb881db7107043',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ResponseOutputText(
                    annotations=[],
                    text=(
                        "Yes, you can bring more bags than your standard allowance. "
                        "If you need to carry extra bags, you can purchase **Prepaid Extra Bags** "
                        "for items such as additional baggage, sports gear, or musical instruments.\n\n"
                        "**Key details:**\n"
                        "- You can buy Prepaid Extra Bags up to 90 minutes before your international flight "
                        "or up to 30 minutes before a domestic flight.\n"
                        "- It’s cheaper to buy extra bags in advance compared to paying at the airport.\n"
                        "- You can add an extra bag to your booking through your airline.\n\n"
                        "So, if you need more baggage than your allowance, make sure to arrange and pay "
                        "for it ahead of time for the best rates and convenience."
                    ),
                    type='output_text',
                    logprobs=[]
                )
            ]
        )
    ],
    parallel_tool_calls=True,
    temperature=1.0,
    tool_choice='auto',
    tools=[],
    top_p=1.0,
    background=False,
    max_output_tokens=None,
    previous_response_id=None,
    reasoning=Reasoning(
        effort=None,
        generate_summary=None,
        summary=None
    ),
    service_tier='default',
    status='completed',
    text=ResponseTextConfig(
        format=ResponseFormatText(type='text')
    ),
    truncation='disabled',
    usage=ResponseUsage(
        input_tokens=148,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens=137,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
        total_tokens=285
    ),
    user=None,
    max_tool_calls=None,
    store=True,
    top_logprobs=0
)
Find out more details on file input in the OpenAI docs.
It is critical to note that file input works differently from file search (RAG).
Responses API - File Search (RAG)
File search is a tool available in the Responses API. It enables models to retrieve information in a knowledge base of previously uploaded files through semantic and keyword search. By creating vector stores and uploading files to them, you can augment the models’ inherent knowledge by giving them access to these knowledge bases or vector_stores.
Upload the file to the File API
import requests
from io import BytesIO

def create_file(client, file_path):
    if file_path.startswith("http://") or file_path.startswith("https://"):
        # Download the file content from the URL
        response = requests.get(file_path)
        file_content = BytesIO(response.content)
        file_name = file_path.split("/")[-1]
        file_tuple = (file_name, file_content)
        result = client.files.create(
            file=file_tuple,
            purpose="assistants"
        )
    else:
        # Handle local file path
        with open(file_path, "rb") as file_content:
            result = client.files.create(
                file=file_content,
                purpose="assistants"
            )
    print(result.id)
    return result.id

# Replace with your own file path or URL
file_id = create_file(client, "../nbs/airnz-baggage-FAQ.pdf")
Create a vector store
vector_store = client.vector_stores.create(
    name="airnz_onsite"
)
print(vector_store.id)
Add the file to the vector store
result = client.vector_stores.files.create(
    vector_store_id=vector_store.id,
    file_id=file_id
)
print(result)
VectorStoreFile(id='file-MFzh2Lg4RMWHVX6KGUogQ3',
    created_at=1752130589,
    last_error=None,
    object='vector_store.file',
    status='in_progress',
    usage_bytes=0,
    vector_store_id='vs_686f63d470908191ac83b5886a5d136e',
    attributes={},
    chunking_strategy=StaticFileChunkingStrategyObject(
        static=StaticFileChunkingStrategy(
            chunk_overlap_tokens=400,
            max_chunk_size_tokens=800
        ),
        type='static'
    )
)
Check status
status = client.vector_stores.files.list(
    vector_store_id=vector_store.id
)
print(status)
SyncCursorPage[VectorStoreFile](
    data=[
        VectorStoreFile(id='file-MFzh2Lg4RMWHVX6KGUogQ3',
            created_at=1752130560,
            last_error=None,
            object='vector_store.file',
            status='completed',
            usage_bytes=1611,
            vector_store_id='vs_686f63d470908191ac83b5886a5d136e',
            attributes={},
            chunking_strategy=StaticFileChunkingStrategyObject(
                static=StaticFileChunkingStrategy(
                    chunk_overlap_tokens=400,
                    max_chunk_size_tokens=800
                ),
                type='static'
            )
        )
    ],
    has_more=False,
    object='list',
    first_id='file-MFzh2Lg4RMWHVX6KGUogQ3',
    last_id='file-MFzh2Lg4RMWHVX6KGUogQ3'
)
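Because the file first lands in the vector store with status 'in_progress', a small polling loop can wait for ingestion to finish before you query it. A minimal sketch reusing the objects above (the two-second interval is arbitrary):

from time import sleep

# Poll until the uploaded file has been chunked and embedded
while True:
    vs_file = client.vector_stores.files.list(vector_store_id=vector_store.id).data[0]
    if vs_file.status != "in_progress":
        break
    sleep(2)
print(vs_file.status)  # expected: 'completed'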
Responses API - file search tool use
response = client.responses.create(
    model="gpt-4o",
    input="Can I bring more bags than my standard allowance?",
    tools=[{
        "type": "file_search",
        "vector_store_ids": [vector_store.id]
    }],
    include=["file_search_call.results"]
)
print(response)
Response(id='resp_686f675d604881948305ee1e65a3fc3606c06f04867f23ff',
    created_at=1752131421.0,
    error=None,
    incomplete_details=None,
    instructions=None,
    metadata={},
    model='gpt-4o-2024-08-06',
    object='response',
    output=[
        # ── FILE-SEARCH TOOL CALL ───────────────────────────────────────────
        ResponseFileSearchToolCall(id='fs_686f675e5c088194800a667e4021d0a606c06f04867f23ff',
            type='file_search_call',
            status='completed',
            queries=[
                'Can I bring more bags than my standard allowance?'
            ],
            results=[
                Result(
                    file_id='file-MFzh2Lg4RMWHVX6KGUogQ3',
                    filename='airnz-baggage-FAQ.pdf',
                    score=0.9193,
                    text=(
                        "airnz-baggage-FAQ\n\n"
                        "Question : Can I bring more bags than my standard allowance?\n\n"
                        "Answer : If you need to carry more than you can fit in your baggage allowance, "
                        "the best option is to purchase Prepaid Extra Bags*. They apply to extra bags or "
                        "large items like sports gear or musical instruments.\n"
                        "You can buy a Prepaid Extra Bag:\n"
                        "  • Up to 90 minutes before your international flight.\n"
                        "  • Up to 30 minutes before a domestic flight.\n"
                        "It's cheaper than waiting until you get to the airport and paying for your extra bags.\n\n"
                        "Add an extra bag to your booking."
                    ),
                    attributes={}
                )
            ]
        ),
        # ── ASSISTANT MESSAGE ───────────────────────────────────────────────
        ResponseOutputMessage(id='msg_686f675f80c48194b7087d16289303b106c06f04867f23ff',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ResponseOutputText(type='output_text',
                    text=(
                        "Yes, you can bring more bags than your standard allowance by purchasing "
                        "Prepaid Extra Bags. This applies to extra bags or large items like sports gear "
                        "or musical instruments. You can buy a Prepaid Extra Bag up to 90 minutes before "
                        "your international flight or up to 30 minutes before a domestic flight. "
                        "It's cheaper to purchase these in advance rather than paying at the airport."
                    ),
                    annotations=[
                        AnnotationFileCitation(
                            file_id='file-MFzh2Lg4RMWHVX6KGUogQ3',
                            filename='airnz-baggage-FAQ.pdf',
                            index=393,
                            type='file_citation'
                        )
                    ],
                    logprobs=[]
                )
            ]
        )
    ],
    # ── TOOL SPECIFICATION USED ────────────────────────────────────────────
    tools=[
        FileSearchTool(type='file_search',
            vector_store_ids=['vs_686f63d470908191ac83b5886a5d136e'],
            filters=None,
            max_num_results=20,
            ranking_options=RankingOptions(
                ranker='auto',
                score_threshold=0.0
            )
        )
    ],
    parallel_tool_calls=True,
    temperature=1.0,
    tool_choice='auto',
    top_p=1.0,
    background=False,
    max_output_tokens=None,
    previous_response_id=None,
    reasoning=Reasoning(
        effort=None,
        generate_summary=None,
        summary=None
    ),
    service_tier='default',
    status='completed',
    text=ResponseTextConfig(
        format=ResponseFormatText(type='text')
    ),
    truncation='disabled',
    usage=ResponseUsage(
        input_tokens=2087,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens=118,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
        total_tokens=2205
    ),
    user=None,
    max_tool_calls=None,
    store=True,
    top_logprobs=0
)
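The annotations attached to the output text are how you trace an answer back to its sources. A minimal sketch that walks the response above and prints each file citation (attribute names taken from the repr shown here):

# Collect file citations attached to the assistant's message
for item in response.output:
    if item.type == "message":
        for part in item.content:
            for ann in part.annotations:
                if ann.type == "file_citation":
                    print(ann.filename, ann.file_id, ann.index)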
Responses API - Structured Outputs
Structured Outputs is available in two forms in the OpenAI API:
- When using function calling
- When using a json_schema response format
Function calling is useful when you are building an application that bridges the models and functionality of your application.
For example, you can give the model access to functions that query a database in order to build an AI assistant that can help users with their orders, or functions that can interact with the UI.
Conversely, Structured Outputs via text_format
are more suitable when you want to indicate a structured schema for use when the model responds to the user, rather than when the model calls a tool.
Responses API - Structured Outputs - Function calling
Function calling provides a powerful and flexible way for OpenAI models to interface with your code or external services. This guide will explain how to connect the models to your own custom code to fetch data or take action.
tools = [{
    "type": "function",
    "name": "get_weather",
    "description": "Get current temperature for a given location.",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "City and country e.g. Bogotá, Colombia"
            }
        },
        "required": [
            "location"
        ],
        "additionalProperties": False
    }
}]

response = client.responses.create(
    model="gpt-4.1",
    input=[{"role": "user", "content": "What is the weather like in Paris today?"}],
    tools=tools
)

print(response)
Response(id='resp_686f710af11081949d521d105f3573d50d1412714c1718f1',
    created_at=1752133899.0,
    error=None,
    incomplete_details=None,
    instructions=None,
    metadata={},
    model='gpt-4.1-2025-04-14',
    object='response',
    # ── TOOL-CALL OUTPUT ─────────────────────────────────────────
    output=[
        ResponseFunctionToolCall(id='fc_686f710be9b881948355ef00a7dae8080d1412714c1718f1',
            type='function_call',
            status='completed',
            name='get_weather',
            call_id='call_LZ65n33g0XQGYL85LxR5Qsls',
            arguments='{"location":"Paris, France"}'
        )
    ],
    # ── TOOLS DECLARED FOR THIS RESPONSE ────────────────────────
    tools=[
        FunctionTool(
            name='get_weather',
            type='function',
            description='Get current temperature for a given location.',
            strict=True,
            parameters={
                'type': 'object',
                'properties': {
                    'location': {
                        'type': 'string',
                        'description': 'City and country e.g. Bogotá, Colombia'
                    }
                },
                'required': ['location'],
                'additionalProperties': False
            }
        )
    ],
    parallel_tool_calls=True,
    temperature=1.0,
    tool_choice='auto',
    top_p=1.0,
    background=False,
    max_output_tokens=None,
    previous_response_id=None,
    reasoning=Reasoning(
        effort=None,
        generate_summary=None,
        summary=None
    ),
    service_tier='default',
    status='completed',
    text=ResponseTextConfig(
        format=ResponseFormatText(type='text')
    ),
    truncation='disabled',
    usage=ResponseUsage(
        input_tokens=59,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens=17,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
        total_tokens=76
    ),
    user=None,
    max_tool_calls=None,
    store=True,
    top_logprobs=0
)
from IPython.display import Image, display
display(Image('../nbs/function-calling-diagram-steps.png'))
Function calling has two primary use cases:
- Fetching Data: Retrieve up-to-date information to incorporate into the model’s response (RAG). Useful for searching knowledge bases and retrieving specific data from APIs (e.g. current weather data).
- Taking Action: Perform actions like submitting a form, calling APIs, modifying application state (UI/frontend or backend), or taking agentic workflow actions (like handing off the conversation).
Function calling is different from hosted tools in that hosted tools are executed by OpenAI directly, while functions have to be executed by developers (a round-trip sketch follows the list below). The hosted tools include:
- web search,
- Remote MCP servers,
- file search,
- image generation,
- code interpreter,
- computer use;
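To close the loop, the tool call must be executed in your own code and its result sent back as a function_call_output item. Below is a minimal sketch continuing the get_weather example from this section; the get_weather body is a stand-in, not a real weather lookup:

import json

# Stand-in implementation: in reality you would call a weather API
def get_weather(location: str) -> str:
    return f"21°C and sunny in {location}"

tool_call = response.output[0]          # the ResponseFunctionToolCall from above
args = json.loads(tool_call.arguments)

followup = client.responses.create(
    model="gpt-4.1",
    input=[
        {"role": "user", "content": "What is the weather like in Paris today?"},
        tool_call,                       # echo the model's tool call back
        {                                # then supply its result
            "type": "function_call_output",
            "call_id": tool_call.call_id,
            "output": get_weather(args["location"]),
        },
    ],
    tools=tools,
)
print(followup.output_text)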
Responses API - Structured Outputs - text format
JSON is one of the most widely used formats in the world for applications to exchange data.
Structured Outputs is a feature that ensures the model will always generate responses that adhere to your supplied JSON Schema, so you don’t need to worry about the model omitting a required key, or hallucinating an invalid enum value.
Some benefits of Structured Outputs include:
- Reliable type-safety: No need to validate or retry incorrectly formatted responses
- Explicit refusals: Safety-based model refusals are now programmatically detectable
- Simpler prompting: No need for strongly worded prompts to achieve consistent formatting
In addition to supporting JSON Schema in the REST API, the OpenAI SDKs for Python and JavaScript also make it easy to define object schemas using Pydantic and Zod respectively. Below, you can see how to extract information from unstructured text that conforms to a schema defined in code.
from pydantic import BaseModel
class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]

response = client.responses.parse(
    model="gpt-4o-2024-08-06",
    input=[
        {"role": "system", "content": "Extract the event information."},
        {"role": "user",
         "content": "Alice and Bob are going to a science fair on Friday.",
        },
    ],
    text_format=CalendarEvent,
)
ParsedResponse[CalendarEvent](id='resp_687079d7b2c0819491f9985cb6e81c810ce84acf75c12a45',
    created_at=1752201687.0,
    error=None,
    incomplete_details=None,
    instructions=None,
    metadata={},
    model='gpt-4o-2024-08-06',
    object='response',
    output=[
        ParsedResponseOutputMessage[CalendarEvent](id='msg_687079d85fbc81948d89cecc1196f0060ce84acf75c12a45',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ParsedResponseOutputText[CalendarEvent](type='output_text',
                    text='{"name":"Science Fair","date":"Friday","participants":["Alice","Bob"]}',
                    parsed=CalendarEvent(
                        name='Science Fair',
                        date='Friday',
                        participants=['Alice', 'Bob']
                    ),
                    annotations=[],
                    logprobs=[]
                )
            ]
        )
    ],
    parallel_tool_calls=True,
    temperature=1.0,
    tool_choice='auto',
    tools=[],
    top_p=1.0,
    background=False,
    max_output_tokens=None,
    previous_response_id=None,
    reasoning=Reasoning(
        effort=None,
        generate_summary=None,
        summary=None
    ),
    service_tier='default',
    status='completed',
    text=ResponseTextConfig(
        format=ResponseFormatTextJSONSchemaConfig(
            name='CalendarEvent',
            schema_={
                'type': 'object',
                'title': 'CalendarEvent',
                'properties': {
                    'name': {'title': 'Name', 'type': 'string'},
                    'date': {'title': 'Date', 'type': 'string'},
                    'participants': {
                        'title': 'Participants',
                        'type': 'array',
                        'items': {'type': 'string'}
                    }
                },
                'required': ['name', 'date', 'participants'],
                'additionalProperties': False
            },
            type='json_schema',
            description=None,
            strict=True
        )
    ),
    truncation='disabled',
    usage=ResponseUsage(
        input_tokens=89,
        output_tokens=18,
        total_tokens=107,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0)
    ),
    user=None,
    max_tool_calls=None,
    store=True,
    top_logprobs=0
)
event = response.output_parsed
event
CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob'])
Responses API - Streaming
By default, when you make a request to the OpenAI API, we generate the model’s entire output before sending it back in a single HTTP response. When generating long outputs, waiting for a response can take time. Streaming responses lets you start printing or processing the beginning of the model’s output while it continues generating the full response.
stream = client.responses.create(
    model="gpt-4.1",
    input=[
        {"role": "user",
         "content": "Say 'double bubble bath' ten times fast.",
        },
    ],
    stream=True,
)

for event in stream:
    print(event)
# ── OPENAI STREAM EVENTS (PRETTY PRINT) ──────────────────────────────────────
ResponseCreatedEvent(
    sequence_number=0,
    type='response.created',
    response=Response(
        id='resp_68707bd385788196ab67695f57a9a7ef0b1f98ffee1ea1cb',
        created_at=1752202195.0,
        model='gpt-4.1-2025-04-14',
        status='in_progress',
        object='response',
        output=[],
        parallel_tool_calls=True,
        temperature=1.0,
        top_p=1.0
    )
)

ResponseInProgressEvent(
    sequence_number=1,
    type='response.in_progress',
    response=Response(
        id='resp_68707bd385788196ab67695f57a9a7ef0b1f98ffee1ea1cb',
        status='in_progress',
        output=[]
    )
)

# ── The assistant begins streaming a single message ─────────────────────────
ResponseOutputItemAddedEvent(
    sequence_number=2,
    type='response.output_item.added',
    output_index=0,
    item=ResponseOutputMessage(
        id='msg_68707bd3d3948196817ec90a408f357c0b1f98ffee1ea1cb',
        role='assistant',
        status='in_progress',
        content=[]
    )
)

ResponseContentPartAddedEvent(
    sequence_number=3,
    type='response.content_part.added',
    item_id='msg_68707bd3d3948196817ec90a408f357c0b1f98ffee1ea1cb',
    output_index=0,
    content_index=0,
    part=ResponseOutputText(type='output_text', text='')
)

# ── Text deltas (tokens) stream in ───────────────────────────────────────────
# (69 deltas total — collapsed here for brevity)
ResponseTextDeltaEvent(delta='Alright', sequence_number=4, ...)
ResponseTextDeltaEvent(delta=',', sequence_number=5, ...)
ResponseTextDeltaEvent(delta=' here', sequence_number=6, ...)
⋮
ResponseTextDeltaEvent(delta='twister!', sequence_number=62, ...)
ResponseTextDeltaEvent(delta=' Want', sequence_number=64, ...)
ResponseTextDeltaEvent(delta='?', sequence_number=67, ...)
# … etc. …

# ── Final assembled assistant message ────────────────────────────────────────
ResponseTextDoneEvent(
    sequence_number=68,
    type='response.output_text.done',
    item_id='msg_68707bd3d3948196817ec90a408f357c0b1f98ffee1ea1cb',
    text=(
        "Alright, here goes!\n\n"
        "**Double bubble bath, double bubble bath, double bubble bath, double bubble bath, "
        "double bubble bath, double bubble bath, double bubble bath, double bubble bath, "
        "double bubble bath, double bubble bath!**\n\n"
        "Try saying that out loud—it's a real tongue twister! Want another one?"
    )
)

ResponseContentPartDoneEvent(sequence_number=69, ...)
ResponseOutputItemDoneEvent(sequence_number=70, ...)

# ── Stream completes ────────────────────────────────────────────────────────
ResponseCompletedEvent(
    sequence_number=71,
    type='response.completed',
    response=Response(
        id='resp_68707bd385788196ab67695f57a9a7ef0b1f98ffee1ea1cb',
        status='completed',
        output=[
            ResponseOutputMessage(id='msg_68707bd3d3948196817ec90a408f357c0b1f98ffee1ea1cb',
                role='assistant',
                status='completed',
                content=[
                    ResponseOutputText(type='output_text',
                        text="Alright, here goes! ... Want another one?"
                    )
                ]
            )
        ],
        usage=ResponseUsage(
            input_tokens=17,
            output_tokens=65,
            total_tokens=82
        )
    )
)
type StreamingEvent =
| ResponseCreatedEvent
| ResponseInProgressEvent
| ResponseFailedEvent
| ResponseCompletedEvent
| ResponseOutputItemAdded
| ResponseOutputItemDone
| ResponseContentPartAdded
| ResponseContentPartDone
| ResponseOutputTextDelta
| ResponseOutputTextAnnotationAdded
| ResponseTextDone
| ResponseRefusalDelta
| ResponseRefusalDone
| ResponseFunctionCallArgumentsDelta
| ResponseFunctionCallArgumentsDone
| ResponseFileSearchCallInProgress
| ResponseFileSearchCallSearching
| ResponseFileSearchCallCompleted
| ResponseCodeInterpreterInProgress
| ResponseCodeInterpreterCallCodeDelta
| ResponseCodeInterpreterCallCodeDone
| ResponseCodeInterpreterCallInterpreting
| ResponseCodeInterpreterCallCompleted
| Error
The list above is the full list of streaming events.
The output_item events are very useful; a minimal filtering sketch follows.
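In practice you rarely print raw events. Instead, filter by event type; the sketch below prints only the incremental text, using the response.output_text.delta event type shown in the dump above:

stream = client.responses.create(
    model="gpt-4.1",
    input="Say 'double bubble bath' ten times fast.",
    stream=True,
)

# Print only incremental text; ignore lifecycle events
for event in stream:
    if event.type == "response.output_text.delta":
        print(event.delta, end="", flush=True)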
Responses API - Reasoning
Explore advanced reasoning and problem-solving models. Reasoning models like o3 and o4-mini are LLMs trained with reinforcement learning to perform reasoning. Reasoning models think before they answer, producing a long internal chain of thought before responding to the user. Reasoning models excel in complex problem solving, coding, scientific reasoning, and multi-step planning for agentic workflows. They’re also the best models for Codex CLI, our lightweight coding agent.
= """
prompt Write a bash script that takes a matrix represented as a string with
format '[1,2],[3,4],[5,6]' and prints the transpose in the same format.
"""
= client.responses.create(
response ="o4-mini",
model={"effort": "medium"},
reasoninginput=[
{"role": "user",
"content": prompt
}
] )
Response(id='resp_6870bbb2c6e08196a166e307a581179504933fdf0da5e266',
    created_at=1752218546.0,
    model='o4-mini-2025-04-16',
    object='response',
    status='completed',
    parallel_tool_calls=True,
    temperature=1.0,
    top_p=1.0,

    # ── REASONING TRACE ──────────────────────────────────────────────
    output=[
        ResponseReasoningItem(id='rs_6870bbb366888196a475d7f65c3774e804933fdf0da5e266',
            type='reasoning',
            summary=[]
        ),
        # ── ASSISTANT MESSAGE ────────────────────────────────────────
        ResponseOutputMessage(id='msg_6870bbcbb7a48196a772d36ec60adaf904933fdf0da5e266',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ResponseOutputText(type='output_text',
                    text=(
                        "Here’s a self-contained Bash script that takes your matrix string as its first "
                        "argument and prints the transpose in the same “[…],[…]” format. Save it as "
                        "`transpose.sh`, make it executable (`chmod +x transpose.sh`), and run:\n\n"
                        "    ./transpose.sh '[1,2],[3,4],[5,6]'\n\n"
                        "```bash\n"
                        "#!/usr/bin/env bash\n"
                        "set -euo pipefail\n"
                        "\n"
                        "if [ $# -ne 1 ]; then\n"
                        "  echo \"Usage: $0 '[row1],[row2],...'\" >&2\n"
                        "  exit 1\n"
                        "fi\n"
                        "\n"
                        "input=\"$1\"\n"
                        "\n"
                        "# 1) Replace \"],[\" with a row separator \"|\"\n"
                        "# 2) Strip outer brackets and whitespace\n"
                        "trimmed=\"${input//],[/|}\"\n"
                        "trimmed=\"${trimmed#[}\"\n"
                        "trimmed=\"${trimmed%]}\"\n"
                        "trimmed=\"${trimmed//[[:space:]]/}\"\n"
                        "\n"
                        "# Split into rows\n"
                        "IFS='|' read -ra rows <<< \"$trimmed\"\n"
                        "nrows=${#rows[@]}\n"
                        "\n"
                        "# Determine column count\n"
                        "IFS=',' read -ra firstrow <<< \"${rows[0]}\"\n"
                        "ncols=${#firstrow[@]}\n"
                        "\n"
                        "# Initialise transposed rows\n"
                        "declare -a trans\n"
                        "for ((c=0; c<ncols; c++)); do trans[c]=\"\"; done\n"
                        "\n"
                        "# Build transposed matrix\n"
                        "for ((r=0; r<nrows; r++)); do\n"
                        "  IFS=',' read -ra elems <<< \"${rows[r]}\"\n"
                        "  if [ ${#elems[@]} -ne $ncols ]; then\n"
                        "    echo \"Error: non-rectangular matrix\" >&2; exit 1\n"
                        "  fi\n"
                        "  for ((c=0; c<ncols; c++)); do\n"
                        "    trans[c]=\"${trans[c]:+${trans[c]},}${elems[c]}\"\n"
                        "  done\n"
                        "done\n"
                        "\n"
                        "# Join rows back into \"[...],[...]\" format\n"
                        "out=\"\"\n"
                        "for ((c=0; c<ncols; c++)); do\n"
                        "  out+=\"${out:+,}[${trans[c]}]\"\n"
                        "done\n"
                        "\n"
                        "echo \"$out\"\n"
                        "```\n\n"
                        "**Example run**\n\n"
                        "```bash\n"
                        "$ ./transpose.sh '[1,2],[3,4],[5,6]'\n"
                        "[1,3,5],[2,4,6]\n"
                        "```\n\n"
                        "**How it works**\n"
                        "1. Converts the bracketed list into pipe-separated rows.\n"
                        "2. Splits rows/columns into arrays.\n"
                        "3. Reassembles columns as rows, checking for rectangular shape.\n"
                        "4. Re-wraps the result in the original bracketed syntax."
                    ),
                    annotations=[]
                )
            ]
        )
    ],
    reasoning=Reasoning(effort='medium'),
    text=ResponseTextConfig(format=ResponseFormatText(type='text')),
    usage=ResponseUsage(
        input_tokens=44,
        output_tokens=3056,
        total_tokens=3100,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=2368)
    )
)
Reasoning models introduce reasoning tokens in addition to input and output tokens. The models use these reasoning tokens to “think,” breaking down the prompt and considering multiple approaches to generating a response. After generating reasoning tokens, the model produces an answer as visible completion tokens and discards the reasoning tokens from its context.
If you’re managing context manually across model turns, you can discard older reasoning items unless you’re responding to a function call, in which case you must include all reasoning items between the function call and the last user message.
Keeping reasoning items in context
When doing function calling with a reasoning model in the Responses API, we highly recommend you pass back any reasoning items returned with the last function call (in addition to the output of your function). If the model calls multiple functions consecutively, you should pass back all reasoning items, function call items, and function call output items, since the last user message. This allows the model to continue its reasoning process to produce better results in the most token-efficient manner.
The simplest way to do this is to pass in all reasoning items from a previous response into the next one. Our systems will smartly ignore any reasoning items that aren’t relevant to your functions, and only retain those in context that are relevant. You can pass reasoning items from previous responses either using the previous_response_id parameter, or by manually passing in all the output items from a past response into the input of a new one.
For advanced use cases where you might be truncating and optimizing parts of the context window before passing them on to the next response, just ensure all items between the last user message and your function call output are passed into the next response untouched. This will ensure that the model has all the context it needs.
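A minimal sketch of the simple approach, reusing the get_weather tools list from the function-calling section; the context list and the hard-coded function result are placeholders, and it assumes the last output item is the function call:

context = [{"role": "user", "content": "What's the weather in Paris?"}]

response = client.responses.create(model="o4-mini", input=context, tools=tools)

# Pass back ALL output items (reasoning + function call), then the result
context += response.output
context.append({
    "type": "function_call_output",
    "call_id": response.output[-1].call_id,   # assumes the last item is the tool call
    "output": '{"temperature_c": 21}',        # placeholder function result
})

followup = client.responses.create(model="o4-mini", input=context, tools=tools)
print(followup.output_text)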
Reasoning summary
= """
prompt Write a bash script that takes a matrix represented as a string with
format '[1,2],[3,4],[5,6]' and prints the transpose in the same format.
"""
= client.responses.create(
response ="o4-mini",
model={"effort": "medium", "summary":"auto"},
reasoninginput=[
{"role": "user",
"content": prompt
}
] )
Response(id='resp_6870bf27e4d48196beb7ac469701c98d06e3e7d4c653bc6d',
    created_at=1752219431.0,
    model='o4-mini-2025-04-16',
    object='response',
    status='completed',
    parallel_tool_calls=True,
    temperature=1.0,
    top_p=1.0,

    # ────────────────────────── INTERNAL REASONING ───────────────────────────
    output=[
        ResponseReasoningItem(id='rs_6870bf2849048196b23103f9583a7c6306e3e7d4c653bc6d',
            type='reasoning',
            summary=[
                Summary(type='summary_text',
                    text=(
                        "**Creating a bash script for matrix transposition**\n\n"
                        "User needs a bash script to parse a matrix and print its transpose "
                        "in the form \"[1,2],[3,4],[5,6]\" → \"[1,3,5],[2,4,6]\".\n"
                        "Plan: split on '],[' then use awk or sed to transpose."
                    )
                ),
                Summary(type='summary_text',
                    text=(
                        "**Finalizing the bash script**\n\n"
                        "Use awk to handle transposition, sed for input cleanup, "
                        "and account for edge-case spaces."
                    )
                )
            ]
        ),
        # ──────────────────────────── ASSISTANT REPLY ────────────────────────────
        ResponseOutputMessage(id='msg_6870bf3dc3a48196902e8c7abfe0d23b06e3e7d4c653bc6d',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ResponseOutputText(type='output_text',
                    text=(
                        "Here’s a self-contained **Bash script** that takes one argument—a matrix "
                        'like `"[1,2],[3,4],[5,6]"`—and prints its transpose in the same format:\n\n'
                        "```bash\n"
                        "#!/usr/bin/env bash\n\n"
                        "# Usage check\n"
                        "if [ $# -ne 1 ]; then\n"
                        "  echo \"Usage: $0 '[1,2],[3,4],[5,6]'\" >&2\n"
                        "  exit 1\n"
                        "fi\n\n"
                        "input=\"$1\"\n\n"
                        "# 1) Strip spaces, split rows on \"],[\", remove outer brackets\n"
                        "# 2) Feed rows to awk for the transpose\n"
                        "printf \"%s\" \"$input\" \\\n"
                        "  | sed -e 's/ //g' \\\n"
                        "        -e 's/],\\[/\\n/g' \\\n"
                        "        -e 's/^\\[//' \\\n"
                        "        -e 's/\\]$//' \\\n"
                        "  | awk -F, '\n"
                        "{\n"
                        "  for (i = 1; i <= NF; i++) mat[i, NR] = $i;\n"
                        "  if (NF > ncols) ncols = NF;\n"
                        "  nrows = NR;\n"
                        "}\n"
                        "END {\n"
                        "  out = \"\";\n"
                        "  for (i = 1; i <= ncols; i++) {\n"
                        "    out = out \"[\";\n"
                        "    for (j = 1; j <= nrows; j++) {\n"
                        "      out = out mat[i, j] (j < nrows ? \",\" : \"\");\n"
                        "    }\n"
                        "    out = out \"]\" (i < ncols ? \",\" : \"\");\n"
                        "  }\n"
                        "  print out;\n"
                        "}'\n"
                        "```\n\n"
                        "**Example run**\n"
                        "```bash\n"
                        "$ ./transpose.sh '[1,2],[3,4],[5,6]'\n"
                        "[1,3,5],[2,4,6]\n"
                        "```\n"
                        "**Key steps**\n"
                        "1. Convert brackets to newline-separated rows.\n"
                        "2. Use `awk` to rebuild columns as rows.\n"
                        "3. Re-wrap the result in the original bracket syntax."
                    ),
                    annotations=[]
                )
            ]
        )
    ],
    reasoning=Reasoning(effort='medium', summary='detailed'),
    text=ResponseTextConfig(format=ResponseFormatText(type='text')),
    usage=ResponseUsage(
        input_tokens=44,
        output_tokens=2220,
        total_tokens=2264,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=1792)
    )
)
There are some differences to consider when prompting a reasoning model. Reasoning models provide better results on tasks with only high-level guidance, while GPT models often benefit from very precise instructions.
- A reasoning model is like a senior co-worker—you can give them a goal to achieve and trust them to work out the details.
- A GPT model is like a junior coworker—they’ll perform best with explicit instructions to create a specific output.
Responses API - Conversation State
OpenAI provides a few ways to manage conversation state, which is important for preserving information across multiple messages or turns in a conversation.
Manually manage conversation state
history = [
    {"role": "user",
     "content": "hello, nice to meet you. My name is Yunna"
    }
]

response = client.responses.create(
    model="gpt-4o-mini",
    input=history,
    store=False
)

print(response.output_text)

# Add the response to the conversation
history += [{"role": el.role, "content": el.content} for el in response.output]

history.append({"role": "user", "content": "what is my name"})

second_response = client.responses.create(
    model="gpt-4o-mini",
    input=history,
    store=False
)

print(second_response.output_text)
Hello, Yunna! It's great to meet you. How can I assist you today?
Your name is Yunna. How can I help you today, Yunna?
OpenAI APIs for conversation state - Previous Response ID
Our APIs make it easier to manage conversation state automatically, so you don’t have to pass inputs manually with each turn of a conversation.
Share context across generated responses with the previous_response_id parameter. This parameter lets you chain responses and create a threaded conversation.
In the following example, we ask the model to tell a joke. Separately, we ask the model to explain why it’s funny, and the model has all necessary context to deliver a good response.
response = client.responses.create(
    model="gpt-4o-mini",
    input="hello, this is Yunna Speaking. I was born in China",
)
print(response.output_text)

second_response = client.responses.create(
    model="gpt-4o-mini",
    previous_response_id=response.id,
    input=[{"role": "user", "content": "Do you know what my name is "}],
)
print(second_response.output_text)

third_response = client.responses.create(
    model="gpt-4o-mini",
    previous_response_id=second_response.id,
    input=[{"role": "user", "content": "where was I born?"}],
)
print(third_response.output_text)
Hello, Yunna! It's great to meet you. How can I assist you today?
Yes, your name is Yunna. It's a lovely name! How can I help you today?
You mentioned that you were born in China. Would you like to share more about your experiences there?
Response(id='resp_6870c2b7d7488195949e83c50d91eb4605a78054eb1a6aee',
    created_at=1752220343.0,
    model='gpt-4o-mini-2024-07-18',
    object='response',
    status='completed',
    parallel_tool_calls=True,
    temperature=1.0,
    top_p=1.0,

    # ── ASSISTANT MESSAGE ────────────────────────────────────────────────
    output=[
        ResponseOutputMessage(id='msg_6870c2b832e08195b93fecfade7dfc7105a78054eb1a6aee',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ResponseOutputText(type='output_text',
                    text=(
                        "You mentioned that you were born in China. "
                        "Would you like to share more about your experiences there?"
                    ),
                    annotations=[]
                )
            ]
        )
    ],
    previous_response_id='resp_6870c2b6bad48195b83f1d7954cfe0cc05a78054eb1a6aee',
    reasoning=Reasoning(),
    text=ResponseTextConfig(format=ResponseFormatText(type='text')),
    usage=ResponseUsage(
        input_tokens=94,
        output_tokens=21,
        total_tokens=115
    )
)
OpenAI APIs for conversation state - Conversation API
The Conversations API works with the Responses API to persist conversation state as a long-running object with its own durable identifier. After creating a conversation object, you can keep using it across sessions, devices, or jobs. Conversations store items, which can be messages, tool calls, tool outputs, and other data.
from openai import OpenAI
import os
from dotenv import load_dotenv

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

client = OpenAI()

conversation = client.conversations.create(
    metadata={"topic": "demo"},
    items=[
        {"type": "message", "role": "user", "content": "Hello,my name is Yunna. How are you doing"}
    ]
)
print(conversation.id)

conv_68ad375778cc8194b452a6c648f9fbb603fb35c3c4b4df7e

response = client.responses.create(
    model="gpt-4.1",
    input=[{"role": "user", "content": "What is my name?"}],
    conversation="conv_68ad375778cc8194b452a6c648f9fbb603fb35c3c4b4df7e"
)
print(response)
Response(id='resp_68ad376d65188194bd70416cb0849eab03fb35c3c4b4df7e',
    created_at=1756182381.0,
    error=None,
    incomplete_details=None,
    instructions=None,
    metadata={},
    model='gpt-4.1-2025-04-14',
    object='response',
    output=[
        ResponseOutputMessage(id='msg_68ad376e3fcc8194bd803c791527e4ab03fb35c3c4b4df7e',
            content=[
                ResponseOutputText(annotations=[],
                    text='Hello, Yunna! You told me your name is Yunna. How can I help you today?',
                    type='output_text',
                    logprobs=[]
                )
            ],
            role='assistant',
            status='completed',
            type='message'
        )
    ],
    parallel_tool_calls=True,
    temperature=1.0,
    tool_choice='auto',
    tools=[],
    top_p=1.0,
    background=False,
    conversation=Conversation(id='conv_68ad375778cc8194b452a6c648f9fbb603fb35c3c4b4df7e'),
    max_output_tokens=None,
    max_tool_calls=None,
    previous_response_id=None,
    prompt=None,
    prompt_cache_key=None,
    reasoning=Reasoning(effort=None, generate_summary=None, summary=None),
    safety_identifier=None,
    service_tier='default',
    status='completed',
    text=ResponseTextConfig(format=ResponseFormatText(type='text'), verbosity='medium'),
    top_logprobs=0,
    truncation='disabled',
    usage=ResponseUsage(
        input_tokens=27,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens=22,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
        total_tokens=49
    ),
    user=None,
    store=True
)
print(response.output[0].content[0].text)
Hello, Yunna! You told me your name is Yunna. How can I help you today?
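To inspect what the conversation now stores, you can list its items. A sketch, assuming the conversations.items.list method of the current Python SDK:

# List the items (messages, tool calls, ...) stored on the conversation
items = client.conversations.items.list(conversation.id)
for item in items.data:
    print(item.type, getattr(item, "role", None))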
Managing the context window
Understanding context windows will help you successfully create threaded conversations and manage state across model interactions.
The context window is the maximum number of tokens that can be used in a single request. This max tokens number includes input, output, and reasoning tokens.
As your inputs become more complex, or you include more turns in a conversation, you’ll need to consider both output token and context window limits. Model inputs and outputs are metered in tokens, which are parsed from inputs to analyze their content and intent and assembled to render logical outputs. Models have limits on token usage during the lifecycle of a text generation request.
Output tokens are the tokens generated by a model in response to a prompt. Each model has different limits for output tokens. For example, gpt-4o-2024-08-06 can generate a maximum of 16,384 output tokens.
A context window describes the total tokens that can be used for both input and output tokens (and for some models, reasoning tokens). Compare the context window limits of our models. For example, gpt-4o-2024-08-06 has a total context window of 128k tokens.
If you create a very large prompt—often by including extra context, data, or examples for the model—you run the risk of exceeding the allocated context window for a model, which might result in truncated outputs.
Use the tokenizer tool, built with the tiktoken library, to see how many tokens are in a particular string of text.
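For a quick local count before sending a request, a small sketch with tiktoken (the fallback encoding is an assumption for models your installed tiktoken version does not yet map):

import tiktoken

def count_tokens(text: str, model: str = "gpt-4o") -> int:
    try:
        enc = tiktoken.encoding_for_model(model)
    except KeyError:
        enc = tiktoken.get_encoding("o200k_base")  # fallback for unmapped models
    return len(enc.encode(text))

print(count_tokens("Can I bring more bags than my standard allowance?"))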
For example, when making an API request to the Responses API with a reasoning enabled model, like the o1 model, the following token counts will apply toward the context window total:
- Input tokens (inputs you include in the input array for the Responses API)
- Output tokens (tokens generated in response to your prompt)
- Reasoning tokens (used by the model to plan a response)
Tokens generated in excess of the context window limit may be truncated in API responses.
Background mode
Run long running tasks asynchronously in the background.
Agents like Codex and Deep Research show that reasoning models can take several minutes to solve complex problems. Background mode enables you to execute long-running tasks on models like o3 and o1-pro reliably, without having to worry about timeouts or other connectivity issues.
Background mode kicks off these tasks asynchronously, and developers can poll response objects to check status over time. To start response generation in the background, make an API request with background set to true:
resp = client.responses.create(
    model="o3",
    input="Write a very long novel about otters in space.",
    background=True,
)

print(resp.status)
queued
Response(id='resp_6870ce78c1848197a0feb64f4e596c3d00abbf83821c15f6',
    created_at=1752223352.0,
    model='o3-2025-04-16',
    object='response',
    status='queued',          # ← still waiting to be processed
    service_tier='auto',
    background=True,          # background job, not foreground

    # ── Runtime parameters ──────────────────────────────────────────────
    parallel_tool_calls=True,
    temperature=1.0,
    top_p=1.0,
    tool_choice='auto',
    tools=[],

    # ── No content yet (queued) ─────────────────────────────────────────
    output=[],                # will be filled once the job runs
    reasoning=Reasoning(effort='medium'),
    text=ResponseTextConfig(
        format=ResponseFormatText(type='text')
    ),
    truncation='disabled',
    # usage, error, etc. will appear once processing is done
    usage=None,
    error=None
)
Polling background responses
from time import sleep

while resp.status in {"queued", "in_progress"}:
    print(f"Current status: {resp.status}")
    sleep(2)
    resp = client.responses.retrieve(resp.id)

print(f"Final status: {resp.status}\nOutput:\n{resp.output_text}")
Current status: queued
Final status: completed
Output:
River-Stars: A Novel of the Otterkind
by … someone with wet whiskers and starlight in the eyes
PROLOGUE – THE TIDE THAT TURNED SKYWARD
Even before the first fish-scale of dawn glimmered across the Pacific, the otters of Kelp-Fringe Bay felt the old world slipping like sand through their nimble paws. Dolphins had begun to hum disquieting news from the deep trenches; albatrosses muttered of tropospheric wounds; the currents tasted fizzed and feverish. And so, one midnight when the Moon poured a silver highway over the swells, six clans of Lutrinae gathered on a barnacled shelf and voted—by shell-crack, splash, and hiss—to do the impossible:
“Leave the sea, leap the sky, seek kinder waters between the stars.”
Such is how an idea far larger than any raft of otters was born.
PART ONE – RAFT OF INGENUITY
1 – The Spiral-Shell Pact
Captain Shellbreaker, oldest and broadest of the southern sea-otters, thumped an abalone shell upon a driftwood podium. “We go not as beasts in a cage of steel,” he declared, “but as otters: sleek, curious, playful, and stubborn as barnacles on basalt.” Neural-ink squids drew diagrams in living luminescence along the shoreline: spirals of paired habitats, tide-pool bioreactors, kelp-weave solar sails that unfurled like emerald banners. Overhead, the aurora shivered green applause.
2 – Moss and the Clockwork Clam
Moss, a river-otter engineer from the Salish sound, believed machinery should purr like lullabies. She built the Clockwork Clam, a fusion-ignitor whose tungsten valves clicked in a rhythm uncannily like a heartbeat. It promised thrust enough to fling fifty thousand kilograms of habitat past the Mesosphere. Still, it needed a guidance mind.
3 – Dr. Ripple’s Star-Script
Dr. Ripple, scholar of hydrodynamics and astro-navigation, proposed to lace the vessel’s AI with actual water: a torus of conductive brine, pulsing ions carrying qubits the way rivers carry leaves. “Thought,” he said, “ought to flow.” Thus the ship—christened Lutra’s Leap—would dream in ripples and tidal harmonics.
PART TWO – LEAVING THE BLUE DEN
4 – The Day the Ocean Hung Suspended
Launch towers rose like petrified kelp. Seals sang a dirge; gulls performed scattershot blessings (mostly involving guano). When the Clockwork Clam roared, plumes of steam enfolded the platform. Pup—the youngest cadet—pressed her snout to a viewport. For a breathless interval Earth looked less like home and more like an egg cracked open to release them.
5 – In the Company of Micro-Stars
Micro-stars—freckles of fusion plasma held in magnetic nets—lined the port corridor. Otter kits chased each other weightless, somersaulting through jewel-bright radiation bafflers. Play remained sacred protocol; Shellbreaker allowed five tumbling minutes every hour. “Joy is a rudder,” he said. “Lose it and you drift.”
PART THREE – BETWEEN CONSTELLATIONS
6 – The First Dry Storm
Seventeen days outward, a storm of dust no rain could tame punched into the solar-sail lattice. Tensile cables snapped with twangs like harp-strings. Moss suited up, tail braced against a stanchion, and skittered along the honeycomb struts. With torch and fiber-knot she rewove light-scoop petals while cosmic grit rattled her visor. The stars, smeared and furious, seemed to threaten: “Turn back, river-spawn.”
7 – The Whale Made of Night
Beyond Mars orbit, long-range scanners painted a silhouette eighty kilometers from snout to fluke—a magnetovore leviathan drift-feeding on solar wind. It sang sub-audible roars that shivered the hull. Dr. Ripple routed those vibratos into water-logic, letting the ship hum an answering chord: a peace-song half lullaby, half puzzle. The whale tasted the meaning—curiosity, not conquest—and slid aside, vanishing like ink in ink.
PART FOUR – THE LONG PLAY
8 – The Game of Pebbles
Months stretched. Food algae grew dull; morale sagged. Captain Shellbreaker ordered the Pebble Games. In zero-g arenas otters improvised billiards with polished meteorites. Strategies were mapped onto star charts; each rebound, a micro-lesson in orbital mechanics. Pup, formerly shy, revealed uncanny instinct: her last ricochet notched a win and recalculated the upcoming gravity assist in her head. Promotion followed: Navigation Apprentice.
9 – Letters in a Bottle
Though transmission lasers could whisper data across light-minutes, nostalgia demanded something slower. Glass bottles—toughened to withstand vacuum—were stuffed with sea-salt pages and set adrift toward Earth on ballistic arcs. None might ever reach shore, but faith floated with them. “Even if salt never touches tongue,” Moss mused, “stories know the way home.”
PART FIVE – THE BROKEN CURRENT
10 – Sabotage at Perihelion
Not all paws were guiding the same tide. A faction calling itself the Deep-Traditionalists hacked coolant valves, intending to provoke failure and force return. The fusion heart faltered; lights browned; freezing fog crystallized on fur. In a dimmed corridor, Captain Shellbreaker confronted Flint, leader of the dissenters. Flint’s whiskers quivered: “We belong in rivers, old one. The sky is a drought without end.” Shellbreaker offered him a river-stone worn smooth. “Carry it,” he said. “Remember home—but help us make new springs.” Flint, tears beading and drifting, surrendered.
11 – A Tail’s-Length from Oblivion
Repairs cost hard days. Radiation crept inward, sterilizing algae beds. Pup’s navigation suggested a detour: the comet Selkie-7, brimming with ice. A risky burn later, the Leap nestled her belly against the comet’s crystalline shore, harpoons locking on. They mined frozen volatiles, refilled tanks, and painted the ship’s name in bright kelp-green across the comet face, a graffiti bigger than some towns.
PART SIX – ARRIVAL AT NEREIDIA
12 – The Ocean in the Gap Between Stars
Three years, two months, and eleven splashes since launch, telescopes finally resolved their destination: Nereidia, a waterworld circling Tau Ceti, girdled by archipelagos the color of abalone. As deceleration hammered them, pups (now lanky adolescents) lined up to watch the planet bloom—cascading storms, violet tides, moonlit surf untouched by paw or fin. New scent, new song.
13 – First Dive
Atmospheric shuttles splashed into a bay warm as soup. Salt different, gravity gentle. Moss wept as she realized the tools she’d carried were suddenly primitive here; new alloys awaited forging, new currents awaited reading. Dr. Ripple tasted a drop: “Less sodium, more magnesium. The water will think differently.” Captain Shellbreaker pronounced, “Then we must learn to think as it does.”
PART SEVEN – THE MAKING OF HOME
14 – Rafts Reborn
Rafts of woven kelp thicker than redwood trunks; hydroponic mangrove towers; tide-mills singing beside phosphorescent reefs—civilization sprouted like coral. Games returned, so did lullabies, but now otter kits leapt through surf beneath twin moons. Flint, pardoned, built a temple of river-stones where homesick travelers could hold polished memories.
15 – The Council of Tides
Delegates met on a lagoon’s mirrored skin, debating whether to call more clans from Earth. Transmission windows opened. Holograms of Earth-otters shimmered: some grey-muzzled, some just born. “There is room,” Pup—now Navigator General—said. “But not for conquest. For kinship.” Votes lapped the lagoon’s edge like gentle waves. The Yes carried. Invitations rode light across twelve light-years.
EPILOGUE – RIPPLES WITHOUT END
Generations hence, star-raft builders will speak of the night their ancestors cracked shells under moonlight and decided to fling themselves skyward on nothing sturdier than hope. Otter kits yet unborn will orbit twin suns and ask, “Was Earth real?” Elders will answer, “Yes. As real as the next dream.” And somewhere, adrift between worlds, a bottle spins—inside, a note in uneven scrawl:
“We left our rivers not to escape, but to widen them. Wherever water can flow, otters will follow. If you find this, follow too. There is so much left to splash upon.”
The bottle glitters, turns, and vanishes into the starry dark, while far ahead the River-Stars continue, endless, playful, and bright.
THE END
Cancelling a background response
resp = client.responses.cancel("resp_123")

print(resp.status)
Streaming a background response
You can create a background Response and start streaming events from it right away. This may be helpful if you expect the client to drop the stream and want the option of picking it back up later. To do this, create a Response with both background and stream set to true. You will want to keep track of a “cursor” corresponding to the sequence_number you receive in each streaming event.
# Fire off an async response but also start streaming immediately
stream = client.responses.create(
    model="o3",
    input="Write a very long novel about otters in space.",
    background=True,
    stream=True,
)

cursor = None
for event in stream:
    print(event)
    cursor = event.sequence_number

# If your connection drops, the response continues running and you can reconnect:
# SDK support for resuming the stream is coming soon.
Responses API - Remote MCP servers
Model Context Protocol (MCP) is an open protocol that standardizes how applications provide tools and context to LLMs.
- The MCP tool in the Responses API allows developers to give the model access to tools hosted on Remote MCP servers.
- These are MCP servers maintained by developers and organizations across the internet that expose these tools to MCP clients, like the Responses API.
response = client.responses.create(
    model="gpt-4.1",
    tools=[
        {
            "type": "mcp",
            "server_label": "airnz_onsites",
            "server_url": "https://194f7e95-e2f9-40b5-b011-add4c44c8ecd-00-jqd3efoqiigj.riker.replit.dev/sse/",
            "require_approval": "never",
        },
    ],
    input="Can I bring more bags than my standard allowance?",
)
response
Response(id='resp_6871ebd68af08193af92701647b5b9f2093710a278b0bb36',
    created_at=1752296406.0,
    model='gpt-4.1-2025-04-14',
    object='response',
    status='completed',
    parallel_tool_calls=True,
    temperature=1.0,
    top_p=1.0,

    # ──────────────────────── MCP TOOL DISCOVERY ──────────────────────────
    output=[
        McpListTools(id='mcpl_6871ebd69ad88193b26f9176ff874a83093710a278b0bb36',
            type='mcp_list_tools',
            server_label='airnz_onsites',
            tools=[
                McpListToolsTool(
                    name='search',
                    description=(
                        "Search for documents using OpenAI Vector Store search.\n"
                        "Returns a list of semantically relevant matches. Use the "
                        "`fetch` tool for full content."
                    ),
                    input_schema={
                        "type": "object",
                        "required": ["query"],
                        "properties": {"query": {"type": "string", "title": "Query"}}
                    }
                ),
                McpListToolsTool(
                    name='fetch',
                    description=(
                        "Retrieve complete document content by ID after finding "
                        "relevant files with `search`."
                    ),
                    input_schema={
                        "type": "object",
                        "required": ["id"],
                        "properties": {"id": {"type": "string", "title": "Id"}}
                    }
                )
            ]
        ),
        # ───────────────────────── ASSISTANT MESSAGE ───────────────────────
        ResponseOutputMessage(id='msg_6871ebd816908193b4d8ca3979137ff8093710a278b0bb36',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ResponseOutputText(type='output_text',
                    text=(
                        "Yes, you **can bring more bags than your standard baggage allowance** on "
                        "Air New Zealand flights, but there are some things you need to know:\n\n"
                        "### 1. **Pre-Purchased Extra Baggage**\n"
                        "- Cheaper when added in advance via *Manage Booking*.\n\n"
                        "### 2. **Airport Excess Baggage Fees**\n"
                        "- Higher if you pay at the counter without pre-purchasing.\n\n"
                        "### 3. **Allowances and Limits**\n"
                        "- Up to 3 pre-purchased checked bags (more possible at the airport, space-permitting).\n"
                        "- Weight limits: 23 kg Economy, 32 kg premium cabins.\n\n"
                        "### 4. **Special or Oversized Items**\n"
                        "- Sports equipment counts toward allowance unless paid separately.\n\n"
                        "### 5. **Carry-on Allowance**\n"
                        "- Exceeding carry-on limits incurs fees or may be refused at boarding.\n\n"
                        "***Recommendation:*** check your fare’s allowance and add extra bags **before** travel "
                        "for the best price.\n\n"
                        "Would you like help finding exact costs for your flight?"
                    )
                )
            ]
        )
    ],
    # ─────────────────────── ACTIVE MCP SERVER CONFIG ─────────────────────
    tools=[
        Mcp(type='mcp',
            server_label='airnz_onsites',
            server_url='https://194f7e95-e2f9-40b5-b011-add4c44c8ecd-00-jqd3efoqiigj.riker.replit.dev/<redacted>',
            require_approval='never'
        )
    ],
    text=ResponseTextConfig(format=ResponseFormatText(type='text')),
    truncation='disabled',
    usage=ResponseUsage(
        input_tokens=278,
        output_tokens=339,
        total_tokens=617
    )
)
Responses API - Code interpreter (Python tools)
Allow models to write and run Python to solve problems.
The Code Interpreter tool allows models to write and run Python code in a sandboxed environment to solve complex problems in domains like data analysis, coding, and math. Use it for:
- Processing files with diverse data and formatting;
- Generating files with data and images of graphs;
- Writing and running code iteratively to solve problems—for example, a model that writes code that fails to run can keep rewriting and running that code until it succeeds;
- Boosting visual intelligence in our latest reasoning models (like o3 and o4-mini). The model can use this tool to crop, zoom, rotate, and otherwise process and transform images;
= """
instructions You are a personal math tutor. When asked a math question,
write and run code using the python tool to answer the question.
"""
= client.responses.create(
resp ="gpt-4.1",
model=[
tools
{"type": "code_interpreter",
"container": {"type": "auto"}
}
],=instructions,
instructionsinput="I need to solve the equation 3x + 11 = 14. Can you help me?",
)
Response(id='resp_6871eecbfe6081939814ef8cb7d976bc09b7d156e1b149f5',
    created_at=1752297164.0,
    model='gpt-4.1-2025-04-14',
    object='response',
    status='completed',
    temperature=1.0,
    top_p=1.0,
    parallel_tool_calls=True,

    # ── INSTRUCTIONS ───────────────────────────────────────────────
    instructions=(
        "You are a personal math tutor. When asked a math question, "
        "write and run code using the python tool to answer the question."
    ),

    # ── EXECUTED PYTHON CODE (Code Interpreter) ───────────────────
    output=[
        ResponseCodeInterpreterToolCall(id='ci_6871eecda2ec81938e36ec78c2701e3309b7d156e1b149f5',
            type='code_interpreter_call',
            status='completed',
            container_id='cntr_6871eecd56088190ad851b40b1f3cced009bf3a0b1a255b2',
            code=(
                "from sympy import symbols, Eq, solve\n\n"
                "# Define the variable\n"
                "x = symbols('x')\n\n"
                "# Define the equation\n"
                "equation = Eq(3*x + 11, 14)\n\n"
                "# Solve the equation\n"
                "solution = solve(equation, x)\n"
                "solution"
            ),
            outputs=None,   # would list stdout / plots / files, if any
            results=None
        ),
        # ── ASSISTANT’S NATURAL-LANGUAGE ANSWER ─────────────────────
        ResponseOutputMessage(id='msg_6871eed393fc8193b351f278e9fdea7509b7d156e1b149f5',
            role='assistant',
            status='completed',
            type='message',
            content=[
                ResponseOutputText(type='output_text',
                    text='The solution to the equation \\( 3x + 11 = 14 \\) is \\( x = 1 \\).',
                    annotations=[]
                )
            ]
        )
    ],

    # ── TOOLING DECLARED ──────────────────────────────────────────
    tools=[
        CodeInterpreter(type='code_interpreter',
            container=CodeInterpreterContainerCodeInterpreterToolAuto(type='auto')
        )
    ],
    text=ResponseTextConfig(format=ResponseFormatText(type='text')),
    truncation='disabled',
    usage=ResponseUsage(
        input_tokens=470,
        output_tokens=88,
        total_tokens=558
    )
)
While we call this tool Code Interpreter, the model knows it as the “python tool”. Models usually understand prompts that refer to the code interpreter tool; however, the most explicit way to invoke this tool is to ask for “the python tool” in your prompts.
Responses API - Deep research
Use deep research models for complex analysis and research tasks.
The o3-deep-research and o4-mini-deep-research models can find, analyze, and synthesize hundreds of sources to create a comprehensive report at the level of a research analyst. These models are optimized for browsing and data analysis, and can use web search and remote MCP servers to generate detailed reports, ideal for use cases like:
- Legal or scientific research
- Market analysis
- Reporting on large bodies of internal company data
To use deep research, call the Responses API with the model set to o3-deep-research or o4-mini-deep-research. You must include at least one data source: web search and/or remote MCP servers. You can also include the code interpreter tool to allow the model to perform complex analysis by writing code.
= "Can I bring more bags than my standard allowance for economy class of AirNZ flight from Singapore to Auckland?"
input_text
= client.responses.create(
resp ="o3-deep-research",
model#background=True,
={
reasoning"summary": "auto",
},=[
tools"type": "web_search_preview"},
{"type": "code_interpreter", "container": {"type": "auto"}},
{
{"type": "mcp",
"server_label": "airnz_onsite",
"server_url": "https://194f7e95-e2f9-40b5-b011-add4c44c8ecd-00-jqd3efoqiigj.riker.replit.dev/sse/",
"require_approval": "never",
},
],input=input_text
)
print(resp)
# ─────────── FULL RESPONSE WITH DETAILED REASONING SUMMARIES ───────────
Response(id='resp_6871f07519008193a806c19c0014998e024b5e3ab74a0955',
    created_at=1752297589.0,
    model='o3-deep-research-2025-06-26',
    status='completed',
    temperature=1.0,
    top_p=1.0,
    parallel_tool_calls=True,

    # ── 1 · MCP TOOL DISCOVERY ──────────────────────────────────────────
    output=[
        McpListTools(id='mcpl_6871f076b1e4…0955',
            server_label='airnz_onsite',
            type='mcp_list_tools',
            tools=[…]
        ),

        # ── 2 · REASONING TRACE (all summaries included) ────────────────
        # STEP 1
        ResponseReasoningItem(id='rs_6871f0784d6c…0955',
            type='reasoning',
            summary=[
                Summary(text=
                    "**Examining baggage allowance**\n\n"
                    "The user is asking about bringing more bags than the standard economy class "
                    "allowance for an Air New Zealand flight from Singapore to Auckland. "
                    "This suggests they're curious about how to manage extra baggage, additional "
                    "costs, or specific limitations. Generally, the \"standard allowance\" refers to "
                    "limits on checked and carry-on bags. For economy flights, there might typically "
                    "be a limit, such as one checked bag at 23 kg, plus a carry-on allowance. "
                    "Let's explore the details of this further!"
                ),
                Summary(text=
                    "**Clarifying baggage queries**\n\n"
                    "The question involves whether they can bring more bags than the standard "
                    "allowance on an economy class flight. The answer likely involves purchasing "
                    "additional baggage, which may vary by fare type or status and could incur fees. "
                    "I'll look for official Air NZ info for that specific route."
                ),
                Summary(text=
                    "**Exploring Air New Zealand baggage policies**\n\n"
                    "Typically, economy includes 1 checked bag plus a carry-on. They can buy extra "
                    "bags online or at the airport. I need the most accurate details."
                )
            ]
        ),
        # STEP 2
        ResponseReasoningItem(id='rs_6871f08734fc…0955',
            type='reasoning',
            summary=[
                Summary(text=
                    "**Reviewing baggage FAQs**\n\n"
                    "Found a PDF titled *airnz-baggage-FAQ.pdf*. Snippet matches the user's query, "
                    "mentioning Prepaid Extra Bags."
                ),
                Summary(text=
                    "**Deciding on search results**\n\n"
                    "Next step: click that result to see full details."
                )
            ]
        ),
        ResponseReasoningItem(id='rs_6871f0898030…', type='reasoning', summary=[]),  # STEP 3 (no summary)
        ResponseReasoningItem(id='rs_6871f08ac8cc…', type='reasoning', summary=[]),  # STEP 4 (no summary)
        # STEP 5
        ResponseReasoningItem(id='rs_6871f08ba708…0955',
            type='reasoning',
            summary=[
                Summary(text=
                    "**Analyzing baggage policy**\n\n"
                    "Noted possible typo ('that' vs 'than'). Prepaid Extra Bags can be bought up to "
                    "90 min before international departure."
                ),
                Summary(text=
                    "**Exploring PDF details**\n\n"
                    "Asterisk after *Prepaid Extra Bags* implies footnote; consider scrolling for it."
                )
            ]
        ),
        ResponseReasoningItem(id='rs_6871f08f8ea4…', type='reasoning', summary=[]),  # STEP 6
        ResponseReasoningItem(id='rs_6871f090072c…', type='reasoning', summary=[]),  # STEP 7
        # STEP 8
        ResponseReasoningItem(id='rs_6871f0909518…0955',
            type='reasoning',
            summary=[
                Summary(text=
                    "**Clarifying baggage allowances**\n\n"
                    "Ensure I specify this applies to *economy* class; allowances can differ."
                ),
                Summary(text=
                    "**Clarifying extra baggage policies**\n\n"
                    "Highlight purchase window (up to 90 min intl), note cheaper in advance."
                ),
                Summary(text=
                    "**Summarizing baggage options**\n\n"
                    "Plan to answer with bullet points: Yes, you can; buy Prepaid Extra Bags; limits; "
                    "timing; cost advantages."
                )
            ]
        ),
        # STEP 9
        ResponseReasoningItem(id='rs_6871f0a3acc4…0955',
            type='reasoning',
            summary=[
                Summary(text=
                    "**Deciphering the PDF's purpose**\n\n"
                    "PDF looks like an FAQ rather than specific table of allowances. It still answers "
                    "the user's practical question."
                )
            ]
        ),
        # STEP 10
        ResponseReasoningItem(id='rs_6871f0af10d8…0955',
            type='reasoning',
            summary=[
                Summary(text=
                    "**Reviewing formatting issues**\n\n"
                    "Minor spacing/line-break issues in PDF snippet; decide whether to paraphrase."
                ),
                Summary(text=
                    "**Considering citation and paraphrasing options**\n\n"
                    "Paraphrase while preserving citations; mention cost benefit of Prepaid Extra Bags."
                ),
                Summary(text=
                    "**Organizing baggage information**\n\n"
                    "Bullet-point list with heading “Bringing Additional Bags…” would be clear."
                )
            ]
        ),
        ResponseReasoningItem(id='rs_6871f0c50d14…', type='reasoning', summary=[]),  # STEP 11

        # ── MCP SEARCH / FETCH CALLS (snipped) ───────────────────────────
        McpCall(name='search', arguments={…}, output='{…}'),
        McpCall(name='fetch', arguments={…}, output='{…}'),
        # additional fetch calls
        …

        # ── FINAL ASSISTANT MESSAGE ──────────────────────────────────────
        ResponseOutputMessage(id='msg_6871f0c5dab4…0955',
            role='assistant',
            content=[ResponseOutputText(text='## Bringing Additional Bags …', annotations=[…])]
        )
    ],
    tools=[ CodeInterpreter(), WebSearchTool(), Mcp(server_label='airnz_onsite', …) ],
    usage=ResponseUsage(input_tokens=38667, output_tokens=2728, total_tokens=41395)
)
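The reasoning summaries shown above can also be pulled out programmatically; a small sketch that filters `resp.output` for reasoning items (steps without a summary simply yield an empty list):
# Print each reasoning step's summaries; steps without one print nothing
for item in resp.output:
    if item.type == "reasoning":
        for s in item.summary:
            print(s.text, "\n---")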
Responses API - Webhooks
OpenAI webhooks allow you to receive real-time notifications about events in the API, such as when a batch completes, a background response is generated, or a fine-tuning job finishes. Webhooks are delivered to an HTTP endpoint you control, following the Standard Webhooks specification. The full list of webhook events can be found in the API reference.
# webhook server
import os
from openai import OpenAI, InvalidWebhookSignatureError
from flask import Flask, request, Response

app = Flask(__name__)
client = OpenAI(webhook_secret=os.environ["OPENAI_WEBHOOK_SECRET"])

@app.route("/webhook", methods=["POST"])
def webhook():
    try:
        # with webhook_secret set above, unwrap will raise an error if the signature is invalid
        event = client.webhooks.unwrap(request.data, request.headers)

        if event.type == "response.completed":
            response_id = event.data.id
            response = client.responses.retrieve(response_id)
            print("Response output:", response.output_text)

        return Response(status=200)
    except InvalidWebhookSignatureError as e:
        print("Invalid signature", e)
        return Response("Invalid signature", status=400)

if __name__ == "__main__":
    app.run(port=8000)
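Note that OpenAI must be able to reach this endpoint over the public internet (a tunnelling tool helps during local development), and the endpoint URL and its signing secret, the value behind `OPENAI_WEBHOOK_SECRET` here, are configured in your project's webhook settings in the OpenAI dashboard.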
resp = client.responses.create(
    model="o3",
    input="Describe the full history of the Australian Football League.",
    background=True,
)
print(resp.status)
queued
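Since a background response starts out queued, you can either wait for the response.completed webhook above or poll it yourself; a minimal polling sketch (the 10-second interval is arbitrary):
import time

# Poll until the background run leaves the queued / in-progress states
while resp.status in ("queued", "in_progress"):
    time.sleep(10)
    resp = client.responses.retrieve(resp.id)

print(resp.status)
print(resp.output_text)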