Agents
Vision Agent
# Vision agent example: send a text prompt plus image URLs to an OpenAI chat
# model through a phi Agent and print the reply.
from phi.agent import Agent

# `Agent(model=...)` together with `OpenAIChat(id=...)` is the phi.model API,
# so the class is imported from phi.model rather than the older phi.llm package.
# NOTE(review): confirm against the installed phidata version.
from phi.model.openai import OpenAIChat

# Every request below uses the same sample photo, so the URL lives in one place.
BOARDWALK_IMAGE_URL = (
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/"
    "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/"
    "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
)

agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    markdown=True,
)

# Single Image
# The message is a list of content parts: one text part and one image part.
agent.print_response(
    [
        {"type": "text", "text": "What's in this image, describe in 1 sentence"},
        {"type": "image_url", "image_url": BOARDWALK_IMAGE_URL},
    ]
)

# Multiple Images
# Both image parts point at the same photo, exactly as in the original example.
agent.print_response(
    [
        {
            "type": "text",
            "text": "Is there any difference between these. Describe them in 1 sentence.",
        },
        {"type": "image_url", "image_url": BOARDWALK_IMAGE_URL},
        {"type": "image_url", "image_url": BOARDWALK_IMAGE_URL},
    ],
    # Also set on the agent above; kept here as the original call passes it.
    markdown=True,
)
Was this page helpful?