import typer
from rich.prompt import Prompt
from typing import Optional

from phi.agent import Agent
from phi.knowledge.pdf import PDFUrlKnowledgeBase
from phi.vectordb.lancedb import LanceDb

# type: ignore

# URI handed to LanceDb below (a local directory path in this example).
db_url = "/tmp/lancedb"

# Knowledge base sourced from the Thai recipes PDF and backed by the
# "recipes" table in LanceDB.
knowledge_base = PDFUrlKnowledgeBase(
    vector_db=LanceDb(uri=db_url, table_name="recipes"),
    urls=["https://phi-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf"],
)

# Comment out after first run
knowledge_base.load(recreate=False)
def pdf_agent(user: str = "user") -> None:
    """Run an interactive command-line chat with the PDF-backed agent.

    Builds an ``Agent`` that is given the module-level ``knowledge_base``,
    prints which run is in use, then loops: prompt for a message, print the
    agent's response, and repeat until the user enters ``exit`` or ``bye``.

    Args:
        user: Identifier passed to the agent as ``user_id`` and shown in the
            input prompt.
    """
    # No earlier run is looked up in this script, so run_id always starts
    # as None and the agent is created without a preset run id.
    run_id: Optional[str] = None

    # NOTE(review): `knowledge_base=` and `use_tools=` look like keyword names
    # from the older Assistant API; confirm that phi.agent.Agent accepts them
    # (it may expect `knowledge=` / `search_knowledge=` instead).
    agent = Agent(
        run_id=run_id,
        user_id=user,
        knowledge_base=knowledge_base,
        use_tools=True,
        show_tool_calls=True,
        # Uncomment the following line to use traditional RAG
        # add_references_to_prompt=True,
    )
    if run_id is None:
        # Report the run id exposed by the agent (presumably generated by
        # Agent when none is supplied).
        run_id = agent.run_id
        print(f"Started Run: {run_id}\n")
    else:
        # Only reachable if run_id above is changed to an existing run id.
        print(f"Continuing Run: {run_id}\n")

    while True:
        message = Prompt.ask(f"[bold] :sunglasses: {user} [/bold]")
        # Typing "exit" or "bye" ends the chat loop.
        if message in ("exit", "bye"):
            break
        agent.print_response(message)

# Script entry point: when this file is executed directly, hand pdf_agent to
# typer.run so it is invoked as a command-line program.
if __name__ == "__main__":
    typer.run(pdf_agent)

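# LanceDb Params
#
# | Parameter   | Type         | Default | Description                                |
# | ----------- | ------------ | ------- | ------------------------------------------ |
# | uri         | str          | -       | The URI to connect to.                     |
# | table       | LanceTable   | -       | The Lance table to use.                    |
# | table_name  | str          | -       | The name of the table to use.              |
# | connection  | DBConnection | -       | The database connection to use.            |
# | api_key     | str          | -       | The API key to use.                        |
# | embedder    | Embedder     | -       | The embedder to use.                       |
# | search_type | SearchType   | vector  | The search type to use.                    |
# | distance    | Distance     | cosine  | The distance to use.                       |
# | nprobes     | int          | -       | The number of probes to use. (More Info)   |
# | reranker    | Reranker     | -       | The reranker to use. (More Info)           |
# | use_tantivy | bool         | -       | Whether to use tantivy.                    |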