Use live web search as your RAG retrieval layer

Superhighway guides

Most RAG tutorials retrieve from a static vector database built from your own documents. But when your agent needs current information — today's news, recent research, live product data — a vector store is the wrong tool. Superhighway gives you live web retrieval over HTTP, so you can feed fresh results straight into your LLM context without any embedding pipeline.

The pattern

Traditional RAG:

query → embed → vector DB → top-k chunks → LLM

Web-retrieval RAG with Superhighway:

query → Superhighway /search or /research → structured results → LLM

The retrieval step is a live HTTP call instead of a vector lookup. No ingestion, no embedding model, no index to maintain. Results are always fresh.

Standalone Python (no framework)

import os, requests

# Bearer token for the API; indexing os.environ raises KeyError if the variable is unset.
API_KEY = os.environ["SUPERHIGHWAY_API_KEY"]
# Root URL that the endpoint paths (/search, /research) are appended to.
BASE = "https://superhighway.walls.sh"

def retrieve(query: str, n: int = 5) -> list[dict]:
    """Run a live web search and return the raw result entries.

    Args:
        query: Search text, sent as the ``q`` query parameter.
        n: Number of results to request, sent as the ``count`` parameter.

    Returns:
        The ``results`` list from the JSON response body; each entry is
        expected to carry ``title``, ``url`` and ``description`` keys.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    endpoint = f"{BASE}/search"
    search_params = {"q": query, "count": n}
    auth_headers = {"Authorization": f"Bearer {API_KEY}"}

    response = requests.get(
        endpoint,
        params=search_params,
        headers=auth_headers,
        timeout=15,
    )
    response.raise_for_status()

    payload = response.json()
    return payload["results"]

def rag_answer(question: str, client, model: str = "claude-opus-4-8") -> str:
    """Answer *question* with an Anthropic model grounded in live search results.

    Args:
        question: The user's question; it is also used verbatim as the
            search query.
        client: An ``anthropic.Anthropic`` client (or any object exposing a
            compatible ``messages.create`` method).
        model: Model identifier forwarded to the Messages API.

    Returns:
        The text of the first content block in the model's reply.
    """
    results = retrieve(question)
    # Number the sources from 1 so the model can cite them as [1], [2], ...
    context = "\n\n".join(
        f"[{i}] {r['title']}\n{r['url']}\n{r['description']}"
        for i, r in enumerate(results, start=1)
    )
    response = client.messages.create(
        model=model,
        max_tokens=1024,
        # The Anthropic Messages API takes the system prompt as a top-level
        # argument; "system" is not a valid role inside `messages`.
        system="Answer the question using only the search results below. Cite sources by number.",
        messages=[
            {"role": "user", "content": f"Search results:\n{context}\n\nQuestion: {question}"},
        ],
    )
    return response.content[0].text

# from anthropic import Anthropic
# print(rag_answer("What happened in AI this week?", Anthropic()))

One-call deep retrieval: /research

For questions that benefit from reading multiple pages, use /research. It searches, fetches the top result pages, and synthesises them — all in one call. Use this as your retrieval step when you need paragraph-level content, not just snippets:

def retrieve_deep(query: str) -> str:
    """Call the ``/research`` endpoint and return its synthesised answer.

    Args:
        query: Research question, sent as the ``q`` query parameter.

    Returns:
        The ``answer`` field of the JSON response. It is already synthesised,
        so it can be passed directly to an LLM as context.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    endpoint = f"{BASE}/research"
    response = requests.get(
        endpoint,
        params={"q": query},
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=30,  # longer than /search: this endpoint reads pages in real time
    )
    response.raise_for_status()

    body = response.json()
    return body["answer"]

# Now your RAG pipeline is one extra HTTP call, not a whole embedding stack.
# `context` receives the synthesised answer string returned by retrieve_deep.
context = retrieve_deep("latest advances in protein folding 2025")

Expect 5–15 s for /research — it's reading pages in real time. Set your HTTP client timeout to 30 s.

LangChain LCEL retriever

from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import requests, os

class SuperhighwayRetriever(BaseRetriever):
    """LangChain retriever that fetches documents from the Superhighway ``/search`` endpoint."""

    # Bearer token; defaults to an empty string when the env var is unset.
    api_key: str = os.environ.get("SUPERHIGHWAY_API_KEY", "")
    # Root URL that "/search" is appended to.
    base_url: str = "https://superhighway.walls.sh"
    # Number of results requested per query (sent as ``count``).
    k: int = 5

    def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun) -> list[Document]:
        """Search the web for *query* and wrap each hit in a ``Document``.

        The hit's description becomes ``page_content``; its title and URL go
        into ``metadata``. ``run_manager`` is accepted but not used.

        Raises:
            requests.HTTPError: If the server responds with an error status.
        """
        response = requests.get(
            f"{self.base_url}/search",
            params={"q": query, "count": self.k},
            headers={"Authorization": f"Bearer {self.api_key}"},
            timeout=15,
        )
        response.raise_for_status()

        hits = response.json()["results"]
        return [
            Document(
                page_content=hit["description"],
                metadata={"title": hit["title"], "url": hit["url"]},
            )
            for hit in hits
        ]

# No arguments: the retriever uses its field defaults (API key from the environment, k=5).
retriever = SuperhighwayRetriever()

# Template with two placeholders: {context} for the search results and {question} for the query.
prompt = ChatPromptTemplate.from_template(
    "Answer the question using these web search results:\n{context}\n\nQuestion: {question}"
)

def format_docs(docs):
    """Render documents as Markdown-linked entries separated by blank lines.

    Each document becomes ``[title](url)`` on one line, followed by its
    ``page_content`` on the next line.
    """
    entries = []
    for doc in docs:
        meta = doc.metadata
        entries.append(f"[{meta['title']}]({meta['url']})\n{doc.page_content}")
    return "\n\n".join(entries)

# LCEL pipeline: the input string feeds the retriever (whose documents are
# rendered by format_docs) and is also passed through unchanged as the
# question; the filled prompt goes to the chat model and the reply is parsed
# to a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI(model="gpt-4o")
    | StrOutputParser()
)

# Run the whole pipeline once for a single question.
answer = chain.invoke("What are the top AI tools released this month?")
print(answer)

LlamaIndex query engine

from llama_index.core import QueryBundle
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, TextNode
from llama_index.core.query_engine import RetrieverQueryEngine
import requests, os

class SuperhighwayRetriever(BaseRetriever):
    """LlamaIndex retriever that fetches nodes from the Superhighway ``/search`` endpoint."""

    def __init__(self, api_key: str, base_url: str = "https://superhighway.walls.sh", k: int = 5):
        """Store the connection settings, then initialise the base retriever.

        Args:
            api_key: Bearer token sent with every request.
            base_url: Root URL that "/search" is appended to.
            k: Number of results requested per query (sent as ``count``).
        """
        self._k = k
        self._base_url = base_url
        self._api_key = api_key
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> list[NodeWithScore]:
        """Search the web for the bundle's query string and return scored nodes.

        Each hit's description becomes the node text; its title and URL go
        into the node metadata. Every node is given the same score of 1.0.

        Raises:
            requests.HTTPError: If the server responds with an error status.
        """
        response = requests.get(
            f"{self._base_url}/search",
            params={"q": query_bundle.query_str, "count": self._k},
            headers={"Authorization": f"Bearer {self._api_key}"},
            timeout=15,
        )
        response.raise_for_status()

        hits = response.json()["results"]
        return [
            NodeWithScore(
                node=TextNode(
                    text=hit["description"],
                    metadata={"title": hit["title"], "url": hit["url"]},
                ),
                score=1.0,
            )
            for hit in hits
        ]

# Indexing os.environ raises KeyError here if SUPERHIGHWAY_API_KEY is unset.
retriever = SuperhighwayRetriever(api_key=os.environ["SUPERHIGHWAY_API_KEY"])
# Only the retriever is supplied; every other setting is left to from_args' defaults.
query_engine = RetrieverQueryEngine.from_args(retriever=retriever)

# Run one query through the engine and print the response.
response = query_engine.query("What are the latest developments in quantum computing?")
print(response)

When to use web retrieval vs. a vector store

| Use case | Retrieval approach |
| --- | --- |
| Current events, news, live data | Superhighway /search or /research |
| Your own docs, PDFs, code bases | Vector store (Chroma, Pinecone, Weaviate) |
| Mix of fresh web + your docs | Hybrid: Superhighway + vector store in parallel, then merge |
| Deep synthesis from multiple pages | Superhighway /research (reads pages for you) |

Install

# Run only the line for the framework you use.
pip install requests langchain-core langchain-openai  # LangChain path
pip install requests llama-index                       # LlamaIndex path

Get your API key at /pricing (free tier: 1,000 calls/month). Set it as SUPERHIGHWAY_API_KEY. For more integration options see the full guide list.