Use live web search as your RAG retrieval layer

Superhighway guides

Most RAG tutorials retrieve from a static vector database built from your own documents. But when your agent needs current information — today's news, recent research, live product data — a vector store is the wrong tool. Superhighway gives you live web retrieval over HTTP, so you can feed fresh results straight into your LLM context without any embedding pipeline.

The pattern

Traditional RAG:

query → embed → vector DB → top-k chunks → LLM

Web-retrieval RAG with Superhighway:

query → Superhighway /search or /research → structured results → LLM

The retrieval step is a live HTTP call instead of a vector lookup. No ingestion, no embedding model, no index to maintain. Results are always fresh.

Standalone Python (no framework)

import os, requests

# Read the API key from the environment; raises KeyError when this line runs
# if SUPERHIGHWAY_API_KEY is not set.
API_KEY = os.environ["SUPERHIGHWAY_API_KEY"]
# Base URL that endpoint paths such as /search are appended to.
BASE = "https://superhighway.walls.sh"

def retrieve(query: str, n: int = 5) -> list[dict]:
    """Run a live Superhighway web search and return its result entries.

    Issues a GET request to ``{BASE}/search`` with ``query`` sent as ``q`` and
    ``n`` sent as ``count``, authenticated with a bearer token built from
    ``API_KEY``. The request times out after 15 seconds.

    Args:
        query: The search text.
        n: Value sent as the ``count`` query parameter.

    Returns:
        The value stored under ``"results"`` in the JSON response body.

    Raises:
        requests.HTTPError: If the response carries an error status code.
        KeyError: If the JSON body has no ``"results"`` key.
    """
    endpoint = f"{BASE}/search"
    search_params = {"q": query, "count": n}
    auth_headers = {"Authorization": f"Bearer {API_KEY}"}

    response = requests.get(
        endpoint,
        params=search_params,
        headers=auth_headers,
        timeout=15,
    )
    response.raise_for_status()

    payload = response.json()
    return payload["results"]  # [{title, url, description}, ...]

def rag_answer(question: str, client) -> str:
    """Answer ``question`` with an LLM grounded in live web search results.

    Fetches search results with ``retrieve``, renders them as a numbered
    context block, and asks the model to answer using only those results,
    citing sources by number.

    Args:
        question: The user's question; also used verbatim as the search query.
        client: An Anthropic SDK client (for example ``anthropic.Anthropic()``),
            i.e. an object exposing ``messages.create(...)``.

    Returns:
        The text of the first content block in the model's reply.
    """
    results = retrieve(question)
    context = "\n\n".join(
        f"[{i}] {r['title']}\n{r['url']}\n{r['description']}"
        for i, r in enumerate(results, start=1)
    )
    # The Anthropic Messages API is reached through ``client.messages.create``
    # and takes the system prompt as a top-level ``system`` argument rather
    # than as a "system"-role entry in ``messages``. The previous
    # ``client.chat.completions.create`` call is the OpenAI client shape and
    # does not match the ``.content[0].text`` response access used below.
    # NOTE(review): confirm the model ID below is available to your account.
    response = client.messages.create(
        model="claude-opus-4-8",
        max_tokens=1024,
        system="Answer the question using only the search results below. Cite sources by number.",
        messages=[
            {"role": "user", "content": f"Search results:\n{context}\n\nQuestion: {question}"},
        ],
    )
    return response.content[0].text

# from anthropic import Anthropic
# print(rag_answer("What happened in AI this week?", Anthropic()))

One-call deep retrieval: /research

For questions that benefit from reading multiple pages, use /research. It searches, fetches the top result pages, and synthesises them — all in one call. Use this as your retrieval step when you need paragraph-level content, not just snippets:

def retrieve_deep(query: str) -> str:
    """Fetch a synthesised answer for ``query`` from the ``/research`` endpoint.

    Issues a GET request to ``{BASE}/research`` with ``query`` sent as ``q``,
    authenticated with a bearer token built from ``API_KEY``. The request
    times out after 30 seconds.

    Returns:
        The value stored under ``"answer"`` in the JSON response body.

    Raises:
        requests.HTTPError: If the response carries an error status code.
        KeyError: If the JSON body has no ``"answer"`` key.
    """
    endpoint = f"{BASE}/research"
    auth_headers = {"Authorization": f"Bearer {API_KEY}"}

    response = requests.get(
        endpoint,
        params={"q": query},
        headers=auth_headers,
        timeout=30,
    )
    response.raise_for_status()

    # Already synthesised, so it can be passed directly to your LLM as context.
    body = response.json()
    return body["answer"]

# Now your RAG pipeline is one extra HTTP call, not a whole embedding stack.
# This line performs the live request when it runs; the returned string is
# what you place into the LLM prompt as context.
context = retrieve_deep("latest advances in protein folding 2025")

Expect 5–15 s for /research — it's reading pages in real time. Set your HTTP client timeout to 30 s.

LangChain LCEL retriever

from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import requests, os

class SuperhighwayRetriever(BaseRetriever):
    """LangChain retriever that queries Superhighway's live ``/search`` endpoint.

    Each search result becomes one ``Document`` whose text is the result's
    description and whose metadata carries the result's title and URL.
    """

    # Default is read from the environment once, when the class body executes;
    # it falls back to an empty string if the variable is unset.
    api_key: str = os.environ.get("SUPERHIGHWAY_API_KEY", "")
    base_url: str = "https://superhighway.walls.sh"
    # Value sent as the ``count`` query parameter.
    k: int = 5

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
    ) -> list[Document]:
        """Search for ``query`` and wrap each result in a ``Document``.

        ``run_manager`` is accepted to satisfy the base-class signature and is
        not used here. Raises ``requests.HTTPError`` on an error status code.
        """
        response = requests.get(
            f"{self.base_url}/search",
            params={"q": query, "count": self.k},
            headers={"Authorization": f"Bearer {self.api_key}"},
            timeout=15,
        )
        response.raise_for_status()

        documents: list[Document] = []
        for hit in response.json()["results"]:
            documents.append(
                Document(
                    page_content=hit["description"],
                    metadata={"title": hit["title"], "url": hit["url"]},
                )
            )
        return documents

# Instantiate with the class defaults: API key taken from the environment, k=5.
retriever = SuperhighwayRetriever()

# Prompt template with two placeholders, {context} and {question}.
prompt = ChatPromptTemplate.from_template(
    "Answer the question using these web search results:\n{context}\n\nQuestion: {question}"
)

def format_docs(docs):
    """Render documents as ``[title](url)`` link lines followed by their text.

    Each document contributes a markdown-style link built from its
    ``metadata["title"]`` and ``metadata["url"]``, a newline, and its
    ``page_content``. Documents are separated by a blank line; an empty
    input yields an empty string.
    """
    rendered = []
    for doc in docs:
        meta = doc.metadata
        rendered.append(f"[{meta['title']}]({meta['url']})\n{doc.page_content}")
    return "\n\n".join(rendered)

# LCEL pipeline. The input question is sent to the retriever, whose documents
# are rendered by format_docs to fill {context}; RunnablePassthrough forwards
# the same question unchanged to fill {question}. The filled prompt goes to
# the chat model, and StrOutputParser reduces the reply to a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI(model="gpt-4o")
    | StrOutputParser()
)

# Running the chain performs a live search request followed by an LLM call.
answer = chain.invoke("What are the top AI tools released this month?")
print(answer)

LlamaIndex query engine

from llama_index.core import QueryBundle
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, TextNode
from llama_index.core.query_engine import RetrieverQueryEngine
import requests, os

class SuperhighwayRetriever(BaseRetriever):
    """LlamaIndex retriever that queries Superhighway's live ``/search`` endpoint.

    Each search result becomes one ``TextNode`` (description as text, title
    and URL as metadata) wrapped in a ``NodeWithScore`` with a fixed score.
    """

    def __init__(self, api_key: str, base_url: str = "https://superhighway.walls.sh", k: int = 5):
        """Store connection settings, then run the base-class initialiser.

        Args:
            api_key: Token sent as the bearer credential.
            base_url: Base URL that ``/search`` is appended to.
            k: Value sent as the ``count`` query parameter.
        """
        self._api_key = api_key
        self._base_url = base_url
        self._k = k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> list[NodeWithScore]:
        """Search for the bundle's query string and return scored text nodes.

        Raises ``requests.HTTPError`` on an error status code.
        """
        response = requests.get(
            f"{self._base_url}/search",
            params={"q": query_bundle.query_str, "count": self._k},
            headers={"Authorization": f"Bearer {self._api_key}"},
            timeout=15,
        )
        response.raise_for_status()

        scored_nodes: list[NodeWithScore] = []
        for hit in response.json()["results"]:
            node = TextNode(
                text=hit["description"],
                metadata={"title": hit["title"], "url": hit["url"]},
            )
            # Every result is given the same constant score of 1.0.
            scored_nodes.append(NodeWithScore(node=node, score=1.0))
        return scored_nodes

# os.environ[...] raises KeyError if SUPERHIGHWAY_API_KEY is not set.
retriever = SuperhighwayRetriever(api_key=os.environ["SUPERHIGHWAY_API_KEY"])
# NOTE(review): no LLM is passed here, so response synthesis presumably uses
# LlamaIndex's globally configured default LLM — confirm for your setup.
query_engine = RetrieverQueryEngine.from_args(retriever=retriever)

# Running the query performs a live search request before the answer is synthesised.
response = query_engine.query("What are the latest developments in quantum computing?")
print(response)

When to use web retrieval vs. a vector store

Use case	Retrieval approach
Current events, news, live data	Superhighway `/search` or `/research`
Your own docs, PDFs, code bases	Vector store (Chroma, Pinecone, Weaviate)
Mix of fresh web + your docs	Hybrid: Superhighway + vector store in parallel, then merge
Deep synthesis from multiple pages	Superhighway `/research` (reads pages for you)

Install

pip install requests langchain-core langchain-openai  # LangChain path
pip install requests llama-index                       # LlamaIndex path

Get your API key at /pricing (free tier: 1,000 calls/month). Set it as the SUPERHIGHWAY_API_KEY environment variable, which the Python examples above read. For more integration options, see the full guide list.