Build a Real-Time News Retriever with Haystack + wpnews (Python, 2025)
2026-05-19 · 8 min read
Tutorial: implement a custom Haystack component that fetches live AI news from wpnews and plugs it into any RAG pipeline — morning briefing, entity search, or topic digest as a Retriever.
Why Haystack + wpnews?
Haystack by deepset is the leading open-source framework for building production RAG pipelines. Unlike agent frameworks, Haystack uses a Pipeline + Component model: you compose data flows by connecting typed input/output pins between components. Any component that produces List[Document] can plug into a Haystack Generator, Ranker, or Reranker.
wpnews provides live AI news that your pipeline can retrieve, rank, and inject into prompts — solving the "LLM doesn't know what happened this week" problem for knowledge-intensive RAG applications.
What you'll build
- A WPNewsRetriever custom component — implements Haystack's Retriever interface, returns List[Document]
- A WPNewsMorningBriefing component — injects velocity + breaking stories + entities into the pipeline context
- A complete RAG pipeline:
WPNewsRetriever → PromptBuilder → OpenAIGenerator
Step 1 — Install
pip install haystack-ai wpnews
Get a free wpnews API key at wpnews.pro/api#get-key.
Step 2 — Build the WPNewsRetriever component
Haystack components are decorated with @component. The @component.output_types annotation declares what the component emits — Haystack uses this to validate pipeline connections at definition time.
import os
from haystack import component, Document
from haystack.core.component import Component
from wpnews import WPNews
# Module-level wpnews client shared by the components below; the API key is
# read from WPNEWS_API_KEY and falls back to an empty string when unset.
_client = WPNews(api_key=os.getenv("WPNEWS_API_KEY", ""))
@component
class WPNewsRetriever:
    """Haystack component that converts wpnews articles into Documents.

    Every Document's content is the article title, a blank line, and the
    summary; ``meta`` holds title, slug, published_at, topics, entities,
    and the article URL.
    """

    @component.output_types(documents=list[Document])
    def run(
        self,
        query: str = "",
        topic: str = "",
        limit: int = 10,
        lang: str = "en",
    ) -> dict:
        """Fetch articles and return them under the ``documents`` key.

        A non-empty ``query`` triggers a search. Otherwise a non-empty
        ``topic`` filters the news feed, and with neither set the
        unfiltered feed is requested.
        """
        if query:
            fetched = _client.search(q=query, lang=lang, limit=limit)
        elif not topic:
            fetched = _client.get_news(lang=lang, limit=limit)
        else:
            fetched = _client.get_news(topic=topic, lang=lang, limit=limit)

        return {
            "documents": [
                Document(
                    content=f"{item.get('title', '')}\n\n{item.get('summary', '')}",
                    meta={
                        "title": item.get("title", ""),
                        "slug": item.get("slug", ""),
                        "published_at": item.get("published_at", ""),
                        "topics": item.get("topics", []),
                        "entities": item.get("entities", []),
                        "url": f"https://wpnews.pro/news/{item.get('slug', '')}",
                    },
                )
                for item in fetched
            ]
        }
Step 3 — Build the WPNewsMorningBriefing component
This component fetches situational awareness data and emits a compact string suitable for injection into a system prompt:
@component
class WPNewsMorningBriefing:
    """Fetches today's AI news briefing and returns a context string."""

    @component.output_types(context=str, velocity_status=str)
    def run(self, hours: int = 6, lang: str = "en") -> dict:
        """Build a compact, prompt-ready summary of the morning briefing.

        Args:
            hours: Passed to ``get_morning_briefing`` as ``hours``.
            lang: Passed to ``get_morning_briefing`` as ``lang``.

        Returns:
            A dict with ``context`` (newline-joined status line, up to three
            breaking stories, and up to five trending entity names) and
            ``velocity_status`` (the reported status, ``"normal"`` if absent).
        """
        data = _client.get_morning_briefing(lang=lang, hours=hours)
        vel = data.get("velocity", {})
        articles = data.get("hot_articles", [])
        entities = data.get("trending_entities", [])
        status = vel.get("status", "normal")

        lines = [
            f"AI News Status: {status.upper()} "
            f"({vel.get('ratio', 1.0):.1f}x baseline)",
        ]
        if articles:
            lines.append("Breaking stories:")
            # Read the title with .get, like every other field, so a record
            # without a title does not abort the whole briefing.
            lines.extend(
                f" • {a.get('title', '')} [{a.get('hours_ago', 0):.1f}h ago]"
                for a in articles[:3]
            )
        # Skip entities that carry no name instead of raising KeyError.
        names = [e["name"] for e in entities[:5] if e.get("name")]
        if names:
            lines.append(f"Trending: {', '.join(names)}")
        return {
            "context": "\n".join(lines),
            "velocity_status": status,
        }
Step 4 — Assemble the RAG pipeline
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
# PromptBuilder derives its input sockets from the Jinja variables used in the
# template, so briefing_context, documents, and question must all appear here
# for the connect() and run() calls below to resolve.
PROMPT_TEMPLATE = """
You are an AI news analyst. Use the news context and retrieved articles to answer the question.

## Current AI News Situation
{{ briefing_context }}

## Retrieved Articles
{% for doc in documents %}
- {{ doc.meta['title'] }} ({{ doc.meta['url'] }})
  {{ doc.content }}
{% endfor %}

## Question
{{ question }}

Answer:
"""

pipeline = Pipeline()
pipeline.add_component("briefing", WPNewsMorningBriefing())
pipeline.add_component("retriever", WPNewsRetriever())
pipeline.add_component("prompt_builder", PromptBuilder(template=PROMPT_TEMPLATE))
pipeline.add_component("llm", OpenAIGenerator(model="gpt-4o"))

# Connect components
pipeline.connect("briefing.context", "prompt_builder.briefing_context")
pipeline.connect("retriever.documents", "prompt_builder.documents")
pipeline.connect("prompt_builder.prompt", "llm.prompt")

# Run the pipeline; `question` is the only template variable that is not fed
# by an upstream component, so it is supplied directly.
result = pipeline.run({
    "briefing": {"hours": 6},
    "retriever": {"query": "OpenAI GPT-5", "limit": 5},
    "prompt_builder": {"question": "What's the latest news about GPT-5?"},
})
print(result["llm"]["replies"][0])
Step 5 — Conditional pipeline with burst routing
Use Haystack's ConditionalRouter to take different paths based on velocity status — comprehensive search on burst days, compact briefing otherwise:
from haystack.components.routers import ConditionalRouter
# ConditionalRouter conditions and outputs are Jinja expressions; the router's
# inputs are the variables they mention. Constant conditions would give the
# router no `velocity_status` input and make the connect() below fail.
# NOTE(review): the original placeholders were lost; the 'burst' status value
# and the `query` variable are reconstructed from the surrounding text.
# Confirm them against the wpnews API, and pass `query` in
# pipeline.run({"router": {"query": ...}}).
routes = [
    {
        "condition": "{{ velocity_status == 'burst' }}",
        "output": "{{ query }}",
        "output_name": "burst_query",
        "output_type": str,
    },
    {
        "condition": "{{ velocity_status != 'burst' }}",
        "output": "{{ query }}",
        "output_name": "normal_query",
        "output_type": str,
    },
]
pipeline.add_component("router", ConditionalRouter(routes))
pipeline.connect("briefing.velocity_status", "router.velocity_status")
# burst_query → high-limit retriever
# normal_query → low-limit retriever
Full working pipeline (under 100 lines)
"""
Haystack + wpnews AI news RAG pipeline
Run: OPENAI_API_KEY=... WPNEWS_API_KEY=... python haystack_news.py
"""
import os
from haystack import component, Document, Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from wpnews import WPNews
# Shared wpnews client for this script; the API key is read from
# WPNEWS_API_KEY and falls back to an empty string when unset.
_news = WPNews(api_key=os.getenv("WPNEWS_API_KEY", ""))
@component
class WPNewsRetriever:
    """Compact retriever: search when a query is given, else the news feed."""

    @component.output_types(documents=list[Document])
    def run(self, query: str = "", limit: int = 8, lang: str = "en") -> dict:
        """Return wpnews articles as Documents under the ``documents`` key."""
        if query:
            hits = _news.search(q=query, lang=lang, limit=limit)
        else:
            hits = _news.get_news(lang=lang, limit=limit)

        documents = []
        for hit in hits:
            # Content is the title and the summary separated by one newline.
            body = f"{hit.get('title', '')}\n{hit.get('summary', '')}"
            meta = {
                "title": hit.get("title", ""),
                "url": f"https://wpnews.pro/news/{hit.get('slug', '')}",
            }
            documents.append(Document(content=body, meta=meta))
        return {"documents": documents}
@component
class WPNewsMorningBriefing:
    """Compact briefing component exposing the briefing's ``context_text``."""

    @component.output_types(context=str)
    def run(self, lang: str = "en") -> dict:
        """Return ``context_text`` from the briefing, or "" when absent."""
        briefing = _news.get_morning_briefing(lang=lang)
        context_text = briefing.get("context_text", "")
        return {"context": context_text}
# PromptBuilder derives its input sockets from the Jinja variables in the
# template; briefing_context, documents, and question must appear here for
# the connect() and run() calls below to resolve.
TEMPLATE = """
Current AI news:
{{ briefing_context }}

Articles:
{% for doc in documents %}
- {{ doc.content }}
{% endfor %}

Question: {{ question }}
Answer:
"""

pipe = Pipeline()
pipe.add_component("briefing", WPNewsMorningBriefing())
pipe.add_component("retriever", WPNewsRetriever())
pipe.add_component("prompt", PromptBuilder(template=TEMPLATE))
pipe.add_component("llm", OpenAIGenerator(model="gpt-4o"))

pipe.connect("briefing.context", "prompt.briefing_context")
pipe.connect("retriever.documents", "prompt.documents")
pipe.connect("prompt.prompt", "llm.prompt")

# The same question drives both retrieval and the final prompt.
question = "What's the most important AI news today?"
result = pipe.run({
    "briefing": {},
    "retriever": {"query": question, "limit": 8},
    "prompt": {"question": question},
})
print(result["llm"]["replies"][0])
Get your free wpnews API key
1,000 calls/day free. No credit card. Haystack RAG pipeline up in 10 minutes.
Get Free API Key → Or try keyless first: curl https://api.wpnews.pro/api/v1/morning-briefing