haystack python rag retrieval news-api

Build a Real-Time News Retriever with Haystack + wpnews (Python, 2025)

2026-05-19 · 8 min read

Tutorial: implement a custom Haystack component that fetches live AI news from wpnews and plugs it into any RAG pipeline — morning briefing, entity search, or topic digest as a Retriever.

Why Haystack + wpnews?

Haystack by deepset is the leading open-source framework for building production RAG pipelines. Unlike agent frameworks, Haystack uses a Pipeline + Component model: you compose data flows by connecting typed input/output pins between components. Any component that produces List[Document] can plug into a Haystack Generator, Ranker, or Reranker.

wpnews provides live AI news that your pipeline can retrieve, rank, and inject into prompts — solving the "LLM doesn't know what happened this week" problem for knowledge-intensive RAG applications.

What you'll build

  • A WPNewsRetriever custom component — implements Haystack's Retriever interface, returns List[Document]
  • A WPNewsMorningBriefing component — injects velocity + breaking stories + entities into the pipeline context
  • A complete RAG pipeline: WPNewsRetriever → PromptBuilder → OpenAIGenerator

Step 1 — Install

pip install haystack-ai wpnews

Get a free wpnews API key at wpnews.pro/api#get-key.

Step 2 — Build the WPNewsRetriever component

Haystack components are decorated with @component. The @component.output_types decorator declares what the component emits — Haystack uses this to validate pipeline connections at definition time.

import os
from haystack import component, Document
from haystack.core.component import Component
from wpnews import WPNews

# Module-level wpnews client used by the components below. The API key is read
# from the WPNEWS_API_KEY environment variable; an empty string is passed when
# the variable is not set.
_client = WPNews(api_key=os.environ.get("WPNEWS_API_KEY", ""))


@component
class WPNewsRetriever:
    """Haystack component that converts wpnews articles into Documents.

    Each emitted Document carries the article title and summary as its
    content, and the title, slug, publication time, topics, entities and
    article URL as metadata.
    """

    @staticmethod
    def _to_document(article) -> Document:
        """Build one Document from a single article mapping."""
        title = article.get("title", "")
        slug = article.get("slug", "")
        return Document(
            content=f"{title}\n\n{article.get('summary', '')}",
            meta={
                "title": title,
                "slug": slug,
                "published_at": article.get("published_at", ""),
                "topics": article.get("topics", []),
                "entities": article.get("entities", []),
                "url": f"https://wpnews.pro/news/{slug}",
            },
        )

    @component.output_types(documents=list[Document])
    def run(
        self,
        query: str = "",
        topic: str = "",
        limit: int = 10,
        lang: str = "en",
    ) -> dict:
        """Fetch articles and return them under the ``documents`` key.

        A non-empty ``query`` takes precedence and triggers a search.
        Otherwise a non-empty ``topic`` filters the news listing, and with
        neither set the unfiltered listing is returned.
        """
        if query:
            found = _client.search(q=query, lang=lang, limit=limit)
        else:
            # Only send the topic filter when one was actually requested.
            topic_filter = {"topic": topic} if topic else {}
            found = _client.get_news(**topic_filter, lang=lang, limit=limit)

        return {"documents": [self._to_document(item) for item in found]}

Step 3 — Build the WPNewsMorningBriefing component

This component fetches situational awareness data and emits a compact string suitable for injection into a system prompt:

@component
class WPNewsMorningBriefing:
    """Fetches today's AI news briefing and returns a context string.

    Outputs:
        context: A short multi-line summary holding the velocity status line,
            up to three breaking stories and up to five trending entity names.
        velocity_status: The ``status`` value of the briefing's ``velocity``
            entry, or ``"normal"`` when it is absent.
    """

    @component.output_types(context=str, velocity_status=str)
    def run(self, hours: int = 6, lang: str = "en") -> dict:
        """Build the briefing text for the last ``hours`` hours in ``lang``."""
        data = _client.get_morning_briefing(lang=lang, hours=hours)
        vel = data.get("velocity", {})
        articles = data.get("hot_articles", [])
        entities = data.get("trending_entities", [])

        status = vel.get("status", "normal")
        lines = [
            f"AI News Status: {status.upper()} "
            f"({vel.get('ratio', 1.0):.1f}x baseline)",
        ]
        if articles:
            lines.append("Breaking stories:")
            # .get() keeps a partially populated article from raising
            # KeyError, matching how WPNewsRetriever reads article fields.
            lines.extend(
                f"  • {a.get('title', '')} [{a.get('hours_ago', 0):.1f}h ago]"
                for a in articles[:3]
            )

        # Skip entities without a name instead of failing on the whole briefing.
        names = [e["name"] for e in entities[:5] if e.get("name")]
        if names:
            lines.append(f"Trending: {', '.join(names)}")

        return {
            "context": "\n".join(lines),
            "velocity_status": status,
        }

Step 4 — Assemble the RAG pipeline

from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator

# Jinja2 template rendered by PromptBuilder. PromptBuilder derives its input
# sockets from the variables used here, so the template must reference
# `briefing_context`, `documents` and `question` for the pipeline connections
# and run inputs of the same names to be accepted.
PROMPT_TEMPLATE = """
You are an AI news analyst. Use the news context and retrieved articles to answer the question.

## Current AI News Situation
{{ briefing_context }}

## Retrieved Articles
{% for doc in documents %}
- {{ doc.content }} ({{ doc.meta["url"] }})
{% endfor %}

## Question
{{ question }}

Answer:
"""

# Register every stage under the name that the connections and the run
# inputs below refer to.
pipeline = Pipeline()
pipeline.add_component(name="briefing", instance=WPNewsMorningBriefing())
pipeline.add_component(name="retriever", instance=WPNewsRetriever())
pipeline.add_component(
    name="prompt_builder",
    instance=PromptBuilder(template=PROMPT_TEMPLATE),
)
pipeline.add_component(name="llm", instance=OpenAIGenerator(model="gpt-4o"))

# Wire outputs to inputs: briefing text and retrieved documents feed the
# prompt, and the rendered prompt feeds the generator.
pipeline.connect(
    sender="briefing.context",
    receiver="prompt_builder.briefing_context",
)
pipeline.connect(
    sender="retriever.documents",
    receiver="prompt_builder.documents",
)
pipeline.connect(sender="prompt_builder.prompt", receiver="llm.prompt")

# Run once; inputs are grouped by the component that consumes them.
result = pipeline.run(
    data={
        "briefing": {"hours": 6},
        "retriever": {"query": "OpenAI GPT-5", "limit": 5},
        "prompt_builder": {"question": "What's the latest news about GPT-5?"},
    }
)
print(result["llm"]["replies"][0])

Step 5 — Conditional pipeline with burst routing

Use Haystack's ConditionalRouter to take different paths based on velocity status — comprehensive search on burst days, compact briefing otherwise:

from haystack.components.routers import ConditionalRouter

# ConditionalRouter evaluates each route's "condition" as a Jinja2 expression
# and derives its input sockets from the variables those expressions use. The
# conditions must therefore reference `velocity_status`, otherwise the
# connection from `briefing.velocity_status` below has nothing to attach to.
# NOTE(review): the status value 'burst' and the forwarded `query` variable are
# reconstructed from the surrounding text — confirm against the wpnews API.
# `query` becomes a router input, so pass it at run time as
# {"router": {"query": ...}}.
routes = [
    {
        "condition": "{{ velocity_status == 'burst' }}",
        "output": "{{ query }}",
        "output_name": "burst_query",
        "output_type": str,
    },
    {
        "condition": "{{ velocity_status != 'burst' }}",
        "output": "{{ query }}",
        "output_name": "normal_query",
        "output_type": str,
    },
]

pipeline.add_component("router", ConditionalRouter(routes))
pipeline.connect("briefing.velocity_status", "router.velocity_status")
# burst_query → high-limit retriever
# normal_query → low-limit retriever

Full working pipeline (under 100 lines)

"""
Haystack + wpnews AI news RAG pipeline
Run: OPENAI_API_KEY=... WPNEWS_API_KEY=... python haystack_news.py
"""
import os
from haystack import component, Document, Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from wpnews import WPNews

# Shared wpnews client for both components; the API key comes from the
# WPNEWS_API_KEY environment variable (empty string when unset).
_news = WPNews(api_key=os.environ.get("WPNEWS_API_KEY", ""))


@component
class WPNewsRetriever:
    """Retrieve wpnews articles and expose them as Haystack Documents."""

    @component.output_types(documents=list[Document])
    def run(self, query: str = "", limit: int = 8, lang: str = "en") -> dict:
        """Search when ``query`` is non-empty, otherwise list the news feed."""
        if query:
            items = _news.search(q=query, lang=lang, limit=limit)
        else:
            items = _news.get_news(lang=lang, limit=limit)

        documents = []
        for item in items:
            headline = item.get("title", "")
            body = f"{headline}\n{item.get('summary', '')}"
            link = f"https://wpnews.pro/news/{item.get('slug', '')}"
            documents.append(
                Document(content=body, meta={"title": headline, "url": link})
            )
        return {"documents": documents}


@component
class WPNewsMorningBriefing:
    """Expose the wpnews morning briefing text as a pipeline output."""

    @component.output_types(context=str)
    def run(self, lang: str = "en") -> dict:
        """Return the briefing's ``context_text`` field, or "" if it is absent."""
        briefing = _news.get_morning_briefing(lang=lang)
        context = briefing.get("context_text", "")
        return {"context": context}


# Jinja2 template for PromptBuilder. The variables used here define the
# builder's inputs, so `briefing_context`, `documents` and `question` must all
# appear for the connections and run inputs of the same names to be valid.
TEMPLATE = """
Current AI news: {{ briefing_context }}

Articles:
{% for doc in documents %}
- {{ doc.content }}
{% endfor %}

Question: {{ question }}
Answer:
"""

# Register the four stages, then wire briefing and retriever into the prompt
# and the prompt into the generator.
pipe = Pipeline()
pipe.add_component(name="briefing", instance=WPNewsMorningBriefing())
pipe.add_component(name="retriever", instance=WPNewsRetriever())
pipe.add_component(name="prompt", instance=PromptBuilder(template=TEMPLATE))
pipe.add_component(name="llm", instance=OpenAIGenerator(model="gpt-4o"))
pipe.connect(sender="briefing.context", receiver="prompt.briefing_context")
pipe.connect(sender="retriever.documents", receiver="prompt.documents")
pipe.connect(sender="prompt.prompt", receiver="llm.prompt")

# The same question text is used as the search query and as the prompt question.
question = "What's the most important AI news today?"
result = pipe.run(
    data={
        "briefing": {},
        "retriever": {"query": question, "limit": 8},
        "prompt": {"question": question},
    }
)
print(result["llm"]["replies"][0])

Get your free wpnews API key

1,000 calls/day free. No credit card. Haystack RAG pipeline up in 10 minutes.

Get Free API Key →

Or try keyless first: curl https://api.wpnews.pro/api/v1/morning-briefing