Building a Text-to-Image Pipeline with Next.js — Architecture Decisions

wpnews.pro

When I built the backend for a free, no-credit-card AI image generator, the core challenge was connecting a Next.js frontend to a GPU inference endpoint efficiently. Here's the architecture thinking — not the specific implementation, but the decisions that shaped it.

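Text-to-image generation has an unusual request profile: a single generation can take 15+ seconds, which is far longer than an HTTP connection should be held open and can exceed serverless execution limits.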

The solution: async job queue pattern.

User submits prompt → 
Job created in queue → 
Job ID returned immediately → 
Frontend polls for status → 
Result delivered when ready
// app/api/generate/route.js
/**
 * Anti-pattern, kept for illustration: runs inference inside the request
 * handler and only responds once the image is ready.
 *
 * @param {Request} request - Incoming request whose JSON body carries `prompt`.
 * @returns {Promise<Response>} JSON response of the form `{ image }`.
 */
export async function POST(request) {
  const body = await request.json();

  // BAD: holds connection open for 15+ seconds
  const image = await runInference(body.prompt);

  return NextResponse.json({ image });
}

Problem: HTTP connections time out. Vercel serverless functions have execution limits. One slow generation blocks the connection.

// app/api/generate/route.js — Submit job
/**
 * Submit an image-generation job and respond with its ID without waiting
 * for the job to finish.
 *
 * The body is untrusted input, so it is validated before anything is queued:
 * malformed JSON or a missing / non-string / blank `prompt` yields a 400
 * instead of an unhandled exception or a wasted inference job.
 *
 * @param {Request} request - Incoming request; JSON body must be `{ prompt: string }`.
 * @returns {Promise<Response>} `{ jobId, status: 'processing' }` on success,
 *   or `{ error }` with HTTP 400 when the body is invalid.
 */
export async function POST(request) {
  let body;
  try {
    body = await request.json();
  } catch {
    return NextResponse.json(
      { error: 'Request body must be valid JSON' },
      { status: 400 },
    );
  }

  const prompt = body?.prompt;
  if (typeof prompt !== 'string' || prompt.trim() === '') {
    return NextResponse.json(
      { error: 'A non-empty "prompt" string is required' },
      { status: 400 },
    );
  }

  // Submit to inference queue, get job ID immediately
  const jobId = await submitToQueue({ prompt });

  // Return immediately — don't wait for completion
  return NextResponse.json({ jobId, status: 'processing' });
}

// app/api/status/[jobId]/route.js — Check status
/**
 * Report the current state of a generation job.
 *
 * @param {Request} request - Incoming request (unused).
 * @param {{ params: { jobId: string } | Promise<{ jobId: string }> }} context -
 *   Route context carrying the dynamic `[jobId]` segment.
 * @returns {Promise<Response>} JSON `{ status, image }`; `image` is `null`
 *   when the job result has no image.
 */
export async function GET(request, { params }) {
  // Next.js 15 passes `params` as a Promise; awaiting also works for the
  // plain object that earlier versions pass.
  const { jobId } = await params;
  const result = await checkJobStatus(jobId);

  // NOTE(review): assumes checkJobStatus yields null/undefined for an unknown
  // job ID — confirm. Reporting it as 'failed' lets a polling client stop
  // instead of this handler crashing on `result.status`.
  if (result == null) {
    return NextResponse.json(
      { status: 'failed', image: null },
      { status: 404 },
    );
  }

  return NextResponse.json({
    status: result.status, // 'processing' | 'completed' | 'failed'
    image: result.image ?? null,
  });
}

Frontend polls /api/status/[jobId] every 2 seconds until complete.

// Real-time updates via WebSocket
// Backend notifies frontend when job completes
// No polling overhead
// More infrastructure to maintain

For an early-stage product, Option B (polling) is the right tradeoff — simpler to implement and debug, good enough UX.

'use client';
import { useState, useCallback } from 'react';
import Image from 'next/image';

export default function ImageGenerator() {
  const [prompt, setPrompt] = useState('');
  const [status, setStatus] = useState('idle');
  // idle | submitting | processing | complete | error
  const [imageUrl, setImageUrl] = useState(null);

  const generate = useCallback(async () => {
    if (!prompt.trim() || status === 'processing') return;

    setStatus('submitting');

    try {
      // Submit job
      const submitRes = await fetch('/api/generate', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ prompt }),
      });

      const { jobId } = await submitRes.json();
      setStatus('processing');

      // Poll for result
      await pollForResult(jobId);

    } catch {
      setStatus('error');
    }
  }, [prompt, status]);

  const pollForResult = async (jobId) => {
    const maxAttempts = 60; // 2 min timeout

    for (let i = 0; i < maxAttempts; i++) {
      await new Promise(r => setTimeout(r, 2000));

      const statusRes = await fetch(`/api/status/${jobId}`);
      const data = await statusRes.json();

      if (data.status === 'completed') {
        setImageUrl(data.image);
        setStatus('complete');
        return;
      }

      if (data.status === 'failed') {
        setStatus('error');
        return;
      }
    }

    setStatus('error'); // Timeout
  };

  return (
    <div className="flex flex-col gap-4 max-w-2xl mx-auto p-6">
      <textarea
        value={prompt}
        onChange={(e) => setPrompt(e.target.value)}
        placeholder="Describe the image you want..."
        className="w-full p-3 border border-border rounded-xl 
          resize-none h-24 bg-background text-foreground"
        disabled={status === 'processing'}
      />

      <button
        onClick={generate}
        disabled={status === 'processing' || !prompt.trim()}
        className="bg-orange-500 text-white px-6 py-3 
          rounded-full font-semibold transition-colors
          hover:bg-orange-600 disabled:opacity-50"
      >
        {status === 'processing' ? 'Generating...' : 'Generate'}
      </button>

      {/*  skeleton — matches output dimensions */}
      {status === 'processing' && (
        <div className="w-full aspect-square rounded-2xl 
          bg-neutral-100 dark:bg-neutral-800 animate-pulse" />
      )}

      {/* Generated result */}
      {status === 'complete' && imageUrl && (
        <div className="relative w-full aspect-square 
          rounded-2xl overflow-hidden">
          <Image
            src={imageUrl}
            alt={prompt}
            fill
            className="object-cover"
            priority
          />
          <a
            href={imageUrl}
            download="generated.png"
            className="absolute bottom-4 right-4 
              bg-white/90 backdrop-blur text-black 
              px-4 py-2 rounded-full text-sm font-semibold
              hover:bg-white transition-colors"
          >
            Download
          </a>
        </div>
      )}

      {status === 'error' && (
        <p className="text-red-500 text-sm text-center">
          Something went wrong. Try again.
        </p>
      )}
    </div>
  );
}

The skeleton placeholder has the exact dimensions of the output image. When the result arrives, no layout shift — the image drops into the same space.

{/* Square skeleton shown while the job is processing; it reserves the same space the output image will occupy */}
{status === 'processing' && (
  <div className="w-full aspect-square rounded-2xl 
    bg-neutral-100 animate-pulse" />
)}

Users shouldn't be able to submit a second job while one is processing. Disable both the input and the button.

The button should reflect the 'submitting' state before the API call returns — not after. Users notice the lag.

Without user accounts, rate limiting falls on IP address:

// middleware.js
// Sliding-window rate-limit settings.
const RATE_LIMIT_WINDOW_MS = 60_000; // 1 minute
const RATE_LIMIT_MAX_REQUESTS = 8; // requests per window
// Once more clients than this are tracked, idle entries are swept so the
// map cannot grow without bound.
const MAX_TRACKED_CLIENTS = 10_000;

// Client key -> timestamps (ms, ascending) of requests accepted in the window.
const requestCounts = new Map();

/**
 * Derive the rate-limit key for a request.
 * `x-forwarded-for` may hold a comma-separated chain ("client, proxy1, ...");
 * only the first entry is used so the same client maps to one bucket
 * regardless of the proxies appended after it.
 */
function getClientKey(request) {
  const forwardedFor = request.headers.get('x-forwarded-for');
  const clientIp = forwardedFor?.split(',')[0].trim();
  return clientIp || 'unknown';
}

/** Drop clients whose most recent request has aged out of the window. */
function pruneIdleClients(now) {
  for (const [clientKey, timestamps] of requestCounts) {
    const newest = timestamps[timestamps.length - 1];
    if (newest === undefined || now - newest >= RATE_LIMIT_WINDOW_MS) {
      requestCounts.delete(clientKey);
    }
  }
}

/**
 * In-memory, per-client sliding-window rate limiter.
 *
 * @param {Request} request - Incoming request; the client is identified by
 *   its `x-forwarded-for` header, or 'unknown' when the header is absent.
 * @returns {Response} A 429 response with a `Retry-After` header once the
 *   client has reached the limit within the window; otherwise the result of
 *   `NextResponse.next()`.
 */
export function middleware(request) {
  const now = Date.now();
  const clientKey = getClientKey(request);

  if (requestCounts.size > MAX_TRACKED_CLIENTS) {
    pruneIdleClients(now);
  }

  const history = (requestCounts.get(clientKey) ?? [])
    .filter((timestamp) => now - timestamp < RATE_LIMIT_WINDOW_MS);

  if (history.length >= RATE_LIMIT_MAX_REQUESTS) {
    // The oldest accepted request is the next one to leave the window.
    const retryAfterSeconds = Math.max(
      1,
      Math.ceil((history[0] + RATE_LIMIT_WINDOW_MS - now) / 1000),
    );
    return new Response('Rate limit exceeded', {
      status: 429,
      headers: { 'Retry-After': String(retryAfterSeconds) },
    });
  }

  requestCounts.set(clientKey, [...history, now]);
  return NextResponse.next();
}

// Run the middleware only for requests matching /api/generate.
export const config = {
  matcher: '/api/generate',
};

Important: In-memory rate limiting resets on server restart and doesn't work across multiple instances. For production, use a distributed cache for rate limit state.

Problem	Solution
Cold starts on inference	Keep minimum workers warm
Large image payloads	Use CDN URL instead of base64
Layout shift on load	Pre-sized skeleton placeholder
Polling overhead	Stop polling after completion
Timeout handling	Max attempts with user feedback

Webhooks over polling. The inference backend notifying the frontend when complete is cleaner than the frontend repeatedly asking. Polling works, but it's chatty.

Better error categorization. A single 'error' state isn't enough. A timeout is different from an inference failure, which is different from an invalid prompt. Each deserves different user messaging.

For a full breakdown of how the no-account architecture affects product decisions, I wrote a detailed post here.

Questions about the architecture? Comments open.

source & further reading

dev.to — original article HUQAN: The Deterministic Trust Layer That Tells AI Agents "Wait, I Decide First" The TomeVault Instruction Corpus (2026-07) I Implemented the Algorithm Behind ChatGPT From Scratch - Day 8 (PPO).

Building a Text-to-Image Pipeline with Next.js — Architecture Decisions

Run your AI side-project on zahid.host