{"type": "article", "title": "Scaling LLM Inference: Multi-Node KV Cache Offloading with GKE & Managed Lustre", "publisher": "Web Pulse", "url": "https://wpnews.pro/news/scaling-llm-inference-multi-node-kv-cache-offloading-with-gke-managed-lustre", "original_source": "https://cloud.google.com/blog/topics/developers-practitioners/scaling-llm-inference-multi-node-kv-cache-offloading-with-gke-managed-lustre/", "published": "2026-07-01T07:00:00+00:00", "accessed": "2026-07-01", "id": "scaling-llm-inference-multi-node-kv-cache-offloading-with-gke-managed-lustre"}