{"entity": "Kog AI", "url": "https://wpnews.pro/entity/Kog AI", "count": 2, "articles": [{"slug": "real-time-llm-inference-on-standard-gpus-3k-tokens-s-per-request", "title": "Real-time LLM Inference on Standard GPUs: 3k tokens/s per request", "url": "https://wpnews.pro/news/real-time-llm-inference-on-standard-gpus-3k-tokens-s-per-request", "published_at": "2026-05-29 09:47:23+00:00"}, {"slug": "building-a-single-kernel-latency-optimized-llm-inference-engine-on-amd-mi300x", "title": "Building a single-kernel, latency-optimized LLM inference engine on AMD MI300X GPUs", "url": "https://wpnews.pro/news/building-a-single-kernel-latency-optimized-llm-inference-engine-on-amd-mi300x", "published_at": "2026-05-28 16:18:00+00:00"}]}