{"entity": "TurboQuant", "url": "https://wpnews.pro/entity/TurboQuant", "count": 2, "articles": [{"slug": "accelerate-llm-model-loading-and-increase-context-windows-with-gpudirect-on-fsx", "title": "Accelerate LLM model loading and increase context windows with GPUDirect on Amazon FSx for Lustre and TurboQuant", "url": "https://wpnews.pro/news/accelerate-llm-model-loading-and-increase-context-windows-with-gpudirect-on-fsx", "published_at": "2026-06-01 16:07:19+00:00"}, {"slug": "speculative-kv-coding-losslessly-compressing-kv-cache-by-up-to-4x-using-a-model", "title": "Speculative KV coding: losslessly compressing KV cache by up to ~4× using a predictor model", "url": "https://wpnews.pro/news/speculative-kv-coding-losslessly-compressing-kv-cache-by-up-to-4x-using-a-model", "published_at": "2026-05-08 00:00:00+00:00"}]}