{"slug": "microsoft-taught-a-reasoning-model-to-compress-its-own-thoughts-mid-generation", "title": "Microsoft Taught a Reasoning Model to Compress Its Own Thoughts Mid-Generation.", "summary": "Microsoft researchers developed Memento, a method that trains large language models to compress their own reasoning steps mid-generation by evicting blocks from the KV cache and replacing them with compact summaries, reducing computational costs.", "body_md": "Memento (Microsoft, April 2026) trains LLMs to evict reasoning blocks from the KV cache and replace them with compact summaries, cutting…\nContinue reading on Towards AI »", "url": "https://wpnews.pro/news/microsoft-taught-a-reasoning-model-to-compress-its-own-thoughts-mid-generation", "canonical_source": "https://pub.towardsai.net/microsoft-taught-a-reasoning-model-to-compress-its-own-thoughts-mid-generation-f49802312dee?source=rss----98111c9905da---4", "published_at": "2026-06-16 07:38:17+00:00", "updated_at": "2026-06-16 07:53:29.979532+00:00", "lang": "en", "topics": ["large-language-models", "ai-research", "ai-infrastructure"], "entities": ["Microsoft", "Memento"], "alternates": {"html": "https://wpnews.pro/news/microsoft-taught-a-reasoning-model-to-compress-its-own-thoughts-mid-generation", "markdown": "https://wpnews.pro/news/microsoft-taught-a-reasoning-model-to-compress-its-own-thoughts-mid-generation.md", "text": "https://wpnews.pro/news/microsoft-taught-a-reasoning-model-to-compress-its-own-thoughts-mid-generation.txt", "jsonld": "https://wpnews.pro/news/microsoft-taught-a-reasoning-model-to-compress-its-own-thoughts-mid-generation.jsonld"}}