{"slug": "basert-a-fast-inference-runtime-for-local-ai-on-apple-silicon", "title": "BaseRT, A fast inference runtime for local AI on Apple Silicon", "summary": "BaseCompute released BaseRT, a fast inference runtime for local AI on Apple Silicon, claiming up to 35% faster decode and 78% faster prefill on an Apple M4 Pro with 4-bit quantization. The runtime allows users to serve models locally without API keys or data leaving their device.", "body_md": "`$ curl -LsSf https://basecompute.co/install.sh | sh`\n\nUp to 35% on Decode, up to 78% on Prefill.\n\nTokens / sec · Apple M4 Pro · 4-bit\n\nServe a model with BaseRT, point your agent at it, and keep everything on your machine. No API keys, no data leaving your device.", "url": "https://wpnews.pro/news/basert-a-fast-inference-runtime-for-local-ai-on-apple-silicon", "canonical_source": "https://www.basecompute.co/getbasert", "published_at": "2026-07-01 12:30:44+00:00", "updated_at": "2026-07-01 12:51:01.519219+00:00", "lang": "en", "topics": ["ai-infrastructure", "ai-tools", "machine-learning"], "entities": ["BaseCompute", "BaseRT", "Apple Silicon", "Apple M4 Pro"], "alternates": {"html": "https://wpnews.pro/news/basert-a-fast-inference-runtime-for-local-ai-on-apple-silicon", "markdown": "https://wpnews.pro/news/basert-a-fast-inference-runtime-for-local-ai-on-apple-silicon.md", "text": "https://wpnews.pro/news/basert-a-fast-inference-runtime-for-local-ai-on-apple-silicon.txt", "jsonld": "https://wpnews.pro/news/basert-a-fast-inference-runtime-for-local-ai-on-apple-silicon.jsonld"}}