{"slug": "gemma-4-e2b-running-in-browser-at-255-tok-s", "title": "Gemma 4 E2B running in-browser at 255 tok/s", "summary": "A new Hugging Face Space demonstrates Gemma 4 E2B running in-browser via WebGPU at 255 tokens per second, showcasing efficient on-device AI inference.", "body_md": "Article URL: \nhttps://huggingface.co/spaces/webml-community/gemma-4-webgpu-kernels\n\nComments URL: \nhttps://news.ycombinator.com/item?id=48577195\n\nPoints: 3\n\n# Comments: 0", "url": "https://wpnews.pro/news/gemma-4-e2b-running-in-browser-at-255-tok-s", "canonical_source": "https://huggingface.co/spaces/webml-community/gemma-4-webgpu-kernels", "published_at": "2026-06-17 21:30:25+00:00", "updated_at": "2026-06-17 21:52:55.950092+00:00", "lang": "en", "topics": ["large-language-models", "ai-tools", "ai-infrastructure"], "entities": ["Gemma 4", "Hugging Face", "WebGPU"], "alternates": {"html": "https://wpnews.pro/news/gemma-4-e2b-running-in-browser-at-255-tok-s", "markdown": "https://wpnews.pro/news/gemma-4-e2b-running-in-browser-at-255-tok-s.md", "text": "https://wpnews.pro/news/gemma-4-e2b-running-in-browser-at-255-tok-s.txt", "jsonld": "https://wpnews.pro/news/gemma-4-e2b-running-in-browser-at-255-tok-s.jsonld"}}