{"data":[{"id":"xiaomi/mimo-v2.5","canonical_slug":"xiaomi/mimo-v2.5-20260422","hugging_face_id":"XiaomiMiMo/MiMo-V2.5","name":"Xiaomi: MiMo-V2.5","created":1776874269,"description":"MiMo-V2.5 is a native omnimodal model by Xiaomi. It delivers Pro-level agentic performance at roughly half the inference cost, while surpassing MiMo-V2-Omni in multimodal perception across image and video understanding...","context_length":1048576,"architecture":{"modality":"text+image+audio+video->text","input_modalities":["text","audio","image","video"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.00000014","completion":"0.00000028","input_cache_read":"0.0000000028"},"top_provider":{"context_length":1048576,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","response_format","stop","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/xiaomi/mimo-v2.5-20260422/endpoints"}},{"id":"anthropic/claude-opus-4.7","canonical_slug":"anthropic/claude-4.7-opus-20260416","hugging_face_id":null,"name":"Anthropic: Claude Opus 4.7","created":1776351100,"description":"Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","web_search":"0.01","input_cache_read":"0.0000005","input_cache_write":"0.00000625"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.7-opus-20260416/endpoints"}},{"id":"tencent/hy3-preview","canonical_slug":"tencent/hy3-preview-20260421","hugging_face_id":"tencent/Hy3-preview","name":"Tencent: Hy3 preview","created":1776878150,"description":"Hy3 preview is a high-efficiency Mixture-of-Experts model from Tencent designed for agentic workflows and production use. It supports configurable reasoning levels across disabled, low, and high modes, allowing it to...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.000000066","completion":"0.00000026","input_cache_read":"0.000000029"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","max_tokens","presence_penalty","reasoning","seed","stop","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":0.9,"top_p":1,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/tencent/hy3-preview-20260421/endpoints"}},{"id":"xiaomi/mimo-v2.5-pro","canonical_slug":"xiaomi/mimo-v2.5-pro-20260422","hugging_face_id":"XiaomiMiMo/MiMo-V2.5-Pro","name":"Xiaomi: MiMo-V2.5-Pro","created":1776874273,"description":"MiMo-V2.5-Pro is Xiaomi’s flagship model, delivering strong performance in general agentic capabilities, complex software engineering, and long-horizon tasks, with top rankings on benchmarks such as ClawEval, GDPVal, and SWE-bench Pro....","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.000000435","completion":"0.00000087","input_cache_read":"0.0000000036"},"top_provider":{"context_length":1048576,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/xiaomi/mimo-v2.5-pro-20260422/endpoints"}},{"id":"deepseek/deepseek-v4-flash","canonical_slug":"deepseek/deepseek-v4-flash-20260423","hugging_face_id":"deepseek-ai/DeepSeek-V4-Flash","name":"DeepSeek: DeepSeek V4 Flash","created":1777000666,"description":"DeepSeek V4 Flash is an efficiency-optimized Mixture-of-Experts model from DeepSeek with 284B total parameters and 13B activated parameters, supporting a 1M-token context window. It is designed for fast inference and...","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":"0.0000000983","completion":"0.0000001966","input_cache_read":"0.0000000197"},"top_provider":{"context_length":1048576,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-v4-flash-20260423/endpoints"}},{"id":"anthropic/claude-sonnet-4.6","canonical_slug":"anthropic/claude-4.6-sonnet-20260217","hugging_face_id":"","name":"Anthropic: Claude Sonnet 4.6","created":1771342990,"description":"Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.000003","completion":"0.000015","web_search":"0.01","input_cache_read":"0.0000003","input_cache_write":"0.00000375"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.6-sonnet-20260217/endpoints"}},{"id":"nvidia/nemotron-3-super-120b-a12b:free","canonical_slug":"nvidia/nemotron-3-super-120b-a12b-20230311","hugging_face_id":"nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8","name":"NVIDIA: Nemotron 3 Super (free)","created":1773245239,"description":"NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model, activating just 12B parameters for maximum compute efficiency and accuracy in complex multi-agent applications. Built on a hybrid Mamba-Transformer...","context_length":1000000,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0","completion":"0"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/nvidia/nemotron-3-super-120b-a12b-20230311/endpoints"}},{"id":"deepseek/deepseek-v4-pro","canonical_slug":"deepseek/deepseek-v4-pro-20260423","hugging_face_id":"deepseek-ai/DeepSeek-V4-Pro","name":"DeepSeek: DeepSeek V4 Pro","created":1777000679,"description":"DeepSeek V4 Pro is a large-scale Mixture-of-Experts model from DeepSeek with 1.6T total parameters and 49B activated parameters, supporting a 1M-token context window. It is designed for advanced reasoning, coding,...","context_length":1048576,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":null},"pricing":{"prompt":"0.000000435","completion":"0.00000087","input_cache_read":"0.000000003625"},"top_provider":{"context_length":1048576,"max_completion_tokens":384000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":1,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/deepseek/deepseek-v4-pro-20260423/endpoints"}},{"id":"moonshotai/kimi-k2.6","canonical_slug":"moonshotai/kimi-k2.6-20260420","hugging_face_id":"moonshotai/Kimi-K2.6","name":"MoonshotAI: Kimi K2.6","created":1776699402,"description":"Kimi K2.6 is Moonshot AI's next-generation multimodal model, designed for long-horizon coding, coding-driven UI/UX generation, and multi-agent orchestration. It handles complex end-to-end coding tasks across Python, Rust, and Go, and...","context_length":262144,"architecture":{"modality":"text+image->text","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.000000684","completion":"0.00000342","input_cache_read":"0.000000144"},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","reasoning_effort","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/moonshotai/kimi-k2.6-20260420/endpoints"}},{"id":"poolside/laguna-m.1:free","canonical_slug":"poolside/laguna-m.1-20260312","hugging_face_id":null,"name":"Poolside: Laguna M.1 (free)","created":1777388504,"description":"Laguna M.1 is the flagship coding agent model from [Poolside](https://poolside.ai), optimized for complex software engineering tasks. Designed for agentic coding workflows, it supports tool calling and reasoning, with a 128K...","context_length":262144,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0","completion":"0"},"top_provider":{"context_length":262144,"max_completion_tokens":32768,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","temperature","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/poolside/laguna-m.1-20260312/endpoints"}},{"id":"openrouter/owl-alpha","canonical_slug":"openrouter/owl-alpha","hugging_face_id":null,"name":"Owl Alpha","created":1777398589,"description":"Owl Alpha is a high-performance foundation model designed for agentic workloads. Natively supports tool use, and long-context tasks, with strong performance in code generation, automated workflows, and complex instruction execution....","context_length":1048756,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0","completion":"0"},"top_provider":{"context_length":1048756,"max_completion_tokens":262144,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","logit_bias","max_tokens","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tools","top_k","top_p"],"default_parameters":null,"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openrouter/owl-alpha/endpoints"}},{"id":"minimax/minimax-m2.7","canonical_slug":"minimax/minimax-m2.7-20260318","hugging_face_id":"MiniMaxAI/MiniMax-M2.7","name":"MiniMax: MiniMax M2.7","created":1773836697,"description":"MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built to actively participate in its own evolution, M2.7 integrates advanced agentic capabilities through multi-agent...","context_length":204800,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.00000026","completion":"0.0000012"},"top_provider":{"context_length":196608,"max_completion_tokens":null,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/minimax/minimax-m2.7-20260318/endpoints"}},{"id":"openai/gpt-5.5","canonical_slug":"openai/gpt-5.5-20260423","hugging_face_id":"","name":"OpenAI: GPT-5.5","created":1777051893,"description":"GPT-5.5 is OpenAI’s frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.00003","web_search":"0.01","input_cache_read":"0.0000005"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-12-01","expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.5-20260423/endpoints"}},{"id":"anthropic/claude-opus-4.6","canonical_slug":"anthropic/claude-4.6-opus-20260205","hugging_face_id":"","name":"Anthropic: Claude Opus 4.6","created":1770219050,"description":"Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","web_search":"0.01","input_cache_read":"0.0000005","input_cache_write":"0.00000625"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.6-opus-20260205/endpoints"}},{"id":"google/gemini-3.5-flash","canonical_slug":"google/gemini-3.5-flash-20260519","hugging_face_id":null,"name":"Google: Gemini 3.5 Flash","created":1779193800,"description":"Gemini 3.5 Flash is Google's high-efficiency multimodal model, bringing near-Pro level coding and reasoning at Flash-tier cost and speed. It is highly optimized for coding proficiency and parallel agentic execution...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","video","file","audio"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000015","completion":"0.000009","image":"0.0000015","audio":"0.000003","web_search":"0.014","internal_reasoning":"0.000009","input_cache_read":"0.00000015","input_cache_write":"0.00000008333333333333334"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":"2025-01-01","expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-3.5-flash-20260519/endpoints"}},{"id":"google/gemini-3-flash-preview","canonical_slug":"google/gemini-3-flash-preview-20251217","hugging_face_id":"","name":"Google: Gemini 3 Flash Preview","created":1765987078,"description":"Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool...","context_length":1048576,"architecture":{"modality":"text+image+file+audio+video->text","input_modalities":["text","image","file","audio","video"],"output_modalities":["text"],"tokenizer":"Gemini","instruct_type":null},"pricing":{"prompt":"0.0000005","completion":"0.000003","image":"0.0000005","audio":"0.000001","web_search":"0.014","internal_reasoning":"0.000003","input_cache_read":"0.00000005","input_cache_write":"0.00000008333333333333334"},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/google/gemini-3-flash-preview-20251217/endpoints"}},{"id":"z-ai/glm-5.1","canonical_slug":"z-ai/glm-5.1-20260406","hugging_face_id":"zai-org/GLM-5.1","name":"Z.ai: GLM 5.1","created":1775578025,"description":"GLM-5.1 delivers a major leap in coding capability, with particularly significant gains in handling long-horizon tasks. Unlike previous models built around minute-level interactions, GLM-5.1 can work independently and continuously on...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.00000098","completion":"0.00000308","input_cache_read":"0.000000182"},"top_provider":{"context_length":202752,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","reasoning_effort","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-5.1-20260406/endpoints"}},{"id":"openai/gpt-5.4","canonical_slug":"openai/gpt-5.4-20260305","hugging_face_id":"","name":"OpenAI: GPT-5.4","created":1772734352,"description":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for...","context_length":1050000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT","instruct_type":null},"pricing":{"prompt":"0.0000025","completion":"0.000015","web_search":"0.01","input_cache_read":"0.00000025"},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"per_request_limits":null,"supported_parameters":["include_reasoning","max_completion_tokens","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/openai/gpt-5.4-20260305/endpoints"}},{"id":"anthropic/claude-opus-4.8","canonical_slug":"anthropic/claude-4.8-opus-20260528","hugging_face_id":null,"name":"Anthropic: Claude Opus 4.8","created":1779905091,"description":"Claude Opus 4.8 is Anthropic's most capable generally available model in the Opus family. It supports text, image, and file inputs with text output, with reasoning support and a 1M-token...","context_length":1000000,"architecture":{"modality":"text+image+file->text","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude","instruct_type":null},"pricing":{"prompt":"0.000005","completion":"0.000025","web_search":"0.01","input_cache_read":"0.0000005","input_cache_write":"0.00000625"},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":false},"per_request_limits":null,"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"default_parameters":{"temperature":null,"top_p":null,"top_k":null,"frequency_penalty":null,"presence_penalty":null,"repetition_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/anthropic/claude-4.8-opus-20260528/endpoints"}},{"id":"z-ai/glm-4.7","canonical_slug":"z-ai/glm-4.7-20251222","hugging_face_id":"zai-org/GLM-4.7","name":"Z.ai: GLM 4.7","created":1766378014,"description":"GLM-4.7 is Z.ai’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while...","context_length":202752,"architecture":{"modality":"text->text","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":null},"pricing":{"prompt":"0.0000004","completion":"0.00000175","input_cache_read":"0.00000008"},"top_provider":{"context_length":202752,"max_completion_tokens":131072,"is_moderated":false},"per_request_limits":null,"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"default_parameters":{"temperature":1,"top_p":0.95,"frequency_penalty":null},"supported_voices":null,"knowledge_cutoff":null,"expiration_date":null,"links":{"details":"/api/v1/models/z-ai/glm-4.7-20251222/endpoints"}}]}