{
  "generated_at": "2026-07-04T10:48:54Z",
  "count": 135,
  "version": "0.5",
  "models": [
    {
      "provider": "openai",
      "model_id": "gpt-5.5",
      "display_name": "GPT-5.5",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 5.0,
        "output_per_mtok": 30.0,
        "cached_input_per_mtok": 0.5,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1050000,
      "max_output_tokens": 128000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": "Dec 01, 2025",
      "sources": [
        {
          "url": "https://developers.openai.com/api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "gpt-5.5$5.00$0.50$30.00$10.00$1.00$45.00"
        },
        {
          "url": "https://developers.openai.com/api/docs/models/compare",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "context_window_tokens",
            "max_output_tokens",
            "knowledge_cutoff",
            "modalities"
          ],
          "quote": "GPT-5.5 ... Input $5.00 Cached Input $0.50 Output $30.00 Context Window 1,050,000 Max Output Tokens 128,000 Knowledge Cutoff Dec 01, 2025 Supported Features Streaming Function calling Structured outputs Image input"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "OpenAI pricing records the standard short-context tier; long-context pricing, where present, is noted in the cited quote.",
      "permalink": "/models/openai/gpt-5_5.html"
    },
    {
      "provider": "openai",
      "model_id": "gpt-5.5-pro",
      "display_name": "GPT-5.5 Pro",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 30.0,
        "output_per_mtok": 180.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1050000,
      "max_output_tokens": 128000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": "Dec 01, 2025",
      "sources": [
        {
          "url": "https://developers.openai.com/api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "gpt-5.5-pro$30.00-$180.00$60.00-$270.00"
        },
        {
          "url": "https://developers.openai.com/api/docs/models/compare",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "context_window_tokens",
            "max_output_tokens",
            "knowledge_cutoff",
            "modalities"
          ],
          "quote": "GPT-5.5 Pro ... Input $30.00 Cached Input - Output $180.00 Context Window 1,050,000 Max Output Tokens 128,000 Knowledge Cutoff Dec 01, 2025 Supported Features Streaming Function calling Structured outputs Image input"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "OpenAI pricing records the standard short-context tier; long-context pricing, where present, is noted in the cited quote.",
      "permalink": "/models/openai/gpt-5_5-pro.html"
    },
    {
      "provider": "openai",
      "model_id": "gpt-5.4",
      "display_name": "GPT-5.4",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.5,
        "output_per_mtok": 15.0,
        "cached_input_per_mtok": 0.25,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1050000,
      "max_output_tokens": 128000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": "Aug 31, 2025",
      "sources": [
        {
          "url": "https://developers.openai.com/api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "gpt-5.4$2.50$0.25$15.00$5.00$0.50$22.50"
        },
        {
          "url": "https://developers.openai.com/api/docs/models/compare",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "context_window_tokens",
            "max_output_tokens",
            "knowledge_cutoff",
            "modalities"
          ],
          "quote": "GPT-5.4 ... Input $2.50 Cached Input $0.25 Output $15.00 Context Window 1,050,000 Max Output Tokens 128,000 Knowledge Cutoff Aug 31, 2025 Supported Features Streaming Function calling Structured outputs Image input"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "OpenAI pricing records the standard short-context tier; long-context pricing, where present, is noted in the cited quote.",
      "permalink": "/models/openai/gpt-5_4.html"
    },
    {
      "provider": "anthropic",
      "model_id": "claude-fable-5",
      "display_name": "Claude Fable 5",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 10,
        "output_per_mtok": 50,
        "cached_input_per_mtok": 1,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 128000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": "Jan 2026",
      "sources": [
        {
          "url": "https://platform.claude.com/docs/en/about-claude/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Claude Fable 5$10 / MTok$12.50 / MTok$20 / MTok$1 / MTok$50 / MTok Claude Opus 4.8$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.7$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.6$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Sonnet 5 through August 31, 2026 $2 / MTok$2.50 / MTok$4 / MTok$0.20 / MTok$10 / MTok Claude Haiku 4.5$1 / MTok$1.25 / MTok$2 / MTok$0.10 / MTok$5 / MTok"
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "All current Claude models support text and image input, text output, multilingual capabilities, and vision."
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens",
            "knowledge_cutoff"
          ],
          "quote": "Claude API ID claude-fable-5 claude-opus-4-8 claude-sonnet-5 claude-haiku-4-5-20251001 ... Context window 1M tokens 1M tokens 1M tokens 200k tokens Max output 128k tokens 128k tokens 128k tokens 64k tokens Reliable knowledge cutoff Jan 2026 Jan 2026 Jan 2026 Feb 2025"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/anthropic/claude-fable-5.html"
    },
    {
      "provider": "anthropic",
      "model_id": "claude-opus-4-8",
      "display_name": "Claude Opus 4.8",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 5,
        "output_per_mtok": 25,
        "cached_input_per_mtok": 0.5,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 128000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": "Jan 2026",
      "sources": [
        {
          "url": "https://platform.claude.com/docs/en/about-claude/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Claude Fable 5$10 / MTok$12.50 / MTok$20 / MTok$1 / MTok$50 / MTok Claude Opus 4.8$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.7$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.6$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Sonnet 5 through August 31, 2026 $2 / MTok$2.50 / MTok$4 / MTok$0.20 / MTok$10 / MTok Claude Haiku 4.5$1 / MTok$1.25 / MTok$2 / MTok$0.10 / MTok$5 / MTok"
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "All current Claude models support text and image input, text output, multilingual capabilities, and vision."
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens",
            "knowledge_cutoff"
          ],
          "quote": "Claude API ID claude-fable-5 claude-opus-4-8 claude-sonnet-5 claude-haiku-4-5-20251001 ... Context window 1M tokens 1M tokens 1M tokens 200k tokens Max output 128k tokens 128k tokens 128k tokens 64k tokens Reliable knowledge cutoff Jan 2026 Jan 2026 Jan 2026 Feb 2025"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/anthropic/claude-opus-4-8.html"
    },
    {
      "provider": "anthropic",
      "model_id": "claude-opus-4-7",
      "display_name": "Claude Opus 4.7",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 5,
        "output_per_mtok": 25,
        "cached_input_per_mtok": 0.5,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.claude.com/docs/en/about-claude/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Claude Fable 5$10 / MTok$12.50 / MTok$20 / MTok$1 / MTok$50 / MTok Claude Opus 4.8$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.7$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.6$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Sonnet 5 through August 31, 2026 $2 / MTok$2.50 / MTok$4 / MTok$0.20 / MTok$10 / MTok Claude Haiku 4.5$1 / MTok$1.25 / MTok$2 / MTok$0.10 / MTok$5 / MTok"
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "All current Claude models support text and image input, text output, multilingual capabilities, and vision."
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/model-ids-and-versions",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "model_id"
          ],
          "quote": "For example: `claude-sonnet-4-6`, `claude-sonnet-5`, `claude-opus-4-6`, `claude-opus-4-7`, and `claude-opus-4-8`"
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "context_window_tokens"
          ],
          "quote": "Claude Fable 5, Claude Mythos 5, Claude Mythos Preview, Claude Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 5, and Sonnet 4.6 include the full 1M token context window at standard pricing."
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/anthropic/claude-opus-4-7.html"
    },
    {
      "provider": "anthropic",
      "model_id": "claude-opus-4-6",
      "display_name": "Claude Opus 4.6",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 5,
        "output_per_mtok": 25,
        "cached_input_per_mtok": 0.5,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.claude.com/docs/en/about-claude/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Claude Fable 5$10 / MTok$12.50 / MTok$20 / MTok$1 / MTok$50 / MTok Claude Opus 4.8$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.7$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.6$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Sonnet 5 through August 31, 2026 $2 / MTok$2.50 / MTok$4 / MTok$0.20 / MTok$10 / MTok Claude Haiku 4.5$1 / MTok$1.25 / MTok$2 / MTok$0.10 / MTok$5 / MTok"
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "All current Claude models support text and image input, text output, multilingual capabilities, and vision."
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/model-ids-and-versions",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "model_id"
          ],
          "quote": "For example: `claude-sonnet-4-6`, `claude-sonnet-5`, `claude-opus-4-6`, `claude-opus-4-7`, and `claude-opus-4-8`"
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "context_window_tokens"
          ],
          "quote": "Claude Fable 5, Claude Mythos 5, Claude Mythos Preview, Claude Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 5, and Sonnet 4.6 include the full 1M token context window at standard pricing."
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/anthropic/claude-opus-4-6.html"
    },
    {
      "provider": "anthropic",
      "model_id": "claude-sonnet-5",
      "display_name": "Claude Sonnet 5",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2,
        "output_per_mtok": 10,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 128000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": "Jan 2026",
      "sources": [
        {
          "url": "https://platform.claude.com/docs/en/about-claude/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Claude Fable 5$10 / MTok$12.50 / MTok$20 / MTok$1 / MTok$50 / MTok Claude Opus 4.8$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.7$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.6$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Sonnet 5 through August 31, 2026 $2 / MTok$2.50 / MTok$4 / MTok$0.20 / MTok$10 / MTok Claude Haiku 4.5$1 / MTok$1.25 / MTok$2 / MTok$0.10 / MTok$5 / MTok"
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "All current Claude models support text and image input, text output, multilingual capabilities, and vision."
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens",
            "knowledge_cutoff"
          ],
          "quote": "Claude API ID claude-fable-5 claude-opus-4-8 claude-sonnet-5 claude-haiku-4-5-20251001 ... Context window 1M tokens 1M tokens 1M tokens 200k tokens Max output 128k tokens 128k tokens 128k tokens 64k tokens Reliable knowledge cutoff Jan 2026 Jan 2026 Jan 2026 Feb 2025"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
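      "notes": "The cited pricing page lists these Claude Sonnet 5 rates as applying through August 31, 2026; pricing after that date is not recorded in this entry.",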
      "permalink": "/models/anthropic/claude-sonnet-5.html"
    },
    {
      "provider": "anthropic",
      "model_id": "claude-haiku-4-5-20251001",
      "display_name": "Claude Haiku 4.5",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1,
        "output_per_mtok": 5,
        "cached_input_per_mtok": 0.1,
        "batch_discount_pct": null
      },
      "context_window_tokens": 200000,
      "context_window_notation": "200k",
      "max_output_tokens": 64000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": "Feb 2025",
      "sources": [
        {
          "url": "https://platform.claude.com/docs/en/about-claude/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Claude Fable 5$10 / MTok$12.50 / MTok$20 / MTok$1 / MTok$50 / MTok Claude Opus 4.8$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.7$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Opus 4.6$5 / MTok$6.25 / MTok$10 / MTok$0.50 / MTok$25 / MTok Claude Sonnet 5 through August 31, 2026 $2 / MTok$2.50 / MTok$4 / MTok$0.20 / MTok$10 / MTok Claude Haiku 4.5$1 / MTok$1.25 / MTok$2 / MTok$0.10 / MTok$5 / MTok"
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "All current Claude models support text and image input, text output, multilingual capabilities, and vision."
        },
        {
          "url": "https://platform.claude.com/docs/en/about-claude/models/overview",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens",
            "knowledge_cutoff"
          ],
          "quote": "Claude API ID claude-fable-5 claude-opus-4-8 claude-sonnet-5 claude-haiku-4-5-20251001 ... Context window 1M tokens 1M tokens 1M tokens 200k tokens Max output 128k tokens 128k tokens 128k tokens 64k tokens Reliable knowledge cutoff Jan 2026 Jan 2026 Jan 2026 Feb 2025"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/anthropic/claude-haiku-4-5-20251001.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-3.1-flash-lite",
      "display_name": "Gemini 3.1 Flash-Lite",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.25,
        "output_per_mtok": 1.5,
        "cached_input_per_mtok": 0.025,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 3.1 Flash-Lite `gemini-3.1-flash-lite` ... Input price Free of charge $0.25 (text / image / video) $0.50 (audio) Output price (including thinking tokens) Free of charge $1.50 Context caching price Not available $0.025 (text / image / video)"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited standard text or text/image/video tier.",
      "permalink": "/models/google/gemini-3_1-flash-lite.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-3.1-pro-preview",
      "display_name": "Gemini 3.1 Pro Preview",
      "status": "preview",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.0,
        "output_per_mtok": 12.0,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image",
          "video",
          "audio"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 3.1 Pro Preview `gemini-3.1-pro-preview` ... Input price Not available $2.00, prompts <= 200k tokens $4.00, prompts > 200k tokens Output price (including thinking tokens) Not available $12.00, prompts <= 200k tokens $18.00, prompts > 200k Context caching price Not available $0.20, prompts <= 200k tokens"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited standard text or text/image/video tier.",
      "permalink": "/models/google/gemini-3_1-pro-preview.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-3.1-flash-live-preview",
      "display_name": "Gemini 3.1 Flash Live Preview",
      "status": "preview",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.75,
        "output_per_mtok": 4.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 3.1 Flash Live Preview `gemini-3.1-flash-live-preview` ... Input price Free of charge $0.75 (text) $3.00 or $0.005/min (audio) $1.00 or $0.002/min (image/video) Output price (including thinking tokens) Free of charge $4.50 (text) $12.00 or $0.018/min (audio)"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited standard text or text/image/video tier.",
      "permalink": "/models/google/gemini-3_1-flash-live-preview.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-3-flash-preview",
      "display_name": "Gemini 3 Flash Preview",
      "status": "preview",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.5,
        "output_per_mtok": 3.0,
        "cached_input_per_mtok": 0.05,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 3 Flash Preview `gemini-3-flash-preview` ... Input price Free of charge $0.50 (text / image / video) $1.00 (audio) Output price (including thinking tokens) Free of charge $3.00 Context caching price Free of charge $0.05 (text / image / video)"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited standard text or text/image/video tier.",
      "permalink": "/models/google/gemini-3-flash-preview.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-2.5-pro",
      "display_name": "Gemini 2.5 Pro",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.25,
        "output_per_mtok": 10.0,
        "cached_input_per_mtok": 0.125,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 2.5 Pro `gemini-2.5-pro` ... Input price Free of charge $1.25, prompts <= 200k tokens $2.50, prompts > 200k tokens Output price (including thinking tokens) Free of charge $10.00, prompts <= 200k tokens $15.00, prompts > 200k Context caching price Not available $0.125, prompts <= 200k tokens"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited standard text or text/image/video tier.",
      "permalink": "/models/google/gemini-2_5-pro.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-2.5-flash",
      "display_name": "Gemini 2.5 Flash",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 2.5,
        "cached_input_per_mtok": 0.03,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1048576,
      "max_output_tokens": 65536,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 2.5 Flash `gemini-2.5-flash` Our first hybrid reasoning model which supports a 1M token context window ... Input price Free of charge $0.30 (text / image / video) $1.00 (audio) Output price (including thinking tokens) Free of charge $2.50 Context caching price Not available $0.03 (text / image / video)"
        },
        {
          "url": "https://ai.google.dev/gemini-api/docs/models/gemini-2.5-flash",
          "accessed_at": "2026-07-04T07:37:12Z",
          "fields": [
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Gemini 2.5 Flash ... Model code gemini-2.5-flash ... Token limits ... Input token limit 1,048,576 Output token limit 65,536"
        }
      ],
      "verified_at": "2026-07-04T07:37:12Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited standard text or text/image/video tier.",
      "permalink": "/models/google/gemini-2_5-flash.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-2.5-flash-lite",
      "display_name": "Gemini 2.5 Flash-Lite",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.1,
        "output_per_mtok": 0.4,
        "cached_input_per_mtok": 0.01,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 2.5 Flash-Lite `gemini-2.5-flash-lite` ... Input price (text, image, video) Free of charge $0.10 (text / image / video) $0.30 (audio) Output price (including thinking tokens) Free of charge $0.40 Context caching price Not available $0.01 (text / image / video)"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited standard text or text/image/video tier.",
      "permalink": "/models/google/gemini-2_5-flash-lite.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-2.5-flash-lite-preview-09-2025",
      "display_name": "Gemini 2.5 Flash-Lite Preview",
      "status": "preview",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.1,
        "output_per_mtok": 0.4,
        "cached_input_per_mtok": 0.01,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 2.5 Flash-Lite Preview `gemini-2.5-flash-lite-preview-09-2025` ... Input price (text, image, video) Free of charge $0.10 (text / image / video) $0.30 (audio) Output price (including thinking tokens) Free of charge $0.40 Context caching price Not available $0.01 (text / image / video)"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited standard text or text/image/video tier.",
      "permalink": "/models/google/gemini-2_5-flash-lite-preview-09-2025.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-2.5-flash-native-audio-preview-12-2025",
      "display_name": "Gemini 2.5 Flash Native Audio",
      "status": "preview",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.5,
        "output_per_mtok": 2.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "audio",
          "video"
        ],
        "output": [
          "text",
          "audio"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 2.5 Flash Native Audio (Live API) `gemini-2.5-flash-native-audio-preview-12-2025` ... Input price Free of charge $0.50 (text) $3.00 (audio / video) Output price (including thinking tokens) Free of charge $2.00 (text) $12.00 (audio)"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited text tier. Audio/video input and audio output are priced higher, as shown in the cited quote.",
      "permalink": "/models/google/gemini-2_5-flash-native-audio-preview-12-2025.html"
    },
    {
      "provider": "google",
      "model_id": "gemini-2.5-flash-preview-tts",
      "display_name": "Gemini 2.5 Flash Preview TTS",
      "status": "preview",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.5,
        "output_per_mtok": 10.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "audio"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://ai.google.dev/gemini-api/docs/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Gemini 2.5 Flash Preview TTS `gemini-2.5-flash-preview-tts` ... Input price Free of charge $0.50 (text) Output price Free of charge $10.00 (audio)"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": "Gemini pricing may vary by modality and prompt length; this entry records the cited text input price and audio output price.",
      "permalink": "/models/google/gemini-2_5-flash-preview-tts.html"
    },
    {
      "provider": "mistral",
      "model_id": "mistral-medium-latest",
      "display_name": "Mistral Medium 3.5",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.5,
        "output_per_mtok": 7.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://mistral.ai/pricing/api/",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Mistral Medium 3.5 ... Text-to-text Reasoning Coding Agentic Multimodal Input (/M tokens) $1.5 Output (/M tokens) $7.5 mistral-medium-latest"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/mistral/mistral-medium-latest.html"
    },
    {
      "provider": "mistral",
      "model_id": "mistral-small-latest",
      "display_name": "Mistral Small 4",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.15,
        "output_per_mtok": 0.6,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://mistral.ai/pricing/api/",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Mistral Small 4 ... Text-to-text Agentic Multimodal Lightweight Input (/M tokens) $0.15 Output (/M tokens) $0.6 mistral-small-latest"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/mistral/mistral-small-latest.html"
    },
    {
      "provider": "mistral",
      "model_id": "mistral-large-latest",
      "display_name": "Mistral Large 3",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.5,
        "output_per_mtok": 1.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://mistral.ai/pricing/api/",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Mistral Large 3 ... Text-to-text Multimodal Input (/M tokens) $0.5 Output (/M tokens) $1.5 mistral-large-latest"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/mistral/mistral-large-latest.html"
    },
    {
      "provider": "mistral",
      "model_id": "voxtral-small-latest",
      "display_name": "Voxtral Small",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.1,
        "output_per_mtok": 0.4,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "audio"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://mistral.ai/pricing/api/",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Voxtral Small ... Audio Input (per min / per M tok) $0.004 Text Input (per min / per M tok) $0.1 Output (/M tokens) $0.4 Available on `/v1/chat/completions` voxtral-small-latest"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/mistral/voxtral-small-latest.html"
    },
    {
      "provider": "mistral",
      "model_id": "devstral-medium-latest",
      "display_name": "Devstral 2",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.4,
        "output_per_mtok": 2.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://mistral.ai/pricing/api/",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Devstral 2 ... Coding Agentic Text-to-text Input (/M tokens) $0.4 Output (/M tokens) $2 devstral-medium-latest"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/mistral/devstral-medium-latest.html"
    },
    {
      "provider": "mistral",
      "model_id": "devstral-small-latest",
      "display_name": "Devstral Small 2",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.1,
        "output_per_mtok": 0.3,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://mistral.ai/pricing/api/",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Devstral Small 2 ... Coding Agentic Text-to-text Lightweight Multimodal Input (/M tokens) $0.1 Output (/M tokens) $0.3 devstral-small-latest"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/mistral/devstral-small-latest.html"
    },
    {
      "provider": "mistral",
      "model_id": "codestral-latest",
      "display_name": "Codestral",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 0.9,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://mistral.ai/pricing/api/",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Codestral ... Coding Text-to-text Input (/M tokens) $0.3 Output (/M tokens) $0.9 codestral-latest"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/mistral/codestral-latest.html"
    },
    {
      "provider": "mistral",
      "model_id": "magistral-medium-latest",
      "display_name": "Magistral Medium",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.0,
        "output_per_mtok": 5.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://mistral.ai/pricing/api/",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "modalities"
          ],
          "quote": "Magistral Medium ... Text-to-text Reasoning Multimodal Input (/M tokens) $2 Output (/M tokens) $5 magistral-medium-latest"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/mistral/magistral-medium-latest.html"
    },
    {
      "provider": "xai",
      "model_id": "grok-build-0.1",
      "display_name": "Grok Build 0.1",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.0,
        "output_per_mtok": 2.0,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": 256000,
      "context_window_notation": "256k",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.x.ai/developers/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "context_window_tokens"
          ],
          "quote": "Chat API Prices per 1M tokens Model Context Input Cached input Output grok-build-0.1 256k$1.00$0.20$2.00 grok-4.3 1M$1.25$0.20$2.50 grok-4.20-multi-agent-0309 1M$1.25$0.20$2.50 grok-4.20-0309-reasoning 1M$1.25$0.20$2.50 grok-4.20-0309-non-reasoning 1M$1.25$0.20$2.50"
        },
        {
          "url": "https://docs.x.ai/developers/models",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "We have dedicated models and APIs for audio, image, and video capabilities. For everything else, use Grok 4.3."
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": null,
      "permalink": "/models/xai/grok-build-0_1.html"
    },
    {
      "provider": "xai",
      "model_id": "grok-4.3",
      "display_name": "Grok 4.3",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.25,
        "output_per_mtok": 2.5,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.x.ai/developers/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "context_window_tokens"
          ],
          "quote": "Chat API Prices per 1M tokens Model Context Input Cached input Output grok-build-0.1 256k$1.00$0.20$2.00 grok-4.3 1M$1.25$0.20$2.50 grok-4.20-multi-agent-0309 1M$1.25$0.20$2.50 grok-4.20-0309-reasoning 1M$1.25$0.20$2.50 grok-4.20-0309-non-reasoning 1M$1.25$0.20$2.50"
        },
        {
          "url": "https://docs.x.ai/developers/models",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "We have dedicated models and APIs for audio, image, and video capabilities. For everything else, use Grok 4.3."
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": null,
      "permalink": "/models/xai/grok-4_3.html"
    },
    {
      "provider": "xai",
      "model_id": "grok-4.20-multi-agent-0309",
      "display_name": "Grok 4.20 Multi-Agent 0309",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.25,
        "output_per_mtok": 2.5,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.x.ai/developers/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "context_window_tokens"
          ],
          "quote": "Chat API Prices per 1M tokens Model Context Input Cached input Output grok-build-0.1 256k$1.00$0.20$2.00 grok-4.3 1M$1.25$0.20$2.50 grok-4.20-multi-agent-0309 1M$1.25$0.20$2.50 grok-4.20-0309-reasoning 1M$1.25$0.20$2.50 grok-4.20-0309-non-reasoning 1M$1.25$0.20$2.50"
        },
        {
          "url": "https://docs.x.ai/developers/models",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "We have dedicated models and APIs for audio, image, and video capabilities. For everything else, use Grok 4.3."
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": null,
      "permalink": "/models/xai/grok-4_20-multi-agent-0309.html"
    },
    {
      "provider": "xai",
      "model_id": "grok-4.20-0309-reasoning",
      "display_name": "Grok 4.20 0309 Reasoning",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.25,
        "output_per_mtok": 2.5,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.x.ai/developers/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "context_window_tokens"
          ],
          "quote": "Chat API Prices per 1M tokens Model Context Input Cached input Output grok-build-0.1 256k$1.00$0.20$2.00 grok-4.3 1M$1.25$0.20$2.50 grok-4.20-multi-agent-0309 1M$1.25$0.20$2.50 grok-4.20-0309-reasoning 1M$1.25$0.20$2.50 grok-4.20-0309-non-reasoning 1M$1.25$0.20$2.50"
        },
        {
          "url": "https://docs.x.ai/developers/models",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "We have dedicated models and APIs for audio, image, and video capabilities. For everything else, use Grok 4.3."
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": null,
      "permalink": "/models/xai/grok-4_20-0309-reasoning.html"
    },
    {
      "provider": "xai",
      "model_id": "grok-4.20-0309-non-reasoning",
      "display_name": "Grok 4.20 0309 Non-Reasoning",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.25,
        "output_per_mtok": 2.5,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.x.ai/developers/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "context_window_tokens"
          ],
          "quote": "Chat API Prices per 1M tokens Model Context Input Cached input Output grok-build-0.1 256k$1.00$0.20$2.00 grok-4.3 1M$1.25$0.20$2.50 grok-4.20-multi-agent-0309 1M$1.25$0.20$2.50 grok-4.20-0309-reasoning 1M$1.25$0.20$2.50 grok-4.20-0309-non-reasoning 1M$1.25$0.20$2.50"
        },
        {
          "url": "https://docs.x.ai/developers/models",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "modalities"
          ],
          "quote": "We have dedicated models and APIs for audio, image, and video capabilities. For everything else, use Grok 4.3."
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": null,
      "permalink": "/models/xai/grok-4_20-0309-non-reasoning.html"
    },
    {
      "provider": "deepseek",
      "model_id": "deepseek-v4-flash",
      "display_name": "DeepSeek V4 Flash",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.14,
        "output_per_mtok": 0.28,
        "cached_input_per_mtok": 0.0028,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 384000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://api-docs.deepseek.com/quick_start/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "context_window_tokens",
            "max_output_tokens",
            "modalities"
          ],
          "quote": "MODEL deepseek-v4-flash(1)deepseek-v4-pro ... CONTEXT LENGTH 1M MAX OUTPUT MAXIMUM: 384K ... 1M INPUT TOKENS (CACHE HIT)$0.0028$0.003625 1M INPUT TOKENS (CACHE MISS)$0.14$0.435 1M OUTPUT TOKENS$0.28$0.87"
        },
        {
          "url": "https://api-docs.deepseek.com/api/list-models",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "model_id"
          ],
          "quote": "\"id\": \"deepseek-v4-flash\" ... \"id\": \"deepseek-v4-pro\""
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/deepseek/deepseek-v4-flash.html"
    },
    {
      "provider": "deepseek",
      "model_id": "deepseek-v4-pro",
      "display_name": "DeepSeek V4 Pro",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.435,
        "output_per_mtok": 0.87,
        "cached_input_per_mtok": 0.003625,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 384000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://api-docs.deepseek.com/quick_start/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok",
            "context_window_tokens",
            "max_output_tokens",
            "modalities"
          ],
          "quote": "MODEL deepseek-v4-flash(1)deepseek-v4-pro ... CONTEXT LENGTH 1M MAX OUTPUT MAXIMUM: 384K ... 1M INPUT TOKENS (CACHE HIT)$0.0028$0.003625 1M INPUT TOKENS (CACHE MISS)$0.14$0.435 1M OUTPUT TOKENS$0.28$0.87"
        },
        {
          "url": "https://api-docs.deepseek.com/api/list-models",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "model_id"
          ],
          "quote": "\"id\": \"deepseek-v4-flash\" ... \"id\": \"deepseek-v4-pro\""
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/deepseek/deepseek-v4-pro.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-r-plus-08-2024",
      "display_name": "Command R+ 08-2024",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.5,
        "output_per_mtok": 10.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "context_window_notation": "128k",
      "max_output_tokens": 4000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://cohere.com/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Command R+ 08-2024 pricing is $2.50/1M tokens for input and $10.00/1M tokens for output"
        },
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "`command-r-plus-08-2024 `Live `command-r-plus-08-2024` is an update of the Command R+ model, delivered in August 2024. Find more information here Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": "Listed as legacy on the Cohere pricing page.",
      "permalink": "/models/cohere/command-r-plus-08-2024.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-r-03-2024",
      "display_name": "Command R 03-2024",
      "status": "deprecated",
      "release_date": null,
      "deprecation_date": "2025-09-15",
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.5,
        "output_per_mtok": 1.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "context_window_notation": "128k",
      "max_output_tokens": 4000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://cohere.com/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Command R 03-2024 pricing is $0.50/1M tokens for input and $1.50/1M tokens for output"
        },
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens",
            "deprecation_date"
          ],
          "quote": "`command-r-03-2024 `Deprecated Sept 15, 2025 Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": null,
      "permalink": "/models/cohere/command-r-03-2024.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-r-plus-04-2024",
      "display_name": "Command R+ 04-2024",
      "status": "deprecated",
      "release_date": null,
      "deprecation_date": "2025-09-15",
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 3.0,
        "output_per_mtok": 15.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "context_window_notation": "128k",
      "max_output_tokens": 4000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://cohere.com/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Command R+ 04-2024 pricing is $3.00/1M tokens for input and $15.00/1M tokens for output"
        },
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens",
            "deprecation_date"
          ],
          "quote": "`command-r-plus-04-2024 `Deprecated Sept 15, 2025 Command R+ is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It is best suited for complex RAG workflows and multi-step tool use.Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": null,
      "permalink": "/models/cohere/command-r-plus-04-2024.html"
    },
    {
      "provider": "cohere",
      "model_id": "command",
      "display_name": "Command",
      "status": "deprecated",
      "release_date": null,
      "deprecation_date": "2025-09-15",
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.0,
        "output_per_mtok": 2.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 4000,
      "context_window_notation": "4k",
      "max_output_tokens": 4000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://cohere.com/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Command pricing is $1.00/1M tokens for input and $2.00/1M tokens for output"
        },
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens",
            "deprecation_date"
          ],
          "quote": "`command `Deprecated Sept 15, 2025 An instruction-following conversational model that performs language tasks with high quality, more reliably and with a longer context than our base generative models.Text 4k 4k"
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": null,
      "permalink": "/models/cohere/command.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-light",
      "display_name": "Command Light",
      "status": "deprecated",
      "release_date": null,
      "deprecation_date": "2025-09-15",
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 0.6,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 4000,
      "context_window_notation": "4k",
      "max_output_tokens": 4000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://cohere.com/pricing",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Command-light pricing is $0.30/1M tokens for input and $0.60/1M tokens for output"
        },
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T06:08:18Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens",
            "deprecation_date"
          ],
          "quote": "`command-light `Deprecated Sept 15, 2025 A smaller, faster version of `command`. Almost as capable, but a lot faster.Text 4k 4k"
        }
      ],
      "verified_at": "2026-07-04T06:08:18Z",
      "notes": null,
      "permalink": "/models/cohere/command-light.html"
    },
    {
      "provider": "cohere",
      "model_id": "c4ai-aya-expanse-32b",
      "display_name": "Aya Expanse 32B",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.5,
        "output_per_mtok": 1.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "context_window_notation": "128k",
      "max_output_tokens": 4000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://cohere.com/pricing",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Aya Expanse models (8B and 32B) on the API are charged at $0.50/1M tokens for input and $1.50/1M tokens for output."
        },
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T05:55:32Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "`c4ai-aya-expanse-32b `Live Aya Expanse is a highly performant 32B multilingual model ... Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T05:55:32Z",
      "notes": null,
      "permalink": "/models/cohere/c4ai-aya-expanse-32b.html"
    },
    {
      "provider": "amazon",
      "model_id": "amazon.nova-premier-v1:0",
      "display_name": "Amazon Nova Premier",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.5,
        "output_per_mtok": 12.5,
        "cached_input_per_mtok": 0.625,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 10000,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://aws.amazon.com/bedrock/pricing/",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Amazon Nova Premier $2.50 per 1M input tokens $12.50 per 1M output tokens; prompt cache read $0.625 per 1M tokens in us-east-1 on-demand."
        },
        {
          "url": "https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Model ID amazon.nova-premier-v1:0 amazon.nova-pro-v1:0 amazon.nova-lite-v1:0 amazon.nova-micro-v1:0 ... Input modalities Text, Image, Video Text, Image, Video Text, Image, Video Text ... Output Modalities Text Text Text Text ... Context Window 1M 300k 300k 128k ... Max Output Tokens 10K 10k 10k 10k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "AWS Bedrock us-east-1 on-demand Standard tier; price table exposed on AWS Bedrock pricing.",
      "permalink": "/models/amazon/amazon_nova-premier-v1_0.html"
    },
    {
      "provider": "amazon",
      "model_id": "amazon.nova-pro-v1:0",
      "display_name": "Amazon Nova Pro",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.8,
        "output_per_mtok": 3.2,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": 300000,
      "context_window_notation": "300k",
      "max_output_tokens": 10000,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://aws.amazon.com/bedrock/pricing/",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Amazon Nova Pro $0.80 per 1M input tokens $3.20 per 1M output tokens; prompt cache read $0.20 per 1M tokens in us-east-1 on-demand."
        },
        {
          "url": "https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Model ID amazon.nova-premier-v1:0 amazon.nova-pro-v1:0 amazon.nova-lite-v1:0 amazon.nova-micro-v1:0 ... Input modalities Text, Image, Video Text, Image, Video Text, Image, Video Text ... Output Modalities Text Text Text Text ... Context Window 1M 300k 300k 128k ... Max Output Tokens 10K 10k 10k 10k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "AWS Bedrock us-east-1 on-demand Standard tier; price table exposed on AWS Bedrock pricing.",
      "permalink": "/models/amazon/amazon_nova-pro-v1_0.html"
    },
    {
      "provider": "amazon",
      "model_id": "amazon.nova-lite-v1:0",
      "display_name": "Amazon Nova Lite",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.06,
        "output_per_mtok": 0.24,
        "cached_input_per_mtok": 0.015,
        "batch_discount_pct": null
      },
      "context_window_tokens": 300000,
      "context_window_notation": "300k",
      "max_output_tokens": 10000,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://aws.amazon.com/bedrock/pricing/",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Amazon Nova Lite $0.06 per 1M input tokens $0.24 per 1M output tokens; prompt cache read $0.015 per 1M tokens in us-east-1 on-demand."
        },
        {
          "url": "https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Model ID amazon.nova-premier-v1:0 amazon.nova-pro-v1:0 amazon.nova-lite-v1:0 amazon.nova-micro-v1:0 ... Input modalities Text, Image, Video Text, Image, Video Text, Image, Video Text ... Output Modalities Text Text Text Text ... Context Window 1M 300k 300k 128k ... Max Output Tokens 10K 10k 10k 10k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "AWS Bedrock us-east-1 on-demand Standard tier; price table exposed on AWS Bedrock pricing.",
      "permalink": "/models/amazon/amazon_nova-lite-v1_0.html"
    },
    {
      "provider": "amazon",
      "model_id": "amazon.nova-micro-v1:0",
      "display_name": "Amazon Nova Micro",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.035,
        "output_per_mtok": 0.14,
        "cached_input_per_mtok": 0.00875,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "context_window_notation": "128k",
      "max_output_tokens": 10000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://aws.amazon.com/bedrock/pricing/",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Amazon Nova Micro $0.035 per 1M input tokens $0.14 per 1M output tokens; prompt cache read $0.00875 per 1M tokens in us-east-1 on-demand."
        },
        {
          "url": "https://docs.aws.amazon.com/nova/latest/userguide/what-is-nova.html",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Model ID amazon.nova-premier-v1:0 amazon.nova-pro-v1:0 amazon.nova-lite-v1:0 amazon.nova-micro-v1:0 ... Input modalities Text, Image, Video Text, Image, Video Text, Image, Video Text ... Output Modalities Text Text Text Text ... Context Window 1M 300k 300k 128k ... Max Output Tokens 10K 10k 10k 10k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "AWS Bedrock us-east-1 on-demand Standard tier; price table exposed on AWS Bedrock pricing.",
      "permalink": "/models/amazon/amazon_nova-micro-v1_0.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.7-max",
      "display_name": "Qwen3.7 Max",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.5,
        "output_per_mtok": 7.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens"
          ],
          "quote": "Model ID ... qwen3.7-max International Non-Thinking and Thinking modes 0<Token<=1M $2.5 $7.5 1 million tokens qwen3.7-max-2026-05-20 ... $2.5 $7.5 1 million tokens qwen3.7-max-preview ... $2.5 $7.5 1 million tokens qwen3.7-max-2026-05-17 ... $2.5 $7.5 1 million tokens"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "International Model Studio price; base 0<Token<=1M tier recorded.",
      "permalink": "/models/alibaba/qwen3_7-max.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.7-max-2026-05-20",
      "display_name": "Qwen3.7 Max 2026-05-20",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.5,
        "output_per_mtok": 7.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens"
          ],
          "quote": "Model ID ... qwen3.7-max International Non-Thinking and Thinking modes 0<Token<=1M $2.5 $7.5 1 million tokens qwen3.7-max-2026-05-20 ... $2.5 $7.5 1 million tokens qwen3.7-max-preview ... $2.5 $7.5 1 million tokens qwen3.7-max-2026-05-17 ... $2.5 $7.5 1 million tokens"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "International Model Studio price; base 0<Token<=1M tier recorded.",
      "permalink": "/models/alibaba/qwen3_7-max-2026-05-20.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.7-max-preview",
      "display_name": "Qwen3.7 Max Preview",
      "status": "preview",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.5,
        "output_per_mtok": 7.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens"
          ],
          "quote": "Model ID ... qwen3.7-max International Non-Thinking and Thinking modes 0<Token<=1M $2.5 $7.5 1 million tokens qwen3.7-max-2026-05-20 ... $2.5 $7.5 1 million tokens qwen3.7-max-preview ... $2.5 $7.5 1 million tokens qwen3.7-max-2026-05-17 ... $2.5 $7.5 1 million tokens"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "International Model Studio price; base 0<Token<=1M tier recorded.",
      "permalink": "/models/alibaba/qwen3_7-max-preview.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.7-max-2026-05-17",
      "display_name": "Qwen3.7 Max 2026-05-17",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.5,
        "output_per_mtok": 7.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens"
          ],
          "quote": "Model ID ... qwen3.7-max International Non-Thinking and Thinking modes 0<Token<=1M $2.5 $7.5 1 million tokens qwen3.7-max-2026-05-20 ... $2.5 $7.5 1 million tokens qwen3.7-max-preview ... $2.5 $7.5 1 million tokens qwen3.7-max-2026-05-17 ... $2.5 $7.5 1 million tokens"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "International Model Studio price; base 0<Token<=1M tier recorded.",
      "permalink": "/models/alibaba/qwen3_7-max-2026-05-17.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.6-max-preview",
      "display_name": "Qwen3.6 Max Preview",
      "status": "preview",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.3,
        "output_per_mtok": 7.8,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 256000,
      "context_window_notation": "256K",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens"
          ],
          "quote": "qwen3.6-max-preview International Non-Thinking and Thinking modes 0<Token<=128K $1.3 $7.8 1 million tokens 128K<Token<=256K $2 $12"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "International Model Studio price; base 0<Token<=128K tier recorded; higher tiers noted in quote.",
      "permalink": "/models/alibaba/qwen3_6-max-preview.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3-max",
      "display_name": "Qwen3 Max",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.2,
        "output_per_mtok": 6.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 256000,
      "context_window_notation": "256K",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens"
          ],
          "quote": "qwen3-max International Non-Thinking and Thinking modes 0<Token<=32K $1.2 $6 1 million tokens 32K<Token<=128K $2.4 $12 128K<Token<=256K $3 $15 qwen3-max-2026-01-23 ... $1.2 $6"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "International Model Studio price; base 0<Token<=32K tier recorded; higher tiers noted in quote.",
      "permalink": "/models/alibaba/qwen3-max.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3-max-2026-01-23",
      "display_name": "Qwen3 Max 2026-01-23",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.2,
        "output_per_mtok": 6.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 256000,
      "context_window_notation": "256K",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens"
          ],
          "quote": "qwen3-max International Non-Thinking and Thinking modes 0<Token<=32K $1.2 $6 1 million tokens 32K<Token<=128K $2.4 $12 128K<Token<=256K $3 $15 qwen3-max-2026-01-23 ... $1.2 $6"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "International Model Studio price; base 0<Token<=32K tier recorded; higher tiers noted in quote.",
      "permalink": "/models/alibaba/qwen3-max-2026-01-23.html"
    },
    {
      "provider": "moonshot",
      "model_id": "kimi-k2.7-code",
      "display_name": "Kimi K2.7 Code",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.95,
        "output_per_mtok": 4.0,
        "cached_input_per_mtok": 0.19,
        "batch_discount_pct": null
      },
      "context_window_tokens": 256000,
      "context_window_notation": "256k",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.kimi.ai/",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "K2.7 Code Kimi K2.7 Code ... Cache Hit $0.19 / MTok Input $0.95 / MTok Output $4.00 / MTok"
        },
        {
          "url": "https://platform.kimi.ai/docs/pricing/chat-k27-code",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "modalities",
            "context_window_tokens"
          ],
          "quote": "Kimi K2.7 Code ... supports text, image, video input ... Context length 256k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": null,
      "permalink": "/models/moonshot/kimi-k2_7-code.html"
    },
    {
      "provider": "moonshot",
      "model_id": "kimi-k2.6",
      "display_name": "Kimi K2.6",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.95,
        "output_per_mtok": 4.0,
        "cached_input_per_mtok": 0.16,
        "batch_discount_pct": null
      },
      "context_window_tokens": 256000,
      "context_window_notation": "256k",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.kimi.ai/",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "K2.6 kimi-k2.6 ... Cache Hit $0.16 / MTok Input $0.95 / MTok Output $4.00 / MTok"
        },
        {
          "url": "https://platform.kimi.ai/docs/pricing/chat-k26",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "modalities",
            "context_window_tokens"
          ],
          "quote": "Kimi K2.6 ... supports text, image, and video input ... Context length 256k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": null,
      "permalink": "/models/moonshot/kimi-k2_6.html"
    },
    {
      "provider": "moonshot",
      "model_id": "kimi-k2.5",
      "display_name": "Kimi K2.5",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.6,
        "output_per_mtok": 3.0,
        "cached_input_per_mtok": 0.1,
        "batch_discount_pct": null
      },
      "context_window_tokens": 256000,
      "context_window_notation": "256k",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image",
          "video"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.kimi.ai/",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "K2.5 kimi-k2.5 ... Cache Hit $0.10 / MTok Input $0.60 / MTok Output $3.00 / MTok"
        },
        {
          "url": "https://platform.kimi.ai/docs/pricing/chat-k25",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "modalities",
            "context_window_tokens"
          ],
          "quote": "Kimi K2.5 supports text, image, and video input ... Context length 256k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": null,
      "permalink": "/models/moonshot/kimi-k2_5.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-5.2",
      "display_name": "GLM-5.2",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.4,
        "output_per_mtok": 4.4,
        "cached_input_per_mtok": 0.26,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-5_2.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-5.1",
      "display_name": "GLM-5.1",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.4,
        "output_per_mtok": 4.4,
        "cached_input_per_mtok": 0.26,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-5_1.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-5",
      "display_name": "GLM-5",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.0,
        "output_per_mtok": 3.2,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-5.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-5-turbo",
      "display_name": "GLM-5 Turbo",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.2,
        "output_per_mtok": 4.0,
        "cached_input_per_mtok": 0.24,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-5-turbo.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-4.7",
      "display_name": "GLM-4.7",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.6,
        "output_per_mtok": 2.2,
        "cached_input_per_mtok": 0.11,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-4_7.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-4.7-flashx",
      "display_name": "GLM-4.7 FlashX",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.07,
        "output_per_mtok": 0.4,
        "cached_input_per_mtok": 0.01,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-4_7-flashx.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-4.6",
      "display_name": "GLM-4.6",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.6,
        "output_per_mtok": 2.2,
        "cached_input_per_mtok": 0.11,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-4_6.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-4.5",
      "display_name": "GLM-4.5",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.6,
        "output_per_mtok": 2.2,
        "cached_input_per_mtok": 0.11,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-4_5.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-4.5-x",
      "display_name": "GLM-4.5-X",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.2,
        "output_per_mtok": 8.9,
        "cached_input_per_mtok": 0.45,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-4_5-x.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-4.5-air",
      "display_name": "GLM-4.5 Air",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.2,
        "output_per_mtok": 1.1,
        "cached_input_per_mtok": 0.03,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-4_5-air.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-4.5-airx",
      "display_name": "GLM-4.5 AirX",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.1,
        "output_per_mtok": 4.5,
        "cached_input_per_mtok": 0.22,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-4_5-airx.html"
    },
    {
      "provider": "zhipu",
      "model_id": "glm-4-32b-0414-128k",
      "display_name": "GLM-4-32B-0414-128K",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.1,
        "output_per_mtok": 0.1,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.z.ai/guides/overview/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Prices per 1M tokens. Model Input Cached Input Cached Input Storage Output GLM-5.2$1.4$0.26 Limited-time Free$4.4 GLM-5.1$1.4$0.26 Limited-time Free$4.4 GLM-5$1$0.2 Limited-time Free$3.2 GLM-5-Turbo$1.2$0.24 Limited-time Free$4.0 GLM-4.7$0.6$0.11 Limited-time Free$2.2 GLM-4.7-FlashX$0.07$0.01 Limited-time Free$0.4 GLM-4.6$0.6$0.11 Limited-time Free$2.2 GLM-4.5$0.6$0.11 Limited-time Free$2.2 GLM-4.5-X$2.2$0.45 Limited-time Free$8.9 GLM-4.5-Air$0.2$0.03 Limited-time Free$1.1 GLM-4.5-AirX$1.1$0.22 Limited-time Free$4.5 GLM-4-32B-0414-128K$0.1--$0.1"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Z.AI official USD pricing; provider key follows EXPANSION_ORDER zhipu naming. Cached input storage is listed as limited-time free and not modeled.",
      "permalink": "/models/zhipu/glm-4-32b-0414-128k.html"
    },
    {
      "provider": "minimax",
      "model_id": "minimax-m3",
      "display_name": "MiniMax M3",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.06,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.minimax.io/docs/guides/pricing-paygo",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Model Input Output Prompt caching Read MiniMax-M3 <= 512k input tokens Permanent 50% off $0.30 / M tokens $1.20 / M tokens $0.06 / M tokens ... MiniMax-M2.7$0.3 / M tokens$1.2 / M tokens$0.06 / M tokens$0.375 / M tokens Legacy Models MiniMax-M2.5$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2.1$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Standard Pay-as-you-go tier; M3 base <=512k input-token tier recorded, long-context and Priority tiers noted in source.",
      "permalink": "/models/minimax/minimax-m3.html"
    },
    {
      "provider": "minimax",
      "model_id": "minimax-m2.7",
      "display_name": "MiniMax M2.7",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.06,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.minimax.io/docs/guides/pricing-paygo",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Model Input Output Prompt caching Read MiniMax-M3 <= 512k input tokens Permanent 50% off $0.30 / M tokens $1.20 / M tokens $0.06 / M tokens ... MiniMax-M2.7$0.3 / M tokens$1.2 / M tokens$0.06 / M tokens$0.375 / M tokens Legacy Models MiniMax-M2.5$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2.1$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Standard Pay-as-you-go tier; M3 base <=512k input-token tier recorded, long-context and Priority tiers noted in source.",
      "permalink": "/models/minimax/minimax-m2_7.html"
    },
    {
      "provider": "minimax",
      "model_id": "minimax-m2.5",
      "display_name": "MiniMax M2.5",
      "status": "deprecated",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.03,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.minimax.io/docs/guides/pricing-paygo",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Model Input Output Prompt caching Read MiniMax-M3 <= 512k input tokens Permanent 50% off $0.30 / M tokens $1.20 / M tokens $0.06 / M tokens ... MiniMax-M2.7$0.3 / M tokens$1.2 / M tokens$0.06 / M tokens$0.375 / M tokens Legacy Models MiniMax-M2.5$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2.1$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Standard Pay-as-you-go tier; M3 base <=512k input-token tier recorded, long-context and Priority tiers noted in source.",
      "permalink": "/models/minimax/minimax-m2_5.html"
    },
    {
      "provider": "minimax",
      "model_id": "minimax-m2.1",
      "display_name": "MiniMax M2.1",
      "status": "deprecated",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.03,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.minimax.io/docs/guides/pricing-paygo",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Model Input Output Prompt caching Read MiniMax-M3 <= 512k input tokens Permanent 50% off $0.30 / M tokens $1.20 / M tokens $0.06 / M tokens ... MiniMax-M2.7$0.3 / M tokens$1.2 / M tokens$0.06 / M tokens$0.375 / M tokens Legacy Models MiniMax-M2.5$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2.1$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Standard Pay-as-you-go tier; M3 base <=512k input-token tier recorded, long-context and Priority tiers noted in source.",
      "permalink": "/models/minimax/minimax-m2_1.html"
    },
    {
      "provider": "minimax",
      "model_id": "minimax-m2",
      "display_name": "MiniMax M2",
      "status": "deprecated",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.03,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://platform.minimax.io/docs/guides/pricing-paygo",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Model Input Output Prompt caching Read MiniMax-M3 <= 512k input tokens Permanent 50% off $0.30 / M tokens $1.20 / M tokens $0.06 / M tokens ... MiniMax-M2.7$0.3 / M tokens$1.2 / M tokens$0.06 / M tokens$0.375 / M tokens Legacy Models MiniMax-M2.5$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2.1$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens MiniMax-M2$0.3 / M tokens$1.2 / M tokens$0.03 / M tokens$0.375 / M tokens"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Standard Pay-as-you-go tier; M3 base <=512k input-token tier recorded, long-context and Priority tiers noted in source.",
      "permalink": "/models/minimax/minimax-m2.html"
    },
    {
      "provider": "groq",
      "model_id": "llama-3.1-8b-instant",
      "display_name": "Llama 3.1 8B Instant on Groq",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.05,
        "output_per_mtok": 0.08,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 131072,
      "max_output_tokens": 131072,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://console.groq.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Production Models MODEL ID ... llama-3.1-8b-instant $0.05 input$0.08 output ... 131,072 131,072 ... llama-3.3-70b-versatile $0.59 input$0.79 output ... 131,072 32,768 ... openai/gpt-oss-120b $0.15 input$0.60 output ... 131,072 65,536 ... openai/gpt-oss-20b $0.075 input$0.30 output ... 131,072 65,536"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Upstream model family: Meta Llama; provider is Groq serving platform.",
      "permalink": "/models/groq/llama-3_1-8b-instant.html"
    },
    {
      "provider": "groq",
      "model_id": "llama-3.3-70b-versatile",
      "display_name": "Llama 3.3 70B Versatile on Groq",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.59,
        "output_per_mtok": 0.79,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 131072,
      "max_output_tokens": 32768,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://console.groq.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Production Models MODEL ID ... llama-3.1-8b-instant $0.05 input$0.08 output ... 131,072 131,072 ... llama-3.3-70b-versatile $0.59 input$0.79 output ... 131,072 32,768 ... openai/gpt-oss-120b $0.15 input$0.60 output ... 131,072 65,536 ... openai/gpt-oss-20b $0.075 input$0.30 output ... 131,072 65,536"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Upstream model family: Meta Llama; provider is Groq serving platform.",
      "permalink": "/models/groq/llama-3_3-70b-versatile.html"
    },
    {
      "provider": "groq",
      "model_id": "openai/gpt-oss-120b",
      "display_name": "GPT OSS 120B on Groq",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.15,
        "output_per_mtok": 0.6,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 131072,
      "max_output_tokens": 65536,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://console.groq.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Production Models MODEL ID ... llama-3.1-8b-instant $0.05 input$0.08 output ... 131,072 131,072 ... llama-3.3-70b-versatile $0.59 input$0.79 output ... 131,072 32,768 ... openai/gpt-oss-120b $0.15 input$0.60 output ... 131,072 65,536 ... openai/gpt-oss-20b $0.075 input$0.30 output ... 131,072 65,536"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Upstream model family: OpenAI GPT OSS; provider is Groq serving platform.",
      "permalink": "/models/groq/openai_gpt-oss-120b.html"
    },
    {
      "provider": "groq",
      "model_id": "openai/gpt-oss-20b",
      "display_name": "GPT OSS 20B on Groq",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.075,
        "output_per_mtok": 0.3,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 131072,
      "max_output_tokens": 65536,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://console.groq.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Production Models MODEL ID ... llama-3.1-8b-instant $0.05 input$0.08 output ... 131,072 131,072 ... llama-3.3-70b-versatile $0.59 input$0.79 output ... 131,072 32,768 ... openai/gpt-oss-120b $0.15 input$0.60 output ... 131,072 65,536 ... openai/gpt-oss-20b $0.075 input$0.30 output ... 131,072 65,536"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Upstream model family: OpenAI GPT OSS; provider is Groq serving platform.",
      "permalink": "/models/groq/openai_gpt-oss-20b.html"
    },
    {
      "provider": "fireworks",
      "model_id": "kimi-k2.7-code",
      "display_name": "Kimi K2.7 Code on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.95,
        "output_per_mtok": 4.0,
        "cached_input_per_mtok": 0.19,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/kimi-k2_7-code.html"
    },
    {
      "provider": "fireworks",
      "model_id": "kimi-k2.7-code-fast",
      "display_name": "Kimi K2.7 Code Fast on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.9,
        "output_per_mtok": 8.0,
        "cached_input_per_mtok": 0.38,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code Fast $1.90 / $0.38 / $8.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/kimi-k2_7-code-fast.html"
    },
    {
      "provider": "fireworks",
      "model_id": "kimi-k2.6",
      "display_name": "Kimi K2.6 on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.95,
        "output_per_mtok": 4.0,
        "cached_input_per_mtok": 0.16,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/kimi-k2_6.html"
    },
    {
      "provider": "fireworks",
      "model_id": "kimi-k2.6-fast",
      "display_name": "Kimi K2.6 Fast on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.0,
        "output_per_mtok": 8.0,
        "cached_input_per_mtok": 0.3,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.6 Fast $2.00 / $0.30 / $8.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/kimi-k2_6-fast.html"
    },
    {
      "provider": "fireworks",
      "model_id": "deepseek-v4-pro",
      "display_name": "DeepSeek V4 Pro on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.74,
        "output_per_mtok": 3.48,
        "cached_input_per_mtok": 0.145,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/deepseek-v4-pro.html"
    },
    {
      "provider": "fireworks",
      "model_id": "deepseek-v4-flash",
      "display_name": "DeepSeek V4 Flash on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.14,
        "output_per_mtok": 0.28,
        "cached_input_per_mtok": 0.028,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/deepseek-v4-flash.html"
    },
    {
      "provider": "fireworks",
      "model_id": "glm-5.2",
      "display_name": "GLM 5.2 on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.4,
        "output_per_mtok": 4.4,
        "cached_input_per_mtok": 0.14,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/glm-5_2.html"
    },
    {
      "provider": "fireworks",
      "model_id": "glm-5.2-fast",
      "display_name": "GLM 5.2 Fast on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.1,
        "output_per_mtok": 6.6,
        "cached_input_per_mtok": 0.21,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... GLM 5.2 Fast $2.10 / $0.21 / $6.60"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/glm-5_2-fast.html"
    },
    {
      "provider": "fireworks",
      "model_id": "glm-5.1",
      "display_name": "GLM 5.1 on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.4,
        "output_per_mtok": 4.4,
        "cached_input_per_mtok": 0.26,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/glm-5_1.html"
    },
    {
      "provider": "fireworks",
      "model_id": "glm-5.1-fast",
      "display_name": "GLM 5.1 Fast on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.8,
        "output_per_mtok": 8.8,
        "cached_input_per_mtok": 0.52,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... GLM 5.1 Fast $2.80 / $0.52 / $8.80"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/glm-5_1-fast.html"
    },
    {
      "provider": "fireworks",
      "model_id": "qwen-3.7-plus",
      "display_name": "Qwen 3.7 Plus on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.4,
        "output_per_mtok": 1.6,
        "cached_input_per_mtok": 0.08,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/qwen-3_7-plus.html"
    },
    {
      "provider": "fireworks",
      "model_id": "minimax-m3",
      "display_name": "MiniMax M3 on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.06,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/minimax-m3.html"
    },
    {
      "provider": "fireworks",
      "model_id": "minimax-m2.7",
      "display_name": "MiniMax M2.7 on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.06,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/minimax-m2_7.html"
    },
    {
      "provider": "fireworks",
      "model_id": "openai-gpt-oss-120b",
      "display_name": "OpenAI GPT OSS 120B on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.15,
        "output_per_mtok": 0.6,
        "cached_input_per_mtok": 0.015,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/openai-gpt-oss-120b.html"
    },
    {
      "provider": "fireworks",
      "model_id": "openai-gpt-oss-20b",
      "display_name": "OpenAI GPT OSS 20B on Fireworks",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.07,
        "output_per_mtok": 0.3,
        "cached_input_per_mtok": 0.035,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Text and vision models ... prices are input / cached input / output (USD per 1M tokens) ... Kimi K2.7 Code $0.95 / $0.19 / $4.00 ... Kimi K2.6 $0.95 / $0.16 / $4.00 ... DeepSeek V4 Pro $1.74 / $0.145 / $3.48 ... DeepSeek V4 Flash $0.14 / $0.028 / $0.28 ... GLM 5.2 $1.40 / $0.14 / $4.40 ... GLM 5.1 $1.40 / $0.26 / $4.40 ... Qwen 3.7 Plus $0.40 / $0.08 / $1.60 ... MiniMax M3 $0.30 / $0.06 / $1.20 ... MiniMax M2.7 $0.30 / $0.06 / $1.20 ... OpenAI GPT OSS 120B $0.15 / $0.015 / $0.60 ... OpenAI GPT OSS 20B $0.07 / $0.035 / $0.30 ... NVIDIA Nemotron 3 Ultra (Preview) $0.60 / $0.12 / $2.40"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Fireworks serving platform; upstream model family named in display_name. Context window not captured from Fireworks model page.",
      "permalink": "/models/fireworks/openai-gpt-oss-20b.html"
    },
    {
      "provider": "together",
      "model_id": "deepseek-v4-pro",
      "display_name": "DeepSeek V4 Pro on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.74,
        "output_per_mtok": 3.48,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/deepseek-v4-pro.html"
    },
    {
      "provider": "together",
      "model_id": "minimax-m3",
      "display_name": "MiniMax M3 on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.06,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/minimax-m3.html"
    },
    {
      "provider": "together",
      "model_id": "kimi-k2.7-code",
      "display_name": "Kimi K2.7 Code on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.95,
        "output_per_mtok": 4.0,
        "cached_input_per_mtok": 0.19,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/kimi-k2_7-code.html"
    },
    {
      "provider": "together",
      "model_id": "glm-5.2",
      "display_name": "GLM-5.2 on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.4,
        "output_per_mtok": 4.4,
        "cached_input_per_mtok": 0.26,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/glm-5_2.html"
    },
    {
      "provider": "together",
      "model_id": "lfm2-24b-a2b",
      "display_name": "LFM2 24B A2B on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.03,
        "output_per_mtok": 0.12,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/lfm2-24b-a2b.html"
    },
    {
      "provider": "together",
      "model_id": "gemma-4-31b",
      "display_name": "Gemma 4 31B on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.39,
        "output_per_mtok": 0.97,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/gemma-4-31b.html"
    },
    {
      "provider": "together",
      "model_id": "nvidia-nemotron-3-ultra",
      "display_name": "NVIDIA Nemotron 3 Ultra on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.6,
        "output_per_mtok": 3.6,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/nvidia-nemotron-3-ultra.html"
    },
    {
      "provider": "together",
      "model_id": "qwen3.7-plus",
      "display_name": "Qwen3.7 Plus on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.32,
        "output_per_mtok": 1.28,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/qwen3_7-plus.html"
    },
    {
      "provider": "together",
      "model_id": "kimi-k2.6",
      "display_name": "Kimi K2.6 on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.2,
        "output_per_mtok": 4.5,
        "cached_input_per_mtok": 0.2,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/kimi-k2_6.html"
    },
    {
      "provider": "together",
      "model_id": "qwen3.7-max",
      "display_name": "Qwen3.7 Max on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.25,
        "output_per_mtok": 3.75,
        "cached_input_per_mtok": 0.13,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/qwen3_7-max.html"
    },
    {
      "provider": "together",
      "model_id": "gpt-oss-120b",
      "display_name": "GPT OSS 120B on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.15,
        "output_per_mtok": 0.6,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/gpt-oss-120b.html"
    },
    {
      "provider": "together",
      "model_id": "qwen3.5-397b-a17b",
      "display_name": "Qwen3.5 397B A17B on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.6,
        "output_per_mtok": 3.6,
        "cached_input_per_mtok": 0.35,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/qwen3_5-397b-a17b.html"
    },
    {
      "provider": "together",
      "model_id": "qwen3.5-9b",
      "display_name": "Qwen3.5 9B on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.17,
        "output_per_mtok": 0.25,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/qwen3_5-9b.html"
    },
    {
      "provider": "together",
      "model_id": "gemma-4-31b-it-pearl",
      "display_name": "Gemma 4 31B IT Pearl on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.28,
        "output_per_mtok": 0.86,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/gemma-4-31b-it-pearl.html"
    },
    {
      "provider": "together",
      "model_id": "cogito-v2.1-671b",
      "display_name": "Cogito v2.1 671B on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.25,
        "output_per_mtok": 1.25,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/cogito-v2_1-671b.html"
    },
    {
      "provider": "together",
      "model_id": "rnj-1-instruct",
      "display_name": "Rnj-1 Instruct on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.15,
        "output_per_mtok": 0.15,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/rnj-1-instruct.html"
    },
    {
      "provider": "together",
      "model_id": "llama-3.3-70b",
      "display_name": "Llama 3.3 70B on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.04,
        "output_per_mtok": 1.04,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/llama-3_3-70b.html"
    },
    {
      "provider": "together",
      "model_id": "gemma-3n-e4b-instruct",
      "display_name": "Gemma 3n E4B Instruct on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.06,
        "output_per_mtok": 0.12,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/gemma-3n-e4b-instruct.html"
    },
    {
      "provider": "together",
      "model_id": "gpt-oss-20b",
      "display_name": "GPT OSS 20B on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.05,
        "output_per_mtok": 0.2,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/gpt-oss-20b.html"
    },
    {
      "provider": "together",
      "model_id": "qwen3-235b-a22b-fp8-throughput",
      "display_name": "Qwen3 235B A22B FP8 Throughput on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.2,
        "output_per_mtok": 0.6,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/qwen3-235b-a22b-fp8-throughput.html"
    },
    {
      "provider": "together",
      "model_id": "minimax-m2.5",
      "display_name": "MiniMax M2.5 on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.06,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/minimax-m2_5.html"
    },
    {
      "provider": "together",
      "model_id": "glm-5.1",
      "display_name": "GLM-5.1 on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 1.4,
        "output_per_mtok": 4.4,
        "cached_input_per_mtok": 0.26,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/glm-5_1.html"
    },
    {
      "provider": "together",
      "model_id": "minimax-m2.7",
      "display_name": "MiniMax M2.7 on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 1.2,
        "cached_input_per_mtok": 0.06,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/minimax-m2_7.html"
    },
    {
      "provider": "together",
      "model_id": "qwen3.6-plus",
      "display_name": "Qwen3.6 Plus on Together",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.5,
        "output_per_mtok": 3.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Serverless Inference Price per 1M tokens Model Input output DeepSeek V4 Pro $1.74 $0.20 (cached) $3.48 MiniMax M3 $0.30 $0.06 (cached) $1.20 Kimi K2.7 Code $0.95 $0.19 (cached) $4.00 GLM-5.2 $1.40 $0.26 (cached) $4.40 LFM2 24B A2B $0.03 $0.12 Gemma 4 31B $0.39 $0.97 NVIDIA Nemotron 3 Ultra $0.60 $0.20 (cached) $3.60 Qwen3.7-Plus $0.32 $1.28 Kimi K2.6 $1.20 $0.20 (cached) $4.50 Qwen3.7-Max $1.25 $0.13 (cached) $3.75 gpt-oss-120B $0.15 $0.60 Qwen3.5-397B-A17B $0.60 $0.35 (cached) $3.60 Qwen3.5 9B $0.17 $0.25 Gemma-4-31B-it-Pearl $0.28 $0.86 Cogito v2.1 671B $1.25 $1.25 Rnj-1 Instruct $0.15 $0.15 Llama 3.3 70B $1.04 $1.04 Gemma 3n E4B Instruct $0.06 $0.12 gpt-oss-20B $0.05 $0.20 Qwen3 235B A22B FP8 Throughput $0.20 $0.60 MiniMax M2.5 $0.30 $0.06 (cached) $1.20 GLM-5.1 $1.40 $0.26 (cached) $4.40 MiniMax M2.7 $0.30 $0.06 (cached) $1.20 Qwen3.6-Plus $0.50 $3.00"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Provider is Together AI serving platform; upstream model family named in display_name. Context window not captured from Together model page.",
      "permalink": "/models/together/qwen3_6-plus.html"
    },
    {
      "provider": "perplexity",
      "model_id": "perplexity/sonar",
      "display_name": "Sonar",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.25,
        "output_per_mtok": 2.5,
        "cached_input_per_mtok": 0.0625,
        "batch_discount_pct": null
      },
      "context_window_tokens": null,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.perplexity.ai/docs/agent-api/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Perplexity Sonar — Perplexity’s grounded search model. Model Input ($/1M)Output ($/1M)Cache ($/1M) `perplexity/sonar` 0.25 2.50 0.0625"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Perplexity Agent API model table; context window not stated on collected page.",
      "permalink": "/models/perplexity/perplexity_sonar.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-a-plus-05-2026",
      "display_name": "Command A+ 05-2026",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": null,
        "output_per_mtok": null,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "context_window_notation": "128k",
      "max_output_tokens": 64000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "The Command family of models includes Command A+, Command A, Command R7B, Command A Translate, Command A Reasoning, Command A Vision, Command R+, Command R, and Command ... Model Name Status Description Modality Context Length Maximum Output Tokens Endpoints `command-a-plus-05-2026` Live ... Text, Images 128k 64k ... `command-a-03-2025` Live ... Text 256k 8k ... `command-r7b-12-2024` Live ... Text 128k 4k ... `command-a-translate-08-2025` Live ... Text 8K 8k ... `command-a-reasoning-08-2025` Live ... Text 256k 32k ... `command-a-vision-07-2025` Live ... Text, Images 128K 8K ... `command-r-08-2024` Live ... Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Cohere model page confirms live model and limits; public token price was not found in collected primary pricing page.",
      "permalink": "/models/cohere/command-a-plus-05-2026.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-a-03-2025",
      "display_name": "Command A 03-2025",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": null,
        "output_per_mtok": null,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 256000,
      "context_window_notation": "256k",
      "max_output_tokens": 8000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "The Command family of models includes Command A+, Command A, Command R7B, Command A Translate, Command A Reasoning, Command A Vision, Command R+, Command R, and Command ... Model Name Status Description Modality Context Length Maximum Output Tokens Endpoints `command-a-plus-05-2026` Live ... Text, Images 128k 64k ... `command-a-03-2025` Live ... Text 256k 8k ... `command-r7b-12-2024` Live ... Text 128k 4k ... `command-a-translate-08-2025` Live ... Text 8K 8k ... `command-a-reasoning-08-2025` Live ... Text 256k 32k ... `command-a-vision-07-2025` Live ... Text, Images 128K 8K ... `command-r-08-2024` Live ... Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Cohere model page confirms live model and limits; public token price was not found in collected primary pricing page.",
      "permalink": "/models/cohere/command-a-03-2025.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-a-translate-08-2025",
      "display_name": "Command A Translate 08-2025",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": null,
        "output_per_mtok": null,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 8000,
      "context_window_notation": "8k",
      "max_output_tokens": 8000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "The Command family of models includes Command A+, Command A, Command R7B, Command A Translate, Command A Reasoning, Command A Vision, Command R+, Command R, and Command ... Model Name Status Description Modality Context Length Maximum Output Tokens Endpoints `command-a-plus-05-2026` Live ... Text, Images 128k 64k ... `command-a-03-2025` Live ... Text 256k 8k ... `command-r7b-12-2024` Live ... Text 128k 4k ... `command-a-translate-08-2025` Live ... Text 8K 8k ... `command-a-reasoning-08-2025` Live ... Text 256k 32k ... `command-a-vision-07-2025` Live ... Text, Images 128K 8K ... `command-r-08-2024` Live ... Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Cohere model page confirms live model and limits; public token price was not found in collected primary pricing page.",
      "permalink": "/models/cohere/command-a-translate-08-2025.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-a-reasoning-08-2025",
      "display_name": "Command A Reasoning 08-2025",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": null,
        "output_per_mtok": null,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 256000,
      "context_window_notation": "256k",
      "max_output_tokens": 32000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "The Command family of models includes Command A+, Command A, Command R7B, Command A Translate, Command A Reasoning, Command A Vision, Command R+, Command R, and Command ... Model Name Status Description Modality Context Length Maximum Output Tokens Endpoints `command-a-plus-05-2026` Live ... Text, Images 128k 64k ... `command-a-03-2025` Live ... Text 256k 8k ... `command-r7b-12-2024` Live ... Text 128k 4k ... `command-a-translate-08-2025` Live ... Text 8K 8k ... `command-a-reasoning-08-2025` Live ... Text 256k 32k ... `command-a-vision-07-2025` Live ... Text, Images 128K 8K ... `command-r-08-2024` Live ... Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Cohere model page confirms live model and limits; public token price was not found in collected primary pricing page.",
      "permalink": "/models/cohere/command-a-reasoning-08-2025.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-a-vision-07-2025",
      "display_name": "Command A Vision 07-2025",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": null,
        "output_per_mtok": null,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "context_window_notation": "128k",
      "max_output_tokens": 8000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "The Command family of models includes Command A+, Command A, Command R7B, Command A Translate, Command A Reasoning, Command A Vision, Command R+, Command R, and Command ... Model Name Status Description Modality Context Length Maximum Output Tokens Endpoints `command-a-plus-05-2026` Live ... Text, Images 128k 64k ... `command-a-03-2025` Live ... Text 256k 8k ... `command-r7b-12-2024` Live ... Text 128k 4k ... `command-a-translate-08-2025` Live ... Text 8K 8k ... `command-a-reasoning-08-2025` Live ... Text 256k 32k ... `command-a-vision-07-2025` Live ... Text, Images 128K 8K ... `command-r-08-2024` Live ... Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Cohere model page confirms live model and limits; public token price was not found in collected primary pricing page.",
      "permalink": "/models/cohere/command-a-vision-07-2025.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-r-08-2024",
      "display_name": "Command R 08-2024",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": null,
        "output_per_mtok": null,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "context_window_notation": "128k",
      "max_output_tokens": 4000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "The Command family of models includes Command A+, Command A, Command R7B, Command A Translate, Command A Reasoning, Command A Vision, Command R+, Command R, and Command ... Model Name Status Description Modality Context Length Maximum Output Tokens Endpoints `command-a-plus-05-2026` Live ... Text, Images 128k 64k ... `command-a-03-2025` Live ... Text 256k 8k ... `command-r7b-12-2024` Live ... Text 128k 4k ... `command-a-translate-08-2025` Live ... Text 8K 8k ... `command-a-reasoning-08-2025` Live ... Text 256k 32k ... `command-a-vision-07-2025` Live ... Text, Images 128K 8K ... `command-r-08-2024` Live ... Text 128k 4k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Cohere model page confirms live model and limits; public token price was not found in collected primary pricing page.",
      "permalink": "/models/cohere/command-r-08-2024.html"
    },
    {
      "provider": "cohere",
      "model_id": "command-r7b-12-2024",
      "display_name": "Command R7B 12-2024",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.0375,
        "output_per_mtok": 0.15,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "max_output_tokens": 4000,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": "June 1, 2024",
      "sources": [
        {
          "url": "https://docs.cohere.com/docs/command-r7b",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "context_window_tokens",
            "max_output_tokens",
            "knowledge_cutoff",
            "model_id",
            "modalities"
          ],
          "quote": "Command R7B model details and specifications Capabilities Multilingual Safety Modes Citations Tool Use Structured Outputs Reasoning Image Inputs Pricing Input $0.0375 / 1M tokens Output $0.15 / 1M tokens Specifications Context Window: 128,000 tokens Max Output Tokens: 4,000 tokens Knowledge Cutoff: June 1, 2024 Model ID command-r7b-12-2024"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": null,
      "permalink": "/models/cohere/command-r7b-12-2024.html"
    },
    {
      "provider": "cohere",
      "model_id": "tiny-aya-global",
      "display_name": "Tiny Aya Global",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": null,
        "output_per_mtok": null,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 8000,
      "context_window_notation": "8k",
      "max_output_tokens": 8000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.cohere.com/docs/models",
          "accessed_at": "2026-07-04T08:00:00Z",
          "fields": [
            "model_id",
            "status",
            "modalities",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "Model Name Status Description Modality Context Length Maximum Output Tokens Endpoints `tiny-aya-global` Live Tiny Aya Global is a 3.35B instruction-tuned multilingual model with the best balance across languages and regions. Supports 70 languages.Text 8k 8k"
        }
      ],
      "verified_at": "2026-07-04T08:00:00Z",
      "notes": "Cohere model page confirms live model and limits; public token price was not found in collected primary pricing page.",
      "permalink": "/models/cohere/tiny-aya-global.html"
    },
    {
      "provider": "perplexity",
      "model_id": "sonar-pro",
      "display_name": "Sonar Pro",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 3.0,
        "output_per_mtok": 15.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 200000,
      "context_window_notation": "200K",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.perplexity.ai/docs/getting-started/pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Token Pricing ... Sonar Pro$3$15--- Sonar Reasoning Pro$2$8--- Request Pricing by Search Context Size ... Sonar Pro$6$10$14 Sonar Reasoning Pro$6$10$14"
        },
        {
          "url": "https://docs.perplexity.ai/docs/sonar/models/sonar-pro",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens"
          ],
          "quote": "Sonar Pro ... Input Tokens $3 Per 1M Tokens ... Output Tokens $15 Per 1M Tokens ... model: sonar-pro ... 200K context length"
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "Perplexity Sonar API model; request fees vary by search context size and are noted in the cited pricing quote.",
      "permalink": "/models/perplexity/sonar-pro.html"
    },
    {
      "provider": "perplexity",
      "model_id": "sonar-reasoning-pro",
      "display_name": "Sonar Reasoning Pro",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 2.0,
        "output_per_mtok": 8.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "context_window_notation": "128K",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.perplexity.ai/docs/getting-started/pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Token Pricing ... Sonar Pro$3$15--- Sonar Reasoning Pro$2$8--- Request Pricing by Search Context Size ... Sonar Pro$6$10$14 Sonar Reasoning Pro$6$10$14"
        },
        {
          "url": "https://docs.perplexity.ai/docs/sonar/models/sonar-reasoning-pro",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens"
          ],
          "quote": "Sonar Reasoning Pro ... sonar-reasoning was deprecated on December 15, 2025 ... model: sonar-reasoning-pro ... 128K context length"
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "Perplexity Sonar API reasoning model; request fees vary by search context size and are noted in the cited pricing quote.",
      "permalink": "/models/perplexity/sonar-reasoning-pro.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.7-plus",
      "display_name": "Qwen3.7 Plus",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.4,
        "output_per_mtok": 1.6,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 64000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "qwen3.7-plus Currently equivalent to qwen3.7-plus-2026-05-26 International 0<Token?256K $0.4 $1.6 $1.6 1 million tokens 256K<Token?1M $1.2 $4.8 $4.8 qwen3.7-plus-2026-05-26 International 0<Token?256K $0.4 $1.6 $1.6 1 million tokens 256K<Token?1M $1.2 $4.8 $4.8 qwen3.6-plus Currently equivalent to qwen3.6-plus-2026-04-02 International 0<Token?256K $0.5 $3 $3 1 million tokens 256K<Token?1M $2 $6 $6 qwen3.6-plus-2026-04-02 International 0<Token?256K $0.5 $3 $3 1 million tokens 256K<Token?1M $2 $6 $6"
        },
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/models",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "All models > Qwen3.7: Model ID qwen3.7-plus; Input Text, images, video; Output Text; Context 1M; Max output 64k."
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "International/Global Model Studio price; base 0<Token<=256K tier recorded; higher tier to 1M tokens is noted in the cited quote.",
      "permalink": "/models/alibaba/qwen3_7-plus.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.7-plus-2026-05-26",
      "display_name": "Qwen3.7 Plus 2026-05-26",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.4,
        "output_per_mtok": 1.6,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 64000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "qwen3.7-plus Currently equivalent to qwen3.7-plus-2026-05-26 International 0<Token≤256K $0.4 $1.6 $1.6 1 million tokens 256K<Token≤1M $1.2 $4.8 $4.8 qwen3.7-plus-2026-05-26 International 0<Token≤256K $0.4 $1.6 $1.6 1 million tokens 256K<Token≤1M $1.2 $4.8 $4.8 qwen3.6-plus Currently equivalent to qwen3.6-plus-2026-04-02 International 0<Token≤256K $0.5 $3 $3 1 million tokens 256K<Token≤1M $2 $6 $6 qwen3.6-plus-2026-04-02 International 0<Token≤256K $0.5 $3 $3 1 million tokens 256K<Token≤1M $2 $6 $6"
        },
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/models",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "All models > Qwen3.7: Model ID qwen3.7-plus-2026-05-26; Input Text, images, video; Output Text; Context 1M; Max output 64k."
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "International/Global Model Studio price; base 0<Token<=256K tier recorded; higher tier to 1M tokens is noted in the cited quote.",
      "permalink": "/models/alibaba/qwen3_7-plus-2026-05-26.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.6-plus",
      "display_name": "Qwen3.6 Plus",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.5,
        "output_per_mtok": 3.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 64000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "qwen3.7-plus Currently equivalent to qwen3.7-plus-2026-05-26 International 0<Token≤256K $0.4 $1.6 $1.6 1 million tokens 256K<Token≤1M $1.2 $4.8 $4.8 qwen3.7-plus-2026-05-26 International 0<Token≤256K $0.4 $1.6 $1.6 1 million tokens 256K<Token≤1M $1.2 $4.8 $4.8 qwen3.6-plus Currently equivalent to qwen3.6-plus-2026-04-02 International 0<Token≤256K $0.5 $3 $3 1 million tokens 256K<Token≤1M $2 $6 $6 qwen3.6-plus-2026-04-02 International 0<Token≤256K $0.5 $3 $3 1 million tokens 256K<Token≤1M $2 $6 $6"
        },
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/models",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "All models > Qwen3.6: Model ID qwen3.6-plus; Input Text, images, video; Output Text; Context 1M; Max output 64k."
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "International/Global Model Studio price; base 0<Token<=256K tier recorded; higher tier to 1M tokens is noted in the cited quote.",
      "permalink": "/models/alibaba/qwen3_6-plus.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.6-plus-2026-04-02",
      "display_name": "Qwen3.6 Plus 2026-04-02",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.5,
        "output_per_mtok": 3.0,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 64000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "qwen3.7-plus Currently equivalent to qwen3.7-plus-2026-05-26 International 0<Token≤256K $0.4 $1.6 $1.6 1 million tokens 256K<Token≤1M $1.2 $4.8 $4.8 qwen3.7-plus-2026-05-26 International 0<Token≤256K $0.4 $1.6 $1.6 1 million tokens 256K<Token≤1M $1.2 $4.8 $4.8 qwen3.6-plus Currently equivalent to qwen3.6-plus-2026-04-02 International 0<Token≤256K $0.5 $3 $3 1 million tokens 256K<Token≤1M $2 $6 $6 qwen3.6-plus-2026-04-02 International 0<Token≤256K $0.5 $3 $3 1 million tokens 256K<Token≤1M $2 $6 $6"
        },
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/models",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "All models > Qwen3.6: Model ID qwen3.6-plus-2026-04-02; Input Text, images, video; Output Text; Context 1M; Max output 64k."
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "International/Global Model Studio price; base 0<Token<=256K tier recorded; higher tier to 1M tokens is noted in the cited quote.",
      "permalink": "/models/alibaba/qwen3_6-plus-2026-04-02.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.6-flash",
      "display_name": "Qwen3.6 Flash",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.25,
        "output_per_mtok": 1.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 64000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "qwen3.6-flash Currently equivalent to qwen3.6-flash-2026-04-16 50% batch inference discount context caching discount International 0<Token≤256K $0.25 $1.5 1 million tokens qwen3.6-flash-2026-04-16 International 0<Token≤256K $0.25 $1.5 1 million tokens"
        },
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/models",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "All models > Qwen3.6: Model ID qwen3.6-flash; Input Text, images, video; Output Text; Context 1M; Max output 64k."
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "International Model Studio price; 0<Token<=256K tier recorded.",
      "permalink": "/models/alibaba/qwen3_6-flash.html"
    },
    {
      "provider": "alibaba",
      "model_id": "qwen3.6-flash-2026-04-16",
      "display_name": "Qwen3.6 Flash 2026-04-16",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.25,
        "output_per_mtok": 1.5,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 1000000,
      "context_window_notation": "1M",
      "max_output_tokens": 64000,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/model-pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "qwen3.6-flash Currently equivalent to qwen3.6-flash-2026-04-16 50% batch inference discount context caching discount International 0<Token≤256K $0.25 $1.5 1 million tokens qwen3.6-flash-2026-04-16 International 0<Token≤256K $0.25 $1.5 1 million tokens"
        },
        {
          "url": "https://www.alibabacloud.com/help/en/model-studio/models",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "max_output_tokens"
          ],
          "quote": "All models > Qwen3.6: Model ID qwen3.6-flash-2026-04-16; Input Text, images, video; Output Text; Context 1M; Max output 64k."
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "International Model Studio price; 0<Token<=256K tier recorded.",
      "permalink": "/models/alibaba/qwen3_6-flash-2026-04-16.html"
    },
    {
      "provider": "fireworks",
      "model_id": "llama-v3p3-70b-instruct",
      "display_name": "Llama 3.3 70B Instruct",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.9,
        "output_per_mtok": 0.9,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 131072,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Prices below are per 1 million tokens in US dollars ... Other base models -- by size and architecture ... More than 16B parameters $0.90"
        },
        {
          "url": "https://fireworks.ai/models/fireworks/llama-v3p3-70b-instruct",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "modalities"
          ],
          "quote": "model path:accounts/fireworks/models/llama-v3p3-70b-instruct ... Fireworks supports a context length of 131,072 tokens ... The model is available via serverless at $0.90 per million tokens ... Support image input Not supported"
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "Provider is Fireworks serving platform; upstream model family is Meta Llama. Generic >16B serverless token tier recorded.",
      "permalink": "/models/fireworks/llama-v3p3-70b-instruct.html"
    },
    {
      "provider": "fireworks",
      "model_id": "qwen2p5-vl-32b-instruct",
      "display_name": "Qwen2.5-VL 32B Instruct",
      "status": "ga",
      "release_date": null,
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.9,
        "output_per_mtok": 0.9,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 128000,
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://docs.fireworks.ai/serverless/pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Prices below are per 1 million tokens in US dollars ... Other base models -- by size and architecture ... More than 16B parameters $0.90"
        },
        {
          "url": "https://fireworks.ai/models/fireworks/qwen2p5-vl-32b-instruct",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "modalities"
          ],
          "quote": "model path:accounts/fireworks/models/qwen2p5-vl-32b-instruct ... Fireworks supports up to 128,000 tokens ... The model is supported on serverless deployment at $0.90 per million tokens ... Support image input Supported"
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "Provider is Fireworks serving platform; upstream model family is Qwen. Generic >16B serverless token tier recorded.",
      "permalink": "/models/fireworks/qwen2p5-vl-32b-instruct.html"
    },
    {
      "provider": "together",
      "model_id": "Qwen/Qwen2.5-7B-Instruct-Turbo",
      "display_name": "Qwen2.5 7B Instruct Turbo",
      "status": "ga",
      "release_date": "2024-10-11",
      "deprecation_date": null,
      "retirement_date": null,
      "pricing": {
        "input_per_mtok": 0.3,
        "output_per_mtok": 0.3,
        "cached_input_per_mtok": null,
        "batch_discount_pct": null
      },
      "context_window_tokens": 32768,
      "context_window_notation": "32K",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://www.together.ai/pricing",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok"
          ],
          "quote": "Qwen2.5 7B Instruct Turbo $0.30 $0.30 ... Displayed prices refer to the lowest resolution/duration settings. Price per 1M tokens"
        },
        {
          "url": "https://www.together.ai/models/qwen2-5-7b-instruct-turbo",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "context_window_tokens",
            "modalities",
            "release_date"
          ],
          "quote": "Endpoint: Qwen/Qwen2.5-7B-Instruct-Turbo ... Context length 32K ... Input modalities Text Output modalities Text Released October 11, 2024"
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "Provider is Together AI serving platform; exact endpoint ID recorded from Together model page.",
      "permalink": "/models/together/Qwen_Qwen2_5-7B-Instruct-Turbo.html"
    },
    {
      "provider": "amazon",
      "model_id": "anthropic.claude-3-5-sonnet-20241022-v2:0",
      "display_name": "Anthropic Claude 3.5 Sonnet v2 on Bedrock",
      "status": "deprecated",
      "release_date": null,
      "deprecation_date": "2026-01-30",
      "retirement_date": "2026-07-30",
      "pricing": {
        "input_per_mtok": 6.0,
        "output_per_mtok": 30.0,
        "cached_input_per_mtok": 0.6,
        "batch_discount_pct": null
      },
      "context_window_tokens": 200000,
      "context_window_notation": "200K",
      "max_output_tokens": null,
      "modalities": {
        "input": [
          "text",
          "image"
        ],
        "output": [
          "text"
        ]
      },
      "knowledge_cutoff": null,
      "sources": [
        {
          "url": "https://aws.amazon.com/bedrock/pricing/",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "pricing.input_per_mtok",
            "pricing.output_per_mtok",
            "pricing.cached_input_per_mtok"
          ],
          "quote": "Anthropic Claude 3.5 Sonnet v2 (Public Extended Access, Effective 1 Dec 2025) ... $6.00 $30.00 $3.00 $15.00 $7.50 $0.60"
        },
        {
          "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/model-lifecycle.html",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "model_id",
            "status",
            "deprecation_date",
            "retirement_date"
          ],
          "quote": "Claude 3.5 Sonnet v2 anthropic.claude-3-5-sonnet-20241022-v2:0 ... Legacy date January 30, 2026 EOL date July 30, 2026 Public extended access start date April 30, 2026"
        },
        {
          "url": "https://www.anthropic.com/news/claude-3-5-sonnet",
          "accessed_at": "2026-07-04T08:44:47Z",
          "fields": [
            "context_window_tokens",
            "modalities"
          ],
          "quote": "Claude 3.5 Sonnet is now available ... via Anthropic API, Amazon Bedrock, and Google Cloud Vertex AI ... 200K token context window ... vision capabilities"
        }
      ],
      "verified_at": "2026-07-04T08:44:47Z",
      "notes": "Hosted third-party model (Anthropic) on Bedrock; Public Extended Access price recorded from AWS Bedrock pricing.",
      "permalink": "/models/amazon/anthropic_claude-3-5-sonnet-20241022-v2_0.html"
    }
  ]
}