# Rate cards for perf.rate_cards — USD per 1M tokens.
#
# Override via OGMEM_PERF_RATE_CARD=/path/to/your/cards.yaml
#
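# A model with no entry of its own is priced using the "default" entry.
# If neither the model's entry nor a "default" key is present, the lookup
# raises an error instead of silently assuming a price (no silent fallback,
# per CLAUDE.md §工程原则, i.e. "Engineering Principles").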
# LLM rate cards, keyed by model name. Every value is USD per 1M tokens.
#   input      - rate applied to input tokens
#   output     - rate applied to output tokens
#   cache_read - presumably the discounted rate for input tokens served from
#                the provider's prompt cache; confirm against perf.rate_cards
# NOTE(review): the loader is not visible from this file. Keep entry order
# and key names stable unless the loader is confirmed not to depend on them.
llm:
  gpt-4o-mini:
    input: 0.15
    output: 0.60
    cache_read: 0.075
  gpt-4o:
    input: 2.50
    output: 10.00
    cache_read: 1.25
  gpt-4.1:
    input: 2.00
    output: 8.00
    cache_read: 0.50
  gpt-4.1-mini:
    input: 0.40
    output: 1.60
    cache_read: 0.10
  o4-mini:
    input: 1.10
    output: 4.40
    cache_read: 0.275
  # Fall-through entry for model names that have no entry of their own.
  # It is not tied to a specific model, so costs derived from it should be
  # treated as estimates.
  default:
    input: 1.00
    output: 3.00
    cache_read: 0.50

# Embedding rate cards, keyed by model name. Every value is USD per 1M tokens.
# Each entry defines only an `input` rate; no `output` or `cache_read` rate
# is listed in this section.
embedding:
  text-embedding-3-small:
    input: 0.02
  text-embedding-3-large:
    input: 0.13
  text-embedding-ada-002:
    input: 0.10
  # Fall-through entry for embedding model names that have no entry of their
  # own (presumably resolved within this section; verify against the loader).
  default:
    input: 0.05