from serving_cast.request import Request
from tests.helpers.assert_utils import assert_latency_within


def test_tpot_metric_only(cast_model):
    """Check the time-per-output-token metric of a hand-timed ``Request``.

    The ``cast_model`` fixture must expose truthy ``model_id`` and
    ``op_meta`` entries. A request with 16 input tokens and 5 output tokens
    is then given explicit prefill/decode completion times, and its
    ``time_per_output_token()`` result is compared against 0.3 through
    ``assert_latency_within``.
    """
    # Fixture sanity checks: both entries must be present and truthy.
    for required_key in ("model_id", "op_meta"):
        assert cast_model[required_key]

    timed_request = Request(num_input_tokens=16, num_output_tokens=5)
    # Timestamps are set by hand so the metric does not depend on a real run.
    timed_request.prefill_done_time = 2.0
    timed_request.decode_done_time = 3.2

    # NOTE(review): 0.3 presumably equals (3.2 - 2.0) / (5 - 1), i.e. the
    # decode span divided by the tokens after the first one — confirm
    # against Request.time_per_output_token.
    expected_tpot = 0.3
    measured_tpot = timed_request.time_per_output_token()
    assert_latency_within(measured_tpot, expected_tpot)