// genui-sdk/packages/benchmarks/src/framework/reporter.ts - code preview - genui-sdk: a full-stack AI application development toolkit based on Generative UI - AtomGit
//
// Yyy - feat: add handling for empty benchmark results in printBenchmarkSummary function
import type { LlmBenchmarkResultItem } from './types';
import { comparisonScenarioLabel, formatNumber } from '../utils';

/**
 * Renders an optional numeric metric as a table cell.
 *
 * @param value Metric value; may be absent.
 * @returns The result of `formatNumber(value, 2)` when `value` is a number
 *   (presumably two fraction digits — confirm against `formatNumber`),
 *   otherwise an empty string so the cell stays blank.
 */
function toDisplayNumber(value: number | undefined) {
  // Anything that is not a number is shown as an empty cell.
  if (typeof value !== 'number') {
    return '';
  }
  return formatNumber(value, 2);
}

/**
 * Computes the arithmetic mean of the usable entries in `values`.
 *
 * An entry is usable when it is a number and finite; `undefined`, `NaN`
 * and `±Infinity` are ignored.
 *
 * @param values Samples, some of which may be missing.
 * @returns The mean of the usable entries, or `null` when there are none.
 */
function averageDefined(values: Array<number | undefined>) {
  let total = 0;
  let count = 0;
  for (const candidate of values) {
    // Skip missing and non-finite samples so they cannot skew the mean.
    if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
      continue;
    }
    total += candidate;
    count += 1;
  }
  return count === 0 ? null : total / count;
}

/**
 * Prints a detailed metrics table with one row per benchmark result.
 *
 * Missing optional fields are rendered as empty cells, except
 * `promptVariant` (defaults to 'full') and `runIndex` (defaults to 1).
 *
 * @param results Per-run result details.
 */
export function printBenchmarkTable(results: LlmBenchmarkResultItem[]) {
  // Key order below determines the column order of the printed table.
  const toRow = (entry: LlmBenchmarkResultItem) => {
    const tpotCell = entry.tpotMs != null ? formatNumber(entry.tpotMs, 2) : '';
    const judgeScoreCell = entry.llmJudgeScore != null ? formatNumber(entry.llmJudgeScore, 2) : '';

    return {
      scenario: entry.scenario,
      promptVariant: entry.promptVariant ?? 'full',
      model: entry.model ?? '',
      runIndex: entry.runIndex ?? 1,
      ttftMs: toDisplayNumber(entry.ttftMs),
      tinyCardMs: toDisplayNumber(entry.firstObservableComponentMs),
      totalMs: formatNumber(entry.totalMs, 2),
      tpotMsPerTok: tpotCell,
      validSchema: entry.isSchemaJsonValidAgainstProtocol,
      schemaError: entry.schemaValidationError ?? '',
      promptTokens: entry.promptTokens,
      completionTokens: entry.completionTokens,
      totalTokens: entry.totalTokens,
      outputChars: entry.rawOutputChars,
      judgeScore: judgeScoreCell,
      judgeReason: entry.llmJudgeReason ?? '',
      judgeError: entry.llmJudgeError ?? '',
      error: entry.errorMessage || '',
    };
  };

  const rows = results.map((entry) => toRow(entry));
  console.table(rows);
}

/**
 * Prints aggregated summary metrics for a benchmark run: schema success
 * ratio, average latencies, average judge score, and total token count.
 *
 * When `results` is empty, a placeholder row (zero counts, 'N/A' metrics)
 * is printed together with an explanatory message.
 *
 * Optional metrics (`ttftMs`, `firstObservableComponentMs`, `tpotMs`,
 * `llmJudgeScore`) are all averaged through `averageDefined`, so missing
 * and non-finite samples are ignored and a metric with no usable samples
 * is shown as 'N/A'.
 *
 * @param results Per-run result details.
 */
export function printBenchmarkSummary(results: LlmBenchmarkResultItem[]) {
  if (results.length === 0) {
    console.log('\nBenchmark Summary');
    console.log('No result rows to summarize (empty input, all runs failed, or filters excluded every row).');
    console.table([
      {
        scenarios: 0,
        models: 0,
        runs: 0,
        validSchema: 'N/A',
        avgJudgeScore: 'N/A',
        avgTtftMs: 'N/A',
        avgTinyCardMs: 'N/A',
        avgTotalMs: 'N/A',
        avgTpotMsPerTok: 'N/A',
        totalTokens: 'N/A',
      },
    ]);
    return;
  }

  const successCount = results.filter((item) => item.isSchemaJsonValidAgainstProtocol).length;
  const avgTtft = averageDefined(results.map((item) => item.ttftMs));
  const avgFirstObs = averageDefined(results.map((item) => item.firstObservableComponentMs));
  // totalMs is averaged over every run, not only runs that carry a sample.
  const avgTotal = results.reduce((sum, item) => sum + item.totalMs, 0) / results.length;
  // Use the shared helper so a single NaN/Infinity sample cannot turn the
  // whole average into NaN/Infinity; `?? undefined` normalizes a possible null.
  const avgTpot = averageDefined(results.map((item) => item.tpotMs ?? undefined));
  const avgJudgeScore = averageDefined(results.map((item) => item.llmJudgeScore ?? undefined));
  const totalTokens = results.reduce((sum, item) => sum + item.totalTokens, 0);
  const uniqueScenarioCount = new Set(results.map((item) => comparisonScenarioLabel(item))).size;
  // Falsy model names (missing or empty) are not counted as a model.
  const uniqueModelCount = new Set(results.map((item) => item.model).filter(Boolean)).size;
  const summary = [
    {
      scenarios: uniqueScenarioCount,
      models: uniqueModelCount,
      runs: results.length,
      validSchema: `${successCount}/${results.length}`,
      avgJudgeScore: avgJudgeScore == null ? 'N/A' : formatNumber(avgJudgeScore, 2),
      avgTtftMs: avgTtft == null ? 'N/A' : formatNumber(avgTtft, 2),
      avgTinyCardMs: avgFirstObs == null ? 'N/A' : formatNumber(avgFirstObs, 2),
      avgTotalMs: formatNumber(avgTotal, 2),
      avgTpotMsPerTok: avgTpot == null ? 'N/A' : formatNumber(avgTpot, 2),
      totalTokens,
    },
  ];

  console.log('\nBenchmark Summary');
  console.table(summary);
}

/**
 * Prints all results as pretty-printed JSON (2-space indentation) so the
 * output can be consumed by downstream automation.
 *
 * @param results Per-run result details.
 */
export function printBenchmarkJson(results: LlmBenchmarkResultItem[]) {
  const serialized = JSON.stringify(results, null, 2);
  console.log(serialized);
}