import subprocess
import os
import logging
import pytest
import pandas as pd
COMMAND_SUCCESS = 0
def execute_cmd(cmd):
logging.info('Execute command:%s' % " ".join(cmd))
completed_process = subprocess.run(cmd, shell=False, stderr=subprocess.PIPE)
if completed_process.returncode != COMMAND_SUCCESS:
logging.error(completed_process.stderr.decode())
return completed_process.returncode
def check_column_actual(actual_columns, expected_columns, context):
"""检查实际列名是否与预期列名一致"""
for col in expected_columns:
if col not in actual_columns:
logging.error(f"在 {context} 中未找到预期列名: {col}")
return False
return True
def check_row(df, expected_columns, numeric_columns):
"""检查数据框中Metric列数据类型和指定列数据是否为数字"""
for row_index in df.index:
try:
value = df.at[row_index, 'Metric']
if not isinstance(value, str):
logging.error(f"在Metric列的第{row_index}行,值 '{value}' 不是字符串类型")
return False
except KeyError:
logging.error(f"数据框中不存在 'Metric' 列")
return False
for column in numeric_columns:
if column not in df.columns:
logging.error(f"数据框中不存在 {column} 列")
continue
for row_index in df.index:
try:
cell_value = df.at[row_index, column]
float(cell_value)
except (ValueError, KeyError):
logging.error(
f"在 {column} 列的第 {row_index} 行,值 {cell_value} 不是有效的数字")
return False
return True
def check_no_empty_lines_before_first_line(dataframe, context=""):
empty_line = 0
for _, row in dataframe.iterrows():
if row.isnull().all():
empty_line += 1
else:
break
pytest.assume(empty_line == 0, f"{context} table has {empty_line} empty lines before first line.")
def check_no_empty_lines_between_first_last_line(dataframe, context=""):
empty_rows = dataframe.eq('').all(axis=1)
num_empty_rows = empty_rows.sum()
pytest.assume(num_empty_rows == 0, f"{context} table has empty lines.")
def check_during_time(dataframe, context=""):
required_columns = ['end_time(ms)', 'start_time(ms)', 'during_time(ms)']
for col in required_columns:
if col not in dataframe.columns:
logging.error(f"The column {col} not found in {context}.")
return False
for index, row in dataframe.iloc[:-1].iterrows():
end_time = row['end_time(ms)']
start_time = row['start_time(ms)']
during_time = row['during_time(ms)']
diff = abs((end_time - start_time) - during_time)
if diff > 1:
logging.error(f"In row {index} of {context}, the during_time is not correct.")
return False
return True
def check_split_csv_content(output_path, csv_file_name):
csv_file = os.path.join(output_path, csv_file_name)
pytest.assume(os.path.exists(csv_file), f"CSV file not found: {csv_file}")
task_name = os.path.splitext(csv_file_name)[0]
expected_header = ['name', 'during_time(ms)', 'max', 'min', 'mean', 'std', \
'pid', 'tid', 'start_time(ms)', 'end_time(ms)']
if task_name == 'prefill':
expected_header.append('rid')
df = pd.read_csv(csv_file)
result = check_column_actual(df.columns.tolist(), expected_header, context=csv_file_name)
pytest.assume(result, f"{csv_file_name} column check failed")
check_no_empty_lines_before_first_line(df, context=csv_file_name)
check_no_empty_lines_between_first_last_line(df, context=csv_file_name)
result = check_during_time(df, context=csv_file_name)
pytest.assume(result, f"{csv_file_name} execution time validation failed")