import os
from pathlib import Path
from pytest_check import check
def check_df_expected_column(df, expected_columns):
    """Assert that every name in ``expected_columns`` is a column of ``df``.

    Args:
        df: DataFrame whose ``columns`` are inspected.
        expected_columns: Iterable of column names that must be present.

    Raises:
        AssertionError: If at least one expected column is absent; the
            message lists the absent names.
    """
    wanted = set(expected_columns)
    present = set(df.columns.tolist())
    absent = wanted - present
    assert not absent, f"missing columns: {absent}"
def check_df_has_no_empty_line(df):
    """Assert that ``df`` has no row in which every cell equals ``""``.

    Only the empty string counts as an empty cell here; other values
    (including NaN) make the row non-empty.

    Args:
        df: DataFrame to inspect.

    Raises:
        AssertionError: If one or more rows consist solely of empty strings.
    """
    # Row-wise mask: True where every cell of the row is the empty string.
    blank_row_mask = (df == "").all(axis=1)
    assert not blank_row_mask.any(), "has empty lines."
def check_df_col_has_no_nan_value(df, col_name):
    """Report via ``check.is_false`` if column ``col_name`` contains any NaN.

    Args:
        df: DataFrame holding the column.
        col_name: Name of the column to inspect; it is indexed directly, so
            a missing column raises ``KeyError``.
    """
    contains_nan = df[col_name].isna().any()
    check.is_false(contains_nan, f"column {col_name} has nan value.")
def check_df_col_has_value(df, col_name, value, times=None, empty_enable=False):
    """Check through ``check`` that ``value`` occurs in column ``col_name``.

    Args:
        df: DataFrame to inspect.
        col_name: Name of the column expected to contain ``value``.
        value: Value looked up in the column's ``value_counts()`` result.
        times: When ``None``, the value must occur at least once; otherwise
            its occurrence count must equal ``times`` exactly.
        empty_enable: When true, a value that does not occur at all is
            accepted and no further checks are made.

    Returns:
        None. Outcomes are reported through ``check.is_true``.
    """
    column_present = col_name in df
    check.is_true(column_present, f"check {col_name} in dataframe failed.")
    if not column_present:
        # The failure is already recorded above; indexing the missing column
        # would raise KeyError and abort instead of continuing.
        return

    value_count_series = df[col_name].value_counts()
    value_present = value in value_count_series
    if empty_enable and not value_present:
        return

    check.is_true(value_present, f"check {col_name} has {value} failed(not in).")
    count = value_count_series[value] if value_present else 0
    count_ok = count > 0 if times is None else count == times
    check.is_true(count_ok, f"check {col_name} has {value} failed({times}).")
def check_df_col_type(df, col_name, checker):
    """Return whether ``checker`` is truthy for every value in a column.

    Unlike the asserting helpers in this file, this only returns the result;
    the caller decides what to do with it.

    Args:
        df: DataFrame holding the column.
        col_name: Name of the column whose values are tested.
        checker: Callable applied to each value via ``Series.apply``.

    Returns:
        The result of ``.all()`` over the per-value checker results.
    """
    per_value_result = df[col_name].apply(checker)
    return per_value_result.all()
def check_df_col_unique_value_nums(df, col_name, number):
    """Return whether column ``col_name`` has exactly ``number`` distinct values.

    The count comes from ``Series.nunique()`` with its default arguments.

    Args:
        df: DataFrame holding the column.
        col_name: Name of the column to count distinct values in.
        number: Expected number of distinct values.

    Returns:
        The result of comparing the distinct-value count with ``number``.
    """
    distinct_count = df[col_name].nunique()
    return distinct_count == number
def has_prof_folder(root_folder):
    """Return True if any directory below ``root_folder`` starts with ``PROF_``.

    The search is recursive and stops at the first match.

    Args:
        root_folder: Path (string or path-like) of the directory tree to scan.

    Returns:
        True when a matching directory exists anywhere in the tree,
        otherwise False.
    """
    return any(
        entry.is_dir() and entry.name.startswith("PROF_")
        for entry in Path(root_folder).rglob("*")
    )
def check_files_in_folder(folder_path, target_extension):
    """Return True if every entry in ``folder_path`` ends with the extension.

    All names returned by ``os.listdir`` are tested, so sub-directories are
    included in the check. An empty folder yields True.

    Args:
        folder_path: Directory whose direct entries are listed.
        target_extension: Suffix (or tuple of suffixes) each entry name must
            end with.

    Returns:
        False as soon as one entry does not match, otherwise True.
    """
    return all(
        entry_name.endswith(target_extension)
        for entry_name in os.listdir(folder_path)
    )