# -------------------------------------------------------------------------
# This file is part of the MindStudio project.
# Copyright (c) 2025 Huawei Technologies Co.,Ltd.
#
# MindStudio is licensed under Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# -------------------------------------------------------------------------
import os
from pathlib import Path
from pytest_check import check


def check_df_expected_column(df, expected_columns):
    """Assert that every label in ``expected_columns`` is a column of ``df``.

    Args:
        df: Object whose ``columns.tolist()`` yields the available column
            labels.
        expected_columns: Iterable of column labels that must be present.

    Raises:
        AssertionError: If at least one expected label is absent; the message
            contains the set of absent labels.
    """
    # Set difference keeps only the expected labels that df does not provide.
    wanted = set(expected_columns)
    available = set(df.columns.tolist())
    missing_columns = wanted.difference(available)
    assert not missing_columns, f"missing columns: {missing_columns}"


def check_df_has_no_empty_line(df):
    """Assert that ``df`` has no row whose cells are all the empty string.

    Args:
        df: DataFrame to inspect.

    Raises:
        AssertionError: If at least one row consists solely of ``""`` cells.
    """
    # A row counts as empty only when every one of its cells equals "".
    row_is_blank = df.eq("").all(axis=1)
    assert not row_is_blank.any(), "has empty lines."


def check_df_col_has_no_nan_value(df, col_name):
    """Check, via ``check.is_false``, that column ``col_name`` has no NaN.

    Args:
        df: DataFrame holding the column.
        col_name: Label of the column to inspect.
    """
    contains_nan = df[col_name].isna().any()
    check.is_false(contains_nan, f"column {col_name} has nan value.")


def check_df_col_has_value(df, col_name, value, times=None, empty_enable=False):
    """Check how many times ``value`` occurs in column ``col_name`` of ``df``.

    All verdicts are reported through ``check.is_true``.

    Args:
        df: DataFrame to inspect.
        col_name: Label of the column to inspect.
        value: Value whose occurrences are counted (via ``value_counts()``).
        times: Exact number of occurrences required. When ``None``, any
            positive number of occurrences is accepted.
        empty_enable: When true, a column that does not contain ``value`` at
            all is accepted and no further checks are performed.
    """
    column_present = col_name in df
    check.is_true(column_present, f"check {col_name} in dataframe failed.")
    if not column_present:
        # The failure has already been reported above; indexing the missing
        # column would only add an unrelated KeyError on top of it.
        return

    value_count_series = df[col_name].value_counts()
    value_present = value in value_count_series
    if empty_enable and not value_present:
        return

    check.is_true(value_present, f"check {col_name} has {value} failed(not in).")
    count = value_count_series[value] if value_present else 0
    count_ok = count > 0 if times is None else count == times
    check.is_true(count_ok, f"check {col_name} has {value} failed({times}).")


def check_df_col_type(df, col_name, checker):
    """Return whether ``checker`` is truthy for every value in a column.

    Args:
        df: DataFrame holding the column.
        col_name: Label of the column to inspect.
        checker: Callable applied to each value of the column.

    Returns:
        The result of ``.all()`` over the per-value ``checker`` results.
    """
    per_value_verdicts = df[col_name].apply(checker)
    return per_value_verdicts.all()


def check_df_col_unique_value_nums(df, col_name, number):
    """Return whether a column's ``nunique()`` count equals ``number``.

    Args:
        df: DataFrame holding the column.
        col_name: Label of the column to inspect.
        number: Expected count of unique values.

    Returns:
        ``True`` when ``df[col_name].nunique()`` equals ``number``.
    """
    distinct_count = df[col_name].nunique()
    return distinct_count == number


def has_prof_folder(root_folder):
    """Return whether a directory named ``PROF_*`` exists under ``root_folder``.

    The search is recursive (``Path.rglob("*")``), so matches at any depth
    below ``root_folder`` count.

    Args:
        root_folder: Path of the folder to search.

    Returns:
        ``True`` if some directory below ``root_folder`` has a name starting
        with ``"PROF_"``, otherwise ``False``.
    """
    candidates = Path(root_folder).rglob("*")
    # any() stops at the first matching directory.
    return any(
        entry.is_dir() and entry.name.startswith("PROF_")
        for entry in candidates
    )


def check_files_in_folder(folder_path, target_extension):
    """Return whether every entry in ``folder_path`` ends with ``target_extension``.

    Args:
        folder_path: Folder whose direct entries (``os.listdir``) are examined.
        target_extension: Suffix passed to ``str.endswith`` for each entry
            name.

    Returns:
        ``False`` as soon as one entry name lacks the suffix, otherwise
        ``True`` (including for a folder with no entries).
    """
    return all(
        entry_name.endswith(target_extension)
        for entry_name in os.listdir(folder_path)
    )