import os
import stat
import tensorflow as tf
# Exclusive lower bound (in bytes) for a readable file: validate_read_file
# requires the size to be strictly greater than this value.
MIN_SIZE = 1
# Inclusive upper bound (in bytes) for a readable file: 500 GiB.
MAX_FILE_SIZE = 500 * 1024 ** 3
# URI schemes treated as HDFS; local-filesystem checks are skipped for them.
HDFS_FILE_PREFIX = ["viewfs://", "hdfs://"]
# Permission bits a local file must not have: write access for group/others.
UNSUPPORTED_FILE_MODE_MASK = stat.S_IWGRP | stat.S_IWOTH
def check_file_system_is_hdfs(file_path):
    """
    Tell whether a path points at an HDFS-style file system.

    :param file_path: the path to inspect
    :return: True if the path starts with one of the prefixes listed in
        HDFS_FILE_PREFIX, False otherwise
    """
    # str.startswith accepts a tuple and matches if any entry is a prefix.
    hdfs_prefixes = tuple(HDFS_FILE_PREFIX)
    return file_path.startswith(hdfs_prefixes)
def validate_read_file(read_file_path):
    """
    Check that a file is acceptable to read before it is opened.

    The size is checked for every path via ``tf.io.gfile.stat``. Paths that
    check_file_system_is_hdfs recognises skip the remaining checks; any other
    path is additionally checked for soft links, ownership and permission bits.

    :param read_file_path: the file path to be validated
    :raises ValueError: if the path is not a str, the file size is outside
        (MIN_SIZE, MAX_FILE_SIZE], the absolute path differs from its resolved
        real path, the file is owned by neither the effective user nor the
        effective group of this process, or the file mode has any bit of
        UNSUPPORTED_FILE_MODE_MASK set
    """
    if not isinstance(read_file_path, str):
        raise ValueError("parameter value's type is not str")

    # Size check goes through TensorFlow's gfile so it also works on HDFS.
    size = tf.io.gfile.stat(read_file_path).length
    if size <= MIN_SIZE or size > MAX_FILE_SIZE:
        raise ValueError(f"file size: {size} is invalid, not in ({MIN_SIZE}, {MAX_FILE_SIZE}]")

    # The checks below rely on the local filesystem API; skip them for HDFS.
    is_remote = check_file_system_is_hdfs(read_file_path)
    if is_remote:
        return

    # If resolving symlinks changes the absolute path, a soft link is involved.
    absolute_path = os.path.abspath(read_file_path)
    resolved_path = os.path.realpath(read_file_path)
    if absolute_path != resolved_path:
        raise ValueError(f"soft link or relative path: {read_file_path} should not be in the path parameter")

    local_stat = os.stat(read_file_path)
    effective_uid = os.geteuid()
    effective_gid = os.getegid()
    # Reject only when both the owner and the group differ from the process.
    if effective_uid != local_stat.st_uid and effective_gid != local_stat.st_gid:
        raise ValueError(f"Invalid log file user or group, path: {read_file_path}.")

    mode = stat.S_IMODE(local_stat.st_mode)
    if mode & UNSUPPORTED_FILE_MODE_MASK:
        raise ValueError(f"Current file:{read_file_path}, mode {oct(mode)} is unsupported")
def validate_save_path(save_path):
    """
    Validate a path before something is saved to it.

    Paths that check_file_system_is_hdfs recognises are accepted without
    further checks. Any other path must not go through a soft link.

    :param save_path: the destination path to be validated
    :raises ValueError: if the path is not a str, or if its absolute path
        differs from its resolved real path
    """
    # The type check must come first: the HDFS prefix test calls
    # save_path.startswith, which would raise AttributeError/TypeError for a
    # non-str argument instead of the intended ValueError.
    if not isinstance(save_path, str):
        raise ValueError("parameter value's type is not str")

    if check_file_system_is_hdfs(save_path):
        return

    # If resolving symlinks changes the absolute path, a soft link is involved.
    if os.path.abspath(save_path) != os.path.realpath(save_path):
        raise ValueError(f"soft link or relative path: {save_path} should not be in the path parameter")