* Copyright (c) 2026 Huawei Technologies Co., Ltd.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
*/
#include "zip_archive_reader.h"
#include <algorithm>
#include <fstream>
#include <limits>
#include <memory>
#include <new>
#include <vector>
#include "common/checker.h"
#include "common/debug/log.h"
#include "common/scope_guard.h"
#include "graph_metadef/graph/utils/math_util.h"
#include "mmpa/mmpa_api.h"
#include "om2_file_utils.h"
namespace ge {
namespace {
// Status codes returned by the in-memory I/O callbacks defined below.
constexpr int kMemZipOk = 0;
constexpr int kMemZipError = -1;
// Size of the scratch buffer that receives an entry name while the entry cache is being built.
constexpr uint32_t kMaxFileNameLength = 4096U;
// Size, in bytes, of the chunk buffer used when an entry is streamed to a file.
constexpr int64_t kBufSize = 16384UL;
// Open callback for the in-memory archive.
// `opaque` is the MemoryFileReadonly registered in the callback table; `filename` and `mode` are ignored.
// Rewinds the read cursor to 0 and hands the same object back as the stream handle, or returns nullptr
// (after logging) when no object was registered.
voidpf ZCALLBACK MemOpenFileFuncReadonly(voidpf opaque, const void *filename, int mode) {
  static_cast<void>(filename);
  static_cast<void>(mode);
  auto *const archive = static_cast<MemoryFileReadonly *>(opaque);
  if (archive != nullptr) {
    archive->position = 0;
    return archive;
  }
  GELOGE(FAILED, "[MEMZIP] Opaque pointer is null. Cannot initialize memory file.");
  return nullptr;
}
// Read callback for the in-memory archive.
// Copies up to `size` bytes from the current cursor of `stream` (a MemoryFileReadonly) into `buf`, advances the
// cursor by the number of bytes copied and returns that number. Returns 0 when the stream is null, when the
// cursor is at (or beyond) the end of the buffer, or when the copy fails. `opaque` is unused.
uLong ZCALLBACK MemReadFileFuncReadonly(voidpf opaque, voidpf stream, void *buf, uLong size) {
  (void)opaque;
  const auto mem_file = static_cast<MemoryFileReadonly *>(stream);
  if (mem_file == nullptr) {
    return 0;
  }
  // Clamp against the bytes that are left instead of testing `position + size > length`: the sum can wrap
  // around for a very large `size`, and `length - position` would underflow if the cursor were past the end.
  if (mem_file->position >= mem_file->length) {
    return 0;
  }
  const uint64_t available = static_cast<uint64_t>(mem_file->length - mem_file->position);
  uLong bytes_to_read = size;
  if (static_cast<uint64_t>(bytes_to_read) > available) {
    // `available` is smaller than `size` here, so it always fits into uLong.
    bytes_to_read = static_cast<uLong>(available);
  }
  if (bytes_to_read > 0) {
    const auto ret = GeMemcpy(static_cast<uint8_t *>(buf), size, mem_file->buffer + mem_file->position, bytes_to_read);
    if (ret != SUCCESS) {
      GELOGE(FAILED,
             "[MEMZIP] Failed to copy, ret=%d: dest_ptr[%p], dest_max[%zu], src_base_ptr[%p], src_position[%zu], "
             "src_size[%zu]",
             ret, buf, size, mem_file->buffer, mem_file->position, bytes_to_read);
      return 0;
    }
    mem_file->position += bytes_to_read;
  }
  return bytes_to_read;
}
// Write callback for the read-only in-memory archive: all arguments are ignored and 0 written bytes are reported.
uLong ZCALLBACK MemWriteFileFuncReadonly(voidpf opaque, voidpf stream, const void *buf, uLong size) {
  static_cast<void>(size);
  static_cast<void>(buf);
  static_cast<void>(stream);
  static_cast<void>(opaque);
  constexpr uLong kBytesWritten = 0;
  return kBytesWritten;
}
// Tell callback for the in-memory archive.
// Returns the current cursor of `stream` (a MemoryFileReadonly), or ZPOS64_T(-1) when the stream is null.
// `opaque` is unused.
ZPOS64_T ZCALLBACK MemTell64FileFuncReadonly(voidpf opaque, voidpf stream) {
  static_cast<void>(opaque);
  const auto *const cursor_owner = static_cast<const MemoryFileReadonly *>(stream);
  if (cursor_owner != nullptr) {
    return cursor_owner->position;
  }
  return static_cast<ZPOS64_T>(-1);
}
// Seek callback for the in-memory archive.
// Moves the cursor of `stream` (a MemoryFileReadonly) to `offset` relative to the start, the current cursor or
// the end of the buffer, depending on `origin`. Returns kMemZipOk on success and kMemZipError when the stream is
// null, `origin` is not one of the three ZLIB_FILEFUNC_SEEK_* values, or the target lies past the end of the
// buffer (the cursor is left untouched in that case). `opaque` is unused.
long ZCALLBACK MemSeek64FileFuncReadonly(voidpf opaque, voidpf stream, ZPOS64_T offset, int origin) {
  static_cast<void>(opaque);
  auto *const archive = static_cast<MemoryFileReadonly *>(stream);
  if (archive == nullptr) {
    return kMemZipError;
  }
  // Pick the reference point first, then apply the offset once.
  uint64_t base = 0U;
  switch (origin) {
    case ZLIB_FILEFUNC_SEEK_SET:
      base = 0U;
      break;
    case ZLIB_FILEFUNC_SEEK_CUR:
      base = archive->position;
      break;
    case ZLIB_FILEFUNC_SEEK_END:
      base = archive->length;
      break;
    default:
      return kMemZipError;
  }
  const uint64_t target = base + offset;
  if (target > archive->length) {
    return kMemZipError;
  }
  archive->position = target;
  return kMemZipOk;
}
// Close callback for the in-memory archive: does nothing with its arguments and always reports kMemZipOk.
int ZCALLBACK MemCloseFileFuncReadonly(voidpf opaque, voidpf stream) {
  static_cast<void>(stream);
  static_cast<void>(opaque);
  return kMemZipOk;
}
// Error-query callback for the in-memory archive.
// Reports kMemZipError only when the stream handle is null; otherwise kMemZipOk. `opaque` is unused.
int ZCALLBACK MemErrorFileFuncReadonly(voidpf opaque, voidpf stream) {
  static_cast<void>(opaque);
  const bool has_stream = (static_cast<MemoryFileReadonly *>(stream) != nullptr);
  return has_stream ? kMemZipOk : kMemZipError;
}
// Populates `file_func_def` with the read-only in-memory callbacks of this file and registers `mem_file` as the
// opaque object that is passed back to them.
void FillMemFileFuncReadonly(zlib_filefunc64_def *file_func_def, MemoryFileReadonly *mem_file) {
  zlib_filefunc64_def &callbacks = *file_func_def;
  callbacks.opaque = mem_file;
  callbacks.zopen64_file = MemOpenFileFuncReadonly;
  callbacks.zclose_file = MemCloseFileFuncReadonly;
  callbacks.zread_file = MemReadFileFuncReadonly;
  callbacks.zwrite_file = MemWriteFileFuncReadonly;
  callbacks.zseek64_file = MemSeek64FileFuncReadonly;
  callbacks.ztell64_file = MemTell64FileFuncReadonly;
  callbacks.zerror_file = MemErrorFileFuncReadonly;
}
// Returns the directory part of `filepath`, i.e. everything before the last '/'.
// - no '/' at all      -> "" (there is no directory component)
// - only a leading '/' -> "/" (the parent of a root-level entry is the root itself, not an empty string)
// - otherwise          -> the prefix up to, but excluding, the last '/'
std::string ParentDirectory(const std::string &filepath) {
  const size_t last_slash = filepath.find_last_of('/');
  if (last_slash == std::string::npos) {
    return "";
  }
  if (last_slash == 0U) {
    return "/";
  }
  return filepath.substr(0, last_slash);
}
// Makes sure the directory that is to contain `file_path` exists.
// The directory is derived with ParentDirectory(); it must not be empty. When mmAccess2() does not report it as
// present, om2::CreateDir() is asked to create it and must return 0. Returns true once the checks have passed.
bool MakeSureDirExists(const std::string &file_path) {
  const std::string target_dir = ParentDirectory(file_path);
  GE_ASSERT(!target_dir.empty());
  const bool already_present = (mmAccess2(target_dir.c_str(), M_F_OK) == EN_OK);
  if (!already_present) {
    GE_ASSERT_TRUE(om2::CreateDir(target_dir) == 0);
  }
  return true;
}
// Layout of a central directory record as consumed by ParseCentralDirEntry: size of the fixed part, byte offsets
// of the fields read from it, and the signature expected in its first four bytes.
constexpr uint32_t kCdHeaderFixedSize = 46U;
constexpr uint32_t kCdCompressedSizeOffset = 20U;
constexpr uint32_t kCdUncompressedSizeOffset = 24U;
constexpr uint32_t kCdFileNameLenOffset = 28U;
constexpr uint32_t kCdExtraLenOffset = 30U;
constexpr uint32_t kCdLocalFileHeaderOffset = 42U;
constexpr uint32_t kCdHeaderMagicNum = 0x02014b50;
// Id of the extra-field record that carries the 64-bit replacements for 32-bit fields equal to MAXU32.
constexpr uint16_t kCdExtraMagicNum = 0x0001;
// Layout of a local file header as consumed by LocateFileDataOffset: size of the fixed part, byte offsets of the
// name/extra length fields, and the signature expected in its first four bytes.
constexpr uint16_t kLfHeaderFixedSize = 30U;
constexpr uint32_t kLfNameLenOffset = 26U;
constexpr uint32_t kLfExtraLenOffset = 28U;
constexpr uint32_t kLfHeaderMagicNum = 0x04034b50;
// Byte and bit widths used by the little-endian readers and by the extra-field scan.
constexpr size_t kBitsPerByte = 8;
constexpr size_t kBytesPerUint16 = sizeof(uint16_t);
constexpr size_t kBytesPerUint32 = sizeof(uint32_t);
constexpr size_t kBytesPerUint64 = sizeof(uint64_t);
constexpr size_t kBitsPerUint16 = kBitsPerByte * kBytesPerUint16;
constexpr size_t kBitsPerUint32 = kBitsPerByte * kBytesPerUint32;
// Values extracted from one central directory record by ParseCentralDirEntry.
struct ZipEntryInfo {
  // Size of the entry's data as stored in the archive.
  uint64_t compressed_size;
  // Size of the entry's data after extraction.
  uint64_t uncompressed_size;
  // Offset of the entry's local file header, used as an index into the archive buffer.
  uint64_t local_file_header_offset;
};
// Assembles a 16-bit value from the two bytes at `p`, least significant byte first.
uint16_t ReadLE16(const uint8_t *p) {
  const uint32_t low_byte = p[0];
  const uint32_t high_byte = p[1];
  return static_cast<uint16_t>((high_byte << kBitsPerByte) | low_byte);
}
// Assembles a 32-bit value from the four bytes at `p`, least significant byte first, out of two 16-bit reads.
uint32_t ReadLE32(const uint8_t *p) {
  const uint32_t low_half = ReadLE16(p);
  const uint32_t high_half = ReadLE16(p + kBytesPerUint16);
  return (high_half << kBitsPerUint16) | low_half;
}
// Assembles a 64-bit value from the eight bytes at `p`, least significant byte first, out of two 32-bit reads.
uint64_t ReadLE64(const uint8_t *p) {
  const uint64_t low_half = ReadLE32(p);
  const uint64_t high_half = ReadLE32(p + kBytesPerUint32);
  return (high_half << kBitsPerUint32) | low_half;
}
// Reads the sizes and the local-file-header offset of the central directory record that starts at
// `pos_in_central_dir` inside `buffer` and stores them in `entry_info`.
// The 32-bit fields of the fixed header are read first. If any of them equals MAXU32, the extra-field area that
// follows the file name is scanned for a record with id kCdExtraMagicNum; the 64-bit values found there replace
// the MAXU32 fields, in the order uncompressed size, compressed size, local header offset.
// Fails through GE_ASSERT_TRUE when the record does not fit into the buffer or its signature is wrong.
bool ParseCentralDirEntry(const MemoryFileReadonly &buffer, const uint64_t pos_in_central_dir, ZipEntryInfo &entry_info) {
  // Bounds are checked in subtraction form: `pos + size <= length` could wrap around for a huge position.
  GE_ASSERT_TRUE((pos_in_central_dir <= buffer.length) && ((buffer.length - pos_in_central_dir) >= kCdHeaderFixedSize),
                 "Invalid central directory position");
  const uint8_t *entry_buff = buffer.buffer + pos_in_central_dir;
  const uint32_t magic_num = ReadLE32(entry_buff);
  GE_ASSERT_TRUE(magic_num == kCdHeaderMagicNum, "Invalid central directory file header, magic = %u", magic_num);
  entry_info.compressed_size = ReadLE32(entry_buff + kCdCompressedSizeOffset);
  entry_info.uncompressed_size = ReadLE32(entry_buff + kCdUncompressedSizeOffset);
  const uint16_t name_len = ReadLE16(entry_buff + kCdFileNameLenOffset);
  const uint16_t extra_len = ReadLE16(entry_buff + kCdExtraLenOffset);
  entry_info.local_file_header_offset = ReadLE32(entry_buff + kCdLocalFileHeaderOffset);
  if (entry_info.compressed_size == MAXU32 || entry_info.uncompressed_size == MAXU32 ||
      entry_info.local_file_header_offset == MAXU32) {
    // The name and the extra area must lie inside the buffer before the extra area is scanned.
    const uint64_t bytes_after_fixed_part = (buffer.length - pos_in_central_dir) - kCdHeaderFixedSize;
    const uint64_t variable_part_len = static_cast<uint64_t>(name_len) + static_cast<uint64_t>(extra_len);
    GE_ASSERT_TRUE(variable_part_len <= bytes_after_fixed_part, "Invalid extra field position");
    const uint8_t *extra_buf = entry_buff + kCdHeaderFixedSize + name_len;
    size_t offset = 0;
    // Each extra record starts with a 2-byte id and a 2-byte payload size.
    while (offset + kBytesPerUint32 <= extra_len) {
      const uint16_t header_id = ReadLE16(extra_buf + offset);
      const uint16_t data_size = ReadLE16(extra_buf + offset + kBytesPerUint16);
      offset += kBytesPerUint32;
      if (header_id == kCdExtraMagicNum) {
        // Only the fields that were MAXU32 in the fixed header are present, in this fixed order.
        if ((entry_info.uncompressed_size == MAXU32) && (offset + kBytesPerUint64 <= extra_len)) {
          entry_info.uncompressed_size = ReadLE64(extra_buf + offset);
          offset += kBytesPerUint64;
        }
        if ((entry_info.compressed_size == MAXU32) && (offset + kBytesPerUint64 <= extra_len)) {
          entry_info.compressed_size = ReadLE64(extra_buf + offset);
          offset += kBytesPerUint64;
        }
        if ((entry_info.local_file_header_offset == MAXU32) && (offset + kBytesPerUint64 <= extra_len)) {
          entry_info.local_file_header_offset = ReadLE64(extra_buf + offset);
          offset += kBytesPerUint64;
        }
        break;
      } else {
        // Not the record we are looking for: skip its payload.
        offset += data_size;
      }
    }
  }
  return true;
}
// Computes where the data of an entry starts, given the offset of its local file header inside `buffer`.
// The header signature is verified, then the fixed header size plus the name and extra-field lengths read from
// the header are added to `local_file_header_offset`. `raw_data_offset` is written only on success.
// Fails through GE_ASSERT_TRUE when the header or the resulting data offset lies outside the buffer, or when the
// signature is wrong.
bool LocateFileDataOffset(const MemoryFileReadonly &buffer, const uint64_t local_file_header_offset,
                          uint64_t &raw_data_offset) {
  // `local_file_header_offset` may be a 64-bit value taken from the archive, so `offset + size <= length` could
  // wrap around and let an out-of-range offset through; compare in subtraction form instead.
  GE_ASSERT_TRUE((local_file_header_offset <= buffer.length) &&
                     ((buffer.length - local_file_header_offset) >= kLfHeaderFixedSize),
                 "Invalid local file header position");
  const uint8_t *header_buff = buffer.buffer + local_file_header_offset;
  const uint32_t header_magic_num = ReadLE32(header_buff);
  GE_ASSERT_TRUE(header_magic_num == kLfHeaderMagicNum, "Invalid local file header, magic = %u", header_magic_num);
  const uint64_t name_len = ReadLE16(header_buff + kLfNameLenOffset);
  const uint64_t extra_len = ReadLE16(header_buff + kLfExtraLenOffset);
  const uint64_t variable_part_len = name_len + extra_len;
  const uint64_t bytes_after_fixed_part = (buffer.length - local_file_header_offset) - kLfHeaderFixedSize;
  GE_ASSERT_TRUE(variable_part_len <= bytes_after_fixed_part, "Invalid file data offset");
  raw_data_offset = local_file_header_offset + kLfHeaderFixedSize + variable_part_len;
  return true;
}
}
// Opens a ZIP archive that lives in the caller's memory block [data, data + length) and indexes its entries.
// The block is referenced, not copied. On any failure the constructor logs and returns early: the handle stays
// unset when the input is empty or cannot be opened, and the entry cache is emptied and marked not ready when
// indexing fails.
RAIIZipArchive::RAIIZipArchive(const uint8_t *data, const size_t length) : mem_file_{data, length, 0} {
  const bool has_payload = (mem_file_.buffer != nullptr) && (mem_file_.length != 0);
  if (!has_payload) {
    GELOGE(FAILED, "Invalid zip archive data, data is [%p] and size is [%zu]", mem_file_.buffer, mem_file_.length);
    return;
  }
  // Route all I/O of the unzip handle through the in-memory callbacks.
  zlib_filefunc64_def io_callbacks;
  FillMemFileFuncReadonly(&io_callbacks, &mem_file_);
  zip_handle_ = unzOpen2_64(nullptr, &io_callbacks);
  if (zip_handle_ == nullptr) {
    GELOGE(FAILED, "Failed to open ZIP file from memory");
    return;
  }
  if (BuildEntryCache()) {
    return;
  }
  // Indexing failed half-way: drop whatever was collected so far.
  entry_cache_.clear();
  entry_names_.clear();
  entry_cache_ready_ = false;
}
// Closes the unzip handle if one was opened by the constructor.
RAIIZipArchive::~RAIIZipArchive() {
  if (zip_handle_ == nullptr) {
    return;
  }
  unzClose(zip_handle_);
}
// Returns a copy of the entry names collected while the entry cache was built.
// Requires the archive to be in a good state (checked with GE_ASSERT_TRUE).
std::vector<std::string> RAIIZipArchive::ListFiles() const {
  GE_ASSERT_TRUE(IsGood(), "Invalid status of archive");
  std::vector<std::string> names(entry_names_);
  return names;
}
// Walks every record of the archive once and fills `entry_cache_` / `entry_names_`.
// Records whose name is empty or ends with '/' are skipped; every other record is cached through
// CacheCurrentEntry(). `entry_cache_ready_` is set to true only after the whole walk succeeded.
// Fails through the GE_ASSERT_* macros when the handle is missing, an unzip call fails, a name does not fit into
// the name buffer, or an entry cannot be cached.
bool RAIIZipArchive::BuildEntryCache() {
  GE_ASSERT_NOTNULL(zip_handle_, "Invalid status of archive");
  entry_cache_.clear();
  entry_names_.clear();
  entry_cache_ready_ = false;
  auto uz_ret = unzGoToFirstFile(zip_handle_);
  GE_ASSERT_TRUE(uz_ret == UNZ_OK, "Failed to go to the first file in the archive, ret = %d", uz_ret);
  do {
    std::vector<char_t> name_buff(kMaxFileNameLength, '\0');
    unz_file_info64 file_info{};
    uz_ret = unzGetCurrentFileInfo64(zip_handle_, &file_info, name_buff.data(), name_buff.size(), nullptr, 0, nullptr,
                                     0);
    GE_ASSERT_TRUE(uz_ret == UNZ_OK, "Failed to get the current file information, ret = %d", uz_ret);
    // A name that is as long as the buffer (or longer) is copied without a terminating '\0', so building a
    // std::string from the raw pointer would read past the end of the buffer. Reject such names instead.
    GE_ASSERT_TRUE(static_cast<uint64_t>(file_info.size_filename) < static_cast<uint64_t>(name_buff.size()),
                   "The file name in the archive is too long, length = %lu, limit = %u",
                   static_cast<unsigned long>(file_info.size_filename), kMaxFileNameLength - 1U);
    const std::string file_name(name_buff.data());
    if (!file_name.empty() && file_name.back() != '/') {
      GE_ASSERT_TRUE(CacheCurrentEntry(file_name, file_info));
      entry_names_.emplace_back(file_name);
    }
    uz_ret = unzGoToNextFile(zip_handle_);
  } while (uz_ret == UNZ_OK);
  // The walk must have stopped because the list is exhausted, not because of an error.
  GE_ASSERT_TRUE(uz_ret == UNZ_END_OF_LIST_OF_FILE, "unzGoToNextFile failed, ret=%d", uz_ret);
  entry_cache_ready_ = true;
  return true;
}
bool RAIIZipArchive::CacheCurrentEntry(const std::string &entry_name, const unz_file_info64 &file_info) {
unz64_file_pos file_pos{};
auto uz_ret = unzGetFilePos64(zip_handle_, &file_pos);
GE_ASSERT_TRUE(uz_ret == UNZ_OK, "Failed to get file position for [%s], ret = %d", entry_name.c_str(), uz_ret);
CachedZipEntry cached_entry;
cached_entry.file_pos = file_pos;
cached_entry.uncompressed_size = file_info.uncompressed_size;
if (file_info.compression_method == Z_NO_COMPRESSION) {
GE_ASSERT_TRUE(GetRawDataOffset(file_pos.pos_in_zip_directory, file_info.uncompressed_size,
cached_entry.raw_data_offset));
cached_entry.raw_data_ready = true;
}
const auto emplace_ret = entry_cache_.emplace(entry_name, cached_entry);
GE_ASSERT_TRUE(emplace_ret.second, "Duplicate zip entry [%s]", entry_name.c_str());
return true;
}
// Positions the unzip handle on the entry called `entry_name`, using the position cached for it.
// Returns false (after logging) when the name is not in the cache; fails through GE_ASSERT_TRUE when the archive
// is not in a good state or the handle cannot be moved.
bool RAIIZipArchive::GoToEntry(const std::string &entry_name) const {
  GE_ASSERT_TRUE(IsGood(), "Invalid status of archive");
  const auto cached = entry_cache_.find(entry_name);
  if (cached == entry_cache_.end()) {
    GELOGE(FAILED, "Failed to locate file [%s] in zip entry cache", entry_name.c_str());
    return false;
  }
  // Hand a copy of the cached position to the unzip call.
  auto target_pos = cached->second.file_pos;
  const auto seek_ret = unzGoToFilePos64(zip_handle_, &target_pos);
  GE_ASSERT_TRUE(seek_ret == UNZ_OK, "Failed to go to file [%s] by cached position, ret = %d", entry_name.c_str(),
                 seek_ret);
  return true;
}
// Looks up `entry_name` in the entry cache and, if its raw data offset was resolved when the cache was built,
// returns a non-owning view of the entry's bytes inside the archive buffer.
// `raw_data` is reset to a null, non-owning buffer first. When raw data is available, `buff_size` receives the
// entry's uncompressed size and `raw_data` points into the archive buffer; otherwise both are left as they are
// and the function still returns true. Returns false (after logging) when the entry is unknown, and fails through
// GE_ASSERT_TRUE when the archive is not in a good state or the cached range does not fit into the buffer.
bool RAIIZipArchive::GetCachedRawData(const std::string &entry_name, size_t &buff_size,
                                      ReadonlyByteBuffer &raw_data) const {
  raw_data = ReadonlyByteBuffer(nullptr, ConditionalDeleter{false});
  GE_ASSERT_TRUE(IsGood(), "Invalid status of archive");
  const auto iter = entry_cache_.find(entry_name);
  if (iter == entry_cache_.end()) {
    GELOGE(FAILED, "Failed to locate file [%s] in zip entry cache", entry_name.c_str());
    return false;
  }
  if (!iter->second.raw_data_ready) {
    return true;
  }
  const auto &cached_entry = iter->second;
  // Subtraction form: `offset + size <= length` could wrap around for a huge size and accept a range that
  // reaches beyond the archive buffer.
  GE_ASSERT_TRUE((cached_entry.raw_data_offset <= mem_file_.length) &&
                 (cached_entry.uncompressed_size <= (mem_file_.length - cached_entry.raw_data_offset)));
  buff_size = cached_entry.uncompressed_size;
  raw_data = ReadonlyByteBuffer(mem_file_.buffer + cached_entry.raw_data_offset, ConditionalDeleter{false});
  return true;
}
// Resolves the offset, inside the archive buffer, at which the bytes of an entry begin.
// The central directory record at `pos_in_central_dir` is parsed, its compressed and uncompressed sizes must be
// equal and must match `buff_size`, and the local file header it points to is used to compute `raw_data_offset`.
// Fails through GE_ASSERT_TRUE when any of these steps fails or when the data range does not fit into the buffer.
bool RAIIZipArchive::GetRawDataOffset(const size_t pos_in_central_dir, const size_t buff_size,
                                      uint64_t &raw_data_offset) const {
  ZipEntryInfo entry_info{};
  GE_ASSERT_TRUE(ParseCentralDirEntry(mem_file_, pos_in_central_dir, entry_info));
  GE_ASSERT_TRUE(entry_info.compressed_size == entry_info.uncompressed_size,
                 "uncompressed_size and compressed_size must be equal when loading raw data");
  GE_ASSERT_TRUE(buff_size == entry_info.uncompressed_size, "buff_size is %zu, but uncompressed_size is %zu", buff_size,
                 entry_info.uncompressed_size);
  GE_ASSERT_TRUE(LocateFileDataOffset(mem_file_, entry_info.local_file_header_offset, raw_data_offset));
  // Subtraction form: the size comes from the archive and may be close to the 64-bit maximum, in which case
  // `offset + size` would wrap around and pass a plain `<= length` comparison.
  GE_ASSERT_TRUE((raw_data_offset <= mem_file_.length) &&
                 (entry_info.uncompressed_size <= (mem_file_.length - raw_data_offset)));
  return true;
}
// Extracts the entry `entry_name` to the file `output_dir + "/" + entry_name`, creating the directory that is to
// contain the file when needed. The entry is read in chunks of kBufSize bytes.
// Fails through GE_ASSERT_TRUE when the archive is not in a good state, `output_dir` is empty, the entry name
// contains a ".." path component, the entry cannot be located or opened, the output file cannot be created or
// written, or reading the entry reports an error. A partially written file is not removed on failure.
bool RAIIZipArchive::ExtractToFile(const std::string &entry_name, const std::string &output_dir) const {
  GE_ASSERT_TRUE(IsGood(), "Invalid status of archive");
  GE_ASSERT_TRUE(!output_dir.empty(), "The name of output directory is empty");
  // Entry names come from the archive. A ".." component would make the output path leave output_dir, so such
  // names are refused before anything is touched on disk.
  for (size_t begin = 0U; begin <= entry_name.size();) {
    const size_t slash = entry_name.find('/', begin);
    const size_t end = (slash == std::string::npos) ? entry_name.size() : slash;
    GE_ASSERT_TRUE(entry_name.compare(begin, end - begin, "..") != 0,
                   "Entry name [%s] must not contain a parent directory component", entry_name.c_str());
    begin = end + 1U;
  }
  GE_ASSERT_TRUE(GoToEntry(entry_name), "Failed to locate file [%s]", entry_name.c_str());
  auto uz_ret = unzOpenCurrentFile(zip_handle_);
  GE_ASSERT_TRUE(uz_ret == UNZ_OK, "Failed to open file [%s], ret = %d", entry_name.c_str(), uz_ret);
  // Close the entry again on every exit path.
  GE_MAKE_GUARD(zipfile_guard, [this]() { (void)unzCloseCurrentFile(zip_handle_); });
  auto output_path = output_dir + "/" + entry_name;
  GE_ASSERT_TRUE(MakeSureDirExists(output_path));
  std::ofstream ofs(output_path, std::ios::binary);
  GE_ASSERT_TRUE(ofs.is_open(), "Failed to open file [%s]", output_path.c_str());
  std::vector<char> buffer(kBufSize);
  size_t total_read = 0;
  int32_t bytes_read = 0;
  while ((bytes_read = unzReadCurrentFile(zip_handle_, buffer.data(), static_cast<unsigned int>(buffer.size()))) > 0) {
    ofs.write(buffer.data(), bytes_read);
    // Stop at the first failed write (e.g. disk full) instead of silently producing a truncated file.
    GE_ASSERT_TRUE(ofs.good(), "Failed to write file [%s]", output_path.c_str());
    total_read += static_cast<size_t>(bytes_read);
  }
  GE_ASSERT_TRUE(bytes_read == 0, "Failed to read file [%s], ret = %d", entry_name.c_str(), bytes_read);
  // Closing flushes the remaining buffered bytes; a failure here also means the file is incomplete.
  ofs.close();
  GE_ASSERT_TRUE(!ofs.fail(), "Failed to flush file [%s]", output_path.c_str());
  GELOGI("Successfully extract file [%s], total_read = %zu bytes", entry_name.c_str(), total_read);
  return true;
}
// Returns the content of the entry `entry_name` and stores its size in `buff_size`.
// When the entry's raw data offset was cached, the returned buffer is a non-owning view into the archive buffer.
// Otherwise the entry is read through the unzip handle into a newly allocated buffer that the returned object
// owns (ConditionalDeleter{true}).
// Fails through the GE_ASSERT_* macros when the archive is not in a good state, the entry cannot be located or
// opened, the buffer cannot be allocated, reading reports an error, or fewer bytes than announced were read.
ReadonlyByteBuffer RAIIZipArchive::ExtractToMem(const std::string &entry_name, size_t &buff_size) const {
  GE_ASSERT_TRUE(IsGood(), "Invalid status of archive");
  ReadonlyByteBuffer raw_data(nullptr, ConditionalDeleter{false});
  GE_ASSERT_TRUE(GetCachedRawData(entry_name, buff_size, raw_data));
  if (raw_data != nullptr) {
    return raw_data;
  }
  GE_ASSERT_TRUE(GoToEntry(entry_name), "Failed to locate file [%s]", entry_name.c_str());
  auto uz_ret = unzOpenCurrentFile(zip_handle_);
  GE_ASSERT_TRUE(uz_ret == UNZ_OK, "Failed to open file [%s], ret = %d", entry_name.c_str(), uz_ret);
  // Close the entry again on every exit path.
  GE_MAKE_GUARD(zipfile_guard, [this]() { (void)unzCloseCurrentFile(zip_handle_); });
  unz_file_info64 file_info{};
  uz_ret = unzGetCurrentFileInfo64(zip_handle_, &file_info, nullptr, 0, nullptr, 0, nullptr, 0);
  GE_ASSERT_TRUE(uz_ret == UNZ_OK, "Failed to get the current file information, ret = %d", uz_ret);
  buff_size = file_info.uncompressed_size;
  // The size is taken from the archive, so the allocation must be allowed to fail gracefully: a nothrow
  // allocation yields nullptr (caught by the check below) where make_unique would throw. The buffer is not
  // zero-filled because it is only returned after every byte has been overwritten by the read loop.
  std::unique_ptr<uint8_t[]> mutable_buffer(new (std::nothrow) uint8_t[buff_size]);
  GE_ASSERT_NOTNULL(mutable_buffer, "Failed to allocate buffer, size = %zu", buff_size);
  size_t total_read = 0;
  int32_t bytes_read = 0;
  do {
    // A single read call reports its result as int32_t, so never request more than that per call.
    const uint32_t remaining = static_cast<uint32_t>(
        std::min<size_t>(buff_size - total_read, static_cast<size_t>(std::numeric_limits<int32_t>::max())));
    bytes_read = unzReadCurrentFile(zip_handle_, mutable_buffer.get() + total_read, remaining);
    GE_ASSERT_TRUE(bytes_read >= 0, "Failed to read file [%s], ret = %d", entry_name.c_str(), bytes_read);
    total_read += static_cast<size_t>(bytes_read);
  } while (bytes_read > 0);
  GE_ASSERT_TRUE(total_read == buff_size, "Failed to extract file [%s], expected = %zu bytes, actual = %zu bytes",
                 entry_name.c_str(), buff_size, total_read);
  GELOGI("Successfully extract file [%s], total_read = %zu bytes", entry_name.c_str(), total_read);
  return ReadonlyByteBuffer(mutable_buffer.release(), ConditionalDeleter{true});
}
}