import os
import re
import sys
import shutil
import numpy as np
# Absolute path of the directory that contains this script.
# NOTE(review): a module-level name `__dir__` is also the PEP 562 hook used by
# `dir(module)`; harmless for a script, but worth renaming if this file is
# ever imported as a module.
__dir__ = os.path.dirname(os.path.abspath(__file__))
# Extend the import search path with this directory and its `PaddleOCR`
# subdirectory. Presumably this is what makes the `tools` and `ppocr` imports
# below resolvable -- confirm against the repository layout.
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, 'PaddleOCR')))
import tools.program as program
from tqdm import tqdm
from ppocr.data import build_dataloader
def main(config, device, logger, vdl_writer, data_path):
    """Dump the first element of every evaluation batch to a raw ``.bin`` file.

    The evaluation dataloader is built from ``config`` and iterated once.
    Batch number ``idx`` is paired with line ``idx`` of the first label file
    listed under ``config["Eval"]["dataset"]["label_file_list"]``; the
    ``img_<digits>`` token found on that line becomes the output file name
    (``img_<digits>.bin``).

    NOTE(review): pairing batches with label lines by position only lines up
    when each batch holds a single sample and the loader keeps the label-file
    order -- confirm against the Eval loader settings in the config.

    Args:
        config: Nested configuration mapping; ``config["Eval"]`` is consumed
            here and the whole mapping is forwarded to ``build_dataloader``.
        device: Forwarded to ``build_dataloader``.
        logger: Forwarded to ``build_dataloader``.
        vdl_writer: Unused by this function; kept for interface compatibility.
        data_path: Existing directory that receives the ``.bin`` files.

    Raises:
        IndexError: If the dataloader yields more batches than the label file
            has lines.
        ValueError: If a label line contains no ``img_<digits>`` token.
    """
    valid_dataloader = build_dataloader(config, 'Eval', device, logger)

    label_file = config["Eval"]["dataset"]["label_file_list"][0]
    # Only the ASCII ``img_<digits>`` token is read from each line, so bytes
    # that do not decode (e.g. a label column in another encoding) are
    # replaced instead of aborting the whole run.
    with open(label_file, 'r', encoding='utf-8', errors='replace') as f:
        label_lines = f.readlines()

    # Compile once; the same pattern is applied to every label line.
    name_pattern = re.compile(r"img_\d+")

    # Context manager guarantees the progress bar is closed even if a batch
    # fails part-way through.
    with tqdm(
            total=len(valid_dataloader),
            desc='Preprocessing',
            position=0,
            leave=True) as pbar:
        for idx, batch in enumerate(valid_dataloader):
            if idx >= len(label_lines):
                raise IndexError(
                    "batch {} has no matching line in label file {!r} "
                    "({} lines)".format(idx, label_file, len(label_lines)))

            match = name_pattern.search(label_lines[idx])
            if match is None:
                raise ValueError(
                    "line {} of label file {!r} contains no 'img_<N>' "
                    "name: {!r}".format(idx + 1, label_file,
                                        label_lines[idx]))

            img_name = "{}.bin".format(match.group())
            # ndarray.tofile writes the raw buffer only (no shape or dtype
            # header).
            batch[0].numpy().tofile(os.path.join(data_path, img_name))
            pbar.update(1)
if __name__ == "__main__":
    # program.preprocess() supplies the four objects that main() expects.
    config, device, logger, vdl_writer = program.preprocess()

    # Output directory comes from the top-level 'bin_data' config entry.
    data_path = os.path.join(config['bin_data'])

    # Always start from an empty directory: anything left over from a
    # previous run is deleted before the directory is recreated.
    stale_output_present = os.path.exists(data_path)
    if stale_output_present:
        shutil.rmtree(data_path)
    os.makedirs(data_path)

    main(config, device, logger, vdl_writer, data_path)