import argparse
from paddleocr import PaddleOCRVL
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options for the OCR script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what existing callers get.

    Returns:
        Namespace with ``layout_detection_model_dir``, ``vllm_ip``,
        ``input_path`` and ``output_dir`` attributes (all strings).

    Raises:
        SystemExit: Raised by argparse when ``--input_path`` is missing or
            an unknown option is supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--layout_detection_model_dir",
        type=str,
        default="PP-DocLayoutV2",
        help="Path to PP-DocLayoutV2 model directory",
    )
    # Despite the option name, the value is a full URL, not a bare IP.
    parser.add_argument(
        "--vllm_ip",
        type=str,
        default="http://127.0.0.1:8000/v1",
        help="vLLM server URL",
    )
    parser.add_argument(
        "--input_path",
        type=str,
        required=True,
        help="Path to input image/pdf or image folder",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="output",
        help="Output directory for results",
    )
    return parser.parse_args(argv)
def main():
    """Run the PaddleOCRVL pipeline on the requested input and save results.

    Builds a ``PaddleOCRVL`` pipeline from the command-line options with the
    "vllm-server" recognition backend on device "npu", runs one throw-away
    warm-up prediction, then predicts again and, for every result, prints it
    and writes JSON and Markdown output under ``--output_dir``.
    """
    args = parse_args()
    pipeline = PaddleOCRVL(
        layout_detection_model_dir=args.layout_detection_model_dir,
        vl_rec_backend="vllm-server",
        vl_rec_server_url=args.vllm_ip,
        use_doc_unwarping=False,
        use_doc_orientation_classify=False,
        device="npu",
    )

    print("warm up...")
    # Drain and discard the warm-up pass. Iterating guarantees the work is
    # actually performed even if predict() yields results lazily, and not
    # binding the results lets them be freed before the real run.
    # NOTE(review): whether predict() returns a list or a lazy iterator is
    # not visible from this file - confirm against the PaddleOCR version used.
    for _ in pipeline.predict(args.input_path):
        pass
    print("warm up end")

    for res in pipeline.predict(args.input_path):
        res.print()
        res.save_to_json(save_path=args.output_dir)
        res.save_to_markdown(save_path=args.output_dir)
# Run the pipeline only when executed as a script, so importing this module
# has no side effects.
if __name__ == "__main__":
    main()