import json
from argparse import ArgumentParser
# Strip a COCO ``instances_*.json`` annotations file down to the fields needed
# for bounding-box training and write the result to a new JSON file.

# Keys retained on every image record; everything else is deleted.
IMAGE_KEYS = frozenset(('id', 'file_name', 'height', 'width'))
# Keys retained on every annotation record when --keep-keys is NOT given.
ANNOTATION_KEYS = frozenset(('image_id', 'bbox', 'category_id', 'id'))


def build_parser():
    """Return the command-line parser for this script."""
    parser = ArgumentParser(description="parse coco annotations file and compress out data we don't need for bbox training")
    parser.add_argument('input_file', type=str, help='input instances_*.json file')
    parser.add_argument('output_file', type=str, help='"compressed" output instances_*.json file')
    parser.add_argument('--keep-keys', '-k', action='store_true',
                        help='pycocotools depends on all the useless keys. This flag lets you keep the useless keys (but deletes the data they point to)')
    parser.add_argument('--pretty-print', '-p', action='store_true',
                        help='pretty print the output .json file (for debugging) rather than minimizing size')
    return parser


def _drop_other_keys(record, wanted):
    """Delete, in place, every key of ``record`` that is not in ``wanted``."""
    # Materialize the doomed keys first: a dict must not change size while
    # it is being iterated.
    for key in [k for k in record if k not in wanted]:
        del record[key]


def compress_annotations(data: dict, keep_keys: bool = False) -> dict:
    """Reduce a loaded annotations dict to what bbox training needs.

    The image and annotation records inside ``data`` are modified in place
    and reused in the returned dict.

    Args:
        data: Parsed annotations file; must contain the ``'images'``,
            ``'annotations'`` and ``'categories'`` keys.
        keep_keys: When true, annotation records keep all of their keys, but
            ``'segmentation'`` is replaced by an empty list and ``'area'`` by
            ``1.0``. When false, annotation records are reduced to
            ``ANNOTATION_KEYS``.

    Returns:
        A new dict holding only ``'images'``, ``'annotations'`` and
        ``'categories'``.

    Raises:
        KeyError: If one of the three required top-level keys is missing.
    """
    images = data['images']
    annots = data['annotations']

    # Image records are always trimmed, regardless of keep_keys.
    for img in images:
        _drop_other_keys(img, IMAGE_KEYS)

    for ann in annots:
        if keep_keys:
            # Keep the keys but replace the bulky payloads with placeholders.
            ann['segmentation'] = []
            ann['area'] = 1.0
        else:
            _drop_other_keys(ann, ANNOTATION_KEYS)

    return {
        'images': images,
        'annotations': annots,
        'categories': data['categories'],
    }


def main(argv=None):
    """Run the command-line tool.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(args.input_file, encoding='utf-8') as json_file:
        data = json.load(json_file)

    print("images: ", len(data['images']))
    print("bboxes: ", len(data['annotations']))

    newresult = compress_annotations(data, keep_keys=args.keep_keys)

    # Compact separators minimize file size; with pretty printing, json's
    # defaults for the chosen indent are used instead.
    my_separators = None if args.pretty_print else (',', ':')
    my_indent = 4 if args.pretty_print else None
    with open(args.output_file, "w", encoding='utf-8') as ofile:
        json.dump(newresult, ofile, separators=my_separators, indent=my_indent)


if __name__ == "__main__":
    main()