import os
import json
import random
from PIL import Image
from tqdm import tqdm
import torch
import torchvision.transforms as T
from torchvision.transforms.functional import InterpolationMode
from transformers import AutoModel, AutoTokenizer, AutoConfig
from example.InternVL2.internvl2_utils import load_image
from msmodelslim.pytorch.llm_ptq.anti_outlier import AntiOutlierConfig, AntiOutlier
from msmodelslim.pytorch.llm_ptq.llm_ptq_tools import Calibrator, QuantConfig
def get_textvqa_calibration(textvqa_path, calib_num=30, get_all_calib=False):
    """Load TextVQA validation records and optionally sample a subset.

    Reads ``textvqa_val.jsonl`` from *textvqa_path*, parsing one JSON object
    per non-blank line. Every record is given two extra keys: ``text`` (a copy
    of ``question``) and ``image_url`` (a copy of ``image``).

    Args:
        textvqa_path: Directory that contains ``textvqa_val.jsonl``.
        calib_num: Number of records to draw at random when *get_all_calib*
            is false. Capped at the number of records available, so a small
            file no longer makes ``random.sample`` fail.
        get_all_calib: When true, return every record in file order and
            ignore *calib_num*.

    Returns:
        A list of record dicts.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``question`` or ``image``.
        ValueError: If *calib_num* is negative.
    """
    val_json = 'textvqa_val.jsonl'
    calibration_dataset = []
    # Explicit UTF-8 so non-ASCII questions do not depend on the host locale.
    with open(os.path.join(textvqa_path, val_json), "r", encoding="utf-8") as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank / trailing empty lines in the JSONL file.
                continue
            line_dict = json.loads(stripped)
            line_dict['text'] = line_dict['question']
            line_dict['image_url'] = line_dict['image']
            calibration_dataset.append(line_dict)
    if not get_all_calib:
        # random.sample raises ValueError if asked for more items than exist.
        sample_size = min(calib_num, len(calibration_dataset))
        calibration_dataset = random.sample(calibration_dataset, sample_size)
    return calibration_dataset
def get_tokenized_data(tokenizer, inputs, dtype=torch.float16, device='npu'):
    """Build one calibration entry per input record.

    For every record the question is wrapped in the InternVL2 chat template,
    the image is loaded with ``load_image`` and moved to *device* with the
    requested *dtype*, and the pieces are collected into a list.

    Args:
        tokenizer: Tokenizer object; stored as-is in every entry (it is not
            called here).
        inputs: Sized iterable of dicts carrying ``text`` (the question) and
            ``image_url`` (passed to ``load_image``).
        dtype: Torch dtype the pixel values are cast to.
        device: Device the pixel values are moved to. Defaults to ``'npu'``,
            the value that used to be hard-coded.

    Returns:
        A list of ``[tokenizer, pixel_values, query, generation_config]``
        lists, one per input record, in input order.

    Raises:
        KeyError: If a record lacks ``text`` or ``image_url``.
    """
    tokenization_data = []
    for input_item in tqdm(inputs, total=len(inputs), desc="Tokenizing data"):
        # Index directly (like 'image_url' below) so a missing question fails
        # with a KeyError naming the key instead of a str + None TypeError.
        question = input_item['text']
        query = (
            '<|im_start|>system\n你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。<|im_end|>'
            + '<|im_start|>user\n<image>\n' + question + '<|im_end|><|im_start|>assistant\n'
        )
        image_url = input_item['image_url']
        # NOTE(review): max_num=12 is forwarded to load_image unchanged; its
        # exact meaning is defined by that helper — confirm there.
        pixel_values = load_image(image_url, max_num=12).to(device).to(dtype)
        # A fresh dict per entry so entries never share mutable state.
        generation_config = dict(max_new_tokens=1024, do_sample=False)
        tokenization_data.append([tokenizer, pixel_values, query, generation_config])
    return tokenization_data
# Device identifiers used for dev_type and device_map below.
CPU = 'cpu'
NPU = 'npu'

# All locations are resolved from the PROJECT_PATH environment variable
# (os.environ raises KeyError when it is unset).
model_path = f"{os.environ['PROJECT_PATH']}/resource/mllm/InternVL2-40B"
calib_images = f"{os.environ['PROJECT_PATH']}/resource/mllm/textvqa_val"
save_directory = f"{os.environ['PROJECT_PATH']}/output/mllm_ptq_internvl2_40b"

# Run settings.
calib_num = 1
part_file_size = None
w_bit = 8
a_bit = 8
device_type = NPU
device_map = "auto" if device_type != CPU else CPU

# Load the model configuration first; its torch_dtype drives both the model
# weights and the calibration pixel values.
config = AutoConfig.from_pretrained(
    model_path,
    local_files_only=True,
    trust_remote_code=True,
)
dtype = config.torch_dtype

model = AutoModel.from_pretrained(
    model_path,
    torch_dtype=dtype,
    local_files_only=True,
    low_cpu_mem_usage=True,
    device_map=device_map,
    use_safetensors=True,
    trust_remote_code=True,
)
model = model.eval()

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    local_files_only=True,
    trust_remote_code=True,
    use_fast=False,
)

# Rebind forward to chat. Calibration entries are built as
# [tokenizer, pixel_values, query, generation_config]; presumably the
# msmodelslim tools call the model with those positionally — confirm.
model.forward = model.chat

# Layer names handed to QuantConfig(disable_names=...): four linear layers in
# every vision encoder block, plus the LM head, two mlp1 entries and each
# language-model down_proj.
vision_name = [
    layer_name
    for i in range(config.vision_config.num_hidden_layers)
    for layer_name in (
        f"vision_model.encoder.layers.{i}.mlp.fc1",
        f"vision_model.encoder.layers.{i}.mlp.fc2",
        f"vision_model.encoder.layers.{i}.attn.proj",
        f"vision_model.encoder.layers.{i}.attn.qkv",
    )
]
llm_name = ["language_model.lm_head", "mlp1.1", "mlp1.3"]
llm_name += [
    f"language_model.model.layers.{i}.mlp.down_proj"
    for i in range(config.llm_config.num_hidden_layers)
]
disable_names = [*vision_name, *llm_name]

# Calibration samples.
calibration_dataset = get_textvqa_calibration(calib_images, calib_num)
calib_data = get_tokenized_data(tokenizer, calibration_dataset, dtype=dtype)

# Step 1: anti-outlier processing, run on the first calibration entry only.
anti_config = AntiOutlierConfig(
    anti_method="m2",
    dev_id=model.device.index,
    dev_type=device_type,
    a_bit=a_bit,
    w_bit=w_bit,
)
anti_outlier = AntiOutlier(model, calib_data=calib_data[:1], cfg=anti_config)
anti_outlier.process()

# Step 2: calibration with the disable list above, then save as safetensors.
quant_config = QuantConfig(
    disable_names=disable_names,
    act_method=1,
    mm_tensor=False,
    dev_id=model.device.index,
    dev_type=device_type,
    a_bit=a_bit,
    w_bit=w_bit,
)
calibrator = Calibrator(model, quant_config, calib_data=calib_data[:1], disable_level='L0')
calibrator.run()
calibrator.save(save_directory, save_type=["safe_tensor"], part_file_size=part_file_size)