# coding=utf-8
# Copyright 2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
# Convert an Alpaca-style instruction dataset into one-JSON-object-per-line
# "chatml" records.
#
# Input : ./alpaca_data.json      -- a JSON array of objects, each providing
#                                    the keys "instruction", "input", "output".
# Output: ./alpaca_converter.json -- one JSON object per line (JSON Lines),
#                                    despite the ".json" extension.


def _to_chatml(item):
    """Build the chatml record for a single Alpaca sample.

    Args:
        item: Mapping with string values under the keys "instruction",
            "input" and "output".

    Returns:
        A dict with a two-message conversation (user, then assistant),
        tagged with ``"type": "chatml"`` and ``"source": "alpaca"``.

    Raises:
        KeyError: If one of the three required keys is missing.
    """
    # NOTE(review): instruction and input are joined with no separator, so
    # samples with a non-empty "input" get the two texts run together.
    # Kept as-is to preserve the produced dataset -- confirm this is intended.
    return {
        "type": "chatml",
        "messages": [
            {
                "role": "user",
                "content": item["instruction"] + item["input"],
            },
            {
                "role": "assistant",
                "content": item["output"],
            },
        ],
        "source": "alpaca",
    }


# Decode explicitly as UTF-8: without it, open() uses the platform locale
# encoding and non-ASCII text in the dataset fails or is mis-decoded on
# systems whose locale is not UTF-8.
with open("./alpaca_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

output_path = "./alpaca_converter.json"
new_lines = [_to_chatml(item) for item in data]

# json.dumps() escapes non-ASCII characters by default, so each line is pure
# ASCII; the explicit encoding simply keeps the output platform-independent.
with open(output_path, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(line) + '\n' for line in new_lines)