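# NOTE: This version only supports writing the zh-cn and zh-tw files.
# The final print may not take effect, and errors during the run are expected:
# the run counts as successful as long as the output appears under data/.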
import datetime
import json
import ssl
import urllib.request
from rdflib import Graph

# Default SSL context, passed to urllib.request.urlopen when the dump is downloaded.
ctx = ssl.create_default_context()

# RDF graph that the downloaded Turtle data is parsed into and later queried.
graph = Graph()

# Timestamp captured once at start-up (naive local time); process_data()
# copies its year/month/day into every generated data set.
date = datetime.datetime.now()

# Pre-processing helper: replace out-of-range date strings.
def preprocess_rdf_data(raw_data):
    """Replace known out-of-range timestamps in RDF text with a placeholder.

    Args:
        raw_data: The RDF document as a string.

    Returns:
        The text with every occurrence of the listed timestamps, signed or
        unsigned, replaced by "0001-01-01T00:00:00Z".
    """
    invalid_date_patterns = [
        "3190000-01-01T00:00:00Z",
        "-3190000-01-01T00:00:00Z"
    ]
    # Replace the longest patterns first: the unsigned timestamp is a substring
    # of the signed one, so handling it first would rewrite only the digits and
    # leave a stray "-" in front of the placeholder.
    for pattern in sorted(invalid_date_patterns, key=len, reverse=True):
        raw_data = raw_data.replace(pattern, "0001-01-01T00:00:00Z")
    return raw_data

def process_data(graph, lang, specials):
    """Collect name prefixes and suffixes for one language from the graph.

    Runs a SPARQL query that selects each translation tagged with ``lang``
    together with its suffix value, then splits every translation:

    * a translation present in ``specials`` uses the ``(prefix, suffix)``
      pair stored there;
    * suffix value ``'paw'``: the translation without its last character is
      kept as a prefix and the apprentice counter is incremented;
    * suffix value ``'kit'``: a trailing '崽' is dropped if present, otherwise
      the first character is dropped; the kit counter is incremented;
    * any other suffix value: the last character is stored as a suffix and
      the remainder as a prefix.

    NOTE(review): the one-character slices presumably target the Chinese
    translations -- confirm before relying on the output for other languages.

    Args:
        graph: Object whose ``query`` method takes a SPARQL string and yields
            rows exposing ``translation`` and ``suffix``, each with ``.value``.
        lang: Language tag compared against ``lang(?translation)``.
        specials: Mapping from a full translation to a ``(prefix, suffix)``
            pair that overrides the default splitting.

    Returns:
        A dict with the keys 'language', 'date' (year/month/day taken from the
        module-level ``date``), 'prefixes', 'suffixes', 'apprentices', 'kits'.
    """
    sparql = f'''SELECT ?translation ?suffix WHERE {{
        ?cat wdt:P3 wd:Q622 ;
             p:P84 ?name .
        ?name pq:P85 ?translation ;
              pq:P111 ?suffix .
        FILTER (lang(?translation) = "{lang}")
    }}'''

    prefix_list, suffix_list = [], []
    tally = {'paw': 0, 'kit': 0}

    for entry in graph.query(sparql):
        name = entry.translation.value

        # Hand-written overrides win over the generic splitting rules.
        if name in specials:
            head, tail = specials[name]
            prefix_list.append(head)
            suffix_list.append(tail)
            continue

        kind = entry.suffix.value
        if kind == 'paw':
            prefix_list.append(name[:-1])
            tally['paw'] += 1
        elif kind == 'kit':
            # The kit marker is either a trailing '崽' or a leading character.
            prefix_list.append(name[:-1] if name[-1] == '崽' else name[1:])
            tally['kit'] += 1
        else:
            prefix_list.append(name[:-1])
            suffix_list.append(name[-1])

    generated_on = {'year': date.year, 'month': date.month, 'day': date.day}
    return {
        'language': lang,
        'date': generated_on,
        'prefixes': prefix_list,
        'suffixes': suffix_list,
        'apprentices': tally['paw'],
        'kits': tally['kit'],
    }

# Download the Turtle dump, clean up the known-bad dates, and load it into the graph.
url = 'https://raw.gitcode.com/ryj/dummp/raw/master/wbdump.ttl'
with urllib.request.urlopen(url, context=ctx) as dump:
    raw_data = dump.read().decode('utf-8')
preprocessed_data = preprocess_rdf_data(raw_data)
graph.parse(data=preprocessed_data, format='turtle')

# Translations whose prefix/suffix split cannot be derived by the generic rules.
specials_CN = {'桦树皮': ('桦', '树皮')}
specials_TW = {'樺樹皮': ('樺', '樹皮')}

# Build one data set per language before anything is written to disk.
data_en = process_data(graph, 'en', {})
data_CN = process_data(graph, 'zh-cn', specials_CN)
data_TW = process_data(graph, 'zh-tw', specials_TW)

# Write each data set as a small JS file under the output directory.
import os
output_dir = 'data/'
os.makedirs(output_dir, exist_ok=True)

for file_stem, payload in (('en', data_en), ('zh-cn', data_CN), ('zh-tw', data_TW)):
    with open(f'{output_dir}{file_stem}.js', 'w', encoding='utf-8') as out:
        out.write('const data = ' + json.dumps(payload, ensure_ascii=False))

print("数据已成功处理并保存!")