"""Normalize PTO ISA instruction pages and generate Chinese counterparts."""
from __future__ import annotations
import csv
import json
import re
from pathlib import Path
from typing import Dict, List
# Two directory levels above the folder that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Folder holding the per-instruction Markdown pages.
ISA_DIR = REPO_ROOT.joinpath("docs", "isa")
# Instruction manifest; load_manifest() decodes it with the JSON parser.
MANIFEST = ISA_DIR.joinpath("manifest.yaml")
# Tab-separated level table in the user's home directory; a missing file is
# tolerated by load_level_formats().
PTO_ISA_LEVEL_TABLE = Path.home().joinpath("pto-isa.txt")
_LANG_LINK_LINE_RE = re.compile(r"^>\s*(?:Chinese|English)\s+version:\s*.*$", re.IGNORECASE)
_INSTR_NAME_RE = re.compile(r"^[A-Z_][A-Z0-9_]*$")
_CODE_BLOCK_RE = re.compile(r"```(?:[A-Za-z0-9_+-]+)?\n(.*?)\n```", re.DOTALL)
_LEVEL_ALIAS = {
"TGEMV_ACC": "TGEMV",
"TGEMV_BIAS": "TGEMV",
}
_EXPLICIT_FALLBACK_FORMS: Dict[str, Dict[str, str]] = {
"TEXTRACT_FP": {
"level1": "%dst = pto.textract_fp %src, %idxrow, %idxcol : (!pto.tile<...>, dtype, dtype) -> !pto.tile<...>",
"level2": "pto.textract_fp ins(%src, %idxrow, %idxcol : !pto.tile_buf<...>, dtype, dtype) outs(%dst : !pto.tile_buf<...>)",
},
"TFILLPAD_EXPAND": {
"level1": "%dst = pto.tfillpad_expand %src : !pto.tile<...> -> !pto.tile<...>",
"level2": "pto.tfillpad_expand ins(%src : !pto.tile_buf<...>) outs(%dst : !pto.tile_buf<...>)",
},
"TFILLPAD_INPLACE": {
"level1": "%dst = pto.tfillpad_inplace %src : !pto.tile<...> -> !pto.tile<...>",
"level2": "pto.tfillpad_inplace ins(%src : !pto.tile_buf<...>) outs(%dst : !pto.tile_buf<...>)",
},
"TIMG2COL": {
"level1": "%dst = pto.timg2col %src : !pto.tile<...> -> !pto.tile<...>",
"level2": "pto.timg2col ins(%src : !pto.tile_buf<...>) outs(%dst : !pto.tile_buf<...>)",
},
"TINSERT_FP": {
"level1": "%dst = pto.tinsert_fp %src, %fp, %idxrow, %idxcol : (!pto.tile<...>, !pto.tile<...>, dtype, dtype) -> !pto.tile<...>",
"level2": "pto.tinsert_fp ins(%src, %fp, %idxrow, %idxcol : !pto.tile_buf<...>, !pto.tile_buf<...>, dtype, dtype) outs(%dst : !pto.tile_buf<...>)",
},
"TQUANT": {
"level1": "%dst = pto.tquant %src, %qp : (!pto.tile<...>, !pto.tile<...>) -> !pto.tile<...>",
"level2": "pto.tquant ins(%src, %qp : !pto.tile_buf<...>, !pto.tile_buf<...>) outs(%dst : !pto.tile_buf<...>)",
},
"SETFMATRIX": {
"level1": "pto.SETFMATRIX %cfg : !pto.fmatrix_config -> ()",
"level2": "pto.SETFMATRIX ins(%cfg : !pto.fmatrix_config) outs()",
},
"TTRI": {
"level1": "%dst = pto.ttri %src0, %src1 : (!pto.tile<...>, !pto.tile<...>) -> !pto.tile<...>",
"level2": "pto.ttri ins(%src0, %src1 : !pto.tile_buf<...>, !pto.tile_buf<...>) outs(%dst : !pto.tile_buf<...>)",
},
}
def load_manifest() -> List[Dict[str, object]]:
    """Return the ``instructions`` entry of the ISA manifest.

    The manifest file is read as UTF-8 and decoded with the JSON parser, so
    its content has to be valid JSON.

    Raises:
        KeyError: if the decoded document has no ``"instructions"`` key.
    """
    with MANIFEST.open("r", encoding="utf-8") as handle:
        manifest = json.load(handle)
    return manifest["instructions"]
def _normalize_cell(text: str) -> str:
lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
while lines and not lines[0].strip():
lines.pop(0)
while lines and not lines[-1].strip():
lines.pop()
return "\n".join(line.rstrip() for line in lines)
def load_level_formats(path: Path) -> Dict[str, Dict[str, str]]:
    """Read the tab-separated level table into ``{NAME: {level1, level2, notes}}``.

    Column 1 holds the instruction name (a trailing parenthesised suffix is
    removed and the name is upper-cased), columns 2 and 3 the Level 1 and
    Level 2 forms, and the optional column 4 free-form notes. Rows that are
    too short, carry the ``PTO`` header label, have a name that is not a valid
    mnemonic, or have all three cells empty are skipped. Afterwards every
    alias in ``_LEVEL_ALIAS`` that has no row of its own receives a copy of
    its source row.

    Returns an empty dict when ``path`` does not exist.
    """
    if not path.exists():
        return {}

    table: Dict[str, Dict[str, str]] = {}
    with path.open("r", encoding="utf-8", newline="") as handle:
        for record in csv.reader(handle, delimiter="\t", quotechar='"'):
            if len(record) < 4:
                continue
            label = record[1].strip()
            if label in ("", "PTO"):
                continue
            mnemonic = re.sub(r"\(.*\)$", "", label).strip().upper()
            if _INSTR_NAME_RE.fullmatch(mnemonic) is None:
                continue
            ssa_form = _normalize_cell(record[2])
            dps_form = _normalize_cell(record[3])
            remark = _normalize_cell(record[4]) if len(record) > 4 else ""
            if not any((ssa_form, dps_form, remark)):
                continue
            table[mnemonic] = {"level1": ssa_form, "level2": dps_form, "notes": remark}

    for alias, origin in _LEVEL_ALIAS.items():
        if alias in table or origin not in table:
            continue
        # Copy so that the alias and its source never share one dict.
        table[alias] = dict(table[origin])
    return table
def _split_sections(text: str) -> Dict[str, str]:
matches = list(re.finditer(r"^##\s+(.+?)\s*$", text, re.MULTILINE))
out: Dict[str, str] = {}
for index, match in enumerate(matches):
name = match.group(1).strip()
start = match.end() + 1
end = matches[index + 1].start() if index + 1 < len(matches) else len(text)
out[name] = text[start:end]
return out
def _fallback_level1(instr: str, assembly_body: str) -> str:
    """Derive a Level 1 (SSA) form when the level table has none.

    Resolution order:
      1. the hand-written form in ``_EXPLICIT_FALLBACK_FORMS``;
      2. the first non-empty fenced code block of ``assembly_body``, with the
         ``pto.`` dialect prefix added to each operation mnemonic;
      3. a generic ``%dst = pto.<name> ...`` placeholder.

    Mnemonics that already start with ``pto.`` are left untouched, so a page
    whose assembly block is already prefixed does not end up with
    ``pto.pto.<op>``.
    """
    explicit = _EXPLICIT_FALLBACK_FORMS.get(instr, {}).get("level1")
    if explicit:
        return explicit
    for block in _CODE_BLOCK_RE.findall(assembly_body):
        candidate = _normalize_cell(block)
        if not candidate:
            continue
        # "%res = op ..." lines: prefix the mnemonic after the "=".
        candidate = re.sub(
            r"(^\s*[%@][^=\n]*=\s*)(?!pto\.)([a-z][a-z0-9_.]+)\b",
            r"\1pto.\2",
            candidate,
            flags=re.MULTILINE,
        )
        # "op ..." lines without a result: prefix the leading mnemonic.
        candidate = re.sub(
            r"(^\s*)(?!pto\.)([a-z][a-z0-9_.]+)\b",
            r"\1pto.\2",
            candidate,
            flags=re.MULTILINE,
        )
        return candidate
    return f"%dst = pto.{instr.lower()} ..."
def _to_tile_buf_types(type_expr: str) -> str:
return re.sub(r"!pto\.tile<", "!pto.tile_buf<", type_expr)
def _render_dps_from_level1(level1: str) -> str:
lines: List[str] = []
for raw_line in level1.splitlines():
line = raw_line.strip()
if not line:
continue
if line.startswith("//"):
continue
typed = re.match(
r"(?:(?P<res>[%@][^=\n]*?)\s*=\s*)?pto\.(?P<op>[a-z][a-z0-9_.]*)\s*(?P<args>.*?)\s*:\s*(?P<ins>.+?)\s*->\s*(?P<outs>.+)$",
line,
)
if typed:
res = (typed.group("res") or "").strip()
op = typed.group("op").strip()
args = typed.group("args").strip()
in_types = _to_tile_buf_types(typed.group("ins").strip())
out_types_raw = typed.group("outs").strip()
out_types = _to_tile_buf_types(out_types_raw)
if args:
ins_clause = f"{args} : {in_types}"
else:
ins_clause = f": {in_types}"
if out_types_raw == "()":
lines.append(f"pto.{op} ins({ins_clause}) outs()")
else:
out_name = res if res else "%dst"
lines.append(f"pto.{op} ins({ins_clause}) outs({out_name} : {out_types})")
continue
semi_typed = re.match(
r"(?:(?P<res>[%@][^=\n]*?)\s*=\s*)?pto\.(?P<op>[a-z][a-z0-9_.]*)\s*(?P<args>.*?)\s*:\s*(?P<ins>.+)$",
line,
)
if semi_typed:
res = (semi_typed.group("res") or "").strip()
op = semi_typed.group("op").strip()
args = semi_typed.group("args").strip()
in_types = _to_tile_buf_types(semi_typed.group("ins").strip())
if args:
ins_clause = f"{args} : {in_types}"
else:
ins_clause = f": {in_types}"
if res:
lines.append(f"pto.{op} ins({ins_clause}) outs({res} : !pto.tile_buf<...>)")
else:
lines.append(f"pto.{op} ins({ins_clause}) outs()")
continue
untyped = re.match(r"(?:(?P<res>[%@][^=\n]*?)\s*=\s*)?pto\.(?P<op>[a-z][a-z0-9_.]*)\s*(?P<args>.*)$", line)
if untyped:
op = untyped.group("op").strip()
args = untyped.group("args").strip()
res = (untyped.group("res") or "").strip()
if args and res:
lines.append(f"pto.{op} ins({args}) outs({res} : !pto.tile_buf<...>)")
elif args:
lines.append(f"pto.{op} ins({args}) outs()")
elif res:
lines.append(f"pto.{op} ins() outs({res} : !pto.tile_buf<...>)")
else:
lines.append(f"pto.{op} ins() outs()")
return "\n".join(lines).strip()
def _fallback_level2(instr: str, level1: str) -> str:
    """Derive a Level 2 (DPS) form when the level table has none.

    The first non-empty candidate wins: the hand-written form, then a form
    synthesized from ``level1``, then a generic one-input/one-output
    placeholder.
    """
    canned = _EXPLICIT_FALLBACK_FORMS.get(instr, {}).get("level2")
    return (
        canned
        or _render_dps_from_level1(level1)
        or f"pto.{instr.lower()} ins(%src : !pto.tile_buf<...>) outs(%dst : !pto.tile_buf<...>)"
    )
def _sync_level2_from_table(level_formats: Dict[str, Dict[str, str]]) -> str:
segments: List[str] = []
for name in ("RECORD_EVENT", "WAIT_EVENT", "BARRIER"):
item = level_formats.get(name)
if not item:
continue
body = item.get("level2", "").strip()
note = item.get("notes", "").strip()
if body:
segments.append(body)
if note:
segments.append(f"// {note}")
return "\n".join(segments).strip()
def _resolve_level_formats(instr: str, assembly_body: str, level_formats: Dict[str, Dict[str, str]]) -> Dict[str, str]:
if instr == "TSYNC":
level1 = "// Level 1 (SSA) does not support explicit synchronization primitives."
level2 = _sync_level2_from_table(level_formats)
if not level2:
level2 = "pto.record_event[src_op, dst_op, eventID]\npto.wait_event[src_op, dst_op, eventID]\npto.barrier(op)"
return {"level1": level1, "level2": level2}
item = level_formats.get(instr, {})
level1 = item.get("level1", "").strip()
level2 = item.get("level2", "").strip()
if not level1:
level1 = _fallback_level1(instr, assembly_body)
if not level2:
level2 = _fallback_level2(instr, level1)
return {"level1": level1, "level2": level2}
def ensure_ir_level_syntax(instr: str, text: str, level_formats: Dict[str, Dict[str, str]]) -> str:
    """Rewrite the ``## Assembly Syntax`` section so it ends with both IR levels.

    Whatever precedes an existing ``### IR Level 1 (SSA)`` sub-heading is kept
    as the section's prose; the ``### IR Level 1 (SSA)`` and
    ``### IR Level 2 (DPS)`` blocks are then regenerated from
    ``level_formats`` (or from fallbacks). ``text`` is returned unchanged when
    it has no ``## Assembly Syntax`` section.

    The rewrite is idempotent: running it again on its own output gives the
    same text.
    """
    section_map = _split_sections(text)
    body = section_map.get("Assembly Syntax")
    if body is None:
        return text
    base = re.split(r"^###\s+IR Level 1 \(SSA\)\s*$", body, maxsplit=1, flags=re.MULTILINE)[0].rstrip()
    formats = _resolve_level_formats(instr, base, level_formats)
    level_block = (
        "\n\n### IR Level 1 (SSA)\n\n"
        "```text\n"
        f"{formats['level1']}\n"
        "```\n\n"
        "### IR Level 2 (DPS)\n\n"
        "```text\n"
        f"{formats['level2']}\n"
        "```\n"
    )
    # With no prose, the separator that opens level_block would land directly
    # after the blank lines that already follow the heading. Those blank lines
    # sit outside the replaced span, so each run would add two more newlines.
    # Dropping the leading newlines keeps repeated runs stable.
    new_body = (base + level_block).lstrip("\n").rstrip() + "\n"
    start_marker = re.compile(r"^##\s+Assembly Syntax\s*$", re.MULTILINE)
    start_match = start_marker.search(text)
    if not start_match:
        return text
    start = start_match.end() + 1
    next_heading = re.search(r"^##\s+.+$", text[start:], re.MULTILINE)
    end = start + next_heading.start() if next_heading else len(text)
    return text[:start] + new_body + text[end:]
def template_new_page(instr: str, summary: str) -> str:
    """Return the starter Markdown for an instruction page that does not exist yet.

    ``TGEMV_MX``, ``TPARTMUL``, ``TSETHF32MODE`` and ``TSETTF32MODE`` have
    hand-written pages, and ``summary`` is ignored for them. Every other
    instruction gets a generic skeleton with ``summary`` as its introduction.

    The ``TPARTMUL`` math block separates its three cases with plain line
    breaks; the stray ``+`` characters that used to start the second and third
    case are gone.
    """
    if instr == "TGEMV_MX":
        return """# TGEMV_MX
## Introduction
GEMV with scaling tiles for mixed-precision / quantized matrix-vector compute on supported targets.
This instruction family extends `TGEMV` with additional scale operands (mx path). Accumulator and scale handling are target-dependent.
## Math Interpretation
Conceptually (base GEMV path):
$$
\\mathrm{C}_{0,j} = \\sum_{k=0}^{K-1} \\mathrm{A}_{0,k} \\cdot \\mathrm{B}_{k,j}
$$
For `TGEMV_MX`, scale tiles participate in implementation-defined mixed-precision reconstruction / scaling. The architectural contract is that output corresponds to the target-defined mx GEMV semantics.
## Assembly Syntax
Schematic form:
```text
%acc = tgemv.mx %a, %a_scale, %b, %b_scale : (!pto.tile<...>, !pto.tile<...>, !pto.tile<...>, !pto.tile<...>) -> !pto.tile<...>
```
## C++ Intrinsic
Declared in `include/pto/common/pto_instr.hpp`:
```cpp
template <typename TileRes, typename TileLeft, typename TileLeftScale, typename TileRight, typename TileRightScale,
typename... WaitEvents>
PTO_INST RecordEvent TGEMV_MX(TileRes &cMatrix, TileLeft &aMatrix, TileLeftScale &aScaleMatrix,
TileRight &bMatrix, TileRightScale &bScaleMatrix, WaitEvents &... events);
```
Additional overloads support accumulation/bias variants and `AccPhase` selection.
## Constraints
- Uses backend-specific mx legality checks for data types, tile locations, fractal/layout combinations, and scaling formats.
- Scale tile compatibility and accumulator promotion are implementation-defined by target backend.
- For portability, validate the exact `(A, B, scaleA, scaleB, C)` type tuple and tile layout against target implementation constraints.
## Examples
For practical usage patterns, see:
- `docs/isa/TMATMUL_MX.md`
- `docs/isa/TGEMV.md`
"""
    if instr == "TPARTMUL":
        return """# TPARTMUL
## Introduction
Partial elementwise multiply with implementation-defined handling of mismatched valid regions.
## Math Interpretation
For each element `(i, j)` in the destination valid region:
$$
\\mathrm{dst}_{i,j} =
\\begin{cases}
\\mathrm{src0}_{i,j} \\cdot \\mathrm{src1}_{i,j} & \\text{if both inputs are defined at } (i,j) \\\\
\\mathrm{src0}_{i,j} & \\text{if only src0 is defined at } (i,j) \\\\
\\mathrm{src1}_{i,j} & \\text{if only src1 is defined at } (i,j)
\\end{cases}
$$
## Assembly Syntax
Synchronous form:
```text
%dst = tpartmul %src0, %src1 : !pto.tile<...> -> !pto.tile<...>
```
## C++ Intrinsic
Declared in `include/pto/common/pto_instr.hpp`:
```cpp
template <typename TileDataDst, typename TileDataSrc0, typename TileDataSrc1, typename... WaitEvents>
PTO_INST RecordEvent TPARTMUL(TileDataDst &dst, TileDataSrc0 &src0, TileDataSrc1 &src1, WaitEvents &... events);
```
## Constraints
- Element type/layout legality follows backend checks and is analogous to `TPARTADD` / `TPARTMAX` / `TPARTMIN`.
- Destination valid region defines the result domain.
- Partial-validity handling is implementation-defined for unsupported shape combinations.
## Examples
### Auto
```cpp
#include <pto/pto-inst.hpp>
using namespace pto;
void example_auto() {
using TileT = Tile<TileType::Vec, float, 16, 16>;
TileT src0, src1, dst;
TPARTMUL(dst, src0, src1);
}
```
### Manual
```cpp
#include <pto/pto-inst.hpp>
using namespace pto;
void example_manual() {
using TileT = Tile<TileType::Vec, float, 16, 16>;
TileT src0, src1, dst;
TASSIGN(src0, 0x1000);
TASSIGN(src1, 0x2000);
TASSIGN(dst, 0x3000);
TPARTMUL(dst, src0, src1);
}
```
"""
    if instr == "TSETHF32MODE":
        return """# TSETHF32MODE
## Introduction
Configure HF32 transform mode (implementation-defined).
This instruction controls backend-specific HF32 transformation behavior used by supported compute paths.
## Math Interpretation
No direct tensor arithmetic is produced by this instruction. It updates target mode state used by subsequent instructions.
## Assembly Syntax
Schematic form:
```text
tsethf32mode {enable = true, mode = ...}
```
## C++ Intrinsic
Declared in `include/pto/common/pto_instr.hpp`:
```cpp
template <bool isEnable, RoundMode hf32TransMode = RoundMode::CAST_ROUND, typename... WaitEvents>
PTO_INST RecordEvent TSETHF32MODE(WaitEvents &... events);
```
## Constraints
- Available only when the corresponding backend capability macro is enabled.
- Exact mode values and hardware behavior are target-defined.
- This instruction has control-state side effects and should be ordered appropriately relative to dependent compute instructions.
## Examples
```cpp
#include <pto/pto-inst.hpp>
using namespace pto;
void example_enable_hf32() {
TSETHF32MODE<true, RoundMode::CAST_ROUND>();
}
```
"""
    if instr == "TSETTF32MODE":
        return """# TSETTF32MODE
## Introduction
Configure TF32 transform mode (implementation-defined).
This instruction controls backend-specific TF32 transformation behavior used by supported compute paths.
## Math Interpretation
No direct tensor arithmetic is produced by this instruction. It updates target mode state used by subsequent instructions.
## Assembly Syntax
Schematic form:
```text
tsettf32mode {enable = true, mode = ...}
```
## C++ Intrinsic
Declared in `include/pto/common/pto_instr.hpp`:
```cpp
template <bool isEnable, RoundMode tf32TransMode = RoundMode::CAST_ROUND, typename... WaitEvents>
PTO_INST RecordEvent TSETTF32MODE(WaitEvents &... events);
```
## Constraints
- Available only when the corresponding backend capability macro is enabled.
- Exact mode values and hardware behavior are target-defined.
- This instruction has control-state side effects and should be ordered appropriately relative to dependent compute instructions.
## Examples
```cpp
#include <pto/pto-inst.hpp>
using namespace pto;
void example_enable_tf32() {
TSETTF32MODE<true, RoundMode::CAST_ROUND>();
}
```
"""
    # Generic skeleton for every instruction without a hand-written page.
    return f"""# {instr}
## Introduction
{summary}
## Math Interpretation
Semantics are instruction-specific. Unless stated otherwise, behavior is defined over the destination valid region.
## Assembly Syntax
## C++ Intrinsic
Declared in `include/pto/common/pto_instr.hpp`.
## Constraints
Refer to backend-specific legality checks for data type/layout/location/shape constraints.
## Examples
See related instruction pages in `docs/isa/` for concrete Auto/Manual usage patterns.
"""
def ensure_top_block(instr: str, text: str) -> str:
    """Insert a "Tile Operation Diagram" section right after the page title.

    The section embeds ``../figures/isa/<instr>.svg``. ``text`` is returned
    unchanged when it already references that figure or is empty. Otherwise
    the section goes after the first line, or after the blank line that
    follows it, and the result ends with exactly one newline.

    Because the inserted block contains the figure path this function checks
    for, running it again on its own output changes nothing.
    """
    svg_token = f"../figures/isa/{instr}.svg"
    if svg_token in text:
        return text
    lines = text.splitlines()
    if not lines:
        return text
    # Keep an existing blank line between the title and the new section.
    insert_at = 2 if len(lines) > 1 and lines[1].strip() == "" else 1
    block = [
        "## Tile Operation Diagram",
        "",
        f"![{instr} tile operation]({svg_token})",
        "",
    ]
    out = lines[:insert_at] + block + lines[insert_at:]
    return "\n".join(out).rstrip() + "\n"
def ensure_required_sections(instr: str, text: str) -> str:
    """Append a placeholder for every mandatory ``## `` section the page lacks.

    The mandatory sections are Introduction, Math Interpretation, Assembly
    Syntax, C++ Intrinsic, Constraints and Examples. Missing ones are added at
    the end of the page in that order; existing ones are left alone. Trailing
    whitespace of ``text`` is reduced to a single newline first. ``instr`` is
    accepted for signature symmetry and is not used.
    """
    placeholders = {
        "Introduction": "## Introduction\n\nRefer to the authoritative summary in `docs/isa/manifest.yaml`.\n",
        "Math Interpretation": "## Math Interpretation\n\nUnless otherwise specified, semantics are defined over the valid region and target-dependent behavior is marked as implementation-defined.\n",
        "Assembly Syntax": "## Assembly Syntax\n\nProvide the instruction textual form when one is defined.\n",
        "C++ Intrinsic": "## C++ Intrinsic\n\nDeclared in `include/pto/common/pto_instr.hpp`.\n",
        "Constraints": "## Constraints\n\nType/layout/location/shape legality is backend-dependent; treat implementation-specific notes as normative for that backend.\n",
        "Examples": "## Examples\n\nSee related examples in `docs/isa/` and `docs/coding/tutorials/`.\n",
    }
    page = text.rstrip() + "\n"
    for title, placeholder in placeholders.items():
        heading_re = re.compile(rf"^##\s+{re.escape(title)}\s*$", re.MULTILINE)
        if heading_re.search(page):
            continue
        page = page + "\n" + placeholder
    return page
def _strip_language_links(text: str) -> str:
lines = []
for line in text.splitlines():
if _LANG_LINK_LINE_RE.match(line.strip()):
continue
if "_zh.md" in line:
continue
lines.append(line)
return "\n".join(lines).rstrip() + "\n"
def _extract_sections(md: str) -> Dict[str, str]:
matches = list(re.finditer(r"^##\s+(.+?)\s*$", md, re.MULTILINE))
out: Dict[str, str] = {}
for i, m in enumerate(matches):
title = m.group(1).strip()
start = m.end() + 1
end = matches[i + 1].start() if i + 1 < len(matches) else len(md)
body = md[start:end].strip("\n")
out[title] = body
return out
def _translate_zh_line_segment(seg: str) -> str:
table = [
("### IR Level 1 (SSA)", "### AS Level 1(SSA)"),
("### IR Level 2 (DPS)", "### AS Level 2(DPS)"),
("PTO-AS form: see ", "PTO-AS 形式:参见 "),
("Declared in ", "声明于 "),
("Level 1 (SSA) does not support explicit synchronization primitives.", "Level 1(SSA)不支持显式同步原语。"),
(" in the valid region:", " 在有效区域内:"),
(" in the valid region.", " 在有效区域内。"),
("Synchronous form:", "同步形式:"),
("### Auto", "### 自动(Auto)"),
("### Manual", "### 手动(Manual)"),
("Index-based gather (conceptual):", "基于索引的 gather(概念性定义):"),
("Mask-pattern gather:", "基于掩码模式的 gather:"),
("Mask-pattern gather is", "掩码模式 gather 属于"),
("Exact index interpretation and bounds behavior are implementation-defined.", "索引解释方式与越界行为为实现定义。"),
("Implementation checks", "实现检查"),
("Valid region", "有效区域"),
("Runtime valid checks", "运行期有效区域检查"),
("Bounds / validity", "边界 / 有效性"),
("Data types", "数据类型"),
("Tile layout", "Tile 布局"),
("Tile shape/layout constraint", "Tile 形状/布局约束"),
("DType consistency", "数据类型一致性"),
("Recommended", "推荐"),
("To be removed", "将移除"),
("See related examples in `docs/isa/` and `docs/coding/tutorials/`.", "更多用法示例参见 `docs/isa/` 与 `docs/coding/tutorials/`。"),
("For each element", "对每个元素"),
("For each source element", "对每个源元素"),
("For each", "对每个"),
("Unless otherwise specified", "除非另有说明"),
("Semantics are instruction-specific.", "语义随指令而变化。"),
]
for k, v in table:
seg = seg.replace(k, v)
return seg
def _translate_md_to_zh(md: str) -> str:
out: List[str] = []
in_code = False
in_math = False
for line in md.splitlines():
stripped = line.strip()
if stripped.startswith("```"):
in_code = not in_code
out.append(line)
continue
if stripped.startswith("$$"):
in_math = not in_math
out.append(line)
continue
if in_code or in_math:
out.append(line)
continue
parts = re.split(r"(`[^`]*`)", line)
for i, p in enumerate(parts):
if p.startswith("`") and p.endswith("`"):
continue
parts[i] = _translate_zh_line_segment(p)
out.append("".join(parts))
return "\n".join(out).rstrip()
def build_zh_page(instr: str, summary_zh: str, en_text: str) -> str:
    """Build the Chinese counterpart of an English instruction page.

    The page has a fixed layout: title, diagram, introduction, math
    semantics, assembly syntax, C++ intrinsic, constraints and examples. The
    introduction starts with ``summary_zh`` (or a generic sentence when it is
    blank). The English introduction is appended only when it looks
    substantive: more than two non-blank lines, a bullet list, or a code
    fence. Every other section is the phrase-translated English section, or a
    Chinese placeholder when the English page has none.

    The diagram section embeds ``../figures/isa/<instr>.svg``, the same figure
    that ``ensure_top_block`` references on the English page.
    """
    sections = _extract_sections(_strip_language_links(en_text))
    intro_en = sections.get("Introduction", "").strip()

    lines: List[str] = [
        f"# {instr}",
        "",
        "## 指令示意图",
        "",
        f"![{instr} tile operation](../figures/isa/{instr}.svg)",
        "",
        "## 简介",
        "",
        summary_zh.strip() or f"{instr} 指令。",
    ]
    if intro_en:
        intro_lines = [ln for ln in intro_en.splitlines() if ln.strip()]
        is_substantive = (
            len(intro_lines) > 2
            or any(ln.lstrip().startswith(("-", "*")) for ln in intro_lines)
            or "```" in intro_en
        )
        if is_substantive:
            lines.append("")
            lines.append(_translate_md_to_zh(intro_en))

    # (Chinese heading, English section title, placeholder when it is absent)
    layout = (
        ("## 数学语义", "Math Interpretation", "该指令的数学语义为指令相关定义。除非另有说明,语义仅在有效区域内定义。"),
        ("## 汇编语法", "Assembly Syntax", "按需给出该指令的文本形式。"),
        ("## C++ 内建接口", "C++ Intrinsic", "接口声明位于 `include/pto/common/pto_instr.hpp`。"),
        ("## 约束", "Constraints", "类型/布局/位置/形状等合法性通常依赖后端实现。若行为依赖具体后端,文档会标注为“实现定义”。"),
        ("## 示例", "Examples", "参见 `docs/isa/` 与 `docs/coding/tutorials/`。"),
    )
    for heading_zh, title_en, placeholder in layout:
        body_en = sections.get(title_en, "").strip()
        lines.append("")
        lines.append(heading_zh)
        lines.append("")
        lines.append(_translate_md_to_zh(body_en) if body_en else placeholder)
    lines.append("")
    return "\n".join(lines)
def main() -> int:
    """Normalize every English ISA page and regenerate its Chinese counterpart.

    For each manifest entry the English page is created from a template when
    it does not exist, then cleaned of language links, given its diagram
    block, its mandatory sections and its IR-level syntax, and written back.
    The Chinese page is rebuilt from the normalized English text. A one-line
    summary is printed at the end.

    Returns:
        Always ``0``, for use as the process exit status.
    """
    manifest_entries = load_manifest()
    level_table = load_level_formats(PTO_ISA_LEVEL_TABLE)

    for entry in manifest_entries:
        name = str(entry["instruction"])
        english_summary = str(entry.get("summary_en", f"{name} instruction."))
        chinese_summary = str(entry.get("summary_zh", f"{name} 指令。"))

        english_page = ISA_DIR / f"{name}.md"
        if not english_page.exists():
            english_page.write_text(template_new_page(name, english_summary), encoding="utf-8")

        # Undecodable bytes are dropped while reading, not reported.
        raw_page = english_page.read_text(encoding="utf-8", errors="ignore")
        normalized = _strip_language_links(raw_page)
        normalized = ensure_top_block(name, normalized)
        normalized = ensure_required_sections(name, normalized)
        normalized = ensure_ir_level_syntax(name, normalized, level_table)
        english_page.write_text(normalized, encoding="utf-8")

        chinese_page = ISA_DIR / f"{name}_zh.md"
        chinese_page.write_text(build_zh_page(name, chinese_summary, normalized), encoding="utf-8")

    print(
        f"Normalized English ISA pages and generated Chinese counterparts for {len(manifest_entries)} instructions. "
        f"Loaded PTO-AS level table entries: {len(level_table)}"
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())