<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1200 520" width="1200" height="520">
  <defs>
    <!-- Soft drop shadow shared by every card group. The filter region is
         expressed in objectBoundingBox units, so on small cards (under
         100px wide / 50px tall) a narrow percentage margin is only a few
         pixels. The margins below leave room for the stdDeviation=2 blur
         plus the 1px downward offset so the shadow is not hard-clipped. -->
    <filter id="cardShadow" x="-10%" y="-15%" width="120%" height="135%">
      <feDropShadow dx="0" dy="1" stdDeviation="2" flood-color="#202124" flood-opacity="0.06"/>
    </filter>
    <!-- Clip for the blue header band. It follows the outline of the whole
         outer card (1180x500, radius 12) rather than the 48px band itself,
         so only the band's top corners are rounded and its bottom edge
         stays square where it meets the card body. -->
    <clipPath id="headerClip"><rect x="10" y="10" width="1180" height="500" rx="12" ry="12"/></clipPath>
    <!-- Arrow head for solid (blue) connectors. markerUnits=userSpaceOnUse
         keeps the 8x8 head size independent of the connector stroke width;
         refX=9 places the tip of the triangle on the end of the path. -->
    <marker id="arrowSolid" viewBox="0 0 10 10" markerWidth="8" markerHeight="8" refX="9" refY="5" orient="auto" markerUnits="userSpaceOnUse">
      <path d="M0,1 L9,5 L0,9 Z" fill="#1A73E8" stroke="none"/>
    </marker>
    <!-- Same geometry as arrowSolid, filled grey for the dashed connector. -->
    <marker id="arrowDash" viewBox="0 0 10 10" markerWidth="8" markerHeight="8" refX="9" refY="5" orient="auto" markerUnits="userSpaceOnUse">
      <path d="M0,1 L9,5 L0,9 Z" fill="#5F6368" stroke="none"/>
    </marker>
  </defs>

  <!-- Outer card: light grey panel with a 12px corner radius and hairline border. -->
  <rect x="10" y="10" width="1180" height="500" rx="12"
        fill="#F8F9FA" stroke="#DADCE0" stroke-width="1"/>
  <!-- Blue header band across the top of the card, shaped by #headerClip. -->
  <rect x="10" y="10" width="1180" height="48" fill="#1A73E8" clip-path="url(#headerClip)"/>
  <!-- Diagram title, centred in the header ("Multimodal SDK capability overview"). -->
  <text x="600" y="42" text-anchor="middle"
        font-family="Roboto, PingFang SC, Microsoft YaHei, sans-serif"
        font-size="20" font-weight="700" fill="#FFFFFF">Multimodal SDK &#33021;&#21147;&#20840;&#26223;</text>

  <!-- Section 1: white panel holding the module relationship diagram,
       with its caption ("module call relationships"). -->
  <rect x="26" y="68" width="1148" height="148" rx="10" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
  <text x="44" y="88" font-family="Roboto, PingFang SC, sans-serif" font-size="11" font-weight="700" fill="#5F6368">&#27169;&#22359;&#35843;&#29992;&#20851;&#31995;</text>

  <!-- Module cards, drawn right to left. font-family and text-anchor are
       inherited by every label inside this group. -->
  <g font-family="Roboto, PingFang SC, sans-serif" text-anchor="middle">
    <!-- vLLM (neutral border: the external framework being patched). -->
    <g filter="url(#cardShadow)">
      <rect x="1038" y="134" width="120" height="40" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="1098" y="159" font-size="12" font-weight="700" fill="#202124">vLLM</text>
    </g>
    <!-- mm.patcher.vllm -->
    <g filter="url(#cardShadow)">
      <rect x="680" y="118" width="340" height="72" rx="8" fill="#FFFFFF" stroke="#1A73E8" stroke-width="1.2"/>
      <text x="850" y="144" font-size="12" font-weight="700" fill="#1A73E8">mm.patcher.vllm</text>
      <text x="850" y="164" font-size="10" fill="#5F6368">video_patcher / image_patcher</text>
      <text x="850" y="178" font-size="10" fill="#5F6368">qwen2_vl / internvl2 image_processor_patcher</text>
    </g>
    <!-- mm.adapter -->
    <g filter="url(#cardShadow)">
      <rect x="390" y="118" width="260" height="72" rx="8" fill="#FFFFFF" stroke="#1A73E8" stroke-width="1.2"/>
      <text x="520" y="144" font-size="12" font-weight="700" fill="#1A73E8">mm.adapter</text>
      <text x="520" y="164" font-size="10" fill="#5F6368">MultimodalQwen2VLImageProcessor</text>
      <text x="520" y="178" font-size="10" fill="#5F6368">InternVL2PreProcessor</text>
    </g>
    <!-- mm.acc.wrapper (tinted fill, heavier border). Subtitle reads
         "for Kunpeng CPU".
         NOTE(review): the footer banner of this diagram says "for Ascend
         NPU"; confirm that both targets are intended. -->
    <g filter="url(#cardShadow)">
      <rect x="44" y="118" width="300" height="72" rx="8" fill="#E8F0FE" stroke="#1A73E8" stroke-width="1.5"/>
      <text x="194" y="144" font-size="12" font-weight="700" fill="#1A73E8">mm.acc.wrapper</text>
      <text x="194" y="164" font-size="10" fill="#5F6368">&#38754;&#21521;&#40114;&#40527; CPU</text>
      <text x="194" y="178" font-size="10" fill="#5F6368">Image · Video · Tensor · Audio</text>
    </g>
  </g>

  <!-- Solid blue connectors:
       1. mm.adapter left edge to mm.acc.wrapper right edge
       2. mm.patcher.vllm left edge to mm.adapter right edge
       3. mm.patcher.vllm top edge, routed above the cards at y=104,
          down to the mm.acc.wrapper top edge -->
  <g fill="none" stroke="#1A73E8">
    <path d="M 390 154 L 344 154" stroke-width="2" marker-end="url(#arrowSolid)"/>
    <path d="M 680 154 L 650 154" stroke-width="2" marker-end="url(#arrowSolid)"/>
    <path d="M 850 118 L 850 104 L 194 104 L 194 118" stroke-width="1.8" marker-end="url(#arrowSolid)"/>
  </g>
  <!-- Dashed grey connector: mm.patcher.vllm right edge to vLLM left edge. -->
  <path d="M 1020 154 L 1038 154" fill="none" stroke="#5F6368" stroke-width="1.5" stroke-dasharray="5 3" marker-end="url(#arrowDash)"/>

  <!-- Connector labels, in the same order as the connectors above
       ("call", "patch", "call", "patch"). -->
  <g font-family="Roboto, PingFang SC, sans-serif" font-size="9" font-weight="600" text-anchor="middle" fill="#1A73E8">
    <text x="367" y="146">&#35843;&#29992;</text>
    <text x="665" y="146">patch</text>
    <text x="522" y="98">&#35843;&#29992;</text>
    <text x="1029" y="146" fill="#5F6368">patch</text>
  </g>

  <!-- Section 2 caption ("public capabilities"), above the four module columns. -->
  <text x="44" y="238"
        fill="#5F6368" font-size="11" font-weight="700"
        font-family="Roboto, PingFang SC, sans-serif">&#20844;&#24320;&#33021;&#21147;</text>

  <!-- Column 1: mm.acc.wrapper. A 276px header (x 26 to 302) above a grid
       of capability cards. font-family and text-anchor are inherited by
       every label in the column. -->
  <g id="col-accel" font-family="Roboto, PingFang SC, sans-serif" text-anchor="middle">
    <rect x="26" y="248" width="276" height="28" rx="6" fill="#1A73E8"/>
    <text x="164" y="267" font-size="12" font-weight="600" fill="#FFFFFF">mm.acc.wrapper</text>

    <!-- Rows 1 and 2: three 88px cards with 6px gutters (3*88 + 2*6 = 276),
         so the grid ends at x=302 flush with the header and with row 3,
         and the middle card is centred under the header label (x=164). -->
    <!-- Image.open: "image loading and decoding" -->
    <g filter="url(#cardShadow)"><rect x="26" y="284" width="88" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="70" y="302" font-size="10.5" font-weight="700" fill="#1A73E8">Image.open</text>
      <text x="70" y="318" font-size="8.5" fill="#5F6368">&#22270;&#20687;&#21152;&#36733;&#19982;&#35299;&#30721;</text></g>
    <!-- video_decode: "video frame decoding" -->
    <g filter="url(#cardShadow)"><rect x="120" y="284" width="88" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="164" y="302" font-size="10.5" font-weight="700" fill="#1A73E8">video_decode</text>
      <text x="164" y="318" font-size="8.5" fill="#5F6368">&#35270;&#39057;&#24103;&#35299;&#30721;</text></g>
    <!-- Tensor: "numpy / torch conversion" -->
    <g filter="url(#cardShadow)"><rect x="214" y="284" width="88" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="258" y="302" font-size="10.5" font-weight="700" fill="#1A73E8">Tensor</text>
      <text x="258" y="318" font-size="8.5" fill="#5F6368">numpy / torch &#20114;&#36716;</text></g>
    <!-- resize: "image scaling" -->
    <g filter="url(#cardShadow)"><rect x="26" y="342" width="88" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="70" y="360" font-size="10.5" font-weight="700" fill="#1A73E8">resize</text>
      <text x="70" y="376" font-size="8.5" fill="#5F6368">&#22270;&#20687;&#32553;&#25918;</text></g>
    <!-- crop: "image cropping" -->
    <g filter="url(#cardShadow)"><rect x="120" y="342" width="88" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="164" y="360" font-size="10.5" font-weight="700" fill="#1A73E8">crop</text>
      <text x="164" y="376" font-size="8.5" fill="#5F6368">&#22270;&#20687;&#35009;&#21098;</text></g>
    <!-- to_tensor: "convert to NCHW tensor" -->
    <g filter="url(#cardShadow)"><rect x="214" y="342" width="88" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="258" y="360" font-size="10.5" font-weight="700" fill="#1A73E8">to_tensor</text>
      <text x="258" y="376" font-size="8.5" fill="#5F6368">&#36716;&#20026; NCHW &#24352;&#37327;</text></g>

    <!-- Row 3: two 133px cards spanning the same 26 to 302 range. -->
    <!-- normalize: "mean / variance normalisation" -->
    <g filter="url(#cardShadow)"><rect x="26" y="400" width="133" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="92.5" y="418" font-size="10.5" font-weight="700" fill="#1A73E8">normalize</text>
      <text x="92.5" y="434" font-size="8.5" fill="#5F6368">&#22343;&#20540;&#26041;&#24046;&#24402;&#19968;&#21270;</text></g>
    <!-- load_audio: "audio loading and decoding" -->
    <g filter="url(#cardShadow)"><rect x="169" y="400" width="133" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="235.5" y="418" font-size="10.5" font-weight="700" fill="#1A73E8">load_audio</text>
      <text x="235.5" y="434" font-size="8.5" fill="#5F6368">&#38899;&#39057;&#21152;&#36733;&#19982;&#35299;&#30721;</text></g>
  </g>

  <!-- Column 2: mm.adapter. Header plus two full-width cards; font-family
       and text-anchor are inherited by every label in the column. -->
  <g id="col-adapter" font-family="Roboto, PingFang SC, sans-serif" text-anchor="middle">
    <rect x="316" y="248" width="276" height="28" rx="6" fill="#1A73E8"/>
    <text x="454" y="267" font-size="12" font-weight="600" fill="#FFFFFF">mm.adapter</text>
    <!-- "Qwen2-VL multimodal preprocessing"; the class name is set at 9.5px
         rather than the 10.5px used by the other card in this column. -->
    <g filter="url(#cardShadow)">
      <rect x="316" y="284" width="276" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="454" y="302" font-size="9.5" font-weight="700" fill="#1A73E8">MultimodalQwen2VLImageProcessor</text>
      <text x="454" y="318" font-size="8.5" fill="#5F6368">Qwen2-VL &#22810;&#27169;&#24577;&#39044;&#22788;&#29702;</text>
    </g>
    <!-- "InternVL2 dynamic tiling preprocessing" -->
    <g filter="url(#cardShadow)">
      <rect x="316" y="342" width="276" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="454" y="360" font-size="10.5" font-weight="700" fill="#1A73E8">InternVL2PreProcessor</text>
      <text x="454" y="376" font-size="8.5" fill="#5F6368">InternVL2 &#21160;&#24577;&#20998;&#22359;&#39044;&#22788;&#29702;</text>
    </g>
  </g>

  <!-- Column 3: mm.patcher.vllm. Header plus a 2x2 grid of 131px cards;
       font-family and text-anchor are inherited by every label. -->
  <g id="col-patcher" font-family="Roboto, PingFang SC, sans-serif" text-anchor="middle">
    <rect x="606" y="248" width="276" height="28" rx="6" fill="#1A73E8"/>
    <text x="744" y="267" font-size="12" font-weight="600" fill="#FFFFFF">mm.patcher.vllm</text>
    <!-- video_patcher: "patch vLLM video IO" -->
    <g filter="url(#cardShadow)"><rect x="606" y="284" width="131" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="671.5" y="302" font-size="10.5" font-weight="700" fill="#1A73E8">video_patcher</text>
      <text x="671.5" y="318" font-size="8.5" fill="#5F6368">patch vLLM &#35270;&#39057; IO</text></g>
    <!-- image_patcher: "patch vLLM image IO" -->
    <g filter="url(#cardShadow)"><rect x="751" y="284" width="131" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="816.5" y="302" font-size="10.5" font-weight="700" fill="#1A73E8">image_patcher</text>
      <text x="816.5" y="318" font-size="8.5" fill="#5F6368">patch vLLM &#22270;&#20687; IO</text></g>
    <!-- The two identifiers below are 32 and 33 characters long and nearly
         fill the 131px card even at 7.5px. No font is embedded in this
         file, so the rendered width depends on whichever fallback font is
         picked. textLength pins the rendered width to 123px (4px padding
         on each side) so the label cannot run over the card border. -->
    <!-- "patch Qwen2VL adapter" -->
    <g filter="url(#cardShadow)"><rect x="606" y="342" width="131" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="671.5" y="358" font-size="7.5" font-weight="700" fill="#1A73E8" textLength="123" lengthAdjust="spacingAndGlyphs">qwen2_vl_image_processor_patcher</text>
      <text x="671.5" y="376" font-size="8.5" fill="#5F6368">patch Qwen2VL &#36866;&#37197;&#22120;</text></g>
    <!-- "patch InternVL2 adapter" -->
    <g filter="url(#cardShadow)"><rect x="751" y="342" width="131" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="816.5" y="358" font-size="7.5" font-weight="700" fill="#1A73E8" textLength="123" lengthAdjust="spacingAndGlyphs">internvl2_image_processor_patcher</text>
      <text x="816.5" y="376" font-size="8.5" fill="#5F6368">patch InternVL2 &#36866;&#37197;&#22120;</text></g>
  </g>

  <!-- Column 4: mm.core.frame_selector. Header plus two full-width cards;
       font-family and text-anchor are inherited by every label. -->
  <g id="col-frame-selector" font-family="Roboto, PingFang SC, sans-serif" text-anchor="middle">
    <rect x="896" y="248" width="276" height="28" rx="6" fill="#1A73E8"/>
    <text x="1034" y="267" font-size="12" font-weight="600" fill="#FFFFFF">mm.core.frame_selector</text>
    <!-- "key-frame selection" -->
    <g filter="url(#cardShadow)">
      <rect x="896" y="284" width="276" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="1034" y="302" font-size="10.5" font-weight="700" fill="#1A73E8">KFrameSelector</text>
      <text x="1034" y="318" font-size="8.5" fill="#5F6368">&#20851;&#38190;&#24103;&#36873;&#21462;</text>
    </g>
    <!-- "range frame selection".
         NOTE(review): "KRangFrameSelector" may be a misspelling of
         "KRangeFrameSelector"; confirm against the class name the SDK
         actually exports before changing the label. -->
    <g filter="url(#cardShadow)">
      <rect x="896" y="342" width="276" height="50" rx="8" fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
      <text x="1034" y="360" font-size="10.5" font-weight="700" fill="#1A73E8">KRangFrameSelector</text>
      <text x="1034" y="376" font-size="8.5" fill="#5F6368">&#33539;&#22260;&#24103;&#36873;&#21462;</text>
    </g>
  </g>

  <!-- Footer banner: white pill spanning the card width with a centred
       tagline ("for Ascend NPU / accelerates multimodal large-model
       inference preprocessing / covers the full decode, transform and
       model-adaptation chain"). -->
  <rect x="26" y="468" width="1148" height="28" rx="8"
        fill="#FFFFFF" stroke="#DADCE0" stroke-width="1"/>
  <text x="600" y="487" text-anchor="middle"
        font-family="Roboto, PingFang SC, sans-serif"
        font-size="11" fill="#1A73E8">&#38754;&#21521;&#26119;&#33150; NPU &#8226; &#21152;&#36895;&#22810;&#27169;&#24577;&#22823;&#27169;&#22411;&#25512;&#29702;&#39044;&#22788;&#29702; &#8226; &#35206;&#30422;&#35299;&#30721; / &#21464;&#25442; / &#27169;&#22411;&#36866;&#37197;&#20840;&#38142;&#36335;</text>
</svg>