#!/bin/bash
#
# Generates a small Rust crate exposing the `text-splitter` crate through a
# C ABI, builds it as a shared library, and copies the library plus its header
# into install_comm/, install_llt/ and the project output directory.
#
# All paths are derived from the current working directory, so the script must
# be started from the directory that should hold the install_* trees.
set -euo pipefail

# Staging directory for the built library and header.
BUILD_DIR="$(pwd)/install_comm"
# Project root, two levels above the working directory.
ROOT_DIR="$(pwd)/../.."
# Final destination of the comm/ and llt/ install trees.
OUTPUT_DIR="${ROOT_DIR}/output/kernel/dependency/text-splitter"
# Scratch directory that receives the generated Cargo project.
FFI_DIR="$(pwd)/text_splitter_ffi_build"
readonly BUILD_DIR ROOT_DIR OUTPUT_DIR FFI_DIR

# Remove leftovers of a previous run. The glob is intentionally unquoted so it
# expands; `:?` aborts instead of running `rm -rf` on an empty path.
rm -rf -- install_* "${FFI_DIR:?}"

echo "[text-splitter] Creating FFI project..."
mkdir -p -- "${FFI_DIR}/src"
# Write the Cargo manifest of the generated FFI crate. The heredoc delimiter is
# quoted so the manifest text is emitted literally, without shell expansion.
# The redirect target is quoted so a working directory containing whitespace
# does not produce an "ambiguous redirect" error.
cat > "${FFI_DIR}/Cargo.toml" << 'EOF'
[package]
name = "text_splitter_ffi"
version = "0.28.0"
edition = "2021"

[lib]
crate-type = ["cdylib"]

[dependencies]
text-splitter = "0.28.0"
libc = "0.2"
EOF
# Write the Rust source of the FFI crate. The heredoc delimiter is quoted so
# the Rust code is emitted literally, without shell expansion.
cat > "${FFI_DIR}/src/lib.rs" << 'EOF'
//! C ABI wrapper around the `text-splitter` crate.
//!
//! Every exported function carries `#[no_mangle]` so that the shared library
//! exposes exactly the symbol names declared in `text_splitter_wrapper.h`.

use std::ffi::{CStr, CString};
use std::os::raw::{c_char, c_int};
use std::ptr;
use text_splitter::{Characters, ChunkConfig, TextSplitter};

/// Opaque handle given to C callers; owns a configured splitter.
pub struct TextSplitterHandle {
    splitter: TextSplitter<Characters>,
}

/// Creates a splitter with the given chunk size and overlap.
///
/// Returns a heap-allocated handle that must be released with
/// `FreeTextSplitter`, or NULL when `max_chunk_size` is not positive,
/// `max_chunk_overlap` is negative, or the chunk configuration rejects the
/// overlap.
#[no_mangle]
pub extern "C" fn CreateTextSplitter(
    max_chunk_size: c_int,
    max_chunk_overlap: c_int,
) -> *mut TextSplitterHandle {
    // Reject negatives explicitly instead of relying on a wrapping cast.
    if max_chunk_size <= 0 || max_chunk_overlap < 0 {
        return ptr::null_mut();
    }
    let cfg = match ChunkConfig::new(max_chunk_size as usize)
        .with_overlap(max_chunk_overlap as usize)
    {
        Ok(cfg) => cfg,
        Err(_) => return ptr::null_mut(),
    };
    let handle = Box::new(TextSplitterHandle {
        splitter: TextSplitter::new(cfg),
    });
    Box::into_raw(handle)
}

/// Releases a handle returned by `CreateTextSplitter`. NULL is ignored.
#[no_mangle]
pub extern "C" fn FreeTextSplitter(handle: *mut TextSplitterHandle) {
    if !handle.is_null() {
        // Re-box the pointer so the handle is dropped and its memory freed.
        drop(unsafe { Box::from_raw(handle) });
    }
}

/// Splits the NUL-terminated UTF-8 string `document` into chunks.
///
/// On success returns a NULL-terminated array of NUL-terminated strings and
/// stores the number of chunks in `*chunk_num`; the array must be released
/// with `FreeSplitResult`. On failure (NULL argument, invalid UTF-8, chunk
/// count not representable as `int`) returns NULL, and `*chunk_num` is set to
/// 0 whenever `chunk_num` itself is non-NULL.
#[no_mangle]
pub extern "C" fn SplitText(
    handle: *mut TextSplitterHandle,
    document: *const c_char,
    chunk_num: *mut c_int,
) -> *mut *mut c_char {
    if chunk_num.is_null() {
        return ptr::null_mut();
    }
    unsafe {
        // Never leave a stale count behind on an error path.
        *chunk_num = 0;
        if handle.is_null() || document.is_null() {
            return ptr::null_mut();
        }

        let text = match CStr::from_ptr(document).to_str() {
            Ok(s) => s,
            Err(_) => return ptr::null_mut(),
        };

        let handle_ref = &*handle;
        let chunks = handle_ref.splitter.chunks(text).collect::<Vec<_>>();
        let count = match c_int::try_from(chunks.len()) {
            Ok(n) => n,
            Err(_) => return ptr::null_mut(),
        };

        let mut result: Vec<*mut c_char> = Vec::with_capacity(chunks.len() + 1);
        for chunk in chunks {
            match CString::new(chunk) {
                Ok(c_chunk) => result.push(c_chunk.into_raw()),
                Err(_) => {
                    // Reclaim the strings converted so far before bailing out.
                    for raw in result {
                        drop(CString::from_raw(raw));
                    }
                    return ptr::null_mut();
                }
            }
        }
        result.push(ptr::null_mut());

        *chunk_num = count;
        // A boxed slice has exactly `len` elements, so `FreeSplitResult` can
        // rebuild the allocation from the element count alone.
        Box::into_raw(result.into_boxed_slice()) as *mut *mut c_char
    }
}

/// Releases an array returned by `SplitText`, including every string in it.
/// NULL is ignored.
#[no_mangle]
pub extern "C" fn FreeSplitResult(chunks: *mut *mut c_char) {
    if chunks.is_null() {
        return;
    }
    unsafe {
        // Count the strings that precede the terminating NULL entry.
        let mut count: usize = 0;
        while !(*chunks.add(count)).is_null() {
            count += 1;
        }
        for i in 0..count {
            drop(CString::from_raw(*chunks.add(i)));
        }
        // The array holds `count` strings plus the NULL terminator.
        drop(Box::from_raw(ptr::slice_from_raw_parts_mut(chunks, count + 1)));
    }
}
EOF
# Write the header that declares the exported FFI functions. The include guard
# and the __cplusplus guard make it usable from both C and C++ translation
# units; a bare `extern "C" {` is a syntax error for a C compiler.
cat > "${FFI_DIR}/text_splitter_wrapper.h" << 'EOF'
#ifndef TEXT_SPLITTER_WRAPPER_H
#define TEXT_SPLITTER_WRAPPER_H

#ifdef __cplusplus
extern "C" {
#endif

/* Opaque handle to a splitter instance. */
typedef void* TextSplitterHandle;

/* Creates a splitter; returns NULL when the arguments are rejected
 * (max_chunk_size must be greater than 0). */
TextSplitterHandle CreateTextSplitter(int max_chunk_size, int max_chunk_overlap);

/* Releases a handle returned by CreateTextSplitter. NULL is ignored. */
void FreeTextSplitter(TextSplitterHandle handle);

/* Splits `document` into chunks. Returns a NULL-terminated array of
 * NUL-terminated strings and stores the chunk count in *chunk_num, or returns
 * NULL on failure. Release the array with FreeSplitResult. */
char** SplitText(TextSplitterHandle handle, const char* document, int* chunk_num);

/* Releases an array returned by SplitText. NULL is ignored. */
void FreeSplitResult(char** chunks);

#ifdef __cplusplus
}
#endif

#endif /* TEXT_SPLITTER_WRAPPER_H */
EOF
# The build needs a Rust toolchain; fail early with a diagnostic on stderr.
if ! command -v cargo > /dev/null 2>&1; then
  echo "Error: Rust/Cargo not found. Please install Rust first." >&2
  exit 1
fi

echo "[text-splitter] Building Rust FFI library..."
cd -- "${FFI_DIR}"
# Position-independent code plus linker hardening flags (full RELRO,
# immediate binding, non-executable stack).
export RUSTFLAGS="-C relocation-model=pic -C link-arg=-Wl,-z,relro,-z,now,-z,noexecstack"
cargo build --release

echo "[text-splitter] Installing..."
mkdir -p -- "${BUILD_DIR}/lib" "${BUILD_DIR}/include"
cp -- target/release/libtext_splitter_ffi.so "${BUILD_DIR}/lib/"
cp -- text_splitter_wrapper.h "${BUILD_DIR}/include/"
strip -- "${BUILD_DIR}/lib/libtext_splitter_ffi.so"

# Return to the directory that holds the install_* trees and duplicate the
# comm tree as the llt tree.
cd -- "$(dirname -- "${FFI_DIR}")"
cp -r -- install_comm install_llt

# Publish both trees into the project output directory.
mkdir -p -- "${OUTPUT_DIR}/comm" "${OUTPUT_DIR}/llt"
cp -r -- install_comm/* "${OUTPUT_DIR}/comm/"
cp -r -- install_llt/* "${OUTPUT_DIR}/llt/"

echo "[text-splitter] Build completed!"