package scientific.matplot
import std.fs.*
import std.io.*
import std.collection.*
import std.math.*
import std.unittest.*
import std.unittest.testmacro.*
import scientific.numbers.*
import scientific.linear.*
import scientific.stats.random.*
import scientific.stats.normal.*
// Native word-cloud entry point. Receives the text as a C string, a pointer to
// an array of C strings (`black_list`) and that array's element count (`len`).
// NOTE(review): presumably the black list holds words to exclude - confirm
// against the C implementation, which is not visible in this file.
foreign func c_wordcloud(text: CString, black_list: CPointer<CString>, len: Int64): Unit
// Native tokenizer. Receives the text as a C string and returns a `Tokens`
// value (pointer to C strings plus a count).
// NOTE(review): ownership of the returned buffer is not visible here - confirm
// who is responsible for releasing it.
foreign func c_tokenize(text: CString): Tokens
// C-compatible result record returned by `c_tokenize`: a pointer to an array
// of C strings together with the number of entries.
// Field order is kept as declared because the struct is marked `@C`.
@C
struct Tokens {
    // Pointer to the first C string of the token array.
    var tokens: CPointer<CString>
    // Number of entries readable through `tokens`.
    var size: Int64

    // Builds a record from a token pointer and its element count.
    init(ptr: CPointer<CString>, count: Int64) {
        this.tokens = ptr
        this.size = count
    }
}
// Passes `text` and the words in `black_list` to the native `c_wordcloud`
// routine.
//
// Parameters:
//   text       - the text handed to the native routine.
//   black_list - words handed to the native routine as an array of C strings.
//
// Every C string and the pointer array stay alive for the whole native call
// and are released only after it returns. Previously each black-list string
// was freed right after being stored, so the native code received dangling
// pointers, and the pointer array itself was never freed.
// NOTE(review): this assumes `c_wordcloud` does not keep the pointers after it
// returns - the original already freed `text` straight after the call.
public func wordcloud(text: String, black_list: Array<String>) {
    let size = black_list.size
    // 8 bytes per slot - presumably sizeof(char*) on a 64-bit target.
    let a = unsafe { malloc(UIntNative(8 * size)) }
    let b_list_ptr = CPointer<CString>(a)
    for (i in 0..size) {
        let cstr_black = unsafe { LibC.mallocCString(black_list[i]) }
        unsafe { b_list_ptr.write(i, cstr_black) }
    }
    let cstr_text = unsafe { LibC.mallocCString(text) }

    unsafe { c_wordcloud(cstr_text, b_list_ptr, size) }

    // Release everything that was allocated above, now that the native side
    // has finished reading it.
    unsafe { LibC.free(cstr_text) }
    for (i in 0..size) {
        unsafe { LibC.free(b_list_ptr.read(i)) }
    }
    unsafe { LibC.free(b_list_ptr) }
}
// Splits `text` into tokens using the native `c_tokenize` routine and returns
// them as Cangjie strings, in the order the native side reported them.
//
// NOTE(review): the C copy of `text` is freed before the tokens are read, and
// the returned token buffer is never released here - confirm both against the
// ownership rules of `c_tokenize`.
public func tokenize(text: String): Array<String> {
    let rawText = unsafe { LibC.mallocCString(text) }
    let parsed: Tokens = unsafe { c_tokenize(rawText) }
    unsafe { LibC.free(rawText) }

    let words = ArrayList<String>()
    var idx: Int64 = 0
    while (idx < parsed.size) {
        // Copy each C string into a managed String.
        let entry: CString = unsafe { parsed.tokens.read(idx) }
        words.add(entry.toString())
        idx += 1
    }
    return words.toArray()
}
// public func wordcount(text: String, black_list: Array<String>) {
// let size = black_list.size()
// var a = unsafe { malloc(UIntNative(8 * size)) }
// var b_list_ptr = CPointer<CString>(a)
// for (i in 0..size) {
// var cstr_black = unsafe { LibC.mallocCString(black_list[i]) }
// unsafe { b_list_ptr.write(i, cstr_black) }
// unsafe { LibC.free(cstr_black) }
// }
// var cstr_text = unsafe { LibC.mallocCString(text) }
// let tokens = unsafe{ c_wordcount_tokens(cstr_text, b_list_ptr, size) }
// let tokens_arrlst = ArrayList<String>()
// let nums = unsafe{ c_wordcount_sz(cstr_text, b_list_ptr, size) }
// for (i in 0..nums) {
// tokens_arrlst.append(unsafe{tokens.read(i)}.toString())
// }
// let tokens_arr = tokens_arrlst.toArray()
// let count = vector<Int64>(unsafe{ c_wordcount_count(cstr_text, b_list_ptr, size) })
// unsafe { LibC.free(cstr_text) }
// return (tokens_arr, count)
// }
// Reads the whole file at `path` as text and always closes the handle, even
// when reading throws.
private func readWordCloudFixture(path: String): String {
    let file: File = File(path, OpenMode.Read)
    try {
        let reader: StringReader<File> = StringReader(file)
        return reader.readToEnd()
    } finally {
        file.close()
    }
}

// Builds a word cloud from the sonnets fixture, using the tokens of the
// black-list fixture, and saves the figure as SVG before clearing it.
public func testWordCloud1() {
    let path = "./tests/imgs/wordcloud/shakespeare_sonnets.txt"
    let low_text = readWordCloudFixture(path).toAsciiLower()

    let black_path = "./tests/imgs/wordcloud/en_blacklist.txt"
    let bl_tokens = tokenize(readWordCloudFixture(black_path))

    wordcloud(low_text, bl_tokens)
    save("./tests/imgs/wordcloud/wordcloud_1.svg", "svg")
    clear()
}
// Entry point for the word-cloud tests; currently runs `testWordCloud1` only.
public func testWordCloud() {
testWordCloud1()
}