package scientific.stats.correlation
/* Pearson correlation coefficient and p-value */
import std.math.*
import std.unittest.*
import std.unittest.testmacro.*
import scientific.linear.*
import scientific.numbers.*
import scientific.stats.random.Random
import scientific.stats.normal.randn
import scientific.stats.continuous.betaCDF
import scientific.stats.summary.*
public func pearsonr<T>(x: Vector<T>, y: Vector<T>): (Float64, Float64) where T <: Float<T> {
let n = x.size()
if (n != y.size()) {
throw IllegalArgumentException("pearsonr: size do not match")
}
let xm = x - mean(x)
let ym = y - mean(y)
let normxm = xm.norm()
let normym = ym.norm()
var r: Float64 = (xm / normxm).dot(ym / normym).toFloat64()
// abs(r) > 1 can only be due to floating-point error
r = max(min(r, 1.0), -1.0)
let ab = Float64(n) / 2.0 - 1.0
let prob = 2.0 * betaCDF(0.5 * (1.0 - abs(r)), ab, ab)
return (r, prob)
}
// Point biserial correlation coefficient, equivalent to pearsonr,
// except the first argument is always a binary variable.
public func pointbiserialr<T>(x: Vector<Int64>, y: Vector<T>): (Float64, Float64) where T <: Float<T> {
let rx = empty<T>(x.size())
for (i in 0..x.size()) {
rx[i] = T.fromInt(x[i])
}
return pearsonr(rx, y)
}
@Test
public class TestPearsonR {
@TestCase
func testPearsonR1(): Unit {
// Test with hand-picked input
let (r, prob) = pearsonr(
vector<Float64>([1.0, 2.0, 3.0, 4.0, 5.0]),
vector<Float64>([10.0, 9.0, 2.5, 6.0, 4.0])
)
@Assert(approxEqual(r, -0.7426106572325057, atol:1e-10))
@Assert(approxEqual(prob, 0.1505558088534455, atol:1e-10))
}
@TestCase
func testPearsonR2(): Unit {
// Test generated data following y = x + e
let r = Random(2)
let s: Float64 = 0.5
let x = randn(r, 500, 0.0, 1.0)
let e = randn(r, 500, 0.0, 0.5)
let y = x + e
let (pr, prob) = pearsonr(x, y)
@Assert(approxEqual(pr, 0.894537, atol:1e-5)) // depend on random number generator
@Assert(approxEqual(prob, 0.0, atol:1e-5)) // nearly certain
}
@TestCase
func testPointBiserialR(): Unit {
let a = vector([0, 0, 0, 1, 1, 1, 1])
let b = vector([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
let (r, prob) = pointbiserialr(a, b)
@Assert(approxEqual(r, 0.866025, atol:1e-6))
@Assert(approxEqual(prob, 0.011725, atol:1e-6))
}
}