eaebec39创建于 2024年10月24日历史提交
package scientific.stats.correlation

/* Pearson correlation coefficient and p-value */

import std.math.*
import std.unittest.*
import std.unittest.testmacro.*

import scientific.linear.*
import scientific.numbers.*
import scientific.stats.random.Random
import scientific.stats.normal.randn
import scientific.stats.continuous.betaCDF
import scientific.stats.summary.*

public func pearsonr<T>(x: Vector<T>, y: Vector<T>): (Float64, Float64) where T <: Float<T> {
    let n = x.size()
    if (n != y.size()) {
        throw IllegalArgumentException("pearsonr: size do not match")
    }

    let xm = x - mean(x)
    let ym = y - mean(y)
    let normxm = xm.norm()
    let normym = ym.norm()
    var r: Float64 = (xm / normxm).dot(ym / normym).toFloat64()

    // abs(r) > 1 can only be due to floating-point error
    r = max(min(r, 1.0), -1.0)

    let ab = Float64(n) / 2.0 - 1.0
    let prob = 2.0 * betaCDF(0.5 * (1.0 - abs(r)), ab, ab)
    return (r, prob)
}

// Point biserial correlation coefficient, equivalent to pearsonr,
// except the first argument is always a binary variable.
public func pointbiserialr<T>(x: Vector<Int64>, y: Vector<T>): (Float64, Float64) where T <: Float<T> {
    let rx = empty<T>(x.size())
    for (i in 0..x.size()) {
        rx[i] = T.fromInt(x[i])
    }
    return pearsonr(rx, y)
}


@Test
public class TestPearsonR {
    @TestCase
    func testPearsonR1(): Unit {
        // Test with hand-picked input
        let (r, prob) = pearsonr(
            vector<Float64>([1.0, 2.0, 3.0, 4.0, 5.0]),
            vector<Float64>([10.0, 9.0, 2.5, 6.0, 4.0])
        )
        @Assert(approxEqual(r, -0.7426106572325057, atol:1e-10))
        @Assert(approxEqual(prob, 0.1505558088534455, atol:1e-10))
    }

    @TestCase
    func testPearsonR2(): Unit {
        // Test generated data following y = x + e
        let r = Random(2)
        let s: Float64 = 0.5
        let x = randn(r, 500, 0.0, 1.0)
        let e = randn(r, 500, 0.0, 0.5)
        let y = x + e
        let (pr, prob) = pearsonr(x, y)
        @Assert(approxEqual(pr, 0.894537, atol:1e-5))  // depend on random number generator
        @Assert(approxEqual(prob, 0.0, atol:1e-5))     // nearly certain
    }

    @TestCase
    func testPointBiserialR(): Unit {
        let a = vector([0, 0, 0, 1, 1, 1, 1])
        let b = vector([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        let (r, prob) = pointbiserialr(a, b)
        @Assert(approxEqual(r, 0.866025, atol:1e-6))
        @Assert(approxEqual(prob, 0.011725, atol:1e-6))
    }
}