Statistics
Mean, standard deviation
# Throw a laplace dice 10000 times. Calculate mean
# and corrected standard deviation of this sample.
use math: sqrt
function mean(a)
return a.sum()/len(a)
end
function sigma(a,m=null)
if m is null
m = mean(a)
end
return sqrt(a.sum(|x| (x-m)^2)/(len(a)-1))
end
Stat = table{
function string
return """\
mean = {:f4},\n\
sigma = {:f4}\
""" % [self.mean, self.sigma]
end
}
function stat(a)
m = mean(a)
return table Stat{mean = m, sigma = sigma(a,m)}
end
s = stat(rand(1..6).list(10000))
print(s)
Simple linear regression
LinearRegression = table{
function string
return """\
center = [mx,my]
mx = {mx:f4}
my = {my:f4}
rxy = {rxy:f4}
y(x) = ax*x+bx
ax = {ax:f4}
bx = {bx:f4}
x(y) = ay*y+by
ay = {ay:f4}
by = {by:f4}
""" % record(self)
end
}
function linear_regression(a)
vx,vy = list(zip(*a))
mx = mean(vx)
my = mean(vy)
sx = vx.sum(|x| (x-mx)^2)
sy = vy.sum(|y| (y-my)^2)
sxy = a.sum(|[x,y]| (x-mx)*(y-my))
ax = sxy/sx; bx = my-ax*mx
ay = sxy/sy; by = mx-ay*my
return table LinearRegression{
rxy = sxy/sqrt(sx*sy),
center = [mx,my],
mx = mx, my = my,
ax = ax, bx = bx,
ay = ay, by = by,
fx = |x| ax*x+bx,
fy = |y| ay*y+by,
gx = |y| (y-bx)/ax,
gy = |x| (x-by)/ay
}
end
rng = rand()
a = list(-2..2: 0.1).map(|x| [x,x+2*rng()])
r = linear_regression(a)
# print(r)
# Let us plot this sample
use svg.plotlib: system
s = system()
s.scatter(a)
s.plot([r.fx,r.gy])
s.scatter([r.center],color="800080")
print(s.flush())
# moss lr > plot.svg
use math: sqrt,erf
use svg.plotlib: system
# Numerical analysis:
# inversion by bisection method
use math.na: inv
# Inverse transform sampling
function rand_cdf(F)
rng = rand()
return || inv(F,rng(),-100,100)
end
# Take a list of random numbers and return the
# cumulative distribution function of this sample.
function cdf(a)
return |x| a.count(|X| X<=x)/len(a)
end
# CDF: normal distribution
function norm({mu,sigma})
return |x| 0.5+0.5*erf((x-mu)/sqrt(2*sigma^2))
end
# CDF: standard normal distribution
Phi = norm(mu=0,sigma=1)
X = rand_cdf(Phi)
F1 = cdf(X.list(10))
F2 = cdf(X.list(100))
s = system(wy=1.25,n=100)
s.plot([Phi,F1,F2])
print(s.flush())
Obtaining a CDF from a PMF
use math: pi, exp, sqrt
use math.na: pli, integral
# Optionally, cache the values by
# piecewise linear interpolation.
function cache(f,a,b,d)
return pli(a,d,list(a..b: d).map(f))
end
erf = |x| 2/sqrt(pi)*integral(0,x,|t| exp(-t^2))
erf = cache(erf,-100,100,0.01)