FIZ353 - Numerical Analysis | 23/10/2020
Emre S. Tasci emre.tasci@hacettepe.edu.tr
The free-fall data we will be using is taken from: D. Horvat & R. Jecmenica, "The Free Fall Experiment" Resonance 21 259-275 (2016) [https://doi.org/10.1007/s12045-016-0321-9].
Here's the contents of our data file:
# Load the free-fall timing data (Horvat & Jecmenica, 2016) into a DataFrame.
import pandas as pd
data1 = pd.read_csv("data/03_FreeFallData.csv")
# Peek at the column labels and the raw table (notebook-style display).
data1.columns
data1
We don't need the first row and first column, so let's remove them:
# Drop the unneeded rows (index 0 and 11) and the vertical-distance column.
data1.drop(0, inplace=True)
data1.drop(11, inplace=True)
data1.drop(['Vert.dist. y/m'],axis=1, inplace=True)
data1
# The entries are still strings at this point -- check one cell and its type.
data1.loc[2,"0.7"]
type(data1.loc[2,"0.7"])
data1.dtypes
# Convert every column to float so numerical operations work.
data1 = data1.astype('float')
data1.dtypes
# Re-number the rows from 0.  reset_index keeps the old labels as a new
# 'index' column, so that column is dropped right after.
data1.reset_index(inplace=True)
data1
#data1.reset_index(inplace=True, drop=True)
data1.drop('index',axis=1, inplace=True)
data1
import seaborn as sns
sns.set_theme() # To make things appear "more beautiful" 8)
# %matplotlib notebook
# Work on a copy so the cleaned data1 stays untouched.
data2 = data1.copy()
data2
# Line plot of every drop-height column against the measurement index.
plt1 = sns.relplot(data=data2,kind="line",marker="o")
k =plt1.set(xticks=data2.index)
# Per-column statistics: mean, variance and standard deviation
# (pandas var()/std() default to the unbiased, ddof=1 estimators).
data2.mean()
data_stats = pd.DataFrame(data2.mean())
# The means come back in a column labeled 0; give it a meaningful name.
data_stats.rename(columns={0:'dmean'}, inplace=True )
data_stats['dvar'] = data2.var()
data_stats['dstd'] = data2.std()
data_stats
data_stats.dstd # Unbiased
or $$\sigma = \sqrt{\frac{\sum_{i}{\left(x_i - \bar{x}\right)^2}}{N-1}}\;\text{(Sample)}$$
import numpy as np

# Population (biased, ddof=0) standard deviation of each column, computed
# "by hand" to contrast with pandas' unbiased (ddof=1) default above.
N = data2.shape[0]
for column in list(data2.columns):
    col_mean = data2.loc[:, column].mean()
    sq_sum = sum((value - col_mean) ** 2 for value in data2.loc[:, column])
    print("{:s}: {:.6f}".format(column, np.sqrt(sq_sum / (N))))
data2.std(ddof=0) # Biased
The average over all the sample deviations should be equal to the deviation of the population!
For more information on this Bessel's Correction, check: http://mathcenter.oxford.emory.edu/site/math117/besselCorrection/
But what if we don't know the true value?..
Computations are repeated until $\left|\varepsilon_a\right| < \left|\varepsilon_s\right|$ ($\varepsilon_s$ is the satisfactory precision criterion).
The result is correct to at least $n$ significant figures given that: $$\varepsilon_s=\left(0.5\times10^{2-n}\right)\,\%$$
To estimate $e^{0.5}$ so that the absolute value of the approximate error estimate falls below an error criterion conforming to 3 significant figures, how many terms do you need to include?
Solution:
$\varepsilon_s=(0.5\times 10^{2-3})\,\%$
# Stopping criterion for n = 3 significant figures: eps_s = (0.5 x 10^(2-n)) %
eps_s = 0.5 * 10 ** (2 - 3)
print("{:.2f}%".format(eps_s))
import math
import numpy as np

# Maclaurin series for e^x at x = 0.5: add terms until the approximate
# relative error |eps_a| falls below eps_s = (0.5 x 10^(2-3)) % = 0.05 %.
eps_s = 0.5*10**(2-3)
x = 0.5
eps_a = 1000          # start above any threshold so the loop runs at least once
e_prev = 0
no_of_terms = 0
print("{:>3} {:^10}\t{:^7}".format("#","e","E_a"))
while(eps_a > eps_s):
    # FIX: np.math.factorial was removed in NumPy 1.25 -- use math.factorial.
    e_calculated = e_prev + x**no_of_terms/math.factorial(no_of_terms)
    # FIX: express eps_a in percent so it is commensurate with eps_s
    # (eps_s is a percentage; the original compared a bare fraction to it).
    eps_a = np.abs(((e_calculated - e_prev)/e_calculated))*100
    print("{:3d}:{:10.6f}\t{:7.4f}".format(no_of_terms+1,e_calculated,eps_a))
    e_prev = e_calculated
    no_of_terms += 1
import math
import numpy as np

# Same series for e^0.5, now also tracking the TRUE relative error E_t
# against the known value sqrt(e).
eps_s = 0.5*10**(2-3)
e_sqrt_real = np.sqrt(np.e)
x = 0.5
eps_a = 1000          # start above any threshold so the loop runs at least once
e_sqrt_prev = 0
no_of_terms = 0
# FIX: stray '$' removed from the E_t column header.
print("{:>3} {:^13}{:^20}{:^12}".format("#","e_calc","E_a","E_t"))
while(eps_a > eps_s):
    # FIX: np.math.factorial was removed in NumPy 1.25 -- use math.factorial.
    e_sqrt_calculated = e_sqrt_prev + x**no_of_terms/math.factorial(no_of_terms)
    # Approximate and true relative errors, both in percent.
    eps_a = np.abs(((e_sqrt_calculated - e_sqrt_prev)/e_sqrt_calculated))*100
    eps_t = np.abs((e_sqrt_real - e_sqrt_calculated)/e_sqrt_real)*100
    print("{:3d}:{:10.6f}{:16.5f}{:16.5f}".format(no_of_terms+1,e_sqrt_calculated,eps_a,eps_t))
    e_sqrt_prev = e_sqrt_calculated
    no_of_terms += 1
Once again, consider our free fall data:
data2
data_stats
from scipy.optimize import minimize
def fun_err(m, x):
    """Root-mean-square deviation of the samples in x from the value m."""
    squared = [(sample - m) ** 2 for sample in x]
    return np.sqrt(sum(squared) / np.prod(x.shape))
# RMS error evaluated at the mean and at mean+1 -- the mean gives the smaller value.
fun_err(data2['0.2'].mean(),data2['0.2'])
fun_err(data2['0.2'].mean()+1,data2['0.2'])
# Minimizing the RMS error over m should recover (approximately) the mean.
minimize(fun_err,data2['0.2'].mean(),args=(data2['0.2']))
# Compare, column by column, the minimizer's optimum with the plain mean.
data2['0.2'].mean()
list(data2.columns)
data_stats.loc['0.2','dmean']
print("{:^5}: {:^8} ({:^8})".format("col","min","mean"))
for col in list(data2.columns):
    res_min = minimize(fun_err,1,args=(data2[col]))
    # FIX: res_min.x is a 1-element array; float() on it is deprecated since
    # NumPy 1.25 -- extract the scalar element explicitly instead.
    print("{:^5}: {:8.6f} ({:8.6f})".format(col,res_min.x[0],data_stats.loc[col,'dmean']))
# NOTE(review): in the flat notebook dump it is unclear whether this last
# print was inside the loop; kept after it (prints the last column's mean).
print("{:.6f}".format(data_stats.loc[col,'dmean']))
def fun_err2(m, x):
    """Sum of squared deviations of the samples in x from m (no normalization)."""
    total = 0
    for sample in x:
        total += (sample - m) ** 2
    return total
# The un-normalized squared error is minimized at the same location as the RMS.
fun_err2(data2['0.2'].mean(),data2['0.2'])
minimize(fun_err2,data2['0.2'].mean(),args=(data2['0.2']))
# Same per-column comparison using the sum-of-squares error function.
print("{:^5}: {:^8} ({:^8})".format("col","min","mean"))
for col in list(data2.columns):
    res_min = minimize(fun_err2,1,args=(data2[col]))
    # FIX: float() on a size-1 array is deprecated (NumPy >= 1.25): index instead.
    print("{:^5}: {:8.6f} ({:8.6f})".format(col,res_min.x[0],data_stats.loc[col,'dmean']))
def fun_err3(m, x):
    """Sum of absolute deviations (L1 error) of the samples in x from m."""
    return sum(np.abs(sample - m) for sample in x)
# Minimize the absolute-deviation error for the '0.2' column.
fun_err3(data2['0.2'].mean(),data2['0.2'])
minimize(fun_err3,data2['0.2'].mean(),args=(data2['0.2']))
# Keep only the per-column means: these act as the measured fall times.
data_exp = pd.DataFrame(data_stats.dmean)
data_exp
def freefall_err(g, y_exp, t):
    """Sum of squared residuals between measured heights y_exp and the
    free-fall model y = g*t^2/2 evaluated at times t, for a trial g."""
    residuals = 0.5 * g * t**2 - y_exp
    return np.sum(residuals**2)
# The drop heights are stored as the index labels (strings) -> convert to float.
y_exp = np.array(list(data_exp.index))
print(y_exp)
y_exp.dtype
y_exp = np.array(list(data_exp.index), dtype=float)
y_exp.dtype
print(y_exp)
# Mean fall time for each height.
t = np.array(list(data_exp.dmean[:]))
print(t)
# Probe the error function at two trial values of g.
freefall_err(9, y_exp, t)
freefall_err(9.1, y_exp, t)
# Manual "grid search": successively refine the interval around the minimum.
for g in np.arange(9, 10, 0.1):
    print("{:5.3f}:{:10.6f}".format(g, freefall_err(g, y_exp, t)))
for g in np.arange(9.7, 9.9, 0.01):
    print("{:5.3f}:{:10.6f}".format(g, freefall_err(g, y_exp, t)))
for g in np.arange(9.79, 9.81, 0.001):
    print("{:5.3f}:{:10.8f}".format(g, freefall_err(g, y_exp, t)))
# Let scipy find the optimal g directly from the error function.
res_min = minimize(freefall_err,x0=1,args=(y_exp,t))
print(res_min)
import matplotlib.pyplot as plt
# Measured points (red circles) against the fitted free-fall curve (blue line).
plt.plot(t,y_exp,"or")
tt = np.linspace(0,0.7,100)
plt.plot(tt,0.5*res_min.x*tt**2,"-b")
plt.show()
from scipy.optimize import least_squares

# The same fit via least_squares, which works on the residual vector directly.
g0 = 1

def fun_yt(g, t):
    """Free-fall distance model: y(t) = g*t^2/2."""
    return 0.5 * g * t**2

t = np.array(list(data_exp.dmean[:]))
y_exp = np.array(list(data_exp.index), dtype=float)

def fun(g):
    """Residuals between the model at the trial g and the measured heights."""
    return fun_yt(g, t) - y_exp

res_ls = least_squares(fun, g0)
print(res_ls)
($\leftrightarrow$ Standard deviation $\sigma=\sqrt{\frac{S_t}{n-1}}$, variance $\sigma^2=\frac{\sum_{i}{\left(y_i-\bar{y}\right)^2}}{n-1}=\frac{\sum_{i}{y_i^2}-\left(\sum_{i}{y_i}\right)^2/n}{n-1}$)
(a) $S_t$, (b) $S_r$
(Source: Chapra)
It doesn't much matter even if the model we're trying to fit is non-linear. We can simply apply a transformation to form it into a linear one. Here are a couple of examples for handling non-linear functions:
| Model | Nonlinear | Linearized |
|---|---|---|
| exponential | $y=\alpha_1 e^{\beta_1 x}$ | $\ln{y}=\ln{\alpha_1}+\beta_1 x$ |
| power | $y = \alpha_2 x^{\beta_2}$ | $\log{y}=\log{\alpha_2}+\beta_2\log{x}$ |
| saturation-growth-rate | $y=\alpha_3\frac{x}{\beta_3+x}$ | $\frac{1}{y}=\frac{1}{\alpha_3}+\frac{\beta_3}{\alpha_3}\frac{1}{x}$ |
Source: Chapra
Example: Fit the data to the power model (Chapra, 14.6)
Data:
| $i$ | $x_i$ | $y_i$ |
|---|---|---|
| 1 | 10 | 25 |
| 2 | 20 | 70 |
| 3 | 30 | 380 |
| 4 | 40 | 550 |
| 5 | 50 | 610 |
| 6 | 60 | 1220 |
| 7 | 70 | 830 |
| 8 | 80 | 1450 |
Find the optimum $\alpha$ and $\beta$ for the best fit of $y=\alpha x^\beta$ for the given data.
# Chapra, Example 14.6 data: fit y = alpha * x^beta.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data1 = pd.DataFrame({'i':np.arange(1,9),'x':np.arange(10,90,10),
'y':[25,70,380,550,610,1220,830,1450]})
# Use the measurement number i as the row index.
data1.set_index('i', inplace=True)
data1
# Scatter plot of the raw data.
plt.plot(data1.x,data1.y,"o")
plt.show()
We can convert it such that $$\log{y}=\log{\alpha} + \beta\log{x}$$
and as the least square fit for a linear model given as $y' = a_0 + a_1 x'$ is:
$$a_1 = \frac{n\sum{x_i' y_i'} - \sum{x_i'}\sum{y_i'}}{n\sum{x_i'^2}-\left(\sum{x_i'}\right)^2}$$$$a_0 = \bar{y}' - a_1\bar{x}'$$(For derivations, refer to FIZ219 Lecture Notes #5)
and since $x_i' = \log{x_i},\;y_i' = \log{y_i}$:
# Linear least-squares fit in log-log space: log(y) = log(alpha) + beta*log(x).
n = data1.shape[0]
xp = np.log(data1.x)
yp = np.log(data1.y)
# Normal-equation solution for the slope/intercept of y' = a0 + a1*x'.
sum_x, sum_y = np.sum(xp), np.sum(yp)
a1 = (n * np.sum(xp * yp) - sum_x * sum_y) / (n * np.sum(xp**2) - sum_x**2)
a0 = np.mean(yp) - a1 * np.mean(xp)
print("a0: {:7.4f}\na1: {:7.4f}".format(a0,a1))
as $a_0 = \log{\alpha}\rightarrow \alpha = e^{a_0}$ and $a_1 x' = \beta\log{x}\rightarrow \beta = a_1$
# Back-transform the intercept: alpha = e^(a0) (natural log was used); beta = a1.
alpha = np.exp(a0)
beta = a1
print("alpha: {:7.4f}\nbeta: {:7.4f}".format(alpha,beta))
def fun(alpha, beta, x):
    """Power-law model y = alpha * x^beta."""
    return alpha * x**beta

# Overlay the linearized fit (blue line) on the data (red circles).
xx = np.linspace(0, 80, 100)
yy = fun(alpha, beta, xx)
plt.plot(data1.x,data1.y,"or",xx,yy,"-b")
plt.show()
def fun(alpha, beta, x):
    """Power-law model y = alpha * x^beta."""
    return alpha * x**beta

x = data1.x
y = data1.y

def err(params):
    """Sum of squared residuals of the power model for params = (alpha, beta)."""
    residuals = y - fun(params[0], params[1], x)
    return np.sum(residuals**2)

from scipy.optimize import minimize

# Start near the linearized-fit result so the minimizer converges quickly.
res = minimize(err, [0.274, 1.98])
print(res)
alpha2, beta2 = res.x
# Direct nonlinear fit drawn in black for comparison.
xx = np.linspace(0, 80, 100)
yy2 = fun(alpha2, beta2, xx)
plt.plot(data1.x,data1.y,"or",xx,yy2,"-k")
plt.show()
err_ls = err([alpha,beta])
err_min = err([alpha2,beta2])
print("Least-square sum of squares error: {:10.2f}".format(err_ls))
print(" Minimizer sum of squares error: {:10.2f}".format(err_min))
Let's plot the two side by side:
# Final comparison plot: data, linearized least-squares fit, direct minimizer.
xx = np.linspace(0,80,100);
yy_ls = fun(alpha,beta,xx)
yy_min = fun(alpha2,beta2,xx)
# Blue for least-squares, Black for minimizer
plt.plot(data1.x,data1.y,"or",xx,yy_ls,"-b",xx,yy_min,"-k")
plt.legend(["data","least-squares","minimizer"])
plt.show()
It is always safe to stay on the linear side of things, wherever possible. So, consider the following data:
# Sample (x, y) pairs for the straight-line fit demonstration.
data_l = np.array([
    [1.0, 4.9142],
    [2.0, 7.1201],
    [3.0, 8.8456],
    [4.0, 10.8113],
    [5.0, 13.2231],
])
data_l
Let's fit it to a $f(x) = a + bx$ model:
# Fit the f(x) = a + b*x model three ways: curve_fit, minimize, least_squares.
x = data_l[:,0]
y = data_l[:,1]

def fun(x, a, b):
    """Linear model a + b*x (curve_fit signature: x first, then parameters)."""
    return a + b*x

import scipy.optimize as opt

# Curve fitting
res_cf = opt.curve_fit(fun,x,y)
print(res_cf)

def err(ab):
    """Scalar sum-of-squares error for the minimizer."""
    return np.sum((y - (ab[0]+ab[1]*x))**2)

# Minimizer
res_min = opt.minimize(err,[0.1,0.1])
print(res_min)

def resid(ab):
    """Residual VECTOR for least_squares.

    FIX: the original passed the scalar error `err`, which makes
    least_squares minimize err(ab)^2 instead of the sum of squared
    residuals; the max_nfev=500 cap only papered over the resulting
    poor convergence.  least_squares expects the residual vector.
    """
    return y - (ab[0] + ab[1]*x)

# Least-squares
res_ls = opt.least_squares(resid,[0.1,0.1])
print(res_ls)

xx = np.linspace(0,6,100)
plt.plot(x,y,"or",xx,res_ls.x[0]+res_ls.x[1] * xx,"-b")
plt.show()
x y
1.0000 4.9142
2.0000 7.1201
3.0000 8.8456
4.0000 10.8113
5.0000 13.2231