import numpy as np
np.set_printoptions(precision=3)
N = 20
xs = -3 + 15 * np.array(sorted(np.random.rand(N)))   # N sorted random x-values in [-3, 12)
xs
array([-2.918, -0.36 , -0.343, -0.236, 1.362, 1.81 , 2.952, 3.587, 4.134, 4.609, 4.905, 5.319, 5.706, 7.547, 7.691, 7.975, 8.127, 8.221, 9.267, 10.004])
sigma0 = 0.8
noise = sigma0 * np.random.randn(N)   # Gaussian noise with standard deviation sigma0
a = -0.7
b = 4.2
ys = a * xs + b + noise               # noisy samples of the line y = a*x + b
ys
array([ 5.455, 2.908, 3.089, 4.039, 3.59 , 3.779, 2.367, 2.381, 2.506, 2.037, -0.058, 0.745, -0.852, -0.831, -0.397, -0.539, -2.101, -2.099, -3.948, -1.512])
import matplotlib.pyplot as plt
plt.rc('font', size=16)
fig, ax = plt.subplots(1, 1)
ax.grid()
ax.plot(xs, ys, '.', markersize=15, markeredgecolor='k', color='c')
from scipy.optimize import fmin
f = lambda x, a, b: a * x + b
q_opt = fmin(lambda q: np.sum((f(xs, q[0], q[1]) - ys)**2), [0, 0])
q_opt
The lambda-function inside fmin is introduced since fmin expects a function of a single variable. We need to have two parameters determined, a and b, but luckily enough, fmin accepts a vector as its argument, and hence we provide the vector q, whose first element is interpreted as a and whose second element is interpreted as b.
We get the following output:
Optimization terminated successfully.
         Current function value: 16.144634
         Iterations: 67
         Function evaluations: 132
array([-0.636,  3.871])
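fmin prints this convergence report by default. As a small aside (a sketch using fmin's documented keyword arguments, not part of the original example), the report can be suppressed and the final sum of squares retrieved directly:

q_opt, sse, *_ = fmin(lambda q: np.sum((f(xs, q[0], q[1]) - ys)**2),
                      [0, 0], disp=False, full_output=True)   # sse is the minimized sum of squares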
The values [0, 0], given to fmin, are the initial guesses for a and b.
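Since a straight line is linear in its parameters, this particular least-squares problem also has a closed-form solution. As a cross-check (an addition here, not part of the original walkthrough), np.polyfit returns it directly and should essentially reproduce q_opt:

a_chk, b_chk = np.polyfit(xs, ys, 1)   # degree-1 fit: slope first, intercept second
a_chk, b_chk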
a_opt = q_opt[0]
b_opt = q_opt[1]
ax.plot(xs, f(xs, a_opt, b_opt), 'k--', linewidth=3)
fig
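One quick way to judge the fit (an added check, not in the original) is the root-mean-square residual, which should be comparable to the noise level sigma0 = 0.8 used to generate the data:

residuals = ys - f(xs, a_opt, b_opt)
np.sqrt(np.mean(residuals**2))   # RMS residual, roughly sigma0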
import numpy as np
import matplotlib.pyplot as plt
plt.rc('font', size=16)
N = 1000
bins = np.linspace(-5, 10, 31)        # bin edges
bins_c = (bins[1:] + bins[:-1]) / 2   # center values of the bins
x0 = 3.2
sigma0 = 1.2
data = sigma0 * np.random.randn(N) + x0
obs, _ = np.histogram(data, bins)
bins_c, obs
(array([-4.75, -4.25, -3.75, -3.25, -2.75, -2.25, -1.75, -1.25, -0.75, -0.25, 0.25, 0.75, 1.25, 1.75, 2.25, 2.75, 3.25, 3.75, 4.25, 4.75, 5.25, 5.75, 6.25, 6.75, 7.25, 7.75, 8.25, 8.75, 9.25, 9.75]), array([ 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 8, 27, 37, 86, 121, 162, 154, 159, 115, 62, 29, 26, 8, 3, 0, 0, 0, 0, 0, 0]))
The np.histogram function simply distributes the data into the bins and counts how many data points fall in each bin. We can confirm that we have a total of N = 1000 data points:
np.sum(obs)
1000
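As a minimal illustration of the binning (a made-up toy input, not the data above), each value is assigned to the interval between two consecutive bin edges:

toy = np.array([0.2, 0.4, 1.5, 2.8, 2.9])
np.histogram(toy, bins=[0, 1, 2, 3])   # -> (array([2, 1, 2]), array([0, 1, 2, 3]))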
fig, ax = plt.subplots(1, 1)
ax.grid()
ax.bar(bins_c, obs, width=0.4, edgecolor='k', color='c')
We fit a Gaussian to the histogram just as we fitted the line: we collect the parameters into a vector q, with A, x0, and sigma as its three components, and have fmin find the optimum q as the one that minimizes the sum of square deviations between the y-values of the data points (the bin counts) and the function values at the corresponding x-values (the bin centers):
from scipy.optimize import fmin
f = lambda x, A, x0, sigma: \
    A/sigma/np.sqrt(2*np.pi)*np.exp(-1/2*((x-x0)/sigma)**2)
q_opt = fmin(lambda q: np.sum((f(bins_c, q[0], q[1], q[2]) - obs)**2),
             [100, 2.5, 1])
q_opt
Optimization terminated successfully.
         Current function value: 659.506315
         Iterations: 116
         Function evaluations: 206
array([497.03785365,   3.17629749,   1.18678323])
A_opt = q_opt[0]
x0_opt = q_opt[1]
sigma_opt = q_opt[2]
xs = np.linspace(-5, 10, 100)
ax.plot(xs, f(xs, A_opt, x0_opt, sigma_opt), 'k--', linewidth=3)
fig
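As a final sanity check (added here, not part of the original), the fitted center and width can be compared with the sample mean and standard deviation, and the amplitude with N times the bin width, which is the total area under the histogram:

data.mean(), data.std()    # should be close to x0_opt and sigma_opt
N * (bins[1] - bins[0])    # total histogram area, ~500, close to A_opt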