Optimization, Minimization & Fitting Notes & Recipes#

import numpy as np
from scipy import optimize
import matplotlib.pyplot as plt
np.random.seed(371)
def g(x,mu,sigma):
    # "Negative" Gaussian: minimum value of -1 at x = mu
    return -np.exp(-(x-mu)**2/(2*sigma**2))
x = np.linspace(0,15,100)
mu = 5
sigma = 1
plt.plot(x,g(x,mu,sigma))
plt.show()
[Figure: the "negative" Gaussian g(x; mu=5, sigma=1) over 0 <= x <= 15]
optimize.minimize(g,x0=2,args=(mu,sigma))
      fun: -1.0
 hess_inv: array([[1.00023666]])
      jac: array([0.])
  message: 'Optimization terminated successfully.'
     nfev: 24
      nit: 3
     njev: 12
   status: 0
  success: True
        x: array([5.])
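
The minimizer recovers the true center: x converges to 5 (= mu) and fun to -1, the depth of the well. For a one-dimensional function like this, optimize.minimize_scalar is a convenient alternative; a minimal sketch (not part of the original recipe), restricted to the plotted range:

optimize.minimize_scalar(g,bounds=(0,15),args=(mu,sigma),method="bounded")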

Gradient Descent Algorithm#
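
Gradient descent looks for a minimum by repeatedly stepping against the derivative: starting from an initial guess \(x_0\), each iteration updates

\[x_{k+1} = x_k - \eta\, f'(x_k)\]

where the learning rate \(\eta\) scales the step. Since \(f'\) points uphill, the step \(-\eta f'(x_k)\) always moves downhill, and the iteration stops once \(|f'(x_k)|\) falls below a chosen tolerance.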

Parabola#

def f(x,abc):
    # Parabola: a*x**2 + b*x + c, with abc = (a, b, c)
    return abc[0]*x**2+abc[1]*x+abc[2]
def g(x,abc):
    # Derivative: 2*a*x + b
    return 2*abc[0]*x+abc[1]
xx = np.linspace(-5,10,100)
abc = np.array([2,3,-4])
plt.plot(xx,f(xx,abc))
plt.show()
[Figure: the parabola f(x) = 2x**2 + 3x - 4 over -5 <= x <= 10]
x = 5
N = 50
eta = .4
tolerance = 1E-4
xs_so_far = [x]
fs_so_far = [f(x,abc)]
for i in range(N):
    gg = g(x,abc)
    print("Step #{:d}".format(i+1))
    print("The derivative (gradient) at x = {:7.5f} is {:5.3f}"\
          .format(x,gg))
    if(np.abs(gg)<tolerance):
        print("\tAs it is sufficiently close to zero, we have found the minima!")
        break
    elif(gg>0):
        print("\tAs it is positive, go left by: "+
              "(this amount)*eta(={:.2f}).".format(eta))
    else:
        print("\tAs it is negative, go right by: "+
              "|this amount|*eta(={:.2f}).".format(eta))

    delta = -gg*eta
    x0 = x
    x = x + delta
    xs_so_far.append(x)
    fs_so_far.append(f(x,abc))
    print("\t==> The new x is {:7.5f}{:+7.5f}={:7.5f}".format(x0,delta,x))
    plt.plot(xx,f(xx,abc),color="orange")
    plt.plot(xs_so_far,fs_so_far,"*-")
    plt.show()

    print("-"*45)
Step #1
The derivative (gradient) at x = 5.00000 is 23.000
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00000-9.20000=-4.20000
[Figure: the descent path so far (stars) overlaid on f(x) (orange); each step produces an updated version of this plot]
---------------------------------------------
Step #2
The derivative (gradient) at x = -4.20000 is -13.800
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -4.20000+5.52000=1.32000
---------------------------------------------
Step #3
The derivative (gradient) at x = 1.32000 is 8.280
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 1.32000-3.31200=-1.99200
---------------------------------------------
Step #4
The derivative (gradient) at x = -1.99200 is -4.968
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -1.99200+1.98720=-0.00480
---------------------------------------------
Step #5
The derivative (gradient) at x = -0.00480 is 2.981
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.00480-1.19232=-1.19712
---------------------------------------------
Step #6
The derivative (gradient) at x = -1.19712 is -1.788
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -1.19712+0.71539=-0.48173
---------------------------------------------
Step #7
The derivative (gradient) at x = -0.48173 is 1.073
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.48173-0.42924=-0.91096
---------------------------------------------
Step #8
The derivative (gradient) at x = -0.91096 is -0.644
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -0.91096+0.25754=-0.65342
---------------------------------------------
Step #9
The derivative (gradient) at x = -0.65342 is 0.386
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.65342-0.15452=-0.80795
---------------------------------------------
Step #10
The derivative (gradient) at x = -0.80795 is -0.232
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -0.80795+0.09271=-0.71523
---------------------------------------------
Step #11
The derivative (gradient) at x = -0.71523 is 0.139
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.71523-0.05563=-0.77086
---------------------------------------------
Step #12
The derivative (gradient) at x = -0.77086 is -0.083
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -0.77086+0.03338=-0.73748
---------------------------------------------
Step #13
The derivative (gradient) at x = -0.73748 is 0.050
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.73748-0.02003=-0.75751
---------------------------------------------
Step #14
The derivative (gradient) at x = -0.75751 is -0.030
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -0.75751+0.01202=-0.74549
---------------------------------------------
Step #15
The derivative (gradient) at x = -0.74549 is 0.018
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.74549-0.00721=-0.75270
---------------------------------------------
Step #16
The derivative (gradient) at x = -0.75270 is -0.011
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -0.75270+0.00433=-0.74838
---------------------------------------------
Step #17
The derivative (gradient) at x = -0.74838 is 0.006
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.74838-0.00260=-0.75097
---------------------------------------------
Step #18
The derivative (gradient) at x = -0.75097 is -0.004
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -0.75097+0.00156=-0.74942
---------------------------------------------
Step #19
The derivative (gradient) at x = -0.74942 is 0.002
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.74942-0.00093=-0.75035
---------------------------------------------
Step #20
The derivative (gradient) at x = -0.75035 is -0.001
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -0.75035+0.00056=-0.74979
---------------------------------------------
Step #21
The derivative (gradient) at x = -0.74979 is 0.001
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.74979-0.00034=-0.75013
---------------------------------------------
Step #22
The derivative (gradient) at x = -0.75013 is -0.001
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -0.75013+0.00020=-0.74992
---------------------------------------------
Step #23
The derivative (gradient) at x = -0.74992 is 0.000
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.74992-0.00012=-0.75005
---------------------------------------------
Step #24
The derivative (gradient) at x = -0.75005 is -0.000
	As it is negative, go right by: |this amount|*eta(=0.40).
	==> The new x is -0.75005+0.00007=-0.74997
---------------------------------------------
Step #25
The derivative (gradient) at x = -0.74997 is 0.000
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is -0.74997-0.00004=-0.75002
---------------------------------------------
Step #26
The derivative (gradient) at x = -0.75002 is -0.000
	As it is sufficiently close to zero, we have found the minimum!
# Real minimum:
np.roots([2*abc[0],abc[1]]) # root of 2ax + b
array([-0.75])
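
This agrees with the analytic vertex of the parabola: \(x^\ast = -b/(2a) = -3/4 = -0.75\).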

“Negative” Gaussian#

def f(x,mu,sigma):
    # "Negative" Gaussian (as above)
    return -np.exp(-(x-mu)**2/(2*sigma**2))
def g(x,mu,sigma):
    # Derivative of f with respect to x
    return (x-mu)/(sigma**2)*np.exp(-(x-mu)**2/(2*sigma**2))
mu = 5
sigma = 1
xx = np.linspace(0,15,100)
plt.plot(xx,f(xx,mu,sigma))
plt.show()
[Figure: the "negative" Gaussian f(x; mu=5, sigma=1) over 0 <= x <= 15]
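
Here g is the analytic derivative of f: the chain rule brings down a factor of \(-(x-\mu)/\sigma^2\) from the exponent, which cancels the leading minus sign of f, giving

\[f'(x) = \frac{x-\mu}{\sigma^2}\exp{\left[-\frac{(x-\mu)^2}{2\sigma^2}\right]}\]
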
x = 8
N = 60
eta = .4
tolerance = 1E-4
xs_so_far = [x]
fs_so_far = [f(x,mu,sigma)]
for i in range(N):
    gg = g(x,mu,sigma)
    print("Step #{:d}".format(i+1))
    print("The derivative (gradient) at x = {:7.5f} is {:5.4f}"\
          .format(x,gg))
    if(np.abs(gg)<tolerance):
        print("\tAs it is sufficiently close to zero, we have found the minima!")
        break
    elif(gg>0):
        print("\tAs it is positive, go left by: "+
              "(this amount)*eta(={:.2f}).".format(eta))
    else:
        print("\tAs it is negative, go right by: "+
              "|this amount|*eta(={:.2f}).".format(eta))

    delta = -gg*eta
    x0 = x
    x = x + delta
    xs_so_far.append(x)
    fs_so_far.append(f(x,mu,sigma))
    print("\t==> The new x is {:7.5f}{:+7.5f}={:7.5f}".format(x0,delta,x))
    plt.plot(xx,f(xx,mu,sigma),color="orange")
    plt.plot(xs_so_far,fs_so_far,"*-")
    plt.show()

    print("-"*45)
Step #1
The derivative (gradient) at x = 8.00000 is 0.0333
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 8.00000-0.01333=7.98667
[Figure: the descent path so far (stars) overlaid on f(x) (orange); each step produces an updated version of this plot]
---------------------------------------------
Step #2
The derivative (gradient) at x = 7.98667 is 0.0345
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.98667-0.01381=7.97286
---------------------------------------------
Step #3
The derivative (gradient) at x = 7.97286 is 0.0358
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.97286-0.01433=7.95853
---------------------------------------------
Step #4
The derivative (gradient) at x = 7.95853 is 0.0372
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.95853-0.01488=7.94366
---------------------------------------------
Step #5
The derivative (gradient) at x = 7.94366 is 0.0387
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.94366-0.01546=7.92819
---------------------------------------------
Step #6
The derivative (gradient) at x = 7.92819 is 0.0402
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.92819-0.01610=7.91209
---------------------------------------------
Step #7
The derivative (gradient) at x = 7.91209 is 0.0420
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.91209-0.01678=7.89531
---------------------------------------------
Step #8
The derivative (gradient) at x = 7.89531 is 0.0438
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.89531-0.01752=7.87780
---------------------------------------------
Step #9
The derivative (gradient) at x = 7.87780 is 0.0458
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.87780-0.01831=7.85948
---------------------------------------------
Step #10
The derivative (gradient) at x = 7.85948 is 0.0479
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.85948-0.01918=7.84031
---------------------------------------------
Step #11
The derivative (gradient) at x = 7.84031 is 0.0503
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.84031-0.02012=7.82019
---------------------------------------------
Step #12
The derivative (gradient) at x = 7.82019 is 0.0529
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.82019-0.02115=7.79904
---------------------------------------------
Step #13
The derivative (gradient) at x = 7.79904 is 0.0557
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.79904-0.02227=7.77676
---------------------------------------------
Step #14
The derivative (gradient) at x = 7.77676 is 0.0588
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.77676-0.02351=7.75325
---------------------------------------------
Step #15
The derivative (gradient) at x = 7.75325 is 0.0622
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.75325-0.02488=7.72837
---------------------------------------------
Step #16
The derivative (gradient) at x = 7.72837 is 0.0660
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.72837-0.02639=7.70198
---------------------------------------------
Step #17
The derivative (gradient) at x = 7.70198 is 0.0702
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.70198-0.02808=7.67389
---------------------------------------------
Step #18
The derivative (gradient) at x = 7.67389 is 0.0749
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.67389-0.02997=7.64393
---------------------------------------------
Step #19
The derivative (gradient) at x = 7.64393 is 0.0802
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.64393-0.03209=7.61184
---------------------------------------------
Step #20
The derivative (gradient) at x = 7.61184 is 0.0862
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.61184-0.03449=7.57735
---------------------------------------------
Step #21
The derivative (gradient) at x = 7.57735 is 0.0931
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.57735-0.03722=7.54012
---------------------------------------------
Step #22
The derivative (gradient) at x = 7.54012 is 0.1009
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.54012-0.04035=7.49978
---------------------------------------------
Step #23
The derivative (gradient) at x = 7.49978 is 0.1099
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.49978-0.04396=7.45582
---------------------------------------------
Step #24
The derivative (gradient) at x = 7.45582 is 0.1204
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.45582-0.04815=7.40766
---------------------------------------------
Step #25
The derivative (gradient) at x = 7.40766 is 0.1327
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.40766-0.05307=7.35459
---------------------------------------------
Step #26
The derivative (gradient) at x = 7.35459 is 0.1472
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.35459-0.05890=7.29569
---------------------------------------------
Step #27
The derivative (gradient) at x = 7.29569 is 0.1646
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.29569-0.06585=7.22984
---------------------------------------------
Step #28
The derivative (gradient) at x = 7.22984 is 0.1856
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.22984-0.07424=7.15560
---------------------------------------------
Step #29
The derivative (gradient) at x = 7.15560 is 0.2111
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.15560-0.08446=7.07115
---------------------------------------------
Step #30
The derivative (gradient) at x = 7.07115 is 0.2425
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 7.07115-0.09700=6.97414
---------------------------------------------
Step #31
The derivative (gradient) at x = 6.97414 is 0.2813
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 6.97414-0.11250=6.86164
---------------------------------------------
Step #32
The derivative (gradient) at x = 6.86164 is 0.3291
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 6.86164-0.13164=6.73000
---------------------------------------------
Step #33
The derivative (gradient) at x = 6.73000 is 0.3874
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 6.73000-0.15495=6.57505
---------------------------------------------
Step #34
The derivative (gradient) at x = 6.57505 is 0.4556
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 6.57505-0.18225=6.39280
---------------------------------------------
Step #35
The derivative (gradient) at x = 6.39280 is 0.5280
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 6.39280-0.21121=6.18159
---------------------------------------------
Step #36
The derivative (gradient) at x = 6.18159 is 0.5879
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 6.18159-0.23516=5.94644
---------------------------------------------
Step #37
The derivative (gradient) at x = 5.94644 is 0.6048
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.94644-0.24190=5.70453
---------------------------------------------
Step #38
The derivative (gradient) at x = 5.70453 is 0.5497
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.70453-0.21988=5.48466
---------------------------------------------
Step #39
The derivative (gradient) at x = 5.48466 is 0.4310
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.48466-0.17238=5.31228
---------------------------------------------
Step #40
The derivative (gradient) at x = 5.31228 is 0.2974
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.31228-0.11897=5.19331
---------------------------------------------
Step #41
The derivative (gradient) at x = 5.19331 is 0.1897
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.19331-0.07589=5.11742
---------------------------------------------
Step #42
The derivative (gradient) at x = 5.11742 is 0.1166
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.11742-0.04664=5.07077
---------------------------------------------
Step #43
The derivative (gradient) at x = 5.07077 is 0.0706
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.07077-0.02824=5.04253
---------------------------------------------
Step #44
The derivative (gradient) at x = 5.04253 is 0.0425
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.04253-0.01700=5.02554
---------------------------------------------
Step #45
The derivative (gradient) at x = 5.02554 is 0.0255
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.02554-0.01021=5.01533
---------------------------------------------
Step #46
The derivative (gradient) at x = 5.01533 is 0.0153
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.01533-0.00613=5.00920
---------------------------------------------
Step #47
The derivative (gradient) at x = 5.00920 is 0.0092
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00920-0.00368=5.00552
---------------------------------------------
Step #48
The derivative (gradient) at x = 5.00552 is 0.0055
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00552-0.00221=5.00331
---------------------------------------------
Step #49
The derivative (gradient) at x = 5.00331 is 0.0033
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00331-0.00132=5.00199
---------------------------------------------
Step #50
The derivative (gradient) at x = 5.00199 is 0.0020
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00199-0.00079=5.00119
---------------------------------------------
Step #51
The derivative (gradient) at x = 5.00119 is 0.0012
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00119-0.00048=5.00072
---------------------------------------------
Step #52
The derivative (gradient) at x = 5.00072 is 0.0007
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00072-0.00029=5.00043
---------------------------------------------
Step #53
The derivative (gradient) at x = 5.00043 is 0.0004
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00043-0.00017=5.00026
---------------------------------------------
Step #54
The derivative (gradient) at x = 5.00026 is 0.0003
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00026-0.00010=5.00015
---------------------------------------------
Step #55
The derivative (gradient) at x = 5.00015 is 0.0002
	As it is positive, go left by: (this amount)*eta(=0.40).
	==> The new x is 5.00015-0.00006=5.00009
---------------------------------------------
Step #56
The derivative (gradient) at x = 5.00009 is 0.0001
	As it is sufficiently close to zero, we have found the minimum!
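
Notice how slowly the walk starts: far from \(\mu\) the Gaussian is nearly flat, so the gradient (and with it the step \(-\eta\,g\)) is tiny; at \(x=8\) it is only 0.0333. The steps are largest near the inflection point \(x=\mu+\sigma\), where \(|f'|\) peaks at \(e^{-1/2}/\sigma \approx 0.607\) (compare step #37 above, at \(x\approx 5.95\)), and they shrink again as \(x\) approaches the minimum.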

Fitting Parameters via the Gradient Descent Algorithm#

mu = 5
sigma = 1

N = 10
x = np.random.rand(N)*4+3
t = f(x,mu,sigma)

xx = np.linspace(3,7,100)

plt.plot(xx,f(xx,mu,sigma),color="orange")
plt.plot(x,t,"o")
plt.show()
[Figure: 10 random sample points (dots) on the "negative" Gaussian f(x; mu=5, sigma=1) (orange)]
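
Note that the targets t are noiseless samples of f itself, which is why a practically perfect fit is attainable below. To emulate a more realistic, noisy data set (an optional variation, not used in what follows), one could instead write:

t = f(x,mu,sigma) + 0.05*np.random.randn(N)  # add Gaussian noise to the targets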

\(\newcommand{\diff}{\text{d}} \newcommand{\dydx}[2]{\frac{\text{d}#1}{\text{d}#2}} \newcommand{\ddydx}[2]{\frac{\text{d}^2#1}{\text{d}#2^2}} \newcommand{\pypx}[2]{\frac{\partial#1}{\partial#2}} \newcommand{\unit}[1]{\,\text{#1}}\)

We have the data points and we know the function, but we don't have the mu & sigma.

\[f(x;\mu,\sigma)=-\exp{\left[-\frac{(x-\mu)^2}{2\sigma^2}\right]}\]

The function we are going to minimize is the difference between the true values (\(\{t_i\}\)) corresponding to \(\{x_i\}\) and the predicted values (\(\{y_i\}\)):

\[F(x_i,t_i,\mu,\sigma) = t_i - f(x_i;\mu,\sigma)\]

Begin by calculating the derivatives:

\[\begin{split}\pypx{F}{\mu}=\frac{x_i-\mu}{\sigma^2}\exp{\left[-\frac{(x_i-\mu)^2}{2\sigma^2}\right]}\\ \pypx{F}{\sigma}=\frac{(x_i-\mu)^2}{\sigma^3}\exp{\left[-\frac{(x_i-\mu)^2}{2\sigma^2}\right]}\end{split}\]

(don’t forget that \(\{x_i\}\) and \(\{t_i\}\) are fixed!)

Can you see the problem in this approach? As the \(\{t_i\}\) are fixed, the problem reduces to finding the \((\mu,\sigma)\) pair that minimizes \(-f(x_i;\mu,\sigma)\), i.e., that pushes \(f(x_i;\mu,\sigma)\) toward its supremum, regardless of the \(\{t_i\}\) values. If we follow this approach, we will most likely end up with a \((\mu,\sigma)\) that drives all the predicted values very close to 0.

You are invited to try this approach, i.e.,

def F_mu(x,mu,sigma):
    # dF/dmu for F = t - f(x; mu, sigma)
    return (x-mu)/sigma**2*np.exp(-(x-mu)**2/(2*sigma**2))
def F_sigma(x,mu,sigma):
    # dF/dsigma for F = t - f(x; mu, sigma)
    return (x-mu)**2/sigma**3*np.exp(-(x-mu)**2/(2*sigma**2))

But what we really have in mind is that, for a given \(x_i\), we want the prediction to be as close to the corresponding \(t_i\) as possible. One way to achieve this is to define the error function as:

\[F(x_i,t_i,\mu,\sigma) = \left[t_i - f(x_i;\mu,\sigma)\right]^2=\left\{t_i - \left[-\exp{\left(-\frac{(x_i-\mu)^2}{2\sigma^2}\right)}\right] \right\}^2\]

and then we would have the following derivatives:

\[\begin{split}\pypx{F}{\mu} = \frac{2(x_i-\mu)}{\sigma^2}\exp{\left[-\frac{(x_i-\mu)^2}{2\sigma^2}\right]}\left\{t_i+\exp{\left[-\frac{(x_i-\mu)^2}{2\sigma^2}\right]}\right\}\\ \pypx{F}{\sigma}=\frac{2(x_i-\mu)^2}{\sigma^3}\exp{\left[-\frac{(x_i-\mu)^2}{2\sigma^2}\right]}\left\{t_i+\exp{\left[-\frac{(x_i-\mu)^2}{2\sigma^2}\right]}\right\}\end{split}\]

(Both derivatives can be verified via WolframAlpha.)

def F_mu(x,t,mu,sigma):
    # dF/dmu for F = [t - f(x; mu, sigma)]^2
    return (2*(x-mu)/sigma**2*np.exp(-(x-mu)**2/(2*sigma**2))
            *(t+np.exp(-(x-mu)**2/(2*sigma**2))))
def F_sigma(x,t,mu,sigma):
    # dF/dsigma for F = [t - f(x; mu, sigma)]^2
    return (2*(x-mu)**2/sigma**3*np.exp(-(x-mu)**2/(2*sigma**2))
            *(t+np.exp(-(x-mu)**2/(2*sigma**2))))
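
As a quick sanity check (an addition here, not part of the original derivation), the analytic gradients can be compared against central finite differences of the single-point squared error:

def F_single(x,t,mu,sigma):
    # Squared error for a single data point: [t - f(x; mu, sigma)]**2
    return (t + np.exp(-(x-mu)**2/(2*sigma**2)))**2

h = 1E-6
xi, ti = 5.0, -0.8  # arbitrary test values
print(F_mu(xi,ti,4.0,1.5),
      (F_single(xi,ti,4.0+h,1.5)-F_single(xi,ti,4.0-h,1.5))/(2*h))
print(F_sigma(xi,ti,4.0,1.5),
      (F_single(xi,ti,4.0,1.5+h)-F_single(xi,ti,4.0,1.5-h))/(2*h))

Each pair should agree to several decimal places.
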
np.array([x,t]).T  # the (x_i, t_i) data pairs
array([[ 5.68303007, -0.79194365],
       [ 5.29078268, -0.95860394],
       [ 4.17595952, -0.71211109],
       [ 6.24959654, -0.45806428],
       [ 4.41660884, -0.84351919],
       [ 6.96423794, -0.14527666],
       [ 6.53987834, -0.30555892],
       [ 4.86294066, -0.99065134],
       [ 3.3056984 , -0.23803705],
       [ 6.23655758, -0.46554928]])
eta = 1

# Starting values
mu_opt = 2.7
sigma_opt = 2.3
tolerance = 1E-4

for i in range(10000):
    # Sweep over the data, updating (mu, sigma) after every single point
    for ii in range(x.size):
        xi = x[ii]
        ti = t[ii]
        F_mu_xi = F_mu(xi,ti,mu_opt,sigma_opt)
        F_sigma_xi = F_sigma(xi,ti,mu_opt,sigma_opt)
        mu_opt -= eta*F_mu_xi
        sigma_opt -= eta*F_sigma_xi
    total_absolute_error = np.sum(np.abs(t-f(x,mu_opt,sigma_opt)))
    if(total_absolute_error < tolerance):
        print(("As the sum of the absolute errors is sufficiently close to zero ({:.7f}),\n"+
              "\tbreaking the iteration at the {:d}. step!").
              format(total_absolute_error,i+1))
        break
print("mu: {:.4f}\tsigma: {:.4f}".format(mu_opt,sigma_opt))

plt.plot(xx,f(xx,mu,sigma),color="orange")
plt.plot(xx,f(xx,mu_opt,sigma_opt),":b")
plt.plot(x,t,"o")
plt.show()
As the sum of the absolute errors is sufficiently close to zero (0.0000034),
	breaking the iteration at the 44. step!
mu: 5.0000	sigma: 1.0000
[Figure: the fitted curve (dotted blue) lying on top of the true curve (orange), together with the data points]

Stochastic Gradient Descent Algorithm#

In this approach, instead of updating the parameters after each individual data point as above, we update them once per iteration using the gradient summed over the whole data set. (Strictly speaking, the per-point scheme of the previous section is the "stochastic" one; updating on the full sum, as done here, is usually called batch gradient descent.) Each iteration applies:
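
\[\Delta\mu = -\eta\sum_i \pypx{F}{\mu}(x_i,t_i),\qquad \Delta\sigma = -\eta\sum_i \pypx{F}{\sigma}(x_i,t_i)\]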

eta = 0.1

# Starting values
mu_opt = 2.7
sigma_opt = 2.3
tolerance = 1E-4

for i in range(10000):
    d_mu    = -eta*np.sum(F_mu(x,t,mu_opt,sigma_opt))
    d_sigma = -eta*np.sum(F_sigma(x,t,mu_opt,sigma_opt))

    mu_opt    += d_mu
    sigma_opt += d_sigma

    total_absolute_error = np.sum(np.abs(t-f(x,mu_opt,sigma_opt)))

    if(total_absolute_error < tolerance):
        print(("As the sum of the absolute errors is sufficiently close to zero ({:.7f}),\n"+
              "\tbreaking the iteration at the {:d}. step!").
              format(total_absolute_error,i+1))
        break
print("mu: {:.4f}\tsigma: {:.4f}".format(mu_opt,sigma_opt))

plt.plot(xx,f(xx,mu,sigma),color="orange")
plt.plot(xx,f(xx,mu_opt,sigma_opt),":b")
plt.plot(x,t,"o")
plt.show()
As the sum of the absolute errors is sufficiently close to zero (0.0000838),
	breaking the iteration at the 473. step!
mu: 5.0000	sigma: 1.0000
[Figure: the fitted curve (dotted blue) lying on top of the true curve (orange), together with the data points]
def F(musigma,x,t):
    # Total squared error over all data points; musigma = (mu, sigma)
    return np.sum((t + np.exp(-(x-musigma[0])**2/(2*musigma[1]**2)))**2)
opt = optimize.minimize(F,x0=(2.7,2.3),args=(x,t))
opt.x,opt.fun
(array([-0.94939593,  6.24977685]), 0.7517808253037702)
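
From this starting point the optimizer settles at a spurious stationary point of the total squared error (note that fun is only ~0.75 rather than ~0), far from the true parameters. Constraining \(\mu\) to a sensible range steers it to the correct minimum:
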
opt = optimize.minimize(F,x0=(2.7,2.3),args=(x,t),bounds=[(3,6.5),(None,None)])
opt.x,opt.fun
(array([5.00000008, 0.99999998]), 1.0592809689731542e-14)
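
For completeness: since this is a least-squares problem, scipy's dedicated wrapper optimize.curve_fit can fit the model function directly. A minimal sketch (not part of the original recipe), reusing the starting values from above:

popt, pcov = optimize.curve_fit(f,x,t,p0=(2.7,2.3))
popt  # should come out close to [5., 1.]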