%matplotlib inline
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
print("hi")
Let us define the random variables $X$ and $Y$ by $$ f(x) = \frac{x + x^2}{1 + x^2}, \qquad X \sim \mathcal{N}(0, 1), \quad Y = f(X) + \varepsilon, $$ where $\varepsilon \sim \mathcal{N}(0, 1/4)$ is independent of $X$.
def f(x):
    return (x + x ** 2) / (1 + x ** 2)
n = 1000
sigma = 0.5
X = np.random.randn(n)
Y = f(X) + sigma * np.random.randn(n)
plt.scatter(X, Y, alpha=0.3)
mesh = np.linspace(-4, 4, 101)
plt.plot(mesh, f(mesh), 'r', linewidth=3)
plt.show()
def reg_non_param(x, bdwidth, x_sample, y_sample):
    """Values of the non-parametric regression of Y wrt X using a Gaussian kernel.
    Parameters
    ----------
    x: numpy array, one dimensional
        Values at which the regression is evaluated
    bdwidth: positive float, value of the bandwidth parameter
    x_sample: numpy array, one dimensional, non-empty
        x values of the sample
    y_sample: numpy array, one dimensional
        y values of the sample, must have the same length as x_sample.    
    """
    def kern(u, x):
        """Gaussian kernel function"""
        return np.exp(-(u[:, np.newaxis] - x) ** 2 / (2 * bdwidth ** 2))
    return np.sum(kern(x_sample, x) * y_sample[:, np.newaxis], axis=0) \
        / np.sum(kern(x_sample, x), axis=0)
Plotting non-parametric regressions of $Y$ with respect to $X$ with different values of the bandwidth:
plt.scatter(X, Y, alpha=0.3)
plt.plot(mesh, reg_non_param(mesh, 0.1, X, Y), 'y', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.2, X, Y), 'r', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.5, X, Y), 'g', linewidth=3)
plt.show()
def basis(knots, x):
    """Values of order-1 B-spline basis functions.
    
    For an increasingly sorted collection of knots and a collection of
    query points x, returns a 2-dimensional array of values, of dimension
    len(x) x len(knots).
    
    Parameters
    ----------
    knots: numpy array, one dimensional, increasingly sorted
        Knots of the B-spline function
    x: numpy array, one dimensional
        Query points where to evaluate the basis functions.
    """
    nb_knots = len(knots)
    diag = np.identity(nb_knots)
    res = np.empty((len(x), nb_knots))
    for i in range(nb_knots):
        res[:, i] = np.interp(x, knots, diag[i])
    return res
basis_len = 10
knots = np.linspace(-3.0, 3.0, basis_len)
plt.plot(mesh, basis(knots, mesh), linewidth=2)
plt.ylim(0.0, 3)
plt.show()
Experiment with the number of knots and the interval they span.
Bonus question. Experiment with second-order Tikhonov regularization. The second derivative of a piecewise-linear function is a Dirac comb, so the penalization should be the sum of the squares of the weights of those Dirac masses.
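Below is a minimal sketch of that penalization, not from the original notebook: assuming evenly spaced knots with spacing $h$, the second derivative of the fitted piecewise-linear function is a Dirac comb whose mass at interior knot $j$ is $(w_{j-1} - 2w_j + w_{j+1})/h$, so the penalty is $\lambda \lVert D_2 w \rVert^2$ for the second-difference matrix $D_2$, and the regularized normal equations become $(Q^\top Q + \lambda D_2^\top D_2) w = Q^\top Y$. The helper name tikhonov2_weights and the choice of $\lambda$ are illustrative, not part of the assignment.
def tikhonov2_weights(Qmat, y, lam, spacing):
    """Second-order Tikhonov-regularized least squares for the spline weights.

    The second derivative of the piecewise-linear fit is a Dirac comb whose
    mass at interior knot j is (w[j-1] - 2*w[j] + w[j+1]) / spacing; we
    penalize the sum of squares of those masses with strength lam.
    """
    nb_knots = Qmat.shape[1]
    # Second-difference operator, shape (nb_knots - 2, nb_knots)
    d2 = np.diff(np.identity(nb_knots), n=2, axis=0) / spacing
    # Regularized normal equations: (Q^T Q + lam * D2^T D2) w = Q^T y
    lhs = Qmat.T.dot(Qmat) + lam * d2.T.dot(d2)
    return np.linalg.solve(lhs, Qmat.T.dot(y))
# Example usage with the objects defined above (lam chosen arbitrarily):
# weights = tikhonov2_weights(basis(knots, X), Y, 1.0, knots[1] - knots[0])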
See the graph above of the regression output using the default parameters (you may have to scroll down). Below I try more knot points that span more of the mesh (but the same mesh).
%matplotlib inline
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
def f(x):
    return (x + x ** 2) / (1 + x ** 2)
n = 1000
sigma = 0.5
X = np.random.randn(n)
Y = f(X) + sigma * np.random.randn(n)
mesh = np.linspace(-4, 4, 101)          #<---------------------PARAM-----------------------------
def basis(knots, x):
    """Values of order-1 B-spline basis functions.
    
    For an increasingly sorted collection of knots and a collection of
    query points x, returns a 2-dimensional array of values, of dimension
    len(x) x len(knots).
    
    Parameters
    ----------
    knots: numpy array, one dimensional, increasingly sorted
        Knots of the B-spline function
    x: numpy array, one dimensional
        Query points where to evaluate the basis functions.
    """
    nb_knots = len(knots)
    diag = np.identity(nb_knots)
    res = np.empty((len(x), nb_knots))
    for i in range(nb_knots):
        res[:, i] = np.interp(x, knots, diag[i])
    return res
basis_len = 10            #<---------------------------------PARAM-------------------------------
knots = np.linspace(-3.0, 3.0, basis_len)    #<--------------------------PARAM-------------------
plt.plot(mesh, basis(knots, mesh), linewidth=2)
plt.ylim(0.0, 3)
plt.show()
print("hello")
#index=0;
#for i in mesh :
#    print (i)
#    print (index) 
#    index=index+1
#index=0
#closevalindex=np.empty(len(X),1)    
#for x in X :
#      closevalindex[index] = (np.abs(mesh-x)).argmin()
    #index=index+1
        
meshBasis = basis(knots, mesh)
Qmat = basis(knots, X)
# Least-squares weights via the normal equations
# (raises LinAlgError when Qmat.T.dot(Qmat) is singular)
weights = np.linalg.solve(Qmat.T.dot(Qmat), Qmat.T.dot(Y))
print(weights)
# Fitted regression on the mesh: linear combination of the hat basis functions
summedVals = meshBasis.dot(weights)
plt.plot(mesh, summedVals, linewidth=2)
%matplotlib inline
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
def f(x):
    return (x + x ** 2) / (1 + x ** 2)
n = 1000
sigma = 0.5
X = np.random.randn(n)
Y = f(X) + sigma * np.random.randn(n)
mesh = np.linspace(-4, 4, 301)          #<---------------------PARAM-----------------------------
def basis(knots, x):
    """Values of order-1 B-spline basis functions.
    
    For an increasingly sorted collection of knots and a collection of
    query points x, returns a 2-dimensional array of values, of dimension
    len(x) x len(knots).
    
    Parameters
    ----------
    knots: numpy array, one dimensional, increasingly sorted
        Knots of the B-spline function
    x: numpy array, one dimensional
        Query points where to evaluate the basis functions.
    """
    nb_knots = len(knots)
    diag = np.identity(nb_knots)
    res = np.empty((len(x), nb_knots))
    for i in range(nb_knots):
        res[:, i] = np.interp(x, knots, diag[i])
    return res
basis_len = 15          #<---------------------------------PARAM-------------------------------
knots = np.linspace(-3.0, 3.0, basis_len)     #<--------------------PARAM-----------------
plt.plot(mesh, basis(knots, mesh), linewidth=2)
plt.ylim(0.0, 3)
plt.show()
print("hello")
#index=0;
#for i in mesh :
#    print (i)
#    print (index) 
#    index=index+1
#index=0
#closevalindex=np.empty(len(X),1)    
#for x in X :
#      closevalindex[index] = (np.abs(mesh-x)).argmin()
    #index=index+1
        
meshBasis = basis(knots, mesh)
Qmat = basis(knots, X)
# Least-squares weights via the normal equations
weights = np.linalg.solve(Qmat.T.dot(Qmat), Qmat.T.dot(Y))
print(weights)
# Fitted regression on the mesh
summedVals = meshBasis.dot(weights)
plt.plot(mesh, summedVals, 'g', linewidth=2)
plt.scatter(X, Y, alpha=0.3)
print("This is more sensitive to variation in the sample, since we have increased the number of basis functions to ",basis_len, " as well as the number of mesh points.")
Of note, when the bounds of the basis functions are extended past [-3, 3], NumPy will often raise a singular-matrix error, possibly because there is less data at the edges than near the center of the mesh, so the regression breaks down (the error increases substantially). Increasing the number of samples in the top block makes the results better conditioned and less prone to singularity.
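One possible workaround, an addition of mine rather than part of the assignment: replace the explicit inverse of $Q^\top Q$ with np.linalg.lstsq, which returns the minimum-norm least-squares solution even when the matrix is rank-deficient (a basis function with no data in its support simply gets zero weight).
# Minimum-norm least-squares fit, tolerant of a rank-deficient Qmat
weights, residuals, rank, sv = np.linalg.lstsq(Qmat, Y, rcond=None)
print("rank of Qmat: %d of %d columns" % (rank, Qmat.shape[1]))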
Question 2.
#different bandwidths
plt.scatter(X, Y, alpha=0.3)
plt.plot(mesh, reg_non_param(mesh, 0.05, X, Y), 'y', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.5, X, Y), 'r', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.9, X, Y), 'g', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 1, X, Y), 'b', linewidth=3)
plt.show()
#more samples
def f(x):
    return (x + x ** 2) / (1 + x ** 2)
n = 10000
sigma = 0.5
X = np.random.randn(n)
Y = f(X) + sigma * np.random.randn(n)
plt.scatter(X, Y, alpha=0.3)
plt.plot(mesh, reg_non_param(mesh, 0.02, X, Y), 'y', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.05, X, Y), 'y', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.5, X, Y), 'r', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.9, X, Y), 'g', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 1, X, Y), 'b', linewidth=3)
plt.show()
#Fewer Samples
def f(x):
    return (x + x ** 2) / (1 + x ** 2)
n = 100
sigma = 0.5
X = np.random.randn(n)
Y = f(X) + sigma * np.random.randn(n)
plt.scatter(X, Y, alpha=0.3)
plt.plot(mesh, reg_non_param(mesh, 0.04, X, Y), 'y', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.05, X, Y), 'y', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.5, X, Y), 'r', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.9, X, Y), 'g', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 1, X, Y), 'b', linewidth=3)
plt.show()
We change the Gaussian kernel so that, instead of $(u - x)^2$, we use $|u - x|$:
def reg_non_param(x, bdwidth, x_sample, y_sample):
    """Values of the non-parametric regression of Y wrt X using a Gaussian kernel.
    Parameters
    ----------
    x: numpy array, one dimensional
        Values at which the regression is evaluated
    bdwidth: positive float, value of the bandwidth parameter
    x_sample: numpy array, one dimensional, non-empty
        x values of the sample
    y_sample: numpy array, one dimensional
        y values of the sample, must have the same length as x_sample.    
    """
    def kern(u, x):
        """Gaussian kernel function"""
        return np.exp(-np.abs((u[:, np.newaxis] - x)) / (2 * bdwidth ** 2))
    return np.sum(kern(x_sample, x) * y_sample[:, np.newaxis], axis=0) \
        / np.sum(kern(x_sample, x), axis=0)
    
def f(x):
    return (x + x ** 2) / (1 + x ** 2)
n = 1000
sigma = 0.5
X = np.random.randn(n)
Y = f(X) + sigma * np.random.randn(n)    
plt.scatter(X, Y, alpha=0.3)
plt.plot(mesh, reg_non_param(mesh, 0.05, X, Y), 'y', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.5, X, Y), 'r', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 0.9, X, Y), 'g', linewidth=3)
plt.plot(mesh, reg_non_param(mesh, 1, X, Y), 'b', linewidth=3)
plt.show()
We see that the low-bandwidth fits show more variation than with the Gaussian kernel, since the absolute-value exponent down-weights distant sample points less sharply than the squared one.
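To illustrate (my addition, not in the original): comparing the two kernel profiles at the same bandwidth shows that the absolute-value version has both a sharper peak and heavier tails than the Gaussian.
u = np.linspace(-3, 3, 301)
h = 0.5
plt.plot(u, np.exp(-u ** 2 / (2 * h ** 2)), 'r', label='Gaussian: exp(-u^2 / (2 h^2))')
plt.plot(u, np.exp(-np.abs(u) / (2 * h ** 2)), 'b', label='absolute value: exp(-|u| / (2 h^2))')
plt.legend()
plt.title('Kernel profiles, bandwidth h = 0.5')
plt.show()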
%matplotlib inline
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
def f(x):
    return (x + x ** 2) / (1 + x ** 2)
n = 10000
sigma = 0.5
X = np.random.randn(n)
Y = f(X) + sigma * np.random.randn(n)
mesh = np.linspace(-4, 4, 1001)          #<---------------------PARAM-----------------------------
def basis(knots, x):
    """Values of order-1 B-spline basis functions.
    
    For an increasingly sorted collection of knots and a collection of
    query points x, returns a 2-dimensional array of values, of dimension
    len(x) x len(knots).
    
    Parameters
    ----------
    knots: numpy array, one dimensional, increasingly sorted
        Knots of the B-spline function
    x: numpy array, one dimensional
        Query points where to evaluate the basis functions.
    """
    nb_knots = len(knots)
    diag = np.identity(nb_knots)
    res = np.empty((len(x), nb_knots))
    for i in range(nb_knots):
        res[:, i] = np.interp(x, knots, diag[i])
    return res
basis_len = 30            #<---------------------------------PARAM-------------------------------
knots = np.linspace(-3.5, 3.5, basis_len)    #<--------------------------PARAM-------------------
plt.plot(mesh, basis(knots, mesh), linewidth=2)
plt.ylim(0.0, 3)
plt.show()
print("hello")
#index=0;
#for i in mesh :
#    print (i)
#    print (index) 
#    index=index+1
#index=0
#closevalindex=np.empty(len(X),1)    
#for x in X :
#      closevalindex[index] = (np.abs(mesh-x)).argmin()
    #index=index+1
        
meshBasis = basis(knots, mesh)
Qmat = basis(knots, X)
# Least-squares weights via the normal equations
weights = np.linalg.solve(Qmat.T.dot(Qmat), Qmat.T.dot(Y))
print(weights)
# Fitted regression on the mesh
summedVals = meshBasis.dot(weights)
plt.plot(mesh, summedVals, 'g', linewidth=2)
plt.scatter(X, Y, alpha=0.3)
We see that, with many more samples, we can use more knot points and basis functions without getting a singular matrix. Near the center the fit is fairly smooth, but it becomes more jagged near the edges. It tracks the data well.
# Model
S0 = 100
r = 0.05
sigma = 0.2
T = 1.0
# Payoff: ATM Put
K = 100
def payoff(x):
    return np.maximum(K - x, 0)
# Sample size and time discretization
nb_mc = 5000
nb_step = 10
def bs_path(nb_step, nb_mc, S0, r, sigma, T):
    """Simulate geometric Brownian motion
    
    Parameters
    ----------
    nb_step: integer, greater than 1
        number of time steps
    nb_mc: positive integer
        sample size
    S0: positive float
        starting value
    r: float
        drift
    sigma: float
        volatility
    T: positive float
        maturity
    """
    brownian = np.empty((nb_step + 1, nb_mc))
    brownian[0] = 0.0
    brownian[1:] = np.cumsum(np.random.randn(nb_step, nb_mc), axis=0) *\
                   np.sqrt(T / nb_step)
    return S0 * np.exp(sigma * brownian + (r - 0.5 * sigma ** 2) *\
                       np.linspace(0.0, T, nb_step + 1)[:, np.newaxis])
plt.figure()
plt.plot(bs_path(nb_step, nb_mc, S0, r, sigma, T), 'b', alpha=0.2)
plt.show()
We provide a simplistic implementation of the Tsitsiklis-Van Roy (TVR) algorithm.
path = bs_path(nb_step, nb_mc, S0, r, sigma, T)
price = payoff(path[-1])
print("shape of Price is")
print(price.shape)
# Dynamic programming
for index in range(nb_step):
    i = nb_step - index - 1
    discount = np.exp(-r * T / nb_step)
    exercise = payoff(path[i])
    if i != 0:
        continuation = reg_non_param(path[i], 0.3 * np.std(path[i]), path[i], discount * price)  
    else:
        continuation = np.mean(discount * price)
    # intermediate plot at mid time step
    if i == nb_step // 2:
        plt.scatter(path[i], discount * price, marker='2', alpha=0.4)
        plt.scatter(path[i], exercise, color='r')
        plt.scatter(path[i], continuation, color='y')
        plt.title('Continuation and exercise at date %s' % (i * T / nb_step))
    price = np.maximum(exercise, continuation)
np.mean(price)
# Corresponding European option price
np.mean(np.exp(-r * T) * payoff(path[-1]))
Question 1 below
def american_put_price(r, plot_mid=False):
    """Price the ATM American put by dynamic programming for short rate r.

    Pass plot_mid=True to reproduce the intermediate plot at the mid time step.
    """
    path = bs_path(nb_step, nb_mc, S0, r, sigma, T)
    price = payoff(path[-1])
    discount = np.exp(-r * T / nb_step)
    for index in range(nb_step):
        i = nb_step - index - 1
        exercise = payoff(path[i])
        if i != 0:
            continuation = reg_non_param(path[i], 0.3 * np.std(path[i]), path[i], discount * price)
        else:
            continuation = np.mean(discount * price)
        # intermediate plot at mid time step
        if plot_mid and i == nb_step // 2:
            plt.scatter(path[i], discount * price, marker='2', alpha=0.4)
            plt.scatter(path[i], exercise, color='r')
            plt.scatter(path[i], continuation, color='y')
            plt.title('Continuation and exercise at date %s' % (i * T / nb_step))
        price = np.maximum(exercise, continuation)
    return np.mean(price)

for r in [-0.10, 0.001, 0.01, 0.05, 0.10, 0.20, 0.40, 1.00]:
    print("r = %6.3f  ->  price = %.2f" % (r, american_put_price(r)))
We see that, as interest rates increase, the price of the option drops. The option is very expensive at very low interest rates (a rate of 0.1% yields a price of 16.14) and very cheap at high rates: for r = 40% the price is 2.65, and for r = 100% it drops to 0.19. Even as r goes negative, to -10%, the price of the option keeps increasing, to 22.86. I can speculate that, for large risk-free rates, the added benefit of the American optionality is small compared with the benefit of investing at a high rate such as 20%. Since the money invested in the option is tied up for T = 1.0 year, the payoff must be very large to compensate for high interest rates, but large payoffs are very unlikely, hence the low price.
For very low interest rates, this optionality becomes more and more important. Less is lost by tying up money in the option for T = 1.0 year rather than investing it in the market, and if r goes negative it is actually preferable to keep money in the option rather than in cash. Even small payoffs would be profitable at low interest rates, and since small payoffs are more likely to occur, the price of the option must be higher.
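As a sanity check on these figures (an addition of mine, assuming scipy is available): the closed-form Black-Scholes European put is a lower bound for the American price at each rate.
from scipy.stats import norm

def bs_put(S0, K, r, sigma, T):
    """Closed-form Black-Scholes European put price."""
    d1 = (np.log(S0 / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    return K * np.exp(-r * T) * norm.cdf(-d2) - S0 * norm.cdf(-d1)

for r in [-0.10, 0.001, 0.01, 0.05, 0.10, 0.20, 0.40, 1.00]:
    print("r = %6.3f  ->  European put = %.2f" % (r, bs_put(100.0, 100.0, r, 0.2, 1.0)))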
Questions 2, 3, and 4 below.
We now apply the piecewise-linear regression scheme from above to the scatter-plot points for the put price:
import matplotlib.cm as cm
# Model
S0 = 100
r = 0.05
sigma = 0.2
T = 1.0
# Payoff: ATM Put
K = 100
def payoff(x):
    return np.maximum(K - x, 0)
# Sample size and time discretization
nb_mc = 5000
nb_step = 10
path = bs_path(nb_step, nb_mc, S0, r, sigma, T)
price = payoff(path[-1])
# Dynamic programming
fullpricematrix=np.empty([nb_step, nb_mc])
for index in range(nb_step):
    i = nb_step - index - 1
    discount = np.exp(-r * T / nb_step)
    exercise = payoff(path[i])
    if i != 0:
        ############################### BEGIN REGRESSION ###############################
        # Piecewise-linear least-squares fit of the discounted continuation value
        basis_len = 30          #<---------------------------------PARAM-------------------------------
        knots = np.linspace(1.05 * np.min(path[i]), 0.95 * np.max(path[i]), basis_len)    #<----PARAM----
        Qmat = basis(knots, path[i])
        print("Condition number of Q:", np.linalg.cond(Qmat))
        # Normal equations; raises LinAlgError when Qmat.T.dot(Qmat) is singular
        weights = np.linalg.solve(Qmat.T.dot(Qmat), Qmat.T.dot(discount * price))
        # Regression evaluated at the sample points themselves
        continuation = Qmat.dot(weights)
        plt.figure(index)
        plt.scatter(path[i], discount * price, s=2, alpha=0.8)
        plt.scatter(path[i], continuation, s=2, color='g')
        plt.title('Regression at time %s' % (i * T / nb_step))
        
    else:
        continuation = np.mean(discount * price)
    # intermediate plot at mid time step
    if i == nb_step // 2:
        plt.scatter(path[i], discount * price, marker='2', alpha=0.4)
        plt.scatter(path[i], exercise, color='r')
        plt.scatter(path[i], continuation, color='y')
        plt.title('Continuation and exercise at date %s' % (i * T / nb_step))
    price = np.maximum(exercise, continuation)
    fullpricematrix[i] = price
plt.figure(1000)
colors = iter(cm.rainbow(np.linspace(0, 1, nb_step+1)))
for index in range(nb_step):
    i = nb_step - index - 1
    plt.scatter(path[i], fullpricematrix[i], color=next(colors), marker='2', alpha=0.8)
    
    
plt.title('Exercise boundaries, dates 0.1*T-0.9*T, Latest dates are blue/violet, earliest dates are red')
plt.ylim(-1, 40)
plt.xlim(50, 200)
plt.show()
    #print("Size of exercise mat at end of for loop")
    #print(exercise.shape)
    #print("Size of cont mat at end of for loop")
    #print(continuation.shape)
    #print("Size of price mat at end of for loop")
    #print(price.shape)
    
np.mean(price)
import matplotlib.cm as cm
# Model
S0 = 100
r = 0.05
sigma = 0.2
T = 1.0
# Payoff: ATM Put
K = 100
def payoff(x):
    return np.maximum(K - x, 0)
# Sample size and time discretization
nb_mc = 5000
nb_step = 10
path = bs_path(nb_step, nb_mc, S0, r, sigma, T)
price = payoff(path[-1])
# Dynamic programming
fullpricematrixnew=np.empty([nb_step, nb_mc])
for index in range(nb_step):
    i = nb_step - index - 1
    discount = np.exp(-r * T / nb_step)
    exercise = payoff(path[i])
    if i != 0:
        ############################### BEGIN REGRESSION ###############################
        # Piecewise-linear least-squares fit of the discounted continuation value
        basis_len = 30          #<---------------------------------PARAM-------------------------------
        knots = np.linspace(1.05 * np.min(path[i]), 0.95 * np.max(path[i]), basis_len)    #<----PARAM----
        Qmat = basis(knots, path[i])
        print("Condition number of Q:", np.linalg.cond(Qmat))
        # Normal equations; raises LinAlgError when Qmat.T.dot(Qmat) is singular
        weights = np.linalg.solve(Qmat.T.dot(Qmat), Qmat.T.dot(discount * price))
        # Regression evaluated at the sample points themselves
        continuation = Qmat.dot(weights)
        plt.figure(index)
        plt.scatter(path[i], discount * price, s=2, alpha=0.8)
        plt.scatter(path[i], continuation, s=2, color='g')
        plt.title('Regression at time %s' % (i * T / nb_step))
        
    else:
        continuation = np.mean(discount * price)
    # intermediate plot at mid time step
    if i == nb_step // 2:
        plt.scatter(path[i], discount * price, marker='2', alpha=0.4)
        plt.scatter(path[i], exercise, color='r')
        plt.scatter(path[i], continuation, color='y')
        plt.title('Continuation and exercise at date %s' % (i * T / nb_step))
    price = np.maximum(fullpricematrix[i], continuation)   ### use the price matrix from Questions 2/3 as the exercise boundary
    fullpricematrixnew[i] = price             ### new for this cell, Question 4
plt.figure(1000)
colors = iter(cm.rainbow(np.linspace(0, 1, nb_step+1)))
for index in range(nb_step):
    i = nb_step - index - 1
    plt.scatter(path[i], fullpricematrixnew[i], color=next(colors), marker='2', alpha=0.8)    ###WE PLOT THE NEW PRICE MATRIX
    
    
plt.title('Exercise boundaries, dates 0.1*T-0.9*T, Latest dates are blue/violet, earliest dates are red')
plt.ylim(-1, 40)
plt.xlim(50, 200)
plt.show()
    #print("Size of exercise mat at end of for loop")
    #print(exercise.shape)
    #print("Size of cont mat at end of for loop")
    #print(continuation.shape)
    #print("Size of price mat at end of for loop")
    #print(price.shape)
    
np.mean(price)
Question 4 may need to run several times to finish; it often hits singular-matrix errors because of the conditioning of the regression matrix. Although there is a lot of noise, we can see the curves generated for the different dates.
This is a lower bound because we have to account for an investor's preference to exercise the option and invest at the risk-free rate (e.g., one would exercise the option and immediately invest the proceeds at the risk-free rate). Therefore we don't discount the strikes.
The next assignment will certainly involve regression methods or flavors of the Longstaff-Schwartz algorithm.