// Approximate DP - Practice Course 2.2 - Exercise 2 and 3
// Infinite Horizon Option Pricing
// Approximate Policy Iteration

// Outline
// ---------
// Start with a randomized policy 'mu'
// Repeat: 
// - Policy Evaluation:
//    - Generate a long trajectory 'Traj' of index {i_k} according to the price model
//    - Obtain samples 
//    - Calculate the approximate cost r
//    - Calculate the Q factors based on the trajectory 'Traj' and the cost 'r'
// - Policy Improvement:
//    - Update 'mu' by comparing Q factors and the exercising cost
// Plot the convergence of 'mu' 
// End


clc
clear

////// Parameters //////

// Option contract and binomial price model
S = 1;               // Initial Price
K = 1;               // Strike Price
u = 1 + 0.01;        // Growth Rate (multiplier applied on an up-move)
d = 1/u;             // Diminish Rate (reciprocal of u, applied on a down-move)
p = 0.5;             // Probability of Moving Up

// Problem size and simulation settings
n = 50;              // Number of States
alpha = 0.99;        // Discount Factor
NoPI = 30;           // Number of Iterations of API
LengthTraj = 4000;   // Length of Simulation Trajectory


////// Key Variables //////
// n - number of states
// s - number of features/basis functions 
// Q - n x 1 vector, the Q factors of the control 'HOLD'
// Phi - n x s matrix, the matrix of features/basis functions (one row per state)

//// Initialization ////

// List of All Possible States: an n x 1 column of prices in increasing order,
// S*d^(n/2-1), ..., S*d, S, followed by S*u, ..., S*u^(n/2).
price_list = S * [(d.^((n/2-1):-1:0))'; (u.^(1:(n/2)))'];

// Q factors of the control 'HOLD', one entry per state (all zero to start)
Q = zeros(n, 1);

s = 3; // Number of Features
////// Polynomial Features (alternative basis, disabled)
//Phi = [ones(n,1), price_list, price_list.^2]; 

// Active basis: each column is exp(-X/2) times a polynomial in the price X.
// NOTE(review): this resembles a weighted Laguerre basis, whose usual third
// polynomial is 1 - 2X + X^2/2 rather than 1 - X + X^2/2 -- confirm intent.
X = price_list;
Phi = [exp(-X/2), exp(-X/2), exp(-X/2)] .* [ones(n,1), 1-X, 1-X+X.^2/2];


//// Policy Iteration ////

////// Initiate a randomized policy
// Every entry of 'mu' is 1 or 2, drawn at random. 'mu' has NoPI+1 rows so that
// it matches the 1:NoPI+1 iteration axis used when the policies are plotted:
// row 1 is the starting policy and row noPI+1 is available for the policy
// produced by iteration noPI.
mu = 1+ceil(rand(NoPI+1,n)-.5);

// Default weight vector, so that Phi*r is always defined after the loop even
// if the linear system below can never be solved (e.g. while A is still zero).
r = zeros(s,1);

for noPI = 1 : NoPI
    
    //// Policy Evaluation Step ////
    // ------------------------ //
    
    //// Generate Random Trajectories of Stock Prices ////
    // Traj holds state indices; it starts as all ones and keeps that value
    // wherever the loop below does not overwrite it.
    Traj = ones(1, LengthTraj);

    for index = 1 : LengthTraj-1
    
        ////// Generate a long trajectory i_1, i_2, ... according to the
        ////// binomial model of stock prices
    
        ////// Store the trajectory in the variable 'Traj'
    
        ////// Write code here:


    end
    
    //// Fit \Phi r = J using the sample trajectories ////
 
    // Accumulators for the s x s linear system A*r = b
    b = zeros(s,1);
    A = zeros(s,s);

    for k = 1 : LengthTraj-1
        i = Traj(k); // State of the Current Sample k

        ////// Calculate the sample costs
        ////// Store the sample costs in A and b
        ////// (do not reuse 'd': it already holds the down-move factor)

        ////// Write code here:


    end
    
    // Solve for r only when A has full rank; otherwise keep the previous r.
    // Left division solves A*r = b without forming the explicit inverse.
    if rank(A) == s
        r = A\b;
    end
    
    
    //// Evaluate Q Factors (only for Exercise 3) ////
    ////// Evaluate Q factors for the control 'HOLD' by using the approximate
    ////// cost and the simulation-based samples
    ////// Write code here:
    ////// Q = ...
    

    
    //// Policy Improvement Step ////
    // ------------------------- // 
    ////// Obtain a new policy 'mu' using the approximate cost J = Phi * r
    ////// Write code here:
    ////// mu = ...
    
   
end



////// Plot Option Prices //////
// Approximate option price Phi*r at every price on the grid
figure(1)

plot(price_list, Phi*r);

xlabel('Stock Price');
ylabel('Approximate Option Prices');
  

////// Plot Convergence Process of Policies //////
// One surface column per stored policy (row of 'mu'). The iteration axis is
// taken from the number of rows of 'mu' so that the x vector always matches
// the surface dimensions; with NoPI+1 stored policies this equals 1:NoPI+1.
figure(2)
surf(1:size(mu,1), price_list, mu');

xlabel('Number of Policy Iteration');
ylabel('Price');
title('Convergence of Policies (blue: exercise, red: hold)')