// Approximate DP - Practice Course 2.3 - Exercise 1
// Infinite Horizon Option Pricing
// Q-Learning

// Outline
// ---------
// Generate a long trajectory of index pairs {(i_k, j_k)} according to the price model
// Choose suitable stepsizes and update the Q-factors
//


clear
clc

////// Parameters //////

// Option contract and starting point
K = 1;                // strike price
S = 1;                // initial price

// Price model: the price is multiplied by u (up move) or d (down move)
p = 0.5;              // probability of moving up
u = 1 + 1e-2;         // growth rate
d = 1/u;              // diminish rate (reciprocal of the growth rate)
n = 10;               // number of states

// Discounting
alpha = 0.9;          // discount factor

// Simulation size
LengthTraj = 10000;   // length of trajectory, i.e. number of samples


////// Variables //////
// n - number of states
// Q - 1 x n vector, stores the current Q-factors
// J - n x LengthTraj matrix, stores all past cost vectors (one column per sample)
// mu - n x LengthTraj matrix, stores all past policies (one column per sample)
// gk - scalar, stepsize gamma_k

//// Initialization ////

// Column vector of all possible prices, in increasing order:
// the first n/2 entries are S*d^(n/2-1), ..., S*d^0 and the last n/2 are S*u^1, ..., S*u^(n/2).
price_list = S*[(d.^((n/2-1):-1:0))'; (u.^(1:(n/2)))'];

// Per-sample history, one column per trajectory step.
J  = zeros(n, LengthTraj);   // cost vectors, all zero initially
mu = ones(n, LengthTraj);    // policies, all ones initially

// Row vectors with one entry per state.
Q = zeros(1, n);             // current Q-factors
f = zeros(1, n);


//// Q-Learning Algorithm ////



// Q-learning along a single simulated trajectory of the price index.
//
// Reads:   n, p, alpha, K, price_list, LengthTraj, and the initial Q, J, mu.
// Updates: Q (current Q-factors), J(:,k) and mu(:,k) for every sample k.
//
// Q(i) approximates the value of holding at state i, so the update target is
// the discounted value at the next state j: alpha * max(price_list(j) - K, Q(j)).

i = n/2;                  // start at the index whose price equals S (d^0 = 1)
visits = zeros(1, n);     // number of times each state has been updated so far

for k = 1 : LengthTraj

    ////// Generate the next sample index j = i_{k+1} from the current i = i_k //////
    // The index moves up with probability p and down otherwise.
    // NOTE(review): at the ends of the price grid the index is clamped (the
    // price stays put); confirm this matches the intended boundary model.
    if rand() < p then
        j = min(i + 1, n);
    else
        j = max(i - 1, 1);
    end

    ////// Select stepsize //////
    // Diminishing stepsize: reciprocal of the number of visits to state i,
    // so Q(i) is the running average of the sampled targets seen at i.
    visits(i) = visits(i) + 1;
    gk = 1 / visits(i);

    ////// Update Q-factors //////
    Q(i) = (1 - gk) * Q(i) + gk * alpha * max(price_list(j) - K, Q(j));

    ////// Update J and mu accordingly
    // Cost vector: better of exercising (price - K) and holding (Q).
    J(:, k) = max(price_list - K, Q');
    // Policy: carry over the previous column, then refresh the visited state.
    if k > 1 then
        mu(:, k) = mu(:, k-1);
    end
    if price_list(i) - K > Q(i) then
        mu(i, k) = 1;     // exercise
    else
        mu(i, k) = 2;     // hold
    end

    // Advance along the trajectory.
    i = j;
end


//// Plot The Convergence of Q-factors, J vectors, and policies ////
// Plots the history recorded in J and mu.
//
// Reads: n, LengthTraj, price_list, J, mu.
// Figures 2 and 3 use at most ~100 evenly spaced snapshots of the history.
// The stride is floored to an integer so the column indices stay valid when
// LengthTraj is not a multiple of 100 (for LengthTraj = 10000 this gives the
// same 100 columns 1, 101, ..., 9901 as before).
snapStep = max(1, floor(LengthTraj/100));
snapIdx  = 1:snapStep:LengthTraj;

// Figure 1: cost at the middle state (index n/2) after every sample.
figure(1);
plot(J(n/2,:));
xlabel('Number of Samples');
ylabel('Option Prices At S=K=1');
title('Convergence of Cost Vectors in Q-Learning');
//print(1,'ADP-Q-J1','-depsc')

// Figure 3: policy snapshots over the price grid (1 = exercise, 2 = hold).
figure(3);
surf(1:length(snapIdx), price_list, mu(:,snapIdx));
xlabel('Number of Policy Iteration');
ylabel('Price');
title('Convergence of Policies (blue: exercise, red: hold)')
//print(3,'ADP-Q-mu','-depsc')


// Figure 2: one curve per snapshot, plotted against the actual prices so the
// x-axis matches its 'Stock Price' label (previously the row index 1..n).
figure(2);
plot(price_list, J(:,snapIdx));
xlabel('Stock Price');
ylabel('Option Price');
title('Convergence of Option Prices');
//print(2,'ADP-Q-J','-depsc')