// Approximate DP - Practice Course 2.2 - Exercise 2 and 3
// Infinite Horizon Option Pricing
// Approximate Policy Iteration
//
// Outline
// ---------
// Start with a randomized policy 'mu'
// Repeat:
//   - Policy Evaluation:
//       - Generate a long trajectory 'Traj' of indices {i_k} according to
//         the price model
//       - Obtain samples
//       - Calculate the approximate cost weights 'r'
//       - Calculate the Q factors based on the trajectory 'Traj' and the
//         weights 'r'
//   - Policy Improvement:
//       - Update 'mu' by comparing Q factors and the exercising cost
// Plot the convergence of 'mu'
// End
//
// This file is an exercise template: the spots marked "Write code here"
// are intentionally left for the reader. The script runs as shipped
// (r stays at zero and mu stays random) so the plots can be checked
// before the exercises are filled in.

clc
clear

////// Parameters //////
K = 1;              // Strike Price
S = 1;              // Initial Price
p = .5;             // Probability of Moving Up
u = 1+1e-2;         // Growth Rate
d = 1/u;            // Diminish Rate
n = 50;             // Number of States (must be even: n/2 is used as a range end)
alpha = 0.99;       // Discount Factor
NoPI = 30;          // Number of Iterations of API
LengthTraj = 4000;  // Length of Simulation Trajectory

////// Key Variables //////
// n   - number of states
// s   - number of features/basis functions
// Q   - n x 1 vector, the Q factors of the control 'HOLD'
// Phi - n x s matrix of features/basis functions
// r   - s x 1 vector of feature weights, so that J is approximated by Phi*r
// mu  - (NoPI+1) x n matrix, row k is the policy at the start of iteration k

//// Initialization ////
// List of All Possible States: n/2 down-moves (ending at S itself),
// followed by n/2 up-moves.
price_list = S*[d.^((n/2-1):-1:0), u.^(1:(n/2))]';
Q = zeros(n,1);
s = 3;              // Number of Features

////// Polynomial Features
//Phi = [ones(n,1), price_list, price_list.^2];
X = price_list;
Phi = [exp(-X/2), exp(-X/2).*(1-X), exp(-X/2).*(1-X+X.^2/2)];

// Start from zero weights so that Phi*r is always defined, even when the
// sampled system below cannot be solved.
r = zeros(s,1);

//// Policy Iteration ////
////// Initiate a randomized policy
// Entries are 1 or 2. NOTE(review): the title of figure 2 suggests
// 1 = exercise and 2 = hold under the default colormap - confirm.
// One extra row is allocated because the final plot shows NoPI+1 policies:
// the initial one plus one per improvement step (row noPI+1).
mu = 1+ceil(rand(NoPI+1,n)-.5);

for noPI = 1 : NoPI

    //// Policy Evaluation Step ////
    // ------------------------ //

    //// Generate Random Trajectories of Stock Prices ////
    Traj = ones(1, LengthTraj);
    for index = 1 : LengthTraj-1
        ////// Generate a long trajectory i_1, i_2, ... according to the
        ////// binomial model of stock prices
        ////// Store the trajectory in the variable 'Traj'
        ////// Write code here:

    end

    //// Fit \Phi r = J using the sample trajectories ////
    b = zeros(s,1);
    A = zeros(s,s);
    for k = 1 : LengthTraj-1
        i = Traj(k);    // State of the Current Sample k
        ////// Calculate the sample costs
        ////// Accumulate the sample statistics in A and b
        ////// (do not store anything in 'd': it holds the down-move factor)
        ////// Write code here:

    end

    // Solve A*r = b only when A has full rank. Otherwise keep the previous
    // weights: this covers the all-zero A produced while the sampling code
    // above is still unimplemented.
    if rank(A) == s
        r = A \ b;
    end

    //// Evaluate Q Factors (only for Exercise 3) ////
    ////// Evaluate Q factors for the control 'HOLD' by using the approximate
    ////// cost and the simulation-based samples
    ////// Write code here:
    ////// Q = ...

    //// Policy Improvement Step ////
    // ------------------------- //
    ////// Obtain a new policy 'mu' using the approximate cost J = Phi x r
    ////// Store it in row noPI+1 of 'mu'
    ////// Write code here:
    ////// mu = ...

end

////// Plot Option Prices //////
figure(1)
plot(price_list, Phi*r);
xlabel('Stock Price');
ylabel('Approximate Option Prices');

////// Plot Convergence Process of Policies //////
// mu' is n x (NoPI+1), matching the 1:NoPI+1 and price_list axes.
figure(2)
surf(1:NoPI+1, price_list,mu');
xlabel('Number of Policy Iteration');
ylabel('Price');
title('Convergence of Policies (blue: exercise, red: hold)')