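Let time t be measured in discrete units (such as days, weeks or months). Consider the inventory problem $x(t+1) = x(t) + u(t) - w(t)$, where $x(t)$ is the stock level, $u(t) \geq 0$ the quantity ordered and $w(t)$ the demand during period t.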

When $x(t) < 0$, this corresponds to a backlogged demand, supposed to be filled immediately once inventory is again available.

1 Expected costs minimization

We suppose that w(t), the uncertain demand, is a random variable with distribution $p_0,\dots,p_N$ on the set $\{0,\dots,N\}$: $ℙ(w(t) = i) = p_i$ for $i = 0,\dots,N$.

A decision rule $𝔲 : ℕ × 𝕏 → 𝕌$ assigns a (stock) order $u = 𝔲(t,x)$ to any state x of inventory stock and to any period t. Once given, we obtain random trajectories $x(t+1) = x(t) + 𝔲(t,x(t)) - w(t)$, with orders $u(t) = 𝔲(t,x(t))$.

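The dynamic programming equation associated with the problem of minimizing the expected costs is $V(T,x) = 0$ and, for $t = T-1,\dots,0$, $V(t,x) = \min_{u} 𝔼[cu + ℛ(x + u - w) + V(t+1, x + u - w)]$,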

where w is a random variable with the distribution $p_0,\dots,p_N$ on the set $\{0,\dots,N\}$.

Question 1 Recalling that $b > c > 0$, show that $g(z) = 𝔼[cz + ℛ(z - w)]$ is a convex function with a minimum achieved at some $S_{T-1} \in \{0,\dots,N\}$. You can draw a graph of the above function for $N = 1$ and $p_0 = 1/2$.

Observe that $𝔼[cu + ℛ (x + u − w)] = − cx + g(x + u)$ , and deduce that the optimal rule is

$$𝔲(T-1, x) = \begin{cases} S_{T-1} - x & \text{if } x < S_{T-1} \\ 0 & \text{if } x \geq S_{T-1}. \end{cases} \tag{11}$$

Interpret this rule, sometimes called base-stock policy.

  // exec inventory_control.sce

  // Costs functions parameters
  // Unit purchasing cost: multiplies the ordered quantity in instant_costs
  purchasing=100;
  // Unit shortage cost: multiplies the negative part of the stock in SHcosts
  shortage=150;
  // Unit holding cost: multiplies the positive part of the stock in SHcosts
  holding=20;

  // Shortage/Holding costs
  function c=SHcosts(zz)
    // Shortage/holding cost of a stock level zz (scalar or vector, elementwise):
    // the negative part of zz is charged at the unit rate `shortage`,
    // the positive part at the unit rate `holding`.
    // `shortage` and `holding` are read from the calling scope.
    backlog=maxi(-zz,0);
    leftover=maxi(zz,0);
    c=shortage*backlog+holding*leftover;
  endfunction

  // Instantaneous costs function
  function c=instant_costs(xx,uu,ww)
    // One-period cost for stock xx, order uu and demand ww:
    // purchase of uu items plus the shortage/holding cost of the
    // resulting stock xx+uu-ww.
    // `purchasing` and SHcosts are read from the calling scope.
    resulting_stock=xx+uu-ww;
    ordering=purchasing*uu;
    c=ordering+SHcosts(resulting_stock);
  endfunction

  // Decision rule
  function u=constant_rule(t,x)
    // Stationary decision rule: orders the same quantity (2 items)
    // whatever the period t and the stock x, which are both ignored.
    u = 2;
  endfunction

  // Trajectories simulations

  function [XX,UU,CC]=trajectories(simulations,rule)
    // Monte Carlo simulation of the stock under the decision rule `rule`.
    //
    // simulations : number of independent scenarios to draw
    // rule        : function u=rule(t,x) returning the order at period t
    //               (t=0,...,horizon-1) for the stock x
    //
    // XX : simulations x (horizon+1) matrix, one stock trajectory per row,
    //      every trajectory starting from a zero stock
    // UU : simulations x horizon matrix of the orders applied
    // CC : simulations x 1 vector of the costs summed over the horizon
    //
    // `horizon`, `transition` and instant_costs are read from the calling
    // scope. The mean of CC is displayed as a side effect.
    XX=[];
    UU=[];
    CC=[];
    // grand(...,'markov',...) returns state indices 1,2,...; index k stands
    // for the demand k-1, so that demands live on {0,...,N} as in the
    // distribution (p_0,...,p_N).
    WW=grand(horizon,'markov',transition,ones(1,simulations))-1;

    for s=1:simulations do
      xx=0;
      uu=[];
      cc=0;
      ww=WW(s,:);
      for t=0:(horizon-1) do
        x_now=xx($);
        u_now=rule(t,x_now);
        w_now=ww(t+1);
        // The period cost depends on the stock BEFORE the transition:
        // evaluating it on the updated stock would count u-w twice.
        cc=cc+instant_costs(x_now,u_now,w_now);
        uu=[uu,u_now];
        xx=[xx,x_now+u_now-w_now];
      end
      //  plot2d([0:horizon],xx)
      XX=[XX;xx];
      UU=[UU;uu];
      CC=[CC;cc];
    end
    //
    disp('expected costs are '+string(mean(CC)));
  endfunction

  // Number of periods simulated
  horizon=12;
  // Demand distribution (one probability per demand level)
  proba=0.01*[40,20,30,10];

  // simulation of iid sequences of length horizon
  // Every row of the transition matrix equals proba, so the next draw
  // does not depend on the current one.
  transition=ones(proba')*proba;

  // Number of Monte Carlo simulations
  // (only the last assignment is effective; the others are alternatives
  // left in place to be swapped by hand)
  simulations=1;
  simulations=100;
  simulations=10;

  // Trajectories simulations and visualization
  xset("window",21);xbasc();
  [XX,UU,CC]=trajectories(simulations,constant_rule);
  // One curve per simulated stock trajectory
  plot2d(0:horizon,XX')

  // Empirical mean cost of the constant rule (echoed: no trailing semicolon)
  constant_rule_costs=mean(CC)

  // number of Monte Carlo simulations
  // (again, only the last assignment is effective)
  simulations=1;
  simulations=100;

  // Same experiment with a larger sample; the curves are added to the
  // current window, which is not cleared in between.
  [XX,UU,CC]=trajectories(simulations,constant_rule);
  plot2d(0:horizon,XX')

  constant_rule_costs=mean(CC)

  ////////////////////////////////////////////////////////////////////////
  //    STOCHASTIC DYNAMIC PROGRAMMING EQUATION
  ////////////////////////////////////////////////////////////////////////

  function [FEEDBACK,state_min]=SDP()
    // Backward stochastic dynamic programming for the minimisation of the
    // expected sum of instantaneous costs, with a zero final cost.
    //
    // Read from the calling scope: controls, demands, proba, horizon and
    // instant_costs.
    //
    // FEEDBACK  : (horizon+1) x cardinal_states matrix; row t+1 holds, for
    //             every stock of the grid, the order minimising the
    //             expected cost-to-go at period t (t=0,...,horizon-1).
    //             The last row is never used and stays at zero.
    // state_min : smallest stock of the grid; the stock x is stored in
    //             column x-state_min+1.
    states=[(mini(controls)-maxi(demands))*(horizon): ...
            (maxi(controls)-mini(demands))*(horizon)];
    // Due to bounds on controls and demands, the state is bounded a priori.

    cardinal_states=prod(size(states));
    cardinal_controls=prod(size(controls));
    cardinal_demands=prod(size(demands));

    state_min=mini(states);
    state_max=maxi(states);

    // Column of the tables associated with the stock x
    function i=index(x)
      i=x-state_min+1;
    endfunction

    // Stock dynamics, clipped so that the next stock stays on the grid
    function xdot=dynamics(x,u,w)
      xdot=maxi(state_min,mini(state_max,x+u-w));
    endfunction

    VALUE=zeros(horizon+1,cardinal_states);
    FEEDBACK=zeros(horizon+1,cardinal_states);

    // Final cost is zero
    VALUE(horizon+1,:)=zeros(states);

    // Rows tt=t+1 for t=horizon-1 down to t=0. Stopping at t=1 would leave
    // the first row (period 0) at zero, i.e. no order at the initial period.
    shift=1+[(horizon-1):(-1):0];
    for tt=shift do
      loc=zeros(cardinal_controls,cardinal_states);
      // local variable containing the values of the function to be minimized
      for jj=1:cardinal_controls do
        uu=controls(jj);
        // the following loop computes an expectation over the demand
        for dd=1:cardinal_demands do
          ww=demands(dd);
          loc(jj,:)=loc(jj,:)+ ...
                    proba(dd)* ...
                    (instant_costs(states,uu,ww)+ ...
                     VALUE(tt+1,index(dynamics(states,uu,ww))));
        end
      end
      // mmn is the minimum over the controls, state by state
      // jjn is the index of the minimising control
      [mmn,jjn]=mini(loc,'r');
      VALUE(tt,:)=mmn;
      // minimal cost
      FEEDBACK(tt,:)=controls(jjn);
      // optimal feedback
    end
  endfunction


  // Data of the cost minimisation problem
  horizon=12;
  proba=0.01*[40,20,30,10];
  // Transition matrix whose rows all equal proba (independent draws)
  transition=ones(proba')*proba;
  // Demand levels 0,1,...: one level per entry of proba
  demands=[0:(prod(size(proba))-1)];
  // Admissible orders: from 0 up to twice the number of demand levels
  controls=[0:2*(prod(size(proba)))];

  // Feedback table and grid offset used by optimal_rule
  [FEEDBACK,state_min]=SDP();

  // Decision rule
  function u=optimal_rule(t,x)
    // Order prescribed by the table FEEDBACK for period t and stock x.
    // FEEDBACK and state_min are read from the calling scope:
    // period t is stored in row t+1, stock x in column x-state_min+1.
    row=t+1;
    col=x-state_min+1;
    u=FEEDBACK(row,col);
  endfunction
  //



  // Trajectories simulations and visualization
  // (uses the current value of `simulations` and a dedicated window)
  xset("window",22);xbasc();
  [XX,UU,CC]=trajectories(simulations,optimal_rule);
  // One curve per simulated stock trajectory
  plot2d([0:horizon],XX')
  xtitle("Stock trajectories (minimal cost)");

  // Empirical mean cost under optimal_rule (echoed: no trailing semicolon)
  optimal_costs=mean(CC)

  // Smaller instance: 4 periods, three demand levels, orders from 0 to 10
  horizon=4;
  proba=0.01*[40,20,40];
  demands=[0:(prod(size(proba))-1)];
  controls=[0:10];

  // Overwrites FEEDBACK and state_min with the tables of this instance.
  // NOTE(review): `transition` is not rebuilt here, so it still has the size
  // of the previous proba -- confirm before calling trajectories again.
  [FEEDBACK,state_min]=SDP();

  // Constant used by the instant_costs variant defined right after this line
  // NOTE(review): the SDP() call above runs before that redefinition.
  K=30;

  // Instantaneous costs function
  function c=instant_costs(xx,uu,ww)
    // One-period cost with a fixed ordering cost: K*sign(uu) adds K as soon
    // as uu>0 and nothing when uu=0, on top of the proportional purchase
    // cost and of the shortage/holding cost of the resulting stock xx+uu-ww.
    // K, `purchasing` and SHcosts are read from the calling scope.
    fixed_part=K*sign(uu);
    proportional_part=purchasing*uu;
    c=fixed_part+proportional_part+SHcosts(xx+uu-ww);
  endfunction

2 Stochastic viability

that stocks are above a critical level $x^{\flat}$ (to limit unsatisfied clients) and that purchasing and holding costs are bounded above by $C^{\sharp}$.

where w is a random variable with the distribution $p_0,\dots,p_N$ on the set $\{0,\dots,N\}$.

  ////////////////////////////////////////////////////////////////////////
  //    STOCHASTIC VIABILITY DYNAMIC PROGRAMMING EQUATION
  ////////////////////////////////////////////////////////////////////////

  // The following code contains a bug:
  // it produces probability value functions larger than 1.
  //

  function [FEEDBACK,state_min]=SVSDP()
    // Backward dynamic programming for the maximisation of the probability
    // that, at every period, the stock is at least `stockflat` and the
    // purchasing plus holding cost is at most `costsharp`.
    //
    // Read from the calling scope: controls, demands, proba, horizon,
    // purchasing, holding, stockflat and costsharp.
    //
    // FEEDBACK  : (horizon+1) x cardinal_states matrix; row t+1 holds, for
    //             every stock of the grid, the order maximising the
    //             probability at period t (t=0,...,horizon-1).
    //             The last row is never used and stays at zero.
    // state_min : smallest stock of the grid; the stock x is stored in
    //             column x-state_min+1.
    states=[(mini(controls)-maxi(demands))*(horizon): ...
            (maxi(controls)-mini(demands))*(horizon)];

    cardinal_states=prod(size(states));
    cardinal_controls=prod(size(controls));
    cardinal_demands=prod(size(demands));

    state_min=mini(states);
    state_max=maxi(states);

    // Column of the tables associated with the stock x
    function i=index(x)
      i=x-state_min+1;
    endfunction

    // Stock dynamics, clipped so that the next stock stays on the grid
    function xdot=dynamics(x,u,w)
      xdot=maxi(state_min,mini(state_max,x+u-w));
    endfunction

    VALUE=zeros(horizon+1,cardinal_states);
    FEEDBACK=zeros(horizon+1,cardinal_states);

    // Final value is 1 for every stock.
    // NOTE(review): the stock reached at the final time is therefore not
    // tested against stockflat -- confirm against the problem statement.
    VALUE(horizon+1,:)=ones(states);

    // Indicator of the stock constraint on the current stock; it depends
    // neither on the order nor on the demand.
    stock_ok=bool2s(states >= stockflat);

    // Rows tt=t+1 for t=horizon-1 down to t=0. Stopping at t=1 would leave
    // the first row (period 0) at zero.
    shift=1+[(horizon-1):(-1):0];
    for tt=shift do
      loc=zeros(cardinal_controls,cardinal_states);
      // local variable containing the values of the function to be maximized
      for jj=1:cardinal_controls do
        uu=controls(jj);
        // the following loop computes an expectation over the demand
        for dd=1:cardinal_demands do
          ww=demands(dd);
          cost_ok=bool2s((purchasing*uu+holding*maxi(states+uu-ww,0)) <= costsharp);
          loc(jj,:)=loc(jj,:)+ ...
                    proba(dd)* ...
                    (stock_ok .* cost_ok .* ...
                     VALUE(tt+1,index(dynamics(states,uu,ww))));
        end
      end
      // mmx is the maximum over the controls, state by state
      // jjx is the index of the maximising control
      [mmx,jjx]=maxi(loc,'r');
      // Each entry is a weighted sum of values in [0,1]; it can only exceed 1
      // through round-off in the weights proba, hence the cap at 1.
      VALUE(tt,:)=mini(mmx,1);
      // maximal probability
      FEEDBACK(tt,:)=controls(jjx);
      // optimal feedback
    end
    //
  endfunction


  // Data of the viability problem
  horizon=12;
  proba=0.01*[40,20,30,10];
  // Transition matrix whose rows all equal proba (independent draws)
  transition=ones(proba')*proba;
  // Demand levels 0,1,...: one level per entry of proba
  demands=[0:(prod(size(proba))-1)];
  // Admissible orders: from 0 up to twice the number of demand levels
  controls=[0:2*(prod(size(proba)))];

  // Lower bound imposed on the stock in SVSDP
  stockflat=-2;
  // Upper bound imposed on the purchasing plus holding cost in SVSDP
  costsharp=1000;

  // Feedback table and grid offset used by optimal_viable_rule
  [FEEDBACK,state_min]=SVSDP();

  // Decision rule
  function u=optimal_viable_rule(t,x)
    // Order prescribed by the table FEEDBACK for period t and stock x.
    // FEEDBACK and state_min are read from the calling scope:
    // period t is stored in row t+1, stock x in column x-state_min+1.
    row=t+1;
    col=x-state_min+1;
    u=FEEDBACK(row,col);
  endfunction
  //


  // Trajectories simulations and visualization
  // (uses the current value of `simulations` and a dedicated window)
  xset("window",23);xbasc();
  [XX,UU,CC]=trajectories(simulations,optimal_viable_rule);
  // One curve per simulated stock trajectory
  plot2d([0:horizon],XX')
  xtitle("Stock trajectories (maximal viability probability)");

  // Take care: CC is not the right criterion
  // The additive criterion should be replaced by a multiplicative one
  // (CC sums instantaneous costs, whereas SVSDP multiplies constraint
  // indicators along the trajectory)

Inventory Control

Contents

1 Expected costs minimization

2 Stochastic viability

References