Wednesday, December 16, 2009

Discrete Processes: Binomial Tree Model

After reading a bit of Baxter and Rennie's book on discrete processes, I decided to implement the basic binomial tree model described in the book. It builds on the binomial branch model: in essence it is the same thing, applied recursively over many time steps. It is mostly used to price an option via arbitrage arguments. The main computational downside of discrete process modelling is that it scales poorly to large problem instances. Unlike continuous processes, which are evaluated through approximation, a discrete process models each tiny step, so an accurate representation requires a lot of memory and computational time.

The model assumes a portfolio of stocks and bonds. This simple combination lets us synthesize the derivative by replicating its payoff. The key point is that the probabilities of the up and down moves of the stock at a given time do not influence our ability to replicate those values. For example, we can say with certainty that if the stock moved up, then down, then up again, we would reach our desired value f(n). We use an algebraic relation on the next up and down stock prices to determine the claim value at a node: we evaluate the claim values at the back of the tree, where the payoff is known, and work our way back to the initial node. The claim value at the initial node is what we should price the option at.
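
As a quick worked example (a one-period version of the sample input further down, assuming the claim is a call struck at the initial price, as in the code below): the stock sits at 100 and moves to either 120 or 80 at the next tick, so f_up = 20 and f_down = 0. Then q = (100 - 80) / (120 - 80) = 0.5 and the claim value now is f = 0.5 * 20 + 0.5 * 0 = 10, whatever the real up/down probabilities are.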

Now, if the market moves up or down, we need to adjust our proportions of stock and bond to compensate. It is essentially like a board game: whenever the stock moves, we make the appropriate adjustment so that the next move (either up or down) stays within our described tree regardless of the outcome. At the end of the time period we are guaranteed to realise the claim value for the given sequence of stock movements from t = 0 to t = n, where n is the number of time ticks (i.e. the depth of the binomial tree). For more information refer to Baxter and Rennie's Financial Calculus.
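
Continuing the one-period example above: the replicating portfolio holds (f_up - f_down) / (s_up - s_down) = (20 - 0) / (120 - 80) = 0.5 units of stock and 10 - 0.5 * 100 = -40 in bonds (a borrowing of 40, with the bond treated as interest-free, as in the code below). If the stock moves up the portfolio is worth 0.5 * 120 - 40 = 20; if it moves down, 0.5 * 80 - 40 = 0. Either way it matches the claim, which is exactly why the option is worth 10 today.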

#include <iostream>
#include <string>
#include <sstream>
#include <vector>
#include <algorithm>
#include <cmath>

using namespace std;

// represent the stock as a value in time
class Stock {
public:
   double price;
   double prUp;
   double prDown;
   Stock() { }
   Stock(double p, double up, double down) : price(p), prUp(up), prDown(down) { }
};

vector<double> fTable;   // represents the claim tree
vector<Stock> graph;   // represents the recombinant tree
int sz;

#define EPS 1e-09

// claims are calculated as:
//      q = (stock_now - stock_down) / (stock_up - stock_down)
//      f = q * f_up + (1 - q) * f_down
// base conditions can be computed based on valuation of the option at t=0 versus the
// payoff given a sequence of situations (when the stock goes up/down for t=0 to t=n).
double computeClaims(int node) {
   int baseNode = (sz-1) >> 1;
   if (fTable[node] >= 0) {
      // memoisation
      return fTable[node];
   }
   if (node >= baseNode) {
      // base case: payoff of a call struck at the initial stock price (graph[1])
      return fTable[node] = max(0.0, graph[node].price - graph[1].price);
   }
   // compute
   double q = (graph[node].price - graph[node*2+1].price) / 
      (graph[node*2].price - graph[node*2+1].price);
   fTable[node] = q * computeClaims(node*2) + (1 - q) * computeClaims(node*2+1);
   return fTable[node];
}

// define a recombinant tree and reconstruct the option value
int main() 
{
   int N;   // the depth of the tree
   cin >> N;
   graph = vector<Stock>((1 << N) + 1);
   
   // read in the recombinant tree (stated in order of time)
   // file format:
   // stockprice prUp
   // stockprice2 prUp2
   // etc.

   int totalCnt = (1 << N);
   for (int i = 0; i < totalCnt-1; i++) {
      double stockPrice, prUp, prDown;
      cin >> stockPrice >> prUp;
      prDown = 1 - prUp;
      graph[i+1] = Stock(stockPrice, prUp, prDown);
   }

   // reconstruct the option price by backtracking
   fTable = vector<double>((1 << N) + 1, -1.0);
   sz = (1 << N) + 1;
   double optionPrice = computeClaims(1);
   cout << "Option price is " << optionPrice << "\n";
   
   // run simulations (optional)
   // file format:
   //   sequence of {up, down}'s equal to N-1.
   // note: stockHolding is calculated by:
   //   s = (f_up - f_down) / (s_up - s_down)
   string status;
   string lastStatus = "-";
   int cnt = 0;
   int curTime = 0;
   double stockHolding = 0.0;
   double bondHolding = 0.0;
   int curBranch = 1;
   while (cin >> status && cnt++ <= N-1) {
      // output the status
      cout << "Time: " << curTime << " Last Jump: " << lastStatus << 
         " Stock Price: " << graph[curBranch].price << 
         " Option Value: " << fTable[curBranch] << " Stock Holding: " 
         << stockHolding << " Bond Holding: " << bondHolding << "\n";
      curTime++;
      lastStatus = status;
      stockHolding = (fTable[curBranch*2] - fTable[curBranch*2+1]) / 
         (graph[curBranch*2].price - graph[curBranch*2+1].price);
      bondHolding = fTable[curBranch] - (stockHolding * graph[curBranch].price);
      if (status == "up") {
         curBranch = (curBranch * 2);
      } else {
         curBranch = (curBranch * 2) + 1;
      }
   }
   if (cnt > 0) {
      cout << "Time: " << curTime << " Last Jump: " << lastStatus << 
         " Stock Price: " << graph[curBranch].price << 
         " Option Value: " << fTable[curBranch] << " Stock Holding: " 
         << stockHolding << " Bond Holding: " << bondHolding << "\n";
   }
   return 0;
}

Here is an example input file based on the book's examples (pipe it in):

4
100 0.75
120 0.75
80 0.25
140 0.75
100 0.25
100 0.75
60 0.25
160 0.75
120 0.25
120 0.75
80 0.25
120 0.75
80 0.25
80 0.75
40 0.25
up
up
down
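
Working the backward induction by hand for this tree, q = 0.5 at every node; the claim values come out as 40, 10, 10 and 0 at time 2, then 25 and 5 at time 1, and 15 at the root, so the program should price the option at 15. The simulation then walks the path up, up, down and rebalances the stock holding from 0.5 to 0.75 to 1.0 along the way.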

Tuesday, December 8, 2009

TC SRM 454 D1-500/D2-1000 (NumbersAndMatches)

Category: Dynamic Programming
URL: http://www.topcoder.com/stat?c=problem_statement&pm=10709

We are given a number composed of a sequence of digits. Each digit is drawn using matches placed in fixed positions (seven positions per digit, as in a seven-segment display). We are allowed to make up to K match moves and are asked to count how many different integers we can construct without discarding any matches or adding new ones to the given number.

First, we decompose the problem into several parts. The first is to represent each digit in our program. A simple way is a 2D integer array in which entry (i, j) is 1 if digit i has a match in position j. An alternative is a bitmask per digit, with the j-th bit set to 1 if there is a match in position j.
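
A small sketch of the bitmask alternative (the mask-building loop and the GCC builtin __builtin_popcount are my own; the segment table is copied from the solution further down):

#include <cstdio>

// match layout per digit, identical to the mat[][] table used in the solution below
int seg[10][7] = {
  {1,1,1,0,1,1,1}, {0,0,1,0,0,1,1}, {1,0,1,1,1,0,1}, {1,0,1,1,0,1,1}, {0,1,1,1,0,1,0},
  {1,1,0,1,0,1,1}, {1,1,0,1,1,1,1}, {1,0,1,0,0,1,0}, {1,1,1,1,1,1,1}, {1,1,1,1,0,1,1}
};

int mask[10];   // bit j of mask[d] is set if digit d has a match in position j

int main() {
   for (int d = 0; d < 10; d++)
      for (int j = 0; j < 7; j++)
         if (seg[d][j]) mask[d] |= 1 << j;
   // number of positions in which two digits differ, e.g. 6 vs 3
   printf("6 vs 3 differ in %d positions\n", __builtin_popcount(mask[6] ^ mask[3]));
   return 0;
}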

Next, we need to compute the transition cost between two digits. For example, we need to quantify how many moves we would use up converting, say, a 6 into a 3. Since we are only allowed a limited number of moves, it's obvious we need to keep track of how many we have used. A starting point is to count the positions in which the two digits differ; if both digits used the same number of matches, the number of moves required would be exactly half of that count.

There is another issue to deal with: the situation where the two digits use different numbers of matches. For example, the digit 6 uses 6 matches whereas the digit 3 only uses 5. We need to keep track of how many surplus (or deficit) matches we accumulate when transitioning from one digit to another. This matters because we cannot add or remove matches from the final number; the number of matches used to construct a new integer must equal that of the original. So when we consider transforming one digit into another we track two things: the number of moves it takes and the running difference in matches used.

We can define the cost function as follows:

   diff(a, b)    = the number of match positions in which digits a and b differ
   surplus(a, b) = matches(a) - matches(b)
   moves(a, b)   = (diff(a, b) + surplus(a, b)) / 2

Calculating the surplus or deficit is simple: count the matches used by each digit and subtract one from the other according to the direction of the transformation. The cost function above also ensures we don't double count the moves involved in shifting a match from a surplus digit to a deficit digit: a move is charged for every match a digit gives up (the "precharge"), and placing a match into a deficit position is then free, because that move was already counted when the match was removed from wherever it came from.
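
For example, converting a 6 into a 3 (using the segment table in the code below): the two digits differ in 3 positions and the 6 uses one more match than the 3, so the transition costs (3 + 1) / 2 = 2 moves and leaves a surplus of 1 match. Intuitively, two matches are removed from the 6, one of them completes the 3, and the leftover one is carried forward as surplus for some other digit.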

Lastly, we need to count all the ways we can construct unique integers such that the total number of matches stays the same (i.e. at the end of our decision process the net surplus is exactly 0) while using at most K moves (which we subtract along the way). This suggests a recursive algorithm: we consider changing the current digit into every possible digit, and depending on the transformation we are left with more, fewer or the same number of matches, and with at most as many available moves as before. We must also handle the case where we "borrow" matches from the latter part of the integer, so the running match balance can go negative; we add an offset to keep the index positive for caching purposes.
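
Written out (with names of my own choosing), this gives the recurrence

   ways(idx, movesLeft, balance) = sum over d = 0..9 of
      ways(idx+1, movesLeft - moves(digit[idx], d), balance + surplus(digit[idx], d))

where moves(a, b) and surplus(a, b) are the cost function above, digit[idx] is the idx-th digit of the original number, terms with movesLeft - moves(digit[idx], d) < 0 are skipped, the base case at the end of the number is 1 if balance == 0 and 0 otherwise, and the answer is ways(0, K, 0).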

To compute the number of ways efficiently we memoise (or apply bottom-up DP to) the algorithm. Note that the number of unique states is rather small, so computational time isn't an issue. An implementation of the above ideas (using memoisation) is shown below:

#include <string>
#include <cstring>
#include <algorithm>
#include <utility>

using namespace std;

class NumbersAndMatches {
public:
   long long differentNumbers(long long, int);
};

int mat[10][7] = {
  {1,1,1,0,1,1,1}, // 0
  {0,0,1,0,0,1,1}, // 1
  {1,0,1,1,1,0,1}, // 2
  {1,0,1,1,0,1,1}, // 3
  {0,1,1,1,0,1,0}, // 4
  {1,1,0,1,0,1,1}, // 5
  {1,1,0,1,1,1,1}, // 6
  {1,0,1,0,0,1,0}, // 7
  {1,1,1,1,1,1,1}, // 8
  {1,1,1,1,0,1,1}  // 9
};

// defines the total number of matches used for digit i
int tot[10] = {6, 3, 5, 5, 4, 5, 6, 3, 7, 6};   

long long dp[19][155][155];
pair<int,int> costTab[10][10];
string num;

#define SURPLUS_MOD 72

long long func(int idx, int moves, int surplus) {
   // offset due to the possibility of negative surplus
   int surplusMod = surplus + SURPLUS_MOD;
   if (idx >= num.size()) {
      // requires the surplus amount of matches to be 0 otherwise its not valid
      if (moves >= 0 && surplus == 0) return 1;
      return 0;   
   }
   if (dp[idx][moves][surplusMod] != -1) return dp[idx][moves][surplusMod];
   long long res = 0;
   // try each digit
   int curDig = num[idx] - '0';
   for (int dig = 0; dig <= 9; dig++) {
      int c = costTab[curDig][dig].first;
      int d = costTab[curDig][dig].second;
      if (moves - c >= 0) {
         res += func(idx+1, moves - c, surplus + d);
      }
   }
   return dp[idx][moves][surplusMod] = res;
}

long long NumbersAndMatches::differentNumbers(long long N, int K) {
   long long res = 0;
   memset(dp,-1,sizeof(dp));
   // compute the cost function for every digit -> digit transition
   for (int i = 0; i <= 9; i++) {
      for (int j = 0; j <= 9; j++) {
         int c = 0;
         for (int k = 0; k < 7; k++) {
            if (mat[i][k] != mat[j][k]) c++;
         }
         int s = tot[i] - tot[j];
         costTab[i][j] = make_pair((c+s)/2, s);
      }
   }
   // decompose the number into a string
   long long vN = N;
   while (vN > 0) {
      num += ((vN % 10) + '0');
      vN /= 10;
   }
   reverse(num.begin(),num.end());
   // calculate the number of ways
   return res = func(0, K, 0);
}
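
As a quick sanity check on the implementation: with K = 0 no match can be moved at all, so for any N the only reachable integer is N itself and differentNumbers(N, 0) should return 1.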

Friday, December 4, 2009

Matrix Exponentiation

Matrix exponentiation is a useful technique that can be applied to a wide variety of problems. Most commonly it is used to solve linear recurrences efficiently, and as such it applies to any problem that can be expressed as a linear recurrence. The issue is that evaluating such recurrences naively, term by term, is slow. Let's consider an easy example that you should know about: Fibonacci numbers.

The sequence is defined by:

   F(n) = F(n-1) + F(n-2),   with F(0) = F(1) = 1

It's easy enough to evaluate it term by term until we hit our targeted n-th Fibonacci number. However, if n were, say, 1 billion, we would need on the order of 1 billion steps to derive the answer. If we represent the sequence in terms of a matrix, a clever optimisation reveals itself.

   [ F(n+1) ]   [ 1  1 ] [ F(n)   ]
   [ F(n)   ] = [ 1  0 ] [ F(n-1) ]

The correctness can be verified by a simple matrix multiplication. More importantly, the next pair of Fibonacci numbers can be represented in exactly the same way.


If we expand:

   [ F(n+2) ]   [ 1  1 ] [ F(n+1) ]
   [ F(n+1) ] = [ 1  0 ] [ F(n)   ]

This is equivalent to:

   [ F(n+2) ]   [ 1  1 ]^2 [ F(n)   ]
   [ F(n+1) ] = [ 1  0 ]   [ F(n-1) ]

We can continue expanding, and eventually a pattern emerges: we can always reduce the expression to a power of our matrix multiplied by the initial conditions. This yields the relation below:

   [ F(n+1) ]   [ 1  1 ]^n [ F(1) ]
   [ F(n)   ] = [ 1  0 ]   [ F(0) ]

Since exponentiation can be done in logarithmic time, we have essentially reduced our Fibonacci computation from linear to logarithmic time. A huge difference in speed for large n! As an exercise, write a logarithmic-time Fibonacci term generator; as the numbers get quite large, feel free to take them modulo an appropriate number (like 10^8).
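
A minimal sketch of one way to do the exercise (my own code, not from the book; it uses the matrix identity above with F(0) = F(1) = 1 and works mod 10^8):

#include <iostream>
using namespace std;

typedef long long ll;
const ll MOD = 100000000LL;   // 10^8, as suggested above

// 2x2 matrices stored row-major as {a, b, c, d}; computes r = x * y (mod MOD)
void mul(ll r[4], const ll x[4], const ll y[4]) {
   ll t0 = (x[0]*y[0] + x[1]*y[2]) % MOD;
   ll t1 = (x[0]*y[1] + x[1]*y[3]) % MOD;
   ll t2 = (x[2]*y[0] + x[3]*y[2]) % MOD;
   ll t3 = (x[2]*y[1] + x[3]*y[3]) % MOD;
   r[0] = t0; r[1] = t1; r[2] = t2; r[3] = t3;
}

// F(n) mod 10^8 with F(0) = F(1) = 1, in O(log n) matrix multiplications
ll fib(ll n) {
   ll res[4]  = {1, 0, 0, 1};   // identity matrix
   ll base[4] = {1, 1, 1, 0};   // the Fibonacci matrix [ 1 1, 1 0 ]
   while (n > 0) {
      if (n & 1) mul(res, res, base);
      mul(base, base, base);
      n >>= 1;
   }
   // [ F(n+1), F(n) ]^T = M^n * [ F(1), F(0) ]^T, so F(n) = res[2]*1 + res[3]*1
   return (res[2] + res[3]) % MOD;
}

int main() {
   ll n;
   while (cin >> n) cout << fib(n) << "\n";
   return 0;
}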

We now expand our knowledge to a slightly more complicated linear recurrence. This is based on the problem "Number Sequences": http://acm.tju.edu.cn/toj/showp2169.html

To solve this recurrence (F(n) = x*F(n-1) + y*F(n-2), with the first two terms a0 and a1 given) we do the same thing as we did with Fibonacci, with a slight modification. Fibonacci is really the special case where x = 1, y = 1 and a0 = a1 = 1. Note the key observation below:

   [ F(n+1) ]   [ x  y ] [ F(n)   ]
   [ F(n)   ] = [ 1  0 ] [ F(n-1) ]

If we perform the matrix multiplication, we recover the correct expressions for both F(n) and F(n-1). Therefore we simply change the original Fibonacci matrix [ 1 1, 1 0 ] to [ x y, 1 0 ] and the initial conditions from 1 and 1 (F1 and F0 respectively) to [ a1 a0 ]. We then use matrix exponentiation to compute the required term and, as always, apply modular arithmetic (mod 100 here, matching the two-digit output in the code below) to keep the numbers representable with integers.

The implementation below demonstrates this idea:

#include <iostream>

using namespace std;

class Matrix {
public:
   long long a;
   long long b;
   long long c;
   long long d;
   Matrix() { }
   Matrix(long long _a, long long _b, long long _c, long long _d) : 
      a(_a), b(_b), c(_c), d(_d) { }
};

Matrix multiply(const Matrix& lhs, const Matrix& rhs) {
   long long a = lhs.a * rhs.a + lhs.b * rhs.c;
   long long b = lhs.a * rhs.b + lhs.b * rhs.d;
   long long c = lhs.c * rhs.a + lhs.d * rhs.c;
   long long d = lhs.c * rhs.b + lhs.d * rhs.d;
   return Matrix(a % 100, b % 100, c % 100, d % 100);
}

ostream& operator<<(ostream& out, const Matrix& M) {
   out << "a: " << M.a << " b: " << M.b << " c: " << M.c << " d: " << M.d;
   return out;
}

// computes M^n via fast (binary) powering; assumes n >= 1
Matrix power(Matrix M, int n) {
   if (n == 1) return M;
   Matrix half = power(M, n/2);
   Matrix sq = multiply(half, half);
   if (n % 2 != 0) {
      // odd exponent: one extra factor of M is needed
      return multiply(sq, M);
   }
   return sq;
}

void output(int n) {
   if (n < 10) cout << "0";
   cout << n << "\n";
}

int main() {
   long long x, y, a0, a1, n;
   while (cin >> x >> y) {
      if (x == 0 && y == 0) break;
      cin >> a0 >> a1 >> n;
      if (n == 0) { output(a0%100); continue; }
      else if (n == 1) { output(a1%100); continue; }
      Matrix m = power(Matrix(x,y,1,0), n-1);
      output((m.a * a1 + m.b * a0) % 100);
   }
   return 0;
}
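
As a quick check, feeding the program the line "1 1 1 1 10" (followed by the terminating "0 0") reduces the recurrence to the Fibonacci case above, and it should print 89, the 10th term of 1, 1, 2, 3, 5, ... taken modulo 100.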

Now for a more advanced example, we use matrix exponentiation to determine the number of cycles with a length smaller than k in a given directed graph.

Problem: TourCounting
Source: TopCoder SRM 306 D1-1050
URL: http://www.topcoder.com/stat?c=problem_statement&pm=6386

An elegant way to calculate the number of paths from a source vertex u to a destination vertex v that take exactly k steps is matrix multiplication. First define A to be the adjacency matrix, with each entry giving the number of edges from u to v. The (u, v) entry of A^k is then precisely the answer we are looking for. The way this works is similar to the Floyd-Warshall algorithm: the matrix multiplication considers each intermediate node w and links u to w and w to v. Because the combination step is multiplicative (rather than additive, as for shortest paths), it counts the number of paths according to the multiplication principle from discrete mathematics.

To illustrate this principle, consider a snapshot of the matrix A^t alongside the adjacency matrix itself, A^1:


Suppose we want to calculate the number of paths from vertex 0 back to itself (i.e. a cycle) that take exactly t+1 steps. Vertex 0 can be entered from either vertex 1 or vertex 2. If at state t we can reach vertex 1 from vertex 0 via 2 paths and vertex 2 from vertex 0 via 3 paths, then we can reach vertex 0 from itself via 2 + 3 = 5 paths of length t+1. This is a direct consequence of the matrix multiplication process; validate it yourself for the first row and column. Now suppose the adjacency matrix is changed so that there are two edges from vertex 1 to vertex 0, as below:



Then we can reach vertex 0 in a total of 2*2 + 1*3 = 7 ways, since we may choose either of the two edges from vertex 1 to vertex 0. Again, this is a consequence of the matrix multiplication algorithm, and the argument holds for every other cell in the matrix. So what is the advantage of this method? Efficiency: we can calculate matrix powers in a logarithmic number of matrix multiplications, using an algorithm similar to fast powering:

   A^1 = A
   A^n = (A^(n/2))^2            if n is even
   A^n = (A^((n-1)/2))^2 * A    if n is odd

So now we know how to calculate the number of paths from a source vertex u to a destination vertex v that take exactly k steps. The actual problem asks for the number of cycles of fewer than k steps. For a given power A^t we sum the diagonal (the trace) to get the number of cycles of exactly t steps. To count all cycles of fewer than k steps we would normally have to compute every power up to k-1 and sum the traces, but this is not fast enough, as k can be as large as one million. We can reduce the work with a method similar to our matrix powering algorithm. First define a function f:

f(n) = A + A^2 + ... + A^n, i.e. the matrix whose (u, v) entry is the number of ways to get from u to v using at least 1 and at most n steps

The main optimisation is reducing this from linear work (summing all the powers up to n) to logarithmic. The big hint that logarithmic is achievable is that we can divide the input by a constant factor. If we split the sum into two f(n/2)-sized parts we are close, but not quite there: the upper n/2 part is not identical to the lower n/2 part. However, using the simple property of powers a^(b+c) = a^b * a^c, we can multiply one of the f(n/2) parts by A^(n/2) to obtain the correct upper half. With this, the recurrence (for even n) becomes:

   f(n) = f(n/2) + f(n/2) * A^(n/2)

Like our fast powering algorithm, we need to distinguish between odd and even n. The easy way to make an odd number even is to subtract 1 from it: we peel off one odd power and make all successive calls even. This keeps the recurrence correct while maintaining the time complexity:

   f(n) = f(n-1) + A^n    (n odd)
   f(1) = A

Then it's simply a matter of implementation, which is the easy part. Just note that we need to apply the modulus sensibly to ensure none of the computations overflow.

#include <vector>
#include <string>

using namespace std;

class TourCounting {
public:
   int countTours(vector <string>, int, int);
};

class Matrix {
public:
   vector<vector<long long> > data;
   Matrix() { }
   Matrix(int n, int m) {
      data = vector<vector<long long> >(n, vector<long long>(m, 0));
   }
};

long long MOD;

Matrix operator*(const Matrix& lhs, const Matrix& rhs) {
   Matrix res(lhs.data.size(), rhs.data[0].size());
   for (int i = 0; i < lhs.data.size(); i++) {
      for (int j = 0; j < lhs.data[i].size(); j++) {
         for (int k = 0; k < rhs.data[0].size(); k++) {
            res.data[i][k] = (res.data[i][k] + lhs.data[i][j] * rhs.data[j][k])%MOD;
         }
      }
   }
   return res;
}

Matrix operator+(const Matrix& lhs, const Matrix& rhs) {
   Matrix res(lhs.data.size(),lhs.data[0].size());
   for (int i = 0; i < lhs.data.size(); i++) {
      for (int j = 0; j < lhs.data[0].size(); j++) {
         res.data[i][j] = (lhs.data[i][j] + rhs.data[i][j])%MOD;
      }
   }
   return res;
}

Matrix power(const Matrix& lhs, int P) {
   if (P == 1) return lhs;
   Matrix tmp = power(lhs, P/2);
   if (P % 2 != 0) {
      Matrix n = tmp * tmp;
      Matrix m = n * lhs;
      return m;
   }
   Matrix v = tmp * tmp;
   return v;
}

long long compute(const Matrix& m) {
   long long res = 0;
   for (int i = 0; i < m.data.size(); i++) res = (res + m.data[i][i]) % MOD;
   return res % MOD;
}

Matrix dp[1000001];
int visited[1000001];

Matrix f(const Matrix& ref, int n) {
   if (visited[n]) return dp[n];
   visited[n] = 1;
   if (n == 1) return dp[n] = ref;
   if (n % 2 != 0) {
      // odd
      Matrix v = f(ref, n-1) + power(ref, n);
      return dp[n] = v;
   }
   // even 
   Matrix vE = f(ref, n/2) * power(ref, n/2);
   Matrix vEE = vE + f(ref, n/2);
   return dp[n] = vEE;
}

int TourCounting::countTours(vector <string> g, int k, int m) {
   Matrix mat((int)g.size(),(int)g.size());
   for (int i = 0; i < g.size(); i++)
      for (int j = 0; j < g[i].size(); j++)
         mat.data[i][j] = g[i][j] == 'Y' ? 1 : 0;
   MOD = m;
   long long res = 0;
   Matrix rr = f(mat, k-1);
   res = compute(rr);
   return res;
}
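
As a small sanity check (a test of my own, not one of the problem's examples): for the complete directed graph on three vertices, g = {"NYY", "YNY", "YYN"}, there are no cycles of length 1 and six cycles of length 2 (two for each pair of vertices), so countTours(g, 3, 100) should return 6.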