DNA Sequence Alignment using Needleman–Wunsch Algorithm

Posted Saturday, October 2, 2010 in Old JamesCMS Posts

Here's an implementation of the Needleman-Wunsch algorithm to align two DNA sequences.

{{C#}}
/*Author: James Santiago
 *Date: 10 October 2010
 */
 
 
using System;
using System.Collections.Generic;
using System.Text;
 
namespace DnaSequenceAlignment
{
    class Program
    {
        static void Main(string[] args)
        {
            Console.WriteLine("Enter the first sequence to evaluate:");
            string sequence1 = Console.ReadLine();
 
            Console.WriteLine("Enter the second sequence to evaluate:");
            string sequence2 = Console.ReadLine();
 
            Console.WriteLine(AlignSequences(sequence1, sequence2, -5));
            Console.ReadLine();
        }
 
        /// <summary>
        /// Discovers the optimal alignment of two DNA sequences according to the 
        /// Needleman–Wunsch algorithm.
        /// </summary>
        /// <param name="sequence1">The first DNA sequence to align.</param>
        /// <param name="sequence2">The second DNA sequence to align.</param>
        /// <param name="gapPenalty">The gap penalty.</param>
        /// <returns>Returns a string denoting the optimal alignment of the two 
        /// sequences provided.</returns>
        private static string AlignSequences(string sequence1, string sequence2,
            int gapPenalty)
        {
            //This declares the similarity matrix and the appropriate scores.
            int[,] simMatrix = new int[,] { { 10, -1, -3, -4 }, 
                                            { -1, 7, -5, -3 }, 
                                            { -3, -5, 9, 0 }, 
                                            { -4, -3, 0, 8 } };
                       
            //Sequences are stored in char arrays.
            char[] sequenceArray1 = sequence1.ToCharArray();
            char[] sequenceArray2 = sequence2.ToCharArray();
 
            int length1 = sequence1.Length;
            int length2 = sequence2.Length;
 
            //The score matrix is used to score each combination of characters.
            int [,] scoreMatrix = new int[length1, length2];
 
            string matrixp = "\nScore Matrix:\n\n";
 
            //Calculates the scores for first column in the score matrix.
            for (int i = 0; i < length1; i++)
            {
                scoreMatrix[i, 0] = gapPenalty * i;
            }
 
            //Calulates the scores for the first row in the score matrix.
            for (int i = 0; i < length2; i++)
            {
                scoreMatrix[0, i] = gapPenalty * i;
            }
 
            //Calculates the scores for the inner portion of the score matrix.
            int match, delete, insert;
            for (int i = 1; i < length1; i++)
            {
                for (int j = 1; j < length2; j++)
                {
                    int seq1 = seqVal(sequence1[i]);
                    int seq2 = seqVal(sequence2[j]);
 
                    match = scoreMatrix[i - 1, j - 1] + simMatrix[seq1, seq2];
                    delete = scoreMatrix[i - 1, j] + gapPenalty;
                    insert = scoreMatrix[i, j - 1] + gapPenalty;
 
                    int max = Math.Max(match, delete);
                    scoreMatrix[i, j] = Math.Max(insert, max);
                    matrixp += "\t" + scoreMatrix[i, j] + " ";
                }
                matrixp += "\n";
            }
 
            //This prints out the inner portion of the score matrix (no first 
            //row/first column).
            Console.WriteLine(matrixp);
 
            string align1 = "";
            string align2 = "";
            int k = sequence1.Length - 1;
            int l = sequence2.Length - 1;
 
            //Creates the alignment according to the scores in the score matrix.  
            //The while loop works from the last cell of the matrix (last value
            //in the array) then moves according to scores in the top, left and
            //diagonal cells.  A score that denotes a match would move the loop
            //diagonally, a score of delete would move the loop left and a score
            //of insert would move the loop up.
            while (k >= 0 && l >= 0)
            {
                int score, scoreDiag, scoreUp, scoreLeft;
                score = scoreMatrix[k, l];
                if (k != 0 && l != 0) scoreDiag = scoreMatrix[k - 1, l - 1];
                else scoreDiag = 100;
                if (l != 0) scoreUp = scoreMatrix[k, l - 1];
                else scoreUp = 100;
                if (k != 0) scoreLeft = scoreMatrix[k - 1, l];
                else scoreLeft = 100;
 
                int seq1 = seqVal(sequence1[k]);
                int seq2 = seqVal(sequence2[l]);
 
                if (score == scoreDiag + simMatrix[seq1, seq2])
                {
                    align1 = sequence1[k] + align1;
                    align2 = sequence2[l] + align2;
                    k--;
                    l--;
                }
                else if (score == scoreLeft + gapPenalty)
                {
                    align1 = sequence1[k] + align1;
                    align2 = "-" + align2;
                    k--;
                }
                else if (score == scoreUp + gapPenalty)
                {
                    align1 = "-" + align1;
                    align2 = sequence2[l] + align2;
                    l--;
                }
                else
                {
                    align1 = sequence1[k] + align1;
                    align2 = sequence2[l] + align2;
                    k--;
                    l--;
                }
            }
            while (k >= 0)
            {
                align1 = sequence1[k] + align1;
                align2 = "-" + align2;
                k--;
            }
            while (l >= 0)
            {
                align1 = "-" + align1;
                align2 = sequence1[l] + align2;
                l--;
            }
 
            return "Optimal Alignment:\n\n" + align1 + "\n" + align2;
        }
 
        /// <summary>
        /// Finds the value of each nucleotide.
        /// </summary>
        /// <param name="evalChar">The character to be replaced.</param>
        /// <returns>Returns the int value of the nucleotide.</returns>
        private static int seqVal(char evalChar)
        {
            switch (evalChar)
            {
                case 'A':
                    return 0;
                case 'C':
                    return 1;
                case 'G':
                    return 2;
                case 'T':
                    return 3;
                default:
                    return -1;
            }
            
        }
 
    }
}