libStatGen Software 1
SmithWaterman.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <stdint.h>
21#include "SmithWaterman.h"
22
23// put TEST below here, so that makedepend will see the .h, so that we
24// can get a clean dependency for SmithWaterman.o, so that we can at least
25// compile the header when we change it.
26
27#if defined(TEST)
28
29#include <getopt.h>
30#include "Generic.h"
31
32// g++ -g -o testSW -DTEST SmithWaterman.cpp
33//
34// Smith-Waterman - test code uses a 256x256 array of int16
35//
36int swat(
37 bool showAllCases,
38 const char *A,
39 const char *qualities,
40 const char *B,
41 int direction,
42 const char *expectedCigarString,
43 int expectedSumQ
44)
45{
46 int allowedInsertDelete = 1024;
47 int errors = 0;
48
49 // read length 256
50 // reference length 1024
51 SmithWaterman<256, 1024, uint16_t, const char *, const char *, const char *, uint32_t, uint32_t > sw(&A, &qualities, &B, strlen(A), strlen(B), allowedInsertDelete, direction);
52
53 //
54 // now we align the read:
55 //
56 sw.populateH();
57 sw.populateAlignment();
58
59 int sumQ = sw.getSumQ();
60
61 CigarRoller cigar;
62 cigar.clear();
63 sw.rollCigar(cigar);
64
65 const char *cigarStr = cigar.getString();
66
67 //
68 // now we pretty print the results
69 //
70
71 bool badCigar = false, badQuality = false;
72
73 if (strcmp(cigarStr, expectedCigarString)!=0)
74 {
75 badCigar = true;
76 errors ++;
77 }
78
79 if (sumQ != expectedSumQ)
80 {
81 badQuality = true;
82 errors ++;
83 }
84
85
86
87 if (showAllCases || errors>0)
88 {
89 cout << "=============" << endl;
90 cout << " Read: " << A << endl;
91 cout << " Reference: " << B << endl;
92 cout << " Direction: " << direction << endl;
93 cout << "Max Cell: " << sw.maxCostValue << " located at " << sw.maxCostPosition << endl;
94 cout << "M: " << sw.m << " N: " << sw.n << endl;
95 cout << "Cigar String: " << cigarStr ;
96 if (badCigar)
97 cout << " (EXPECTED: " << expectedCigarString << ")";
98 cout << endl;
99 cout << " sumQ:" << sumQ;
100 if (badQuality)
101 cout << " (EXPECTED: " << expectedSumQ << ")";
102 cout << endl;
103
104 if (strlen(B) < 100 || showAllCases)
105 sw.printH(false);
106
107 for (vector<pair<int,int> >::iterator i = sw.alignment.begin(); i != sw.alignment.end(); i++) cout << *i << endl;
108
109 cout << "=============" << endl << endl;
110 }
111
112 delete cigarStr;
113
114 return errors;
115}
116
117
118// test with Sequence 1 = ACACACTA
119// Sequence 2 = AGCACACA
120int main(int argc, const char **argv)
121{
122 int errors = 0;
123
124 bool showAllCasesFlag = false;
125 int opt;
126
127 while ((opt = getopt(argc, (char **) argv, "v")) != -1)
128 {
129 switch (opt)
130 {
131 case 'v':
132 showAllCasesFlag = true;
133 break;
134 default:
135 cerr << "usage: testSW [-v]" << std::endl;
136 exit(1);
137 }
138 }
139
140
141 // CIGAR explanation - for backward SW runs, the corresponding
142 // CIGAR string is generated from the back of the string to the
143 // front. Recall that the soft clipping is only done at the
144 // "end" of the string, taking direction into account.
145
146 // forwards - simple
147 errors += swat(showAllCasesFlag, "1234", "\"#$-", "1235", 1, "3M1S", 0);
148
149 // backwards - simple
150 errors += swat(showAllCasesFlag, "1234", "\"#$-", "1235", -1, "4M", 12);
151
152 // backwards - soft left clip
153 errors += swat(showAllCasesFlag, "1234", "\"#$-", "0234", -1, "1S3M", 0);
154
155 // delete in read (arg 1) - forward
156 errors += swat(showAllCasesFlag, "123467890", "\"#$%^&*()-", "1234567890", +1, "4M1D5M", 50);
157
158 // insert in read (arg 1) - forward
159 errors += swat(showAllCasesFlag, "1234556789", "\"#$%^&*()-", "1234567890", +1, "5M1I4M", 50);
160
161 // delete in read (arg 1) - backward
162 errors += swat(showAllCasesFlag, "X123467890", "#\"#$%^&*()-", "1234567890", -1, "1S4M1D5M", 50);
163
164 // insert in read (arg 1) - backward
165 errors += swat(showAllCasesFlag, "1234556789", "\"#$%^&*()-", "0123456789", -1, "4M1I5M", 50);
166
167 // insert in read (arg 1) - backward
168 errors += swat(showAllCasesFlag, "X1223456789", "00000000000", "00123456789", -1, "1S1M1I8M", 50);
169
170 // insert in read (arg 1) - backward
171 errors += swat(showAllCasesFlag, "XY1223456789", "000000000000", "000123456789", -1, "2S1M1I8M", 50);
172
173 // forward - soft right clip of 2 bases - sumQ should be 0
174 errors += swat(showAllCasesFlag, "123456700", "\"#$%^&*()-", "123456789", +1, "7M2S", 0);
175
176 // insert in read (arg 1) - forward w/2 mismatches at end
177 errors += swat(showAllCasesFlag, "1023456700", "\"#$%^&*()-", "123456789", +1, "1M1I6M2S", 50);
178
179 // insert in read (arg 1) - forward w/2 mismatches at end
180 errors += swat(showAllCasesFlag, "CTCCACCTCCCGGTT", "111111111111111", "TCCACCTCCCAGGTT", -1, "1S10M1D4M", 50);
181
182 //
183 errors += swat(showAllCasesFlag, "1234", "0000", "12345", +1, "4M", 0);
184
185 //
186 errors += swat(showAllCasesFlag, "1234X", "00000", "12345", +1, "4M1S", 0);
187
188 //
189 errors += swat(showAllCasesFlag, "4321", "0000", "7654321", -1, "4M", 0);
190
191 //
192 errors += swat(showAllCasesFlag, "X4321", "00000", "7654321", -1, "1S4M", 0);
193
194 //
195 errors += swat(showAllCasesFlag, "X432A10", "0000000", "76543210", -1, "1S3M1I2M", 50);
196
197 //
198 errors += swat(showAllCasesFlag, "1345", "0000", "12345", -1, "1M1D3M", 50);
199
200 errors += swat(showAllCasesFlag, "45689", "00000", "1234567890", -1, "3M1D2M", 50);
201
202// errors += swat(showAllCasesFlag, "AATAATTTTTTATATACAGATCGCTGTAGAGTGTAGTTATAGTATGATTCCAACTTTTATTTCTTTCATGACTAATTATATGTATACATGTGCCATGTTGGTGTGCTG", "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "TCCAATGTAGGGCTGTTATAAACAGTGTTGATACATATGTTTTTGTATAAGTCTTTGTTGAATACATGCTTTCATTTTTGTAGGGTATATGTCCAGGAATTAAATTTTTGCATTATTGGGGAAGTTCAAACGTAGATCAGTAGATGTTCCCAAATGATTTTCAGGATATGTATCCATGTAAATTCCTACCAGCAATGCAGGAGAATTCCAATTGCCCATGTTCTAATCAGAATATTGTTATATCCTAAGACTAATTTTAAATATTCTGATGGGTGTAGAGTGGAGGCATAGTATGATTTCAACTTGTATTTCTTTCATGACTAATTATCTTCTATGTTAATTGTTATTTTGTATGTTTATTGCAAAGTGCCTATCCAGAATTTTTGTCTATAATTTTGTTGTGCTGTCTCTTGCTTTATGAATTTTATAGGATTCTTAATATTATAATTGAGTTATCTTTCTTTTTTATTATTATTATTATACTTTAAGTTTTAGGGTATATGTGCACAACGTGCAAGTTTGTCACATATGTATACATGTGCCATGTTGGTGTGCTGCACCCATTAACTCATCATTTAGCATTAGGTATATCTCCTAATGCTATCCCTTCCTCCTCCCCCCACCCCACAACAGTCCCCGGTGTGTGATGTTCCCCTGCCTTTGTCCTCTTTCTTATACTTGCATGAGCAATCTCCTCAAACTGATACTTGCCTTTTTTGTCCTTGGTGTGGTTTGGCTCTGTGTTCCCACCCAAATCTTCATAATACCCATGTGCCAAGGGTGGGACTGGGTGGAGGTAATTGGGTCATGGGGATGGTTTCCCTCATACTATTATGATAGTGAGTGTTTTCACGAGACCTGATGGTTTTATAACTGTGTGGCATTTCCCTTGCTTCCACTCACTCCATCCTGCCACCCTGTGAAGAAGGTGCCTGCTTCTCCTTTGGTTACTGCTATGATTGTAAGTTTCCTGAGGCCTCCCCAGCAACGCAAAACTGTGAATCAATTAAACCTTTTTCCTTTATAAATTACTAAGTCTTGGGTATTTCTTCATAGTGTTGTGAGCATAGACTAAAACAGTAAGTTGTTACCAGGAGTGGGGTACTGCTGTAAGATAACTGAGAATGTGAAAGTGACTTAGGAACTAGGTAATGAGCAGAGGTTGGAACAGTTTAAAAGGCTCAGAAGAAGACAGAAAGATGTGGGAAAGTTTGGA", -1, "77M200D31M", 50);
203
204 errors += swat(showAllCasesFlag, "TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACGTCATCGGTCAGAAATTGGG", "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "CCGAGATTGTGCCATTGCACTCCTGCCTGGGTAACAGAGTCAGACCCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAGATTAGGTTTTATAGATGGAAAATTCACAGCTCTCTCCAGATCAGAAATCTCCAAGAGTAAATTAGTGTCTTAAAGGGGTTGTAATAACTTTCCTATGTGACTAAGTGCATTATTAATCAATTTTTCTATGATCAAGTACTCCTTTACATACCTGCTAATACAATTTTTGATATGAAATCAGTCCTAGAGGGAATCAATGTAAGATACAGACTTGATGAGTGCTTGCAGTTTTTTATTGACAATCTGAAGAATGACTTGACTCTAAATTGCAGCTCAAGGCTTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACAGGAATCAAATCTGTCTAGCCTCTCTTTTTGGCAAGGTTAAGAACAATTCCACTTCATCCTAATCCCAATGATTCCTGCCGACCCTCTTCCAAAAACTATTTAAAGACATGTTCTTCAAAGTTATATTTGTCTTTCCTTCAGGGAGAAAAAGAATACCAATCACTTATAATATGGAAACTAGCAGAAATGGGTCACATAAGTCATCTGTCAGAAATTGGGAAAATAGAGTAGGTCAGTCTTTCCAGTCATGGTACTTTTACCTTCAATCA", -1, "88M200D20M", 50);
205
206 // prefix TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATAC
207 // suffix GTCATCTGTCAGAAATTGGGA
208 cout << endl << "Total Errors found: " << errors << endl;
209}
210#endif
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
Definition: CigarRoller.h:67
void clear()
Clear this object so that it has no Cigar Operations.
const char * getString()
Get the string reprentation of the Cigar operations in this object, caller must delete the returned v...