libStatGen Software 1
BaseAsciiMap.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "BaseAsciiMap.h"
19
20//
21// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
22// both base and color space.
23// class 0 -> 'A' (Adenine - 0x41 and 0x61)
24// class 1 -> 'C' (Cytosine - 0x43 and 0x63)
25// class 2 -> 'G' (Guanine - 0x47 and 0x67)
26// class 3 -> 'T' (Thymine - 0x54 and 0x74)
27// class 4 -> 'N' (Unknown - read error or incomplete data - 0x4E and 0x6E)
28// class 5 -> not a valid DNA base pair character
29//
30// Note: The +1 array size is for the terminating NUL character
31//
32// NB: This table also maps 0, 1, 2, and 3 to the corresponding integers,
33// and '.' to class 4. This allows ABI SOLiD reads to be converted
34// to integers via ReadIndexer::Word2Integer.
35//
36unsigned char BaseAsciiMap::baseColor2int[256+1] =
37 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x00-0x0F
38 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x10-0x1F
39 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005" // 0x20-0x2F
40 "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005" // 0x30-0x3F
41 "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x40-0x4F
42 "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x50-0x5F
43 "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x60-0x6F
44 "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x70-0x7F
45// not used, but included for completeness:
46 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x80-0x8F
47 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x90-0x9F
48 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xA0-0xAF
49 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xB0-0xBF
50 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xC0-0xCF
51 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xD0-0xDF
52 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xE0-0xEF
53 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xF0-0xFF
54 ;
55
56// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
57// just base space (ACTGNactgn).
58unsigned char BaseAsciiMap::base2int[256+1] =
59 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x00-0x0F
60 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x10-0x1F
61 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x20-0x2F
62 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x30-0x3F
63 "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x40-0x4F
64 "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x50-0x5F
65 "\005\000\005\001\005\005\005\002\005\005\005\005\005\005\004\005" // 0x60-0x6F
66 "\005\005\005\005\003\005\005\005\005\005\005\005\005\005\005\005" // 0x70-0x7F
67// not used, but included for completeness:
68 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x80-0x8F
69 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x90-0x9F
70 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xA0-0xAF
71 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xB0-0xBF
72 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xC0-0xCF
73 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xD0-0xDF
74 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xE0-0xEF
75 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xF0-0xFF
76 ;
77
78// Map ASCII values to a 2 (or 3) bit encoding for the base pair value for
79// just color space (0123).
80unsigned char BaseAsciiMap::color2int[256+1] =
81 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x00-0x0F
82 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x10-0x1F
83 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\004\005" // 0x20-0x2F
84 "\000\001\002\003\005\005\005\005\005\005\005\005\005\005\005\005" // 0x30-0x3F
85 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x40-0x4F
86 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x50-0x5F
87 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x60-0x6F
88 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x70-0x7F
89// not used, but included for completeness:
90 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x80-0x8F
91 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0x90-0x9F
92 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xA0-0xAF
93 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xB0-0xBF
94 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xC0-0xCF
95 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xD0-0xDF
96 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xE0-0xEF
97 "\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005\005" // 0xF0-0xFF
98 ;
99
100
101//
102// This is obviously for base space use only:
103//
104const char BaseAsciiMap::int2base[] = "ACGTNMXXXXXXXXXX";
105//
106// convert int to color space value
107//
108const char BaseAsciiMap::int2colorSpace[] = "0123NXXXXXXXXXXX";
109
110/// This table maps 5' base space to the 3' complement base space
111/// values, as well as 5' color space values to the corresponding
112/// 3' complement color space values.
113///
114/// In both cases, invalids are mapped to 'N', which isn't accurate
115/// for ABI SOLiD, but internally it shouldn't matter (on output it
116/// will).
117unsigned char BaseAsciiMap::base2complement[256+1 /* for NUL char */] =
118 "NNNNNNNNNNNNNNNN" // 0x00-0x0F
119 "NNNNNNNNNNNNNNNN" // 0x10-0x1F
120 "NNNNNNNNNNNNNNNN" // 0x20-0x2F
121 "0123NNNNNNNNNNNN" // 0x30-0x3F
122 "NTNGNNNCNNNNNNNN" // 0x40-0x4F
123 "NNNNANNNNNNNNNNN" // 0x50-0x5F
124 "NTNGNNNCNNNNNNNN" // 0x60-0x6F
125 "NNNNANNNNNNNNNNN" // 0x70-0x7F
126// not used, but included for completeness:
127 "NNNNNNNNNNNNNNNN" // 0x80-0x8F
128 "NNNNNNNNNNNNNNNN" // 0x90-0x9F
129 "NNNNNNNNNNNNNNNN" // 0xA0-0xAF
130 "NNNNNNNNNNNNNNNN" // 0xB0-0xBF
131 "NNNNNNNNNNNNNNNN" // 0xC0-0xCF
132 "NNNNNNNNNNNNNNNN" // 0xD0-0xDF
133 "NNNNNNNNNNNNNNNN" // 0xE0-0xEF
134 "NNNNNNNNNNNNNNNN" // 0xF0-0xFF
135 ;
136
137BaseAsciiMap::BaseAsciiMap()
138 : myNumPrimerBases(1)
139{
140 myBase2IntMapPtr = NULL;
141}
142
143BaseAsciiMap::~BaseAsciiMap()
144{
145}
static unsigned char base2complement[]
This table maps 5' base space to the 3' complement base space values, as well as 5' color space values to the corresponding 3' complement color space values.
Definition: BaseAsciiMap.h:41
static unsigned char color2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for just color space (0123).
Definition: BaseAsciiMap.h:65
static unsigned char base2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for just base space (ACTGNactgn).
Definition: BaseAsciiMap.h:61
static const char int2colorSpace[]
Convert from int representation to colorspace representation.
Definition: BaseAsciiMap.h:40
static unsigned char baseColor2int[256+1]
Map ASCII values to a 2 (or 3) bit encoding for the base pair value for both base and color space.
Definition: BaseAsciiMap.h:56
static const char int2base[]
Convert from int representation to the base.
Definition: BaseAsciiMap.h:38