libStatGen Software 1
NonOverlapRegions.cpp
1/*
2 * Copyright (C) 2011 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18//////////////////////////////////////////////////////////////////////////
19
20#include "NonOverlapRegions.h"
21#include <iostream>
22
23NonOverlapRegions::NonOverlapRegions()
24 : myRegions()
25{
26}
27
28
29NonOverlapRegions::~NonOverlapRegions()
30{
31 myRegions.clear();
32}
33
34
35void NonOverlapRegions::add(const char* chrom, int32_t start, int32_t end)
36{
37 // Add the region.
38 myRegions[chrom].add(start, end);
39}
40
41
42bool NonOverlapRegions::inRegion(const char* chrom, int32_t pos)
43{
44 // Return whether or not the position was found within a region.
45 // Note, this will create a NonOverlapRegion for this chrom if it
46 // did not already exist, but it won't have any regions.
47 return(myRegions[chrom].inRegion(pos));
48}
49
50
51NonOverlapRegionPos::NonOverlapRegionPos()
52 : myRegions()
53{
54 myRegionIter = myRegions.begin();
55 myTmpIter = myRegions.begin();
56}
57
58
59NonOverlapRegionPos::NonOverlapRegionPos(const NonOverlapRegionPos& reg)
60 : myRegions()
61{
62 myRegionIter = myRegions.begin();
63 myTmpIter = myRegions.begin();
64}
65
66
67NonOverlapRegionPos::~NonOverlapRegionPos()
68{
69 myRegionIter = myRegions.begin();
70 myTmpIter = myRegions.begin();
71 myRegions.clear();
72}
73
74
75void NonOverlapRegionPos::add(int32_t start, int32_t end)
76{
77 // Check to see if the start/end are valid in relation.
78 if(start >= end)
79 {
80 std::cerr << "NonOverlapRegionPos::add: Invalid Range, "
81 << "start must be < end, but " << start << " >= " << end
82 << std::endl;
83 return;
84 }
85
86 bool added = false;
87 // Locate the correct position in the region list for this start/end.
88 if(inRegion(start))
89 {
90 // Check if the region end needs to be updated.
91 if(end > myRegionIter->second)
92 {
93 myRegionIter->second = end;
94 }
95 added = true;
96 }
97 else
98 {
99 // Check to see if we are at the end.
100 if(myRegionIter != myRegions.end())
101 {
102 // Not at the end.
103 // Check to see if the region overlaps the current region.
104 if(end >= myRegionIter->first)
105 {
106 // Overlaps, so update the start.
107 myRegionIter->first = start;
108 // Check if the end needs to be updated.
109 if(myRegionIter->second < end)
110 {
111 myRegionIter->second = end;
112 }
113 added = true;
114 }
115 }
116 }
117
118 // If we already added the record, check to see if the end of the
119 // new region overlaps any additional regions (know that myRegionIter is
120 // not at the end.
121 if(added)
122 {
123 // Check to see if any other regions were overlapped by this record.
124 myTmpIter = myRegionIter;
125 ++myTmpIter;
126 while(myTmpIter != myRegions.end())
127 {
128 // If the region starts before the end of this one, consume it.
129 if(myTmpIter->first <= end)
130 {
131 if(myTmpIter->second > myRegionIter->second)
132 {
133 // Update this region with the new end.
134 myRegionIter->second = myTmpIter->second;
135 }
136
137 myTmpIter = myRegions.erase(myTmpIter);
138 }
139 else
140 {
141 // This region is not overlapped by the new region, so stop.
142 break;
143 }
144 }
145 }
146 else
147 {
148 // Add the region.
149 myRegionIter = myRegions.insert(myRegionIter,
150 std::make_pair(start, end));
151 }
152}
153
154
156{
157 // Return whether or not the position was found within a region.
158 // If it is found within the region, myRegionIter will point to the region
159 // otherwise myRegionIter will point to the region after the position
160 // or to the end if the position is after the last region.
161
162 // Determine if it needs to search to the left
163 // a) it is at the end
164 // b) the region starts after the position.
165 if(myRegionIter == myRegions.end())
166 {
167 // If the iterator is at the end, search to the left.
168 return(findLeft(pos));
169 }
170 else if(pos < myRegionIter->first)
171 {
172 // Not at the end, so search left if the position is less
173 // than this region's start.
174 return(findLeft(pos));
175 }
176 else
177 {
178 return(findRight(pos));
179 }
180}
181
182
183bool NonOverlapRegionPos::findRight(int32_t pos)
184{
185 // Keep looping until the end or until the position is found.
186 while(myRegionIter != myRegions.end())
187 {
188 // Check to see if we have passed the position.
189 if(pos < myRegionIter->first)
190 {
191 // stop here, position comes before this region,
192 // so myRegionIter is pointing to just after it,
193 // but was not found in the region.
194 return(false);
195 }
196 else if(pos < myRegionIter->second)
197 {
198 // the position is in the region, so return true.
199 return(true);
200 }
201 else
202 {
203 // The position is after this region, so increment.
204 ++myRegionIter;
205 }
206 }
207 // exited because we are at the end of the regions and the position was
208 // not found.
209 return(false);
210}
211
212
213bool NonOverlapRegionPos::findLeft(int32_t pos)
214{
215 if(myRegionIter == myRegions.end())
216 {
217 if(myRegionIter == myRegions.begin())
218 {
219 // There is nothing in this list, so just return.
220 return(false);
221 }
222 // Move 1 lower than the end.
223 --myRegionIter;
224 }
225
226 while(myRegionIter->first > pos)
227 {
228 // This region is before our position, so move to the previous region
229 // unless this is the first region in the list.
230 if(myRegionIter == myRegions.begin())
231 {
232 // Did not find the position and the iter is at the element
233 // just after the position.
234 return(false);
235 }
236 // Not yet to the beginning of the list, so decrement.
237 --myRegionIter;
238 }
239
240 // At this point, the regions iter points to a region that starts
241 // before the position.
242 // Determine if the position is in the region by checking if it is
243 // less than the end of the region.
244 if(pos < myRegionIter->second)
245 {
246 // in the region.
247 return(true);
248 }
249
250 // This region ends before this position. The iterator needs to point
251 // to the region after the position, so increment it.
252 ++myRegionIter;
253 return(false);
254}
This class contains a list of non-overlapping regions, just positions, not including chromosomes (see...
void add(int32_t start, int32_t end)
End position is not included in the region.
bool inRegion(int32_t pos)
Return whether or not the position was found within a region.
bool inRegion(const char *chrom, int32_t pos)
Return whether or not the position was found within a region.
void add(const char *chrom, int32_t start, int32_t end)
End position is not included in the region.