libStatGen Software 1
MemoryMapArray.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __MEMORYMAPARRAY_H
19#define __MEMORYMAPARRAY_H
20
21#ifndef __STDC_LIMIT_MACROS
22#define __STDC_LIMIT_MACROS
23#endif
24#include <errno.h>
25#include <stdint.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#ifndef _WIN32
31#include <unistd.h> // for gethostname()
32#endif
33
34#include <string>
35#include <sys/types.h>
36#include <time.h>
37
38// STL:
39#include <ostream>
40#include <sstream>
41
42#include "Generic.h"
43#include "MemoryMap.h"
44
45
46//
47// This file defines a template for generating memory map backed arrays
48// of different types of values.
49//
50// The template assumes that the mapped files are broken into two parts,
51// first, a header (MemoryMapArrayHeader), then followed by the data
52// in the array.
53//
54// typedefs are used to declare various types of arrays beforehand,
55// since there will be only a few.
56//
57// They are:
58// mmapArrayUint32_t;
59// mmapArrayBool_t;
60// mmapArray4Bit_t;
61//
62// XXX consider adding env("USER"), argv[0], date/time creation, etc.
63//
65{
66public:
67 void constructorClear()
68 {
69 memset(this, 0, sizeof(*this));
70 }
71 uint32_t typeCookie;
72 uint32_t typeVersion;
73 uint32_t contentCookie;
74 uint32_t contentVersion;
75 size_t headerSize;
76
77 // file generation info
78 time_t creationDate;
79 char creationUser[32];
80 char creationHost[32];
81 char application[32];
82 // now describe the data:
83 size_t elementCount;
84 void debugPrint(FILE *);
85 size_t getHeaderSize(int i)
86 {
87 return sizeof(*this);
88 }
89
90 void setApplication(const char *s)
91 {
92 strncpy(application, s, sizeof(application)-1);
93 application[sizeof(application)-1] = '\0';
94 }
95 void setCreationUser(const char *s)
96 {
97 strncpy(creationUser, s, sizeof(creationUser)-1);
98 creationUser[sizeof(creationUser)-1] = '\0';
99 }
100 void setCreationHost(const char *s)
101 {
102 strncpy(creationHost, s, sizeof(creationHost)-1);
103 creationHost[sizeof(creationHost)-1] = '\0';
104 }
105};
106
107//
108// stream output for header information
109//
110std::ostream &operator << (std::ostream &stream, MemoryMapArrayHeader &h);
111
112//
113// This class object represents the application specific information that doesn't
114// fit in the general header above. Since it is only allocated via an mmap operation,
115// as part of the mapped file, the destructor must never be called. The virtual
116// destructor is declared to eliminate gcc warnings.
117//
118// For many arrays, this will be empty.
119//
121{
122protected:
123 size_t headerSize; // set in ::create and ::open only
124public:
125 size_t getHeaderSize()
126 {
127 return headerSize;
128 }
129 // other stuff follows...
130};
131
132template <
133class elementT,
134typename indexT,
135unsigned int cookieVal,
136unsigned int versionVal,
137elementT accessorFunc(char *base, indexT),
138void setterFunc(char *base, indexT, elementT),
139size_t elementCount2BytesFunc(indexT),
140class arrayHeaderClass>
142{
143protected:
144 arrayHeaderClass *header;
145 char *data;
146 std::string errorStr;
147public:
148 void constructorClear()
149 {
150 header = NULL;
151 data = NULL;
152// errorStr = "";
153 }
155 {
156 constructorClear();
157 }
159 {
160 if (data) close();
161 }
162
163 const std::string &getErrorString()
164 {
165 return errorStr;
166 }
167
168 arrayHeaderClass &getHeader()
169 {
170 return *header;
171 }
172
173 void setContentCookie(uint32_t c)
174 {
175 header->contentCookie = c;
176 }
177 void setContentVersion(uint32_t v)
178 {
179 header->contentVersion = v;
180 }
181
182 // accessing
183 inline elementT operator[](indexT i)
184 {
185 return accessorFunc(data, i);
186 }
187 inline void set(indexT i, elementT v)
188 {
189 setterFunc(data, i, v);
190 }
191
192
193
194 /// Create a vector with elementCount memebers.
195 //
196 /// Does administrative setup of the header and populating this
197 /// class members. User will need to finish populating the
198 /// contents of the metaData and data sections.
199 ///
200 /// If file==NULL, the underlying allocation is done via malloc(),
201 /// so that the results of write access to this vecor are not
202 /// saved in a file.
203 ///
204 /// If file!=NULL, a file will be created on disk, and all
205 /// write accesses done via the method ::set will be persistent
206 /// in that file.
207 ///
208 int create(const char *file, indexT elementCount, int optionalHeaderCount = 0)
209 {
210 size_t len = elementCount2BytesFunc(elementCount) +
211 header->getHeaderSize(optionalHeaderCount);
212 int rc;
213 rc = MemoryMap::create(file, len);
214 if (rc)
215 {
216 std::ostringstream buf;
217 buf << file << ": failed to create file";
218 errorStr = buf.str();
219 close();
220 return rc;
221 }
222 header = (arrayHeaderClass *) MemoryMap::data;
223 header->constructorClear();
224 header->typeCookie = cookieVal;
225 header->typeVersion = versionVal;
226 header->headerSize = header->getHeaderSize(optionalHeaderCount);
227 header->elementCount = elementCount;
228 data = (char *)((char *) MemoryMap::data + header->headerSize);
229
230 const char *env;
231 char hostname[256];
232 env = getenv("USER");
233 if (env) header->setCreationUser(env);
234 header->creationDate = time(NULL);
235#if defined(_WIN32)
236 hostname[0] = '\0';
237#else
238 gethostname(hostname, sizeof(hostname));
239#endif
240 header->setCreationHost(hostname);
241 return 0;
242 }
243
244 /// allow anonymous (malloc) create.
245 ///
246 /// we do this when we don't expect to save the results.
247 ///
248 /// The single use case so far is in GenomeSequence::populateDBSNP.
249 ///
250 int create(indexT elementCount, int optionalHeaderCount = 0)
251 {
252 return create(NULL, elementCount, optionalHeaderCount);
253 }
254
255 //
256 // Open the given filename. flags may be set to
257 // O_RDONLY or O_RDWR, and allows the file to be
258 // condtionally written to.
259 //
260 // Several sanity checks are done:
261 // compare the expected cookie value to the actual one
262 // compare the expected version value to the actual one
263 //
264 // if either condition is not met, the member errorStr is
265 // set to explain why, and true is returned.
266 //
267 // If there were no errors, false is returned.
268 //
269 bool open(const char *file, int flags = O_RDONLY)
270 {
271 int rc = MemoryMap::open(file, flags);
272 if (rc)
273 {
274 std::ostringstream buf;
275 buf << file << ": open() failed (error=" << strerror(errno) << ").";
276 errorStr = buf.str();
277 return true;
278 }
279 header = (arrayHeaderClass *) MemoryMap::data;
280 data = (char *)((char *) MemoryMap::data + header->headerSize);
281 if (header->typeCookie!=cookieVal)
282 {
283 std::ostringstream buf;
284 buf << file << ": wrong type of file (expected type "
285 << cookieVal << " but got " << header->typeCookie << ")";
286 errorStr = buf.str();
287 // XXX insert better error handling
288 close();
289 return true;
290 }
291 if (header->typeVersion!=versionVal)
292 {
293 std::ostringstream buf;
294 buf << file << ": wrong version of file (expected version "
295 << versionVal << " but got " << header->typeVersion << ")";
296 errorStr = buf.str();
297 // XXX insert better error handling
298 close();
299 return true;
300 }
301 return false;
302 }
303
304 bool close()
305 {
306 constructorClear();
307 return MemoryMap::close();
308 }
309 void debugPrint(FILE *f)
310 {
311 if (header) header->debugPrint(f);
312 }
313
314 size_t getElementCount() const
315 {
316 return header->elementCount;
317 }
318
319};
320
322{
323public:
324 size_t getHeaderSize()
325 {
326 return sizeof(*this);
327 }
328};
329
330//
331// define the uint32 array type:
332//
// Read the index'th 32-bit value from the buffer starting at base.
inline uint32_t mmapUint32Access(char *base, uint32_t index)
{
    const uint32_t *values = reinterpret_cast<uint32_t *>(base);
    return *(values + index);
}
// Store v as the index'th 32-bit value in the buffer starting at base.
inline void mmapUint32Set(char *base, uint32_t index, uint32_t v)
{
    uint32_t *values = reinterpret_cast<uint32_t *>(base);
    *(values + index) = v;
}
// Number of bytes occupied by i 32-bit elements.
inline size_t mmapUint32elementCount2Bytes(uint32_t i)
{
    const size_t bytesPerElement = sizeof(uint32_t);
    return bytesPerElement * i;
}
345
346typedef MemoryMapArray<
347uint32_t,
348uint32_t,
3490x16b3816c,
35020090109,
351mmapUint32Access,
352mmapUint32Set,
353mmapUint32elementCount2Bytes,
356
357//
358// define the boolean memory mapped array type.
359// NB: it is limited to 2**32 elements
360//
361
362typedef MemoryMapArray<
363uint32_t,
364uint32_t,
3650xac6c1dc7,
36620090109,
367PackedAccess_1Bit,
368PackedAssign_1Bit,
369Packed1BitElementCount2Bytes,
372
373//
374// define the two bit memory mapped array type:
375//
376
377typedef MemoryMapArray<
378uint32_t,
379uint32_t,
3800x25b3ea5f,
38120090109,
382PackedAccess_2Bit,
383PackedAssign_2Bit,
384Packed2BitElementCount2Bytes,
387
388typedef MemoryMapArray<
389uint32_t,
390uint32_t,
3910x418e1874,
39220090109,
393PackedAccess_4Bit,
394PackedAssign_4Bit,
395Packed4BitElementCount2Bytes,
398
399#if 0
400// XXX this is example code I want to use to define arrays of genome wide match values
// Example record packing three bit-fields (4 + 7 + 5 bits).
class baseRecord
{
    unsigned int base : 4;
    unsigned int qScore : 7;
    // how many cases of poorer matches that disagree
    unsigned int conflicts : 5;
};
407
408//
409// define the baseRecord array type:
410//
411inline baseRecord &mmapBaseRecordAccess(void *base, uint32_t index)
412{
413 return *((baseRecord *)((char *)base + index*sizeof(baseRecord)));
414}
415inline void mmapBaseRecordSet(void *base, uint32_t index, baseRecord &v)
416{
417 mmapBaseRecordAccess(base, index) = v;
418}
419inline size_t mmapBaseRecordElementCount2Bytes(uint32_t i)
420{
421 return sizeof(baseRecord) * i;
422}
423
424typedef MemoryMapArray<
425baseRecord &,
426uint32_t,
4270x12341234,
4280xdeadbeef,
429&mmapBaseRecordAccess,
430mmapBaseRecordSet,
431mmapBaseRecordElementCount2Bytes,
433> mmapArrayBaseRecord_t;
434#endif
435
436#endif
InputFile & operator<<(InputFile &stream, const std::string &str)
Write to a file using streaming.
Definition: InputFile.h:736
bool open(const char *file, int flags=O_RDONLY)
open a previously created mapped vector
int create(indexT elementCount, int optionalHeaderCount=0)
allow anonymous (malloc) create.
int create(const char *file, indexT elementCount, int optionalHeaderCount=0)
Create a vector with elementCount members.
There are a pair of related data structures in the operating system, and also a few simple algorithms...
Definition: MemoryMap.h:156
virtual bool open(const char *file, int flags=O_RDONLY)
open a previously created mapped vector
Definition: MemoryMap.cpp:156
virtual bool create(const char *file, size_t size)
create the memory mapped file on disk
Definition: MemoryMap.cpp:243