libStatGen Software 1
PedigreeGlobals.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "PedigreeGlobals.h"
19#include "Sort.h"
20#include "Error.h"
21
22#include <math.h>
23#include <string.h>
24#include <ctype.h>
25
26int PedigreeGlobals::traitCount = 0;
27int PedigreeGlobals::affectionCount = 0;
28int PedigreeGlobals::covariateCount = 0;
29int PedigreeGlobals::markerCount = 0;
30int PedigreeGlobals::stringCount = 0;
31
32// If this value isn't set, all X chromosome data will be rejected
33bool PedigreeGlobals::chromosomeX = false;
34bool PedigreeGlobals::sexSpecificMap = false;
35
36StringArray PedigreeGlobals::traitNames;
37StringArray PedigreeGlobals::markerNames;
38StringArray PedigreeGlobals::covariateNames;
39StringArray PedigreeGlobals::affectionNames;
40StringArray PedigreeGlobals::stringNames;
41StringIntHash PedigreeGlobals::markerLookup;
42StringIntHash PedigreeGlobals::traitLookup;
43StringIntHash PedigreeGlobals::affectionLookup;
44StringIntHash PedigreeGlobals::covariateLookup;
45StringIntHash PedigreeGlobals::stringLookup;
46
47int PedigreeGlobals::markerInfoCount = 0;
48int PedigreeGlobals::markerInfoSize = 0;
49
50MarkerInfo ** PedigreeGlobals::markerInfo = NULL;
51MarkerInfo ** PedigreeGlobals::markerInfoByInteger = NULL;
52StringHash PedigreeGlobals::markerInfoByName;
53
54int MarkerInfo::count = 0;
55
56int MarkerInfo::ComparePosition(MarkerInfo ** left, MarkerInfo ** right)
57{
58 if ((*left)->chromosome != (*right)->chromosome)
59 return (*left)->chromosome - (*right)->chromosome;
60
61 double difference = (*left)->position - (*right)->position;
62
63 if (difference > 0.0)
64 return 1;
65 else if (difference == 0.0)
66 return (*left)->serial - (*right)->serial;
67 else
68 return -1;
69}
70
71String MarkerInfo::GetAlleleLabel(int allele)
72{
73 if (alleleLabels.Length() > allele && alleleLabels[allele].Length())
74 return alleleLabels[allele];
75 else if (alleleLabels.Length() <= allele)
76 alleleLabels.Dimension(allele + 1);
77 return alleleLabels[allele] = allele;
78}
79
80bool MarkerInfo::AdjustFrequencies()
81{
82 if (freq.Length() <= 1)
83 {
84 freq.Clear();
85 return false;
86 }
87
88 if (freq.Min() < 0.0)
89 error("Locus %s has negative allele frequencies\n", (const char *) name);
90
91 double sum = freq.Sum();
92
93 if (sum <= 0.0)
94 error("Locus %s frequencies sum to %f, which doesn't make sense\n",
95 (const char *) name, sum);
96
97 if (sum != 1.0)
98 freq *= 1.0 / sum;
99
100 if (fabs(sum - 1.0) > 1.2e-5)
101 {
102 printf("Locus %s frequencies sum to %f, adjusted to 1.0\n",
103 (const char *) name, sum);
104 return true;
105 }
106
107 return false;
108}
109
110void MarkerInfo::IndexAlleles()
111{
112 if (alleleLabels.Length() >= 255)
113 error("Marker %s has more than 254 distinct alleles\n",
114 (const char *) name);
115
116 alleleNumbers.Clear();
117
118 for (int i = 1; i < alleleLabels.Length(); i++)
119 alleleNumbers.SetInteger(alleleLabels[i], i);
120}
121
122int MarkerInfo::NewAllele(const String & label)
123{
124 if (alleleLabels.Length() == 0)
125 alleleLabels.Push("");
126
127 if (alleleLabels.Length() >= 255)
128 error("Marker %s has more than 254 distinct alleles\n",
129 (const char *) name);
130
131 alleleNumbers.SetInteger(label, alleleLabels.Length());
132 alleleLabels.Push(label);
133
134 return alleleLabels.Length() - 1;
135}
136
137int PedigreeGlobals::GetTraitID(const char * name)
138{
139 int idx = traitLookup.Integer(name);
140
141 if (idx != -1) return idx;
142
143 traitNames.Add(name);
144 traitLookup.SetInteger(name, traitCount);
145 return traitCount++;
146}
147
148int PedigreeGlobals::GetAffectionID(const char * name)
149{
150 int idx = affectionLookup.Integer(name);
151
152 if (idx != -1) return idx;
153
154 affectionNames.Add(name);
155 affectionLookup.SetInteger(name, affectionCount);
156 return affectionCount++;
157}
158
159int PedigreeGlobals::GetCovariateID(const char * name)
160{
161 int idx = covariateLookup.Integer(name);
162
163 if (idx != -1) return idx;
164
165 covariateNames.Add(name);
166 covariateLookup.SetInteger(name, covariateCount);
167 return covariateCount++;
168}
169
170int PedigreeGlobals::GetStringID(const char * name)
171{
172 int idx = stringLookup.Integer(name);
173
174 if (idx != -1) return idx;
175
176 stringNames.Add(name);
177 stringLookup.SetInteger(name, stringCount);
178 return stringCount++;
179}
180
181int PedigreeGlobals::GetMarkerID(const char * name)
182{
183 int idx = markerLookup.Integer(name);
184
185 if (idx != -1) return idx;
186
187 markerNames.Add(name);
188 markerLookup.SetInteger(name, markerCount);
189
190 // Grow the marker info key ...
191 if (markerCount == 0)
192 {
193 markerInfoByInteger = new MarkerInfo * [16];
194
195 for (int i = 0; i < 16; i++)
196 markerInfoByInteger[i] = NULL;
197 }
198 else if ((markerCount & (markerCount - 1)) == 0 && markerCount > 15)
199 {
200 MarkerInfo ** newKey = new MarkerInfo * [markerCount * 2];
201
202 for (int i = 0; i < markerCount; i++)
203 newKey[i] = markerInfoByInteger[i];
204
205 for (int i = markerCount; i < markerCount * 2; i++)
206 newKey[i] = NULL;
207
208 delete [] markerInfoByInteger;
209
210 markerInfoByInteger = newKey;
211 }
212
213 return markerCount++;
214}
215
216MarkerInfo * PedigreeGlobals::GetMarkerInfo(String & name)
217{
218 MarkerInfo * info = (MarkerInfo *) markerInfoByName.Object(name);
219
220 if (info != NULL) return info;
221
222 info = new MarkerInfo(name);
223 markerInfoByName.Add(name, info);
224
225 if (markerInfoCount >= markerInfoSize)
226 GrowMarkerInfo();
227
228 markerInfo[markerInfoCount++] = info;
229
230 int markerId = LookupMarker(name);
231 if (markerId >= 0) markerInfoByInteger[markerId] = info;
232
233 return info;
234}
235
236MarkerInfo * PedigreeGlobals::GetMarkerInfo(int markerId)
237{
238 if (markerId >= markerCount)
239 error("Attempted to retrieve MarkerInfo using out-of-bounds index\n");
240
241 if (markerInfoByInteger[markerId] != NULL)
242 return markerInfoByInteger[markerId];
243 else
244 return GetMarkerInfo(markerNames[markerId]);
245}
246
247void PedigreeGlobals::GrowMarkerInfo()
248{
249 int newSize = markerInfoSize ? 2 * markerInfoSize : 32;
250
251 MarkerInfo ** newArray = new MarkerInfo * [newSize];
252
253 if (markerInfoSize)
254 {
255 memcpy(newArray, markerInfo, sizeof(MarkerInfo *) * markerInfoSize);
256 delete [] markerInfo;
257 }
258
259 markerInfo = newArray;
260 markerInfoSize = newSize;
261}
262
263void PedigreeGlobals::FlagMissingMarkers(IntArray & missingMarkers)
264{
265 int skipped_markers = 0;
266
267 if (missingMarkers.Length())
268 {
269 StringArray names;
270
271 printf("These markers couldn't be placed and won't be analysed:");
272
273 for (int i = 0; i < missingMarkers.Length(); i++)
274 names.Push(GetMarkerInfo(missingMarkers[i])->name);
275 names.Sort();
276
277 for (int i = 0, line = 80, lines = 0; i < missingMarkers.Length(); i++)
278 {
279 if (line + names[i].Length() + 1 > 79)
280 printf("\n "), line = 3, lines++;
281
282 if (lines < 5)
283 {
284 printf("%s ", (const char *) names[i]);
285 line += names[i].Length() + 1;
286 }
287 else
288 skipped_markers++;
289 }
290
291 if (skipped_markers)
292 printf("as well as %d other unlisted markers...", skipped_markers);
293
294 printf("\n\n");
295 }
296}
297
298void PedigreeGlobals::GetOrderedMarkers(IntArray & markers)
299{
300 if (markers.Length() == 0)
301 {
302 markers.Dimension(markerCount);
303 markers.SetSequence(0, 1);
304 }
305
306 MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
307
308 int count = 0;
309 IntArray missingMarkers;
310
311 for (int i = 0; i < markers.Length(); i++)
312 {
313 MarkerInfo * info = GetMarkerInfo(markers[i]);
314
315 if (info->chromosome != -1)
316 subset[count++] = info;
317 else
318 missingMarkers.Push(i);
319 }
320
321 FlagMissingMarkers(missingMarkers);
322
323 QuickSort(subset, count, sizeof(MarkerInfo *),
324 COMPAREFUNC MarkerInfo::ComparePosition);
325
326 markers.Clear();
327 for (int i = 0; i < count; i++)
328 markers.Push(GetMarkerID(subset[i]->name));
329}
330
331int PedigreeGlobals::SortMarkersInMapOrder(IntArray & markers, int chromosome)
332{
333 if (markers.Length() == 0)
334 {
335 markers.Dimension(markerCount);
336 markers.SetSequence(0, 1);
337 }
338
339 MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
340
341 int count = 0;
342 IntArray missingMarkers;
343
344 for (int i = 0; i < markers.Length(); i++)
345 {
346 MarkerInfo * info = GetMarkerInfo(markers[i]);
347
348 if (info->chromosome != -1)
349 subset[count++] = info;
350 else if (chromosome == -1)
351 missingMarkers.Push(i);
352 }
353
354 if (chromosome == -1)
355 FlagMissingMarkers(missingMarkers);
356
357 QuickSort(subset, count, sizeof(MarkerInfo *),
358 COMPAREFUNC MarkerInfo::ComparePosition);
359
360 markers.Clear();
361
362 int current_chromosome = -1, next_chromosome = 0;
363
364 for (int i = 0; i < count; i++)
365 if (subset[i]->chromosome < chromosome)
366 continue;
367 else if (current_chromosome == -1 ||
368 subset[i]->chromosome == current_chromosome)
369 {
370 markers.Push(GetMarkerID(subset[i]->name));
371 current_chromosome = subset[i]->chromosome;
372 }
373 else if (!next_chromosome)
374 {
375 next_chromosome = subset[i]->chromosome;
376 break;
377 }
378
379 delete [] subset;
380
381 return next_chromosome;
382}
383
384void PedigreeGlobals::VerifySexSpecificOrder()
385{
386 if (markerCount <= 1)
387 return;
388
389 MarkerInfo ** sortedMarkers = new MarkerInfo * [markerCount];
390
391 for (int i = 0; i < markerCount; i++)
392 sortedMarkers[i] = GetMarkerInfo(i);
393
394 QuickSort(sortedMarkers, markerCount, sizeof(MarkerInfo *),
395 COMPAREFUNC MarkerInfo::ComparePosition);
396
397 double prev_female = sortedMarkers[0]->positionFemale;
398 double prev_male = sortedMarkers[0]->positionMale;
399 double curr_female, curr_male;
400
401 int prev_chromosome = sortedMarkers[0]->chromosome;
402 int curr_chromosome;
403
404 for (int i = 1; i < markerCount; i++)
405 {
406 curr_chromosome = sortedMarkers[i]->chromosome;
407 curr_female = sortedMarkers[i]->positionFemale;
408 curr_male = sortedMarkers[i]->positionMale;
409
410 if (curr_chromosome == prev_chromosome &&
411 (curr_female < prev_female || curr_male < prev_male))
412 error("Sex-specific and sex-averaged maps are inconsistent.\n\n"
413 "In the sex-averaged map, marker %s (%.2f cM) follows marker %s (%.2f cM).\n"
414 "In the %smale map, marker %s (%.2f cM) PRECEDES marker %s (%.2f cM).\n",
415 (const char *) sortedMarkers[i]->name,
416 sortedMarkers[i]->position * 100,
417 (const char *) sortedMarkers[i-1]->name,
418 sortedMarkers[i-1]->position * 100,
419 curr_female < prev_female ? "fe" : "",
420 (const char *) sortedMarkers[i]->name,
421 (curr_female < prev_female ? curr_female : curr_male) * 100,
422 (const char *) sortedMarkers[i-1]->name,
423 (curr_female < prev_female ? prev_female : prev_male) * 100);
424
425 prev_chromosome = curr_chromosome;
426 prev_female = curr_female;
427 prev_male = curr_male;
428 }
429
430 delete [] sortedMarkers;
431}
432
433void PedigreeGlobals::LoadAlleleFrequencies(const char * filename, bool required)
434{
435 // This function is often called with an empty string, and not
436 // all implementations of the C library like that ...
437 if (filename[0] == 0)
438 {
439 if (required)
440 error("No name provided for required allele freuquency file\n");
441 else
442 return;
443 }
444
445 // If we get here, the filename is not empty and things should
446 // work as planned
447 IFILE f = ifopen(filename, "rb");
448
449 if (f == NULL)
450 {
451 if (required)
452 error("Failed to open required allele frequency file '%s'",
453 (const char *) filename);
454 else
455 return;
456 }
457
458 LoadAlleleFrequencies(f);
459 ifclose(f);
460}
461
462void PedigreeGlobals::LoadAlleleFrequencies(IFILE & input)
463{
464 int done = 0;
465 String buffer;
466 StringArray tokens;
467 MarkerInfo *info = NULL;
468
469 bool need_blank_line = false;
470 int allele_size, old_max, next_allele = 0; // Initialization avoids compiler warning
471
472 while (!ifeof(input) && !done)
473 {
474 int i, j;
475
476 buffer.ReadLine(input);
477
478 tokens.Clear();
479 tokens.AddTokens(buffer, WHITESPACE);
480
481 if (tokens.Length() < 1) continue;
482
483 switch (toupper(tokens[0][0]))
484 {
485 case 'M' :
486 if (tokens.Length() == 1)
487 error("Unnamed marker in allele frequency file");
488 if (info != NULL)
489 need_blank_line |= info->AdjustFrequencies();
490 info = GetMarkerInfo(tokens[1]);
491 info->freq.Clear();
492 info->freq.Push(0.0);
493 next_allele = 1;
494 break;
495 case 'F' :
496 if (info != NULL)
497 for (i = 1; i < tokens.Length(); i++)
498 {
499 buffer = next_allele++;
500
501 int allele = LoadAllele(info, buffer);
502
503 if (allele >= info->freq.Length())
504 {
505 old_max = info->freq.Length();
506 info->freq.Dimension(allele + 1);
507 for (j = old_max; j < allele; j++)
508 info->freq[j] = 0.0;
509 }
510
511 info->freq[allele] = tokens[i].AsDouble();
512 }
513 break;
514 case 'A' :
515 if (info == NULL) continue;
516
517 if (tokens.Length() != 3)
518 error("Error reading named allele frequencies for locus %s\n"
519 "Lines with named alleles should have the format\n"
520 " A allele_label allele_frequency\n\n"
521 "But the following line was read:\n%s\n",
522 (const char *) info->name, (const char *) buffer);
523
524 allele_size = LoadAllele(info, tokens[1]);
525 next_allele = atoi(tokens[1]) + 1;
526
527 if (allele_size < 1)
528 error("Error reading named allele frequencies for locus %s\n"
529 "An invalid allele label was encountered\n",
530 (const char *) info->name);
531
532 if (allele_size >= info->freq.Length())
533 {
534 old_max = info->freq.Length();
535 info->freq.Dimension(allele_size + 1);
536 for (i = old_max; i < allele_size; i++)
537 info->freq[i] = 0.0;
538 }
539
540 info->freq[allele_size] = tokens[2];
541 break;
542 case 'E' :
543 done = 1;
544 break;
545 default :
546 error("Problem in allele frequency file.\n"
547 "Lines in this file should be of two types:\n"
548 " -- Marker name lines begin with an M\n"
549 " -- Frequency lines begin with an F\n\n"
550 "However the following line is different:\n%s\n",
551 (const char *) buffer);
552 }
553 }
554
555 if (info != NULL)
556 need_blank_line |= info->AdjustFrequencies();
557
558 if (need_blank_line) printf("\n");
559}
560
561void PedigreeGlobals::LoadMarkerMap(const char * filename, bool filter)
562{
563 IFILE f = ifopen(filename, "rb");
564 if (f == NULL) return;
565 LoadMarkerMap(f, filter);
566 ifclose(f);
567}
568
569void PedigreeGlobals::LoadMarkerMap(IFILE & input, bool filter)
570{
571 String buffer;
572 StringArray tokens;
573 bool first_pass = true;
574
575 while (!ifeof(input))
576 {
577 buffer.ReadLine(input);
578
579 tokens.Clear();
580 tokens.AddTokens(buffer, WHITESPACE);
581
582 if (tokens.Length() < 1) continue;
583
584 if (first_pass)
585 {
586 sexSpecificMap = (tokens.Length() == 5);
587
588 // if (sexSpecificMap)
589 // printf("\n Found sex-specific map ...\n\n");
590
591 first_pass = false;
592 }
593
594 if (tokens.Length() != 3 && !sexSpecificMap)
595 error("Error reading map file\n"
596 "Each line in this file should include 3 fields:\n"
597 "CHROMOSOME, MARKER_NAME, and POSITION\n"
598 "However the following line has %d fields\n%s\n",
599 tokens.Length(), (const char *) buffer);
600
601 if (tokens.Length() != 5 && sexSpecificMap)
602 error("Error reading map file\n"
603 "Each line in this file should include 5 fields:\n\n"
604 "CHROMOSOME, MARKER_NAME, SEX_AVERAGED_POS, FEMALE_POS AND MALE_POS\n\n"
605 "However the following line has %d fields\n%s\n",
606 tokens.Length(), (const char *) buffer);
607
608 bool previous_state = String::caseSensitive;
609 String::caseSensitive = false;
610
611 if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
612 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
613 (tokens[2] == "KOSAMBI" || tokens[2] == "POS" || tokens[2] == "POSITION" ||
614 tokens[2] == "SEX_AVERAGED_POS" || tokens[2] == "CM" || tokens[2] == "HALDANE"))
615 continue;
616
617 String::caseSensitive = previous_state;
618
619 if (filter)
620 if (LookupMarker(tokens[1]) < 0)
621 continue;
622
623 MarkerInfo * info = GetMarkerInfo(tokens[1]);
624
625 int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
626
627 info->chromosome = chr;
628 info->position = (double) tokens[2] * 0.01;
629
630 if (sexSpecificMap)
631 {
632 char * flag;
633
634 double female = strtod(tokens[3], &flag);
635 if (*flag)
636 error("In the map file, the female cM position for marker\n"
637 "%s is %s. This is not a valid number.",
638 (const char *) tokens[1], (const char *) tokens[3]);
639
640 double male = strtod(tokens[4], &flag);
641 if (*flag)
642 error("In the map file, the male cM position for marker\n"
643 "%s is %s. This is not a valid number.",
644 (const char *) tokens[1], (const char *) tokens[4]);
645
646 info->positionFemale = (double) female * 0.01;
647 info->positionMale = (double) male * 0.01;
648 }
649 else
650 info->positionFemale = info->positionMale = info->position;
651 }
652
653 if (sexSpecificMap) VerifySexSpecificOrder();
654}
655
656void PedigreeGlobals::LoadBasepairMap(const char * filename)
657{
658 IFILE f = ifopen(filename, "rb");
659 if (f == NULL)
660 error("The map file [%s] could not be opened\n\n"
661 "Please check that the filename is correct and that the file is\n"
662 "not being used by another program", filename);
663 LoadBasepairMap(f);
664 ifclose(f);
665}
666
667void PedigreeGlobals::LoadBasepairMap(IFILE & input)
668{
669 String buffer;
670 StringArray tokens;
671
672 sexSpecificMap = false;
673
674 while (!ifeof(input))
675 {
676 buffer.ReadLine(input);
677
678 tokens.Clear();
679 tokens.AddTokens(buffer, WHITESPACE);
680
681 if (tokens.Length() < 1) continue;
682
683 if (tokens.Length() != 3)
684 error("Error reading map file\n"
685 "Each line in this file should include 3 fields:\n"
686 "CHROMOSOME, MARKER_NAME, and POSITION\n"
687 "However the following line has %d fields\n%s\n",
688 tokens.Length(), (const char *) buffer);
689
690 bool previous_state = String::caseSensitive;
691 String::caseSensitive = false;
692
693 if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
694 (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
695 (tokens[2] == "BASEPAIR" || tokens[2] == "POS" || tokens[2] == "POSITION"))
696 continue;
697
698 String::caseSensitive = previous_state;
699
700 MarkerInfo * info = GetMarkerInfo(tokens[1]);
701
702 int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
703
704 info->chromosome = chr;
705 info->position = (double) tokens[2];
706 }
707}
708
709int PedigreeGlobals::instanceCount = 0;
710
711PedigreeGlobals::~PedigreeGlobals()
712{
713 if (--instanceCount == 0 && markerInfoSize)
714 {
715 for (int i = 0; i < markerInfoCount; i++)
716 delete markerInfo[i];
717 delete [] markerInfo;
718 delete [] markerInfoByInteger;
719 }
720}
721
722void PedigreeGlobals::WriteMapFile(const char * filename)
723{
724 if (!MarkerPositionsAvailable())
725 return;
726
727 FILE * output = fopen(filename, "wt");
728
729 if (output == NULL)
730 error("Creating map file \"%s\"", filename);
731
732 WriteMapFile(output);
733 fclose(output);
734}
735
736void PedigreeGlobals::WriteMapFile(FILE * output)
737{
738 if (!sexSpecificMap)
739 fprintf(output, "CHR MARKER POS\n");
740 else
741 fprintf(output, "CHR MARKER POS POSF POSM\n");
742
743 for (int i = 0; i < markerInfoCount; i++)
744 {
745 if (markerInfo[i]->chromosome != -1)
746 {
747 if (!sexSpecificMap)
748 fprintf(output, "%3d %-10s %g\n",
749 markerInfo[i]->chromosome,
750 (const char *) markerInfo[i]->name,
751 markerInfo[i]->position * 100.0);
752 else
753 fprintf(output, "%3d %-10s %g %g %g\n",
754 markerInfo[i]->chromosome,
755 (const char *) markerInfo[i]->name,
756 markerInfo[i]->position * 100.0,
757 markerInfo[i]->positionFemale * 100.0,
758 markerInfo[i]->positionMale * 100.0);
759 }
760 }
761}
762
763void PedigreeGlobals::WriteFreqFile(const char * filename, bool old_format)
764{
765 FILE * output = fopen(filename, "wt");
766
767 if (output == NULL)
768 error("Creating allele frequency file \"%s\"", filename);
769
770 WriteFreqFile(output, old_format);
771 fclose(output);
772}
773
774void PedigreeGlobals::WriteFreqFile(FILE * output, bool old_format)
775{
776 for (int i = 0; i < markerInfoCount; i++)
777 {
778 MarkerInfo * info = markerInfo[i];
779
780 if (info->freq.Length() == 0) continue;
781
782 fprintf(output, "M %s\n", (const char *) info->name);
783
784 if (old_format && info->alleleLabels.Length() == 0)
785 for (int j = 1; j < info->freq.Length(); j++)
786 fprintf(output, "%s%.5f%s",
787 j % 7 == 1 ? "F " : "", info->freq[j],
788 j == info->freq.Length() - 1 ? "\n" : j % 7 == 0 ? "\n" : " ");
789 else
790 for (int j = 1; j < info->freq.Length(); j++)
791 if (info->freq[j] > 1e-7)
792 fprintf(output, "A %5s %.5f\n",
793 (const char *) info->GetAlleleLabel(j), info->freq[j]);
794 }
795}
796
797bool PedigreeGlobals::MarkerPositionsAvailable()
798{
799 for (int i = 0; i < markerInfoCount; i++)
800 if (markerInfo[i]->chromosome != -1)
801 return true;
802
803 return false;
804}
805
806bool PedigreeGlobals::AlleleFrequenciesAvailable()
807{
808 for (int i = 0; i < markerInfoCount; i++)
809 if (markerInfo[i]->freq.Length() > 1)
810 return true;
811
812 return false;
813}
814
815int PedigreeGlobals::LoadAllele(int marker, String & token)
816{
817 return LoadAllele(GetMarkerInfo(marker), token);
818}
819
820int PedigreeGlobals::LoadAllele(MarkerInfo * info, String & token)
821{
822 int allele = info->GetAlleleNumber(token);
823
824 if (allele >= 0) return allele;
825
826 static unsigned char lookup[128];
827 static bool init = false;
828
829 if (!init)
830 {
831 init = true;
832
833 for (int i = 0; i < 128; i++)
834 lookup[i] = 0;
835
836 for (int i = '1'; i <= '9'; i++)
837 lookup[i] = 1;
838
839 lookup[int('a')] = lookup[int('A')] = lookup[int('c')] = lookup[int('C')] = 2;
840 lookup[int('g')] = lookup[int('G')] = lookup[int('t')] = lookup[int('T')] = 2;
841 }
842
843 int first = token[0];
844 bool goodstart = first > 0 && first < 128;
845
846 if (token.Length() == 1 && goodstart && lookup[int(token[0])])
847 return info->NewAllele(token);
848
849 if (!goodstart || lookup[int(token[0])] != 1)
850 return 0;
851
852 int integer = token.AsInteger();
853 token = integer;
854
855 allele = info->GetAlleleNumber(token);
856
857 if (allele > 0)
858 return allele;
859
860 if (integer <= 0) return 0;
861
862 if (integer > 1000000)
863 {
864 static bool warn_user = true;
865
866 if (warn_user)
867 {
868 printf("Some allele numbers for marker %s are > 1000000\n"
869 "All allele numbers >1000000 will be treated as missing\n\n",
870 (const char *) info->name);
871 warn_user = false;
872 }
873
874 return 0;
875 }
876
877 return info->NewAllele(token);
878}
879
880std::ostream &operator << (std::ostream &stream, MarkerInfo &m)
881{
882 stream << "MarkerInfo for marker " << m.name << std::endl;
883 stream << " located on chromsome " << m.chromosome << ":" << (int64_t)(100 * m.position) << std::endl;
884 stream << " allele count = " << m.freq.Length() << std::endl;
885 stream << " label count = " << m.alleleLabels.Length() << std::endl;
886 if (m.freq.Length() == m.alleleLabels.Length())
887 {
888 for (int i=0; i<m.freq.Length(); i++)
889 {
890 stream << " " << m.alleleLabels[i] << ":" << m.freq[i];
891 }
892 stream << std::endl;
893 }
894 else
895 {
896 stream << " output truncated - counts appear corrupt." << std::endl;
897 }
898 return stream;
899}
900
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition: InputFile.h:654
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout.
Definition: InputFile.h:562
InputFile & operator<<(InputFile &stream, const std::string &str)
Write to a file using streaming.
Definition: InputFile.h:736
int ifclose(IFILE &file)
Close the file.
Definition: InputFile.h:580
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:37