/*-
 *******************************************************************************
 * Copyright (c) 2011, 2016 Diamond Light Source Ltd.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    Peter Chang - initial API and implementation and/or initial documentation
 *******************************************************************************/

package org.eclipse.january.dataset;

import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.math3.complex.Complex;
import org.apache.commons.math3.stat.descriptive.moment.Kurtosis;
import org.apache.commons.math3.stat.descriptive.moment.Skewness;
import org.eclipse.january.metadata.Dirtiable;
import org.eclipse.january.metadata.MetadataType;


/**
 * Statistics class
 *
 * TODO Where is mode?
http://en.wikipedia.org/wiki/Mode_(statistics) 034 * 035 */ 036public class Stats { 037 038 private static class ReferencedDataset extends SoftReference<Dataset> { 039 public ReferencedDataset(Dataset d) { 040 super(d); 041 } 042 } 043 044 private static class QStatisticsImpl<T> implements MetadataType { 045 private static final long serialVersionUID = -3296671666463190388L; 046 final static Double Q1 = 0.25; 047 final static Double Q2 = 0.5; 048 final static Double Q3 = 0.75; 049 Map<Double, T> qmap = new HashMap<Double, T>(); 050 transient Map<Integer, Map<Double, ReferencedDataset>> aqmap = new HashMap<Integer, Map<Double, ReferencedDataset>>(); 051 transient ReferencedDataset s; // store 0th element 052 transient Map<Integer, ReferencedDataset> smap = new HashMap<>(); 053 054 @Dirtiable 055 private boolean isDirty = true; 056 057 @Override 058 public QStatisticsImpl<T> clone() { 059 return new QStatisticsImpl<T>(this); 060 } 061 062 public QStatisticsImpl() { 063 } 064 065 private QStatisticsImpl(QStatisticsImpl<T> qstats) { 066 if (qstats.s != null && qstats.s.get() != null) { 067 s = new ReferencedDataset(qstats.s.get().getView(false)); 068 } 069 qmap.putAll(qstats.qmap); 070 for (Integer i : qstats.aqmap.keySet()) { 071 aqmap.put(i, new HashMap<>(qstats.aqmap.get(i))); 072 } 073 smap.putAll(qstats.smap); 074 isDirty = qstats.isDirty; 075 } 076 077 public void setQuantile(double q, T v) { 078 qmap.put(q, v); 079 } 080 081 public T getQuantile(double q) { 082 return qmap.get(q); 083 } 084 085 private Map<Double, ReferencedDataset> getMap(int axis) { 086 Map<Double, ReferencedDataset> qm = aqmap.get(axis); 087 if (qm == null) { 088 qm = new HashMap<>(); 089 aqmap.put(axis, qm); 090 } 091 return qm; 092 } 093 094 public void setQuantile(int axis, double q, Dataset v) { 095 Map<Double, ReferencedDataset> qm = getMap(axis); 096 qm.put(q, new ReferencedDataset(v)); 097 } 098 099 public Dataset getQuantile(int axis, double q) { 100 Map<Double, ReferencedDataset> qm 
= getMap(axis); 101 ReferencedDataset rd = qm.get(q); 102 return rd == null ? null : rd.get(); 103 } 104 105 Dataset getSortedDataset(int axis) { 106 return smap.containsKey(axis) ? smap.get(axis).get() : null; 107 } 108 109 void setSortedDataset(int axis, Dataset v) { 110 smap.put(axis, new ReferencedDataset(v)); 111 } 112 } 113 114 // calculates statistics and returns sorted dataset (0th element if compound) 115 private static QStatisticsImpl<?> calcQuartileStats(final Dataset a) { 116 Dataset s = null; 117 final int is = a.getElementsPerItem(); 118 119 if (is == 1) { 120 s = DatasetUtils.sort(a); 121 122 QStatisticsImpl<Double> qstats = new QStatisticsImpl<Double>(); 123 124 qstats.setQuantile(QStatisticsImpl.Q1, pQuantile(s, QStatisticsImpl.Q1)); 125 qstats.setQuantile(QStatisticsImpl.Q2, pQuantile(s, QStatisticsImpl.Q2)); 126 qstats.setQuantile(QStatisticsImpl.Q3, pQuantile(s, QStatisticsImpl.Q3)); 127 qstats.s = new ReferencedDataset(s); 128 return qstats; 129 } 130 131 QStatisticsImpl<double[]> qstats = new QStatisticsImpl<double[]>(); 132 133 Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef()); 134 double[] q1 = new double[is]; 135 double[] q2 = new double[is]; 136 double[] q3 = new double[is]; 137 qstats.setQuantile(QStatisticsImpl.Q1, q1); 138 qstats.setQuantile(QStatisticsImpl.Q2, q2); 139 qstats.setQuantile(QStatisticsImpl.Q3, q3); 140 for (int j = 0; j < is; j++) { 141 ((CompoundDataset) a).copyElements(w, j); 142 w.sort(null); 143 144 q1[j] = pQuantile(w, QStatisticsImpl.Q1); 145 q2[j] = pQuantile(w, QStatisticsImpl.Q2); 146 q3[j] = pQuantile(w, QStatisticsImpl.Q3); 147 if (j == 0) 148 s = w.clone(); 149 } 150 qstats.s = new ReferencedDataset(s); 151 152 return qstats; 153 } 154 155 static private QStatisticsImpl<?> getQStatistics(final Dataset a) { 156 QStatisticsImpl<?> m = a.getFirstMetadata(QStatisticsImpl.class); 157 if (m == null || m.isDirty) { 158 m = calcQuartileStats(a); 159 a.setMetadata(m); 160 } 161 return m; 162 } 163 164 
static private QStatisticsImpl<?> getQStatistics(final Dataset a, final int axis) { 165 final int is = a.getElementsPerItem(); 166 QStatisticsImpl<?> qstats = a.getFirstMetadata(QStatisticsImpl.class); 167 168 if (qstats == null || qstats.isDirty) { 169 if (is == 1) { 170 qstats = new QStatisticsImpl<Double>(); 171 } else { 172 qstats = new QStatisticsImpl<double[]>(); 173 } 174 a.setMetadata(qstats); 175 } 176 177 if (qstats.getQuantile(axis, QStatisticsImpl.Q2) == null) { 178 if (is == 1) { 179 Dataset s = DatasetUtils.sort(a, axis); 180 181 qstats.setQuantile(axis, QStatisticsImpl.Q1, pQuantile(s, axis, QStatisticsImpl.Q1)); 182 qstats.setQuantile(axis, QStatisticsImpl.Q2, pQuantile(s, axis, QStatisticsImpl.Q2)); 183 qstats.setQuantile(axis, QStatisticsImpl.Q3, pQuantile(s, axis, QStatisticsImpl.Q3)); 184 qstats.setSortedDataset(axis, s); 185 } else { 186 Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef()); 187 CompoundDoubleDataset q1 = null, q2 = null, q3 = null; 188 for (int j = 0; j < is; j++) { 189 ((CompoundDataset) a).copyElements(w, j); 190 w.sort(axis); 191 192 final Dataset c = pQuantile(w, axis, QStatisticsImpl.Q1); 193 if (j == 0) { 194 q1 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef()); 195 q2 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef()); 196 q3 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef()); 197 } 198 q1.setElements(c, j); 199 200 q2.setElements(pQuantile(w, axis, QStatisticsImpl.Q2), j); 201 202 q3.setElements(pQuantile(w, axis, QStatisticsImpl.Q3), j); 203 } 204 qstats.setQuantile(axis, QStatisticsImpl.Q1, q1); 205 qstats.setQuantile(axis, QStatisticsImpl.Q2, q2); 206 qstats.setQuantile(axis, QStatisticsImpl.Q3, q3); 207 } 208 } 209 210 return qstats; 211 } 212 213 // process a sorted dataset 214 private static double pQuantile(final Dataset s, final double q) { 215 double f = (s.getSize() - 1) * q; // fraction of sample number 216 if (f < 0) 217 return 
Double.NaN; 218 int qpt = (int) Math.floor(f); // quantile point 219 f -= qpt; 220 221 double quantile = s.getElementDoubleAbs(qpt); 222 if (f > 0) { 223 quantile = (1-f)*quantile + f*s.getElementDoubleAbs(qpt+1); 224 } 225 return quantile; 226 } 227 228 // process a sorted dataset and returns a double or compound double dataset 229 private static Dataset pQuantile(final Dataset s, final int axis, final double q) { 230 final int rank = s.getRank(); 231 final int is = s.getElementsPerItem(); 232 233 int[] oshape = s.getShape(); 234 235 double f = (oshape[axis] - 1) * q; // fraction of sample number 236 int qpt = (int) Math.floor(f); // quantile point 237 f -= qpt; 238 239 oshape[axis] = 1; 240 int[] qshape = ShapeUtils.squeezeShape(oshape, false); 241 Dataset qds = DatasetFactory.zeros(is, CompoundDoubleDataset.class, qshape); 242 243 IndexIterator qiter = qds.getIterator(true); 244 int[] qpos = qiter.getPos(); 245 int[] spos = oshape; 246 247 while (qiter.hasNext()) { 248 int i = 0; 249 for (; i < axis; i++) { 250 spos[i] = qpos[i]; 251 } 252 spos[i++] = qpt; 253 for (; i < rank; i++) { 254 spos[i] = qpos[i-1]; 255 } 256 257 Object obj = s.getObject(spos); 258 qds.set(obj, qpos); 259 } 260 261 if (f > 0) { 262 qiter = qds.getIterator(true); 263 qpos = qiter.getPos(); 264 qpt++; 265 Dataset rds = DatasetFactory.zeros(is, CompoundDoubleDataset.class, qshape); 266 267 while (qiter.hasNext()) { 268 int i = 0; 269 for (; i < axis; i++) { 270 spos[i] = qpos[i]; 271 } 272 spos[i++] = qpt; 273 for (; i < rank; i++) { 274 spos[i] = qpos[i-1]; 275 } 276 277 Object obj = s.getObject(spos); 278 rds.set(obj, qpos); 279 } 280 rds.imultiply(f); 281 qds.imultiply(1-f); 282 qds.iadd(rds); 283 } 284 285 return qds; 286 } 287 288 /** 289 * Calculate quantile of dataset which is defined as the inverse of the cumulative distribution function (CDF) 290 * @param a 291 * @param q 292 * @return point at which CDF has value q 293 */ 294 @SuppressWarnings("unchecked") 295 public static 
double quantile(final Dataset a, final double q) { 296 if (q < 0 || q > 1) { 297 throw new IllegalArgumentException("Quantile requested is outside [0,1]"); 298 } 299 QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a); 300 Double qv = qs.getQuantile(q); 301 if (qv == null) { 302 qv = pQuantile(qs.s.get(), q); 303 qs.setQuantile(q, qv); 304 } 305 return qv; 306 } 307 308 /** 309 * Calculate quantiles of dataset which is defined as the inverse of the cumulative distribution function (CDF) 310 * @param a 311 * @param values 312 * @return points at which CDF has given values 313 */ 314 @SuppressWarnings("unchecked") 315 public static double[] quantile(final Dataset a, final double... values) { 316 final double[] points = new double[values.length]; 317 QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a); 318 for (int i = 0; i < points.length; i++) { 319 final double q = values[i]; 320 if (q < 0 || q > 1) { 321 throw new IllegalArgumentException("Quantile requested is outside [0,1]"); 322 } 323 Double qv = qs.getQuantile(q); 324 if (qv == null) { 325 qv = pQuantile(qs.s.get(), q); 326 qs.setQuantile(q, qv); 327 } 328 points[i] = qv; 329 } 330 331 return points; 332 } 333 334 /** 335 * Calculate quantiles of dataset which is defined as the inverse of the cumulative distribution function (CDF) 336 * @param a 337 * @param axis 338 * @param values 339 * @return points at which CDF has given values 340 */ 341 @SuppressWarnings({ "unchecked" }) 342 public static Dataset[] quantile(final Dataset a, int axis, final double... 
values) { 343 final Dataset[] points = new Dataset[values.length]; 344 final int is = a.getElementsPerItem(); 345 axis = a.checkAxis(axis); 346 347 if (is == 1) { 348 QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a, axis); 349 for (int i = 0; i < points.length; i++) { 350 final double q = values[i]; 351 if (q < 0 || q > 1) { 352 throw new IllegalArgumentException("Quantile requested is outside [0,1]"); 353 } 354 Dataset qv = qs.getQuantile(axis, q); 355 if (qv == null) { 356 qv = pQuantile(qs.getSortedDataset(axis), axis, q); 357 qs.setQuantile(axis, q, qv); 358 } 359 points[i] = qv; 360 } 361 } else { 362 QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a); 363 Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef()); 364 for (int j = 0; j < is; j++) { 365 boolean copied = false; 366 367 for (int i = 0; i < points.length; i++) { 368 final double q = values[i]; 369 if (q < 0 || q > 1) { 370 throw new IllegalArgumentException("Quantile requested is outside [0,1]"); 371 } 372 Dataset qv = qs.getQuantile(axis, q); 373 if (qv == null) { 374 if (!copied) { 375 copied = true; 376 ((CompoundDataset) a).copyElements(w, j); 377 w.sort(axis); 378 } 379 qv = pQuantile(w, axis, q); 380 qs.setQuantile(axis, q, qv); 381 if (j == 0) { 382 points[i] = DatasetFactory.zeros(is, qv.getClass(), qv.getShapeRef()); 383 } 384 ((CompoundDoubleDataset) points[i]).setElements(qv, j); 385 } 386 } 387 } 388 } 389 390 return points; 391 } 392 393 /** 394 * @param a dataset 395 * @param axis 396 * @return median 397 */ 398 public static Dataset median(final Dataset a, int axis) { 399 axis = a.checkAxis(axis); 400 return getQStatistics(a, axis).getQuantile(axis, QStatisticsImpl.Q2); 401 } 402 403 /** 404 * @param a dataset 405 * @return median 406 */ 407 public static Object median(final Dataset a) { 408 return getQStatistics(a).getQuantile(QStatisticsImpl.Q2); 409 } 410 411 /** 412 * Interquartile range: Q3 - Q1 413 * @param a 414 * 
@return range 415 */ 416 @SuppressWarnings("unchecked") 417 public static Object iqr(final Dataset a) { 418 final int is = a.getElementsPerItem(); 419 if (is == 1) { 420 QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a); 421 return qs.getQuantile(QStatisticsImpl.Q3) - qs.getQuantile(QStatisticsImpl.Q1); 422 } 423 424 QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a); 425 double[] q1 = (double[]) qs.getQuantile(QStatisticsImpl.Q1); 426 double[] q3 = ((double[]) qs.getQuantile(QStatisticsImpl.Q3)).clone(); 427 for (int j = 0; j < is; j++) { 428 q3[j] -= q1[j]; 429 } 430 return q3; 431 } 432 433 /** 434 * Interquartile range: Q3 - Q1 435 * @param a 436 * @param axis 437 * @return range 438 */ 439 public static Dataset iqr(final Dataset a, int axis) { 440 axis = a.checkAxis(axis); 441 QStatisticsImpl<?> qs = getQStatistics(a, axis); 442 Dataset q3 = qs.getQuantile(axis, QStatisticsImpl.Q3); 443 444 return Maths.subtract(q3, qs.getQuantile(axis, QStatisticsImpl.Q1)); 445 } 446 447 static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, boolean[] ignoreInvalids) { 448 boolean ignoreNaNs = false; 449 boolean ignoreInfs = false; 450 if (a.hasFloatingPointElements()) { 451 ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false; 452 ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? 
ignoreInvalids[1] : ignoreNaNs; 453 } 454 455 HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class); 456 if (stats == null || stats.isDirty) { 457 stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs); 458 a.setMetadata(stats); 459 } 460 461 return stats; 462 } 463 464 static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, final boolean[] ignoreInvalids, final int axis) { 465 boolean ignoreNaNs = false; 466 boolean ignoreInfs = false; 467 if (a.hasFloatingPointElements()) { 468 ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false; 469 ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs; 470 } 471 472 HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class); 473 if (stats == null || stats.getSkewness(axis) == null || stats.isDirty) { 474 stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs, axis); 475 a.setMetadata(stats); 476 } 477 478 return stats; 479 } 480 481 private static class HigherStatisticsImpl<T> implements MetadataType { 482 private static final long serialVersionUID = -6587974784104116792L; 483 T skewness; 484 T kurtosis; 485 transient Map<Integer, ReferencedDataset> smap = new HashMap<>(); 486 transient Map<Integer, ReferencedDataset> kmap = new HashMap<>(); 487 488 @Dirtiable 489 private boolean isDirty = true; 490 491 @Override 492 public HigherStatisticsImpl<T> clone() { 493 return new HigherStatisticsImpl<>(this); 494 } 495 496 public HigherStatisticsImpl() { 497 } 498 499 private HigherStatisticsImpl(HigherStatisticsImpl<T> hstats) { 500 skewness = hstats.skewness; 501 kurtosis = hstats.kurtosis; 502 smap.putAll(hstats.smap); 503 kmap.putAll(hstats.kmap); 504 isDirty = hstats.isDirty; 505 } 506 507// public void setSkewness(T skewness) { 508// this.skewness = skewness; 509// } 510// 511// public void setKurtosis(T kurtosis) { 512// this.kurtosis = kurtosis; 513// } 514// 515// public T 
getSkewness() { 516// return skewness; 517// } 518// 519// public T getKurtosis() { 520// return kurtosis; 521// } 522 523 public Dataset getSkewness(int axis) { 524 ReferencedDataset rd = smap.get(axis); 525 return rd == null ? null : rd.get(); 526 } 527 528 public Dataset getKurtosis(int axis) { 529 ReferencedDataset rd = kmap.get(axis); 530 return rd == null ? null : rd.get(); 531 } 532 533 public void setSkewness(int axis, Dataset s) { 534 smap.put(axis, new ReferencedDataset(s)); 535 } 536 537 public void setKurtosis(int axis, Dataset k) { 538 kmap.put(axis, new ReferencedDataset(k)); 539 } 540 } 541 542 static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs) { 543 final int is = a.getElementsPerItem(); 544 final IndexIterator iter = a.getIterator(); 545 546 if (is == 1) { 547 Skewness s = new Skewness(); 548 Kurtosis k = new Kurtosis(); 549 if (ignoreNaNs) { 550 while (iter.hasNext()) { 551 final double x = a.getElementDoubleAbs(iter.index); 552 if (Double.isNaN(x)) 553 continue; 554 s.increment(x); 555 k.increment(x); 556 } 557 } else { 558 while (iter.hasNext()) { 559 final double x = a.getElementDoubleAbs(iter.index); 560 s.increment(x); 561 k.increment(x); 562 } 563 } 564 565 HigherStatisticsImpl<Double> stats = new HigherStatisticsImpl<Double>(); 566 stats.skewness = s.getResult(); 567 stats.kurtosis = k.getResult(); 568 return stats; 569 } 570 final Skewness[] s = new Skewness[is]; 571 final Kurtosis[] k = new Kurtosis[is]; 572 573 for (int j = 0; j < is; j++) { 574 s[j] = new Skewness(); 575 k[j] = new Kurtosis(); 576 } 577 if (ignoreNaNs) { 578 while (iter.hasNext()) { 579 boolean skip = false; 580 for (int j = 0; j < is; j++) { 581 if (Double.isNaN(a.getElementDoubleAbs(iter.index + j))) { 582 skip = true; 583 break; 584 } 585 } 586 if (!skip) 587 for (int j = 0; j < is; j++) { 588 final double val = a.getElementDoubleAbs(iter.index + j); 589 s[j].increment(val); 590 
k[j].increment(val); 591 } 592 } 593 } else { 594 while (iter.hasNext()) { 595 for (int j = 0; j < is; j++) { 596 final double val = a.getElementDoubleAbs(iter.index + j); 597 s[j].increment(val); 598 k[j].increment(val); 599 } 600 } 601 } 602 final double[] ts = new double[is]; 603 final double[] tk = new double[is]; 604 for (int j = 0; j < is; j++) { 605 ts[j] = s[j].getResult(); 606 tk[j] = k[j].getResult(); 607 } 608 609 HigherStatisticsImpl<double[]> stats = new HigherStatisticsImpl<double[]>(); 610 stats.skewness = ts; 611 stats.kurtosis = tk; 612 return stats; 613 } 614 615 static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) { 616 final int rank = a.getRank(); 617 final int is = a.getElementsPerItem(); 618 final int[] oshape = a.getShape(); 619 final int alen = oshape[axis]; 620 oshape[axis] = 1; 621 622 final int[] nshape = ShapeUtils.squeezeShape(oshape, false); 623 final Dataset sk; 624 final Dataset ku; 625 HigherStatisticsImpl<?> stats = null; 626 627 if (is == 1) { 628 if (stats == null) { 629 stats = new HigherStatisticsImpl<Double>(); 630 a.setMetadata(stats); 631 } 632 sk = DatasetFactory.zeros(DoubleDataset.class, nshape); 633 ku = DatasetFactory.zeros(DoubleDataset.class, nshape); 634 final IndexIterator qiter = sk.getIterator(true); 635 final int[] qpos = qiter.getPos(); 636 final int[] spos = oshape; 637 638 while (qiter.hasNext()) { 639 int i = 0; 640 for (; i < axis; i++) { 641 spos[i] = qpos[i]; 642 } 643 spos[i++] = 0; 644 for (; i < rank; i++) { 645 spos[i] = qpos[i - 1]; 646 } 647 648 Skewness s = new Skewness(); 649 Kurtosis k = new Kurtosis(); 650 if (ignoreNaNs) { 651 for (int j = 0; j < alen; j++) { 652 spos[axis] = j; 653 final double val = a.getDouble(spos); 654 if (Double.isNaN(val)) 655 continue; 656 657 s.increment(val); 658 k.increment(val); 659 } 660 } else { 661 for (int j = 0; j < alen; j++) { 662 spos[axis] = j; 663 final double val 
= a.getDouble(spos); 664 s.increment(val); 665 k.increment(val); 666 } 667 } 668 sk.set(s.getResult(), qpos); 669 ku.set(k.getResult(), qpos); 670 } 671 } else { 672 if (stats == null) { 673 stats = new HigherStatisticsImpl<double[]>(); 674 a.setMetadata(stats); 675 } 676 sk = DatasetFactory.zeros(is, CompoundDoubleDataset.class, nshape); 677 ku = DatasetFactory.zeros(is, CompoundDoubleDataset.class, nshape); 678 final IndexIterator qiter = sk.getIterator(true); 679 final int[] qpos = qiter.getPos(); 680 final int[] spos = oshape; 681 final Skewness[] s = new Skewness[is]; 682 final Kurtosis[] k = new Kurtosis[is]; 683 final double[] ts = new double[is]; 684 final double[] tk = new double[is]; 685 686 for (int j = 0; j < is; j++) { 687 s[j] = new Skewness(); 688 k[j] = new Kurtosis(); 689 } 690 while (qiter.hasNext()) { 691 int i = 0; 692 for (; i < axis; i++) { 693 spos[i] = qpos[i]; 694 } 695 spos[i++] = 0; 696 for (; i < rank; i++) { 697 spos[i] = qpos[i-1]; 698 } 699 700 for (int j = 0; j < is; j++) { 701 s[j].clear(); 702 k[j].clear(); 703 } 704 int index = a.get1DIndex(spos); 705 if (ignoreNaNs) { 706 boolean skip = false; 707 for (int j = 0; j < is; j++) { 708 if (Double.isNaN(a.getElementDoubleAbs(index + j))) { 709 skip = true; 710 break; 711 } 712 } 713 if (!skip) 714 for (int j = 0; j < is; j++) { 715 final double val = a.getElementDoubleAbs(index + j); 716 s[j].increment(val); 717 k[j].increment(val); 718 } 719 } else { 720 for (int j = 0; j < is; j++) { 721 final double val = a.getElementDoubleAbs(index + j); 722 s[j].increment(val); 723 k[j].increment(val); 724 } 725 } 726 for (int j = 0; j < is; j++) { 727 ts[j] = s[j].getResult(); 728 tk[j] = k[j].getResult(); 729 } 730 sk.set(ts, qpos); 731 ku.set(tk, qpos); 732 } 733 } 734 735 stats.setSkewness(axis, sk); 736 stats.setKurtosis(axis, ku); 737 return stats; 738 } 739 740 /** 741 * @param a dataset 742 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 743 * @return skewness 744 * @since 
2.0 745 */ 746 public static Object skewness(final Dataset a, final boolean... ignoreInvalids) { 747 return getHigherStatistic(a, ignoreInvalids).skewness; 748 } 749 750 /** 751 * @param a dataset 752 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 753 * @return kurtosis 754 * @since 2.0 755 */ 756 public static Object kurtosis(final Dataset a, final boolean... ignoreInvalids) { 757 return getHigherStatistic(a, ignoreInvalids).kurtosis; 758 } 759 760 /** 761 * @param a dataset 762 * @param axis 763 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)} 764 * @return skewness along axis in dataset 765 * @since 2.0 766 */ 767 public static Dataset skewness(final Dataset a, int axis, final boolean... ignoreInvalids) { 768 axis = a.checkAxis(axis); 769 return getHigherStatistic(a, ignoreInvalids, axis).getSkewness(axis); 770 } 771 772 /** 773 * @param a dataset 774 * @param axis 775 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)} 776 * @return kurtosis along axis in dataset 777 * @since 2.0 778 */ 779 public static Dataset kurtosis(final Dataset a, int axis, final boolean... ignoreInvalids) { 780 axis = a.checkAxis(axis); 781 return getHigherStatistic(a, ignoreInvalids, axis).getKurtosis(axis); 782 } 783 784 /** 785 * @param a dataset 786 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 787 * @return sum of dataset 788 * @since 2.0 789 */ 790 public static Object sum(final Dataset a, final boolean... ignoreInvalids) { 791 return a.sum(ignoreInvalids); 792 } 793 794 /** 795 * @param a dataset 796 * @param dtype 797 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 798 * @return typed sum of dataset 799 * @since 2.0 800 */ 801 public static Object typedSum(final Dataset a, int dtype, final boolean... 
ignoreInvalids) { 802 if (a.isComplex()) { 803 Complex m = (Complex) a.sum(ignoreInvalids); 804 return m; 805 } else if (a instanceof CompoundDataset) { 806 return DTypeUtils.fromDoublesToBiggestPrimitives((double[]) a.sum(ignoreInvalids), dtype); 807 } 808 return DTypeUtils.fromDoubleToBiggestNumber(DTypeUtils.toReal(a.sum(ignoreInvalids)), dtype); 809 } 810 811 /** 812 * @param a dataset 813 * @param dtype 814 * @param axis 815 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)} 816 * @return typed sum of items along axis in dataset 817 * @since 2.0 818 */ 819 public static Dataset typedSum(final Dataset a, int dtype, int axis, final boolean... ignoreInvalids) { 820 return DatasetUtils.cast(a.sum(axis, ignoreInvalids), dtype); 821 } 822 823 /** 824 * @param a dataset 825 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 826 * @return product of all items in dataset 827 * @since 2.0 828 */ 829 public static Object product(final Dataset a, final boolean... ignoreInvalids) { 830 return typedProduct(a, a.getDType(), ignoreInvalids); 831 } 832 833 /** 834 * @param a dataset 835 * @param axis 836 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)} 837 * @return product of items along axis in dataset 838 * @since 2.0 839 */ 840 public static Dataset product(final Dataset a, final int axis, final boolean... ignoreInvalids) { 841 return typedProduct(a, a.getDType(), axis, ignoreInvalids); 842 } 843 844 /** 845 * @param a dataset 846 * @param axis 847 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)} 848 * @return product of items along axis in dataset 849 * @since 2.2 850 */ 851 public static Dataset product(final Dataset a, final int[] axis, final boolean... 
ignoreInvalids) { 852 return typedProduct(a, a.getDType(), axis, ignoreInvalids); 853 } 854 855 /** 856 * @param a dataset 857 * @param dtype 858 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 859 * @return typed product of all items in dataset 860 * @since 2.0 861 */ 862 public static Object typedProduct(final Dataset a, final int dtype, final boolean... ignoreInvalids) { 863 final boolean ignoreNaNs; 864 final boolean ignoreInfs; 865 if (a.hasFloatingPointElements()) { 866 ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false; 867 ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs; 868 } else { 869 ignoreNaNs = false; 870 ignoreInfs = false; 871 } 872 873 if (a.isComplex()) { 874 IndexIterator it = a.getIterator(); 875 double rv = 1, iv = 0; 876 877 while (it.hasNext()) { 878 final double r1 = a.getElementDoubleAbs(it.index); 879 final double i1 = a.getElementDoubleAbs(it.index + 1); 880 if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) { 881 continue; 882 } 883 if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) { 884 continue; 885 } 886 final double tv = r1*rv - i1*iv; 887 iv = r1*iv + i1*rv; 888 rv = tv; 889 if (Double.isNaN(rv) && Double.isNaN(iv)) { 890 break; 891 } 892 } 893 894 return new Complex(rv, iv); 895 } 896 897 IndexIterator it = a.getIterator(); 898 final int is; 899 final long[] lresults; 900 final double[] dresults; 901 902 switch (dtype) { 903 case Dataset.BOOL: 904 case Dataset.INT8: case Dataset.INT16: 905 case Dataset.INT32: case Dataset.INT64: 906 long lresult = 1; 907 while (it.hasNext()) { 908 lresult *= a.getElementLongAbs(it.index); 909 } 910 return new Long(lresult); 911 case Dataset.ARRAYINT8: case Dataset.ARRAYINT16: 912 case Dataset.ARRAYINT32: case Dataset.ARRAYINT64: 913 lresult = 1; 914 is = a.getElementsPerItem(); 915 lresults = new long[is]; 916 for (int j = 0; j < is; j++) { 917 lresults[j] = 1; 918 } 919 
while (it.hasNext()) { 920 for (int j = 0; j < is; j++) 921 lresults[j] *= a.getElementLongAbs(it.index+j); 922 } 923 return lresults; 924 case Dataset.FLOAT32: case Dataset.FLOAT64: 925 double dresult = 1.; 926 while (it.hasNext()) { 927 final double x = a.getElementDoubleAbs(it.index); 928 if (ignoreNaNs && Double.isNaN(x)) { 929 continue; 930 } 931 if (ignoreInfs && Double.isInfinite(x)) { 932 continue; 933 } 934 dresult *= x; 935 if (Double.isNaN(dresult)) { 936 break; 937 } 938 } 939 return Double.valueOf(dresult); 940 case Dataset.ARRAYFLOAT32: 941 case Dataset.ARRAYFLOAT64: 942 is = a.getElementsPerItem(); 943 double[] vals = new double[is]; 944 dresults = new double[is]; 945 for (int j = 0; j < is; j++) { 946 dresults[j] = 1.; 947 } 948 while (it.hasNext()) { 949 boolean okay = true; 950 for (int j = 0; j < is; j++) { 951 final double val = a.getElementDoubleAbs(it.index + j); 952 if (ignoreNaNs && Double.isNaN(val)) { 953 okay = false; 954 break; 955 } 956 if (ignoreInfs && Double.isInfinite(val)) { 957 okay = false; 958 break; 959 } 960 vals[j] = val; 961 } 962 if (okay) { 963 for (int j = 0; j < is; j++) { 964 double val = vals[j]; 965 dresults[j] *= val; 966 } 967 } 968 } 969 return dresults; 970 } 971 972 return null; 973 } 974 975 /** 976 * @param a dataset 977 * @param dtype 978 * @param axis 979 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 980 * @return typed product of items along axis in dataset 981 * @since 2.0 982 */ 983 public static Dataset typedProduct(final Dataset a, final int dtype, int axis, final boolean... ignoreInvalids) { 984 return typedProduct(a, dtype, new int[] {axis}, ignoreInvalids); 985 } 986 987 /** 988 * @param a dataset 989 * @param dtype 990 * @param axes 991 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 992 * @return typed product of items in axes of dataset 993 * @since 2.2 994 */ 995 public static Dataset typedProduct(final Dataset a, final int dtype, int[] axes, final boolean... 
ignoreInvalids) { 996 axes = ShapeUtils.checkAxes(a.getRank(), axes); 997 SliceNDIterator siter = new SliceNDIterator(new SliceND(a.getShapeRef()), axes); 998 999 int[] nshape = siter.getUsedSlice().getSourceShape(); 1000 final int is = a.getElementsPerItem(); 1001 1002 final boolean ignoreNaNs; 1003 final boolean ignoreInfs; 1004 if (a.hasFloatingPointElements()) { 1005 ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false; 1006 ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs; 1007 } else { 1008 ignoreNaNs = false; 1009 ignoreInfs = false; 1010 } 1011 @SuppressWarnings("deprecation") 1012 Dataset result = DatasetFactory.zeros(is, nshape, dtype); 1013 1014 int[] spos = siter.getUsedPos(); 1015 1016 // TODO add getLongArray(long[], int...) to CompoundDataset 1017 final boolean isComplex = a.isComplex(); 1018 while (siter.hasNext()) { 1019 IndexIterator iter = a.getSliceIterator(siter.getCurrentSlice()); 1020 final int[] pos = iter.getPos(); 1021 1022 if (isComplex) { 1023 double rv = 1, iv = 0; 1024 switch (dtype) { 1025 case Dataset.COMPLEX64: 1026 ComplexFloatDataset af = (ComplexFloatDataset) a; 1027 while (iter.hasNext()) { 1028 final float r1 = af.getReal(pos); 1029 final float i1 = af.getImag(pos); 1030 if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) { 1031 continue; 1032 } 1033 if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) { 1034 continue; 1035 } 1036 final double tv = r1*rv - i1*iv; 1037 iv = r1*iv + i1*rv; 1038 rv = tv; 1039 if (Double.isNaN(rv) && Double.isNaN(iv)) { 1040 break; 1041 } 1042 } 1043 break; 1044 case Dataset.COMPLEX128: 1045 ComplexDoubleDataset ad = (ComplexDoubleDataset) a; 1046 while (iter.hasNext()) { 1047 final double r1 = ad.getReal(pos); 1048 final double i1 = ad.getImag(pos); 1049 if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) { 1050 continue; 1051 } 1052 if (ignoreInfs && (Double.isInfinite(r1) || 
Double.isInfinite(i1))) { 1053 continue; 1054 } 1055 final double tv = r1*rv - i1*iv; 1056 iv = r1*iv + i1*rv; 1057 rv = tv; 1058 if (Double.isNaN(rv) && Double.isNaN(iv)) { 1059 break; 1060 } 1061 } 1062 break; 1063 } 1064 1065 result.set(new Complex(rv, iv), spos); 1066 } else { 1067 final long[] lresults; 1068 final double[] dresults; 1069 1070 switch (dtype) { 1071 case Dataset.BOOL: 1072 case Dataset.INT8: case Dataset.INT16: 1073 case Dataset.INT32: case Dataset.INT64: 1074 long lresult = 1; 1075 while (iter.hasNext()) { 1076 lresult *= a.getInt(pos); 1077 } 1078 result.set(lresult, spos); 1079 break; 1080 case Dataset.ARRAYINT8: 1081 lresults = new long[is]; 1082 for (int k = 0; k < is; k++) { 1083 lresults[k] = 1; 1084 } 1085 while (iter.hasNext()) { 1086 final byte[] va = (byte[]) a.getObject(pos); 1087 for (int k = 0; k < is; k++) { 1088 lresults[k] *= va[k]; 1089 } 1090 } 1091 result.set(lresults, spos); 1092 break; 1093 case Dataset.ARRAYINT16: 1094 lresults = new long[is]; 1095 for (int k = 0; k < is; k++) { 1096 lresults[k] = 1; 1097 } 1098 while (iter.hasNext()) { 1099 final short[] va = (short[]) a.getObject(pos); 1100 for (int k = 0; k < is; k++) { 1101 lresults[k] *= va[k]; 1102 } 1103 } 1104 result.set(lresults, spos); 1105 break; 1106 case Dataset.ARRAYINT32: 1107 lresults = new long[is]; 1108 for (int k = 0; k < is; k++) { 1109 lresults[k] = 1; 1110 } 1111 while (iter.hasNext()) { 1112 final int[] va = (int[]) a.getObject(pos); 1113 for (int k = 0; k < is; k++) { 1114 lresults[k] *= va[k]; 1115 } 1116 } 1117 result.set(lresults, spos); 1118 break; 1119 case Dataset.ARRAYINT64: 1120 lresults = new long[is]; 1121 for (int k = 0; k < is; k++) { 1122 lresults[k] = 1; 1123 } 1124 while (iter.hasNext()) { 1125 final long[] va = (long[]) a.getObject(pos); 1126 for (int k = 0; k < is; k++) { 1127 lresults[k] *= va[k]; 1128 } 1129 } 1130 result.set(lresults, spos); 1131 break; 1132 case Dataset.FLOAT32: case Dataset.FLOAT64: 1133 double dresult = 1.; 
				while (iter.hasNext()) {
					final double x = a.getElementDoubleAbs(iter.index);
					if (ignoreNaNs && Double.isNaN(x)) { // skip NaNs when requested
						continue;
					}
					if (ignoreInfs && Double.isInfinite(x)) { // skip infinities when requested
						continue;
					}
					dresult *= x;
					if (Double.isNaN(dresult)) { // a NaN product can never recover so stop early
						break;
					}
				}
				result.set(dresult, spos);
				break;
			case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
				// compound floating-point items: accumulate an element-wise product
				CompoundDataset da = (CompoundDataset) a;
				double[] dvalues = new double[is];
				dresults = new double[is];
				for (int k = 0; k < is; k++) {
					dresults[k] = 1.;
				}
				while (iter.hasNext()) {
					da.getDoubleArrayAbs(iter.index, dvalues);
					// an item contributes only if every one of its elements passes the filters
					boolean okay = true;
					for (int k = 0; k < is; k++) {
						final double val = dvalues[k];
						if (ignoreNaNs && Double.isNaN(val)) {
							okay = false;
							break;
						}
						if (ignoreInfs && Double.isInfinite(val)) {
							okay = false;
							break;
						}
					}
					if (okay) {
						for (int k = 0; k < is; k++) {
							dresults[k] *= dvalues[k];
						}
					}
				}
				result.set(dresults, spos);
				break;
			}
		}
	}

//		result.setShape(ShapeUtils.squeezeShape(oshape, axes));
		return result;
	}

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return cumulative product of items in flattened dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeProduct(final Dataset a, final boolean... ignoreInvalids) {
		// delegate to the axis version on a flattened (1D) view
		return cumulativeProduct(a.flatten(), 0, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param axis to accumulate along
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return cumulative product of items along axis in dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeProduct(final Dataset a, int axis, final boolean...
ignoreInvalids) {
		axis = a.checkAxis(axis);
		int dtype = a.getDType();
		int[] oshape = a.getShape();
		int alen = oshape[axis]; // number of items along the accumulated axis
		oshape[axis] = 1; // NOTE(review): oshape is not read after this assignment

		// invalid-value filtering only applies to floating-point data;
		// the second flag defaults to the first, as in Dataset#max(int, boolean...)
		final boolean ignoreNaNs;
		final boolean ignoreInfs;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		} else {
			ignoreNaNs = false;
			ignoreInfs = false;
		}
		Dataset result = DatasetFactory.zeros(a); // same shape and type as input
		PositionIterator pi = result.getPositionIterator(axis);

		int[] pos = pi.getPos();

		// visit every 1D lane along axis and write the running product in place
		while (pi.hasNext()) {

			if (a.isComplex()) {
				double rv = 1, iv = 0; // running complex product (real, imaginary parts)
				switch (dtype) {
				case Dataset.COMPLEX64:
					ComplexFloatDataset af = (ComplexFloatDataset) a;
					ComplexFloatDataset rf = (ComplexFloatDataset) result;
					for (int j = 0; j < alen; j++) {
						// once the product is (NaN, NaN) it cannot recover so stop updating
						if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
							pos[axis] = j;
							final float r1 = af.getReal(pos);
							final float i1 = af.getImag(pos);
							if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
								// NOTE(review): continue also skips the set() below, leaving
								// this output element at its initial zero — confirm intended
								continue;
							}
							if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
								continue;
							}
							final double tv = r1*rv - i1*iv; // complex multiply
							iv = r1*iv + i1*rv;
							rv = tv;
						}
						rf.set((float) rv, (float) iv, pos);
					}
					break;
				case Dataset.COMPLEX128:
					ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
					ComplexDoubleDataset rd = (ComplexDoubleDataset) result;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
							pos[axis] = j;
							final double r1 = ad.getReal(pos);
							final double i1 = ad.getImag(pos);
							if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
								continue;
							}
							if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
								continue;
							}
							final double tv = r1*rv - i1*iv;
							iv = r1*iv + i1*rv;
							rv = tv;
						}
						rd.set(rv, iv, pos);
					}
					break;
				}
			} else {
				final int is;
				final long[] lresults;
				final double[] dresults;

				switch (dtype) {
				case Dataset.BOOL:
				case Dataset.INT8: case Dataset.INT16:
				case Dataset.INT32: case Dataset.INT64:
					// integer types accumulate in a long
					long lresult = 1;
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						lresult *= a.getInt(pos);
						result.set(lresult, pos);
					}
					break;
				case Dataset.ARRAYINT8:
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1; // product identity
					}
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final byte[] va = (byte[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] *= va[k];
						}
						result.set(lresults, pos);
					}
					break;
				case Dataset.ARRAYINT16:
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1;
					}
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final short[] va = (short[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] *= va[k];
						}
						result.set(lresults, pos);
					}
					break;
				case Dataset.ARRAYINT32:
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1;
					}
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final int[] va = (int[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] *= va[k];
						}
						result.set(lresults, pos);
					}
					break;
				case Dataset.ARRAYINT64:
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1;
					}
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final long[] va = (long[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] *= va[k];
						}
						result.set(lresults, pos);
					}
					break;
				case Dataset.FLOAT32: case Dataset.FLOAT64:
					double dresult = 1.;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(dresult)) { // NaN product is final
							pos[axis] = j;
							final double x = a.getDouble(pos);
							if (ignoreNaNs && Double.isNaN(x)) {
								// NOTE(review): continue also skips the set() below — confirm intended
								continue;
							}
							if (ignoreInfs && Double.isInfinite(x)) {
								continue;
							}
							dresult *= x;
						}
						result.set(dresult, pos);
					}
					break;
				case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
					is = a.getElementsPerItem();
					CompoundDataset da = (CompoundDataset) a;
					double[] dvalues = new double[is];
					dresults = new double[is];
					for (int k = 0; k < is; k++) {
						dresults[k] = 1.;
					}
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						da.getDoubleArray(dvalues, pos);
						// an item contributes only if every one of its elements passes the filters
						boolean okay = true;
						for (int k = 0; k < is; k++) {
							final double val = dvalues[k];
							if (ignoreNaNs && Double.isNaN(val)) {
								okay = false;
								break;
							}
							if (ignoreInfs && Double.isInfinite(val)) {
								okay = false;
								break;
							}
						}
						if (okay) {
							for (int k = 0; k < is; k++) {
								dresults[k] *= dvalues[k];
							}
						}
						result.set(dresults, pos);
					}
					break;
				}
			}
		}

		return result;
	}

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return cumulative sum of items in flattened dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeSum(final Dataset a, final boolean... ignoreInvalids) {
		// delegate to the axis version on a flattened (1D) view
		return cumulativeSum(a.flatten(), 0, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param axis to accumulate along
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return cumulative sum of items along axis in dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeSum(final Dataset a, int axis, final boolean...
ignoreInvalids) {
		axis = a.checkAxis(axis);
		int dtype = a.getDType();
		int[] oshape = a.getShape();
		int alen = oshape[axis]; // number of items along the accumulated axis
		oshape[axis] = 1; // NOTE(review): oshape is not read after this assignment

		// invalid-value filtering only applies to floating-point data;
		// the second flag defaults to the first, as in Dataset#max(int, boolean...)
		final boolean ignoreNaNs;
		final boolean ignoreInfs;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		} else {
			ignoreNaNs = false;
			ignoreInfs = false;
		}
		Dataset result = DatasetFactory.zeros(a); // same shape and type as input
		PositionIterator pi = result.getPositionIterator(axis);

		int[] pos = pi.getPos();

		// visit every 1D lane along axis and write the running sum in place
		while (pi.hasNext()) {

			if (a.isComplex()) {
				double rv = 0, iv = 0; // running complex sum (real, imaginary parts)
				switch (dtype) {
				case Dataset.COMPLEX64:
					ComplexFloatDataset af = (ComplexFloatDataset) a;
					ComplexFloatDataset rf = (ComplexFloatDataset) result;
					for (int j = 0; j < alen; j++) {
						// once the sum is (NaN, NaN) it cannot recover so stop updating
						if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
							pos[axis] = j;
							final float r1 = af.getReal(pos);
							final float i1 = af.getImag(pos);
							if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
								// NOTE(review): continue also skips the set() below, leaving
								// this output element at its initial zero — confirm intended
								continue;
							}
							if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
								continue;
							}
							rv += r1;
							iv += i1;
						}
						rf.set((float) rv, (float) iv, pos);
					}
					break;
				case Dataset.COMPLEX128:
					ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
					ComplexDoubleDataset rd = (ComplexDoubleDataset) result;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(rv) || !Double.isNaN(iv)) {
							pos[axis] = j;
							final double r1 = ad.getReal(pos);
							final double i1 = ad.getImag(pos);
							if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
								continue;
							}
							if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
								continue;
							}
							rv += r1;
							iv += i1;
						}
						rd.set(rv, iv, pos);
					}
					break;
				}
			} else {
				final int is;
				final long[] lresults;
				final double[] dresults;

				switch (dtype) {
				case Dataset.BOOL:
				case Dataset.INT8: case Dataset.INT16:
				case Dataset.INT32: case Dataset.INT64:
					// integer types accumulate in a long
					long lresult = 0;
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						lresult += a.getInt(pos);
						result.set(lresult, pos);
					}
					break;
				case Dataset.ARRAYINT8:
					is = a.getElementsPerItem();
					lresults = new long[is]; // zero-initialized: sum identity
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final byte[] va = (byte[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] += va[k];
						}
						result.set(lresults, pos);
					}
					break;
				case Dataset.ARRAYINT16:
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final short[] va = (short[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] += va[k];
						}
						result.set(lresults, pos);
					}
					break;
				case Dataset.ARRAYINT32:
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final int[] va = (int[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] += va[k];
						}
						result.set(lresults, pos);
					}
					break;
				case Dataset.ARRAYINT64:
					is = a.getElementsPerItem();
					lresults = new long[is];
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						final long[] va = (long[]) a.getObject(pos);
						for (int k = 0; k < is; k++) {
							lresults[k] += va[k];
						}
						result.set(lresults, pos);
					}
					break;
				case Dataset.FLOAT32: case Dataset.FLOAT64:
					double dresult = 0.;
					for (int j = 0; j < alen; j++) {
						if (!Double.isNaN(dresult)) { // NaN sum is final
							pos[axis] = j;
							final double x = a.getDouble(pos);
							if (ignoreNaNs && Double.isNaN(x)) {
								// NOTE(review): continue also skips the set() below — confirm intended
								continue;
							}
							if (ignoreInfs && Double.isInfinite(x)) {
								continue;
							}
							dresult += x;
						}
						result.set(dresult, pos);
					}
					break;
				case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
					is = a.getElementsPerItem();
					CompoundDataset da = (CompoundDataset) a;
					double[] dvalues = new double[is];
					dresults = new double[is]; // zero-initialized: sum identity
					for (int j = 0; j < alen; j++) {
						pos[axis] = j;
						da.getDoubleArray(dvalues, pos);
						// an item contributes only if every one of its elements passes the filters
						boolean okay = true;
						for (int k = 0; k < is; k++) {
							final double val = dvalues[k];
							if (ignoreNaNs && Double.isNaN(val)) {
								okay = false;
								break;
							}
							if (ignoreInfs && Double.isInfinite(val)) {
								okay = false;
								break;
							}
						}
						if (okay) {
							for (int k = 0; k < is; k++) {
								dresults[k] += dvalues[k];
							}
						}
						result.set(dresults, pos);
					}
					break;
				}
			}
		}

		return result;
	}

	/**
	 * @param a dataset
	 * @return average (absolute) deviation from the mean of all items in the dataset:
	 * a Double for simple datasets, or a double[] with one entry per element for compound datasets
	 */
	public static Object averageDeviation(final Dataset a) {
		final IndexIterator it = a.getIterator();
		final int is = a.getElementsPerItem();

		if (is == 1) {
			// mean absolute deviation: sum |x - mean| / N
			double mean = (Double) a.mean();
			double sum = 0.0;

			while (it.hasNext()) {
				sum += Math.abs(a.getElementDoubleAbs(it.index) - mean);
			}

			return sum / a.getSize();
		}

		// compound dataset: compute the deviation per element
		double[] means = (double[]) a.mean();
		double[] sums = new double[is];

		while (it.hasNext()) {
			for (int j = 0; j < is; j++)
				sums[j] += Math.abs(a.getElementDoubleAbs(it.index + j) - means[j]);
		}

		double n = a.getSize();
		for (int j = 0; j < is; j++)
			sums[j] /= n;

		return sums;
	}

	/**
	 * The residual is the sum of squared differences
	 * @param a dataset
	 * @param b dataset to compare against
	 * @return residual value
	 */
	public static double residual(final Dataset a, final Dataset b) {
		return a.residual(b);
	}

	/**
	 * The residual is the sum of squared differences
	 * @param a dataset
	 * @param b dataset to compare against
	 * @param w weights
	 * @return residual value
	 */
	public static double weightedResidual(final Dataset a, final Dataset b, final Dataset w) {
		return a.residual(b, w, false);
	}

	/**
	 * Calculate approximate outlier values. These are defined as the values in the dataset
	 * that are approximately below and above the given thresholds - in terms of percentages
	 * of dataset size.
	 * <p>
	 * It approximates by limiting the number of items (given by length) used internally by
	 * data structures - the larger this is, the more accurate will those outlier values become.
	 * The actual thresholds used are returned in the array.
	 * <p>
	 * Also, the low and high values will be made distinct if possible by adjusting the thresholds
	 * @param a dataset
	 * @param lo percentage threshold for lower limit
	 * @param hi percentage threshold for higher limit
	 * @param length maximum number of items used internally, if negative, then unlimited
	 * @return double array with low and high values, and low and high percentage thresholds
	 */
	public static double[] outlierValues(final Dataset a, double lo, double hi, final int length) {
		return outlierValues(a, null, true, lo, hi, length);
	}

	/**
	 * Calculate approximate outlier values. These are defined as the values in the dataset
	 * that are approximately below and above the given thresholds - in terms of percentages
	 * of dataset size.
	 * <p>
	 * It approximates by limiting the number of items (given by length) used internally by
	 * data structures - the larger this is, the more accurate will those outlier values become.
	 * The actual thresholds used are returned in the array.
	 * <p>
	 * Also, the low and high values will be made distinct if possible by adjusting the thresholds
	 * @param a dataset
	 * @param mask can be null
	 * @param value value of mask to match to include for calculation
	 * @param lo percentage threshold for lower limit
	 * @param hi percentage threshold for higher limit
	 * @param length maximum number of items used internally, if negative, then unlimited
	 * @return double array with low and high values, and low and high percentage thresholds
	 * @since 2.1
	 */
	public static double[] outlierValues(final Dataset a, final Dataset mask, final boolean value, double lo, double hi, final int length) {
		// thresholds must lie strictly inside (0, 100) and be ordered lo < hi
		if (lo <= 0 || hi <= 0 || lo >= hi || hi >= 100 || Double.isNaN(lo) || Double.isNaN(hi)) {
			throw new IllegalArgumentException("Thresholds must be between (0,100) and in order");
		}
		final int size = a.getSize();
		// nl/nh are the numbers of items kept in the low/high tails: at least 1, capped by length
		int nl = Math.max((int) ((lo*size)/100), 1);
		if (length > 0 && nl > length)
			nl = length;
		int nh = Math.max((int) (((100-hi)*size)/100), 1);
		if (length > 0 && nh > length)
			nh = length;

		IndexIterator it = mask == null ? a.getIterator() : a.getBooleanIterator(mask, value);
		// large tails use a tree-map multiset; small tails use a simple list
		double[] results = Math.max(nl, nh) > 640 ? outlierValuesMap(a, it, nl, nh) : outlierValuesList(a, it, nl, nh);

		// convert tail item counts back to the percentage thresholds actually used
		results[2] = results[2]*100./size;
		results[3] = 100.
- results[3]*100./size; 1714 return results; 1715 } 1716 1717 static double[] outlierValuesMap(final Dataset a, final IndexIterator it, int nl, int nh) { 1718 final TreeMap<Double, Integer> lMap = new TreeMap<Double, Integer>(); 1719 final TreeMap<Double, Integer> hMap = new TreeMap<Double, Integer>(); 1720 1721 int ml = 0; 1722 int mh = 0; 1723 while (it.hasNext()) { 1724 Double x = a.getElementDoubleAbs(it.index); 1725 if (Double.isNaN(x)) { 1726 continue; 1727 } 1728 Integer i; 1729 if (ml == nl) { 1730 Double k = lMap.lastKey(); 1731 if (x < k) { 1732 i = lMap.get(k) - 1; 1733 if (i == 0) { 1734 lMap.remove(k); 1735 } else { 1736 lMap.put(k, i); 1737 } 1738 i = lMap.get(x); 1739 if (i == null) { 1740 lMap.put(x, 1); 1741 } else { 1742 lMap.put(x, i + 1); 1743 } 1744 } 1745 } else { 1746 i = lMap.get(x); 1747 if (i == null) { 1748 lMap.put(x, 1); 1749 } else { 1750 lMap.put(x, i + 1); 1751 } 1752 ml++; 1753 } 1754 1755 if (mh == nh) { 1756 Double k = hMap.firstKey(); 1757 if (x > k) { 1758 i = hMap.get(k) - 1; 1759 if (i == 0) { 1760 hMap.remove(k); 1761 } else { 1762 hMap.put(k, i); 1763 } 1764 i = hMap.get(x); 1765 if (i == null) { 1766 hMap.put(x, 1); 1767 } else { 1768 hMap.put(x, i+1); 1769 } 1770 } 1771 } else { 1772 i = hMap.get(x); 1773 if (i == null) { 1774 hMap.put(x, 1); 1775 } else { 1776 hMap.put(x, i+1); 1777 } 1778 mh++; 1779 } 1780 } 1781 1782 // Attempt to make values distinct 1783 double lx = lMap.lastKey(); 1784 double hx = hMap.firstKey(); 1785 if (lx >= hx) { 1786 Double h = hMap.higherKey(lx); 1787 if (h != null) { 1788 hx = h; 1789 mh--; 1790 } else { 1791 Double l = lMap.lowerKey(hx); 1792 if (l != null) { 1793 lx = l; 1794 ml--; 1795 } 1796 } 1797 1798 } 1799 return new double[] {lMap.lastKey(), hMap.firstKey(), ml, mh}; 1800 } 1801 1802 static double[] outlierValuesList(final Dataset a, final IndexIterator it, int nl, int nh) { 1803 final List<Double> lList = new ArrayList<Double>(nl); 1804 final List<Double> hList = new 
ArrayList<Double>(nh);

		double lx = Double.POSITIVE_INFINITY; // current largest value in the low tail
		double hx = Double.NEGATIVE_INFINITY; // current smallest value in the high tail

		while (it.hasNext()) {
			double x = a.getElementDoubleAbs(it.index);
			if (Double.isNaN(x)) { // NaNs are never counted as outliers
				continue;
			}
			if (x < lx) {
				if (lList.size() == nl) {
					lList.remove(lx); // low tail full: displace its current largest value
				}
				lList.add(x);
				lx = Collections.max(lList);
			} else if (x == lx) {
				if (lList.size() < nl) {
					lList.add(x); // keep duplicates while there is room
				}
			}

			if (x > hx) {
				if (hList.size() == nh) {
					hList.remove(hx); // high tail full: displace its current smallest value
				}
				hList.add(x);
				hx = Collections.min(hList);
			} else if (x == hx) {
				if (hList.size() < nh) {
					hList.add(x);
				}
			}
		}

		nl = lList.size();
		nh = hList.size();

		// Attempt to make values distinct by moving the limits past any shared value
		if (lx >= hx) {
			Collections.sort(hList);
			for (double h : hList) {
				if (h > hx) {
					hx = h;
					break;
				}
				nh--; // drop high-tail items that do not exceed the old limit
			}
			if (lx >= hx) {
				Collections.sort(lList);
				Collections.reverse(lList);
				for (double l : lList) {
					if (l < lx) {
						lx = l;
						break;
					}
					nl--; // drop low-tail items that do not fall below the old limit
				}
			}
		}
		return new double[] {lx, hx, nl, nh};
	}

	/**
	 * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null, rowvar = true, bias = false and ddof = null.
	 * @param a dataset
	 * @return covariance array of a
	 */
	public static Dataset covariance(final Dataset a) {
		return covariance(a, true, false, null);
	}

	/**
	 * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null.
1878 * @param a 1879 * @return covariance array of a 1880 * @since 2.0 1881 */ 1882 public static Dataset covariance(final Dataset a, 1883 boolean rowvar, boolean bias, Integer ddof) { 1884 return covariance(a, null, rowvar, bias, ddof); 1885 } 1886 1887 /** 1888 * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null, rowvar = true, bias = false and ddof = null. 1889 * @param a 1890 * @return covariance array of a concatenated with b 1891 */ 1892 public static Dataset covariance(final Dataset a, final Dataset b) { 1893 return covariance(a, b, true, false, null); 1894 } 1895 1896 /** 1897 * Calculate the covariance matrix (array) of a concatenated with b. This 1898 * method is directly based on the implementation in numpy (cov). 1899 * @param a Array containing multiple variable and observations. Each row represents a variable, each column an observation. 1900 * @param b An extra set of variables and observations. Must be of same type as a and have a compatible shape. 1901 * @param rowvar When true (default), each row is a variable; when false each column is a variable. 1902 * @param bias Default normalisation is (N - 1) - N is number of observations. If set true, normalisation is (N). 1903 * @param ddof Default normalisation is (N - 1). If ddof is set, then normalisation is (N - ddof). 1904 * @return covariance array of a concatenated with b 1905 * @since 2.0 1906 */ 1907 public static Dataset covariance (final Dataset a, final Dataset b, 1908 boolean rowvar, boolean bias, Integer ddof) { 1909 1910 //Create a working copy of the dataset & check its rank. 
1911 Dataset vars = a.clone(); 1912 if (a.getRank() == 1) { 1913 vars.setShape(1, a.getShape()[0]); 1914 } 1915 1916 //1D of variables, so consider rows as variables 1917 if (vars.getShape()[0] == 1) { 1918 rowvar = true; 1919 } 1920 1921 //nr is the number of records; axis is index 1922 int nr, axis; 1923 if (rowvar) { 1924 nr = vars.getShape()[1]; 1925 axis = 0; 1926 } else { 1927 nr = vars.getShape()[0]; 1928 axis = 1; 1929 } 1930 1931 //Set the reduced degrees of freedom & normalisation factor 1932 if (ddof == null) { 1933 if (bias == false) { 1934 ddof = 1; 1935 } else { 1936 ddof = 0; 1937 } 1938 } 1939 double norm_fact = nr - ddof; 1940 if (norm_fact <= 0.) { 1941 //TODO Some sort of warning here? 1942 norm_fact = 0.; 1943 } 1944 1945 //Concatenate additional set of variables with main set 1946 if (b != null) { 1947 //Create a working copy of the dataset & check its rank. 1948 Dataset extraVars = b.clone(); 1949 if (b.getRank() == 1) { 1950 extraVars.setShape(1, a.getShape()[0]); 1951 } 1952 vars = DatasetUtils.concatenate(new Dataset[]{vars, extraVars}, axis); 1953 } 1954 1955 //Calculate deviations & covariance matrix 1956 Dataset varsMean = vars.mean(1-axis, false); 1957 //-vars & varsMean need same shape (this is a hack!) 1958 int[] meanShape = vars.getShape(); 1959 meanShape[1-axis] = 1; 1960 varsMean.setShape(meanShape); 1961 vars.isubtract(varsMean); 1962 Dataset cov; 1963 if (rowvar) { 1964 cov = Maths.divide(LinearAlgebra.dotProduct(vars, Maths.conjugate(vars.transpose())), norm_fact).squeeze(); 1965 } else { 1966 cov = Maths.divide(LinearAlgebra.dotProduct(vars.transpose(), Maths.conjugate(vars)), norm_fact).squeeze(); 1967 } 1968 return cov; 1969 } 1970}