001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.csv;
019
020import static org.apache.commons.csv.Token.Type.TOKEN;
021
022import java.io.Closeable;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.InputStreamReader;
027import java.io.Reader;
028import java.io.StringReader;
029import java.io.UncheckedIOException;
030import java.net.URL;
031import java.nio.charset.Charset;
032import java.nio.file.Files;
033import java.nio.file.Path;
034import java.util.ArrayList;
035import java.util.Arrays;
036import java.util.Collections;
037import java.util.Iterator;
038import java.util.LinkedHashMap;
039import java.util.List;
040import java.util.Map;
041import java.util.NoSuchElementException;
042import java.util.Objects;
043import java.util.Spliterator;
044import java.util.Spliterators;
045import java.util.TreeMap;
046import java.util.stream.Collectors;
047import java.util.stream.Stream;
048import java.util.stream.StreamSupport;
049
050import org.apache.commons.io.function.Uncheck;
051
052/**
053 * Parses CSV files according to the specified format.
054 *
055 * Because CSV appears in many different dialects, the parser supports many formats by allowing the
056 * specification of a {@link CSVFormat}.
057 *
058 * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream.
059 *
060 * <h2>Creating instances</h2>
061 * <p>
062 * There are several static factory methods that can be used to create instances for various types of resources:
063 * </p>
064 * <ul>
065 *     <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
066 *     <li>{@link #parse(String, CSVFormat)}</li>
067 *     <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
068 * </ul>
069 * <p>
 * Alternatively, parsers can also be created by passing a {@link Reader} directly to one of the constructors.
071 *
072 * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
073 * </p>
074 * <pre>
075 * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
076 *     ...
077 * }
078 * </pre>
079 *
080 * <h2>Parsing record wise</h2>
081 * <p>
082 * To parse a CSV input from a file, you write:
083 * </p>
084 *
085 * <pre>
086 * File csvData = new File(&quot;/path/to/csv&quot;);
 * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.RFC4180);
088 * for (CSVRecord csvRecord : parser) {
089 *     ...
090 * }
091 * </pre>
092 *
093 * <p>
 * This will read and parse the contents of the file using the
095 * <a href="https://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
096 * </p>
097 *
098 * <p>
099 * To parse CSV input in a format like Excel, you write:
100 * </p>
101 *
102 * <pre>
 * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.EXCEL);
104 * for (CSVRecord csvRecord : parser) {
105 *     ...
106 * }
107 * </pre>
108 *
109 * <p>
110 * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
111 * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
112 * </p>
113 *
114 * <h2>Parsing into memory</h2>
115 * <p>
116 * If parsing record-wise is not desired, the contents of the input can be read completely into memory.
117 * </p>
118 *
119 * <pre>
120 * Reader in = new StringReader(&quot;a;b\nc;d&quot;);
121 * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
122 * List&lt;CSVRecord&gt; list = parser.getRecords();
123 * </pre>
124 *
125 * <p>
126 * There are two constraints that have to be kept in mind:
127 * </p>
128 *
129 * <ol>
130 *     <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
131 *     the input, those records will not end up in the in-memory representation of your CSV data.</li>
132 *     <li>Parsing into memory may consume a lot of system resources depending on the input. For example, if you're
133 *     parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
134 * </ol>
135 *
136 * <h2>Notes</h2>
137 * <p>
138 * The internal parser state is completely covered by the format and the reader state.
139 * </p>
140 *
141 * @see <a href="package-summary.html">package documentation for more details</a>
142 */
143public final class CSVParser implements Iterable<CSVRecord>, Closeable {
144
145    final class CSVRecordIterator implements Iterator<CSVRecord> {
146        private CSVRecord current;
147
148        private CSVRecord getNextRecord() {
149            return Uncheck.get(CSVParser.this::nextRecord);
150        }
151
152        @Override
153        public boolean hasNext() {
154            if (CSVParser.this.isClosed()) {
155                return false;
156            }
157            if (current == null) {
158                current = getNextRecord();
159            }
160
161            return current != null;
162        }
163
164        @Override
165        public CSVRecord next() {
166            if (CSVParser.this.isClosed()) {
167                throw new NoSuchElementException("CSVParser has been closed");
168            }
169            CSVRecord next = current;
170            current = null;
171
172            if (next == null) {
173                // hasNext() wasn't called before
174                next = getNextRecord();
175                if (next == null) {
176                    throw new NoSuchElementException("No more CSV records available");
177                }
178            }
179
180            return next;
181        }
182
183        @Override
184        public void remove() {
185            throw new UnsupportedOperationException();
186        }
187    }
188
189    /**
190     * Header information based on name and position.
191     */
192    private static final class Headers {
193
194        /**
195         * Header column positions (0-based)
196         */
197        final Map<String, Integer> headerMap;
198
199        /**
200         * Header names in column order
201         */
202        final List<String> headerNames;
203
204        Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
205            this.headerMap = headerMap;
206            this.headerNames = headerNames;
207        }
208    }
209
210    /**
211     * Creates a parser for the given {@link File}.
212     *
213     * @param file
214     *            a CSV file. Must not be null.
215     * @param charset
216     *            The Charset to decode the given file.
217     * @param format
218     *            the CSVFormat used for CSV parsing. Must not be null.
219     * @return a new parser
220     * @throws IllegalArgumentException
221     *             If the parameters of the format are inconsistent or if either file or format are null.
222     * @throws IOException
223     *             If an I/O error occurs
224     */
225    public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
226        Objects.requireNonNull(file, "file");
227        return parse(file.toPath(), charset, format);
228    }
229
230    /**
231     * Creates a CSV parser using the given {@link CSVFormat}.
232     *
233     * <p>
234     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
235     * unless you close the {@code reader}.
236     * </p>
237     *
238     * @param inputStream
239     *            an InputStream containing CSV-formatted input. Must not be null.
240     * @param charset
241     *            The Charset to decode the given file.
242     * @param format
243     *            the CSVFormat used for CSV parsing. Must not be null.
244     * @return a new CSVParser configured with the given reader and format.
245     * @throws IllegalArgumentException
246     *             If the parameters of the format are inconsistent or if either reader or format are null.
247     * @throws IOException
248     *             If there is a problem reading the header or skipping the first record
249     * @since 1.5
250     */
251    @SuppressWarnings("resource")
252    public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
253            throws IOException {
254        Objects.requireNonNull(inputStream, "inputStream");
255        Objects.requireNonNull(format, "format");
256        return parse(new InputStreamReader(inputStream, charset), format);
257    }
258
259    /**
260     * Creates and returns a parser for the given {@link Path}, which the caller MUST close.
261     *
262     * @param path
263     *            a CSV file. Must not be null.
264     * @param charset
265     *            The Charset to decode the given file.
266     * @param format
267     *            the CSVFormat used for CSV parsing. Must not be null.
268     * @return a new parser
269     * @throws IllegalArgumentException
270     *             If the parameters of the format are inconsistent or if either file or format are null.
271     * @throws IOException
272     *             If an I/O error occurs
273     * @since 1.5
274     */
275    @SuppressWarnings("resource")
276    public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
277        Objects.requireNonNull(path, "path");
278        Objects.requireNonNull(format, "format");
279        return parse(Files.newInputStream(path), charset, format);
280    }
281
282    /**
283     * Creates a CSV parser using the given {@link CSVFormat}
284     *
285     * <p>
286     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
287     * unless you close the {@code reader}.
288     * </p>
289     *
290     * @param reader
291     *            a Reader containing CSV-formatted input. Must not be null.
292     * @param format
293     *            the CSVFormat used for CSV parsing. Must not be null.
294     * @return a new CSVParser configured with the given reader and format.
295     * @throws IllegalArgumentException
296     *             If the parameters of the format are inconsistent or if either reader or format are null.
297     * @throws IOException
298     *             If there is a problem reading the header or skipping the first record
299     * @since 1.5
300     */
301    public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
302        return new CSVParser(reader, format);
303    }
304
305    /**
306     * Creates a parser for the given {@link String}.
307     *
308     * @param string
309     *            a CSV string. Must not be null.
310     * @param format
311     *            the CSVFormat used for CSV parsing. Must not be null.
312     * @return a new parser
313     * @throws IllegalArgumentException
314     *             If the parameters of the format are inconsistent or if either string or format are null.
315     * @throws IOException
316     *             If an I/O error occurs
317     */
318    public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
319        Objects.requireNonNull(string, "string");
320        Objects.requireNonNull(format, "format");
321
322        return new CSVParser(new StringReader(string), format);
323    }
324
325    /**
326     * Creates and returns a parser for the given URL, which the caller MUST close.
327     *
328     * <p>
329     * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless
330     * you close the {@code url}.
331     * </p>
332     *
333     * @param url
334     *            a URL. Must not be null.
335     * @param charset
336     *            the charset for the resource. Must not be null.
337     * @param format
338     *            the CSVFormat used for CSV parsing. Must not be null.
339     * @return a new parser
340     * @throws IllegalArgumentException
341     *             If the parameters of the format are inconsistent or if either url, charset or format are null.
342     * @throws IOException
343     *             If an I/O error occurs
344     */
345    @SuppressWarnings("resource")
346    public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
347        Objects.requireNonNull(url, "url");
348        Objects.requireNonNull(charset, "charset");
349        Objects.requireNonNull(format, "format");
350
351        return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
352    }
353
    /** Comment attached to the header record, captured while the headers are created; null if none was seen. */
    private String headerComment;

    /** Comment found at end of input without a following record, set by nextRecord(); null if none was seen. */
    private String trailerComment;

    /** Copy of the format supplied to the constructor. */
    private final CSVFormat format;

    /** Header name/index information built once during construction. */
    private final Headers headers;

    /** Token source for the underlying reader. */
    private final Lexer lexer;

    /** The single iterator instance handed out by iterator(). */
    private final CSVRecordIterator csvRecordIterator;

    /** A value buffer for nextRecord(). Cleared at the start of each record and reused. */
    private final List<String> recordList = new ArrayList<>();

    /**
     * The number of the most recently parsed record; incremented before each new record is created.
     */
    private long recordNumber;

    /**
     * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
     * with {@link #recordNumber}.
     */
    private final long characterOffset;

    /** Token instance that is reset and refilled by the lexer for every token read. */
    private final Token reusableToken = new Token();
381
382    /**
383     * Constructs a new instance using the given {@link CSVFormat}
384     *
385     * <p>
386     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
387     * unless you close the {@code reader}.
388     * </p>
389     *
390     * @param reader
391     *            a Reader containing CSV-formatted input. Must not be null.
392     * @param format
393     *            the CSVFormat used for CSV parsing. Must not be null.
394     * @throws IllegalArgumentException
395     *             If the parameters of the format are inconsistent or if either reader or format are null.
396     * @throws IOException
397     *             If there is a problem reading the header or skipping the first record
398     */
399    public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
400        this(reader, format, 0, 1);
401    }
402
403    /**
404     * Constructs a new instance using the given {@link CSVFormat}
405     *
406     * <p>
407     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
408     * unless you close the {@code reader}.
409     * </p>
410     *
411     * @param reader
412     *            a Reader containing CSV-formatted input. Must not be null.
413     * @param format
414     *            the CSVFormat used for CSV parsing. Must not be null.
415     * @param characterOffset
416     *            Lexer offset when the parser does not start parsing at the beginning of the source.
417     * @param recordNumber
418     *            The next record number to assign
419     * @throws IllegalArgumentException
420     *             If the parameters of the format are inconsistent or if either the reader or format is null.
421     * @throws IOException
422     *             If there is a problem reading the header or skipping the first record
423     * @since 1.1
424     */
425    @SuppressWarnings("resource")
426    public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
427        throws IOException {
428        Objects.requireNonNull(reader, "reader");
429        Objects.requireNonNull(format, "format");
430        this.format = format.copy();
431        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
432        this.csvRecordIterator = new CSVRecordIterator();
433        this.headers = createHeaders();
434        this.characterOffset = characterOffset;
435        this.recordNumber = recordNumber - 1;
436    }
437
438    private void addRecordValue(final boolean lastRecord) {
439        final String input = format.trim(reusableToken.content.toString());
440        if (lastRecord && input.isEmpty() && format.getTrailingDelimiter()) {
441            return;
442        }
443        recordList.add(handleNull(input));
444    }
445
446    /**
447     * Closes resources.
448     *
449     * @throws IOException
450     *             If an I/O error occurs
451     */
452    @Override
453    public void close() throws IOException {
454        lexer.close();
455    }
456
457    private Map<String, Integer> createEmptyHeaderMap() {
458        return format.getIgnoreHeaderCase() ?
459                new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
460                new LinkedHashMap<>();
461    }
462
463    /**
464     * Creates the name to index mapping if the format defines a header.
465     *
466     * @return null if the format has no header.
467     * @throws IOException if there is a problem reading the header or skipping the first record
468     */
469    private Headers createHeaders() throws IOException {
470        Map<String, Integer> hdrMap = null;
471        List<String> headerNames = null;
472        final String[] formatHeader = format.getHeader();
473        if (formatHeader != null) {
474            hdrMap = createEmptyHeaderMap();
475            String[] headerRecord = null;
476            if (formatHeader.length == 0) {
477                // read the header from the first line of the file
478                final CSVRecord nextRecord = nextRecord();
479                if (nextRecord != null) {
480                    headerRecord = nextRecord.values();
481                    headerComment = nextRecord.getComment();
482                }
483            } else {
484                if (format.getSkipHeaderRecord()) {
485                    final CSVRecord nextRecord = nextRecord();
486                    if (nextRecord != null) {
487                        headerComment = nextRecord.getComment();
488                    }
489                }
490                headerRecord = formatHeader;
491            }
492
493            // build the name to index mappings
494            if (headerRecord != null) {
495                // Track an occurrence of a null, empty or blank header.
496                boolean observedMissing = false;
497                for (int i = 0; i < headerRecord.length; i++) {
498                    final String header = headerRecord[i];
499                    final boolean blankHeader = CSVFormat.isBlank(header);
500                    if (blankHeader && !format.getAllowMissingColumnNames()) {
501                        throw new IllegalArgumentException(
502                            "A header name is missing in " + Arrays.toString(headerRecord));
503                    }
504
505                    final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header);
506                    final DuplicateHeaderMode headerMode = format.getDuplicateHeaderMode();
507                    final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
508                    final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
509
510                    if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) {
511                        throw new IllegalArgumentException(
512                            String.format(
513                                "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
514                                header, Arrays.toString(headerRecord)));
515                    }
516                    observedMissing |= blankHeader;
517                    if (header != null) {
518                        hdrMap.put(header, Integer.valueOf(i));
519                        if (headerNames == null) {
520                            headerNames = new ArrayList<>(headerRecord.length);
521                        }
522                        headerNames.add(header);
523                    }
524                }
525            }
526        }
527        // Make header names Collection immutable
528        return new Headers(hdrMap, headerNames == null ? Collections.emptyList() : Collections.unmodifiableList(headerNames));
529    }
530
531    /**
532     * Gets the current line number in the input stream.
533     *
534     * <p>
535     * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
536     * the record number.
537     * </p>
538     *
539     * @return current line number
540     */
541    public long getCurrentLineNumber() {
542        return lexer.getCurrentLineNumber();
543    }
544
545    /**
546     * Gets the first end-of-line string encountered.
547     *
548     * @return the first end-of-line string
549     * @since 1.5
550     */
551    public String getFirstEndOfLine() {
552        return lexer.getFirstEol();
553    }
554
555    /**
556     * Gets the header comment, if any.
557     * The header comment appears before the header record.
558     *
559     * @return the header comment for this stream, or null if no comment is available.
560     * @since 1.10.0
561     */
562    public String getHeaderComment() {
563        return headerComment;
564    }
565
566    /**
567     * Gets a copy of the header map as defined in the CSVFormat's header.
568     * <p>
569     * The map keys are column names. The map values are 0-based indices.
570     * </p>
571     * <p>
572     * Note: The map can only provide a one-to-one mapping when the format did not
573     * contain null or duplicate column names.
574     * </p>
575     *
576     * @return a copy of the header map.
577     */
578    public Map<String, Integer> getHeaderMap() {
579        if (headers.headerMap == null) {
580            return null;
581        }
582        final Map<String, Integer> map = createEmptyHeaderMap();
583        map.putAll(headers.headerMap);
584        return map;
585    }
586
587    /**
588     * Gets the underlying header map.
589     *
590     * @return the underlying header map.
591     */
592    Map<String, Integer> getHeaderMapRaw() {
593        return headers.headerMap;
594    }
595
596    /**
597     * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header.
598     * <p>
599     * Note: The list provides strings that can be used as keys in the header map.
600     * The list will not contain null column names if they were present in the input
601     * format.
602     * </p>
603     *
604     * @return read-only list of header names that iterates in column order.
605     * @see #getHeaderMap()
606     * @since 1.7
607     */
608    public List<String> getHeaderNames() {
609        return Collections.unmodifiableList(headers.headerNames);
610    }
611
612    /**
613     * Gets the current record number in the input stream.
614     *
615     * <p>
616     * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
617     * the line number.
618     * </p>
619     *
620     * @return current record number
621     */
622    public long getRecordNumber() {
623        return recordNumber;
624    }
625
626    /**
627     * Parses the CSV input according to the given format and returns the content as a list of
628     * {@link CSVRecord CSVRecords}.
629     *
630     * <p>
631     * The returned content starts at the current parse-position in the stream.
632     * </p>
633     *
634     * @return list of {@link CSVRecord CSVRecords}, may be empty
635     * @throws UncheckedIOException
636     *             on parse error or input read-failure
637     */
638    public List<CSVRecord> getRecords() {
639        return stream().collect(Collectors.toList());
640    }
641
642    /**
643     * Gets the trailer comment, if any.
644     * Trailer comments are located between the last record and EOF
645     *
646     * @return the trailer comment for this stream, or null if no comment is available.
647     * @since 1.10.0
648     */
649    public String getTrailerComment() {
650        return trailerComment;
651    }
652
653    /**
654     * Handles whether the input is parsed as null
655     *
656     * @param input
657     *           the cell data to further processed
658     * @return null if input is parsed as null, or input itself if the input isn't parsed as null
659     */
660    private String handleNull(final String input) {
661        final boolean isQuoted = reusableToken.isQuoted;
662        final String nullString = format.getNullString();
663        final boolean strictQuoteMode = isStrictQuoteMode();
664        if (input.equals(nullString)) {
665            // nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode
666            return strictQuoteMode && isQuoted ? input : null;
667        }
668        // don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode
669        return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input;
670    }
671
672    /**
673     * Checks whether there is a header comment.
674     * The header comment appears before the header record.
675     * Note that if the parser's format has been given an explicit header
676     * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload)
677     * and the header record is not being skipped
678     * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments
679     * will be associated with the first record, not the header.
680     *
681     * @return true if this parser has seen a header comment, false otherwise
682     * @since 1.10.0
683     */
684    public boolean hasHeaderComment() {
685        return headerComment != null;
686    }
687
688    /**
689     * Checks whether there is a trailer comment.
690     * Trailer comments are located between the last record and EOF.
691     * The trailer comments will only be available after the parser has
692     * finished processing this stream.
693     *
694     * @return true if this parser has seen a trailer comment, false otherwise
695     * @since 1.10.0
696     */
697    public boolean hasTrailerComment() {
698        return trailerComment != null;
699    }
700
701    /**
702     * Tests whether this parser is closed.
703     *
704     * @return whether this parser is closed.
705     */
706    public boolean isClosed() {
707        return lexer.isClosed();
708    }
709
710    /**
711     * Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}.
712     *
713     * @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or
714     *         {@link QuoteMode#NON_NUMERIC}.
715     */
716    private boolean isStrictQuoteMode() {
717        return format.getQuoteMode() == QuoteMode.ALL_NON_NULL ||
718               format.getQuoteMode() == QuoteMode.NON_NUMERIC;
719    }
720
721    /**
722     * Returns the record iterator.
723     *
724     * <p>
725     * An {@link IOException} caught during the iteration is re-thrown as an
726     * {@link IllegalStateException}.
727     * </p>
728     * <p>
729     * If the parser is closed, the iterator will not yield any more records.
730     * A call to {@link Iterator#hasNext()} will return {@code false} and
731     * a call to {@link Iterator#next()} will throw a
732     * {@link NoSuchElementException}.
733     * </p>
734     * <p>
735     * If it is necessary to construct an iterator which is usable after the
736     * parser is closed, one option is to extract all records as a list with
737     * {@link #getRecords()}, and return an iterator to that list.
738     * </p>
739     */
740    @Override
741    public Iterator<CSVRecord> iterator() {
742        return csvRecordIterator;
743    }
744
745    /**
746     * Parses the next record from the current point in the stream.
747     *
748     * @return the record as an array of values, or {@code null} if the end of the stream has been reached
749     * @throws IOException
750     *             on parse error or input read-failure
751     */
752    CSVRecord nextRecord() throws IOException {
753        CSVRecord result = null;
754        recordList.clear();
755        StringBuilder sb = null;
756        final long startCharPosition = lexer.getCharacterPosition() + characterOffset;
757        do {
758            reusableToken.reset();
759            lexer.nextToken(reusableToken);
760            switch (reusableToken.type) {
761            case TOKEN:
762                addRecordValue(false);
763                break;
764            case EORECORD:
765                addRecordValue(true);
766                break;
767            case EOF:
768                if (reusableToken.isReady) {
769                    addRecordValue(true);
770                } else if (sb != null) {
771                    trailerComment = sb.toString();
772                }
773                break;
774            case INVALID:
775                throw new IOException("(line " + getCurrentLineNumber() + ") invalid parse sequence");
776            case COMMENT: // Ignored currently
777                if (sb == null) { // first comment for this record
778                    sb = new StringBuilder();
779                } else {
780                    sb.append(Constants.LF);
781                }
782                sb.append(reusableToken.content);
783                reusableToken.type = TOKEN; // Read another token
784                break;
785            default:
786                throw new IllegalStateException("Unexpected Token type: " + reusableToken.type);
787            }
788        } while (reusableToken.type == TOKEN);
789
790        if (!recordList.isEmpty()) {
791            recordNumber++;
792            final String comment = sb == null ? null : sb.toString();
793            result = new CSVRecord(this, recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment,
794                recordNumber, startCharPosition);
795        }
796        return result;
797    }
798
799    /**
800     * Returns a sequential {@code Stream} with this collection as its source.
801     * <p>
802     * If the parser is closed, the stream will not produce any more values.
803     * See the comments in {@link #iterator()}.
804     * </p>
805     * @return a sequential {@code Stream} with this collection as its source.
806     * @since 1.9.0
807     */
808    public Stream<CSVRecord> stream() {
809        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator(), Spliterator.ORDERED), false);
810    }
811
812}