1 use std::fs::File;
2 use std::io::{self, BufRead, Seek};
3 use std::marker::PhantomData;
4 use std::path::Path;
5 use std::result;
6 
7 use csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder};
8 use serde::de::DeserializeOwned;
9 
10 use crate::byte_record::{ByteRecord, Position};
11 use crate::error::{Error, ErrorKind, Result, Utf8Error};
12 use crate::string_record::StringRecord;
13 use crate::{Terminator, Trim};
14 
/// Builds a CSV reader with various configuration knobs.
///
/// This builder can be used to tweak the field delimiter, record terminator
/// and more. Once a CSV `Reader` is built, its configuration cannot be
/// changed.
#[derive(Debug)]
pub struct ReaderBuilder {
    /// Capacity, in bytes, of the buffer placed around the underlying
    /// reader. Set via `buffer_capacity`.
    capacity: usize,
    /// Whether records may have differing numbers of fields. Set via
    /// `flexible`.
    flexible: bool,
    /// Whether the first row is treated as a special header row. Set via
    /// `has_headers`.
    has_headers: bool,
    /// Which values (headers, fields, both or none) have surrounding
    /// whitespace trimmed. Set via `trim`.
    trim: Trim,
    /// The underlying CSV parser builder.
    ///
    /// We explicitly put this on the heap because CoreReaderBuilder embeds an
    /// entire DFA transition table, which along with other things, tallies up
    /// to almost 500 bytes on the stack.
    builder: Box<CoreReaderBuilder>,
}
33 
34 impl Default for ReaderBuilder {
default() -> ReaderBuilder35     fn default() -> ReaderBuilder {
36         ReaderBuilder {
37             capacity: 8 * (1 << 10),
38             flexible: false,
39             has_headers: true,
40             trim: Trim::default(),
41             builder: Box::new(CoreReaderBuilder::default()),
42         }
43     }
44 }
45 
46 impl ReaderBuilder {
47     /// Create a new builder for configuring CSV parsing.
48     ///
49     /// To convert a builder into a reader, call one of the methods starting
50     /// with `from_`.
51     ///
52     /// # Example
53     ///
54     /// ```
55     /// use std::error::Error;
56     /// use csv::{ReaderBuilder, StringRecord};
57     ///
58     /// # fn main() { example().unwrap(); }
59     /// fn example() -> Result<(), Box<dyn Error>> {
60     ///     let data = "\
61     /// city,country,pop
62     /// Boston,United States,4628910
63     /// Concord,United States,42695
64     /// ";
65     ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
66     ///
67     ///     let records = rdr
68     ///         .records()
69     ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
70     ///     assert_eq!(records, vec![
71     ///         vec!["Boston", "United States", "4628910"],
72     ///         vec!["Concord", "United States", "42695"],
73     ///     ]);
74     ///     Ok(())
75     /// }
76     /// ```
new() -> ReaderBuilder77     pub fn new() -> ReaderBuilder {
78         ReaderBuilder::default()
79     }
80 
81     /// Build a CSV parser from this configuration that reads data from the
82     /// given file path.
83     ///
84     /// If there was a problem opening the file at the given path, then this
85     /// returns the corresponding error.
86     ///
87     /// # Example
88     ///
89     /// ```no_run
90     /// use std::error::Error;
91     /// use csv::ReaderBuilder;
92     ///
93     /// # fn main() { example().unwrap(); }
94     /// fn example() -> Result<(), Box<dyn Error>> {
95     ///     let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
96     ///     for result in rdr.records() {
97     ///         let record = result?;
98     ///         println!("{:?}", record);
99     ///     }
100     ///     Ok(())
101     /// }
102     /// ```
from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>>103     pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
104         Ok(Reader::new(self, File::open(path)?))
105     }
106 
107     /// Build a CSV parser from this configuration that reads data from `rdr`.
108     ///
109     /// Note that the CSV reader is buffered automatically, so you should not
110     /// wrap `rdr` in a buffered reader like `io::BufReader`.
111     ///
112     /// # Example
113     ///
114     /// ```
115     /// use std::error::Error;
116     /// use csv::ReaderBuilder;
117     ///
118     /// # fn main() { example().unwrap(); }
119     /// fn example() -> Result<(), Box<dyn Error>> {
120     ///     let data = "\
121     /// city,country,pop
122     /// Boston,United States,4628910
123     /// Concord,United States,42695
124     /// ";
125     ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
126     ///     for result in rdr.records() {
127     ///         let record = result?;
128     ///         println!("{:?}", record);
129     ///     }
130     ///     Ok(())
131     /// }
132     /// ```
from_reader<R: io::Read>(&self, rdr: R) -> Reader<R>133     pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> {
134         Reader::new(self, rdr)
135     }
136 
137     /// The field delimiter to use when parsing CSV.
138     ///
139     /// The default is `b','`.
140     ///
141     /// # Example
142     ///
143     /// ```
144     /// use std::error::Error;
145     /// use csv::ReaderBuilder;
146     ///
147     /// # fn main() { example().unwrap(); }
148     /// fn example() -> Result<(), Box<dyn Error>> {
149     ///     let data = "\
150     /// city;country;pop
151     /// Boston;United States;4628910
152     /// ";
153     ///     let mut rdr = ReaderBuilder::new()
154     ///         .delimiter(b';')
155     ///         .from_reader(data.as_bytes());
156     ///
157     ///     if let Some(result) = rdr.records().next() {
158     ///         let record = result?;
159     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
160     ///         Ok(())
161     ///     } else {
162     ///         Err(From::from("expected at least one record but got none"))
163     ///     }
164     /// }
165     /// ```
delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder166     pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder {
167         self.builder.delimiter(delimiter);
168         self
169     }
170 
171     /// Whether to treat the first row as a special header row.
172     ///
173     /// By default, the first row is treated as a special header row, which
174     /// means the header is never returned by any of the record reading methods
175     /// or iterators. When this is disabled (`yes` set to `false`), the first
176     /// row is not treated specially.
177     ///
178     /// Note that the `headers` and `byte_headers` methods are unaffected by
179     /// whether this is set. Those methods always return the first record.
180     ///
181     /// # Example
182     ///
183     /// This example shows what happens when `has_headers` is disabled.
184     /// Namely, the first row is treated just like any other row.
185     ///
186     /// ```
187     /// use std::error::Error;
188     /// use csv::ReaderBuilder;
189     ///
190     /// # fn main() { example().unwrap(); }
191     /// fn example() -> Result<(), Box<dyn Error>> {
192     ///     let data = "\
193     /// city,country,pop
194     /// Boston,United States,4628910
195     /// ";
196     ///     let mut rdr = ReaderBuilder::new()
197     ///         .has_headers(false)
198     ///         .from_reader(data.as_bytes());
199     ///     let mut iter = rdr.records();
200     ///
201     ///     // Read the first record.
202     ///     if let Some(result) = iter.next() {
203     ///         let record = result?;
204     ///         assert_eq!(record, vec!["city", "country", "pop"]);
205     ///     } else {
206     ///         return Err(From::from(
207     ///             "expected at least two records but got none"));
208     ///     }
209     ///
210     ///     // Read the second record.
211     ///     if let Some(result) = iter.next() {
212     ///         let record = result?;
213     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
214     ///     } else {
215     ///         return Err(From::from(
216     ///             "expected at least two records but got one"))
217     ///     }
218     ///     Ok(())
219     /// }
220     /// ```
has_headers(&mut self, yes: bool) -> &mut ReaderBuilder221     pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder {
222         self.has_headers = yes;
223         self
224     }
225 
226     /// Whether the number of fields in records is allowed to change or not.
227     ///
228     /// When disabled (which is the default), parsing CSV data will return an
229     /// error if a record is found with a number of fields different from the
230     /// number of fields in a previous record.
231     ///
232     /// When enabled, this error checking is turned off.
233     ///
234     /// # Example: flexible records enabled
235     ///
236     /// ```
237     /// use std::error::Error;
238     /// use csv::ReaderBuilder;
239     ///
240     /// # fn main() { example().unwrap(); }
241     /// fn example() -> Result<(), Box<dyn Error>> {
242     ///     // Notice that the first row is missing the population count.
243     ///     let data = "\
244     /// city,country,pop
245     /// Boston,United States
246     /// ";
247     ///     let mut rdr = ReaderBuilder::new()
248     ///         .flexible(true)
249     ///         .from_reader(data.as_bytes());
250     ///
251     ///     if let Some(result) = rdr.records().next() {
252     ///         let record = result?;
253     ///         assert_eq!(record, vec!["Boston", "United States"]);
254     ///         Ok(())
255     ///     } else {
256     ///         Err(From::from("expected at least one record but got none"))
257     ///     }
258     /// }
259     /// ```
260     ///
261     /// # Example: flexible records disabled
262     ///
263     /// This shows the error that appears when records of unequal length
264     /// are found and flexible records have been disabled (which is the
265     /// default).
266     ///
267     /// ```
268     /// use std::error::Error;
269     /// use csv::{ErrorKind, ReaderBuilder};
270     ///
271     /// # fn main() { example().unwrap(); }
272     /// fn example() -> Result<(), Box<dyn Error>> {
273     ///     // Notice that the first row is missing the population count.
274     ///     let data = "\
275     /// city,country,pop
276     /// Boston,United States
277     /// ";
278     ///     let mut rdr = ReaderBuilder::new()
279     ///         .flexible(false)
280     ///         .from_reader(data.as_bytes());
281     ///
282     ///     if let Some(Err(err)) = rdr.records().next() {
283     ///         match *err.kind() {
284     ///             ErrorKind::UnequalLengths { expected_len, len, .. } => {
285     ///                 // The header row has 3 fields...
286     ///                 assert_eq!(expected_len, 3);
287     ///                 // ... but the first row has only 2 fields.
288     ///                 assert_eq!(len, 2);
289     ///                 Ok(())
290     ///             }
291     ///             ref wrong => {
292     ///                 Err(From::from(format!(
293     ///                     "expected UnequalLengths error but got {:?}",
294     ///                     wrong)))
295     ///             }
296     ///         }
297     ///     } else {
298     ///         Err(From::from(
299     ///             "expected at least one errored record but got none"))
300     ///     }
301     /// }
302     /// ```
flexible(&mut self, yes: bool) -> &mut ReaderBuilder303     pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder {
304         self.flexible = yes;
305         self
306     }
307 
308     /// Whether fields are trimmed of leading and trailing whitespace or not.
309     ///
310     /// By default, no trimming is performed. This method permits one to
311     /// override that behavior and choose one of the following options:
312     ///
313     /// 1. `Trim::Headers` trims only header values.
314     /// 2. `Trim::Fields` trims only non-header or "field" values.
315     /// 3. `Trim::All` trims both header and non-header values.
316     ///
317     /// A value is only interpreted as a header value if this CSV reader is
318     /// configured to read a header record (which is the default).
319     ///
320     /// When reading string records, characters meeting the definition of
321     /// Unicode whitespace are trimmed. When reading byte records, characters
322     /// meeting the definition of ASCII whitespace are trimmed. ASCII
323     /// whitespace characters correspond to the set `[\t\n\v\f\r ]`.
324     ///
325     /// # Example
326     ///
327     /// This example shows what happens when all values are trimmed.
328     ///
329     /// ```
330     /// use std::error::Error;
331     /// use csv::{ReaderBuilder, StringRecord, Trim};
332     ///
333     /// # fn main() { example().unwrap(); }
334     /// fn example() -> Result<(), Box<dyn Error>> {
335     ///     let data = "\
336     /// city ,   country ,  pop
337     /// Boston,\"
338     ///    United States\",4628910
339     /// Concord,   United States   ,42695
340     /// ";
341     ///     let mut rdr = ReaderBuilder::new()
342     ///         .trim(Trim::All)
343     ///         .from_reader(data.as_bytes());
344     ///     let records = rdr
345     ///         .records()
346     ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
347     ///     assert_eq!(records, vec![
348     ///         vec!["Boston", "United States", "4628910"],
349     ///         vec!["Concord", "United States", "42695"],
350     ///     ]);
351     ///     Ok(())
352     /// }
353     /// ```
trim(&mut self, trim: Trim) -> &mut ReaderBuilder354     pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder {
355         self.trim = trim;
356         self
357     }
358 
359     /// The record terminator to use when parsing CSV.
360     ///
361     /// A record terminator can be any single byte. The default is a special
362     /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n`
363     /// or `\r\n` as a single record terminator.
364     ///
365     /// # Example: `$` as a record terminator
366     ///
367     /// ```
368     /// use std::error::Error;
369     /// use csv::{ReaderBuilder, Terminator};
370     ///
371     /// # fn main() { example().unwrap(); }
372     /// fn example() -> Result<(), Box<dyn Error>> {
373     ///     let data = "city,country,pop$Boston,United States,4628910";
374     ///     let mut rdr = ReaderBuilder::new()
375     ///         .terminator(Terminator::Any(b'$'))
376     ///         .from_reader(data.as_bytes());
377     ///
378     ///     if let Some(result) = rdr.records().next() {
379     ///         let record = result?;
380     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
381     ///         Ok(())
382     ///     } else {
383     ///         Err(From::from("expected at least one record but got none"))
384     ///     }
385     /// }
386     /// ```
terminator(&mut self, term: Terminator) -> &mut ReaderBuilder387     pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder {
388         self.builder.terminator(term.to_core());
389         self
390     }
391 
392     /// The quote character to use when parsing CSV.
393     ///
394     /// The default is `b'"'`.
395     ///
396     /// # Example: single quotes instead of double quotes
397     ///
398     /// ```
399     /// use std::error::Error;
400     /// use csv::ReaderBuilder;
401     ///
402     /// # fn main() { example().unwrap(); }
403     /// fn example() -> Result<(), Box<dyn Error>> {
404     ///     let data = "\
405     /// city,country,pop
406     /// Boston,'United States',4628910
407     /// ";
408     ///     let mut rdr = ReaderBuilder::new()
409     ///         .quote(b'\'')
410     ///         .from_reader(data.as_bytes());
411     ///
412     ///     if let Some(result) = rdr.records().next() {
413     ///         let record = result?;
414     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
415     ///         Ok(())
416     ///     } else {
417     ///         Err(From::from("expected at least one record but got none"))
418     ///     }
419     /// }
420     /// ```
quote(&mut self, quote: u8) -> &mut ReaderBuilder421     pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder {
422         self.builder.quote(quote);
423         self
424     }
425 
426     /// The escape character to use when parsing CSV.
427     ///
428     /// In some variants of CSV, quotes are escaped using a special escape
429     /// character like `\` (instead of escaping quotes by doubling them).
430     ///
431     /// By default, recognizing these idiosyncratic escapes is disabled.
432     ///
433     /// # Example
434     ///
435     /// ```
436     /// use std::error::Error;
437     /// use csv::ReaderBuilder;
438     ///
439     /// # fn main() { example().unwrap(); }
440     /// fn example() -> Result<(), Box<dyn Error>> {
441     ///     let data = "\
442     /// city,country,pop
443     /// Boston,\"The \\\"United\\\" States\",4628910
444     /// ";
445     ///     let mut rdr = ReaderBuilder::new()
446     ///         .escape(Some(b'\\'))
447     ///         .from_reader(data.as_bytes());
448     ///
449     ///     if let Some(result) = rdr.records().next() {
450     ///         let record = result?;
451     ///         assert_eq!(record, vec![
452     ///             "Boston", "The \"United\" States", "4628910",
453     ///         ]);
454     ///         Ok(())
455     ///     } else {
456     ///         Err(From::from("expected at least one record but got none"))
457     ///     }
458     /// }
459     /// ```
escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder460     pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder {
461         self.builder.escape(escape);
462         self
463     }
464 
465     /// Enable double quote escapes.
466     ///
467     /// This is enabled by default, but it may be disabled. When disabled,
468     /// doubled quotes are not interpreted as escapes.
469     ///
470     /// # Example
471     ///
472     /// ```
473     /// use std::error::Error;
474     /// use csv::ReaderBuilder;
475     ///
476     /// # fn main() { example().unwrap(); }
477     /// fn example() -> Result<(), Box<dyn Error>> {
478     ///     let data = "\
479     /// city,country,pop
480     /// Boston,\"The \"\"United\"\" States\",4628910
481     /// ";
482     ///     let mut rdr = ReaderBuilder::new()
483     ///         .double_quote(false)
484     ///         .from_reader(data.as_bytes());
485     ///
486     ///     if let Some(result) = rdr.records().next() {
487     ///         let record = result?;
488     ///         assert_eq!(record, vec![
489     ///             "Boston", "The \"United\"\" States\"", "4628910",
490     ///         ]);
491     ///         Ok(())
492     ///     } else {
493     ///         Err(From::from("expected at least one record but got none"))
494     ///     }
495     /// }
496     /// ```
double_quote(&mut self, yes: bool) -> &mut ReaderBuilder497     pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder {
498         self.builder.double_quote(yes);
499         self
500     }
501 
502     /// Enable or disable quoting.
503     ///
504     /// This is enabled by default, but it may be disabled. When disabled,
505     /// quotes are not treated specially.
506     ///
507     /// # Example
508     ///
509     /// ```
510     /// use std::error::Error;
511     /// use csv::ReaderBuilder;
512     ///
513     /// # fn main() { example().unwrap(); }
514     /// fn example() -> Result<(), Box<dyn Error>> {
515     ///     let data = "\
516     /// city,country,pop
517     /// Boston,\"The United States,4628910
518     /// ";
519     ///     let mut rdr = ReaderBuilder::new()
520     ///         .quoting(false)
521     ///         .from_reader(data.as_bytes());
522     ///
523     ///     if let Some(result) = rdr.records().next() {
524     ///         let record = result?;
525     ///         assert_eq!(record, vec![
526     ///             "Boston", "\"The United States", "4628910",
527     ///         ]);
528     ///         Ok(())
529     ///     } else {
530     ///         Err(From::from("expected at least one record but got none"))
531     ///     }
532     /// }
533     /// ```
quoting(&mut self, yes: bool) -> &mut ReaderBuilder534     pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
535         self.builder.quoting(yes);
536         self
537     }
538 
539     /// The comment character to use when parsing CSV.
540     ///
541     /// If the start of a record begins with the byte given here, then that
542     /// line is ignored by the CSV parser.
543     ///
544     /// This is disabled by default.
545     ///
546     /// # Example
547     ///
548     /// ```
549     /// use std::error::Error;
550     /// use csv::ReaderBuilder;
551     ///
552     /// # fn main() { example().unwrap(); }
553     /// fn example() -> Result<(), Box<dyn Error>> {
554     ///     let data = "\
555     /// city,country,pop
556     /// #Concord,United States,42695
557     /// Boston,United States,4628910
558     /// ";
559     ///     let mut rdr = ReaderBuilder::new()
560     ///         .comment(Some(b'#'))
561     ///         .from_reader(data.as_bytes());
562     ///
563     ///     if let Some(result) = rdr.records().next() {
564     ///         let record = result?;
565     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
566     ///         Ok(())
567     ///     } else {
568     ///         Err(From::from("expected at least one record but got none"))
569     ///     }
570     /// }
571     /// ```
comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder572     pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
573         self.builder.comment(comment);
574         self
575     }
576 
577     /// A convenience method for specifying a configuration to read ASCII
578     /// delimited text.
579     ///
580     /// This sets the delimiter and record terminator to the ASCII unit
581     /// separator (`\x1F`) and record separator (`\x1E`), respectively.
582     ///
583     /// # Example
584     ///
585     /// ```
586     /// use std::error::Error;
587     /// use csv::ReaderBuilder;
588     ///
589     /// # fn main() { example().unwrap(); }
590     /// fn example() -> Result<(), Box<dyn Error>> {
591     ///     let data = "\
592     /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
593     ///     let mut rdr = ReaderBuilder::new()
594     ///         .ascii()
595     ///         .from_reader(data.as_bytes());
596     ///
597     ///     if let Some(result) = rdr.records().next() {
598     ///         let record = result?;
599     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
600     ///         Ok(())
601     ///     } else {
602     ///         Err(From::from("expected at least one record but got none"))
603     ///     }
604     /// }
605     /// ```
ascii(&mut self) -> &mut ReaderBuilder606     pub fn ascii(&mut self) -> &mut ReaderBuilder {
607         self.builder.ascii();
608         self
609     }
610 
611     /// Set the capacity (in bytes) of the buffer used in the CSV reader.
612     /// This defaults to a reasonable setting.
buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder613     pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
614         self.capacity = capacity;
615         self
616     }
617 
618     /// Enable or disable the NFA for parsing CSV.
619     ///
620     /// This is intended to be a debug option. The NFA is always slower than
621     /// the DFA.
622     #[doc(hidden)]
nfa(&mut self, yes: bool) -> &mut ReaderBuilder623     pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
624         self.builder.nfa(yes);
625         self
626     }
627 }
628 
/// An already configured CSV reader.
630 ///
631 /// A CSV reader takes as input CSV data and transforms that into standard Rust
632 /// values. The most flexible way to read CSV data is as a sequence of records,
633 /// where a record is a sequence of fields and each field is a string. However,
634 /// a reader can also deserialize CSV data into Rust types like `i64` or
635 /// `(String, f64, f64, f64)` or even a custom struct automatically using
636 /// Serde.
637 ///
638 /// # Configuration
639 ///
640 /// A CSV reader has a couple convenient constructor methods like `from_path`
641 /// and `from_reader`. However, if you want to configure the CSV reader to use
642 /// a different delimiter or quote character (among many other things), then
643 /// you should use a [`ReaderBuilder`](struct.ReaderBuilder.html) to construct
644 /// a `Reader`. For example, to change the field delimiter:
645 ///
646 /// ```
647 /// use std::error::Error;
648 /// use csv::ReaderBuilder;
649 ///
650 /// # fn main() { example().unwrap(); }
651 /// fn example() -> Result<(), Box<dyn Error>> {
652 ///     let data = "\
653 /// city;country;pop
654 /// Boston;United States;4628910
655 /// ";
656 ///     let mut rdr = ReaderBuilder::new()
657 ///         .delimiter(b';')
658 ///         .from_reader(data.as_bytes());
659 ///
660 ///     if let Some(result) = rdr.records().next() {
661 ///         let record = result?;
662 ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
663 ///         Ok(())
664 ///     } else {
665 ///         Err(From::from("expected at least one record but got none"))
666 ///     }
667 /// }
668 /// ```
669 ///
670 /// # Error handling
671 ///
672 /// In general, CSV *parsing* does not ever return an error. That is, there is
673 /// no such thing as malformed CSV data. Instead, this reader will prioritize
674 /// finding a parse over rejecting CSV data that it does not understand. This
675 /// choice was inspired by other popular CSV parsers, but also because it is
676 /// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
677 /// it might still be possible to work with the data. In the land of CSV, there
678 /// is no "right" or "wrong," only "right" and "less right."
679 ///
680 /// With that said, a number of errors can occur while reading CSV data:
681 ///
682 /// * By default, all records in CSV data must have the same number of fields.
683 ///   If a record is found with a different number of fields than a prior
684 ///   record, then an error is returned. This behavior can be disabled by
685 ///   enabling flexible parsing via the `flexible` method on
686 ///   [`ReaderBuilder`](struct.ReaderBuilder.html).
687 /// * When reading CSV data from a resource (like a file), it is possible for
688 ///   reading from the underlying resource to fail. This will return an error.
///   For subsequent calls to the `Reader` after encountering such an error
690 ///   (unless `seek` is used), it will behave as if end of file had been
691 ///   reached, in order to avoid running into infinite loops when still
692 ///   attempting to read the next record when one has errored.
693 /// * When reading CSV data into `String` or `&str` fields (e.g., via a
694 ///   [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
695 ///   enforced. If CSV data is invalid UTF-8, then an error is returned. If
696 ///   you want to read invalid UTF-8, then you should use the byte oriented
697 ///   APIs such as [`ByteRecord`](struct.ByteRecord.html). If you need explicit
698 ///   support for another encoding entirely, then you'll need to use another
699 ///   crate to transcode your CSV data to UTF-8 before parsing it.
700 /// * When using Serde to deserialize CSV data into Rust types, it is possible
701 ///   for a number of additional errors to occur. For example, deserializing
702 ///   a field `xyz` into an `i32` field will result in an error.
703 ///
704 /// For more details on the precise semantics of errors, see the
705 /// [`Error`](enum.Error.html) type.
#[derive(Debug)]
pub struct Reader<R> {
    /// The underlying CSV parser.
    ///
    /// We explicitly put this on the heap because CoreReader embeds an entire
    /// DFA transition table, which along with other things, tallies up to
    /// almost 500 bytes on the stack.
    core: Box<CoreReader>,
    /// The underlying reader, wrapped in a buffer whose capacity is taken
    /// from the builder that created this reader.
    rdr: io::BufReader<R>,
    /// Various state tracking.
    ///
    /// There is more state embedded in the `CoreReader`.
    state: ReaderState,
}
721 
/// Reader-level state tracked alongside the core parser.
#[derive(Debug)]
struct ReaderState {
    /// When set, this contains the first row of any parsed CSV data.
    ///
    /// This is always populated, regardless of whether `has_headers` is set.
    headers: Option<Headers>,
    /// When set, the first row of parsed CSV data is excluded from things
    /// that read records, like iterators and `read_record`.
    has_headers: bool,
    /// When set, there is no restriction on the length of records. When not
    /// set, every record must have the same number of fields, or else an error
    /// is reported.
    flexible: bool,
    /// Which values (headers, fields, both or none) have surrounding
    /// whitespace trimmed, as configured on the builder.
    trim: Trim,
    /// The number of fields in the first record parsed.
    first_field_count: Option<u64>,
    /// The current position of the parser.
    ///
    /// Note that this position is only observable by callers at the start
    /// of a record. More granular positions are not supported.
    cur_pos: Position,
    /// Whether the first record has been read or not.
    first: bool,
    /// Whether the reader has been seeked or not.
    seeked: bool,
    /// Whether EOF of the underlying reader has been reached or not.
    ///
    /// IO errors on the underlying reader will be considered as an EOF for
    /// subsequent read attempts, as it would be incorrect to keep on trying
    /// to read when the underlying reader has broken.
    ///
    /// For clarity, having the best `Debug` impl and in case they need to be
    /// treated differently at some point, we store whether the `EOF` is
    /// considered because an actual EOF happened, or because we encountered
    /// an IO error.
    /// This has no additional runtime cost.
    eof: ReaderEofState,
}
760 
/// Whether EOF of the underlying reader has been reached or not.
///
/// IO errors on the underlying reader will be considered as an EOF for
/// subsequent read attempts, as it would be incorrect to keep on trying
/// to read when the underlying reader has broken.
///
/// For clarity, having the best `Debug` impl and in case they need to be
/// treated differently at some point, we store whether the `EOF` is
/// considered because an actual EOF happened, or because we encountered
/// an IO error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReaderEofState {
    /// Neither EOF nor an IO error has been recorded. A newly constructed
    /// reader starts in this state.
    NotEof,
    /// An actual end of file happened on the underlying reader.
    Eof,
    /// An IO error was encountered on the underlying reader; subsequent read
    /// attempts treat this like EOF.
    IOError,
}
777 
/// Headers encapsulates any data associated with the headers of CSV data.
///
/// The headers always correspond to the first row. Two views are kept side
/// by side: the raw bytes and the UTF-8 decoded form.
#[derive(Debug)]
struct Headers {
    /// The header, as raw bytes.
    byte_record: ByteRecord,
    /// The header, as valid UTF-8 (or a UTF-8 error).
    ///
    /// The `Err` case holds the UTF-8 error in place of a string record.
    string_record: result::Result<StringRecord, Utf8Error>,
}
788 
789 impl Reader<Reader<File>> {
790     /// Create a new CSV parser with a default configuration for the given
791     /// file path.
792     ///
793     /// To customize CSV parsing, use a `ReaderBuilder`.
794     ///
795     /// # Example
796     ///
797     /// ```no_run
798     /// use std::error::Error;
799     /// use csv::Reader;
800     ///
801     /// # fn main() { example().unwrap(); }
802     /// fn example() -> Result<(), Box<dyn Error>> {
803     ///     let mut rdr = Reader::from_path("foo.csv")?;
804     ///     for result in rdr.records() {
805     ///         let record = result?;
806     ///         println!("{:?}", record);
807     ///     }
808     ///     Ok(())
809     /// }
810     /// ```
from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>>811     pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
812         ReaderBuilder::new().from_path(path)
813     }
814 }
815 
816 impl<R: io::Read> Reader<R> {
817     /// Create a new CSV reader given a builder and a source of underlying
818     /// bytes.
new(builder: &ReaderBuilder, rdr: R) -> Reader<R>819     fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
820         Reader {
821             core: Box::new(builder.builder.build()),
822             rdr: io::BufReader::with_capacity(builder.capacity, rdr),
823             state: ReaderState {
824                 headers: None,
825                 has_headers: builder.has_headers,
826                 flexible: builder.flexible,
827                 trim: builder.trim,
828                 first_field_count: None,
829                 cur_pos: Position::new(),
830                 first: false,
831                 seeked: false,
832                 eof: ReaderEofState::NotEof,
833             },
834         }
835     }
836 
837     /// Create a new CSV parser with a default configuration for the given
838     /// reader.
839     ///
840     /// To customize CSV parsing, use a `ReaderBuilder`.
841     ///
842     /// # Example
843     ///
844     /// ```
845     /// use std::error::Error;
846     /// use csv::Reader;
847     ///
848     /// # fn main() { example().unwrap(); }
849     /// fn example() -> Result<(), Box<dyn Error>> {
850     ///     let data = "\
851     /// city,country,pop
852     /// Boston,United States,4628910
853     /// Concord,United States,42695
854     /// ";
855     ///     let mut rdr = Reader::from_reader(data.as_bytes());
856     ///     for result in rdr.records() {
857     ///         let record = result?;
858     ///         println!("{:?}", record);
859     ///     }
860     ///     Ok(())
861     /// }
862     /// ```
from_reader(rdr: R) -> Reader<R>863     pub fn from_reader(rdr: R) -> Reader<R> {
864         ReaderBuilder::new().from_reader(rdr)
865     }
866 
867     /// Returns a borrowed iterator over deserialized records.
868     ///
869     /// Each item yielded by this iterator is a `Result<D, Error>`.
870     /// Therefore, in order to access the record, callers must handle the
871     /// possibility of error (typically with `try!` or `?`).
872     ///
873     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
874     /// default), then this does not include the first record. Additionally,
875     /// if `has_headers` is enabled, then deserializing into a struct will
876     /// automatically align the values in each row to the fields of a struct
877     /// based on the header row.
878     ///
879     /// # Example
880     ///
881     /// This shows how to deserialize CSV data into normal Rust structs. The
882     /// fields of the header row are used to match up the values in each row
883     /// to the fields of the struct.
884     ///
885     /// ```
886     /// use std::error::Error;
887     ///
888     /// use csv::Reader;
889     /// use serde::Deserialize;
890     ///
891     /// #[derive(Debug, Deserialize, Eq, PartialEq)]
892     /// struct Row {
893     ///     city: String,
894     ///     country: String,
895     ///     #[serde(rename = "popcount")]
896     ///     population: u64,
897     /// }
898     ///
899     /// # fn main() { example().unwrap(); }
900     /// fn example() -> Result<(), Box<dyn Error>> {
901     ///     let data = "\
902     /// city,country,popcount
903     /// Boston,United States,4628910
904     /// ";
905     ///     let mut rdr = Reader::from_reader(data.as_bytes());
906     ///     let mut iter = rdr.deserialize();
907     ///
908     ///     if let Some(result) = iter.next() {
909     ///         let record: Row = result?;
910     ///         assert_eq!(record, Row {
911     ///             city: "Boston".to_string(),
912     ///             country: "United States".to_string(),
913     ///             population: 4628910,
914     ///         });
915     ///         Ok(())
916     ///     } else {
917     ///         Err(From::from("expected at least one record but got none"))
918     ///     }
919     /// }
920     /// ```
921     ///
922     /// # Rules
923     ///
924     /// For the most part, any Rust type that maps straight-forwardly to a CSV
925     /// record is supported. This includes maps, structs, tuples and tuple
926     /// structs. Other Rust types, such as `Vec`s, arrays, and enums have
927     /// a more complicated story. In general, when working with CSV data, one
928     /// should avoid *nested sequences* as much as possible.
929     ///
930     /// Maps, structs, tuples and tuple structs map to CSV records in a simple
931     /// way. Tuples and tuple structs decode their fields in the order that
932     /// they are defined. Structs will do the same only if `has_headers` has
933     /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html),
934     /// otherwise, structs and maps are deserialized based on the fields
935     /// defined in the header row. (If there is no header row, then
936     /// deserializing into a map will result in an error.)
937     ///
938     /// Nested sequences are supported in a limited capacity. Namely, they
939     /// are flattened. As a result, it's often useful to use a `Vec` to capture
940     /// a "tail" of fields in a record:
941     ///
942     /// ```
943     /// use std::error::Error;
944     ///
945     /// use csv::ReaderBuilder;
946     /// use serde::Deserialize;
947     ///
948     /// #[derive(Debug, Deserialize, Eq, PartialEq)]
949     /// struct Row {
950     ///     label: String,
951     ///     values: Vec<i32>,
952     /// }
953     ///
954     /// # fn main() { example().unwrap(); }
955     /// fn example() -> Result<(), Box<dyn Error>> {
956     ///     let data = "foo,1,2,3";
957     ///     let mut rdr = ReaderBuilder::new()
958     ///         .has_headers(false)
959     ///         .from_reader(data.as_bytes());
960     ///     let mut iter = rdr.deserialize();
961     ///
962     ///     if let Some(result) = iter.next() {
963     ///         let record: Row = result?;
964     ///         assert_eq!(record, Row {
965     ///             label: "foo".to_string(),
966     ///             values: vec![1, 2, 3],
967     ///         });
968     ///         Ok(())
969     ///     } else {
970     ///         Err(From::from("expected at least one record but got none"))
971     ///     }
972     /// }
973     /// ```
974     ///
975     /// In the above example, adding another field to the `Row` struct after
976     /// the `values` field will result in a deserialization error. This is
977     /// because the deserializer doesn't know when to stop reading fields
978     /// into the `values` vector, so it will consume the rest of the fields in
979     /// the record leaving none left over for the additional field.
980     ///
981     /// Finally, simple enums in Rust can be deserialized as well. Namely,
982     /// enums must either be variants with no arguments or variants with a
983     /// single argument. Variants with no arguments are deserialized based on
984     /// which variant name the field matches. Variants with a single argument
985     /// are deserialized based on which variant can store the data. The latter
986     /// is only supported when using "untagged" enum deserialization. The
987     /// following example shows both forms in action:
988     ///
989     /// ```
990     /// use std::error::Error;
991     ///
992     /// use csv::Reader;
993     /// use serde::Deserialize;
994     ///
995     /// #[derive(Debug, Deserialize, PartialEq)]
996     /// struct Row {
997     ///     label: Label,
998     ///     value: Number,
999     /// }
1000     ///
1001     /// #[derive(Debug, Deserialize, PartialEq)]
1002     /// #[serde(rename_all = "lowercase")]
1003     /// enum Label {
1004     ///     Celsius,
1005     ///     Fahrenheit,
1006     /// }
1007     ///
1008     /// #[derive(Debug, Deserialize, PartialEq)]
1009     /// #[serde(untagged)]
1010     /// enum Number {
1011     ///     Integer(i64),
1012     ///     Float(f64),
1013     /// }
1014     ///
1015     /// # fn main() { example().unwrap(); }
1016     /// fn example() -> Result<(), Box<dyn Error>> {
1017     ///     let data = "\
1018     /// label,value
1019     /// celsius,22.2222
1020     /// fahrenheit,72
1021     /// ";
1022     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1023     ///     let mut iter = rdr.deserialize();
1024     ///
1025     ///     // Read the first record.
1026     ///     if let Some(result) = iter.next() {
1027     ///         let record: Row = result?;
1028     ///         assert_eq!(record, Row {
1029     ///             label: Label::Celsius,
1030     ///             value: Number::Float(22.2222),
1031     ///         });
1032     ///     } else {
1033     ///         return Err(From::from(
1034     ///             "expected at least two records but got none"));
1035     ///     }
1036     ///
1037     ///     // Read the second record.
1038     ///     if let Some(result) = iter.next() {
1039     ///         let record: Row = result?;
1040     ///         assert_eq!(record, Row {
1041     ///             label: Label::Fahrenheit,
1042     ///             value: Number::Integer(72),
1043     ///         });
1044     ///         Ok(())
1045     ///     } else {
1046     ///         Err(From::from(
1047     ///             "expected at least two records but got only one"))
1048     ///     }
1049     /// }
1050     /// ```
    pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D>
    where
        D: DeserializeOwned,
    {
        // The iterator is built from a mutable borrow of this reader, so the
        // reader stays usable once the iterator is dropped.
        DeserializeRecordsIter::new(self)
    }
1057 
1058     /// Returns an owned iterator over deserialized records.
1059     ///
1060     /// Each item yielded by this iterator is a `Result<D, Error>`.
1061     /// Therefore, in order to access the record, callers must handle the
1062     /// possibility of error (typically with `try!` or `?`).
1063     ///
1064     /// This is mostly useful when you want to return a CSV iterator or store
1065     /// it somewhere.
1066     ///
1067     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1068     /// default), then this does not include the first record. Additionally,
1069     /// if `has_headers` is enabled, then deserializing into a struct will
1070     /// automatically align the values in each row to the fields of a struct
1071     /// based on the header row.
1072     ///
1073     /// For more detailed deserialization rules, see the documentation on the
1074     /// `deserialize` method.
1075     ///
1076     /// # Example
1077     ///
1078     /// ```
1079     /// use std::error::Error;
1080     ///
1081     /// use csv::Reader;
1082     /// use serde::Deserialize;
1083     ///
1084     /// #[derive(Debug, Deserialize, Eq, PartialEq)]
1085     /// struct Row {
1086     ///     city: String,
1087     ///     country: String,
1088     ///     #[serde(rename = "popcount")]
1089     ///     population: u64,
1090     /// }
1091     ///
1092     /// # fn main() { example().unwrap(); }
1093     /// fn example() -> Result<(), Box<dyn Error>> {
1094     ///     let data = "\
1095     /// city,country,popcount
1096     /// Boston,United States,4628910
1097     /// ";
1098     ///     let rdr = Reader::from_reader(data.as_bytes());
1099     ///     let mut iter = rdr.into_deserialize();
1100     ///
1101     ///     if let Some(result) = iter.next() {
1102     ///         let record: Row = result?;
1103     ///         assert_eq!(record, Row {
1104     ///             city: "Boston".to_string(),
1105     ///             country: "United States".to_string(),
1106     ///             population: 4628910,
1107     ///         });
1108     ///         Ok(())
1109     ///     } else {
1110     ///         Err(From::from("expected at least one record but got none"))
1111     ///     }
1112     /// }
1113     /// ```
    pub fn into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D>
    where
        D: DeserializeOwned,
    {
        // The reader is moved into the iterator, which is why the result can
        // be returned from a function or stored.
        DeserializeRecordsIntoIter::new(self)
    }
1120 
1121     /// Returns a borrowed iterator over all records as strings.
1122     ///
1123     /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
1124     /// Therefore, in order to access the record, callers must handle the
1125     /// possibility of error (typically with `try!` or `?`).
1126     ///
1127     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1128     /// default), then this does not include the first record.
1129     ///
1130     /// # Example
1131     ///
1132     /// ```
1133     /// use std::error::Error;
1134     /// use csv::Reader;
1135     ///
1136     /// # fn main() { example().unwrap(); }
1137     /// fn example() -> Result<(), Box<dyn Error>> {
1138     ///     let data = "\
1139     /// city,country,pop
1140     /// Boston,United States,4628910
1141     /// ";
1142     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1143     ///     let mut iter = rdr.records();
1144     ///
1145     ///     if let Some(result) = iter.next() {
1146     ///         let record = result?;
1147     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1148     ///         Ok(())
1149     ///     } else {
1150     ///         Err(From::from("expected at least one record but got none"))
1151     ///     }
1152     /// }
1153     /// ```
    pub fn records(&mut self) -> StringRecordsIter<R> {
        // The iterator is built from a mutable borrow of this reader.
        StringRecordsIter::new(self)
    }
1157 
1158     /// Returns an owned iterator over all records as strings.
1159     ///
1160     /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
1161     /// Therefore, in order to access the record, callers must handle the
1162     /// possibility of error (typically with `try!` or `?`).
1163     ///
1164     /// This is mostly useful when you want to return a CSV iterator or store
1165     /// it somewhere.
1166     ///
1167     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1168     /// default), then this does not include the first record.
1169     ///
1170     /// # Example
1171     ///
1172     /// ```
1173     /// use std::error::Error;
1174     /// use csv::Reader;
1175     ///
1176     /// # fn main() { example().unwrap(); }
1177     /// fn example() -> Result<(), Box<dyn Error>> {
1178     ///     let data = "\
1179     /// city,country,pop
1180     /// Boston,United States,4628910
1181     /// ";
1182     ///     let rdr = Reader::from_reader(data.as_bytes());
1183     ///     let mut iter = rdr.into_records();
1184     ///
1185     ///     if let Some(result) = iter.next() {
1186     ///         let record = result?;
1187     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1188     ///         Ok(())
1189     ///     } else {
1190     ///         Err(From::from("expected at least one record but got none"))
1191     ///     }
1192     /// }
1193     /// ```
    pub fn into_records(self) -> StringRecordsIntoIter<R> {
        // The reader is moved into the iterator.
        StringRecordsIntoIter::new(self)
    }
1197 
1198     /// Returns a borrowed iterator over all records as raw bytes.
1199     ///
1200     /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
1201     /// Therefore, in order to access the record, callers must handle the
1202     /// possibility of error (typically with `try!` or `?`).
1203     ///
1204     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1205     /// default), then this does not include the first record.
1206     ///
1207     /// # Example
1208     ///
1209     /// ```
1210     /// use std::error::Error;
1211     /// use csv::Reader;
1212     ///
1213     /// # fn main() { example().unwrap(); }
1214     /// fn example() -> Result<(), Box<dyn Error>> {
1215     ///     let data = "\
1216     /// city,country,pop
1217     /// Boston,United States,4628910
1218     /// ";
1219     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1220     ///     let mut iter = rdr.byte_records();
1221     ///
1222     ///     if let Some(result) = iter.next() {
1223     ///         let record = result?;
1224     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1225     ///         Ok(())
1226     ///     } else {
1227     ///         Err(From::from("expected at least one record but got none"))
1228     ///     }
1229     /// }
1230     /// ```
    pub fn byte_records(&mut self) -> ByteRecordsIter<R> {
        // The iterator is built from a mutable borrow of this reader.
        ByteRecordsIter::new(self)
    }
1234 
1235     /// Returns an owned iterator over all records as raw bytes.
1236     ///
1237     /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
1238     /// Therefore, in order to access the record, callers must handle the
1239     /// possibility of error (typically with `try!` or `?`).
1240     ///
1241     /// This is mostly useful when you want to return a CSV iterator or store
1242     /// it somewhere.
1243     ///
1244     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1245     /// default), then this does not include the first record.
1246     ///
1247     /// # Example
1248     ///
1249     /// ```
1250     /// use std::error::Error;
1251     /// use csv::Reader;
1252     ///
1253     /// # fn main() { example().unwrap(); }
1254     /// fn example() -> Result<(), Box<dyn Error>> {
1255     ///     let data = "\
1256     /// city,country,pop
1257     /// Boston,United States,4628910
1258     /// ";
1259     ///     let rdr = Reader::from_reader(data.as_bytes());
1260     ///     let mut iter = rdr.into_byte_records();
1261     ///
1262     ///     if let Some(result) = iter.next() {
1263     ///         let record = result?;
1264     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1265     ///         Ok(())
1266     ///     } else {
1267     ///         Err(From::from("expected at least one record but got none"))
1268     ///     }
1269     /// }
1270     /// ```
    pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> {
        // The reader is moved into the iterator.
        ByteRecordsIntoIter::new(self)
    }
1274 
1275     /// Returns a reference to the first row read by this parser.
1276     ///
1277     /// If no row has been read yet, then this will force parsing of the first
1278     /// row.
1279     ///
1280     /// If there was a problem parsing the row or if it wasn't valid UTF-8,
1281     /// then this returns an error.
1282     ///
1283     /// If the underlying reader emits EOF before any data, then this returns
1284     /// an empty record.
1285     ///
1286     /// Note that this method may be used regardless of whether `has_headers`
1287     /// was enabled (but it is enabled by default).
1288     ///
1289     /// # Example
1290     ///
1291     /// This example shows how to get the header row of CSV data. Notice that
1292     /// the header row does not appear as a record in the iterator!
1293     ///
1294     /// ```
1295     /// use std::error::Error;
1296     /// use csv::Reader;
1297     ///
1298     /// # fn main() { example().unwrap(); }
1299     /// fn example() -> Result<(), Box<dyn Error>> {
1300     ///     let data = "\
1301     /// city,country,pop
1302     /// Boston,United States,4628910
1303     /// ";
1304     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1305     ///
1306     ///     // We can read the headers before iterating.
1307     ///     {
1308     ///         // `headers` borrows from the reader, so we put this in its
1309     ///         // own scope. That way, the borrow ends before we try iterating
1310     ///         // below. Alternatively, we could clone the headers.
1311     ///         let headers = rdr.headers()?;
1312     ///         assert_eq!(headers, vec!["city", "country", "pop"]);
1313     ///     }
1314     ///
1315     ///     if let Some(result) = rdr.records().next() {
1316     ///         let record = result?;
1317     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1318     ///     } else {
1319     ///         return Err(From::from(
1320     ///             "expected at least one record but got none"))
1321     ///     }
1322     ///
1323     ///     // We can also read the headers after iterating.
1324     ///     let headers = rdr.headers()?;
1325     ///     assert_eq!(headers, vec!["city", "country", "pop"]);
1326     ///     Ok(())
1327     /// }
1328     /// ```
headers(&mut self) -> Result<&StringRecord>1329     pub fn headers(&mut self) -> Result<&StringRecord> {
1330         if self.state.headers.is_none() {
1331             let mut record = ByteRecord::new();
1332             self.read_byte_record_impl(&mut record)?;
1333             self.set_headers_impl(Err(record));
1334         }
1335         let headers = self.state.headers.as_ref().unwrap();
1336         match headers.string_record {
1337             Ok(ref record) => Ok(record),
1338             Err(ref err) => Err(Error::new(ErrorKind::Utf8 {
1339                 pos: headers.byte_record.position().map(Clone::clone),
1340                 err: err.clone(),
1341             })),
1342         }
1343     }
1344 
1345     /// Returns a reference to the first row read by this parser as raw bytes.
1346     ///
1347     /// If no row has been read yet, then this will force parsing of the first
1348     /// row.
1349     ///
1350     /// If there was a problem parsing the row then this returns an error.
1351     ///
1352     /// If the underlying reader emits EOF before any data, then this returns
1353     /// an empty record.
1354     ///
1355     /// Note that this method may be used regardless of whether `has_headers`
1356     /// was enabled (but it is enabled by default).
1357     ///
1358     /// # Example
1359     ///
1360     /// This example shows how to get the header row of CSV data. Notice that
1361     /// the header row does not appear as a record in the iterator!
1362     ///
1363     /// ```
1364     /// use std::error::Error;
1365     /// use csv::Reader;
1366     ///
1367     /// # fn main() { example().unwrap(); }
1368     /// fn example() -> Result<(), Box<dyn Error>> {
1369     ///     let data = "\
1370     /// city,country,pop
1371     /// Boston,United States,4628910
1372     /// ";
1373     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1374     ///
1375     ///     // We can read the headers before iterating.
1376     ///     {
1377     ///         // `headers` borrows from the reader, so we put this in its
1378     ///         // own scope. That way, the borrow ends before we try iterating
1379     ///         // below. Alternatively, we could clone the headers.
1380     ///         let headers = rdr.byte_headers()?;
1381     ///         assert_eq!(headers, vec!["city", "country", "pop"]);
1382     ///     }
1383     ///
1384     ///     if let Some(result) = rdr.byte_records().next() {
1385     ///         let record = result?;
1386     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1387     ///     } else {
1388     ///         return Err(From::from(
1389     ///             "expected at least one record but got none"))
1390     ///     }
1391     ///
1392     ///     // We can also read the headers after iterating.
1393     ///     let headers = rdr.byte_headers()?;
1394     ///     assert_eq!(headers, vec!["city", "country", "pop"]);
1395     ///     Ok(())
1396     /// }
1397     /// ```
byte_headers(&mut self) -> Result<&ByteRecord>1398     pub fn byte_headers(&mut self) -> Result<&ByteRecord> {
1399         if self.state.headers.is_none() {
1400             let mut record = ByteRecord::new();
1401             self.read_byte_record_impl(&mut record)?;
1402             self.set_headers_impl(Err(record));
1403         }
1404         Ok(&self.state.headers.as_ref().unwrap().byte_record)
1405     }
1406 
1407     /// Set the headers of this CSV parser manually.
1408     ///
1409     /// This overrides any other setting (including `set_byte_headers`). Any
1410     /// automatic detection of headers is disabled. This may be called at any
1411     /// time.
1412     ///
1413     /// # Example
1414     ///
1415     /// ```
1416     /// use std::error::Error;
1417     /// use csv::{Reader, StringRecord};
1418     ///
1419     /// # fn main() { example().unwrap(); }
1420     /// fn example() -> Result<(), Box<dyn Error>> {
1421     ///     let data = "\
1422     /// city,country,pop
1423     /// Boston,United States,4628910
1424     /// ";
1425     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1426     ///
1427     ///     assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]);
1428     ///     rdr.set_headers(StringRecord::from(vec!["a", "b", "c"]));
1429     ///     assert_eq!(rdr.headers()?, vec!["a", "b", "c"]);
1430     ///
1431     ///     Ok(())
1432     /// }
1433     /// ```
    pub fn set_headers(&mut self, headers: StringRecord) {
        // `Ok` marks the input as string headers; `set_headers_impl` derives
        // the byte form from it.
        self.set_headers_impl(Ok(headers));
    }
1437 
1438     /// Set the headers of this CSV parser manually as raw bytes.
1439     ///
1440     /// This overrides any other setting (including `set_headers`). Any
1441     /// automatic detection of headers is disabled. This may be called at any
1442     /// time.
1443     ///
1444     /// # Example
1445     ///
1446     /// ```
1447     /// use std::error::Error;
1448     /// use csv::{Reader, ByteRecord};
1449     ///
1450     /// # fn main() { example().unwrap(); }
1451     /// fn example() -> Result<(), Box<dyn Error>> {
1452     ///     let data = "\
1453     /// city,country,pop
1454     /// Boston,United States,4628910
1455     /// ";
1456     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1457     ///
1458     ///     assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]);
1459     ///     rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"]));
1460     ///     assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]);
1461     ///
1462     ///     Ok(())
1463     /// }
1464     /// ```
    pub fn set_byte_headers(&mut self, headers: ByteRecord) {
        // `Err` marks the input as byte headers; `set_headers_impl` attempts
        // the UTF-8 conversion and keeps the error if it fails.
        self.set_headers_impl(Err(headers));
    }
1468 
set_headers_impl( &mut self, headers: result::Result<StringRecord, ByteRecord>, )1469     fn set_headers_impl(
1470         &mut self,
1471         headers: result::Result<StringRecord, ByteRecord>,
1472     ) {
1473         // If we have string headers, then get byte headers. But if we have
1474         // byte headers, then get the string headers (or a UTF-8 error).
1475         let (mut str_headers, mut byte_headers) = match headers {
1476             Ok(string) => {
1477                 let bytes = string.clone().into_byte_record();
1478                 (Ok(string), bytes)
1479             }
1480             Err(bytes) => {
1481                 match StringRecord::from_byte_record(bytes.clone()) {
1482                     Ok(str_headers) => (Ok(str_headers), bytes),
1483                     Err(err) => (Err(err.utf8_error().clone()), bytes),
1484                 }
1485             }
1486         };
1487         if self.state.trim.should_trim_headers() {
1488             if let Ok(ref mut str_headers) = str_headers.as_mut() {
1489                 str_headers.trim();
1490             }
1491             byte_headers.trim();
1492         }
1493         self.state.headers = Some(Headers {
1494             byte_record: byte_headers,
1495             string_record: str_headers,
1496         });
1497     }
1498 
1499     /// Read a single row into the given record. Returns false when no more
1500     /// records could be read.
1501     ///
1502     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1503     /// default), then this will never read the first record.
1504     ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
1507     /// caller to reuse the `StringRecord` allocation, which usually results
1508     /// in higher throughput.
1509     ///
1510     /// Records read via this method are guaranteed to have a position set
1511     /// on them, even if the reader is at EOF or if an error is returned.
1512     ///
1513     /// # Example
1514     ///
1515     /// ```
1516     /// use std::error::Error;
1517     /// use csv::{Reader, StringRecord};
1518     ///
1519     /// # fn main() { example().unwrap(); }
1520     /// fn example() -> Result<(), Box<dyn Error>> {
1521     ///     let data = "\
1522     /// city,country,pop
1523     /// Boston,United States,4628910
1524     /// ";
1525     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1526     ///     let mut record = StringRecord::new();
1527     ///
1528     ///     if rdr.read_record(&mut record)? {
1529     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1530     ///         Ok(())
1531     ///     } else {
1532     ///         Err(From::from("expected at least one record but got none"))
1533     ///     }
1534     /// }
1535     /// ```
read_record(&mut self, record: &mut StringRecord) -> Result<bool>1536     pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
1537         let result = record.read(self);
1538         // We need to trim again because trimming string records includes
1539         // Unicode whitespace. (ByteRecord trimming only includes ASCII
1540         // whitespace.)
1541         if self.state.trim.should_trim_fields() {
1542             record.trim();
1543         }
1544         result
1545     }
1546 
1547     /// Read a single row into the given byte record. Returns false when no
1548     /// more records could be read.
1549     ///
1550     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1551     /// default), then this will never read the first record.
1552     ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
1555     /// caller to reuse the `ByteRecord` allocation, which usually results
1556     /// in higher throughput.
1557     ///
1558     /// Records read via this method are guaranteed to have a position set
1559     /// on them, even if the reader is at EOF or if an error is returned.
1560     ///
1561     /// # Example
1562     ///
1563     /// ```
1564     /// use std::error::Error;
1565     /// use csv::{ByteRecord, Reader};
1566     ///
1567     /// # fn main() { example().unwrap(); }
1568     /// fn example() -> Result<(), Box<dyn Error>> {
1569     ///     let data = "\
1570     /// city,country,pop
1571     /// Boston,United States,4628910
1572     /// ";
1573     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1574     ///     let mut record = ByteRecord::new();
1575     ///
1576     ///     if rdr.read_byte_record(&mut record)? {
1577     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1578     ///         Ok(())
1579     ///     } else {
1580     ///         Err(From::from("expected at least one record but got none"))
1581     ///     }
1582     /// }
1583     /// ```
read_byte_record( &mut self, record: &mut ByteRecord, ) -> Result<bool>1584     pub fn read_byte_record(
1585         &mut self,
1586         record: &mut ByteRecord,
1587     ) -> Result<bool> {
1588         if !self.state.seeked && !self.state.has_headers && !self.state.first {
1589             // If the caller indicated "no headers" and we haven't yielded the
1590             // first record yet, then we should yield our header row if we have
1591             // one.
1592             if let Some(ref headers) = self.state.headers {
1593                 self.state.first = true;
1594                 record.clone_from(&headers.byte_record);
1595                 if self.state.trim.should_trim_fields() {
1596                     record.trim();
1597                 }
1598                 return Ok(!record.is_empty());
1599             }
1600         }
1601         let ok = self.read_byte_record_impl(record)?;
1602         self.state.first = true;
1603         if !self.state.seeked && self.state.headers.is_none() {
1604             self.set_headers_impl(Err(record.clone()));
1605             // If the end user indicated that we have headers, then we should
1606             // never return the first row. Instead, we should attempt to
1607             // read and return the next one.
1608             if self.state.has_headers {
1609                 let result = self.read_byte_record_impl(record);
1610                 if self.state.trim.should_trim_fields() {
1611                     record.trim();
1612                 }
1613                 return result;
1614             }
1615         } else if self.state.trim.should_trim_fields() {
1616             record.trim();
1617         }
1618         Ok(ok)
1619     }
1620 
1621     /// Read a byte record from the underlying CSV reader, without accounting
1622     /// for headers.
1623     #[inline(always)]
read_byte_record_impl( &mut self, record: &mut ByteRecord, ) -> Result<bool>1624     fn read_byte_record_impl(
1625         &mut self,
1626         record: &mut ByteRecord,
1627     ) -> Result<bool> {
1628         use csv_core::ReadRecordResult::*;
1629 
1630         record.clear();
1631         record.set_position(Some(self.state.cur_pos.clone()));
1632         if self.state.eof != ReaderEofState::NotEof {
1633             return Ok(false);
1634         }
1635         let (mut outlen, mut endlen) = (0, 0);
1636         loop {
1637             let (res, nin, nout, nend) = {
1638                 let input_res = self.rdr.fill_buf();
1639                 if input_res.is_err() {
1640                     self.state.eof = ReaderEofState::IOError;
1641                 }
1642                 let input = input_res?;
1643                 let (fields, ends) = record.as_parts();
1644                 self.core.read_record(
1645                     input,
1646                     &mut fields[outlen..],
1647                     &mut ends[endlen..],
1648                 )
1649             };
1650             self.rdr.consume(nin);
1651             let byte = self.state.cur_pos.byte();
1652             self.state
1653                 .cur_pos
1654                 .set_byte(byte + nin as u64)
1655                 .set_line(self.core.line());
1656             outlen += nout;
1657             endlen += nend;
1658             match res {
1659                 InputEmpty => continue,
1660                 OutputFull => {
1661                     record.expand_fields();
1662                     continue;
1663                 }
1664                 OutputEndsFull => {
1665                     record.expand_ends();
1666                     continue;
1667                 }
1668                 Record => {
1669                     record.set_len(endlen);
1670                     self.state.add_record(record)?;
1671                     return Ok(true);
1672                 }
1673                 End => {
1674                     self.state.eof = ReaderEofState::Eof;
1675                     return Ok(false);
1676                 }
1677             }
1678         }
1679     }
1680 
1681     /// Return the current position of this CSV reader.
1682     ///
1683     /// The byte offset in the position returned can be used to `seek` this
1684     /// reader. In particular, seeking to a position returned here on the same
1685     /// data will result in parsing the same subsequent record.
1686     ///
1687     /// # Example: reading the position
1688     ///
1689     /// ```
1690     /// use std::error::Error;
1691     /// use std::io;
1692     /// use csv::{Reader, Position};
1693     ///
1694     /// # fn main() { example().unwrap(); }
1695     /// fn example() -> Result<(), Box<dyn Error>> {
1696     ///     let data = "\
1697     /// city,country,popcount
1698     /// Boston,United States,4628910
1699     /// Concord,United States,42695
1700     /// ";
1701     ///     let rdr = Reader::from_reader(io::Cursor::new(data));
1702     ///     let mut iter = rdr.into_records();
1703     ///     let mut pos = Position::new();
1704     ///     loop {
1705     ///         // Read the position immediately before each record.
1706     ///         let next_pos = iter.reader().position().clone();
1707     ///         if iter.next().is_none() {
1708     ///             break;
1709     ///         }
1710     ///         pos = next_pos;
1711     ///     }
1712     ///
1713     ///     // `pos` should now be the position immediately before the last
1714     ///     // record.
1715     ///     assert_eq!(pos.byte(), 51);
1716     ///     assert_eq!(pos.line(), 3);
1717     ///     assert_eq!(pos.record(), 2);
1718     ///     Ok(())
1719     /// }
1720     /// ```
position(&self) -> &Position1721     pub fn position(&self) -> &Position {
1722         &self.state.cur_pos
1723     }
1724 
1725     /// Returns true if and only if this reader has been exhausted.
1726     ///
1727     /// When this returns true, no more records can be read from this reader
1728     /// (unless it has been seeked to another position).
1729     ///
1730     /// # Example
1731     ///
1732     /// ```
1733     /// use std::error::Error;
1734     /// use std::io;
1735     /// use csv::{Reader, Position};
1736     ///
1737     /// # fn main() { example().unwrap(); }
1738     /// fn example() -> Result<(), Box<dyn Error>> {
1739     ///     let data = "\
1740     /// city,country,popcount
1741     /// Boston,United States,4628910
1742     /// Concord,United States,42695
1743     /// ";
1744     ///     let mut rdr = Reader::from_reader(io::Cursor::new(data));
1745     ///     assert!(!rdr.is_done());
1746     ///     for result in rdr.records() {
1747     ///         let _ = result?;
1748     ///     }
1749     ///     assert!(rdr.is_done());
1750     ///     Ok(())
1751     /// }
1752     /// ```
is_done(&self) -> bool1753     pub fn is_done(&self) -> bool {
1754         self.state.eof != ReaderEofState::NotEof
1755     }
1756 
1757     /// Returns true if and only if this reader has been configured to
1758     /// interpret the first record as a header record.
has_headers(&self) -> bool1759     pub fn has_headers(&self) -> bool {
1760         self.state.has_headers
1761     }
1762 
1763     /// Returns a reference to the underlying reader.
get_ref(&self) -> &R1764     pub fn get_ref(&self) -> &R {
1765         self.rdr.get_ref()
1766     }
1767 
1768     /// Returns a mutable reference to the underlying reader.
get_mut(&mut self) -> &mut R1769     pub fn get_mut(&mut self) -> &mut R {
1770         self.rdr.get_mut()
1771     }
1772 
1773     /// Unwraps this CSV reader, returning the underlying reader.
1774     ///
1775     /// Note that any leftover data inside this reader's internal buffer is
1776     /// lost.
into_inner(self) -> R1777     pub fn into_inner(self) -> R {
1778         self.rdr.into_inner()
1779     }
1780 }
1781 
1782 impl<R: io::Read + io::Seek> Reader<R> {
1783     /// Seeks the underlying reader to the position given.
1784     ///
1785     /// This comes with a few caveats:
1786     ///
1787     /// * Any internal buffer associated with this reader is cleared.
1788     /// * If the given position does not correspond to a position immediately
1789     ///   before the start of a record, then the behavior of this reader is
1790     ///   unspecified.
1791     /// * Any special logic that skips the first record in the CSV reader
1792     ///   when reading or iterating over records is disabled.
1793     ///
1794     /// If the given position has a byte offset equivalent to the current
1795     /// position, then no seeking is performed.
1796     ///
1797     /// If the header row has not already been read, then this will attempt
1798     /// to read the header row before seeking. Therefore, it is possible that
1799     /// this returns an error associated with reading CSV data.
1800     ///
1801     /// Note that seeking is performed based only on the byte offset in the
1802     /// given position. Namely, the record or line numbers in the position may
1803     /// be incorrect, but this will cause any future position generated by
1804     /// this CSV reader to be similarly incorrect.
1805     ///
1806     /// # Example: seek to parse a record twice
1807     ///
1808     /// ```
1809     /// use std::error::Error;
1810     /// use std::io;
1811     /// use csv::{Reader, Position};
1812     ///
1813     /// # fn main() { example().unwrap(); }
1814     /// fn example() -> Result<(), Box<dyn Error>> {
1815     ///     let data = "\
1816     /// city,country,popcount
1817     /// Boston,United States,4628910
1818     /// Concord,United States,42695
1819     /// ";
1820     ///     let rdr = Reader::from_reader(io::Cursor::new(data));
1821     ///     let mut iter = rdr.into_records();
1822     ///     let mut pos = Position::new();
1823     ///     loop {
1824     ///         // Read the position immediately before each record.
1825     ///         let next_pos = iter.reader().position().clone();
1826     ///         if iter.next().is_none() {
1827     ///             break;
1828     ///         }
1829     ///         pos = next_pos;
1830     ///     }
1831     ///
1832     ///     // Now seek the reader back to `pos`. This will let us read the
1833     ///     // last record again.
1834     ///     iter.reader_mut().seek(pos)?;
1835     ///     let mut iter = iter.into_reader().into_records();
1836     ///     if let Some(result) = iter.next() {
1837     ///         let record = result?;
1838     ///         assert_eq!(record, vec!["Concord", "United States", "42695"]);
1839     ///         Ok(())
1840     ///     } else {
1841     ///         Err(From::from("expected at least one record but got none"))
1842     ///     }
1843     /// }
1844     /// ```
seek(&mut self, pos: Position) -> Result<()>1845     pub fn seek(&mut self, pos: Position) -> Result<()> {
1846         self.byte_headers()?;
1847         self.state.seeked = true;
1848         if pos.byte() == self.state.cur_pos.byte() {
1849             return Ok(());
1850         }
1851         self.rdr.seek(io::SeekFrom::Start(pos.byte()))?;
1852         self.core.reset();
1853         self.core.set_line(pos.line());
1854         self.state.cur_pos = pos;
1855         self.state.eof = ReaderEofState::NotEof;
1856         Ok(())
1857     }
1858 
1859     /// This is like `seek`, but provides direct control over how the seeking
1860     /// operation is performed via `io::SeekFrom`.
1861     ///
1862     /// The `pos` position given *should* correspond the position indicated
1863     /// by `seek_from`, but there is no requirement. If the `pos` position
1864     /// given is incorrect, then the position information returned by this
1865     /// reader will be similarly incorrect.
1866     ///
1867     /// If the header row has not already been read, then this will attempt
1868     /// to read the header row before seeking. Therefore, it is possible that
1869     /// this returns an error associated with reading CSV data.
1870     ///
1871     /// Unlike `seek`, this will always cause an actual seek to be performed.
seek_raw( &mut self, seek_from: io::SeekFrom, pos: Position, ) -> Result<()>1872     pub fn seek_raw(
1873         &mut self,
1874         seek_from: io::SeekFrom,
1875         pos: Position,
1876     ) -> Result<()> {
1877         self.byte_headers()?;
1878         self.state.seeked = true;
1879         self.rdr.seek(seek_from)?;
1880         self.core.reset();
1881         self.core.set_line(pos.line());
1882         self.state.cur_pos = pos;
1883         self.state.eof = ReaderEofState::NotEof;
1884         Ok(())
1885     }
1886 }
1887 
1888 impl ReaderState {
1889     #[inline(always)]
add_record(&mut self, record: &ByteRecord) -> Result<()>1890     fn add_record(&mut self, record: &ByteRecord) -> Result<()> {
1891         let i = self.cur_pos.record();
1892         self.cur_pos.set_record(i.checked_add(1).unwrap());
1893         if !self.flexible {
1894             match self.first_field_count {
1895                 None => self.first_field_count = Some(record.len() as u64),
1896                 Some(expected) => {
1897                     if record.len() as u64 != expected {
1898                         return Err(Error::new(ErrorKind::UnequalLengths {
1899                             pos: record.position().map(Clone::clone),
1900                             expected_len: expected,
1901                             len: record.len() as u64,
1902                         }));
1903                     }
1904                 }
1905             }
1906         }
1907         Ok(())
1908     }
1909 }
1910 
/// An owned iterator over deserialized records.
///
/// The type parameter `R` refers to the underlying `io::Read` type, and `D`
/// refers to the type that this iterator will deserialize a record into.
pub struct DeserializeRecordsIntoIter<R, D> {
    /// The CSV reader owned by this iterator; records are read from it.
    rdr: Reader<R>,
    /// Scratch record that each call to `next` reads into before
    /// deserializing from it.
    rec: StringRecord,
    /// A copy of the reader's headers taken when the iterator is built.
    /// `None` when the reader is configured without headers or when the
    /// headers could not be read.
    headers: Option<StringRecord>,
    /// Marker tying the type parameter `D` to this struct.
    _priv: PhantomData<D>,
}
1921 
1922 impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> {
new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D>1923     fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> {
1924         let headers = if !rdr.state.has_headers {
1925             None
1926         } else {
1927             rdr.headers().ok().map(Clone::clone)
1928         };
1929         DeserializeRecordsIntoIter {
1930             rdr: rdr,
1931             rec: StringRecord::new(),
1932             headers: headers,
1933             _priv: PhantomData,
1934         }
1935     }
1936 
1937     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>1938     pub fn reader(&self) -> &Reader<R> {
1939         &self.rdr
1940     }
1941 
1942     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>1943     pub fn reader_mut(&mut self) -> &mut Reader<R> {
1944         &mut self.rdr
1945     }
1946 
1947     /// Drop this iterator and return the underlying CSV reader.
into_reader(self) -> Reader<R>1948     pub fn into_reader(self) -> Reader<R> {
1949         self.rdr
1950     }
1951 }
1952 
1953 impl<R: io::Read, D: DeserializeOwned> Iterator
1954     for DeserializeRecordsIntoIter<R, D>
1955 {
1956     type Item = Result<D>;
1957 
next(&mut self) -> Option<Result<D>>1958     fn next(&mut self) -> Option<Result<D>> {
1959         match self.rdr.read_record(&mut self.rec) {
1960             Err(err) => Some(Err(err)),
1961             Ok(false) => None,
1962             Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
1963         }
1964     }
1965 }
1966 
/// A borrowed iterator over deserialized records.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read`
/// type, and `D` refers to the type that this iterator will deserialize a
/// record into.
pub struct DeserializeRecordsIter<'r, R: 'r, D> {
    /// The borrowed CSV reader that records are read from.
    rdr: &'r mut Reader<R>,
    /// Scratch record that each call to `next` reads into before
    /// deserializing from it.
    rec: StringRecord,
    /// A copy of the reader's headers taken when the iterator is built.
    /// `None` when the reader is configured without headers or when the
    /// headers could not be read.
    headers: Option<StringRecord>,
    /// Marker tying the type parameter `D` to this struct.
    _priv: PhantomData<D>,
}
1979 
1980 impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> {
new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D>1981     fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> {
1982         let headers = if !rdr.state.has_headers {
1983             None
1984         } else {
1985             rdr.headers().ok().map(Clone::clone)
1986         };
1987         DeserializeRecordsIter {
1988             rdr: rdr,
1989             rec: StringRecord::new(),
1990             headers: headers,
1991             _priv: PhantomData,
1992         }
1993     }
1994 
1995     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>1996     pub fn reader(&self) -> &Reader<R> {
1997         &self.rdr
1998     }
1999 
2000     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2001     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2002         &mut self.rdr
2003     }
2004 }
2005 
2006 impl<'r, R: io::Read, D: DeserializeOwned> Iterator
2007     for DeserializeRecordsIter<'r, R, D>
2008 {
2009     type Item = Result<D>;
2010 
next(&mut self) -> Option<Result<D>>2011     fn next(&mut self) -> Option<Result<D>> {
2012         match self.rdr.read_record(&mut self.rec) {
2013             Err(err) => Some(Err(err)),
2014             Ok(false) => None,
2015             Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
2016         }
2017     }
2018 }
2019 
/// An owned iterator over records as strings.
pub struct StringRecordsIntoIter<R> {
    /// The CSV reader owned by this iterator; records are read from it.
    rdr: Reader<R>,
    /// Scratch record that each call to `next` reads into; a truncated
    /// clone of it is what gets yielded.
    rec: StringRecord,
}
2025 
2026 impl<R: io::Read> StringRecordsIntoIter<R> {
new(rdr: Reader<R>) -> StringRecordsIntoIter<R>2027     fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> {
2028         StringRecordsIntoIter { rdr: rdr, rec: StringRecord::new() }
2029     }
2030 
2031     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2032     pub fn reader(&self) -> &Reader<R> {
2033         &self.rdr
2034     }
2035 
2036     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2037     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2038         &mut self.rdr
2039     }
2040 
2041     /// Drop this iterator and return the underlying CSV reader.
into_reader(self) -> Reader<R>2042     pub fn into_reader(self) -> Reader<R> {
2043         self.rdr
2044     }
2045 }
2046 
2047 impl<R: io::Read> Iterator for StringRecordsIntoIter<R> {
2048     type Item = Result<StringRecord>;
2049 
next(&mut self) -> Option<Result<StringRecord>>2050     fn next(&mut self) -> Option<Result<StringRecord>> {
2051         match self.rdr.read_record(&mut self.rec) {
2052             Err(err) => Some(Err(err)),
2053             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2054             Ok(false) => None,
2055         }
2056     }
2057 }
2058 
/// A borrowed iterator over records as strings.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct StringRecordsIter<'r, R: 'r> {
    /// The borrowed CSV reader that records are read from.
    rdr: &'r mut Reader<R>,
    /// Scratch record that each call to `next` reads into; a truncated
    /// clone of it is what gets yielded.
    rec: StringRecord,
}
2067 
2068 impl<'r, R: io::Read> StringRecordsIter<'r, R> {
new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R>2069     fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> {
2070         StringRecordsIter { rdr: rdr, rec: StringRecord::new() }
2071     }
2072 
2073     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2074     pub fn reader(&self) -> &Reader<R> {
2075         &self.rdr
2076     }
2077 
2078     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2079     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2080         &mut self.rdr
2081     }
2082 }
2083 
2084 impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> {
2085     type Item = Result<StringRecord>;
2086 
next(&mut self) -> Option<Result<StringRecord>>2087     fn next(&mut self) -> Option<Result<StringRecord>> {
2088         match self.rdr.read_record(&mut self.rec) {
2089             Err(err) => Some(Err(err)),
2090             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2091             Ok(false) => None,
2092         }
2093     }
2094 }
2095 
/// An owned iterator over records as raw bytes.
pub struct ByteRecordsIntoIter<R> {
    /// The CSV reader owned by this iterator; records are read from it.
    rdr: Reader<R>,
    /// Scratch record that each call to `next` reads into; a truncated
    /// clone of it is what gets yielded.
    rec: ByteRecord,
}
2101 
2102 impl<R: io::Read> ByteRecordsIntoIter<R> {
new(rdr: Reader<R>) -> ByteRecordsIntoIter<R>2103     fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> {
2104         ByteRecordsIntoIter { rdr: rdr, rec: ByteRecord::new() }
2105     }
2106 
2107     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2108     pub fn reader(&self) -> &Reader<R> {
2109         &self.rdr
2110     }
2111 
2112     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2113     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2114         &mut self.rdr
2115     }
2116 
2117     /// Drop this iterator and return the underlying CSV reader.
into_reader(self) -> Reader<R>2118     pub fn into_reader(self) -> Reader<R> {
2119         self.rdr
2120     }
2121 }
2122 
2123 impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> {
2124     type Item = Result<ByteRecord>;
2125 
next(&mut self) -> Option<Result<ByteRecord>>2126     fn next(&mut self) -> Option<Result<ByteRecord>> {
2127         match self.rdr.read_byte_record(&mut self.rec) {
2128             Err(err) => Some(Err(err)),
2129             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2130             Ok(false) => None,
2131         }
2132     }
2133 }
2134 
/// A borrowed iterator over records as raw bytes.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct ByteRecordsIter<'r, R: 'r> {
    /// The borrowed CSV reader that records are read from.
    rdr: &'r mut Reader<R>,
    /// Scratch record that each call to `next` reads into; a truncated
    /// clone of it is what gets yielded.
    rec: ByteRecord,
}
2143 
2144 impl<'r, R: io::Read> ByteRecordsIter<'r, R> {
new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R>2145     fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> {
2146         ByteRecordsIter { rdr: rdr, rec: ByteRecord::new() }
2147     }
2148 
2149     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2150     pub fn reader(&self) -> &Reader<R> {
2151         &self.rdr
2152     }
2153 
2154     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2155     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2156         &mut self.rdr
2157     }
2158 }
2159 
2160 impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> {
2161     type Item = Result<ByteRecord>;
2162 
next(&mut self) -> Option<Result<ByteRecord>>2163     fn next(&mut self) -> Option<Result<ByteRecord>> {
2164         match self.rdr.read_byte_record(&mut self.rec) {
2165             Err(err) => Some(Err(err)),
2166             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2167             Ok(false) => None,
2168         }
2169     }
2170 }
2171 
2172 #[cfg(test)]
2173 mod tests {
2174     use std::io;
2175 
2176     use crate::byte_record::ByteRecord;
2177     use crate::error::ErrorKind;
2178     use crate::string_record::StringRecord;
2179 
2180     use super::{Position, ReaderBuilder, Trim};
2181 
b(s: &str) -> &[u8]2182     fn b(s: &str) -> &[u8] {
2183         s.as_bytes()
2184     }
s(b: &[u8]) -> &str2185     fn s(b: &[u8]) -> &str {
2186         ::std::str::from_utf8(b).unwrap()
2187     }
2188 
newpos(byte: u64, line: u64, record: u64) -> Position2189     fn newpos(byte: u64, line: u64, record: u64) -> Position {
2190         let mut p = Position::new();
2191         p.set_byte(byte).set_line(line).set_record(record);
2192         p
2193     }
2194 
2195     #[test]
read_byte_record()2196     fn read_byte_record() {
2197         let data = b("foo,\"b,ar\",baz\nabc,mno,xyz");
2198         let mut rdr =
2199             ReaderBuilder::new().has_headers(false).from_reader(data);
2200         let mut rec = ByteRecord::new();
2201 
2202         assert!(rdr.read_byte_record(&mut rec).unwrap());
2203         assert_eq!(3, rec.len());
2204         assert_eq!("foo", s(&rec[0]));
2205         assert_eq!("b,ar", s(&rec[1]));
2206         assert_eq!("baz", s(&rec[2]));
2207 
2208         assert!(rdr.read_byte_record(&mut rec).unwrap());
2209         assert_eq!(3, rec.len());
2210         assert_eq!("abc", s(&rec[0]));
2211         assert_eq!("mno", s(&rec[1]));
2212         assert_eq!("xyz", s(&rec[2]));
2213 
2214         assert!(!rdr.read_byte_record(&mut rec).unwrap());
2215     }
2216 
2217     #[test]
read_trimmed_records_and_headers()2218     fn read_trimmed_records_and_headers() {
2219         let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
2220         let mut rdr = ReaderBuilder::new()
2221             .has_headers(true)
2222             .trim(Trim::All)
2223             .from_reader(data);
2224         let mut rec = ByteRecord::new();
2225         assert!(rdr.read_byte_record(&mut rec).unwrap());
2226         assert_eq!("1", s(&rec[0]));
2227         assert_eq!("2", s(&rec[1]));
2228         assert_eq!("3", s(&rec[2]));
2229         let mut rec = StringRecord::new();
2230         assert!(rdr.read_record(&mut rec).unwrap());
2231         assert_eq!("1", &rec[0]);
2232         assert_eq!("", &rec[1]);
2233         assert_eq!("3", &rec[2]);
2234         {
2235             let headers = rdr.headers().unwrap();
2236             assert_eq!(3, headers.len());
2237             assert_eq!("foo", &headers[0]);
2238             assert_eq!("bar", &headers[1]);
2239             assert_eq!("baz", &headers[2]);
2240         }
2241     }
2242 
2243     #[test]
read_trimmed_header()2244     fn read_trimmed_header() {
2245         let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
2246         let mut rdr = ReaderBuilder::new()
2247             .has_headers(true)
2248             .trim(Trim::Headers)
2249             .from_reader(data);
2250         let mut rec = ByteRecord::new();
2251         assert!(rdr.read_byte_record(&mut rec).unwrap());
2252         assert_eq!("  1", s(&rec[0]));
2253         assert_eq!("  2", s(&rec[1]));
2254         assert_eq!("  3", s(&rec[2]));
2255         {
2256             let headers = rdr.headers().unwrap();
2257             assert_eq!(3, headers.len());
2258             assert_eq!("foo", &headers[0]);
2259             assert_eq!("bar", &headers[1]);
2260             assert_eq!("baz", &headers[2]);
2261         }
2262     }
2263 
2264     #[test]
read_trimed_header_invalid_utf8()2265     fn read_trimed_header_invalid_utf8() {
2266         let data = &b"foo,  b\xFFar,\tbaz\na,b,c\nd,e,f"[..];
2267         let mut rdr = ReaderBuilder::new()
2268             .has_headers(true)
2269             .trim(Trim::Headers)
2270             .from_reader(data);
2271         let mut rec = StringRecord::new();
2272 
2273         // force the headers to be read
2274         let _ = rdr.read_record(&mut rec);
2275         // Check the byte headers are trimmed
2276         {
2277             let headers = rdr.byte_headers().unwrap();
2278             assert_eq!(3, headers.len());
2279             assert_eq!(b"foo", &headers[0]);
2280             assert_eq!(b"b\xFFar", &headers[1]);
2281             assert_eq!(b"baz", &headers[2]);
2282         }
2283         match *rdr.headers().unwrap_err().kind() {
2284             ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
2285                 assert_eq!(pos, &newpos(0, 1, 0));
2286                 assert_eq!(err.field(), 1);
2287                 assert_eq!(err.valid_up_to(), 3);
2288             }
2289             ref err => panic!("match failed, got {:?}", err),
2290         }
2291     }
2292 
2293     #[test]
read_trimmed_records()2294     fn read_trimmed_records() {
2295         let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
2296         let mut rdr = ReaderBuilder::new()
2297             .has_headers(true)
2298             .trim(Trim::Fields)
2299             .from_reader(data);
2300         let mut rec = ByteRecord::new();
2301         assert!(rdr.read_byte_record(&mut rec).unwrap());
2302         assert_eq!("1", s(&rec[0]));
2303         assert_eq!("2", s(&rec[1]));
2304         assert_eq!("3", s(&rec[2]));
2305         {
2306             let headers = rdr.headers().unwrap();
2307             assert_eq!(3, headers.len());
2308             assert_eq!("foo", &headers[0]);
2309             assert_eq!("  bar", &headers[1]);
2310             assert_eq!("\tbaz", &headers[2]);
2311         }
2312     }
2313 
2314     #[test]
read_record_unequal_fails()2315     fn read_record_unequal_fails() {
2316         let data = b("foo\nbar,baz");
2317         let mut rdr =
2318             ReaderBuilder::new().has_headers(false).from_reader(data);
2319         let mut rec = ByteRecord::new();
2320 
2321         assert!(rdr.read_byte_record(&mut rec).unwrap());
2322         assert_eq!(1, rec.len());
2323         assert_eq!("foo", s(&rec[0]));
2324 
2325         match rdr.read_byte_record(&mut rec) {
2326             Err(err) => match *err.kind() {
2327                 ErrorKind::UnequalLengths {
2328                     expected_len: 1,
2329                     ref pos,
2330                     len: 2,
2331                 } => {
2332                     assert_eq!(pos, &Some(newpos(4, 2, 1)));
2333                 }
2334                 ref wrong => panic!("match failed, got {:?}", wrong),
2335             },
2336             wrong => panic!("match failed, got {:?}", wrong),
2337         }
2338     }
2339 
2340     #[test]
read_record_unequal_ok()2341     fn read_record_unequal_ok() {
2342         let data = b("foo\nbar,baz");
2343         let mut rdr = ReaderBuilder::new()
2344             .has_headers(false)
2345             .flexible(true)
2346             .from_reader(data);
2347         let mut rec = ByteRecord::new();
2348 
2349         assert!(rdr.read_byte_record(&mut rec).unwrap());
2350         assert_eq!(1, rec.len());
2351         assert_eq!("foo", s(&rec[0]));
2352 
2353         assert!(rdr.read_byte_record(&mut rec).unwrap());
2354         assert_eq!(2, rec.len());
2355         assert_eq!("bar", s(&rec[0]));
2356         assert_eq!("baz", s(&rec[1]));
2357 
2358         assert!(!rdr.read_byte_record(&mut rec).unwrap());
2359     }
2360 
2361     // This tests that even if we get a CSV error, we can continue reading
2362     // if we want.
2363     #[test]
read_record_unequal_continue()2364     fn read_record_unequal_continue() {
2365         let data = b("foo\nbar,baz\nquux");
2366         let mut rdr =
2367             ReaderBuilder::new().has_headers(false).from_reader(data);
2368         let mut rec = ByteRecord::new();
2369 
2370         assert!(rdr.read_byte_record(&mut rec).unwrap());
2371         assert_eq!(1, rec.len());
2372         assert_eq!("foo", s(&rec[0]));
2373 
2374         match rdr.read_byte_record(&mut rec) {
2375             Err(err) => match err.kind() {
2376                 &ErrorKind::UnequalLengths {
2377                     expected_len: 1,
2378                     ref pos,
2379                     len: 2,
2380                 } => {
2381                     assert_eq!(pos, &Some(newpos(4, 2, 1)));
2382                 }
2383                 wrong => panic!("match failed, got {:?}", wrong),
2384             },
2385             wrong => panic!("match failed, got {:?}", wrong),
2386         }
2387 
2388         assert!(rdr.read_byte_record(&mut rec).unwrap());
2389         assert_eq!(1, rec.len());
2390         assert_eq!("quux", s(&rec[0]));
2391 
2392         assert!(!rdr.read_byte_record(&mut rec).unwrap());
2393     }
2394 
// With `has_headers(true)` the first row is not yielded as a record, yet it
// stays available through both header accessors after the data rows have
// been read.
#[test]
fn read_record_headers() {
    let mut reader = ReaderBuilder::new()
        .has_headers(true)
        .from_reader(b("foo,bar,baz\na,b,c\nd,e,f"));
    let mut row = StringRecord::new();

    // Only the two data rows come back, identified by their first field.
    for &first_field in &["a", "d"] {
        assert!(reader.read_record(&mut row).unwrap());
        assert_eq!(3, row.len());
        assert_eq!(first_field, &row[0]);
    }
    assert!(!reader.read_record(&mut row).unwrap());

    let expected = ["foo", "bar", "baz"];
    {
        // Headers as raw bytes.
        let raw = reader.byte_headers().unwrap();
        assert_eq!(expected.len(), raw.len());
        for (i, name) in expected.iter().enumerate() {
            assert_eq!(name.as_bytes(), &raw[i]);
        }
    }
    {
        // Headers as strings.
        let text = reader.headers().unwrap();
        assert_eq!(expected.len(), text.len());
        for (i, &name) in expected.iter().enumerate() {
            assert_eq!(name, &text[i]);
        }
    }
}
2426 
// A header row containing invalid UTF-8 does not prevent reading the data
// rows. The headers remain readable as raw bytes, while the string accessor
// reports a UTF-8 error.
#[test]
fn read_record_headers_invalid_utf8() {
    let input: &[u8] = b"foo,b\xFFar,baz\na,b,c\nd,e,f";
    let mut reader = ReaderBuilder::new().has_headers(true).from_reader(input);
    let mut row = StringRecord::new();

    for &first_field in &["a", "d"] {
        assert!(reader.read_record(&mut row).unwrap());
        assert_eq!(3, row.len());
        assert_eq!(first_field, &row[0]);
    }
    assert!(!reader.read_record(&mut row).unwrap());

    {
        // Raw access succeeds and preserves the invalid byte verbatim.
        let raw = reader.byte_headers().unwrap();
        let expected: [&[u8]; 3] = [b"foo", b"b\xFFar", b"baz"];
        assert_eq!(expected.len(), raw.len());
        for (i, &field) in expected.iter().enumerate() {
            assert_eq!(field, &raw[i]);
        }
    }

    // String access fails; the error must carry a position and point at
    // field 1 with a valid prefix of length 1.
    let failure = reader.headers().unwrap_err();
    match failure.kind() {
        ErrorKind::Utf8 { pos: Some(pos), err: utf8 } => {
            assert_eq!(pos, &newpos(0, 1, 0));
            assert_eq!(utf8.field(), 1);
            assert_eq!(utf8.valid_up_to(), 1);
        }
        other => panic!("match failed, got {:?}", other),
    }
}
2461 
// With `has_headers(false)`, asking for the headers before reading any
// record returns the first row, and that same row is still yielded as the
// first record afterwards.
#[test]
fn read_record_no_headers_before() {
    let mut reader = ReaderBuilder::new()
        .has_headers(false)
        .from_reader(b("foo,bar,baz\na,b,c\nd,e,f"));

    {
        let names = reader.headers().unwrap();
        assert_eq!(3, names.len());
        for (i, &name) in ["foo", "bar", "baz"].iter().enumerate() {
            assert_eq!(name, &names[i]);
        }
    }

    // All three rows are records, identified here by their first field.
    let mut row = StringRecord::new();
    for &first_field in &["foo", "a", "d"] {
        assert!(reader.read_record(&mut row).unwrap());
        assert_eq!(3, row.len());
        assert_eq!(first_field, &row[0]);
    }
    assert!(!reader.read_record(&mut row).unwrap());
}
2491 
// With `has_headers(false)` every row, including the first, is yielded as a
// record, and the first row can still be fetched as headers once the input
// has been exhausted.
#[test]
fn read_record_no_headers_after() {
    let mut reader = ReaderBuilder::new()
        .has_headers(false)
        .from_reader(b("foo,bar,baz\na,b,c\nd,e,f"));
    let mut row = StringRecord::new();

    // All three rows are records, identified here by their first field.
    for &first_field in &["foo", "a", "d"] {
        assert!(reader.read_record(&mut row).unwrap());
        assert_eq!(3, row.len());
        assert_eq!(first_field, &row[0]);
    }
    assert!(!reader.read_record(&mut row).unwrap());

    let names = reader.headers().unwrap();
    assert_eq!(3, names.len());
    for (i, &name) in ["foo", "bar", "baz"].iter().enumerate() {
        assert_eq!(name, &names[i]);
    }
}
2519 
// Seeking into the middle of the data resumes reading at the record found
// there, and the reader's reported position advances from the seeked-to
// values as further records are read.
#[test]
fn seek() {
    let cursor = io::Cursor::new(b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i"));
    let mut reader = ReaderBuilder::new().from_reader(cursor);
    let mut row = StringRecord::new();

    reader.seek(newpos(18, 3, 2)).unwrap();
    assert_eq!(18, reader.position().byte());

    assert!(reader.read_record(&mut row).unwrap());
    assert_eq!(3, row.len());
    assert_eq!("d", &row[0]);

    // Snapshot the position between the two reads.
    let (byte, line, record) = {
        let here = reader.position();
        (here.byte(), here.line(), here.record())
    };
    assert_eq!(24, byte);
    assert_eq!(4, line);
    assert_eq!(3, record);

    assert!(reader.read_record(&mut row).unwrap());
    assert_eq!(3, row.len());
    assert_eq!("g", &row[0]);

    assert!(!reader.read_record(&mut row).unwrap());
}
2542 
// Test that the headers can be read after seeking, even if they were not
// explicitly read before the seek.
#[test]
fn seek_headers_after() {
    let cursor = io::Cursor::new(b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i"));
    let mut reader = ReaderBuilder::new().from_reader(cursor);
    reader.seek(newpos(18, 3, 2)).unwrap();

    let expected = vec!["foo", "bar", "baz"];
    assert_eq!(reader.headers().unwrap(), expected);
}
2552 
// Test that headers read before a seek are reported unchanged when they are
// read again after the seek.
#[test]
fn seek_headers_before_after() {
    let cursor = io::Cursor::new(b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i"));
    let mut reader = ReaderBuilder::new().from_reader(cursor);

    // Take an owned copy so the comparison does not alias the reader.
    let before = reader.headers().unwrap().clone();
    reader.seek(newpos(18, 3, 2)).unwrap();
    let after = reader.headers().unwrap();
    assert_eq!(&before, after);
}
2563 
// Test the case where the headers were not read before seeking and the seek
// targets the current byte offset: no actual seeking is done, so the headers
// must still be readable afterwards.
#[test]
fn seek_headers_no_actual_seek() {
    let cursor = io::Cursor::new(b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i"));
    let mut reader = ReaderBuilder::new().from_reader(cursor);

    let start = Position::new();
    reader.seek(start).unwrap();

    let names = reader.headers().unwrap();
    assert_eq!("foo", &names[0]);
}
2574 
// Test that each record reports the correct position when the data has no
// header row.
#[test]
fn positions_no_headers() {
    let mut records = ReaderBuilder::new()
        .has_headers(false)
        .from_reader("a,b,c\nx,y,z".as_bytes())
        .into_records();

    // Expected (byte, line, record) for the first two records, in order.
    let expected: [(u64, u64, u64); 2] = [(0, 1, 0), (6, 2, 1)];
    for &(byte, line, record) in &expected {
        let row = records.next().unwrap().unwrap();
        let pos = row.position().unwrap();
        assert_eq!(pos.byte(), byte);
        assert_eq!(pos.line(), line);
        assert_eq!(pos.record(), record);
    }
}
2593 
// Test that the first yielded record reports the correct position when the
// data starts with a header row.
#[test]
fn positions_headers() {
    let reader = ReaderBuilder::new()
        .has_headers(true)
        .from_reader("a,b,c\nx,y,z".as_bytes());
    let mut records = reader.into_records();

    let first = records.next().unwrap().unwrap();
    let pos = first.position().unwrap();
    assert_eq!(pos.byte(), 6);
    assert_eq!(pos.line(), 2);
    assert_eq!(pos.record(), 1);
}
2607 
// Test that asking for headers on empty input succeeds and yields a record
// with no fields.
#[test]
fn headers_on_empty_data() {
    let empty: &[u8] = b"";
    let mut reader = ReaderBuilder::new().from_reader(empty);
    let field_count = reader.byte_headers().unwrap().len();
    assert_eq!(field_count, 0);
}
2615 
// Test that iterating records over empty input, without headers, yields
// nothing.
#[test]
fn no_headers_on_empty_data() {
    let mut reader = ReaderBuilder::new()
        .has_headers(false)
        .from_reader("".as_bytes());
    let yielded = reader.records().count();
    assert_eq!(yielded, 0);
}
2623 
// Test that iterating records over empty input, without headers, still
// yields nothing when the headers were requested beforehand.
#[test]
fn no_headers_on_empty_data_after_headers() {
    let mut reader = ReaderBuilder::new()
        .has_headers(false)
        .from_reader("".as_bytes());

    let header_fields = reader.headers().unwrap().len();
    assert_eq!(header_fields, 0);

    let yielded = reader.records().count();
    assert_eq!(yielded, 0);
}
2633 }
2634