use std::fs::File;
use std::io::{self, BufRead, Seek};
use std::marker::PhantomData;
use std::path::Path;
use std::result;

use csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder};
use serde::de::DeserializeOwned;

use crate::byte_record::{ByteRecord, Position};
use crate::error::{Error, ErrorKind, Result, Utf8Error};
use crate::string_record::StringRecord;
use crate::{Terminator, Trim};

/// Builds a CSV reader with various configuration knobs.
///
/// This builder can be used to tweak the field delimiter, record terminator
/// and more. Once a CSV `Reader` is built, its configuration cannot be
/// changed.
#[derive(Debug)]
pub struct ReaderBuilder {
    capacity: usize,
    flexible: bool,
    has_headers: bool,
    trim: Trim,
    /// The underlying CSV parser builder.
    ///
    /// We explicitly put this on the heap because CoreReaderBuilder embeds an
    /// entire DFA transition table, which along with other things, tallies up
    /// to almost 500 bytes on the stack.
    builder: Box<CoreReaderBuilder>,
}

impl Default for ReaderBuilder {
    fn default() -> ReaderBuilder {
        ReaderBuilder {
            capacity: 8 * (1 << 10),
            flexible: false,
            has_headers: true,
            trim: Trim::default(),
            builder: Box::new(CoreReaderBuilder::default()),
        }
    }
}

impl ReaderBuilder {
    /// Create a new builder for configuring CSV parsing.
    ///
    /// To convert a builder into a reader, call one of the methods starting
    /// with `from_`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
    ///
    ///     let records = rdr
    ///         .records()
    ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
    ///     assert_eq!(records, vec![
    ///         vec!["Boston", "United States", "4628910"],
    ///         vec!["Concord", "United States", "42695"],
    ///     ]);
    ///     Ok(())
    /// }
    /// ```
    pub fn new() -> ReaderBuilder {
        ReaderBuilder::default()
    }

    /// Build a CSV parser from this configuration that reads data from the
    /// given file path.
    ///
    /// If there was a problem opening the file at the given path, then this
    /// returns the corresponding error.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
        Ok(Reader::new(self, File::open(path)?))
    }

    /// Build a CSV parser from this configuration that reads data from `rdr`.
    ///
    /// Note that the CSV reader is buffered automatically, so you should not
    /// wrap `rdr` in a buffered reader like `io::BufReader`.
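    ///
    /// For example, a minimal sketch that reads CSV from standard input
    /// (stdin here is just an illustrative source) passes the handle to
    /// `from_reader` directly:
    ///
    /// ```no_run
    /// use std::{error::Error, io};
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     // `io::stdin()` already implements `io::Read`, so no `BufReader`
    ///     // wrapper is needed here.
    ///     let mut rdr = ReaderBuilder::new().from_reader(io::stdin());
    ///     for result in rdr.records() {
    ///         println!("{:?}", result?);
    ///     }
    ///     Ok(())
    /// }
    /// ```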
111 /// 112 /// # Example 113 /// 114 /// ``` 115 /// use std::error::Error; 116 /// use csv::ReaderBuilder; 117 /// 118 /// # fn main() { example().unwrap(); } 119 /// fn example() -> Result<(), Box<dyn Error>> { 120 /// let data = "\ 121 /// city,country,pop 122 /// Boston,United States,4628910 123 /// Concord,United States,42695 124 /// "; 125 /// let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes()); 126 /// for result in rdr.records() { 127 /// let record = result?; 128 /// println!("{:?}", record); 129 /// } 130 /// Ok(()) 131 /// } 132 /// ``` from_reader<R: io::Read>(&self, rdr: R) -> Reader<R>133 pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> { 134 Reader::new(self, rdr) 135 } 136 137 /// The field delimiter to use when parsing CSV. 138 /// 139 /// The default is `b','`. 140 /// 141 /// # Example 142 /// 143 /// ``` 144 /// use std::error::Error; 145 /// use csv::ReaderBuilder; 146 /// 147 /// # fn main() { example().unwrap(); } 148 /// fn example() -> Result<(), Box<dyn Error>> { 149 /// let data = "\ 150 /// city;country;pop 151 /// Boston;United States;4628910 152 /// "; 153 /// let mut rdr = ReaderBuilder::new() 154 /// .delimiter(b';') 155 /// .from_reader(data.as_bytes()); 156 /// 157 /// if let Some(result) = rdr.records().next() { 158 /// let record = result?; 159 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 160 /// Ok(()) 161 /// } else { 162 /// Err(From::from("expected at least one record but got none")) 163 /// } 164 /// } 165 /// ``` delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder166 pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder { 167 self.builder.delimiter(delimiter); 168 self 169 } 170 171 /// Whether to treat the first row as a special header row. 172 /// 173 /// By default, the first row is treated as a special header row, which 174 /// means the header is never returned by any of the record reading methods 175 /// or iterators. When this is disabled (`yes` set to `false`), the first 176 /// row is not treated specially. 177 /// 178 /// Note that the `headers` and `byte_headers` methods are unaffected by 179 /// whether this is set. Those methods always return the first record. 180 /// 181 /// # Example 182 /// 183 /// This example shows what happens when `has_headers` is disabled. 184 /// Namely, the first row is treated just like any other row. 185 /// 186 /// ``` 187 /// use std::error::Error; 188 /// use csv::ReaderBuilder; 189 /// 190 /// # fn main() { example().unwrap(); } 191 /// fn example() -> Result<(), Box<dyn Error>> { 192 /// let data = "\ 193 /// city,country,pop 194 /// Boston,United States,4628910 195 /// "; 196 /// let mut rdr = ReaderBuilder::new() 197 /// .has_headers(false) 198 /// .from_reader(data.as_bytes()); 199 /// let mut iter = rdr.records(); 200 /// 201 /// // Read the first record. 202 /// if let Some(result) = iter.next() { 203 /// let record = result?; 204 /// assert_eq!(record, vec!["city", "country", "pop"]); 205 /// } else { 206 /// return Err(From::from( 207 /// "expected at least two records but got none")); 208 /// } 209 /// 210 /// // Read the second record. 
211 /// if let Some(result) = iter.next() { 212 /// let record = result?; 213 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 214 /// } else { 215 /// return Err(From::from( 216 /// "expected at least two records but got one")) 217 /// } 218 /// Ok(()) 219 /// } 220 /// ``` has_headers(&mut self, yes: bool) -> &mut ReaderBuilder221 pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder { 222 self.has_headers = yes; 223 self 224 } 225 226 /// Whether the number of fields in records is allowed to change or not. 227 /// 228 /// When disabled (which is the default), parsing CSV data will return an 229 /// error if a record is found with a number of fields different from the 230 /// number of fields in a previous record. 231 /// 232 /// When enabled, this error checking is turned off. 233 /// 234 /// # Example: flexible records enabled 235 /// 236 /// ``` 237 /// use std::error::Error; 238 /// use csv::ReaderBuilder; 239 /// 240 /// # fn main() { example().unwrap(); } 241 /// fn example() -> Result<(), Box<dyn Error>> { 242 /// // Notice that the first row is missing the population count. 243 /// let data = "\ 244 /// city,country,pop 245 /// Boston,United States 246 /// "; 247 /// let mut rdr = ReaderBuilder::new() 248 /// .flexible(true) 249 /// .from_reader(data.as_bytes()); 250 /// 251 /// if let Some(result) = rdr.records().next() { 252 /// let record = result?; 253 /// assert_eq!(record, vec!["Boston", "United States"]); 254 /// Ok(()) 255 /// } else { 256 /// Err(From::from("expected at least one record but got none")) 257 /// } 258 /// } 259 /// ``` 260 /// 261 /// # Example: flexible records disabled 262 /// 263 /// This shows the error that appears when records of unequal length 264 /// are found and flexible records have been disabled (which is the 265 /// default). 266 /// 267 /// ``` 268 /// use std::error::Error; 269 /// use csv::{ErrorKind, ReaderBuilder}; 270 /// 271 /// # fn main() { example().unwrap(); } 272 /// fn example() -> Result<(), Box<dyn Error>> { 273 /// // Notice that the first row is missing the population count. 274 /// let data = "\ 275 /// city,country,pop 276 /// Boston,United States 277 /// "; 278 /// let mut rdr = ReaderBuilder::new() 279 /// .flexible(false) 280 /// .from_reader(data.as_bytes()); 281 /// 282 /// if let Some(Err(err)) = rdr.records().next() { 283 /// match *err.kind() { 284 /// ErrorKind::UnequalLengths { expected_len, len, .. } => { 285 /// // The header row has 3 fields... 286 /// assert_eq!(expected_len, 3); 287 /// // ... but the first row has only 2 fields. 288 /// assert_eq!(len, 2); 289 /// Ok(()) 290 /// } 291 /// ref wrong => { 292 /// Err(From::from(format!( 293 /// "expected UnequalLengths error but got {:?}", 294 /// wrong))) 295 /// } 296 /// } 297 /// } else { 298 /// Err(From::from( 299 /// "expected at least one errored record but got none")) 300 /// } 301 /// } 302 /// ``` flexible(&mut self, yes: bool) -> &mut ReaderBuilder303 pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder { 304 self.flexible = yes; 305 self 306 } 307 308 /// Whether fields are trimmed of leading and trailing whitespace or not. 309 /// 310 /// By default, no trimming is performed. This method permits one to 311 /// override that behavior and choose one of the following options: 312 /// 313 /// 1. `Trim::Headers` trims only header values. 314 /// 2. `Trim::Fields` trims only non-header or "field" values. 315 /// 3. `Trim::All` trims both header and non-header values. 
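    ///
    /// For example, a minimal sketch using `Trim::Headers` (the data below is
    /// purely illustrative): header values are trimmed, while regular field
    /// values are left untouched:
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, Trim};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city , country
    /// Boston, United States
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .trim(Trim::Headers)
    ///         .from_reader(data.as_bytes());
    ///
    ///     // The header values have been trimmed...
    ///     assert_eq!(rdr.headers()?, vec!["city", "country"]);
    ///     // ...but the field values have not.
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", " United States"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```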
316 /// 317 /// A value is only interpreted as a header value if this CSV reader is 318 /// configured to read a header record (which is the default). 319 /// 320 /// When reading string records, characters meeting the definition of 321 /// Unicode whitespace are trimmed. When reading byte records, characters 322 /// meeting the definition of ASCII whitespace are trimmed. ASCII 323 /// whitespace characters correspond to the set `[\t\n\v\f\r ]`. 324 /// 325 /// # Example 326 /// 327 /// This example shows what happens when all values are trimmed. 328 /// 329 /// ``` 330 /// use std::error::Error; 331 /// use csv::{ReaderBuilder, StringRecord, Trim}; 332 /// 333 /// # fn main() { example().unwrap(); } 334 /// fn example() -> Result<(), Box<dyn Error>> { 335 /// let data = "\ 336 /// city , country , pop 337 /// Boston,\" 338 /// United States\",4628910 339 /// Concord, United States ,42695 340 /// "; 341 /// let mut rdr = ReaderBuilder::new() 342 /// .trim(Trim::All) 343 /// .from_reader(data.as_bytes()); 344 /// let records = rdr 345 /// .records() 346 /// .collect::<Result<Vec<StringRecord>, csv::Error>>()?; 347 /// assert_eq!(records, vec![ 348 /// vec!["Boston", "United States", "4628910"], 349 /// vec!["Concord", "United States", "42695"], 350 /// ]); 351 /// Ok(()) 352 /// } 353 /// ``` trim(&mut self, trim: Trim) -> &mut ReaderBuilder354 pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder { 355 self.trim = trim; 356 self 357 } 358 359 /// The record terminator to use when parsing CSV. 360 /// 361 /// A record terminator can be any single byte. The default is a special 362 /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n` 363 /// or `\r\n` as a single record terminator. 364 /// 365 /// # Example: `$` as a record terminator 366 /// 367 /// ``` 368 /// use std::error::Error; 369 /// use csv::{ReaderBuilder, Terminator}; 370 /// 371 /// # fn main() { example().unwrap(); } 372 /// fn example() -> Result<(), Box<dyn Error>> { 373 /// let data = "city,country,pop$Boston,United States,4628910"; 374 /// let mut rdr = ReaderBuilder::new() 375 /// .terminator(Terminator::Any(b'$')) 376 /// .from_reader(data.as_bytes()); 377 /// 378 /// if let Some(result) = rdr.records().next() { 379 /// let record = result?; 380 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 381 /// Ok(()) 382 /// } else { 383 /// Err(From::from("expected at least one record but got none")) 384 /// } 385 /// } 386 /// ``` terminator(&mut self, term: Terminator) -> &mut ReaderBuilder387 pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder { 388 self.builder.terminator(term.to_core()); 389 self 390 } 391 392 /// The quote character to use when parsing CSV. 393 /// 394 /// The default is `b'"'`. 
395 /// 396 /// # Example: single quotes instead of double quotes 397 /// 398 /// ``` 399 /// use std::error::Error; 400 /// use csv::ReaderBuilder; 401 /// 402 /// # fn main() { example().unwrap(); } 403 /// fn example() -> Result<(), Box<dyn Error>> { 404 /// let data = "\ 405 /// city,country,pop 406 /// Boston,'United States',4628910 407 /// "; 408 /// let mut rdr = ReaderBuilder::new() 409 /// .quote(b'\'') 410 /// .from_reader(data.as_bytes()); 411 /// 412 /// if let Some(result) = rdr.records().next() { 413 /// let record = result?; 414 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 415 /// Ok(()) 416 /// } else { 417 /// Err(From::from("expected at least one record but got none")) 418 /// } 419 /// } 420 /// ``` quote(&mut self, quote: u8) -> &mut ReaderBuilder421 pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder { 422 self.builder.quote(quote); 423 self 424 } 425 426 /// The escape character to use when parsing CSV. 427 /// 428 /// In some variants of CSV, quotes are escaped using a special escape 429 /// character like `\` (instead of escaping quotes by doubling them). 430 /// 431 /// By default, recognizing these idiosyncratic escapes is disabled. 432 /// 433 /// # Example 434 /// 435 /// ``` 436 /// use std::error::Error; 437 /// use csv::ReaderBuilder; 438 /// 439 /// # fn main() { example().unwrap(); } 440 /// fn example() -> Result<(), Box<dyn Error>> { 441 /// let data = "\ 442 /// city,country,pop 443 /// Boston,\"The \\\"United\\\" States\",4628910 444 /// "; 445 /// let mut rdr = ReaderBuilder::new() 446 /// .escape(Some(b'\\')) 447 /// .from_reader(data.as_bytes()); 448 /// 449 /// if let Some(result) = rdr.records().next() { 450 /// let record = result?; 451 /// assert_eq!(record, vec![ 452 /// "Boston", "The \"United\" States", "4628910", 453 /// ]); 454 /// Ok(()) 455 /// } else { 456 /// Err(From::from("expected at least one record but got none")) 457 /// } 458 /// } 459 /// ``` escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder460 pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder { 461 self.builder.escape(escape); 462 self 463 } 464 465 /// Enable double quote escapes. 466 /// 467 /// This is enabled by default, but it may be disabled. When disabled, 468 /// doubled quotes are not interpreted as escapes. 469 /// 470 /// # Example 471 /// 472 /// ``` 473 /// use std::error::Error; 474 /// use csv::ReaderBuilder; 475 /// 476 /// # fn main() { example().unwrap(); } 477 /// fn example() -> Result<(), Box<dyn Error>> { 478 /// let data = "\ 479 /// city,country,pop 480 /// Boston,\"The \"\"United\"\" States\",4628910 481 /// "; 482 /// let mut rdr = ReaderBuilder::new() 483 /// .double_quote(false) 484 /// .from_reader(data.as_bytes()); 485 /// 486 /// if let Some(result) = rdr.records().next() { 487 /// let record = result?; 488 /// assert_eq!(record, vec![ 489 /// "Boston", "The \"United\"\" States\"", "4628910", 490 /// ]); 491 /// Ok(()) 492 /// } else { 493 /// Err(From::from("expected at least one record but got none")) 494 /// } 495 /// } 496 /// ``` double_quote(&mut self, yes: bool) -> &mut ReaderBuilder497 pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder { 498 self.builder.double_quote(yes); 499 self 500 } 501 502 /// Enable or disable quoting. 503 /// 504 /// This is enabled by default, but it may be disabled. When disabled, 505 /// quotes are not treated specially. 
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .quoting(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "\"The United States", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.quoting(yes);
        self
    }

    /// The comment character to use when parsing CSV.
    ///
    /// If a record begins with the byte given here, then that line is
    /// ignored by the CSV parser.
    ///
    /// This is disabled by default.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// #Concord,United States,42695
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .comment(Some(b'#'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
        self.builder.comment(comment);
        self
    }

    /// A convenience method for specifying a configuration to read ASCII
    /// delimited text.
    ///
    /// This sets the delimiter and record terminator to the ASCII unit
    /// separator (`\x1F`) and record separator (`\x1E`), respectively.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .ascii()
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn ascii(&mut self) -> &mut ReaderBuilder {
        self.builder.ascii();
        self
    }

    /// Set the capacity (in bytes) of the buffer used in the CSV reader.
    /// The default is 8KiB (`8 * 1024` bytes).
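    ///
    /// # Example
    ///
    /// A minimal sketch that opts into a larger 1MiB buffer (the size here is
    /// arbitrary):
    ///
    /// ```
    /// use csv::ReaderBuilder;
    ///
    /// let data = "city,country,pop\nBoston,United States,4628910\n";
    /// let mut rdr = ReaderBuilder::new()
    ///     .buffer_capacity(1 << 20)
    ///     .from_reader(data.as_bytes());
    /// assert_eq!(rdr.records().count(), 1);
    /// ```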
    pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
        self.capacity = capacity;
        self
    }

    /// Enable or disable the NFA for parsing CSV.
    ///
    /// This is intended to be a debug option. The NFA is always slower than
    /// the DFA.
    #[doc(hidden)]
    pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.nfa(yes);
        self
    }
}

/// An already configured CSV reader.
///
/// A CSV reader takes as input CSV data and transforms that into standard Rust
/// values. The most flexible way to read CSV data is as a sequence of records,
/// where a record is a sequence of fields and each field is a string. However,
/// a reader can also deserialize CSV data into Rust types like `i64` or
/// `(String, f64, f64, f64)` or even a custom struct automatically using
/// Serde.
///
/// # Configuration
///
/// A CSV reader has a couple of convenient constructor methods like
/// `from_path` and `from_reader`. However, if you want to configure the CSV
/// reader to use a different delimiter or quote character (among many other
/// things), then you should use a [`ReaderBuilder`](struct.ReaderBuilder.html)
/// to construct a `Reader`. For example, to change the field delimiter:
///
/// ```
/// use std::error::Error;
/// use csv::ReaderBuilder;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city;country;pop
/// Boston;United States;4628910
/// ";
///     let mut rdr = ReaderBuilder::new()
///         .delimiter(b';')
///         .from_reader(data.as_bytes());
///
///     if let Some(result) = rdr.records().next() {
///         let record = result?;
///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
///         Ok(())
///     } else {
///         Err(From::from("expected at least one record but got none"))
///     }
/// }
/// ```
///
/// # Error handling
///
/// In general, CSV *parsing* does not ever return an error. That is, there is
/// no such thing as malformed CSV data. Instead, this reader will prioritize
/// finding a parse over rejecting CSV data that it does not understand. This
/// choice was inspired by other popular CSV parsers, but also because it is
/// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
/// it might still be possible to work with the data. In the land of CSV, there
/// is no "right" or "wrong," only "right" and "less right."
///
/// With that said, a number of errors can occur while reading CSV data:
///
/// * By default, all records in CSV data must have the same number of fields.
///   If a record is found with a different number of fields than a prior
///   record, then an error is returned. This behavior can be disabled by
///   enabling flexible parsing via the `flexible` method on
///   [`ReaderBuilder`](struct.ReaderBuilder.html).
/// * When reading CSV data from a resource (like a file), it is possible for
///   reading from the underlying resource to fail. This will return an error.
///   For subsequent calls to the `Reader` after encountering such an error
///   (unless `seek` is used), it will behave as if end of file had been
///   reached, in order to avoid infinite loops that could arise from
///   repeatedly attempting to read the next record after a failure.
/// * When reading CSV data into `String` or `&str` fields (e.g., via a
///   [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
///   enforced. If CSV data is invalid UTF-8, then an error is returned. If
///   you want to read invalid UTF-8, then you should use the byte oriented
///   APIs such as [`ByteRecord`](struct.ByteRecord.html). If you need explicit
///   support for another encoding entirely, then you'll need to use another
///   crate to transcode your CSV data to UTF-8 before parsing it.
/// * When using Serde to deserialize CSV data into Rust types, it is possible
///   for a number of additional errors to occur. For example, deserializing
///   a field `xyz` into an `i32` field will result in an error.
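///
/// For instance, here is a minimal sketch of detecting the UTF-8 error case
/// described above (the invalid bytes are purely illustrative):
///
/// ```
/// use std::error::Error;
/// use csv::{ErrorKind, ReaderBuilder};
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     // The second field of the first record is not valid UTF-8.
///     let data = b"city,country\nBoston,\xff\xff\n";
///     let mut rdr = ReaderBuilder::new().from_reader(&data[..]);
///
///     if let Some(Err(err)) = rdr.records().next() {
///         match *err.kind() {
///             ErrorKind::Utf8 { .. } => Ok(()),
///             ref wrong => Err(From::from(format!(
///                 "expected Utf8 error but got {:?}",
///                 wrong
///             ))),
///         }
///     } else {
///         Err(From::from("expected a UTF-8 error but got none"))
///     }
/// }
/// ```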
///
/// For more details on the precise semantics of errors, see the
/// [`Error`](enum.Error.html) type.
#[derive(Debug)]
pub struct Reader<R> {
    /// The underlying CSV parser.
    ///
    /// We explicitly put this on the heap because CoreReader embeds an entire
    /// DFA transition table, which along with other things, tallies up to
    /// almost 500 bytes on the stack.
    core: Box<CoreReader>,
    /// The underlying reader.
    rdr: io::BufReader<R>,
    /// Various state tracking.
    ///
    /// There is more state embedded in the `CoreReader`.
    state: ReaderState,
}

#[derive(Debug)]
struct ReaderState {
    /// When set, this contains the first row of any parsed CSV data.
    ///
    /// This is always populated, regardless of whether `has_headers` is set.
    headers: Option<Headers>,
    /// When set, the first row of parsed CSV data is excluded from things
    /// that read records, like iterators and `read_record`.
    has_headers: bool,
    /// When set, there is no restriction on the length of records. When not
    /// set, every record must have the same number of fields, or else an
    /// error is reported.
    flexible: bool,
    trim: Trim,
    /// The number of fields in the first record parsed.
    first_field_count: Option<u64>,
    /// The current position of the parser.
    ///
    /// Note that this position is only observable by callers at the start
    /// of a record. More granular positions are not supported.
    cur_pos: Position,
    /// Whether the first record has been read or not.
    first: bool,
    /// Whether the reader has been seeked or not.
    seeked: bool,
    /// Whether EOF of the underlying reader has been reached or not.
    ///
    /// IO errors on the underlying reader will be considered as an EOF for
    /// subsequent read attempts, as it would be incorrect to keep on trying
    /// to read when the underlying reader has broken.
    ///
    /// For clarity, to get the best `Debug` impl, and in case the two need
    /// to be treated differently at some point, we record whether this EOF
    /// state was caused by an actual EOF or by an IO error.
    /// This has no additional runtime cost.
    eof: ReaderEofState,
}

/// Whether EOF of the underlying reader has been reached or not.
///
/// IO errors on the underlying reader will be considered as an EOF for
/// subsequent read attempts, as it would be incorrect to keep on trying
/// to read when the underlying reader has broken.
///
/// For clarity, to get the best `Debug` impl, and in case the two need to be
/// treated differently at some point, we record whether this EOF state was
/// caused by an actual EOF or by an IO error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReaderEofState {
    NotEof,
    Eof,
    IOError,
}

/// Headers encapsulates any data associated with the headers of CSV data.
///
/// The headers always correspond to the first row.
#[derive(Debug)]
struct Headers {
    /// The header, as raw bytes.
    byte_record: ByteRecord,
    /// The header, as valid UTF-8 (or a UTF-8 error).
    string_record: result::Result<StringRecord, Utf8Error>,
}

impl Reader<File> {
    /// Create a new CSV parser with a default configuration for the given
    /// file path.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = Reader::from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
        ReaderBuilder::new().from_path(path)
    }
}

impl<R: io::Read> Reader<R> {
    /// Create a new CSV reader given a builder and a source of underlying
    /// bytes.
    fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
        Reader {
            core: Box::new(builder.builder.build()),
            rdr: io::BufReader::with_capacity(builder.capacity, rdr),
            state: ReaderState {
                headers: None,
                has_headers: builder.has_headers,
                flexible: builder.flexible,
                trim: builder.trim,
                first_field_count: None,
                cur_pos: Position::new(),
                first: false,
                seeked: false,
                eof: ReaderEofState::NotEof,
            },
        }
    }

    /// Create a new CSV parser with a default configuration for the given
    /// reader.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_reader(rdr: R) -> Reader<R> {
        ReaderBuilder::new().from_reader(rdr)
    }

    /// Returns a borrowed iterator over deserialized records.
    ///
    /// Each item yielded by this iterator is a `Result<D, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
Additionally, 875 /// if `has_headers` is enabled, then deserializing into a struct will 876 /// automatically align the values in each row to the fields of a struct 877 /// based on the header row. 878 /// 879 /// # Example 880 /// 881 /// This shows how to deserialize CSV data into normal Rust structs. The 882 /// fields of the header row are used to match up the values in each row 883 /// to the fields of the struct. 884 /// 885 /// ``` 886 /// use std::error::Error; 887 /// 888 /// use csv::Reader; 889 /// use serde::Deserialize; 890 /// 891 /// #[derive(Debug, Deserialize, Eq, PartialEq)] 892 /// struct Row { 893 /// city: String, 894 /// country: String, 895 /// #[serde(rename = "popcount")] 896 /// population: u64, 897 /// } 898 /// 899 /// # fn main() { example().unwrap(); } 900 /// fn example() -> Result<(), Box<dyn Error>> { 901 /// let data = "\ 902 /// city,country,popcount 903 /// Boston,United States,4628910 904 /// "; 905 /// let mut rdr = Reader::from_reader(data.as_bytes()); 906 /// let mut iter = rdr.deserialize(); 907 /// 908 /// if let Some(result) = iter.next() { 909 /// let record: Row = result?; 910 /// assert_eq!(record, Row { 911 /// city: "Boston".to_string(), 912 /// country: "United States".to_string(), 913 /// population: 4628910, 914 /// }); 915 /// Ok(()) 916 /// } else { 917 /// Err(From::from("expected at least one record but got none")) 918 /// } 919 /// } 920 /// ``` 921 /// 922 /// # Rules 923 /// 924 /// For the most part, any Rust type that maps straight-forwardly to a CSV 925 /// record is supported. This includes maps, structs, tuples and tuple 926 /// structs. Other Rust types, such as `Vec`s, arrays, and enums have 927 /// a more complicated story. In general, when working with CSV data, one 928 /// should avoid *nested sequences* as much as possible. 929 /// 930 /// Maps, structs, tuples and tuple structs map to CSV records in a simple 931 /// way. Tuples and tuple structs decode their fields in the order that 932 /// they are defined. Structs will do the same only if `has_headers` has 933 /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html), 934 /// otherwise, structs and maps are deserialized based on the fields 935 /// defined in the header row. (If there is no header row, then 936 /// deserializing into a map will result in an error.) 937 /// 938 /// Nested sequences are supported in a limited capacity. Namely, they 939 /// are flattened. 
As a result, it's often useful to use a `Vec` to capture 940 /// a "tail" of fields in a record: 941 /// 942 /// ``` 943 /// use std::error::Error; 944 /// 945 /// use csv::ReaderBuilder; 946 /// use serde::Deserialize; 947 /// 948 /// #[derive(Debug, Deserialize, Eq, PartialEq)] 949 /// struct Row { 950 /// label: String, 951 /// values: Vec<i32>, 952 /// } 953 /// 954 /// # fn main() { example().unwrap(); } 955 /// fn example() -> Result<(), Box<dyn Error>> { 956 /// let data = "foo,1,2,3"; 957 /// let mut rdr = ReaderBuilder::new() 958 /// .has_headers(false) 959 /// .from_reader(data.as_bytes()); 960 /// let mut iter = rdr.deserialize(); 961 /// 962 /// if let Some(result) = iter.next() { 963 /// let record: Row = result?; 964 /// assert_eq!(record, Row { 965 /// label: "foo".to_string(), 966 /// values: vec![1, 2, 3], 967 /// }); 968 /// Ok(()) 969 /// } else { 970 /// Err(From::from("expected at least one record but got none")) 971 /// } 972 /// } 973 /// ``` 974 /// 975 /// In the above example, adding another field to the `Row` struct after 976 /// the `values` field will result in a deserialization error. This is 977 /// because the deserializer doesn't know when to stop reading fields 978 /// into the `values` vector, so it will consume the rest of the fields in 979 /// the record leaving none left over for the additional field. 980 /// 981 /// Finally, simple enums in Rust can be deserialized as well. Namely, 982 /// enums must either be variants with no arguments or variants with a 983 /// single argument. Variants with no arguments are deserialized based on 984 /// which variant name the field matches. Variants with a single argument 985 /// are deserialized based on which variant can store the data. The latter 986 /// is only supported when using "untagged" enum deserialization. The 987 /// following example shows both forms in action: 988 /// 989 /// ``` 990 /// use std::error::Error; 991 /// 992 /// use csv::Reader; 993 /// use serde::Deserialize; 994 /// 995 /// #[derive(Debug, Deserialize, PartialEq)] 996 /// struct Row { 997 /// label: Label, 998 /// value: Number, 999 /// } 1000 /// 1001 /// #[derive(Debug, Deserialize, PartialEq)] 1002 /// #[serde(rename_all = "lowercase")] 1003 /// enum Label { 1004 /// Celsius, 1005 /// Fahrenheit, 1006 /// } 1007 /// 1008 /// #[derive(Debug, Deserialize, PartialEq)] 1009 /// #[serde(untagged)] 1010 /// enum Number { 1011 /// Integer(i64), 1012 /// Float(f64), 1013 /// } 1014 /// 1015 /// # fn main() { example().unwrap(); } 1016 /// fn example() -> Result<(), Box<dyn Error>> { 1017 /// let data = "\ 1018 /// label,value 1019 /// celsius,22.2222 1020 /// fahrenheit,72 1021 /// "; 1022 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1023 /// let mut iter = rdr.deserialize(); 1024 /// 1025 /// // Read the first record. 1026 /// if let Some(result) = iter.next() { 1027 /// let record: Row = result?; 1028 /// assert_eq!(record, Row { 1029 /// label: Label::Celsius, 1030 /// value: Number::Float(22.2222), 1031 /// }); 1032 /// } else { 1033 /// return Err(From::from( 1034 /// "expected at least two records but got none")); 1035 /// } 1036 /// 1037 /// // Read the second record. 
1038 /// if let Some(result) = iter.next() { 1039 /// let record: Row = result?; 1040 /// assert_eq!(record, Row { 1041 /// label: Label::Fahrenheit, 1042 /// value: Number::Integer(72), 1043 /// }); 1044 /// Ok(()) 1045 /// } else { 1046 /// Err(From::from( 1047 /// "expected at least two records but got only one")) 1048 /// } 1049 /// } 1050 /// ``` deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D> where D: DeserializeOwned,1051 pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D> 1052 where 1053 D: DeserializeOwned, 1054 { 1055 DeserializeRecordsIter::new(self) 1056 } 1057 1058 /// Returns an owned iterator over deserialized records. 1059 /// 1060 /// Each item yielded by this iterator is a `Result<D, Error>`. 1061 /// Therefore, in order to access the record, callers must handle the 1062 /// possibility of error (typically with `try!` or `?`). 1063 /// 1064 /// This is mostly useful when you want to return a CSV iterator or store 1065 /// it somewhere. 1066 /// 1067 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1068 /// default), then this does not include the first record. Additionally, 1069 /// if `has_headers` is enabled, then deserializing into a struct will 1070 /// automatically align the values in each row to the fields of a struct 1071 /// based on the header row. 1072 /// 1073 /// For more detailed deserialization rules, see the documentation on the 1074 /// `deserialize` method. 1075 /// 1076 /// # Example 1077 /// 1078 /// ``` 1079 /// use std::error::Error; 1080 /// 1081 /// use csv::Reader; 1082 /// use serde::Deserialize; 1083 /// 1084 /// #[derive(Debug, Deserialize, Eq, PartialEq)] 1085 /// struct Row { 1086 /// city: String, 1087 /// country: String, 1088 /// #[serde(rename = "popcount")] 1089 /// population: u64, 1090 /// } 1091 /// 1092 /// # fn main() { example().unwrap(); } 1093 /// fn example() -> Result<(), Box<dyn Error>> { 1094 /// let data = "\ 1095 /// city,country,popcount 1096 /// Boston,United States,4628910 1097 /// "; 1098 /// let rdr = Reader::from_reader(data.as_bytes()); 1099 /// let mut iter = rdr.into_deserialize(); 1100 /// 1101 /// if let Some(result) = iter.next() { 1102 /// let record: Row = result?; 1103 /// assert_eq!(record, Row { 1104 /// city: "Boston".to_string(), 1105 /// country: "United States".to_string(), 1106 /// population: 4628910, 1107 /// }); 1108 /// Ok(()) 1109 /// } else { 1110 /// Err(From::from("expected at least one record but got none")) 1111 /// } 1112 /// } 1113 /// ``` into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D> where D: DeserializeOwned,1114 pub fn into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D> 1115 where 1116 D: DeserializeOwned, 1117 { 1118 DeserializeRecordsIntoIter::new(self) 1119 } 1120 1121 /// Returns a borrowed iterator over all records as strings. 1122 /// 1123 /// Each item yielded by this iterator is a `Result<StringRecord, Error>`. 1124 /// Therefore, in order to access the record, callers must handle the 1125 /// possibility of error (typically with `try!` or `?`). 1126 /// 1127 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1128 /// default), then this does not include the first record. 
1129 /// 1130 /// # Example 1131 /// 1132 /// ``` 1133 /// use std::error::Error; 1134 /// use csv::Reader; 1135 /// 1136 /// # fn main() { example().unwrap(); } 1137 /// fn example() -> Result<(), Box<dyn Error>> { 1138 /// let data = "\ 1139 /// city,country,pop 1140 /// Boston,United States,4628910 1141 /// "; 1142 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1143 /// let mut iter = rdr.records(); 1144 /// 1145 /// if let Some(result) = iter.next() { 1146 /// let record = result?; 1147 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1148 /// Ok(()) 1149 /// } else { 1150 /// Err(From::from("expected at least one record but got none")) 1151 /// } 1152 /// } 1153 /// ``` records(&mut self) -> StringRecordsIter<R>1154 pub fn records(&mut self) -> StringRecordsIter<R> { 1155 StringRecordsIter::new(self) 1156 } 1157 1158 /// Returns an owned iterator over all records as strings. 1159 /// 1160 /// Each item yielded by this iterator is a `Result<StringRecord, Error>`. 1161 /// Therefore, in order to access the record, callers must handle the 1162 /// possibility of error (typically with `try!` or `?`). 1163 /// 1164 /// This is mostly useful when you want to return a CSV iterator or store 1165 /// it somewhere. 1166 /// 1167 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1168 /// default), then this does not include the first record. 1169 /// 1170 /// # Example 1171 /// 1172 /// ``` 1173 /// use std::error::Error; 1174 /// use csv::Reader; 1175 /// 1176 /// # fn main() { example().unwrap(); } 1177 /// fn example() -> Result<(), Box<dyn Error>> { 1178 /// let data = "\ 1179 /// city,country,pop 1180 /// Boston,United States,4628910 1181 /// "; 1182 /// let rdr = Reader::from_reader(data.as_bytes()); 1183 /// let mut iter = rdr.into_records(); 1184 /// 1185 /// if let Some(result) = iter.next() { 1186 /// let record = result?; 1187 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1188 /// Ok(()) 1189 /// } else { 1190 /// Err(From::from("expected at least one record but got none")) 1191 /// } 1192 /// } 1193 /// ``` into_records(self) -> StringRecordsIntoIter<R>1194 pub fn into_records(self) -> StringRecordsIntoIter<R> { 1195 StringRecordsIntoIter::new(self) 1196 } 1197 1198 /// Returns a borrowed iterator over all records as raw bytes. 1199 /// 1200 /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`. 1201 /// Therefore, in order to access the record, callers must handle the 1202 /// possibility of error (typically with `try!` or `?`). 1203 /// 1204 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1205 /// default), then this does not include the first record. 
1206 /// 1207 /// # Example 1208 /// 1209 /// ``` 1210 /// use std::error::Error; 1211 /// use csv::Reader; 1212 /// 1213 /// # fn main() { example().unwrap(); } 1214 /// fn example() -> Result<(), Box<dyn Error>> { 1215 /// let data = "\ 1216 /// city,country,pop 1217 /// Boston,United States,4628910 1218 /// "; 1219 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1220 /// let mut iter = rdr.byte_records(); 1221 /// 1222 /// if let Some(result) = iter.next() { 1223 /// let record = result?; 1224 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1225 /// Ok(()) 1226 /// } else { 1227 /// Err(From::from("expected at least one record but got none")) 1228 /// } 1229 /// } 1230 /// ``` byte_records(&mut self) -> ByteRecordsIter<R>1231 pub fn byte_records(&mut self) -> ByteRecordsIter<R> { 1232 ByteRecordsIter::new(self) 1233 } 1234 1235 /// Returns an owned iterator over all records as raw bytes. 1236 /// 1237 /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`. 1238 /// Therefore, in order to access the record, callers must handle the 1239 /// possibility of error (typically with `try!` or `?`). 1240 /// 1241 /// This is mostly useful when you want to return a CSV iterator or store 1242 /// it somewhere. 1243 /// 1244 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1245 /// default), then this does not include the first record. 1246 /// 1247 /// # Example 1248 /// 1249 /// ``` 1250 /// use std::error::Error; 1251 /// use csv::Reader; 1252 /// 1253 /// # fn main() { example().unwrap(); } 1254 /// fn example() -> Result<(), Box<dyn Error>> { 1255 /// let data = "\ 1256 /// city,country,pop 1257 /// Boston,United States,4628910 1258 /// "; 1259 /// let rdr = Reader::from_reader(data.as_bytes()); 1260 /// let mut iter = rdr.into_byte_records(); 1261 /// 1262 /// if let Some(result) = iter.next() { 1263 /// let record = result?; 1264 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1265 /// Ok(()) 1266 /// } else { 1267 /// Err(From::from("expected at least one record but got none")) 1268 /// } 1269 /// } 1270 /// ``` into_byte_records(self) -> ByteRecordsIntoIter<R>1271 pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> { 1272 ByteRecordsIntoIter::new(self) 1273 } 1274 1275 /// Returns a reference to the first row read by this parser. 1276 /// 1277 /// If no row has been read yet, then this will force parsing of the first 1278 /// row. 1279 /// 1280 /// If there was a problem parsing the row or if it wasn't valid UTF-8, 1281 /// then this returns an error. 1282 /// 1283 /// If the underlying reader emits EOF before any data, then this returns 1284 /// an empty record. 1285 /// 1286 /// Note that this method may be used regardless of whether `has_headers` 1287 /// was enabled (but it is enabled by default). 1288 /// 1289 /// # Example 1290 /// 1291 /// This example shows how to get the header row of CSV data. Notice that 1292 /// the header row does not appear as a record in the iterator! 1293 /// 1294 /// ``` 1295 /// use std::error::Error; 1296 /// use csv::Reader; 1297 /// 1298 /// # fn main() { example().unwrap(); } 1299 /// fn example() -> Result<(), Box<dyn Error>> { 1300 /// let data = "\ 1301 /// city,country,pop 1302 /// Boston,United States,4628910 1303 /// "; 1304 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1305 /// 1306 /// // We can read the headers before iterating. 1307 /// { 1308 /// // `headers` borrows from the reader, so we put this in its 1309 /// // own scope. 
That way, the borrow ends before we try iterating 1310 /// // below. Alternatively, we could clone the headers. 1311 /// let headers = rdr.headers()?; 1312 /// assert_eq!(headers, vec!["city", "country", "pop"]); 1313 /// } 1314 /// 1315 /// if let Some(result) = rdr.records().next() { 1316 /// let record = result?; 1317 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1318 /// } else { 1319 /// return Err(From::from( 1320 /// "expected at least one record but got none")) 1321 /// } 1322 /// 1323 /// // We can also read the headers after iterating. 1324 /// let headers = rdr.headers()?; 1325 /// assert_eq!(headers, vec!["city", "country", "pop"]); 1326 /// Ok(()) 1327 /// } 1328 /// ``` headers(&mut self) -> Result<&StringRecord>1329 pub fn headers(&mut self) -> Result<&StringRecord> { 1330 if self.state.headers.is_none() { 1331 let mut record = ByteRecord::new(); 1332 self.read_byte_record_impl(&mut record)?; 1333 self.set_headers_impl(Err(record)); 1334 } 1335 let headers = self.state.headers.as_ref().unwrap(); 1336 match headers.string_record { 1337 Ok(ref record) => Ok(record), 1338 Err(ref err) => Err(Error::new(ErrorKind::Utf8 { 1339 pos: headers.byte_record.position().map(Clone::clone), 1340 err: err.clone(), 1341 })), 1342 } 1343 } 1344 1345 /// Returns a reference to the first row read by this parser as raw bytes. 1346 /// 1347 /// If no row has been read yet, then this will force parsing of the first 1348 /// row. 1349 /// 1350 /// If there was a problem parsing the row then this returns an error. 1351 /// 1352 /// If the underlying reader emits EOF before any data, then this returns 1353 /// an empty record. 1354 /// 1355 /// Note that this method may be used regardless of whether `has_headers` 1356 /// was enabled (but it is enabled by default). 1357 /// 1358 /// # Example 1359 /// 1360 /// This example shows how to get the header row of CSV data. Notice that 1361 /// the header row does not appear as a record in the iterator! 1362 /// 1363 /// ``` 1364 /// use std::error::Error; 1365 /// use csv::Reader; 1366 /// 1367 /// # fn main() { example().unwrap(); } 1368 /// fn example() -> Result<(), Box<dyn Error>> { 1369 /// let data = "\ 1370 /// city,country,pop 1371 /// Boston,United States,4628910 1372 /// "; 1373 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1374 /// 1375 /// // We can read the headers before iterating. 1376 /// { 1377 /// // `headers` borrows from the reader, so we put this in its 1378 /// // own scope. That way, the borrow ends before we try iterating 1379 /// // below. Alternatively, we could clone the headers. 1380 /// let headers = rdr.byte_headers()?; 1381 /// assert_eq!(headers, vec!["city", "country", "pop"]); 1382 /// } 1383 /// 1384 /// if let Some(result) = rdr.byte_records().next() { 1385 /// let record = result?; 1386 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1387 /// } else { 1388 /// return Err(From::from( 1389 /// "expected at least one record but got none")) 1390 /// } 1391 /// 1392 /// // We can also read the headers after iterating. 
1393 /// let headers = rdr.byte_headers()?; 1394 /// assert_eq!(headers, vec!["city", "country", "pop"]); 1395 /// Ok(()) 1396 /// } 1397 /// ``` byte_headers(&mut self) -> Result<&ByteRecord>1398 pub fn byte_headers(&mut self) -> Result<&ByteRecord> { 1399 if self.state.headers.is_none() { 1400 let mut record = ByteRecord::new(); 1401 self.read_byte_record_impl(&mut record)?; 1402 self.set_headers_impl(Err(record)); 1403 } 1404 Ok(&self.state.headers.as_ref().unwrap().byte_record) 1405 } 1406 1407 /// Set the headers of this CSV parser manually. 1408 /// 1409 /// This overrides any other setting (including `set_byte_headers`). Any 1410 /// automatic detection of headers is disabled. This may be called at any 1411 /// time. 1412 /// 1413 /// # Example 1414 /// 1415 /// ``` 1416 /// use std::error::Error; 1417 /// use csv::{Reader, StringRecord}; 1418 /// 1419 /// # fn main() { example().unwrap(); } 1420 /// fn example() -> Result<(), Box<dyn Error>> { 1421 /// let data = "\ 1422 /// city,country,pop 1423 /// Boston,United States,4628910 1424 /// "; 1425 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1426 /// 1427 /// assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]); 1428 /// rdr.set_headers(StringRecord::from(vec!["a", "b", "c"])); 1429 /// assert_eq!(rdr.headers()?, vec!["a", "b", "c"]); 1430 /// 1431 /// Ok(()) 1432 /// } 1433 /// ``` set_headers(&mut self, headers: StringRecord)1434 pub fn set_headers(&mut self, headers: StringRecord) { 1435 self.set_headers_impl(Ok(headers)); 1436 } 1437 1438 /// Set the headers of this CSV parser manually as raw bytes. 1439 /// 1440 /// This overrides any other setting (including `set_headers`). Any 1441 /// automatic detection of headers is disabled. This may be called at any 1442 /// time. 1443 /// 1444 /// # Example 1445 /// 1446 /// ``` 1447 /// use std::error::Error; 1448 /// use csv::{Reader, ByteRecord}; 1449 /// 1450 /// # fn main() { example().unwrap(); } 1451 /// fn example() -> Result<(), Box<dyn Error>> { 1452 /// let data = "\ 1453 /// city,country,pop 1454 /// Boston,United States,4628910 1455 /// "; 1456 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1457 /// 1458 /// assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]); 1459 /// rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"])); 1460 /// assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]); 1461 /// 1462 /// Ok(()) 1463 /// } 1464 /// ``` set_byte_headers(&mut self, headers: ByteRecord)1465 pub fn set_byte_headers(&mut self, headers: ByteRecord) { 1466 self.set_headers_impl(Err(headers)); 1467 } 1468 set_headers_impl( &mut self, headers: result::Result<StringRecord, ByteRecord>, )1469 fn set_headers_impl( 1470 &mut self, 1471 headers: result::Result<StringRecord, ByteRecord>, 1472 ) { 1473 // If we have string headers, then get byte headers. But if we have 1474 // byte headers, then get the string headers (or a UTF-8 error). 
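        // Both representations are kept around so that `headers()` and
        // `byte_headers()` can later be served without re-parsing the input.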
        let (mut str_headers, mut byte_headers) = match headers {
            Ok(string) => {
                let bytes = string.clone().into_byte_record();
                (Ok(string), bytes)
            }
            Err(bytes) => {
                match StringRecord::from_byte_record(bytes.clone()) {
                    Ok(str_headers) => (Ok(str_headers), bytes),
                    Err(err) => (Err(err.utf8_error().clone()), bytes),
                }
            }
        };
        if self.state.trim.should_trim_headers() {
            if let Ok(ref mut str_headers) = str_headers.as_mut() {
                str_headers.trim();
            }
            byte_headers.trim();
        }
        self.state.headers = Some(Headers {
            byte_record: byte_headers,
            string_record: str_headers,
        });
    }

    /// Read a single row into the given record. Returns false when no more
    /// records could be read.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this will never read the first record.
    ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
    /// caller to reuse the `StringRecord` allocation, which usually results
    /// in higher throughput.
    ///
    /// Records read via this method are guaranteed to have a position set
    /// on them, even if the reader is at EOF or if an error is returned.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut record = StringRecord::new();
    ///
    ///     if rdr.read_record(&mut record)? {
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
        let result = record.read(self);
        // We need to trim again because trimming string records includes
        // Unicode whitespace. (ByteRecord trimming only includes ASCII
        // whitespace.)
        if self.state.trim.should_trim_fields() {
            record.trim();
        }
        result
    }

    /// Read a single row into the given byte record. Returns false when no
    /// more records could be read.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this will never read the first record.
    ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
    /// caller to reuse the `ByteRecord` allocation, which usually results
    /// in higher throughput.
    ///
    /// Records read via this method are guaranteed to have a position set
    /// on them, even if the reader is at EOF or if an error is returned.
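    ///
    /// A typical pattern, shown here as a minimal sketch, is to allocate a
    /// single `ByteRecord` and reuse it for every row:
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ByteRecord, Reader};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut record = ByteRecord::new();
    ///     let mut count = 0;
    ///     // The same allocation is reused across iterations.
    ///     while rdr.read_byte_record(&mut record)? {
    ///         count += 1;
    ///     }
    ///     assert_eq!(count, 2);
    ///     Ok(())
    /// }
    /// ```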
1560 /// 1561 /// # Example 1562 /// 1563 /// ``` 1564 /// use std::error::Error; 1565 /// use csv::{ByteRecord, Reader}; 1566 /// 1567 /// # fn main() { example().unwrap(); } 1568 /// fn example() -> Result<(), Box<dyn Error>> { 1569 /// let data = "\ 1570 /// city,country,pop 1571 /// Boston,United States,4628910 1572 /// "; 1573 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1574 /// let mut record = ByteRecord::new(); 1575 /// 1576 /// if rdr.read_byte_record(&mut record)? { 1577 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1578 /// Ok(()) 1579 /// } else { 1580 /// Err(From::from("expected at least one record but got none")) 1581 /// } 1582 /// } 1583 /// ``` read_byte_record( &mut self, record: &mut ByteRecord, ) -> Result<bool>1584 pub fn read_byte_record( 1585 &mut self, 1586 record: &mut ByteRecord, 1587 ) -> Result<bool> { 1588 if !self.state.seeked && !self.state.has_headers && !self.state.first { 1589 // If the caller indicated "no headers" and we haven't yielded the 1590 // first record yet, then we should yield our header row if we have 1591 // one. 1592 if let Some(ref headers) = self.state.headers { 1593 self.state.first = true; 1594 record.clone_from(&headers.byte_record); 1595 if self.state.trim.should_trim_fields() { 1596 record.trim(); 1597 } 1598 return Ok(!record.is_empty()); 1599 } 1600 } 1601 let ok = self.read_byte_record_impl(record)?; 1602 self.state.first = true; 1603 if !self.state.seeked && self.state.headers.is_none() { 1604 self.set_headers_impl(Err(record.clone())); 1605 // If the end user indicated that we have headers, then we should 1606 // never return the first row. Instead, we should attempt to 1607 // read and return the next one. 1608 if self.state.has_headers { 1609 let result = self.read_byte_record_impl(record); 1610 if self.state.trim.should_trim_fields() { 1611 record.trim(); 1612 } 1613 return result; 1614 } 1615 } else if self.state.trim.should_trim_fields() { 1616 record.trim(); 1617 } 1618 Ok(ok) 1619 } 1620 1621 /// Read a byte record from the underlying CSV reader, without accounting 1622 /// for headers. 
1623 #[inline(always)] read_byte_record_impl( &mut self, record: &mut ByteRecord, ) -> Result<bool>1624 fn read_byte_record_impl( 1625 &mut self, 1626 record: &mut ByteRecord, 1627 ) -> Result<bool> { 1628 use csv_core::ReadRecordResult::*; 1629 1630 record.clear(); 1631 record.set_position(Some(self.state.cur_pos.clone())); 1632 if self.state.eof != ReaderEofState::NotEof { 1633 return Ok(false); 1634 } 1635 let (mut outlen, mut endlen) = (0, 0); 1636 loop { 1637 let (res, nin, nout, nend) = { 1638 let input_res = self.rdr.fill_buf(); 1639 if input_res.is_err() { 1640 self.state.eof = ReaderEofState::IOError; 1641 } 1642 let input = input_res?; 1643 let (fields, ends) = record.as_parts(); 1644 self.core.read_record( 1645 input, 1646 &mut fields[outlen..], 1647 &mut ends[endlen..], 1648 ) 1649 }; 1650 self.rdr.consume(nin); 1651 let byte = self.state.cur_pos.byte(); 1652 self.state 1653 .cur_pos 1654 .set_byte(byte + nin as u64) 1655 .set_line(self.core.line()); 1656 outlen += nout; 1657 endlen += nend; 1658 match res { 1659 InputEmpty => continue, 1660 OutputFull => { 1661 record.expand_fields(); 1662 continue; 1663 } 1664 OutputEndsFull => { 1665 record.expand_ends(); 1666 continue; 1667 } 1668 Record => { 1669 record.set_len(endlen); 1670 self.state.add_record(record)?; 1671 return Ok(true); 1672 } 1673 End => { 1674 self.state.eof = ReaderEofState::Eof; 1675 return Ok(false); 1676 } 1677 } 1678 } 1679 } 1680 1681 /// Return the current position of this CSV reader. 1682 /// 1683 /// The byte offset in the position returned can be used to `seek` this 1684 /// reader. In particular, seeking to a position returned here on the same 1685 /// data will result in parsing the same subsequent record. 1686 /// 1687 /// # Example: reading the position 1688 /// 1689 /// ``` 1690 /// use std::error::Error; 1691 /// use std::io; 1692 /// use csv::{Reader, Position}; 1693 /// 1694 /// # fn main() { example().unwrap(); } 1695 /// fn example() -> Result<(), Box<dyn Error>> { 1696 /// let data = "\ 1697 /// city,country,popcount 1698 /// Boston,United States,4628910 1699 /// Concord,United States,42695 1700 /// "; 1701 /// let rdr = Reader::from_reader(io::Cursor::new(data)); 1702 /// let mut iter = rdr.into_records(); 1703 /// let mut pos = Position::new(); 1704 /// loop { 1705 /// // Read the position immediately before each record. 1706 /// let next_pos = iter.reader().position().clone(); 1707 /// if iter.next().is_none() { 1708 /// break; 1709 /// } 1710 /// pos = next_pos; 1711 /// } 1712 /// 1713 /// // `pos` should now be the position immediately before the last 1714 /// // record. 1715 /// assert_eq!(pos.byte(), 51); 1716 /// assert_eq!(pos.line(), 3); 1717 /// assert_eq!(pos.record(), 2); 1718 /// Ok(()) 1719 /// } 1720 /// ``` position(&self) -> &Position1721 pub fn position(&self) -> &Position { 1722 &self.state.cur_pos 1723 } 1724 1725 /// Returns true if and only if this reader has been exhausted. 1726 /// 1727 /// When this returns true, no more records can be read from this reader 1728 /// (unless it has been seeked to another position). 
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use std::io;
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(io::Cursor::new(data));
    ///     assert!(!rdr.is_done());
    ///     for result in rdr.records() {
    ///         let _ = result?;
    ///     }
    ///     assert!(rdr.is_done());
    ///     Ok(())
    /// }
    /// ```
    pub fn is_done(&self) -> bool {
        self.state.eof != ReaderEofState::NotEof
    }

    /// Returns true if and only if this reader has been configured to
    /// interpret the first record as a header record.
    pub fn has_headers(&self) -> bool {
        self.state.has_headers
    }

    /// Returns a reference to the underlying reader.
    pub fn get_ref(&self) -> &R {
        self.rdr.get_ref()
    }

    /// Returns a mutable reference to the underlying reader.
    pub fn get_mut(&mut self) -> &mut R {
        self.rdr.get_mut()
    }

    /// Unwraps this CSV reader, returning the underlying reader.
    ///
    /// Note that any leftover data inside this reader's internal buffer is
    /// lost.
    pub fn into_inner(self) -> R {
        self.rdr.into_inner()
    }
}

impl<R: io::Read + io::Seek> Reader<R> {
    /// Seeks the underlying reader to the position given.
    ///
    /// This comes with a few caveats:
    ///
    /// * Any internal buffer associated with this reader is cleared.
    /// * If the given position does not correspond to a position immediately
    /// before the start of a record, then the behavior of this reader is
    /// unspecified.
    /// * Any special logic that skips the first record in the CSV reader
    /// when reading or iterating over records is disabled.
    ///
    /// If the given position has a byte offset equivalent to the current
    /// position, then no seeking is performed.
    ///
    /// If the header row has not already been read, then this will attempt
    /// to read the header row before seeking. Therefore, it is possible that
    /// this returns an error associated with reading CSV data.
    ///
    /// Note that seeking is performed based only on the byte offset in the
    /// given position. Namely, the record or line numbers in the position may
    /// be incorrect, in which case any future position generated by this CSV
    /// reader will be similarly incorrect.
    ///
    /// # Example: seek to parse a record twice
    ///
    /// ```
    /// use std::error::Error;
    /// use std::io;
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let rdr = Reader::from_reader(io::Cursor::new(data));
    ///     let mut iter = rdr.into_records();
    ///     let mut pos = Position::new();
    ///     loop {
    ///         // Read the position immediately before each record.
    ///         let next_pos = iter.reader().position().clone();
    ///         if iter.next().is_none() {
    ///             break;
    ///         }
    ///         pos = next_pos;
    ///     }
    ///
    ///     // Now seek the reader back to `pos`. This will let us read the
    ///     // last record again.
    ///     iter.reader_mut().seek(pos)?;
    ///     let mut iter = iter.into_reader().into_records();
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Concord", "United States", "42695"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn seek(&mut self, pos: Position) -> Result<()> {
        self.byte_headers()?;
        self.state.seeked = true;
        if pos.byte() == self.state.cur_pos.byte() {
            return Ok(());
        }
        self.rdr.seek(io::SeekFrom::Start(pos.byte()))?;
        self.core.reset();
        self.core.set_line(pos.line());
        self.state.cur_pos = pos;
        self.state.eof = ReaderEofState::NotEof;
        Ok(())
    }

    /// This is like `seek`, but provides direct control over how the seeking
    /// operation is performed via `io::SeekFrom`.
    ///
    /// The `pos` position given *should* correspond to the position indicated
    /// by `seek_from`, but there is no requirement. If the `pos` position
    /// given is incorrect, then the position information returned by this
    /// reader will be similarly incorrect.
    ///
    /// If the header row has not already been read, then this will attempt
    /// to read the header row before seeking. Therefore, it is possible that
    /// this returns an error associated with reading CSV data.
    ///
    /// Unlike `seek`, this will always cause an actual seek to be performed.
    pub fn seek_raw(
        &mut self,
        seek_from: io::SeekFrom,
        pos: Position,
    ) -> Result<()> {
        self.byte_headers()?;
        self.state.seeked = true;
        self.rdr.seek(seek_from)?;
        self.core.reset();
        self.core.set_line(pos.line());
        self.state.cur_pos = pos;
        self.state.eof = ReaderEofState::NotEof;
        Ok(())
    }
}

impl ReaderState {
    #[inline(always)]
    fn add_record(&mut self, record: &ByteRecord) -> Result<()> {
        let i = self.cur_pos.record();
        self.cur_pos.set_record(i.checked_add(1).unwrap());
        if !self.flexible {
            match self.first_field_count {
                None => self.first_field_count = Some(record.len() as u64),
                Some(expected) => {
                    if record.len() as u64 != expected {
                        return Err(Error::new(ErrorKind::UnequalLengths {
                            pos: record.position().map(Clone::clone),
                            expected_len: expected,
                            len: record.len() as u64,
                        }));
                    }
                }
            }
        }
        Ok(())
    }
}

/// An owned iterator over deserialized records.
///
/// The type parameter `R` refers to the underlying `io::Read` type, and `D`
/// refers to the type that this iterator will deserialize a record into.
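///
/// # Example
///
/// A brief illustrative sketch; the `Row` type here is just an example, and
/// the iterator itself is normally obtained via `Reader::into_deserialize`.
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// #[derive(Debug, serde::Deserialize)]
/// struct Row {
///     city: String,
///     country: String,
///     pop: u64,
/// }
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
///     let mut iter = Reader::from_reader(data.as_bytes()).into_deserialize();
///     if let Some(result) = iter.next() {
///         let row: Row = result?;
///         assert_eq!(row.city, "Boston");
///         Ok(())
///     } else {
///         Err(From::from("expected at least one record but got none"))
///     }
/// }
/// ```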
pub struct DeserializeRecordsIntoIter<R, D> {
    rdr: Reader<R>,
    rec: StringRecord,
    headers: Option<StringRecord>,
    _priv: PhantomData<D>,
}

impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> {
    fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> {
        let headers = if !rdr.state.has_headers {
            None
        } else {
            rdr.headers().ok().map(Clone::clone)
        };
        DeserializeRecordsIntoIter {
            rdr: rdr,
            rec: StringRecord::new(),
            headers: headers,
            _priv: PhantomData,
        }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read, D: DeserializeOwned> Iterator
    for DeserializeRecordsIntoIter<R, D>
{
    type Item = Result<D>;

    fn next(&mut self) -> Option<Result<D>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(false) => None,
            Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
        }
    }
}

/// A borrowed iterator over deserialized records.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read`
/// type, and `D` refers to the type that this iterator will deserialize a
/// record into.
pub struct DeserializeRecordsIter<'r, R: 'r, D> {
    rdr: &'r mut Reader<R>,
    rec: StringRecord,
    headers: Option<StringRecord>,
    _priv: PhantomData<D>,
}

impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> {
    fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> {
        let headers = if !rdr.state.has_headers {
            None
        } else {
            rdr.headers().ok().map(Clone::clone)
        };
        DeserializeRecordsIter {
            rdr: rdr,
            rec: StringRecord::new(),
            headers: headers,
            _priv: PhantomData,
        }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read, D: DeserializeOwned> Iterator
    for DeserializeRecordsIter<'r, R, D>
{
    type Item = Result<D>;

    fn next(&mut self) -> Option<Result<D>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(false) => None,
            Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
        }
    }
}

/// An owned iterator over records as strings.
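///
/// # Example
///
/// A minimal usage sketch; this iterator is typically obtained via
/// `Reader::into_records`.
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
///     let mut iter = Reader::from_reader(data.as_bytes()).into_records();
///     if let Some(result) = iter.next() {
///         let record = result?;
///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
///         Ok(())
///     } else {
///         Err(From::from("expected at least one record but got none"))
///     }
/// }
/// ```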
pub struct StringRecordsIntoIter<R> {
    rdr: Reader<R>,
    rec: StringRecord,
}

impl<R: io::Read> StringRecordsIntoIter<R> {
    fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> {
        StringRecordsIntoIter { rdr: rdr, rec: StringRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read> Iterator for StringRecordsIntoIter<R> {
    type Item = Result<StringRecord>;

    fn next(&mut self) -> Option<Result<StringRecord>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
        }
    }
}

/// A borrowed iterator over records as strings.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct StringRecordsIter<'r, R: 'r> {
    rdr: &'r mut Reader<R>,
    rec: StringRecord,
}

impl<'r, R: io::Read> StringRecordsIter<'r, R> {
    fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> {
        StringRecordsIter { rdr: rdr, rec: StringRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> {
    type Item = Result<StringRecord>;

    fn next(&mut self) -> Option<Result<StringRecord>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
        }
    }
}

/// An owned iterator over records as raw bytes.
pub struct ByteRecordsIntoIter<R> {
    rdr: Reader<R>,
    rec: ByteRecord,
}

impl<R: io::Read> ByteRecordsIntoIter<R> {
    fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> {
        ByteRecordsIntoIter { rdr: rdr, rec: ByteRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
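    ///
    /// # Example
    ///
    /// A brief sketch of recovering the reader in order to continue reading
    /// with a different record type (the iterator here is obtained via
    /// `Reader::into_byte_records`):
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut iter =
    ///         Reader::from_reader(data.as_bytes()).into_byte_records();
    ///     // Read the first record as raw bytes.
    ///     let first = iter.next().transpose()?;
    ///     assert!(first.is_some());
    ///
    ///     // Recover the reader and keep going with string records.
    ///     let mut rdr = iter.into_reader();
    ///     let mut record = StringRecord::new();
    ///     assert!(rdr.read_record(&mut record)?);
    ///     assert_eq!(record, vec!["Concord", "United States", "42695"]);
    ///     Ok(())
    /// }
    /// ```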
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> {
    type Item = Result<ByteRecord>;

    fn next(&mut self) -> Option<Result<ByteRecord>> {
        match self.rdr.read_byte_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
        }
    }
}

/// A borrowed iterator over records as raw bytes.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct ByteRecordsIter<'r, R: 'r> {
    rdr: &'r mut Reader<R>,
    rec: ByteRecord,
}

impl<'r, R: io::Read> ByteRecordsIter<'r, R> {
    fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> {
        ByteRecordsIter { rdr: rdr, rec: ByteRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> {
    type Item = Result<ByteRecord>;

    fn next(&mut self) -> Option<Result<ByteRecord>> {
        match self.rdr.read_byte_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use std::io;

    use crate::byte_record::ByteRecord;
    use crate::error::ErrorKind;
    use crate::string_record::StringRecord;

    use super::{Position, ReaderBuilder, Trim};

    fn b(s: &str) -> &[u8] {
        s.as_bytes()
    }
    fn s(b: &[u8]) -> &str {
        ::std::str::from_utf8(b).unwrap()
    }

    fn newpos(byte: u64, line: u64, record: u64) -> Position {
        let mut p = Position::new();
        p.set_byte(byte).set_line(line).set_record(record);
        p
    }

    #[test]
    fn read_byte_record() {
        let data = b("foo,\"b,ar\",baz\nabc,mno,xyz");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", s(&rec[0]));
        assert_eq!("b,ar", s(&rec[1]));
        assert_eq!("baz", s(&rec[2]));

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("abc", s(&rec[0]));
        assert_eq!("mno", s(&rec[1]));
        assert_eq!("xyz", s(&rec[2]));

        assert!(!rdr.read_byte_record(&mut rec).unwrap());
    }

    #[test]
    fn read_trimmed_records_and_headers() {
        let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .trim(Trim::All)
            .from_reader(data);
        let mut rec = ByteRecord::new();
        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!("1", s(&rec[0]));
        assert_eq!("2", s(&rec[1]));
assert_eq!("3", s(&rec[2])); 2229 let mut rec = StringRecord::new(); 2230 assert!(rdr.read_record(&mut rec).unwrap()); 2231 assert_eq!("1", &rec[0]); 2232 assert_eq!("", &rec[1]); 2233 assert_eq!("3", &rec[2]); 2234 { 2235 let headers = rdr.headers().unwrap(); 2236 assert_eq!(3, headers.len()); 2237 assert_eq!("foo", &headers[0]); 2238 assert_eq!("bar", &headers[1]); 2239 assert_eq!("baz", &headers[2]); 2240 } 2241 } 2242 2243 #[test] read_trimmed_header()2244 fn read_trimmed_header() { 2245 let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t"); 2246 let mut rdr = ReaderBuilder::new() 2247 .has_headers(true) 2248 .trim(Trim::Headers) 2249 .from_reader(data); 2250 let mut rec = ByteRecord::new(); 2251 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2252 assert_eq!(" 1", s(&rec[0])); 2253 assert_eq!(" 2", s(&rec[1])); 2254 assert_eq!(" 3", s(&rec[2])); 2255 { 2256 let headers = rdr.headers().unwrap(); 2257 assert_eq!(3, headers.len()); 2258 assert_eq!("foo", &headers[0]); 2259 assert_eq!("bar", &headers[1]); 2260 assert_eq!("baz", &headers[2]); 2261 } 2262 } 2263 2264 #[test] read_trimed_header_invalid_utf8()2265 fn read_trimed_header_invalid_utf8() { 2266 let data = &b"foo, b\xFFar,\tbaz\na,b,c\nd,e,f"[..]; 2267 let mut rdr = ReaderBuilder::new() 2268 .has_headers(true) 2269 .trim(Trim::Headers) 2270 .from_reader(data); 2271 let mut rec = StringRecord::new(); 2272 2273 // force the headers to be read 2274 let _ = rdr.read_record(&mut rec); 2275 // Check the byte headers are trimmed 2276 { 2277 let headers = rdr.byte_headers().unwrap(); 2278 assert_eq!(3, headers.len()); 2279 assert_eq!(b"foo", &headers[0]); 2280 assert_eq!(b"b\xFFar", &headers[1]); 2281 assert_eq!(b"baz", &headers[2]); 2282 } 2283 match *rdr.headers().unwrap_err().kind() { 2284 ErrorKind::Utf8 { pos: Some(ref pos), ref err } => { 2285 assert_eq!(pos, &newpos(0, 1, 0)); 2286 assert_eq!(err.field(), 1); 2287 assert_eq!(err.valid_up_to(), 3); 2288 } 2289 ref err => panic!("match failed, got {:?}", err), 2290 } 2291 } 2292 2293 #[test] read_trimmed_records()2294 fn read_trimmed_records() { 2295 let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t"); 2296 let mut rdr = ReaderBuilder::new() 2297 .has_headers(true) 2298 .trim(Trim::Fields) 2299 .from_reader(data); 2300 let mut rec = ByteRecord::new(); 2301 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2302 assert_eq!("1", s(&rec[0])); 2303 assert_eq!("2", s(&rec[1])); 2304 assert_eq!("3", s(&rec[2])); 2305 { 2306 let headers = rdr.headers().unwrap(); 2307 assert_eq!(3, headers.len()); 2308 assert_eq!("foo", &headers[0]); 2309 assert_eq!(" bar", &headers[1]); 2310 assert_eq!("\tbaz", &headers[2]); 2311 } 2312 } 2313 2314 #[test] read_record_unequal_fails()2315 fn read_record_unequal_fails() { 2316 let data = b("foo\nbar,baz"); 2317 let mut rdr = 2318 ReaderBuilder::new().has_headers(false).from_reader(data); 2319 let mut rec = ByteRecord::new(); 2320 2321 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2322 assert_eq!(1, rec.len()); 2323 assert_eq!("foo", s(&rec[0])); 2324 2325 match rdr.read_byte_record(&mut rec) { 2326 Err(err) => match *err.kind() { 2327 ErrorKind::UnequalLengths { 2328 expected_len: 1, 2329 ref pos, 2330 len: 2, 2331 } => { 2332 assert_eq!(pos, &Some(newpos(4, 2, 1))); 2333 } 2334 ref wrong => panic!("match failed, got {:?}", wrong), 2335 }, 2336 wrong => panic!("match failed, got {:?}", wrong), 2337 } 2338 } 2339 2340 #[test] read_record_unequal_ok()2341 fn read_record_unequal_ok() { 2342 let data = b("foo\nbar,baz"); 2343 let mut rdr = 
            .has_headers(false)
            .flexible(true)
            .from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("foo", s(&rec[0]));

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(2, rec.len());
        assert_eq!("bar", s(&rec[0]));
        assert_eq!("baz", s(&rec[1]));

        assert!(!rdr.read_byte_record(&mut rec).unwrap());
    }

    // This tests that even if we get a CSV error, we can continue reading
    // if we want.
    #[test]
    fn read_record_unequal_continue() {
        let data = b("foo\nbar,baz\nquux");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("foo", s(&rec[0]));

        match rdr.read_byte_record(&mut rec) {
            Err(err) => match err.kind() {
                &ErrorKind::UnequalLengths {
                    expected_len: 1,
                    ref pos,
                    len: 2,
                } => {
                    assert_eq!(pos, &Some(newpos(4, 2, 1)));
                }
                wrong => panic!("match failed, got {:?}", wrong),
            },
            wrong => panic!("match failed, got {:?}", wrong),
        }

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("quux", s(&rec[0]));

        assert!(!rdr.read_byte_record(&mut rec).unwrap());
    }

    #[test]
    fn read_record_headers() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        {
            let headers = rdr.byte_headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!(b"foo", &headers[0]);
            assert_eq!(b"bar", &headers[1]);
            assert_eq!(b"baz", &headers[2]);
        }
        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }
    }

    #[test]
    fn read_record_headers_invalid_utf8() {
        let data = &b"foo,b\xFFar,baz\na,b,c\nd,e,f"[..];
        let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        // Check that we can read the headers as raw bytes, but that
        // if we read them as strings, we get an appropriate UTF-8 error.
        {
            let headers = rdr.byte_headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!(b"foo", &headers[0]);
            assert_eq!(b"b\xFFar", &headers[1]);
            assert_eq!(b"baz", &headers[2]);
        }
        match *rdr.headers().unwrap_err().kind() {
            ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
                assert_eq!(pos, &newpos(0, 1, 0));
                assert_eq!(err.field(), 1);
                assert_eq!(err.valid_up_to(), 1);
            }
            ref err => panic!("match failed, got {:?}", err),
        }
    }

    #[test]
    fn read_record_no_headers_before() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = StringRecord::new();

        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());
    }

    #[test]
    fn read_record_no_headers_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        let headers = rdr.headers().unwrap();
        assert_eq!(3, headers.len());
        assert_eq!("foo", &headers[0]);
        assert_eq!("bar", &headers[1]);
        assert_eq!("baz", &headers[2]);
    }

    #[test]
    fn seek() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(newpos(18, 3, 2)).unwrap();

        let mut rec = StringRecord::new();

        assert_eq!(18, rdr.position().byte());
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert_eq!(24, rdr.position().byte());
        assert_eq!(4, rdr.position().line());
        assert_eq!(3, rdr.position().record());
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("g", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());
    }

    // Test that we can read headers after seeking even if the headers weren't
    // explicitly read before seeking.
    #[test]
    fn seek_headers_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(newpos(18, 3, 2)).unwrap();
        assert_eq!(rdr.headers().unwrap(), vec!["foo", "bar", "baz"]);
    }
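
    // A brief additional sketch: seeking resets the reader's EOF state, so
    // `is_done` reports false again after a seek and reading can resume.
    #[test]
    fn seek_resets_is_done() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        assert_eq!(rdr.records().count(), 3);
        assert!(rdr.is_done());

        // Seek back to the start of the second data row ("d,e,f").
        rdr.seek(newpos(18, 3, 2)).unwrap();
        assert!(!rdr.is_done());

        let mut rec = StringRecord::new();
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!("d", &rec[0]);
    }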

    // Test that we can read headers after seeking if the headers were read
    // before seeking.
    #[test]
    fn seek_headers_before_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        let headers = rdr.headers().unwrap().clone();
        rdr.seek(newpos(18, 3, 2)).unwrap();
        assert_eq!(&headers, rdr.headers().unwrap());
    }

    // Test that even if we didn't read headers before seeking, if we seek to
    // the current byte offset, then no seeking is done and therefore we can
    // still read headers after seeking.
    #[test]
    fn seek_headers_no_actual_seek() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(Position::new()).unwrap();
        assert_eq!("foo", &rdr.headers().unwrap()[0]);
    }

    // Test that position info is reported correctly in the absence of headers.
    #[test]
    fn positions_no_headers() {
        let mut rdr = ReaderBuilder::new()
            .has_headers(false)
            .from_reader("a,b,c\nx,y,z".as_bytes())
            .into_records();

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 0);
        assert_eq!(pos.line(), 1);
        assert_eq!(pos.record(), 0);

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 6);
        assert_eq!(pos.line(), 2);
        assert_eq!(pos.record(), 1);
    }

    // Test that position info is reported correctly with headers.
    #[test]
    fn positions_headers() {
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .from_reader("a,b,c\nx,y,z".as_bytes())
            .into_records();

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 6);
        assert_eq!(pos.line(), 2);
        assert_eq!(pos.record(), 1);
    }

    // Test that reading headers on empty data yields an empty record.
    #[test]
    fn headers_on_empty_data() {
        let mut rdr = ReaderBuilder::new().from_reader("".as_bytes());
        let r = rdr.byte_headers().unwrap();
        assert_eq!(r.len(), 0);
    }

    // Test that reading the first record on empty data works.
    #[test]
    fn no_headers_on_empty_data() {
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
        assert_eq!(rdr.records().count(), 0);
    }

    // Test that reading the first record on empty data works, even if
    // we've tried to read headers beforehand.
    #[test]
    fn no_headers_on_empty_data_after_headers() {
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
        assert_eq!(rdr.headers().unwrap().len(), 0);
        assert_eq!(rdr.records().count(), 0);
    }
}