#!/usr/bin/env python
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Merge multiple CSV files, possibly with different columns.

Input is either a list of CSV files given as positional arguments or a single
ZIP archive (--zip_input); from the archive only entries whose name ends in
'.uau' are read. The output header is taken from --header when given,
otherwise it is the sorted union of the column names of all inputs.
"""

import argparse
import csv
import io

from zipfile import ZipFile

args_parser = argparse.ArgumentParser(description='Merge given CSV files into a single one.')
args_parser.add_argument('--header', help='Comma separated field names; '
                                          'if missing determines the header from input files.')
args_parser.add_argument('--zip_input', help='ZIP archive with all CSV files to merge.')
args_parser.add_argument('--output', help='Output file for merged CSV.',
                         default='-', type=argparse.FileType('w'))
args_parser.add_argument('files', nargs=argparse.REMAINDER)
args = args_parser.parse_args()


def dict_reader(input):
    """Return a csv.DictReader over `input` using ',' as delimiter and '|' as quote char."""
    return csv.DictReader(input, delimiter=',', quotechar='|')


if args.zip_input and args.files:
    raise ValueError('Expecting either a single ZIP with CSV files'
                     ' or a list of CSV files as input; not both.')

csv_readers = []
# Every handle opened below is recorded here so it can be closed once the
# merge is done (or has failed), instead of leaking until interpreter exit.
opened = []
try:
    if args.files:
        for path in args.files:
            handle = open(path, 'r')
            opened.append(handle)
            csv_readers.append(dict_reader(handle))
    elif args.zip_input:
        # The archive must stay open while its entries are being read, so it
        # is closed in the `finally` below rather than by a local `with`.
        archive = ZipFile(args.zip_input)
        opened.append(archive)
        for entry in archive.namelist():
            if entry.endswith('.uau'):
                stream = io.TextIOWrapper(archive.open(entry, 'r'))
                opened.append(stream)
                csv_readers.append(dict_reader(stream))

    headers = set()
    if args.header:
        fieldnames = args.header.split(',')
    else:
        # Build union of all columns from source files:
        for reader in csv_readers:
            # `fieldnames` is None for an empty input file; treat that as
            # "no columns" instead of failing on the union.
            headers.update(reader.fieldnames or ())
        fieldnames = sorted(headers)

    # Concatenate all files to output:
    writer = csv.DictWriter(args.output, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL,
                            dialect='unix', fieldnames=fieldnames)
    writer.writeheader()
    for reader in csv_readers:
        for row in reader:
            writer.writerow(row)
finally:
    # Close in reverse order so archive entry streams go before the archive.
    for handle in reversed(opened):
        handle.close()