1 /*
2 * bwt.c for libdivsufsort
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #if HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30 #include <stdio.h>
31 #if HAVE_STRING_H
32 # include <string.h>
33 #endif
34 #if HAVE_STDLIB_H
35 # include <stdlib.h>
36 #endif
37 #if HAVE_MEMORY_H
38 # include <memory.h>
39 #endif
40 #if HAVE_STDDEF_H
41 # include <stddef.h>
42 #endif
43 #if HAVE_STRINGS_H
44 # include <strings.h>
45 #endif
46 #if HAVE_SYS_TYPES_H
47 # include <sys/types.h>
48 #endif
49 #if HAVE_IO_H && HAVE_FCNTL_H
50 # include <io.h>
51 # include <fcntl.h>
52 #endif
53 #include <time.h>
54 #include <divsufsort.h>
55 #include "lfs.h"
56
57
58 static
59 size_t
write_int(FILE * fp,saidx_t n)60 write_int(FILE *fp, saidx_t n) {
61 unsigned char c[4];
62 c[0] = (unsigned char)((n >> 0) & 0xff), c[1] = (unsigned char)((n >> 8) & 0xff),
63 c[2] = (unsigned char)((n >> 16) & 0xff), c[3] = (unsigned char)((n >> 24) & 0xff);
64 return fwrite(c, sizeof(unsigned char), 4, fp);
65 }
66
/*
 * Prints the program banner (including the string returned by
 * divsufsort_version()) and the usage summary to stderr, then terminates
 * the process with exit(status). This function does not return.
 *
 *   progname  name shown in the usage line
 *   status    exit status handed to exit()
 */
static
void
print_help(const char *progname, int status) {
  fprintf(stderr,
          "bwt, a burrows-wheeler transform program, version %s.\n"
          "usage: %s [-b num] INFILE OUTFILE\n",
          divsufsort_version(), progname);
  fputs(" -b num set block size to num MiB [1..512] (default: 32)\n\n", stderr);
  exit(status);
}
77
78 int
main(int argc,const char * argv[])79 main(int argc, const char *argv[]) {
80 FILE *fp, *ofp;
81 const char *fname, *ofname;
82 sauchar_t *T;
83 saidx_t *SA;
84 LFS_OFF_T n;
85 size_t m;
86 saidx_t pidx;
87 clock_t start,finish;
88 saint_t i, blocksize = 32, needclose = 3;
89
90 /* Check arguments. */
91 if((argc == 1) ||
92 (strcmp(argv[1], "-h") == 0) ||
93 (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
94 if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); }
95 i = 1;
96 if(argc == 5) {
97 if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); }
98 blocksize = atoi(argv[i + 1]);
99 if(blocksize < 0) { blocksize = 1; }
100 else if(512 < blocksize) { blocksize = 512; }
101 i += 2;
102 }
103 blocksize <<= 20;
104
105 /* Open a file for reading. */
106 if(strcmp(argv[i], "-") != 0) {
107 #if HAVE_FOPEN_S
108 if(fopen_s(&fp, fname = argv[i], "rb") != 0) {
109 #else
110 if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) {
111 #endif
112 fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
113 perror(NULL);
114 exit(EXIT_FAILURE);
115 }
116 } else {
117 #if HAVE__SETMODE && HAVE__FILENO
118 if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
119 fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
120 perror(NULL);
121 exit(EXIT_FAILURE);
122 }
123 #endif
124 fp = stdin;
125 fname = "stdin";
126 needclose ^= 1;
127 }
128 i += 1;
129
130 /* Open a file for writing. */
131 if(strcmp(argv[i], "-") != 0) {
132 #if HAVE_FOPEN_S
133 if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) {
134 #else
135 if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) {
136 #endif
137 fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
138 perror(NULL);
139 exit(EXIT_FAILURE);
140 }
141 } else {
142 #if HAVE__SETMODE && HAVE__FILENO
143 if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
144 fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
145 perror(NULL);
146 exit(EXIT_FAILURE);
147 }
148 #endif
149 ofp = stdout;
150 ofname = "stdout";
151 needclose ^= 2;
152 }
153
154 /* Get the file size. */
155 if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
156 n = LFS_FTELL(fp);
157 rewind(fp);
158 if(n < 0) {
159 fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
160 perror(NULL);
161 exit(EXIT_FAILURE);
162 }
163 if(0x20000000L < n) { n = 0x20000000L; }
164 if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; }
165 } else if(blocksize == 0) { blocksize = 32 << 20; }
166
167 /* Allocate 5blocksize bytes of memory. */
168 T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t));
169 SA = (saidx_t *)malloc(blocksize * sizeof(saidx_t));
170 if((T == NULL) || (SA == NULL)) {
171 fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
172 exit(EXIT_FAILURE);
173 }
174
175 /* Write the blocksize. */
176 if(write_int(ofp, blocksize) != 4) {
177 fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
178 perror(NULL);
179 exit(EXIT_FAILURE);
180 }
181
182 fprintf(stderr, " BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
183 start = clock();
184 for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) {
185 /* Burrows-Wheeler Transform. */
186 pidx = divbwt(T, T, SA, m);
187 if(pidx < 0) {
188 fprintf(stderr, "%s (bw_transform): %s.\n",
189 argv[0],
190 (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
191 exit(EXIT_FAILURE);
192 }
193
194 /* Write the bwted data. */
195 if((write_int(ofp, pidx) != 4) ||
196 (fwrite(T, sizeof(sauchar_t), m, ofp) != m)) {
197 fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
198 perror(NULL);
199 exit(EXIT_FAILURE);
200 }
201 }
202 if(ferror(fp)) {
203 fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
204 perror(NULL);
205 exit(EXIT_FAILURE);
206 }
207 finish = clock();
208 fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
209 n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
210
211 /* Close files */
212 if(needclose & 1) { fclose(fp); }
213 if(needclose & 2) { fclose(ofp); }
214
215 /* Deallocate memory. */
216 free(SA);
217 free(T);
218
219 return 0;
220 }
221