1 /*
2  * mksary.c for libdivsufsort
3  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #if HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30 #include <stdio.h>
31 #if HAVE_STRING_H
32 # include <string.h>
33 #endif
34 #if HAVE_STDLIB_H
35 # include <stdlib.h>
36 #endif
37 #if HAVE_MEMORY_H
38 # include <memory.h>
39 #endif
40 #if HAVE_STDDEF_H
41 # include <stddef.h>
42 #endif
43 #if HAVE_STRINGS_H
44 # include <strings.h>
45 #endif
46 #if HAVE_SYS_TYPES_H
47 # include <sys/types.h>
48 #endif
49 #if HAVE_IO_H && HAVE_FCNTL_H
50 # include <io.h>
51 # include <fcntl.h>
52 #endif
53 #include <time.h>
54 #include <divsufsort.h>
55 #include "lfs.h"
56 
57 
/*
 * Prints the program banner (including the library version reported by
 * divsufsort_version()) followed by a usage line to stderr, then terminates
 * the process with the given exit status. This function does not return.
 *
 *   progname  name shown in the usage line
 *   status    value passed to exit()
 */
static void
print_help(const char *progname, int status) {
  const char *version = divsufsort_version();

  fprintf(stderr,
          "mksary, a simple suffix array builder, version %s.\n",
          version);
  fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname);

  exit(status);
}
67 
68 int
main(int argc,const char * argv[])69 main(int argc, const char *argv[]) {
70   FILE *fp, *ofp;
71   const char *fname, *ofname;
72   sauchar_t *T;
73   saidx_t *SA;
74   LFS_OFF_T n;
75   clock_t start, finish;
76   saint_t needclose = 3;
77 
78   /* Check arguments. */
79   if((argc == 1) ||
80      (strcmp(argv[1], "-h") == 0) ||
81      (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
82   if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
83 
84   /* Open a file for reading. */
85   if(strcmp(argv[1], "-") != 0) {
86 #if HAVE_FOPEN_S
87     if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
88 #else
89     if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
90 #endif
91       fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
92       perror(NULL);
93       exit(EXIT_FAILURE);
94     }
95   } else {
96 #if HAVE__SETMODE && HAVE__FILENO
97     if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
98       fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
99       perror(NULL);
100       exit(EXIT_FAILURE);
101     }
102 #endif
103     fp = stdin;
104     fname = "stdin";
105     needclose ^= 1;
106   }
107 
108   /* Open a file for writing. */
109   if(strcmp(argv[2], "-") != 0) {
110 #if HAVE_FOPEN_S
111     if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
112 #else
113     if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
114 #endif
115       fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
116       perror(NULL);
117       exit(EXIT_FAILURE);
118     }
119   } else {
120 #if HAVE__SETMODE && HAVE__FILENO
121     if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
122       fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
123       perror(NULL);
124       exit(EXIT_FAILURE);
125     }
126 #endif
127     ofp = stdout;
128     ofname = "stdout";
129     needclose ^= 2;
130   }
131 
132   /* Get the file size. */
133   if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
134     n = LFS_FTELL(fp);
135     rewind(fp);
136     if(n < 0) {
137       fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
138       perror(NULL);
139       exit(EXIT_FAILURE);
140     }
141     if(0x7fffffff <= n) {
142       fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
143       exit(EXIT_FAILURE);
144     }
145   } else {
146     fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
147     perror(NULL);
148     exit(EXIT_FAILURE);
149   }
150 
151   /* Allocate 5blocksize bytes of memory. */
152   T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t));
153   SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t));
154   if((T == NULL) || (SA == NULL)) {
155     fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
156     exit(EXIT_FAILURE);
157   }
158 
159   /* Read n bytes of data. */
160   if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
161     fprintf(stderr, "%s: %s `%s': ",
162       argv[0],
163       (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
164       fname);
165     perror(NULL);
166     exit(EXIT_FAILURE);
167   }
168   if(needclose & 1) { fclose(fp); }
169 
170   /* Construct the suffix array. */
171   fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
172   start = clock();
173   if(divsufsort(T, SA, (saidx_t)n) != 0) {
174     fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
175     exit(EXIT_FAILURE);
176   }
177   finish = clock();
178   fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
179 
180   /* Write the suffix array. */
181   if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) {
182     fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
183     perror(NULL);
184     exit(EXIT_FAILURE);
185   }
186   if(needclose & 2) { fclose(ofp); }
187 
188   /* Deallocate memory. */
189   free(SA);
190   free(T);
191 
192   return 0;
193 }
194