1 #include "Python.h"
2 #include "structmember.h"
3 #include "osdefs.h"
4 #include "marshal.h"
5 #include <time.h>
6 
7 
/* Bit flags describing a zip_searchorder entry.  IS_SOURCE is the
   absence of IS_BYTECODE; IS_PACKAGE may be OR-ed with either. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file suffix together with the IS_* flags it implies. */
struct st_zip_searchorder {
    char suffix[14];    /* fits the longest suffix below (13 chars) + NUL */
    int type;           /* combination of the IS_* flags above */
};

/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table is terminated by an entry with an empty suffix. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel */
};
31 
32 /* zipimporter object definition and support */
33 
typedef struct _zipimporter ZipImporter;

/* Instance layout of a zipimporter object.  All three members are
   filled in by zipimporter_init() and released by
   zipimporter_dealloc(). */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive */
    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};

/* Exception object raised for zipimport-specific failures.
   NOTE(review): assigned outside this part of the file -- presumably
   during module initialization. */
static PyObject *ZipImportError;
/* Dict mapping an archive path to its 'files' dict; consulted and
   updated by zipimporter_init().
   NOTE(review): starts out NULL and must be created before the first
   zipimporter is initialized -- creation is not visible here. */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
                                 int *p_ispackage, char **p_modpath);


/* True if 'op' is an instance of ZipImporter_Type or of a subtype. */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54 
55 
56 /* zipimporter.__init__
57    Split the "subdirectory" from the Zip archive path, lookup a matching
58    entry in sys.path_importer_cache, fetch the file directory from there
59    if found, or else read it from the archive. */
60 static int
zipimporter_init(ZipImporter * self,PyObject * args,PyObject * kwds)61 zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62 {
63     char *path, *p, *prefix, buf[MAXPATHLEN+2];
64     size_t len;
65 
66     if (!_PyArg_NoKeywords("zipimporter()", kwds))
67         return -1;
68 
69     if (!PyArg_ParseTuple(args, "s:zipimporter",
70                           &path))
71         return -1;
72 
73     len = strlen(path);
74     if (len == 0) {
75         PyErr_SetString(ZipImportError, "archive path is empty");
76         return -1;
77     }
78     if (len >= MAXPATHLEN) {
79         PyErr_SetString(ZipImportError,
80                         "archive path too long");
81         return -1;
82     }
83     strcpy(buf, path);
84 
85 #ifdef ALTSEP
86     for (p = buf; *p; p++) {
87         if (*p == ALTSEP)
88             *p = SEP;
89     }
90 #endif
91 
92     path = NULL;
93     prefix = NULL;
94     for (;;) {
95 #ifndef RISCOS
96         struct stat statbuf;
97         int rv;
98 
99         rv = stat(buf, &statbuf);
100         if (rv == 0) {
101             /* it exists */
102             if (S_ISREG(statbuf.st_mode))
103                 /* it's a file */
104                 path = buf;
105             break;
106         }
107 #else
108         if (object_exists(buf)) {
109             /* it exists */
110             if (isfile(buf))
111                 /* it's a file */
112                 path = buf;
113             break;
114         }
115 #endif
116         /* back up one path element */
117         p = strrchr(buf, SEP);
118         if (prefix != NULL)
119             *prefix = SEP;
120         if (p == NULL)
121             break;
122         *p = '\0';
123         prefix = p;
124     }
125     if (path != NULL) {
126         PyObject *files;
127         files = PyDict_GetItemString(zip_directory_cache, path);
128         if (files == NULL) {
129             files = read_directory(buf);
130             if (files == NULL)
131                 return -1;
132             if (PyDict_SetItemString(zip_directory_cache, path,
133                                      files) != 0)
134                 return -1;
135         }
136         else
137             Py_INCREF(files);
138         self->files = files;
139     }
140     else {
141         PyErr_SetString(ZipImportError, "not a Zip file");
142         return -1;
143     }
144 
145     if (prefix == NULL)
146         prefix = "";
147     else {
148         prefix++;
149         len = strlen(prefix);
150         if (prefix[len-1] != SEP) {
151             /* add trailing SEP */
152             prefix[len] = SEP;
153             prefix[len + 1] = '\0';
154         }
155     }
156 
157     self->archive = PyString_FromString(buf);
158     if (self->archive == NULL)
159         return -1;
160 
161     self->prefix = PyString_FromString(prefix);
162     if (self->prefix == NULL)
163         return -1;
164 
165     return 0;
166 }
167 
168 /* GC support. */
169 static int
zipimporter_traverse(PyObject * obj,visitproc visit,void * arg)170 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
171 {
172     ZipImporter *self = (ZipImporter *)obj;
173     Py_VISIT(self->files);
174     return 0;
175 }
176 
177 static void
zipimporter_dealloc(ZipImporter * self)178 zipimporter_dealloc(ZipImporter *self)
179 {
180     PyObject_GC_UnTrack(self);
181     Py_XDECREF(self->archive);
182     Py_XDECREF(self->prefix);
183     Py_XDECREF(self->files);
184     Py_TYPE(self)->tp_free((PyObject *)self);
185 }
186 
187 static PyObject *
zipimporter_repr(ZipImporter * self)188 zipimporter_repr(ZipImporter *self)
189 {
190     char buf[500];
191     char *archive = "???";
192     char *prefix = "";
193 
194     if (self->archive != NULL && PyString_Check(self->archive))
195         archive = PyString_AsString(self->archive);
196     if (self->prefix != NULL && PyString_Check(self->prefix))
197         prefix = PyString_AsString(self->prefix);
198     if (prefix != NULL && *prefix)
199         PyOS_snprintf(buf, sizeof(buf),
200                       "<zipimporter object \"%.300s%c%.150s\">",
201                       archive, SEP, prefix);
202     else
203         PyOS_snprintf(buf, sizeof(buf),
204                       "<zipimporter object \"%.300s\">",
205                       archive);
206     return PyString_FromString(buf);
207 }
208 
/* Return the last component of a dotted name, i.e.
   fullname.split(".")[-1].  The result points into 'fullname'. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
220 
221 /* Given a (sub)modulename, write the potential file path in the
222    archive (without extension) to the path buffer. Return the
223    length of the resulting string. */
224 static int
make_filename(char * prefix,char * name,char * path)225 make_filename(char *prefix, char *name, char *path)
226 {
227     size_t len;
228     char *p;
229 
230     len = strlen(prefix);
231 
232     /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
233     if (len + strlen(name) + 13 >= MAXPATHLEN) {
234         PyErr_SetString(ZipImportError, "path too long");
235         return -1;
236     }
237 
238     strcpy(path, prefix);
239     strcpy(path + len, name);
240     for (p = path + len; *p; p++) {
241         if (*p == '.')
242             *p = SEP;
243     }
244     len += strlen(name);
245     assert(len < INT_MAX);
246     return (int)len;
247 }
248 
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception has been set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain (non-package) module */
    MI_PACKAGE      /* found as a package (an __init__ entry) */
};
255 
256 /* Return some information about a module. */
257 static enum zi_module_info
get_module_info(ZipImporter * self,char * fullname)258 get_module_info(ZipImporter *self, char *fullname)
259 {
260     char *subname, path[MAXPATHLEN + 1];
261     int len;
262     struct st_zip_searchorder *zso;
263 
264     subname = get_subname(fullname);
265 
266     len = make_filename(PyString_AsString(self->prefix), subname, path);
267     if (len < 0)
268         return MI_ERROR;
269 
270     for (zso = zip_searchorder; *zso->suffix; zso++) {
271         strcpy(path + len, zso->suffix);
272         if (PyDict_GetItemString(self->files, path) != NULL) {
273             if (zso->type & IS_PACKAGE)
274                 return MI_PACKAGE;
275             else
276                 return MI_MODULE;
277         }
278     }
279     return MI_NOT_FOUND;
280 }
281 
282 /* Check whether we can satisfy the import of the module named by
283    'fullname'. Return self if we can, None if we can't. */
284 static PyObject *
zipimporter_find_module(PyObject * obj,PyObject * args)285 zipimporter_find_module(PyObject *obj, PyObject *args)
286 {
287     ZipImporter *self = (ZipImporter *)obj;
288     PyObject *path = NULL;
289     char *fullname;
290     enum zi_module_info mi;
291 
292     if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
293                           &fullname, &path))
294         return NULL;
295 
296     mi = get_module_info(self, fullname);
297     if (mi == MI_ERROR)
298         return NULL;
299     if (mi == MI_NOT_FOUND) {
300         Py_INCREF(Py_None);
301         return Py_None;
302     }
303     Py_INCREF(self);
304     return (PyObject *)self;
305 }
306 
307 /* Load and return the module named by 'fullname'. */
308 static PyObject *
zipimporter_load_module(PyObject * obj,PyObject * args)309 zipimporter_load_module(PyObject *obj, PyObject *args)
310 {
311     ZipImporter *self = (ZipImporter *)obj;
312     PyObject *code, *mod, *dict;
313     char *fullname, *modpath;
314     int ispackage;
315 
316     if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
317                           &fullname))
318         return NULL;
319 
320     code = get_module_code(self, fullname, &ispackage, &modpath);
321     if (code == NULL)
322         return NULL;
323 
324     mod = PyImport_AddModule(fullname);
325     if (mod == NULL) {
326         Py_DECREF(code);
327         return NULL;
328     }
329     dict = PyModule_GetDict(mod);
330 
331     /* mod.__loader__ = self */
332     if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
333         goto error;
334 
335     if (ispackage) {
336         /* add __path__ to the module *before* the code gets
337            executed */
338         PyObject *pkgpath, *fullpath;
339         char *prefix = PyString_AsString(self->prefix);
340         char *subname = get_subname(fullname);
341         int err;
342 
343         fullpath = PyString_FromFormat("%s%c%s%s",
344                                 PyString_AsString(self->archive),
345                                 SEP,
346                                 *prefix ? prefix : "",
347                                 subname);
348         if (fullpath == NULL)
349             goto error;
350 
351         pkgpath = Py_BuildValue("[O]", fullpath);
352         Py_DECREF(fullpath);
353         if (pkgpath == NULL)
354             goto error;
355         err = PyDict_SetItemString(dict, "__path__", pkgpath);
356         Py_DECREF(pkgpath);
357         if (err != 0)
358             goto error;
359     }
360     mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
361     Py_DECREF(code);
362     if (Py_VerboseFlag)
363         PySys_WriteStderr("import %s # loaded from Zip %s\n",
364                           fullname, modpath);
365     return mod;
366 error:
367     Py_DECREF(code);
368     Py_DECREF(mod);
369     return NULL;
370 }
371 
372 /* Return a string matching __file__ for the named module */
373 static PyObject *
zipimporter_get_filename(PyObject * obj,PyObject * args)374 zipimporter_get_filename(PyObject *obj, PyObject *args)
375 {
376     ZipImporter *self = (ZipImporter *)obj;
377     PyObject *code;
378     char *fullname, *modpath;
379     int ispackage;
380 
381     if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
382                          &fullname))
383     return NULL;
384 
385     /* Deciding the filename requires working out where the code
386        would come from if the module was actually loaded */
387     code = get_module_code(self, fullname, &ispackage, &modpath);
388     if (code == NULL)
389     return NULL;
390     Py_DECREF(code); /* Only need the path info */
391 
392     return PyString_FromString(modpath);
393 }
394 
395 /* Return a bool signifying whether the module is a package or not. */
396 static PyObject *
zipimporter_is_package(PyObject * obj,PyObject * args)397 zipimporter_is_package(PyObject *obj, PyObject *args)
398 {
399     ZipImporter *self = (ZipImporter *)obj;
400     char *fullname;
401     enum zi_module_info mi;
402 
403     if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
404                           &fullname))
405         return NULL;
406 
407     mi = get_module_info(self, fullname);
408     if (mi == MI_ERROR)
409         return NULL;
410     if (mi == MI_NOT_FOUND) {
411         PyErr_Format(ZipImportError, "can't find module '%.200s'",
412                      fullname);
413         return NULL;
414     }
415     return PyBool_FromLong(mi == MI_PACKAGE);
416 }
417 
418 static PyObject *
zipimporter_get_data(PyObject * obj,PyObject * args)419 zipimporter_get_data(PyObject *obj, PyObject *args)
420 {
421     ZipImporter *self = (ZipImporter *)obj;
422     char *path;
423 #ifdef ALTSEP
424     char *p, buf[MAXPATHLEN + 1];
425 #endif
426     PyObject *toc_entry;
427     Py_ssize_t len;
428 
429     if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
430         return NULL;
431 
432 #ifdef ALTSEP
433     if (strlen(path) >= MAXPATHLEN) {
434         PyErr_SetString(ZipImportError, "path too long");
435         return NULL;
436     }
437     strcpy(buf, path);
438     for (p = buf; *p; p++) {
439         if (*p == ALTSEP)
440             *p = SEP;
441     }
442     path = buf;
443 #endif
444     len = PyString_Size(self->archive);
445     if ((size_t)len < strlen(path) &&
446         strncmp(path, PyString_AsString(self->archive), len) == 0 &&
447         path[len] == SEP) {
448         path = path + len + 1;
449     }
450 
451     toc_entry = PyDict_GetItemString(self->files, path);
452     if (toc_entry == NULL) {
453         PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
454         return NULL;
455     }
456     return get_data(PyString_AsString(self->archive), toc_entry);
457 }
458 
459 static PyObject *
zipimporter_get_code(PyObject * obj,PyObject * args)460 zipimporter_get_code(PyObject *obj, PyObject *args)
461 {
462     ZipImporter *self = (ZipImporter *)obj;
463     char *fullname;
464 
465     if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
466         return NULL;
467 
468     return get_module_code(self, fullname, NULL, NULL);
469 }
470 
471 static PyObject *
zipimporter_get_source(PyObject * obj,PyObject * args)472 zipimporter_get_source(PyObject *obj, PyObject *args)
473 {
474     ZipImporter *self = (ZipImporter *)obj;
475     PyObject *toc_entry;
476     char *fullname, *subname, path[MAXPATHLEN+1];
477     int len;
478     enum zi_module_info mi;
479 
480     if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
481         return NULL;
482 
483     mi = get_module_info(self, fullname);
484     if (mi == MI_ERROR)
485         return NULL;
486     if (mi == MI_NOT_FOUND) {
487         PyErr_Format(ZipImportError, "can't find module '%.200s'",
488                      fullname);
489         return NULL;
490     }
491     subname = get_subname(fullname);
492 
493     len = make_filename(PyString_AsString(self->prefix), subname, path);
494     if (len < 0)
495         return NULL;
496 
497     if (mi == MI_PACKAGE) {
498         path[len] = SEP;
499         strcpy(path + len + 1, "__init__.py");
500     }
501     else
502         strcpy(path + len, ".py");
503 
504     toc_entry = PyDict_GetItemString(self->files, path);
505     if (toc_entry != NULL)
506         return get_data(PyString_AsString(self->archive), toc_entry);
507 
508     /* we have the module, but no source */
509     Py_INCREF(Py_None);
510     return Py_None;
511 }
512 
/* Docstrings for the zipimporter methods; each one is referenced from
   the zipimporter_methods table. */

/* docstring of zipimporter.find_module */
PyDoc_STRVAR(doc_find_module,
"find_module(fullname, path=None) -> self or None.\n\
\n\
Search for a module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the zipimporter\n\
instance itself if the module was found, or None if it wasn't.\n\
The optional 'path' argument is ignored -- it's there for compatibility\n\
with the importer protocol.");

/* docstring of zipimporter.load_module */
PyDoc_STRVAR(doc_load_module,
"load_module(fullname) -> module.\n\
\n\
Load the module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the imported\n\
module, or raises ZipImportError if it wasn't found.");

/* docstring of zipimporter.get_data */
PyDoc_STRVAR(doc_get_data,
"get_data(pathname) -> string with file data.\n\
\n\
Return the data associated with 'pathname'. Raise IOError if\n\
the file wasn't found.");

/* docstring of zipimporter.is_package */
PyDoc_STRVAR(doc_is_package,
"is_package(fullname) -> bool.\n\
\n\
Return True if the module specified by fullname is a package.\n\
Raise ZipImportError if the module couldn't be found.");

/* docstring of zipimporter.get_code */
PyDoc_STRVAR(doc_get_code,
"get_code(fullname) -> code object.\n\
\n\
Return the code object for the specified module. Raise ZipImportError\n\
if the module couldn't be found.");

/* docstring of zipimporter.get_source */
PyDoc_STRVAR(doc_get_source,
"get_source(fullname) -> source string.\n\
\n\
Return the source code for the specified module. Raise ZipImportError\n\
if the module couldn't be found, return None if the archive does\n\
contain the module, but has no source for it.");


/* docstring of zipimporter.get_filename */
PyDoc_STRVAR(doc_get_filename,
"get_filename(fullname) -> filename string.\n\
\n\
Return the filename for the specified module.");
559 
/* Method table of the zipimporter type: Python-level name, C
   implementation, calling convention and docstring.  Every method
   takes its arguments as a tuple (METH_VARARGS). */
static PyMethodDef zipimporter_methods[] = {
    {"find_module", zipimporter_find_module, METH_VARARGS,
     doc_find_module},
    {"load_module", zipimporter_load_module, METH_VARARGS,
     doc_load_module},
    {"get_data", zipimporter_get_data, METH_VARARGS,
     doc_get_data},
    {"get_code", zipimporter_get_code, METH_VARARGS,
     doc_get_code},
    {"get_source", zipimporter_get_source, METH_VARARGS,
     doc_get_source},
    {"get_filename", zipimporter_get_filename, METH_VARARGS,
     doc_get_filename},
    {"is_package", zipimporter_is_package, METH_VARARGS,
     doc_is_package},
    {NULL,              NULL}   /* sentinel */
};
577 
/* Read-only attributes exposing the three object members of
   ZipImporter; note that 'files' is published as '_files'. */
static PyMemberDef zipimporter_members[] = {
    {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
    {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
    {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
    {NULL}      /* sentinel */
};
584 
/* Docstring of the zipimporter type itself (used as tp_doc). */
PyDoc_STRVAR(zipimporter_doc,
"zipimporter(archivepath) -> zipimporter object\n\
\n\
Create a new zipimporter instance. 'archivepath' must be a path to\n\
a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
valid directory inside the archive.\n\
\n\
'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
archive.\n\
\n\
The 'archive' attribute of zipimporter objects contains the name of the\n\
zipfile targeted.");
598 
/* Expands to 0 whatever its argument is, so the type pointer in the
   static initializer below starts out as 0; the argument only records
   the intended value.
   NOTE(review): presumably the real address is filled in during module
   initialization, which is not visible here -- confirm. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE) and takes part in cyclic GC
   (Py_TPFLAGS_HAVE_GC): it supplies tp_traverse and uses
   PyObject_GC_Del as tp_free, but defines no tp_clear. */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimporter_doc,                            /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimporter_init,                 /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
643 
644 
645 /* implementation */
646 
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed
   32-bit two's-complement value. This partially reimplements
   marshal.c:r_long().

   The bytes are assembled in an unsigned long so that no bit is ever
   shifted into the sign bit of a signed type, and the sign is applied
   arithmetically, so the result is the same whether long is 32 or
   64 bits wide. */
static long
get_long(unsigned char *buf)
{
    unsigned long bits;
    long value;

    bits  = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] <<  8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    value = (long)(bits & 0x7FFFFFFFUL);
    if (bits & 0x80000000UL) {
        /* subtract 2**31 in two steps so a 32-bit long cannot overflow */
        value = value - 0x7FFFFFFFL - 1;
    }
    return value;
}
663 
664 /*
665    read_directory(archive) -> files dict (new reference)
666 
667    Given a path to a Zip archive, build a dict, mapping file names
668    (local to the archive, using SEP as a separator) to toc entries.
669 
670    A toc_entry is a tuple:
671 
672        (__file__,      # value to use for __file__, available for all files
673     compress,      # compression kind; 0 for uncompressed
674     data_size,     # size of compressed data on disk
675     file_size,     # size of decompressed data
676     file_offset,   # offset of file header from start of archive
677     time,          # mod time of file (in dos format)
678     date,          # mod data of file (in dos format)
679     crc,           # crc checksum of the data
680        )
681 
682    Directories can be recognized by the trailing SEP in the name,
683    data_size and file_offset are 0.
684 */
685 static PyObject *
read_directory(char * archive)686 read_directory(char *archive)
687 {
688     PyObject *files = NULL;
689     FILE *fp;
690     long compress, crc, data_size, file_size, file_offset, date, time;
691     long header_offset, name_size, header_size, header_position;
692     long i, l, count;
693     size_t length;
694     char path[MAXPATHLEN + 5];
695     char name[MAXPATHLEN + 5];
696     char *p, endof_central_dir[22];
697     long arc_offset; /* offset from beginning of file to start of zip-archive */
698 
699     if (strlen(archive) > MAXPATHLEN) {
700         PyErr_SetString(PyExc_OverflowError,
701                         "Zip path name is too long");
702         return NULL;
703     }
704     strcpy(path, archive);
705 
706     fp = fopen(archive, "rb");
707     if (fp == NULL) {
708         PyErr_Format(ZipImportError, "can't open Zip file: "
709                      "'%.200s'", archive);
710         return NULL;
711     }
712     fseek(fp, -22, SEEK_END);
713     header_position = ftell(fp);
714     if (fread(endof_central_dir, 1, 22, fp) != 22) {
715         fclose(fp);
716         PyErr_Format(ZipImportError, "can't read Zip file: "
717                      "'%.200s'", archive);
718         return NULL;
719     }
720     if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
721         /* Bad: End of Central Dir signature */
722         fclose(fp);
723         PyErr_Format(ZipImportError, "not a Zip file: "
724                      "'%.200s'", archive);
725         return NULL;
726     }
727 
728     header_size = get_long((unsigned char *)endof_central_dir + 12);
729     header_offset = get_long((unsigned char *)endof_central_dir + 16);
730     arc_offset = header_position - header_offset - header_size;
731     header_offset += arc_offset;
732 
733     files = PyDict_New();
734     if (files == NULL)
735         goto error;
736 
737     length = (long)strlen(path);
738     path[length] = SEP;
739 
740     /* Start of Central Directory */
741     count = 0;
742     for (;;) {
743         PyObject *t;
744         int err;
745 
746         fseek(fp, header_offset, 0);  /* Start of file header */
747         l = PyMarshal_ReadLongFromFile(fp);
748         if (l != 0x02014B50)
749             break;              /* Bad: Central Dir File Header */
750         fseek(fp, header_offset + 10, 0);
751         compress = PyMarshal_ReadShortFromFile(fp);
752         time = PyMarshal_ReadShortFromFile(fp);
753         date = PyMarshal_ReadShortFromFile(fp);
754         crc = PyMarshal_ReadLongFromFile(fp);
755         data_size = PyMarshal_ReadLongFromFile(fp);
756         file_size = PyMarshal_ReadLongFromFile(fp);
757         name_size = PyMarshal_ReadShortFromFile(fp);
758         header_size = 46 + name_size +
759            PyMarshal_ReadShortFromFile(fp) +
760            PyMarshal_ReadShortFromFile(fp);
761         fseek(fp, header_offset + 42, 0);
762         file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
763         if (name_size > MAXPATHLEN)
764             name_size = MAXPATHLEN;
765 
766         p = name;
767         for (i = 0; i < name_size; i++) {
768             *p = (char)getc(fp);
769             if (*p == '/')
770                 *p = SEP;
771             p++;
772         }
773         *p = 0;         /* Add terminating null byte */
774         header_offset += header_size;
775 
776         strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
777 
778         t = Py_BuildValue("siiiiiii", path, compress, data_size,
779                           file_size, file_offset, time, date, crc);
780         if (t == NULL)
781             goto error;
782         err = PyDict_SetItemString(files, name, t);
783         Py_DECREF(t);
784         if (err != 0)
785             goto error;
786         count++;
787     }
788     fclose(fp);
789     if (Py_VerboseFlag)
790         PySys_WriteStderr("# zipimport: found %ld names in %s\n",
791             count, archive);
792     return files;
793 error:
794     fclose(fp);
795     Py_XDECREF(files);
796     return NULL;
797 }
798 
799 /* Return the zlib.decompress function object, or NULL if zlib couldn't
800    be imported. The function is cached when found, so subsequent calls
801    don't import zlib again. */
802 static PyObject *
get_decompress_func(void)803 get_decompress_func(void)
804 {
805     static int importing_zlib = 0;
806     PyObject *zlib;
807     PyObject *decompress;
808 
809     if (importing_zlib != 0)
810         /* Someone has a zlib.py[co] in their Zip file;
811            let's avoid a stack overflow. */
812         return NULL;
813     importing_zlib = 1;
814     zlib = PyImport_ImportModuleNoBlock("zlib");
815     importing_zlib = 0;
816     if (zlib != NULL) {
817         decompress = PyObject_GetAttrString(zlib,
818                                             "decompress");
819         Py_DECREF(zlib);
820     }
821     else {
822         PyErr_Clear();
823         decompress = NULL;
824     }
825     if (Py_VerboseFlag)
826         PySys_WriteStderr("# zipimport: zlib %s\n",
827             zlib != NULL ? "available": "UNAVAILABLE");
828     return decompress;
829 }
830 
831 /* Given a path to a Zip file and a toc_entry, return the (uncompressed)
832    data as a new reference. */
833 static PyObject *
get_data(char * archive,PyObject * toc_entry)834 get_data(char *archive, PyObject *toc_entry)
835 {
836     PyObject *raw_data, *data = NULL, *decompress;
837     char *buf;
838     FILE *fp;
839     int err;
840     Py_ssize_t bytes_read = 0;
841     long l;
842     char *datapath;
843     long compress, data_size, file_size, file_offset;
844     long time, date, crc;
845 
846     if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
847                           &data_size, &file_size, &file_offset, &time,
848                           &date, &crc)) {
849         return NULL;
850     }
851 
852     fp = fopen(archive, "rb");
853     if (!fp) {
854         PyErr_Format(PyExc_IOError,
855            "zipimport: can not open file %s", archive);
856         return NULL;
857     }
858 
859     /* Check to make sure the local file header is correct */
860     fseek(fp, file_offset, 0);
861     l = PyMarshal_ReadLongFromFile(fp);
862     if (l != 0x04034B50) {
863         /* Bad: Local File Header */
864         PyErr_Format(ZipImportError,
865                      "bad local file header in %s",
866                      archive);
867         fclose(fp);
868         return NULL;
869     }
870     fseek(fp, file_offset + 26, 0);
871     l = 30 + PyMarshal_ReadShortFromFile(fp) +
872         PyMarshal_ReadShortFromFile(fp);        /* local header size */
873     file_offset += l;           /* Start of file data */
874 
875     raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
876                                           data_size : data_size + 1);
877     if (raw_data == NULL) {
878         fclose(fp);
879         return NULL;
880     }
881     buf = PyString_AsString(raw_data);
882 
883     err = fseek(fp, file_offset, 0);
884     if (err == 0)
885         bytes_read = fread(buf, 1, data_size, fp);
886     fclose(fp);
887     if (err || bytes_read != data_size) {
888         PyErr_SetString(PyExc_IOError,
889                         "zipimport: can't read data");
890         Py_DECREF(raw_data);
891         return NULL;
892     }
893 
894     if (compress != 0) {
895         buf[data_size] = 'Z';  /* saw this in zipfile.py */
896         data_size++;
897     }
898     buf[data_size] = '\0';
899 
900     if (compress == 0)  /* data is not compressed */
901         return raw_data;
902 
903     /* Decompress with zlib */
904     decompress = get_decompress_func();
905     if (decompress == NULL) {
906         PyErr_SetString(ZipImportError,
907                         "can't decompress data; "
908                         "zlib not available");
909         goto error;
910     }
911     data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
912     Py_DECREF(decompress);
913 error:
914     Py_DECREF(raw_data);
915     return data;
916 }
917 
/* Lenient mtime comparison: return non-zero when the two timestamps
   differ by at most one second.  The mtime stored in the archive is
   less precise than the one stored in a .pyc (dostime only stores
   even seconds), so an exact match cannot be required. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return ((delta < 0) ? -delta : delta) <= 1;
}
930 
931 /* Given the contents of a .py[co] file in a buffer, unmarshal the data
932    and return the code object. Return None if it the magic word doesn't
933    match (we do this instead of raising an exception as we fall back
934    to .py if available and we don't want to mask other errors).
935    Returns a new reference. */
936 static PyObject *
unmarshal_code(char * pathname,PyObject * data,time_t mtime)937 unmarshal_code(char *pathname, PyObject *data, time_t mtime)
938 {
939     PyObject *code;
940     char *buf = PyString_AsString(data);
941     Py_ssize_t size = PyString_Size(data);
942 
943     if (size <= 9) {
944         PyErr_SetString(ZipImportError,
945                         "bad pyc data");
946         return NULL;
947     }
948 
949     if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
950         if (Py_VerboseFlag)
951             PySys_WriteStderr("# %s has bad magic\n",
952                               pathname);
953         Py_INCREF(Py_None);
954         return Py_None;  /* signal caller to try alternative */
955     }
956 
957     if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
958                                 mtime)) {
959         if (Py_VerboseFlag)
960             PySys_WriteStderr("# %s has bad mtime\n",
961                               pathname);
962         Py_INCREF(Py_None);
963         return Py_None;  /* signal caller to try alternative */
964     }
965 
966     code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
967     if (code == NULL)
968         return NULL;
969     if (!PyCode_Check(code)) {
970         Py_DECREF(code);
971         PyErr_Format(PyExc_TypeError,
972              "compiled module %.200s is not a code object",
973              pathname);
974         return NULL;
975     }
976     return code;
977 }
978 
979 /* Replace any occurances of "\r\n?" in the input string with "\n".
980    This converts DOS and Mac line endings to Unix line endings.
981    Also append a trailing "\n" to be compatible with
982    PyParser_SimpleParseFile(). Returns a new reference. */
983 static PyObject *
normalize_line_endings(PyObject * source)984 normalize_line_endings(PyObject *source)
985 {
986     char *buf, *q, *p = PyString_AsString(source);
987     PyObject *fixed_source;
988 
989     if (!p)
990         return NULL;
991 
992     /* one char extra for trailing \n and one for terminating \0 */
993     buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
994     if (buf == NULL) {
995         PyErr_SetString(PyExc_MemoryError,
996                         "zipimport: no memory to allocate "
997                         "source buffer");
998         return NULL;
999     }
1000     /* replace "\r\n?" by "\n" */
1001     for (q = buf; *p != '\0'; p++) {
1002         if (*p == '\r') {
1003             *q++ = '\n';
1004             if (*(p + 1) == '\n')
1005                 p++;
1006         }
1007         else
1008             *q++ = *p;
1009     }
1010     *q++ = '\n';  /* add trailing \n */
1011     *q = '\0';
1012     fixed_source = PyString_FromString(buf);
1013     PyMem_Free(buf);
1014     return fixed_source;
1015 }
1016 
1017 /* Given a string buffer containing Python source code, compile it
1018    return and return a code object as a new reference. */
1019 static PyObject *
compile_source(char * pathname,PyObject * source)1020 compile_source(char *pathname, PyObject *source)
1021 {
1022     PyObject *code, *fixed_source;
1023 
1024     fixed_source = normalize_line_endings(source);
1025     if (fixed_source == NULL)
1026         return NULL;
1027 
1028     code = Py_CompileString(PyString_AsString(fixed_source), pathname,
1029                             Py_file_input);
1030     Py_DECREF(fixed_source);
1031     return code;
1032 }
1033 
/* Convert the packed date/time values found in the Zip archive to a
   value that's compatible with the time stamp stored in .pyc files.

   dosdate: bits 0-4 day of month, bits 5-8 month (1-based),
            bits 9-15 year offset.
   dostime: bits 0-4 seconds / 2, bits 5-10 minutes, bits 11-15 hours.

   The fields are interpreted as local time by mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    memset(&fields, 0, sizeof(fields));

    /* tm_year counts from 1900; adding 80 makes the stored year
       offset relative to 1980 */
    fields.tm_year = 80 + ((dosdate >> 9) & 0x7f);
    fields.tm_mon  = ((dosdate >> 5) & 0x0f) - 1;  /* tm_mon is 0-based */
    fields.tm_mday = dosdate & 0x1f;

    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min  = (dostime >> 5) & 0x3f;
    fields.tm_sec  = 2 * (dostime & 0x1f);

    /* let mktime() decide about DST; tm_wday/tm_yday are ignored */
    fields.tm_isdst = -1;

    return mktime(&fields);
}
1053 
1054 /* Given a path to a .pyc or .pyo file in the archive, return the
1055    modification time of the matching .py file, or 0 if no source
1056    is available. */
1057 static time_t
get_mtime_of_source(ZipImporter * self,char * path)1058 get_mtime_of_source(ZipImporter *self, char *path)
1059 {
1060     PyObject *toc_entry;
1061     time_t mtime = 0;
1062     Py_ssize_t lastchar = strlen(path) - 1;
1063     char savechar = path[lastchar];
1064     path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1065     toc_entry = PyDict_GetItemString(self->files, path);
1066     if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1067         PyTuple_Size(toc_entry) == 8) {
1068         /* fetch the time stamp of the .py file for comparison
1069            with an embedded pyc time stamp */
1070         int time, date;
1071         time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1072         date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1073         mtime = parse_dostime(time, date);
1074     }
1075     path[lastchar] = savechar;
1076     return mtime;
1077 }
1078 
1079 /* Return the code object for the module named by 'fullname' from the
1080    Zip archive as a new reference. */
1081 static PyObject *
get_code_from_data(ZipImporter * self,int ispackage,int isbytecode,time_t mtime,PyObject * toc_entry)1082 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1083                    time_t mtime, PyObject *toc_entry)
1084 {
1085     PyObject *data, *code;
1086     char *modpath;
1087     char *archive = PyString_AsString(self->archive);
1088 
1089     if (archive == NULL)
1090         return NULL;
1091 
1092     data = get_data(archive, toc_entry);
1093     if (data == NULL)
1094         return NULL;
1095 
1096     modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1097 
1098     if (isbytecode) {
1099         code = unmarshal_code(modpath, data, mtime);
1100     }
1101     else {
1102         code = compile_source(modpath, data);
1103     }
1104     Py_DECREF(data);
1105     return code;
1106 }
1107 
1108 /* Get the code object associated with the module specified by
1109    'fullname'. */
1110 static PyObject *
get_module_code(ZipImporter * self,char * fullname,int * p_ispackage,char ** p_modpath)1111 get_module_code(ZipImporter *self, char *fullname,
1112                 int *p_ispackage, char **p_modpath)
1113 {
1114     PyObject *toc_entry;
1115     char *subname, path[MAXPATHLEN + 1];
1116     int len;
1117     struct st_zip_searchorder *zso;
1118 
1119     subname = get_subname(fullname);
1120 
1121     len = make_filename(PyString_AsString(self->prefix), subname, path);
1122     if (len < 0)
1123         return NULL;
1124 
1125     for (zso = zip_searchorder; *zso->suffix; zso++) {
1126         PyObject *code = NULL;
1127 
1128         strcpy(path + len, zso->suffix);
1129         if (Py_VerboseFlag > 1)
1130             PySys_WriteStderr("# trying %s%c%s\n",
1131                               PyString_AsString(self->archive),
1132                               SEP, path);
1133         toc_entry = PyDict_GetItemString(self->files, path);
1134         if (toc_entry != NULL) {
1135             time_t mtime = 0;
1136             int ispackage = zso->type & IS_PACKAGE;
1137             int isbytecode = zso->type & IS_BYTECODE;
1138 
1139             if (isbytecode)
1140                 mtime = get_mtime_of_source(self, path);
1141             if (p_ispackage != NULL)
1142                 *p_ispackage = ispackage;
1143             code = get_code_from_data(self, ispackage,
1144                                       isbytecode, mtime,
1145                                       toc_entry);
1146             if (code == Py_None) {
1147                 /* bad magic number or non-matching mtime
1148                    in byte code, try next */
1149                 Py_DECREF(code);
1150                 continue;
1151             }
1152             if (code != NULL && p_modpath != NULL)
1153                 *p_modpath = PyString_AsString(
1154                     PyTuple_GetItem(toc_entry, 0));
1155             return code;
1156         }
1157     }
1158     PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1159     return NULL;
1160 }
1161 
1162 
1163 /* Module init */
1164 
1165 PyDoc_STRVAR(zipimport_doc,
1166 "zipimport provides support for importing Python modules from Zip archives.\n\
1167 \n\
1168 This module exports three objects:\n\
1169 - zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1170 - ZipImportError: exception raised by zipimporter objects. It's a\n\
1171   subclass of ImportError, so it can be caught as ImportError, too.\n\
1172 - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1173   info dicts, as used in zipimporter._files.\n\
1174 \n\
1175 It is usually not needed to use the zipimport module explicitly; it is\n\
1176 used by the builtin import mechanism for sys.path items that are paths\n\
1177 to Zip archives.");
1178 
1179 PyMODINIT_FUNC
initzipimport(void)1180 initzipimport(void)
1181 {
1182     PyObject *mod;
1183 
1184     if (PyType_Ready(&ZipImporter_Type) < 0)
1185         return;
1186 
1187     /* Correct directory separator */
1188     zip_searchorder[0].suffix[0] = SEP;
1189     zip_searchorder[1].suffix[0] = SEP;
1190     zip_searchorder[2].suffix[0] = SEP;
1191     if (Py_OptimizeFlag) {
1192         /* Reverse *.pyc and *.pyo */
1193         struct st_zip_searchorder tmp;
1194         tmp = zip_searchorder[0];
1195         zip_searchorder[0] = zip_searchorder[1];
1196         zip_searchorder[1] = tmp;
1197         tmp = zip_searchorder[3];
1198         zip_searchorder[3] = zip_searchorder[4];
1199         zip_searchorder[4] = tmp;
1200     }
1201 
1202     mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1203                          NULL, PYTHON_API_VERSION);
1204     if (mod == NULL)
1205         return;
1206 
1207     ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1208                                         PyExc_ImportError, NULL);
1209     if (ZipImportError == NULL)
1210         return;
1211 
1212     Py_INCREF(ZipImportError);
1213     if (PyModule_AddObject(mod, "ZipImportError",
1214                            ZipImportError) < 0)
1215         return;
1216 
1217     Py_INCREF(&ZipImporter_Type);
1218     if (PyModule_AddObject(mod, "zipimporter",
1219                            (PyObject *)&ZipImporter_Type) < 0)
1220         return;
1221 
1222     zip_directory_cache = PyDict_New();
1223     if (zip_directory_cache == NULL)
1224         return;
1225     Py_INCREF(zip_directory_cache);
1226     if (PyModule_AddObject(mod, "_zip_directory_cache",
1227                            zip_directory_cache) < 0)
1228         return;
1229 }
1230