1 #include "Python.h"
2 #include "structmember.h"
3 #include "osdefs.h"
4 #include "marshal.h"
5 #include <time.h>
6 
7 
/* Bit flags describing what kind of archive entry a suffix denotes. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One row of the search table: a file-name suffix and its flag bits. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix, "/__init__.pyc", is 13 chars + NUL */
    int type;           /* combination of the IS_* flags above */
};

/* Order in which candidate entries are probed when looking for a
   module inside the Zip archive: package __init__ files come first,
   followed by plain .pyc, .pyo and .py files.  The table is terminated
   by a row with an empty suffix.  initzipimport() swaps the .pyc and
   .pyo rows when running in optimized mode and replaces '/' by SEP,
   which is why the table is not const. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py",  IS_PACKAGE | IS_SOURCE},
    {".pyc",          IS_BYTECODE},
    {".pyo",          IS_BYTECODE},
    {".py",           IS_SOURCE},
    {"",              0}
};
31 
32 /* zipimporter object definition and support */
33 
34 typedef struct _zipimporter ZipImporter;
35 
36 struct _zipimporter {
37     PyObject_HEAD
38     PyObject *archive;  /* pathname of the Zip archive */
39     PyObject *prefix;   /* file prefix: "a/sub/directory/" */
40     PyObject *files;    /* dict with file info {path: toc_entry} */
41 };
42 
43 static PyObject *ZipImportError;
44 static PyObject *zip_directory_cache = NULL;
45 
46 /* forward decls */
47 static PyObject *read_directory(const char *archive);
48 static PyObject *get_data(const char *archive, PyObject *toc_entry);
49 static PyObject *get_module_code(ZipImporter *self, char *fullname,
50                                  int *p_ispackage, char **p_modpath);
51 
52 
53 #define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54 
55 
56 /* zipimporter.__init__
57    Split the "subdirectory" from the Zip archive path, lookup a matching
58    entry in sys.path_importer_cache, fetch the file directory from there
59    if found, or else read it from the archive. */
60 static int
zipimporter_init(ZipImporter * self,PyObject * args,PyObject * kwds)61 zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62 {
63     char *path, *p, *prefix, buf[MAXPATHLEN+2];
64     size_t len;
65 
66     if (!_PyArg_NoKeywords("zipimporter()", kwds))
67         return -1;
68 
69     if (!PyArg_ParseTuple(args, "s:zipimporter",
70                           &path))
71         return -1;
72 
73     len = strlen(path);
74     if (len == 0) {
75         PyErr_SetString(ZipImportError, "archive path is empty");
76         return -1;
77     }
78     if (len >= MAXPATHLEN) {
79         PyErr_SetString(ZipImportError,
80                         "archive path too long");
81         return -1;
82     }
83     strcpy(buf, path);
84 
85 #ifdef ALTSEP
86     for (p = buf; *p; p++) {
87         if (*p == ALTSEP)
88             *p = SEP;
89     }
90 #endif
91 
92     path = NULL;
93     prefix = NULL;
94     for (;;) {
95 #ifndef RISCOS
96         struct stat statbuf;
97         int rv;
98 
99         rv = stat(buf, &statbuf);
100         if (rv == 0) {
101             /* it exists */
102             if (S_ISREG(statbuf.st_mode))
103                 /* it's a file */
104                 path = buf;
105             break;
106         }
107 #else
108         if (object_exists(buf)) {
109             /* it exists */
110             if (isfile(buf))
111                 /* it's a file */
112                 path = buf;
113             break;
114         }
115 #endif
116         /* back up one path element */
117         p = strrchr(buf, SEP);
118         if (prefix != NULL)
119             *prefix = SEP;
120         if (p == NULL)
121             break;
122         *p = '\0';
123         prefix = p;
124     }
125     if (path != NULL) {
126         PyObject *files;
127         files = PyDict_GetItemString(zip_directory_cache, path);
128         if (files == NULL) {
129             files = read_directory(buf);
130             if (files == NULL)
131                 return -1;
132             if (PyDict_SetItemString(zip_directory_cache, path,
133                                      files) != 0)
134                 return -1;
135         }
136         else
137             Py_INCREF(files);
138         self->files = files;
139     }
140     else {
141         PyErr_SetString(ZipImportError, "not a Zip file");
142         return -1;
143     }
144 
145     if (prefix == NULL)
146         prefix = "";
147     else {
148         prefix++;
149         len = strlen(prefix);
150         if (prefix[len-1] != SEP) {
151             /* add trailing SEP */
152             prefix[len] = SEP;
153             prefix[len + 1] = '\0';
154         }
155     }
156 
157     self->archive = PyString_FromString(buf);
158     if (self->archive == NULL)
159         return -1;
160 
161     self->prefix = PyString_FromString(prefix);
162     if (self->prefix == NULL)
163         return -1;
164 
165     return 0;
166 }
167 
168 /* GC support. */
169 static int
zipimporter_traverse(PyObject * obj,visitproc visit,void * arg)170 zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
171 {
172     ZipImporter *self = (ZipImporter *)obj;
173     Py_VISIT(self->files);
174     return 0;
175 }
176 
177 static void
zipimporter_dealloc(ZipImporter * self)178 zipimporter_dealloc(ZipImporter *self)
179 {
180     PyObject_GC_UnTrack(self);
181     Py_XDECREF(self->archive);
182     Py_XDECREF(self->prefix);
183     Py_XDECREF(self->files);
184     Py_TYPE(self)->tp_free((PyObject *)self);
185 }
186 
187 static PyObject *
zipimporter_repr(ZipImporter * self)188 zipimporter_repr(ZipImporter *self)
189 {
190     char buf[500];
191     char *archive = "???";
192     char *prefix = "";
193 
194     if (self->archive != NULL && PyString_Check(self->archive))
195         archive = PyString_AsString(self->archive);
196     if (self->prefix != NULL && PyString_Check(self->prefix))
197         prefix = PyString_AsString(self->prefix);
198     if (prefix != NULL && *prefix)
199         PyOS_snprintf(buf, sizeof(buf),
200                       "<zipimporter object \"%.300s%c%.150s\">",
201                       archive, SEP, prefix);
202     else
203         PyOS_snprintf(buf, sizeof(buf),
204                       "<zipimporter object \"%.300s\">",
205                       archive);
206     return PyString_FromString(buf);
207 }
208 
/* Equivalent of fullname.split(".")[-1]: return a pointer (into
   'fullname' itself) to the part after the last '.', or 'fullname'
   when it contains no dot. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
220 
221 /* Given a (sub)modulename, write the potential file path in the
222    archive (without extension) to the path buffer. Return the
223    length of the resulting string. */
224 static int
make_filename(char * prefix,char * name,char * path)225 make_filename(char *prefix, char *name, char *path)
226 {
227     size_t len;
228     char *p;
229 
230     len = strlen(prefix);
231 
232     /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
233     if (len + strlen(name) + 13 >= MAXPATHLEN) {
234         PyErr_SetString(ZipImportError, "path too long");
235         return -1;
236     }
237 
238     strcpy(path, prefix);
239     strcpy(path + len, name);
240     for (p = path + len; *p; p++) {
241         if (*p == '.')
242             *p = SEP;
243     }
244     len += strlen(name);
245     assert(len < INT_MAX);
246     return (int)len;
247 }
248 
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception has been set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (__init__ entry) */
};
255 
256 /* Return some information about a module. */
257 static enum zi_module_info
get_module_info(ZipImporter * self,char * fullname)258 get_module_info(ZipImporter *self, char *fullname)
259 {
260     char *subname, path[MAXPATHLEN + 1];
261     int len;
262     struct st_zip_searchorder *zso;
263 
264     subname = get_subname(fullname);
265 
266     len = make_filename(PyString_AsString(self->prefix), subname, path);
267     if (len < 0)
268         return MI_ERROR;
269 
270     for (zso = zip_searchorder; *zso->suffix; zso++) {
271         strcpy(path + len, zso->suffix);
272         if (PyDict_GetItemString(self->files, path) != NULL) {
273             if (zso->type & IS_PACKAGE)
274                 return MI_PACKAGE;
275             else
276                 return MI_MODULE;
277         }
278     }
279     return MI_NOT_FOUND;
280 }
281 
282 /* Check whether we can satisfy the import of the module named by
283    'fullname'. Return self if we can, None if we can't. */
284 static PyObject *
zipimporter_find_module(PyObject * obj,PyObject * args)285 zipimporter_find_module(PyObject *obj, PyObject *args)
286 {
287     ZipImporter *self = (ZipImporter *)obj;
288     PyObject *path = NULL;
289     char *fullname;
290     enum zi_module_info mi;
291 
292     if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
293                           &fullname, &path))
294         return NULL;
295 
296     mi = get_module_info(self, fullname);
297     if (mi == MI_ERROR)
298         return NULL;
299     if (mi == MI_NOT_FOUND) {
300         Py_INCREF(Py_None);
301         return Py_None;
302     }
303     Py_INCREF(self);
304     return (PyObject *)self;
305 }
306 
307 /* Load and return the module named by 'fullname'. */
308 static PyObject *
zipimporter_load_module(PyObject * obj,PyObject * args)309 zipimporter_load_module(PyObject *obj, PyObject *args)
310 {
311     ZipImporter *self = (ZipImporter *)obj;
312     PyObject *code, *mod, *dict;
313     char *fullname, *modpath;
314     int ispackage;
315 
316     if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
317                           &fullname))
318         return NULL;
319 
320     code = get_module_code(self, fullname, &ispackage, &modpath);
321     if (code == NULL)
322         return NULL;
323 
324     mod = PyImport_AddModule(fullname);
325     if (mod == NULL) {
326         Py_DECREF(code);
327         return NULL;
328     }
329     dict = PyModule_GetDict(mod);
330 
331     /* mod.__loader__ = self */
332     if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
333         goto error;
334 
335     if (ispackage) {
336         /* add __path__ to the module *before* the code gets
337            executed */
338         PyObject *pkgpath, *fullpath;
339         char *prefix = PyString_AsString(self->prefix);
340         char *subname = get_subname(fullname);
341         int err;
342 
343         fullpath = PyString_FromFormat("%s%c%s%s",
344                                 PyString_AsString(self->archive),
345                                 SEP,
346                                 *prefix ? prefix : "",
347                                 subname);
348         if (fullpath == NULL)
349             goto error;
350 
351         pkgpath = Py_BuildValue("[O]", fullpath);
352         Py_DECREF(fullpath);
353         if (pkgpath == NULL)
354             goto error;
355         err = PyDict_SetItemString(dict, "__path__", pkgpath);
356         Py_DECREF(pkgpath);
357         if (err != 0)
358             goto error;
359     }
360     mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
361     Py_DECREF(code);
362     if (Py_VerboseFlag)
363         PySys_WriteStderr("import %s # loaded from Zip %s\n",
364                           fullname, modpath);
365     return mod;
366 error:
367     Py_DECREF(code);
368     Py_DECREF(mod);
369     return NULL;
370 }
371 
372 /* Return a string matching __file__ for the named module */
373 static PyObject *
zipimporter_get_filename(PyObject * obj,PyObject * args)374 zipimporter_get_filename(PyObject *obj, PyObject *args)
375 {
376     ZipImporter *self = (ZipImporter *)obj;
377     PyObject *code;
378     char *fullname, *modpath;
379     int ispackage;
380 
381     if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
382                          &fullname))
383         return NULL;
384 
385     /* Deciding the filename requires working out where the code
386        would come from if the module was actually loaded */
387     code = get_module_code(self, fullname, &ispackage, &modpath);
388     if (code == NULL)
389         return NULL;
390     Py_DECREF(code); /* Only need the path info */
391 
392     return PyString_FromString(modpath);
393 }
394 
395 /* Return a bool signifying whether the module is a package or not. */
396 static PyObject *
zipimporter_is_package(PyObject * obj,PyObject * args)397 zipimporter_is_package(PyObject *obj, PyObject *args)
398 {
399     ZipImporter *self = (ZipImporter *)obj;
400     char *fullname;
401     enum zi_module_info mi;
402 
403     if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
404                           &fullname))
405         return NULL;
406 
407     mi = get_module_info(self, fullname);
408     if (mi == MI_ERROR)
409         return NULL;
410     if (mi == MI_NOT_FOUND) {
411         PyErr_Format(ZipImportError, "can't find module '%.200s'",
412                      fullname);
413         return NULL;
414     }
415     return PyBool_FromLong(mi == MI_PACKAGE);
416 }
417 
418 static PyObject *
zipimporter_get_data(PyObject * obj,PyObject * args)419 zipimporter_get_data(PyObject *obj, PyObject *args)
420 {
421     ZipImporter *self = (ZipImporter *)obj;
422     char *path;
423 #ifdef ALTSEP
424     char *p, buf[MAXPATHLEN + 1];
425 #endif
426     PyObject *toc_entry;
427     Py_ssize_t len;
428 
429     if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
430         return NULL;
431 
432 #ifdef ALTSEP
433     if (strlen(path) >= MAXPATHLEN) {
434         PyErr_SetString(ZipImportError, "path too long");
435         return NULL;
436     }
437     strcpy(buf, path);
438     for (p = buf; *p; p++) {
439         if (*p == ALTSEP)
440             *p = SEP;
441     }
442     path = buf;
443 #endif
444     len = PyString_Size(self->archive);
445     if ((size_t)len < strlen(path) &&
446         strncmp(path, PyString_AsString(self->archive), len) == 0 &&
447         path[len] == SEP) {
448         path = path + len + 1;
449     }
450 
451     toc_entry = PyDict_GetItemString(self->files, path);
452     if (toc_entry == NULL) {
453         PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
454         return NULL;
455     }
456     return get_data(PyString_AsString(self->archive), toc_entry);
457 }
458 
459 static PyObject *
zipimporter_get_code(PyObject * obj,PyObject * args)460 zipimporter_get_code(PyObject *obj, PyObject *args)
461 {
462     ZipImporter *self = (ZipImporter *)obj;
463     char *fullname;
464 
465     if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
466         return NULL;
467 
468     return get_module_code(self, fullname, NULL, NULL);
469 }
470 
471 static PyObject *
zipimporter_get_source(PyObject * obj,PyObject * args)472 zipimporter_get_source(PyObject *obj, PyObject *args)
473 {
474     ZipImporter *self = (ZipImporter *)obj;
475     PyObject *toc_entry;
476     char *fullname, *subname, path[MAXPATHLEN+1];
477     int len;
478     enum zi_module_info mi;
479 
480     if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
481         return NULL;
482 
483     mi = get_module_info(self, fullname);
484     if (mi == MI_ERROR)
485         return NULL;
486     if (mi == MI_NOT_FOUND) {
487         PyErr_Format(ZipImportError, "can't find module '%.200s'",
488                      fullname);
489         return NULL;
490     }
491     subname = get_subname(fullname);
492 
493     len = make_filename(PyString_AsString(self->prefix), subname, path);
494     if (len < 0)
495         return NULL;
496 
497     if (mi == MI_PACKAGE) {
498         path[len] = SEP;
499         strcpy(path + len + 1, "__init__.py");
500     }
501     else
502         strcpy(path + len, ".py");
503 
504     toc_entry = PyDict_GetItemString(self->files, path);
505     if (toc_entry != NULL)
506         return get_data(PyString_AsString(self->archive), toc_entry);
507 
508     /* we have the module, but no source */
509     Py_INCREF(Py_None);
510     return Py_None;
511 }
512 
513 PyDoc_STRVAR(doc_find_module,
514 "find_module(fullname, path=None) -> self or None.\n\
515 \n\
516 Search for a module specified by 'fullname'. 'fullname' must be the\n\
517 fully qualified (dotted) module name. It returns the zipimporter\n\
518 instance itself if the module was found, or None if it wasn't.\n\
519 The optional 'path' argument is ignored -- it's there for compatibility\n\
520 with the importer protocol.");
521 
522 PyDoc_STRVAR(doc_load_module,
523 "load_module(fullname) -> module.\n\
524 \n\
525 Load the module specified by 'fullname'. 'fullname' must be the\n\
526 fully qualified (dotted) module name. It returns the imported\n\
527 module, or raises ZipImportError if it wasn't found.");
528 
529 PyDoc_STRVAR(doc_get_data,
530 "get_data(pathname) -> string with file data.\n\
531 \n\
532 Return the data associated with 'pathname'. Raise IOError if\n\
533 the file wasn't found.");
534 
535 PyDoc_STRVAR(doc_is_package,
536 "is_package(fullname) -> bool.\n\
537 \n\
538 Return True if the module specified by fullname is a package.\n\
539 Raise ZipImportError if the module couldn't be found.");
540 
541 PyDoc_STRVAR(doc_get_code,
542 "get_code(fullname) -> code object.\n\
543 \n\
544 Return the code object for the specified module. Raise ZipImportError\n\
545 if the module couldn't be found.");
546 
547 PyDoc_STRVAR(doc_get_source,
548 "get_source(fullname) -> source string.\n\
549 \n\
550 Return the source code for the specified module. Raise ZipImportError\n\
551 if the module couldn't be found, return None if the archive does\n\
552 contain the module, but has no source for it.");
553 
554 
555 PyDoc_STRVAR(doc_get_filename,
556 "get_filename(fullname) -> filename string.\n\
557 \n\
558 Return the filename for the specified module.");
559 
560 static PyMethodDef zipimporter_methods[] = {
561     {"find_module", zipimporter_find_module, METH_VARARGS,
562      doc_find_module},
563     {"load_module", zipimporter_load_module, METH_VARARGS,
564      doc_load_module},
565     {"get_data", zipimporter_get_data, METH_VARARGS,
566      doc_get_data},
567     {"get_code", zipimporter_get_code, METH_VARARGS,
568      doc_get_code},
569     {"get_source", zipimporter_get_source, METH_VARARGS,
570      doc_get_source},
571     {"get_filename", zipimporter_get_filename, METH_VARARGS,
572      doc_get_filename},
573     {"is_package", zipimporter_is_package, METH_VARARGS,
574      doc_is_package},
575     {NULL,              NULL}   /* sentinel */
576 };
577 
578 static PyMemberDef zipimporter_members[] = {
579     {"archive",  T_OBJECT, offsetof(ZipImporter, archive),  READONLY},
580     {"prefix",   T_OBJECT, offsetof(ZipImporter, prefix),   READONLY},
581     {"_files",   T_OBJECT, offsetof(ZipImporter, files),    READONLY},
582     {NULL}
583 };
584 
585 PyDoc_STRVAR(zipimporter_doc,
586 "zipimporter(archivepath) -> zipimporter object\n\
587 \n\
588 Create a new zipimporter instance. 'archivepath' must be a path to\n\
589 a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
590 '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
591 valid directory inside the archive.\n\
592 \n\
593 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
594 archive.\n\
595 \n\
596 The 'archive' attribute of zipimporter objects contains the name of the\n\
597 zipfile targeted.");
598 
599 #define DEFERRED_ADDRESS(ADDR) 0
600 
601 static PyTypeObject ZipImporter_Type = {
602     PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
603     "zipimport.zipimporter",
604     sizeof(ZipImporter),
605     0,                                          /* tp_itemsize */
606     (destructor)zipimporter_dealloc,            /* tp_dealloc */
607     0,                                          /* tp_print */
608     0,                                          /* tp_getattr */
609     0,                                          /* tp_setattr */
610     0,                                          /* tp_compare */
611     (reprfunc)zipimporter_repr,                 /* tp_repr */
612     0,                                          /* tp_as_number */
613     0,                                          /* tp_as_sequence */
614     0,                                          /* tp_as_mapping */
615     0,                                          /* tp_hash */
616     0,                                          /* tp_call */
617     0,                                          /* tp_str */
618     PyObject_GenericGetAttr,                    /* tp_getattro */
619     0,                                          /* tp_setattro */
620     0,                                          /* tp_as_buffer */
621     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
622         Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
623     zipimporter_doc,                            /* tp_doc */
624     zipimporter_traverse,                       /* tp_traverse */
625     0,                                          /* tp_clear */
626     0,                                          /* tp_richcompare */
627     0,                                          /* tp_weaklistoffset */
628     0,                                          /* tp_iter */
629     0,                                          /* tp_iternext */
630     zipimporter_methods,                        /* tp_methods */
631     zipimporter_members,                        /* tp_members */
632     0,                                          /* tp_getset */
633     0,                                          /* tp_base */
634     0,                                          /* tp_dict */
635     0,                                          /* tp_descr_get */
636     0,                                          /* tp_descr_set */
637     0,                                          /* tp_dictoffset */
638     (initproc)zipimporter_init,                 /* tp_init */
639     PyType_GenericAlloc,                        /* tp_alloc */
640     PyType_GenericNew,                          /* tp_new */
641     PyObject_GC_Del,                            /* tp_free */
642 };
643 
644 
645 /* implementation */
646 
/* Decode the first 4 bytes of 'buf' as a little-endian unsigned
   integer (buf[0] is the least significant byte).  This partially
   reimplements marshal.c:r_long(). */
static unsigned int
get_uint32(const unsigned char *buf)
{
    unsigned int value = 0;
    int i;

    /* Fold in the bytes from most to least significant. */
    for (i = 3; i >= 0; i--)
        value = (value << 8) | (unsigned int)buf[i];
    return value;
}
660 
/* Decode the first 2 bytes of 'buf' as a little-endian unsigned short
   (buf[0] is the least significant byte).  This partially reimplements
   marshal.c:r_short(). */
static unsigned short
get_uint16(const unsigned char *buf)
{
    unsigned int low_byte = buf[0];
    unsigned int high_byte = buf[1];

    return (unsigned short)(low_byte | (high_byte << 8));
}
672 
673 static void
set_file_error(const char * archive,int eof)674 set_file_error(const char *archive, int eof)
675 {
676     if (eof) {
677         PyErr_SetString(PyExc_EOFError, "EOF read where not expected");
678     }
679     else {
680         PyErr_SetFromErrnoWithFilename(PyExc_IOError, archive);
681     }
682 }
683 
684 /*
685    read_directory(archive) -> files dict (new reference)
686 
687    Given a path to a Zip archive, build a dict, mapping file names
688    (local to the archive, using SEP as a separator) to toc entries.
689 
690    A toc_entry is a tuple:
691 
692    (__file__,      # value to use for __file__, available for all files
693     compress,      # compression kind; 0 for uncompressed
694     data_size,     # size of compressed data on disk
695     file_size,     # size of decompressed data
696     file_offset,   # offset of file header from start of archive
697     time,          # mod time of file (in dos format)
698     date,          # mod data of file (in dos format)
699     crc,           # crc checksum of the data
700    )
701 
702    Directories can be recognized by the trailing SEP in the name,
703    data_size and file_offset are 0.
704 */
705 static PyObject *
read_directory(const char * archive)706 read_directory(const char *archive)
707 {
708     PyObject *files = NULL;
709     FILE *fp;
710     unsigned short compress, time, date, name_size;
711     unsigned int crc, data_size, file_size, header_size, header_offset;
712     unsigned long file_offset, header_position;
713     unsigned long arc_offset;  /* Absolute offset to start of the zip-archive. */
714     unsigned int count, i;
715     unsigned char buffer[46];
716     size_t length;
717     char path[MAXPATHLEN + 5];
718     char name[MAXPATHLEN + 5];
719     const char *errmsg = NULL;
720 
721     if (strlen(archive) > MAXPATHLEN) {
722         PyErr_SetString(PyExc_OverflowError,
723                         "Zip path name is too long");
724         return NULL;
725     }
726     strcpy(path, archive);
727 
728     fp = fopen(archive, "rb");
729     if (fp == NULL) {
730         PyErr_Format(ZipImportError, "can't open Zip file: "
731                      "'%.200s'", archive);
732         return NULL;
733     }
734 
735     if (fseek(fp, -22, SEEK_END) == -1) {
736         goto file_error;
737     }
738     header_position = (unsigned long)ftell(fp);
739     if (header_position == (unsigned long)-1) {
740         goto file_error;
741     }
742     assert(header_position <= (unsigned long)LONG_MAX);
743     if (fread(buffer, 1, 22, fp) != 22) {
744         goto file_error;
745     }
746     if (get_uint32(buffer) != 0x06054B50u) {
747         /* Bad: End of Central Dir signature */
748         errmsg = "not a Zip file";
749         goto invalid_header;
750     }
751 
752     header_size = get_uint32(buffer + 12);
753     header_offset = get_uint32(buffer + 16);
754     if (header_position < header_size) {
755         errmsg = "bad central directory size";
756         goto invalid_header;
757     }
758     if (header_position < header_offset) {
759         errmsg = "bad central directory offset";
760         goto invalid_header;
761     }
762     if (header_position - header_size < header_offset) {
763         errmsg = "bad central directory size or offset";
764         goto invalid_header;
765     }
766     header_position -= header_size;
767     arc_offset = header_position - header_offset;
768 
769     files = PyDict_New();
770     if (files == NULL) {
771         goto error;
772     }
773 
774     length = (long)strlen(path);
775     path[length] = SEP;
776 
777     /* Start of Central Directory */
778     count = 0;
779     if (fseek(fp, (long)header_position, 0) == -1) {
780         goto file_error;
781     }
782     for (;;) {
783         PyObject *t;
784         size_t n;
785         int err;
786 
787         n = fread(buffer, 1, 46, fp);
788         if (n < 4) {
789             goto eof_error;
790         }
791         /* Start of file header */
792         if (get_uint32(buffer) != 0x02014B50u) {
793             break;              /* Bad: Central Dir File Header */
794         }
795         if (n != 46) {
796             goto eof_error;
797         }
798         compress = get_uint16(buffer + 10);
799         time = get_uint16(buffer + 12);
800         date = get_uint16(buffer + 14);
801         crc = get_uint32(buffer + 16);
802         data_size = get_uint32(buffer + 20);
803         file_size = get_uint32(buffer + 24);
804         name_size = get_uint16(buffer + 28);
805         header_size = (unsigned int)name_size +
806            get_uint16(buffer + 30) /* extra field */ +
807            get_uint16(buffer + 32) /* comment */;
808 
809         file_offset = get_uint32(buffer + 42);
810         if (file_offset > header_offset) {
811             errmsg = "bad local header offset";
812             goto invalid_header;
813         }
814         file_offset += arc_offset;
815 
816         if (name_size > MAXPATHLEN) {
817             name_size = MAXPATHLEN;
818         }
819         if (fread(name, 1, name_size, fp) != name_size) {
820             goto file_error;
821         }
822         name[name_size] = '\0';  /* Add terminating null byte */
823         if (SEP != '/') {
824             for (i = 0; i < name_size; i++) {
825                 if (name[i] == '/') {
826                     name[i] = SEP;
827                 }
828             }
829         }
830         /* Skip the rest of the header.
831          * On Windows, calling fseek to skip over the fields we don't use is
832          * slower than reading the data because fseek flushes stdio's
833          * internal buffers.  See issue #8745. */
834         assert(header_size <= 3*0xFFFFu);
835         for (i = name_size; i < header_size; i++) {
836             if (getc(fp) == EOF) {
837                 goto file_error;
838             }
839         }
840 
841         strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
842 
843         t = Py_BuildValue("sHIIkHHI", path, compress, data_size,
844                           file_size, file_offset, time, date, crc);
845         if (t == NULL) {
846             goto error;
847         }
848         err = PyDict_SetItemString(files, name, t);
849         Py_DECREF(t);
850         if (err != 0) {
851             goto error;
852         }
853         count++;
854     }
855     fclose(fp);
856     if (Py_VerboseFlag) {
857         PySys_WriteStderr("# zipimport: found %u names in %.200s\n",
858                            count, archive);
859     }
860     return files;
861 
862 eof_error:
863     set_file_error(archive, !ferror(fp));
864     goto error;
865 
866 file_error:
867     PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
868     goto error;
869 
870 invalid_header:
871     assert(errmsg != NULL);
872     PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
873     goto error;
874 
875 error:
876     fclose(fp);
877     Py_XDECREF(files);
878     return NULL;
879 }
880 
881 /* Return the zlib.decompress function object, or NULL if zlib couldn't
882    be imported. The function is cached when found, so subsequent calls
883    don't import zlib again. */
884 static PyObject *
get_decompress_func(void)885 get_decompress_func(void)
886 {
887     static int importing_zlib = 0;
888     PyObject *zlib;
889     PyObject *decompress;
890 
891     if (importing_zlib != 0)
892         /* Someone has a zlib.py[co] in their Zip file;
893            let's avoid a stack overflow. */
894         return NULL;
895     importing_zlib = 1;
896     zlib = PyImport_ImportModuleNoBlock("zlib");
897     importing_zlib = 0;
898     if (zlib != NULL) {
899         decompress = PyObject_GetAttrString(zlib,
900                                             "decompress");
901         Py_DECREF(zlib);
902     }
903     else {
904         PyErr_Clear();
905         decompress = NULL;
906     }
907     if (Py_VerboseFlag)
908         PySys_WriteStderr("# zipimport: zlib %s\n",
909             zlib != NULL ? "available": "UNAVAILABLE");
910     return decompress;
911 }
912 
913 /* Given a path to a Zip file and a toc_entry, return the (uncompressed)
914    data as a new reference. */
915 static PyObject *
get_data(const char * archive,PyObject * toc_entry)916 get_data(const char *archive, PyObject *toc_entry)
917 {
918     PyObject *raw_data = NULL, *data, *decompress;
919     char *buf;
920     FILE *fp;
921     const char *datapath;
922     unsigned short compress, time, date;
923     unsigned int crc;
924     Py_ssize_t data_size, file_size;
925     long file_offset, header_size;
926     unsigned char buffer[30];
927     const char *errmsg = NULL;
928 
929     if (!PyArg_ParseTuple(toc_entry, "sHnnlHHI", &datapath, &compress,
930                           &data_size, &file_size, &file_offset, &time,
931                           &date, &crc)) {
932         return NULL;
933     }
934     if (data_size < 0) {
935         PyErr_Format(ZipImportError, "negative data size");
936         return NULL;
937     }
938 
939     fp = fopen(archive, "rb");
940     if (!fp) {
941         PyErr_Format(PyExc_IOError,
942            "zipimport: can not open file %s", archive);
943         return NULL;
944     }
945 
946     /* Check to make sure the local file header is correct */
947     if (fseek(fp, file_offset, 0) == -1) {
948         goto file_error;
949     }
950     if (fread(buffer, 1, 30, fp) != 30) {
951         goto eof_error;
952     }
953     if (get_uint32(buffer) != 0x04034B50u) {
954         /* Bad: Local File Header */
955         errmsg = "bad local file header";
956         goto invalid_header;
957     }
958 
959     header_size = (unsigned int)30 +
960         get_uint16(buffer + 26) /* file name */ +
961         get_uint16(buffer + 28) /* extra field */;
962     if (file_offset > LONG_MAX - header_size) {
963         errmsg = "bad local file header size";
964         goto invalid_header;
965     }
966     file_offset += header_size;  /* Start of file data */
967 
968     if (data_size > LONG_MAX - 1) {
969         fclose(fp);
970         PyErr_NoMemory();
971         return NULL;
972     }
973     raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
974                                           data_size : data_size + 1);
975 
976     if (raw_data == NULL) {
977         goto error;
978     }
979     buf = PyString_AsString(raw_data);
980 
981     if (fseek(fp, file_offset, 0) == -1) {
982         goto file_error;
983     }
984     if (fread(buf, 1, data_size, fp) != (size_t)data_size) {
985         PyErr_SetString(PyExc_IOError,
986                         "zipimport: can't read data");
987         goto error;
988     }
989 
990     fclose(fp);
991     fp = NULL;
992 
993     if (compress != 0) {
994         buf[data_size] = 'Z';  /* saw this in zipfile.py */
995         data_size++;
996     }
997     buf[data_size] = '\0';
998 
999     if (compress == 0)  /* data is not compressed */
1000         return raw_data;
1001 
1002     /* Decompress with zlib */
1003     decompress = get_decompress_func();
1004     if (decompress == NULL) {
1005         PyErr_SetString(ZipImportError,
1006                         "can't decompress data; "
1007                         "zlib not available");
1008         goto error;
1009     }
1010     data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
1011     Py_DECREF(decompress);
1012     Py_DECREF(raw_data);
1013     return data;
1014 
1015 eof_error:
1016     set_file_error(archive, !ferror(fp));
1017     goto error;
1018 
1019 file_error:
1020     PyErr_Format(ZipImportError, "can't read Zip file: %.200s", archive);
1021     goto error;
1022 
1023 invalid_header:
1024     assert(errmsg != NULL);
1025     PyErr_Format(ZipImportError, "%s: %.200s", errmsg, archive);
1026     goto error;
1027 
1028 error:
1029     if (fp != NULL) {
1030         fclose(fp);
1031     }
1032     Py_XDECREF(raw_data);
1033     return NULL;
1034 }
1035 
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc (dostime
   only stores even seconds), so two time stamps are treated as equal
   when they differ by at most one second.

   Returns 1 when t1 and t2 are within one second of each other and
   0 otherwise.  The smaller value is subtracted from the larger one,
   so the result is also correct on platforms where time_t is an
   unsigned type, and no negation of the difference is needed. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t diff = (t1 > t2) ? (t1 - t2) : (t2 - t1);

    return diff <= 1;
}
1048 
1049 /* Given the contents of a .py[co] file in a buffer, unmarshal the data
1050    and return the code object. Return None if it the magic word doesn't
1051    match (we do this instead of raising an exception as we fall back
1052    to .py if available and we don't want to mask other errors).
1053    Returns a new reference. */
1054 static PyObject *
unmarshal_code(const char * pathname,PyObject * data,time_t mtime)1055 unmarshal_code(const char *pathname, PyObject *data, time_t mtime)
1056 {
1057     PyObject *code;
1058     unsigned char *buf = (unsigned char *)PyString_AsString(data);
1059     Py_ssize_t size = PyString_Size(data);
1060 
1061     if (size < 8) {
1062         PyErr_SetString(ZipImportError,
1063                         "bad pyc data");
1064         return NULL;
1065     }
1066 
1067     if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) {
1068         if (Py_VerboseFlag) {
1069             PySys_WriteStderr("# %s has bad magic\n",
1070                               pathname);
1071         }
1072         Py_INCREF(Py_None);
1073         return Py_None;  /* signal caller to try alternative */
1074     }
1075 
1076     if (mtime != 0 && !eq_mtime(get_uint32(buf + 4), mtime)) {
1077         if (Py_VerboseFlag) {
1078             PySys_WriteStderr("# %s has bad mtime\n",
1079                               pathname);
1080         }
1081         Py_INCREF(Py_None);
1082         return Py_None;  /* signal caller to try alternative */
1083     }
1084 
1085     code = PyMarshal_ReadObjectFromString((char *)buf + 8, size - 8);
1086     if (code == NULL) {
1087         return NULL;
1088     }
1089     if (!PyCode_Check(code)) {
1090         Py_DECREF(code);
1091         PyErr_Format(PyExc_TypeError,
1092              "compiled module %.200s is not a code object",
1093              pathname);
1094         return NULL;
1095     }
1096     return code;
1097 }
1098 
1099 /* Replace any occurrences of "\r\n?" in the input string with "\n".
1100    This converts DOS and Mac line endings to Unix line endings.
1101    Also append a trailing "\n" to be compatible with
1102    PyParser_SimpleParseFile(). Returns a new reference. */
1103 static PyObject *
normalize_line_endings(PyObject * source)1104 normalize_line_endings(PyObject *source)
1105 {
1106     char *buf, *q, *p = PyString_AsString(source);
1107     PyObject *fixed_source;
1108 
1109     if (!p)
1110         return NULL;
1111 
1112     /* one char extra for trailing \n and one for terminating \0 */
1113     buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
1114     if (buf == NULL) {
1115         PyErr_SetString(PyExc_MemoryError,
1116                         "zipimport: no memory to allocate "
1117                         "source buffer");
1118         return NULL;
1119     }
1120     /* replace "\r\n?" by "\n" */
1121     for (q = buf; *p != '\0'; p++) {
1122         if (*p == '\r') {
1123             *q++ = '\n';
1124             if (*(p + 1) == '\n')
1125                 p++;
1126         }
1127         else
1128             *q++ = *p;
1129     }
1130     *q++ = '\n';  /* add trailing \n */
1131     *q = '\0';
1132     fixed_source = PyString_FromString(buf);
1133     PyMem_Free(buf);
1134     return fixed_source;
1135 }
1136 
1137 /* Given a string buffer containing Python source code, compile it
1138    return and return a code object as a new reference. */
1139 static PyObject *
compile_source(char * pathname,PyObject * source)1140 compile_source(char *pathname, PyObject *source)
1141 {
1142     PyObject *code, *fixed_source;
1143 
1144     fixed_source = normalize_line_endings(source);
1145     if (fixed_source == NULL)
1146         return NULL;
1147 
1148     code = Py_CompileString(PyString_AsString(fixed_source), pathname,
1149                             Py_file_input);
1150     Py_DECREF(fixed_source);
1151     return code;
1152 }
1153 
/* Convert the packed DOS time and date values found in the Zip archive
   to a time_t, so they can be compared with the time stamp stored in
   .pyc files.

   Bit layout decoded here:
     dostime: bits 0-4 seconds / 2, bits 5-10 minutes, bits 11-15 hours
     dosdate: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years
              since 1980

   The fields are handed to mktime(), whose result is returned
   unchanged. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    const int seconds = (dostime & 0x1f) * 2;
    const int minutes = (dostime >> 5) & 0x3f;
    const int hours   = (dostime >> 11) & 0x1f;
    const int day     = dosdate & 0x1f;
    const int month   = (dosdate >> 5) & 0x0f;
    const int year    = (dosdate >> 9) & 0x7f;
    struct tm fields;

    memset((void *) &fields, '\0', sizeof(fields));

    fields.tm_year  = year + 80;   /* struct tm counts years from 1900 */
    fields.tm_mon   = month - 1;   /* struct tm months are 0-based */
    fields.tm_mday  = day;
    fields.tm_hour  = hours;
    fields.tm_min   = minutes;
    fields.tm_sec   = seconds;
    fields.tm_isdst = -1;          /* wday/yday is ignored */

    return mktime(&fields);
}
1173 
1174 /* Given a path to a .pyc or .pyo file in the archive, return the
1175    modification time of the matching .py file, or 0 if no source
1176    is available. */
1177 static time_t
get_mtime_of_source(ZipImporter * self,char * path)1178 get_mtime_of_source(ZipImporter *self, char *path)
1179 {
1180     PyObject *toc_entry;
1181     time_t mtime = 0;
1182     Py_ssize_t lastchar = strlen(path) - 1;
1183     char savechar = path[lastchar];
1184     path[lastchar] = '\0';  /* strip 'c' or 'o' from *.py[co] */
1185     toc_entry = PyDict_GetItemString(self->files, path);
1186     if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1187         PyTuple_Size(toc_entry) == 8) {
1188         /* fetch the time stamp of the .py file for comparison
1189            with an embedded pyc time stamp */
1190         int time, date;
1191         time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1192         date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1193         mtime = parse_dostime(time, date);
1194     }
1195     path[lastchar] = savechar;
1196     return mtime;
1197 }
1198 
1199 /* Return the code object for the module named by 'fullname' from the
1200    Zip archive as a new reference. */
1201 static PyObject *
get_code_from_data(ZipImporter * self,int ispackage,int isbytecode,time_t mtime,PyObject * toc_entry)1202 get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1203                    time_t mtime, PyObject *toc_entry)
1204 {
1205     PyObject *data, *code;
1206     char *modpath;
1207     char *archive = PyString_AsString(self->archive);
1208 
1209     if (archive == NULL)
1210         return NULL;
1211 
1212     data = get_data(archive, toc_entry);
1213     if (data == NULL)
1214         return NULL;
1215 
1216     modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1217 
1218     if (isbytecode) {
1219         code = unmarshal_code(modpath, data, mtime);
1220     }
1221     else {
1222         code = compile_source(modpath, data);
1223     }
1224     Py_DECREF(data);
1225     return code;
1226 }
1227 
1228 /* Get the code object associated with the module specified by
1229    'fullname'. */
1230 static PyObject *
get_module_code(ZipImporter * self,char * fullname,int * p_ispackage,char ** p_modpath)1231 get_module_code(ZipImporter *self, char *fullname,
1232                 int *p_ispackage, char **p_modpath)
1233 {
1234     PyObject *toc_entry;
1235     char *subname, path[MAXPATHLEN + 1];
1236     int len;
1237     struct st_zip_searchorder *zso;
1238 
1239     subname = get_subname(fullname);
1240 
1241     len = make_filename(PyString_AsString(self->prefix), subname, path);
1242     if (len < 0)
1243         return NULL;
1244 
1245     for (zso = zip_searchorder; *zso->suffix; zso++) {
1246         PyObject *code = NULL;
1247 
1248         strcpy(path + len, zso->suffix);
1249         if (Py_VerboseFlag > 1)
1250             PySys_WriteStderr("# trying %s%c%s\n",
1251                               PyString_AsString(self->archive),
1252                               SEP, path);
1253         toc_entry = PyDict_GetItemString(self->files, path);
1254         if (toc_entry != NULL) {
1255             time_t mtime = 0;
1256             int ispackage = zso->type & IS_PACKAGE;
1257             int isbytecode = zso->type & IS_BYTECODE;
1258 
1259             if (isbytecode)
1260                 mtime = get_mtime_of_source(self, path);
1261             if (p_ispackage != NULL)
1262                 *p_ispackage = ispackage;
1263             code = get_code_from_data(self, ispackage,
1264                                       isbytecode, mtime,
1265                                       toc_entry);
1266             if (code == Py_None) {
1267                 /* bad magic number or non-matching mtime
1268                    in byte code, try next */
1269                 Py_DECREF(code);
1270                 continue;
1271             }
1272             if (code != NULL && p_modpath != NULL)
1273                 *p_modpath = PyString_AsString(
1274                     PyTuple_GetItem(toc_entry, 0));
1275             return code;
1276         }
1277     }
1278     PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1279     return NULL;
1280 }
1281 
1282 
1283 /* Module init */
1284 
1285 PyDoc_STRVAR(zipimport_doc,
1286 "zipimport provides support for importing Python modules from Zip archives.\n\
1287 \n\
1288 This module exports three objects:\n\
1289 - zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1290 - ZipImportError: exception raised by zipimporter objects. It's a\n\
1291   subclass of ImportError, so it can be caught as ImportError, too.\n\
1292 - _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1293   info dicts, as used in zipimporter._files.\n\
1294 \n\
1295 It is usually not needed to use the zipimport module explicitly; it is\n\
1296 used by the builtin import mechanism for sys.path items that are paths\n\
1297 to Zip archives.");
1298 
1299 PyMODINIT_FUNC
initzipimport(void)1300 initzipimport(void)
1301 {
1302     PyObject *mod;
1303 
1304     if (PyType_Ready(&ZipImporter_Type) < 0)
1305         return;
1306 
1307     /* Correct directory separator */
1308     zip_searchorder[0].suffix[0] = SEP;
1309     zip_searchorder[1].suffix[0] = SEP;
1310     zip_searchorder[2].suffix[0] = SEP;
1311     if (Py_OptimizeFlag) {
1312         /* Reverse *.pyc and *.pyo */
1313         struct st_zip_searchorder tmp;
1314         tmp = zip_searchorder[0];
1315         zip_searchorder[0] = zip_searchorder[1];
1316         zip_searchorder[1] = tmp;
1317         tmp = zip_searchorder[3];
1318         zip_searchorder[3] = zip_searchorder[4];
1319         zip_searchorder[4] = tmp;
1320     }
1321 
1322     mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1323                          NULL, PYTHON_API_VERSION);
1324     if (mod == NULL)
1325         return;
1326 
1327     ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1328                                         PyExc_ImportError, NULL);
1329     if (ZipImportError == NULL)
1330         return;
1331 
1332     Py_INCREF(ZipImportError);
1333     if (PyModule_AddObject(mod, "ZipImportError",
1334                            ZipImportError) < 0)
1335         return;
1336 
1337     Py_INCREF(&ZipImporter_Type);
1338     if (PyModule_AddObject(mod, "zipimporter",
1339                            (PyObject *)&ZipImporter_Type) < 0)
1340         return;
1341 
1342     zip_directory_cache = PyDict_New();
1343     if (zip_directory_cache == NULL)
1344         return;
1345     Py_INCREF(zip_directory_cache);
1346     if (PyModule_AddObject(mod, "_zip_directory_cache",
1347                            zip_directory_cache) < 0)
1348         return;
1349 }
1350