1 /* File object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 
7 #ifdef HAVE_SYS_TYPES_H
8 #include <sys/types.h>
9 #endif /* HAVE_SYS_TYPES_H */
10 
11 #ifdef MS_WINDOWS
12 #define fileno _fileno
13 /* can simulate truncate with Win32 API functions; see file_truncate */
14 #define HAVE_FTRUNCATE
15 #define WIN32_LEAN_AND_MEAN
16 #include <windows.h>
17 #endif
18 
19 #if defined(PYOS_OS2) && defined(PYCC_GCC)
20 #include <io.h>
21 #endif
22 
23 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
24 
25 #ifdef HAVE_ERRNO_H
26 #include <errno.h>
27 #endif
28 
29 #ifdef HAVE_GETC_UNLOCKED
30 #define GETC(f) getc_unlocked(f)
31 #define FLOCKFILE(f) flockfile(f)
32 #define FUNLOCKFILE(f) funlockfile(f)
33 #else
34 #define GETC(f) getc(f)
35 #define FLOCKFILE(f)
36 #define FUNLOCKFILE(f)
37 #endif
38 
39 /* Bits in f_newlinetypes */
40 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
41 #define NEWLINE_CR 1            /* \r newline seen */
42 #define NEWLINE_LF 2            /* \n newline seen */
43 #define NEWLINE_CRLF 4          /* \r\n newline seen */
44 
45 /*
46  * These macros release the GIL while preventing the f_close() function being
47  * called in the interval between them.  For that purpose, a running total of
48  * the number of currently running unlocked code sections is kept in
49  * the unlocked_count field of the PyFileObject. The close() method raises
50  * an IOError if that field is non-zero.  See issue #815646, #595601.
51  */
52 
53 #define FILE_BEGIN_ALLOW_THREADS(fobj) \
54 { \
55     fobj->unlocked_count++; \
56     Py_BEGIN_ALLOW_THREADS
57 
58 #define FILE_END_ALLOW_THREADS(fobj) \
59     Py_END_ALLOW_THREADS \
60     fobj->unlocked_count--; \
61     assert(fobj->unlocked_count >= 0); \
62 }
63 
64 #define FILE_ABORT_ALLOW_THREADS(fobj) \
65     Py_BLOCK_THREADS \
66     fobj->unlocked_count--; \
67     assert(fobj->unlocked_count >= 0);
68 
69 #ifdef __cplusplus
70 extern "C" {
71 #endif
72 
73 FILE *
PyFile_AsFile(PyObject * f)74 PyFile_AsFile(PyObject *f)
75 {
76     if (f == NULL || !PyFile_Check(f))
77         return NULL;
78     else
79         return ((PyFileObject *)f)->f_fp;
80 }
81 
/* Record that one more code section is using the file's stream.
   close_the_file() refuses to close while this counter is non-zero. */
void PyFile_IncUseCount(PyFileObject *fobj)
{
    ++fobj->unlocked_count;
}
86 
/* Undo one PyFile_IncUseCount().  The counter must never go negative;
   that would indicate an unbalanced Inc/Dec pair. */
void PyFile_DecUseCount(PyFileObject *fobj)
{
    --fobj->unlocked_count;
    assert(fobj->unlocked_count >= 0);
}
92 
93 PyObject *
PyFile_Name(PyObject * f)94 PyFile_Name(PyObject *f)
95 {
96     if (f == NULL || !PyFile_Check(f))
97         return NULL;
98     else
99         return ((PyFileObject *)f)->f_name;
100 }
101 
102 /* This is a safe wrapper around PyObject_Print to print to the FILE
103    of a PyFileObject. PyObject_Print releases the GIL but knows nothing
104    about PyFileObject. */
105 static int
file_PyObject_Print(PyObject * op,PyFileObject * f,int flags)106 file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
107 {
108     int result;
109     PyFile_IncUseCount(f);
110     result = PyObject_Print(op, f->f_fp, flags);
111     PyFile_DecUseCount(f);
112     return result;
113 }
114 
115 /* On Unix, fopen will succeed for directories.
116    In Python, there should be no file objects referring to
117    directories, so we need a check.  */
118 
119 static PyFileObject*
dircheck(PyFileObject * f)120 dircheck(PyFileObject* f)
121 {
122 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
123     struct stat buf;
124     if (f->f_fp == NULL)
125         return f;
126     if (fstat(fileno(f->f_fp), &buf) == 0 &&
127         S_ISDIR(buf.st_mode)) {
128         char *msg = strerror(EISDIR);
129         PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
130                                               EISDIR, msg, f->f_name);
131         PyErr_SetObject(PyExc_IOError, exc);
132         Py_XDECREF(exc);
133         return NULL;
134     }
135 #endif
136     return f;
137 }
138 
139 
140 static PyObject *
fill_file_fields(PyFileObject * f,FILE * fp,PyObject * name,char * mode,int (* close)(FILE *))141 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
142                  int (*close)(FILE *))
143 {
144     assert(name != NULL);
145     assert(f != NULL);
146     assert(PyFile_Check(f));
147     assert(f->f_fp == NULL);
148 
149     Py_DECREF(f->f_name);
150     Py_DECREF(f->f_mode);
151     Py_DECREF(f->f_encoding);
152     Py_DECREF(f->f_errors);
153 
154     Py_INCREF(name);
155     f->f_name = name;
156 
157     f->f_mode = PyString_FromString(mode);
158 
159     f->f_close = close;
160     f->f_softspace = 0;
161     f->f_binary = strchr(mode,'b') != NULL;
162     f->f_buf = NULL;
163     f->f_univ_newline = (strchr(mode, 'U') != NULL);
164     f->f_newlinetypes = NEWLINE_UNKNOWN;
165     f->f_skipnextlf = 0;
166     Py_INCREF(Py_None);
167     f->f_encoding = Py_None;
168     Py_INCREF(Py_None);
169     f->f_errors = Py_None;
170     f->readable = f->writable = 0;
171     if (strchr(mode, 'r') != NULL || f->f_univ_newline)
172         f->readable = 1;
173     if (strchr(mode, 'w') != NULL || strchr(mode, 'a') != NULL)
174         f->writable = 1;
175     if (strchr(mode, '+') != NULL)
176         f->readable = f->writable = 1;
177 
178     if (f->f_mode == NULL)
179         return NULL;
180     f->f_fp = fp;
181     f = dircheck(f);
182     return (PyObject *) f;
183 }
184 
185 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
186 #define Py_VERIFY_WINNT
/* The CRT on Windows compiled with Visual Studio 2005 and higher may
 * assert if given invalid mode strings.  This is all fine and well
 * in static languages like C where the mode string is typically hard
 * coded.  But in Python, where we pass in the mode string from the user,
 * we need to verify it manually first.
 *
 * Returns 1 when mode is acceptable and 0 otherwise.  Accepted form:
 *   - optional leading spaces, then exactly one of 'r', 'w', 'a';
 *   - then any mix of spaces, 'N', at most one each of '+', 'T', 'D',
 *     and at most one letter from each of the pairs b/t, c/n, S/R;
 *   - optionally ", ccs = ENC" where ENC is UTF-8, UTF-16LE or UNICODE
 *     (spaces allowed around "ccs" and '=');
 *   - optional trailing spaces.
 */
static int _PyVerify_Mode_WINNT(const char *mode)
{
    static const char * const encodings[] = {"UTF-8", "UTF-16LE", "UNICODE"};
    const size_t n_encodings = sizeof(encodings) / sizeof(encodings[0]);
    int singles = 0;        /* bit i set: i-th char of "+TD" already seen */
    int pairs = 0;          /* bit i set: i-th pair of "btcnSR" already used */
    int has_encoding = 0;
    const char *set, *hit;

    while (*mode == ' ') /* strip initial spaces */
        ++mode;
    /* strchr() also matches the terminating NUL, so an empty string has
       to be rejected explicitly; otherwise the loop below would step
       past the end of the buffer. */
    if (*mode == '\0' || !strchr("rwa", *mode)) /* must start with one of these */
        return 0;
    while (*++mode) {
        if (*mode == ' ' || *mode == 'N') /* ignore spaces and N */
            continue;
        set = "+TD"; /* each of these can appear only once */
        hit = strchr(set, *mode);
        if (hit) {
            /* Offset of the match inside the set: always >= 0. */
            int idx = (int)(hit - set);
            if (singles & (1 << idx))
                return 0;
            singles |= (1 << idx);
            continue;
        }
        set = "btcnSR"; /* only one of each letter in the pairs allowed */
        hit = strchr(set, *mode);
        if (hit) {
            int idx = (int)((hit - set) / 2);
            if (pairs & (1 << idx))
                return 0;
            pairs |= (1 << idx);
            continue;
        }
        if (*mode == ',') {
            has_encoding = 1;
            ++mode;         /* step over the comma itself */
            break;
        }
        return 0; /* found an invalid char */
    }

    if (has_encoding) {
        size_t i;

        while (*mode == ' ')
            ++mode;
        /* find 'ccs =' */
        if (strncmp(mode, "ccs", 3))
            return 0;
        mode += 3;
        while (*mode == ' ')
            ++mode;
        if (*mode != '=')
            return 0;
        ++mode;             /* step over the '=' */
        while (*mode == ' ')
            ++mode;
        for (i = 0; i < n_encodings; ++i) {
            size_t len = strlen(encodings[i]);
            if (!strncmp(mode, encodings[i], len)) {
                mode += len; /* found a valid encoding */
                break;
            }
        }
        if (i == n_encodings)
            return 0;
    }
    /* skip trailing spaces */
    while (*mode == ' ')
        ++mode;

    return *mode == '\0'; /* must be at the end of the string */
}
264 #endif
265 
266 /* check for known incorrect mode strings - problem is, platforms are
267    free to accept any mode characters they like and are supposed to
268    ignore stuff they don't understand... write or append mode with
269    universal newline support is expressly forbidden by PEP 278.
270    Additionally, remove the 'U' from the mode string as platforms
271    won't know what it is. Non-zero return signals an exception */
272 int
_PyFile_SanitizeMode(char * mode)273 _PyFile_SanitizeMode(char *mode)
274 {
275     char *upos;
276     size_t len = strlen(mode);
277 
278     if (!len) {
279         PyErr_SetString(PyExc_ValueError, "empty mode string");
280         return -1;
281     }
282 
283     upos = strchr(mode, 'U');
284     if (upos) {
285         memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
286 
287         if (mode[0] == 'w' || mode[0] == 'a') {
288             PyErr_Format(PyExc_ValueError, "universal newline "
289                          "mode can only be used with modes "
290                          "starting with 'r'");
291             return -1;
292         }
293 
294         if (mode[0] != 'r') {
295             memmove(mode+1, mode, strlen(mode)+1);
296             mode[0] = 'r';
297         }
298 
299         if (!strchr(mode, 'b')) {
300             memmove(mode+2, mode+1, strlen(mode));
301             mode[1] = 'b';
302         }
303     } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
304         PyErr_Format(PyExc_ValueError, "mode string must begin with "
305                     "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
306         return -1;
307     }
308 #ifdef Py_VERIFY_WINNT
309     /* additional checks on NT with visual studio 2005 and higher */
310     if (!_PyVerify_Mode_WINNT(mode)) {
311         PyErr_Format(PyExc_ValueError, "Invalid mode ('%.50s')", mode);
312         return -1;
313     }
314 #endif
315     return 0;
316 }
317 
318 static PyObject *
open_the_file(PyFileObject * f,char * name,char * mode)319 open_the_file(PyFileObject *f, char *name, char *mode)
320 {
321     char *newmode;
322     assert(f != NULL);
323     assert(PyFile_Check(f));
324 #ifdef MS_WINDOWS
325     /* windows ignores the passed name in order to support Unicode */
326     assert(f->f_name != NULL);
327 #else
328     assert(name != NULL);
329 #endif
330     assert(mode != NULL);
331     assert(f->f_fp == NULL);
332 
333     /* probably need to replace 'U' by 'rb' */
334     newmode = PyMem_MALLOC(strlen(mode) + 3);
335     if (!newmode) {
336         PyErr_NoMemory();
337         return NULL;
338     }
339     strcpy(newmode, mode);
340 
341     if (_PyFile_SanitizeMode(newmode)) {
342         f = NULL;
343         goto cleanup;
344     }
345 
346     /* rexec.py can't stop a user from getting the file() constructor --
347        all they have to do is get *any* file object f, and then do
348        type(f).  Here we prevent them from doing damage with it. */
349     if (PyEval_GetRestricted()) {
350         PyErr_SetString(PyExc_IOError,
351         "file() constructor not accessible in restricted mode");
352         f = NULL;
353         goto cleanup;
354     }
355     errno = 0;
356 
357 #ifdef MS_WINDOWS
358     if (PyUnicode_Check(f->f_name)) {
359         PyObject *wmode;
360         wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
361         if (f->f_name && wmode) {
362             FILE_BEGIN_ALLOW_THREADS(f)
363             /* PyUnicode_AS_UNICODE OK without thread
364                lock as it is a simple dereference. */
365             f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
366                               PyUnicode_AS_UNICODE(wmode));
367             FILE_END_ALLOW_THREADS(f)
368         }
369         Py_XDECREF(wmode);
370     }
371 #endif
372     if (NULL == f->f_fp && NULL != name) {
373         FILE_BEGIN_ALLOW_THREADS(f)
374         f->f_fp = fopen(name, newmode);
375         FILE_END_ALLOW_THREADS(f)
376     }
377 
378     if (f->f_fp == NULL) {
379 #if defined  _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
380         /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
381          * across all Windows flavors.  When it sets EINVAL varies
382          * across Windows flavors, the exact conditions aren't
383          * documented, and the answer lies in the OS's implementation
384          * of Win32's CreateFile function (whose source is secret).
385          * Seems the best we can do is map EINVAL to ENOENT.
386          * Starting with Visual Studio .NET 2005, EINVAL is correctly
387          * set by our CRT error handler (set in exceptions.c.)
388          */
389         if (errno == 0)         /* bad mode string */
390             errno = EINVAL;
391         else if (errno == EINVAL) /* unknown, but not a mode string */
392             errno = ENOENT;
393 #endif
394         /* EINVAL is returned when an invalid filename or
395          * an invalid mode is supplied. */
396         if (errno == EINVAL) {
397             PyObject *v;
398             char message[100];
399             PyOS_snprintf(message, 100,
400                 "invalid mode ('%.50s') or filename", mode);
401             v = Py_BuildValue("(isO)", errno, message, f->f_name);
402             if (v != NULL) {
403                 PyErr_SetObject(PyExc_IOError, v);
404                 Py_DECREF(v);
405             }
406         }
407         else
408             PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
409         f = NULL;
410     }
411     if (f != NULL)
412         f = dircheck(f);
413 
414 cleanup:
415     PyMem_FREE(newmode);
416 
417     return (PyObject *)f;
418 }
419 
420 static PyObject *
close_the_file(PyFileObject * f)421 close_the_file(PyFileObject *f)
422 {
423     int sts = 0;
424     int (*local_close)(FILE *);
425     FILE *local_fp = f->f_fp;
426     char *local_setbuf = f->f_setbuf;
427     if (local_fp != NULL) {
428         local_close = f->f_close;
429         if (local_close != NULL && f->unlocked_count > 0) {
430             if (f->ob_refcnt > 0) {
431                 PyErr_SetString(PyExc_IOError,
432                     "close() called during concurrent "
433                     "operation on the same file object.");
434             } else {
435                 /* This should not happen unless someone is
436                  * carelessly playing with the PyFileObject
437                  * struct fields and/or its associated FILE
438                  * pointer. */
439                 PyErr_SetString(PyExc_SystemError,
440                     "PyFileObject locking error in "
441                     "destructor (refcnt <= 0 at close).");
442             }
443             return NULL;
444         }
445         /* NULL out the FILE pointer before releasing the GIL, because
446          * it will not be valid anymore after the close() function is
447          * called. */
448         f->f_fp = NULL;
449         if (local_close != NULL) {
450             /* Issue #9295: must temporarily reset f_setbuf so that another
451                thread doesn't free it when running file_close() concurrently.
452                Otherwise this close() will crash when flushing the buffer. */
453             f->f_setbuf = NULL;
454             Py_BEGIN_ALLOW_THREADS
455             errno = 0;
456             sts = (*local_close)(local_fp);
457             Py_END_ALLOW_THREADS
458             f->f_setbuf = local_setbuf;
459             if (sts == EOF)
460                 return PyErr_SetFromErrno(PyExc_IOError);
461             if (sts != 0)
462                 return PyInt_FromLong((long)sts);
463         }
464     }
465     Py_RETURN_NONE;
466 }
467 
468 PyObject *
PyFile_FromFile(FILE * fp,char * name,char * mode,int (* close)(FILE *))469 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
470 {
471     PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
472                                                          NULL, NULL);
473     if (f != NULL) {
474         PyObject *o_name = PyString_FromString(name);
475         if (o_name == NULL)
476             return NULL;
477         if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
478             Py_DECREF(f);
479             f = NULL;
480         }
481         Py_DECREF(o_name);
482     }
483     return (PyObject *) f;
484 }
485 
486 PyObject *
PyFile_FromString(char * name,char * mode)487 PyFile_FromString(char *name, char *mode)
488 {
489     extern int fclose(FILE *);
490     PyFileObject *f;
491 
492     f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
493     if (f != NULL) {
494         if (open_the_file(f, name, mode) == NULL) {
495             Py_DECREF(f);
496             f = NULL;
497         }
498     }
499     return (PyObject *)f;
500 }
501 
502 void
PyFile_SetBufSize(PyObject * f,int bufsize)503 PyFile_SetBufSize(PyObject *f, int bufsize)
504 {
505     PyFileObject *file = (PyFileObject *)f;
506     if (bufsize >= 0) {
507         int type;
508         switch (bufsize) {
509         case 0:
510             type = _IONBF;
511             break;
512 #ifdef HAVE_SETVBUF
513         case 1:
514             type = _IOLBF;
515             bufsize = BUFSIZ;
516             break;
517 #endif
518         default:
519             type = _IOFBF;
520 #ifndef HAVE_SETVBUF
521             bufsize = BUFSIZ;
522 #endif
523             break;
524         }
525         fflush(file->f_fp);
526         if (type == _IONBF) {
527             PyMem_Free(file->f_setbuf);
528             file->f_setbuf = NULL;
529         } else {
530             file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
531                                                     bufsize);
532         }
533 #ifdef HAVE_SETVBUF
534         setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
535 #else /* !HAVE_SETVBUF */
536         setbuf(file->f_fp, file->f_setbuf);
537 #endif /* !HAVE_SETVBUF */
538     }
539 }
540 
541 /* Set the encoding used to output Unicode strings.
542    Return 1 on success, 0 on failure. */
543 
544 int
PyFile_SetEncoding(PyObject * f,const char * enc)545 PyFile_SetEncoding(PyObject *f, const char *enc)
546 {
547     return PyFile_SetEncodingAndErrors(f, enc, NULL);
548 }
549 
550 int
PyFile_SetEncodingAndErrors(PyObject * f,const char * enc,char * errors)551 PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
552 {
553     PyFileObject *file = (PyFileObject*)f;
554     PyObject *str, *oerrors;
555 
556     assert(PyFile_Check(f));
557     str = PyString_FromString(enc);
558     if (!str)
559         return 0;
560     if (errors) {
561         oerrors = PyString_FromString(errors);
562         if (!oerrors) {
563             Py_DECREF(str);
564             return 0;
565         }
566     } else {
567         oerrors = Py_None;
568         Py_INCREF(Py_None);
569     }
570     Py_DECREF(file->f_encoding);
571     file->f_encoding = str;
572     Py_DECREF(file->f_errors);
573     file->f_errors = oerrors;
574     return 1;
575 }
576 
577 static PyObject *
err_closed(void)578 err_closed(void)
579 {
580     PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
581     return NULL;
582 }
583 
584 static PyObject *
err_mode(char * action)585 err_mode(char *action)
586 {
587     PyErr_Format(PyExc_IOError, "File not open for %s", action);
588     return NULL;
589 }
590 
591 /* Refuse regular file I/O if there's data in the iteration-buffer.
592  * Mixing them would cause data to arrive out of order, as the read*
593  * methods don't use the iteration buffer. */
594 static PyObject *
err_iterbuffered(void)595 err_iterbuffered(void)
596 {
597     PyErr_SetString(PyExc_ValueError,
598         "Mixing iteration and read methods would lose data");
599     return NULL;
600 }
601 
602 static void drop_readahead(PyFileObject *);
603 
604 /* Methods */
605 
606 static void
file_dealloc(PyFileObject * f)607 file_dealloc(PyFileObject *f)
608 {
609     PyObject *ret;
610     if (f->weakreflist != NULL)
611         PyObject_ClearWeakRefs((PyObject *) f);
612     ret = close_the_file(f);
613     if (!ret) {
614         PySys_WriteStderr("close failed in file object destructor:\n");
615         PyErr_Print();
616     }
617     else {
618         Py_DECREF(ret);
619     }
620     PyMem_Free(f->f_setbuf);
621     Py_XDECREF(f->f_name);
622     Py_XDECREF(f->f_mode);
623     Py_XDECREF(f->f_encoding);
624     Py_XDECREF(f->f_errors);
625     drop_readahead(f);
626     Py_TYPE(f)->tp_free((PyObject *)f);
627 }
628 
629 static PyObject *
file_repr(PyFileObject * f)630 file_repr(PyFileObject *f)
631 {
632     if (PyUnicode_Check(f->f_name)) {
633 #ifdef Py_USING_UNICODE
634         PyObject *ret = NULL;
635         PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
636         const char *name_str = name ? PyString_AsString(name) : "?";
637         ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
638                            f->f_fp == NULL ? "closed" : "open",
639                            name_str,
640                            PyString_AsString(f->f_mode),
641                            f);
642         Py_XDECREF(name);
643         return ret;
644 #endif
645     } else {
646         return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
647                            f->f_fp == NULL ? "closed" : "open",
648                            PyString_AsString(f->f_name),
649                            PyString_AsString(f->f_mode),
650                            f);
651     }
652 }
653 
654 static PyObject *
file_close(PyFileObject * f)655 file_close(PyFileObject *f)
656 {
657     PyObject *sts = close_the_file(f);
658     if (sts) {
659         PyMem_Free(f->f_setbuf);
660         f->f_setbuf = NULL;
661     }
662     return sts;
663 }
664 
665 
666 /* Our very own off_t-like type, 64-bit if possible */
667 #if !defined(HAVE_LARGEFILE_SUPPORT)
668 typedef off_t Py_off_t;
669 #elif SIZEOF_OFF_T >= 8
670 typedef off_t Py_off_t;
671 #elif SIZEOF_FPOS_T >= 8
672 typedef fpos_t Py_off_t;
673 #else
674 #error "Large file support, but neither off_t nor fpos_t is large enough."
675 #endif
676 
677 
678 /* a portable fseek() function
679    return 0 on success, non-zero on failure (with errno set) */
680 static int
_portable_fseek(FILE * fp,Py_off_t offset,int whence)681 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
682 {
683 #if !defined(HAVE_LARGEFILE_SUPPORT)
684     return fseek(fp, offset, whence);
685 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
686     return fseeko(fp, offset, whence);
687 #elif defined(HAVE_FSEEK64)
688     return fseek64(fp, offset, whence);
689 #elif defined(__BEOS__)
690     return _fseek(fp, offset, whence);
691 #elif SIZEOF_FPOS_T >= 8
692     /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
693        and fgetpos() to implement fseek()*/
694     fpos_t pos;
695     switch (whence) {
696     case SEEK_END:
697 #ifdef MS_WINDOWS
698         fflush(fp);
699         if (_lseeki64(fileno(fp), 0, 2) == -1)
700             return -1;
701 #else
702         if (fseek(fp, 0, SEEK_END) != 0)
703             return -1;
704 #endif
705         /* fall through */
706     case SEEK_CUR:
707         if (fgetpos(fp, &pos) != 0)
708             return -1;
709         offset += pos;
710         break;
711     /* case SEEK_SET: break; */
712     }
713     return fsetpos(fp, &offset);
714 #else
715 #error "Large file support, but no way to fseek."
716 #endif
717 }
718 
719 
720 /* a portable ftell() function
721    Return -1 on failure with errno set appropriately, current file
722    position on success */
723 static Py_off_t
_portable_ftell(FILE * fp)724 _portable_ftell(FILE* fp)
725 {
726 #if !defined(HAVE_LARGEFILE_SUPPORT)
727     return ftell(fp);
728 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
729     return ftello(fp);
730 #elif defined(HAVE_FTELL64)
731     return ftell64(fp);
732 #elif SIZEOF_FPOS_T >= 8
733     fpos_t pos;
734     if (fgetpos(fp, &pos) != 0)
735         return -1;
736     return pos;
737 #else
738 #error "Large file support, but no way to ftell."
739 #endif
740 }
741 
742 
743 static PyObject *
file_seek(PyFileObject * f,PyObject * args)744 file_seek(PyFileObject *f, PyObject *args)
745 {
746     int whence;
747     int ret;
748     Py_off_t offset;
749     PyObject *offobj, *off_index;
750 
751     if (f->f_fp == NULL)
752         return err_closed();
753     drop_readahead(f);
754     whence = 0;
755     if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
756         return NULL;
757     off_index = PyNumber_Index(offobj);
758     if (!off_index) {
759         if (!PyFloat_Check(offobj))
760             return NULL;
761         /* Deprecated in 2.6 */
762         PyErr_Clear();
763         if (PyErr_WarnEx(PyExc_DeprecationWarning,
764                          "integer argument expected, got float",
765                          1) < 0)
766             return NULL;
767         off_index = offobj;
768         Py_INCREF(offobj);
769     }
770 #if !defined(HAVE_LARGEFILE_SUPPORT)
771     offset = PyInt_AsLong(off_index);
772 #else
773     offset = PyLong_Check(off_index) ?
774         PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
775 #endif
776     Py_DECREF(off_index);
777     if (PyErr_Occurred())
778         return NULL;
779 
780     FILE_BEGIN_ALLOW_THREADS(f)
781     errno = 0;
782     ret = _portable_fseek(f->f_fp, offset, whence);
783     FILE_END_ALLOW_THREADS(f)
784 
785     if (ret != 0) {
786         PyErr_SetFromErrno(PyExc_IOError);
787         clearerr(f->f_fp);
788         return NULL;
789     }
790     f->f_skipnextlf = 0;
791     Py_INCREF(Py_None);
792     return Py_None;
793 }
794 
795 
796 #ifdef HAVE_FTRUNCATE
797 static PyObject *
file_truncate(PyFileObject * f,PyObject * args)798 file_truncate(PyFileObject *f, PyObject *args)
799 {
800     Py_off_t newsize;
801     PyObject *newsizeobj = NULL;
802     Py_off_t initialpos;
803     int ret;
804 
805     if (f->f_fp == NULL)
806         return err_closed();
807     if (!f->writable)
808         return err_mode("writing");
809     if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
810         return NULL;
811 
812     /* Get current file position.  If the file happens to be open for
813      * update and the last operation was an input operation, C doesn't
814      * define what the later fflush() will do, but we promise truncate()
815      * won't change the current position (and fflush() *does* change it
816      * then at least on Windows).  The easiest thing is to capture
817      * current pos now and seek back to it at the end.
818      */
819     FILE_BEGIN_ALLOW_THREADS(f)
820     errno = 0;
821     initialpos = _portable_ftell(f->f_fp);
822     FILE_END_ALLOW_THREADS(f)
823     if (initialpos == -1)
824         goto onioerror;
825 
826     /* Set newsize to current postion if newsizeobj NULL, else to the
827      * specified value.
828      */
829     if (newsizeobj != NULL) {
830 #if !defined(HAVE_LARGEFILE_SUPPORT)
831         newsize = PyInt_AsLong(newsizeobj);
832 #else
833         newsize = PyLong_Check(newsizeobj) ?
834                         PyLong_AsLongLong(newsizeobj) :
835                 PyInt_AsLong(newsizeobj);
836 #endif
837         if (PyErr_Occurred())
838             return NULL;
839     }
840     else /* default to current position */
841         newsize = initialpos;
842 
843     /* Flush the stream.  We're mixing stream-level I/O with lower-level
844      * I/O, and a flush may be necessary to synch both platform views
845      * of the current file state.
846      */
847     FILE_BEGIN_ALLOW_THREADS(f)
848     errno = 0;
849     ret = fflush(f->f_fp);
850     FILE_END_ALLOW_THREADS(f)
851     if (ret != 0)
852         goto onioerror;
853 
854 #ifdef MS_WINDOWS
855     /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
856        so don't even try using it. */
857     {
858         HANDLE hFile;
859 
860         /* Have to move current pos to desired endpoint on Windows. */
861         FILE_BEGIN_ALLOW_THREADS(f)
862         errno = 0;
863         ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
864         FILE_END_ALLOW_THREADS(f)
865         if (ret)
866             goto onioerror;
867 
868         /* Truncate.  Note that this may grow the file! */
869         FILE_BEGIN_ALLOW_THREADS(f)
870         errno = 0;
871         hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
872         ret = hFile == (HANDLE)-1;
873         if (ret == 0) {
874             ret = SetEndOfFile(hFile) == 0;
875             if (ret)
876                 errno = EACCES;
877         }
878         FILE_END_ALLOW_THREADS(f)
879         if (ret)
880             goto onioerror;
881     }
882 #else
883     FILE_BEGIN_ALLOW_THREADS(f)
884     errno = 0;
885     ret = ftruncate(fileno(f->f_fp), newsize);
886     FILE_END_ALLOW_THREADS(f)
887     if (ret != 0)
888         goto onioerror;
889 #endif /* !MS_WINDOWS */
890 
891     /* Restore original file position. */
892     FILE_BEGIN_ALLOW_THREADS(f)
893     errno = 0;
894     ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
895     FILE_END_ALLOW_THREADS(f)
896     if (ret)
897         goto onioerror;
898 
899     Py_INCREF(Py_None);
900     return Py_None;
901 
902 onioerror:
903     PyErr_SetFromErrno(PyExc_IOError);
904     clearerr(f->f_fp);
905     return NULL;
906 }
907 #endif /* HAVE_FTRUNCATE */
908 
909 static PyObject *
file_tell(PyFileObject * f)910 file_tell(PyFileObject *f)
911 {
912     Py_off_t pos;
913 
914     if (f->f_fp == NULL)
915         return err_closed();
916     FILE_BEGIN_ALLOW_THREADS(f)
917     errno = 0;
918     pos = _portable_ftell(f->f_fp);
919     FILE_END_ALLOW_THREADS(f)
920 
921     if (pos == -1) {
922         PyErr_SetFromErrno(PyExc_IOError);
923         clearerr(f->f_fp);
924         return NULL;
925     }
926     if (f->f_skipnextlf) {
927         int c;
928         c = GETC(f->f_fp);
929         if (c == '\n') {
930             f->f_newlinetypes |= NEWLINE_CRLF;
931             pos++;
932             f->f_skipnextlf = 0;
933         } else if (c != EOF) ungetc(c, f->f_fp);
934     }
935 #if !defined(HAVE_LARGEFILE_SUPPORT)
936     return PyInt_FromLong(pos);
937 #else
938     return PyLong_FromLongLong(pos);
939 #endif
940 }
941 
942 static PyObject *
file_fileno(PyFileObject * f)943 file_fileno(PyFileObject *f)
944 {
945     if (f->f_fp == NULL)
946         return err_closed();
947     return PyInt_FromLong((long) fileno(f->f_fp));
948 }
949 
950 static PyObject *
file_flush(PyFileObject * f)951 file_flush(PyFileObject *f)
952 {
953     int res;
954 
955     if (f->f_fp == NULL)
956         return err_closed();
957     FILE_BEGIN_ALLOW_THREADS(f)
958     errno = 0;
959     res = fflush(f->f_fp);
960     FILE_END_ALLOW_THREADS(f)
961     if (res != 0) {
962         PyErr_SetFromErrno(PyExc_IOError);
963         clearerr(f->f_fp);
964         return NULL;
965     }
966     Py_INCREF(Py_None);
967     return Py_None;
968 }
969 
970 static PyObject *
file_isatty(PyFileObject * f)971 file_isatty(PyFileObject *f)
972 {
973     long res;
974     if (f->f_fp == NULL)
975         return err_closed();
976     FILE_BEGIN_ALLOW_THREADS(f)
977     res = isatty((int)fileno(f->f_fp));
978     FILE_END_ALLOW_THREADS(f)
979     return PyBool_FromLong(res);
980 }
981 
982 
983 #if BUFSIZ < 8192
984 #define SMALLCHUNK 8192
985 #else
986 #define SMALLCHUNK BUFSIZ
987 #endif
988 
989 #if SIZEOF_INT < 4
990 #define BIGCHUNK  (512 * 32)
991 #else
992 #define BIGCHUNK  (512 * 1024)
993 #endif
994 
995 static size_t
new_buffersize(PyFileObject * f,size_t currentsize)996 new_buffersize(PyFileObject *f, size_t currentsize)
997 {
998 #ifdef HAVE_FSTAT
999     off_t pos, end;
1000     struct stat st;
1001     if (fstat(fileno(f->f_fp), &st) == 0) {
1002         end = st.st_size;
1003         /* The following is not a bug: we really need to call lseek()
1004            *and* ftell().  The reason is that some stdio libraries
1005            mistakenly flush their buffer when ftell() is called and
1006            the lseek() call it makes fails, thereby throwing away
1007            data that cannot be recovered in any way.  To avoid this,
1008            we first test lseek(), and only call ftell() if lseek()
1009            works.  We can't use the lseek() value either, because we
1010            need to take the amount of buffered data into account.
1011            (Yet another reason why stdio stinks. :-) */
1012         pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
1013         if (pos >= 0) {
1014             pos = ftell(f->f_fp);
1015         }
1016         if (pos < 0)
1017             clearerr(f->f_fp);
1018         if (end > pos && pos >= 0)
1019             return currentsize + end - pos + 1;
1020         /* Add 1 so if the file were to grow we'd notice. */
1021     }
1022 #endif
1023     if (currentsize > SMALLCHUNK) {
1024         /* Keep doubling until we reach BIGCHUNK;
1025            then keep adding BIGCHUNK. */
1026         if (currentsize <= BIGCHUNK)
1027             return currentsize + currentsize;
1028         else
1029             return currentsize + BIGCHUNK;
1030     }
1031     return currentsize + SMALLCHUNK;
1032 }
1033 
/* BLOCKED_ERRNO(x) is true when x equals EWOULDBLOCK or EAGAIN, using
   whichever of the two the platform defines; it is constant 0 when neither
   is defined. */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
1047 
1048 static PyObject *
file_read(PyFileObject * f,PyObject * args)1049 file_read(PyFileObject *f, PyObject *args)
1050 {
1051     long bytesrequested = -1;
1052     size_t bytesread, buffersize, chunksize;
1053     PyObject *v;
1054 
1055     if (f->f_fp == NULL)
1056         return err_closed();
1057     if (!f->readable)
1058         return err_mode("reading");
1059     /* refuse to mix with f.next() */
1060     if (f->f_buf != NULL &&
1061         (f->f_bufend - f->f_bufptr) > 0 &&
1062         f->f_buf[0] != '\0')
1063         return err_iterbuffered();
1064     if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
1065         return NULL;
1066     if (bytesrequested < 0)
1067         buffersize = new_buffersize(f, (size_t)0);
1068     else
1069         buffersize = bytesrequested;
1070     if (buffersize > PY_SSIZE_T_MAX) {
1071         PyErr_SetString(PyExc_OverflowError,
1072     "requested number of bytes is more than a Python string can hold");
1073         return NULL;
1074     }
1075     v = PyString_FromStringAndSize((char *)NULL, buffersize);
1076     if (v == NULL)
1077         return NULL;
1078     bytesread = 0;
1079     for (;;) {
1080         FILE_BEGIN_ALLOW_THREADS(f)
1081         errno = 0;
1082         chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
1083                   buffersize - bytesread, f->f_fp, (PyObject *)f);
1084         FILE_END_ALLOW_THREADS(f)
1085         if (chunksize == 0) {
1086             if (!ferror(f->f_fp))
1087                 break;
1088             clearerr(f->f_fp);
1089             /* When in non-blocking mode, data shouldn't
1090              * be discarded if a blocking signal was
1091              * received. That will also happen if
1092              * chunksize != 0, but bytesread < buffersize. */
1093             if (bytesread > 0 && BLOCKED_ERRNO(errno))
1094                 break;
1095             PyErr_SetFromErrno(PyExc_IOError);
1096             Py_DECREF(v);
1097             return NULL;
1098         }
1099         bytesread += chunksize;
1100         if (bytesread < buffersize) {
1101             clearerr(f->f_fp);
1102             break;
1103         }
1104         if (bytesrequested < 0) {
1105             buffersize = new_buffersize(f, buffersize);
1106             if (_PyString_Resize(&v, buffersize) < 0)
1107                 return NULL;
1108         } else {
1109             /* Got what was requested. */
1110             break;
1111         }
1112     }
1113     if (bytesread != buffersize && _PyString_Resize(&v, bytesread))
1114         return NULL;
1115     return v;
1116 }
1117 
1118 static PyObject *
file_readinto(PyFileObject * f,PyObject * args)1119 file_readinto(PyFileObject *f, PyObject *args)
1120 {
1121     char *ptr;
1122     Py_ssize_t ntodo;
1123     Py_ssize_t ndone, nnow;
1124     Py_buffer pbuf;
1125 
1126     if (f->f_fp == NULL)
1127         return err_closed();
1128     if (!f->readable)
1129         return err_mode("reading");
1130     /* refuse to mix with f.next() */
1131     if (f->f_buf != NULL &&
1132         (f->f_bufend - f->f_bufptr) > 0 &&
1133         f->f_buf[0] != '\0')
1134         return err_iterbuffered();
1135     if (!PyArg_ParseTuple(args, "w*", &pbuf))
1136         return NULL;
1137     ptr = pbuf.buf;
1138     ntodo = pbuf.len;
1139     ndone = 0;
1140     while (ntodo > 0) {
1141         FILE_BEGIN_ALLOW_THREADS(f)
1142         errno = 0;
1143         nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1144                                         (PyObject *)f);
1145         FILE_END_ALLOW_THREADS(f)
1146         if (nnow == 0) {
1147             if (!ferror(f->f_fp))
1148                 break;
1149             PyErr_SetFromErrno(PyExc_IOError);
1150             clearerr(f->f_fp);
1151             PyBuffer_Release(&pbuf);
1152             return NULL;
1153         }
1154         ndone += nnow;
1155         ntodo -= nnow;
1156     }
1157     PyBuffer_Release(&pbuf);
1158     return PyInt_FromSsize_t(ndone);
1159 }
1160 
1161 /**************************************************************************
1162 Routine to get next line using platform fgets().
1163 
1164 Under MSVC 6:
1165 
1166 + MS threadsafe getc is very slow (multiple layers of function calls before+
1167   after each character, to lock+unlock the stream).
1168 + The stream-locking functions are MS-internal -- can't access them from user
1169   code.
1170 + There's nothing Tim could find in the MS C or platform SDK libraries that
1171   can worm around this.
1172 + MS fgets locks/unlocks only once per line; it's the only hook we have.
1173 
1174 So we use fgets for speed(!), despite that it's painful.
1175 
1176 MS realloc is also slow.
1177 
1178 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1179 have):
1180     Linux               a wash
1181     Solaris             a wash
1182     Tru64 Unix          getline_via_fgets significantly faster
1183 
1184 CAUTION:  The C std isn't clear about this:  in those cases where fgets
1185 writes something into the buffer, can it write into any position beyond the
1186 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
1187 known on which it does; and it would be a strange way to code fgets. Still,
1188 getline_via_fgets may not work correctly if it does.  The std test
1189 test_bufio.py should fail if platform fgets() routinely writes beyond the
1190 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1191 **************************************************************************/
1192 
/* Decide whether the fgets()-based line reader is compiled in.
 * It is used if requested, or by default on platforms without
 * getc_unlocked() unless that is explicitly refused.  Spelled out:
 * On a platform with getc_unlocked():
 *     By default, use getc_unlocked().
 *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 * On a platform without getc_unlocked():
 *     By default, use fgets().
 *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 * DONT_USE_FGETS_IN_GETLINE wins when both macros are defined.
 */
#ifndef HAVE_GETC_UNLOCKED
#ifndef USE_FGETS_IN_GETLINE
#define USE_FGETS_IN_GETLINE
#endif
#endif

#ifdef DONT_USE_FGETS_IN_GETLINE
#undef USE_FGETS_IN_GETLINE
#endif
1209 
#ifdef USE_FGETS_IN_GETLINE
/* Read one line from fp with fgets() and return it as a new string object.
 *
 * The region handed to fgets() is pre-filled with '\n' so that the amount
 * of data fgets() stored can be worked out afterwards: a '\n' followed by
 * '\0' was written by fgets(), whereas a '\n' preceded by '\0' is leftover
 * filler and marks a final line that has no newline.  A stack buffer is
 * tried first (INITBUFSIZE, then MAXBUFSIZE slots); only longer lines
 * allocate a string object, which is then grown by a quarter of its size
 * per round.
 *
 * Returns NULL when PyErr_CheckSignals() reports an exception, when
 * allocating or resizing the string fails, or when the line would exceed
 * PY_SSIZE_T_MAX (OverflowError).  When fgets() returns NULL the stream's
 * indicators are cleared and whatever was collected so far (possibly an
 * empty string) is returned.
 *
 * Buffer sizes are passed to the string API as Py_ssize_t instead of being
 * narrowed to int, and the growth step is capped so that the slot count
 * given to fgets() always fits in an int.
 */
static PyObject*
getline_via_fgets(PyFileObject *f, FILE *fp)
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
 * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 * to fill this much of the buffer with a known value in order to figure out
 * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 * than "most" lines, we waste time filling unused buffer slots.  100 is
 * surely adequate for most peoples' email archives, chewing over source code,
 * etc -- "regular old text files".
 * MAXBUFSIZE is the maximum line length that lets us get away with the less
 * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 * cautions about boosting that.  300 was chosen because the worst real-life
 * text-crunching job reported on Python-Dev was a mail-log crawler where over
 * half the lines were 254 chars.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
    char* p;            /* temp */
    char buf[MAXBUFSIZE];
    PyObject* v;        /* the string object result */
    char* pvfree;       /* address of next free slot */
    char* pvend;    /* address one beyond last free slot */
    size_t nfree;       /* # of free buffer slots; pvend-pvfree */
    size_t total_v_size;  /* total # of slots in buffer */
    size_t increment;           /* amount to increment the buffer */
    size_t prev_v_size;

    /* Optimize for normal case:  avoid _PyString_Resize if at all
     * possible via first reading into stack buffer "buf".
     */
    total_v_size = INITBUFSIZE;         /* start small and pray */
    pvfree = buf;
    for (;;) {
        FILE_BEGIN_ALLOW_THREADS(f)
        pvend = buf + total_v_size;
        nfree = pvend - pvfree;
        memset(pvfree, '\n', nfree);
        assert(nfree < INT_MAX); /* Should be at most MAXBUFSIZE */
        p = fgets(pvfree, (int)nfree, fp);
        FILE_END_ALLOW_THREADS(f)

        if (p == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals())
                return NULL;
            v = PyString_FromStringAndSize(buf, pvfree - buf);
            return v;
        }
        /* fgets read *something* */
        p = memchr(pvfree, '\n', nfree);
        if (p != NULL) {
            /* Did the \n come from fgets or from us?
             * Since fgets stops at the first \n, and then writes
             * \0, if it's from fgets a \0 must be next.  But if
             * that's so, it could not have come from us, since
             * the \n's we filled the buffer with have only more
             * \n's to the right.
             */
            if (p+1 < pvend && *(p+1) == '\0') {
                /* It's from fgets:  we win!  In particular,
                 * we haven't done any mallocs yet, and can
                 * build the final result on the first try.
                 */
                ++p;                    /* include \n from fgets */
            }
            else {
                /* Must be from us:  fgets didn't fill the
                 * buffer and didn't find a newline, so it
                 * must be the last and newline-free line of
                 * the file.
                 */
                assert(p > pvfree && *(p-1) == '\0');
                --p;                    /* don't include \0 from fgets */
            }
            v = PyString_FromStringAndSize(buf, p - buf);
            return v;
        }
        /* yuck:  fgets overwrote all the newlines, i.e. the entire
         * buffer.  So this line isn't over yet, or maybe it is but
         * we're exactly at EOF.  If we haven't already, try using the
         * rest of the stack buffer.
         */
        assert(*(pvend-1) == '\0');
        if (pvfree == buf) {
            pvfree = pvend - 1;                 /* overwrite trailing null */
            total_v_size = MAXBUFSIZE;
        }
        else
            break;
    }

    /* The stack buffer isn't big enough; malloc a string object and read
     * into its buffer.
     */
    total_v_size = MAXBUFSIZE << 1;
    v = PyString_FromStringAndSize((char*)NULL, (Py_ssize_t)total_v_size);
    if (v == NULL)
        return v;
    /* copy over everything except the last null byte */
    memcpy(BUF(v), buf, MAXBUFSIZE-1);
    pvfree = BUF(v) + MAXBUFSIZE - 1;

    /* Keep reading stuff into v; if it ever ends successfully, break
     * after setting p one beyond the end of the line.  The code here is
     * very much like the code above, except reads into v's buffer; see
     * the code above for detailed comments about the logic.
     */
    for (;;) {
        FILE_BEGIN_ALLOW_THREADS(f)
        pvend = BUF(v) + total_v_size;
        nfree = pvend - pvfree;
        memset(pvfree, '\n', nfree);
        assert(nfree < INT_MAX);
        p = fgets(pvfree, (int)nfree, fp);
        FILE_END_ALLOW_THREADS(f)

        if (p == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(v);
                return NULL;
            }
            p = pvfree;
            break;
        }
        p = memchr(pvfree, '\n', nfree);
        if (p != NULL) {
            if (p+1 < pvend && *(p+1) == '\0') {
                /* \n came from fgets */
                ++p;
                break;
            }
            /* \n came from us; last line of file, no newline */
            assert(p > pvfree && *(p-1) == '\0');
            --p;
            break;
        }
        /* expand buffer and try again */
        assert(*(pvend-1) == '\0');
        increment = total_v_size >> 2;          /* mild exponential growth */
        /* The next fgets() call receives increment + 1 slots as an int
         * (the new space plus the overwritten trailing null), so keep
         * that count strictly below INT_MAX.
         */
        if (increment > (size_t)(INT_MAX - 2))
            increment = (size_t)(INT_MAX - 2);
        prev_v_size = total_v_size;
        total_v_size += increment;
        /* check for overflow */
        if (total_v_size <= prev_v_size ||
            total_v_size > PY_SSIZE_T_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
            Py_DECREF(v);
            return NULL;
        }
        /* Pass the size as Py_ssize_t: it was just checked against
         * PY_SSIZE_T_MAX, and narrowing it to int would corrupt sizes
         * above INT_MAX.
         */
        if (_PyString_Resize(&v, (Py_ssize_t)total_v_size) < 0)
            return NULL;
        /* overwrite the trailing null byte */
        pvfree = BUF(v) + (prev_v_size - 1);
    }
    if (BUF(v) + total_v_size != p && _PyString_Resize(&v, p - BUF(v)))
        return NULL;
    return v;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif  /* ifdef USE_FGETS_IN_GETLINE */
1374 
1375 /* Internal routine to get a line.
1376    Size argument interpretation:
1377    > 0: max length;
1378    <= 0: read arbitrary line
1379 */
1380 
1381 static PyObject *
get_line(PyFileObject * f,int n)1382 get_line(PyFileObject *f, int n)
1383 {
1384     FILE *fp = f->f_fp;
1385     int c;
1386     char *buf, *end;
1387     size_t total_v_size;        /* total # of slots in buffer */
1388     size_t used_v_size;         /* # used slots in buffer */
1389     size_t increment;       /* amount to increment the buffer */
1390     PyObject *v;
1391     int newlinetypes = f->f_newlinetypes;
1392     int skipnextlf = f->f_skipnextlf;
1393     int univ_newline = f->f_univ_newline;
1394 
1395 #if defined(USE_FGETS_IN_GETLINE)
1396     if (n <= 0 && !univ_newline )
1397         return getline_via_fgets(f, fp);
1398 #endif
1399     total_v_size = n > 0 ? n : 100;
1400     v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1401     if (v == NULL)
1402         return NULL;
1403     buf = BUF(v);
1404     end = buf + total_v_size;
1405 
1406     for (;;) {
1407         FILE_BEGIN_ALLOW_THREADS(f)
1408         FLOCKFILE(fp);
1409         if (univ_newline) {
1410             c = 'x'; /* Shut up gcc warning */
1411             while ( buf != end && (c = GETC(fp)) != EOF ) {
1412                 if (skipnextlf ) {
1413                     skipnextlf = 0;
1414                     if (c == '\n') {
1415                         /* Seeing a \n here with
1416                          * skipnextlf true means we
1417                          * saw a \r before.
1418                          */
1419                         newlinetypes |= NEWLINE_CRLF;
1420                         c = GETC(fp);
1421                         if (c == EOF) break;
1422                     } else {
1423                         newlinetypes |= NEWLINE_CR;
1424                     }
1425                 }
1426                 if (c == '\r') {
1427                     skipnextlf = 1;
1428                     c = '\n';
1429                 } else if ( c == '\n')
1430                     newlinetypes |= NEWLINE_LF;
1431                 *buf++ = c;
1432                 if (c == '\n') break;
1433             }
1434             if ( c == EOF && skipnextlf )
1435                 newlinetypes |= NEWLINE_CR;
1436         } else /* If not universal newlines use the normal loop */
1437         while ((c = GETC(fp)) != EOF &&
1438                (*buf++ = c) != '\n' &&
1439             buf != end)
1440             ;
1441         FUNLOCKFILE(fp);
1442         FILE_END_ALLOW_THREADS(f)
1443         f->f_newlinetypes = newlinetypes;
1444         f->f_skipnextlf = skipnextlf;
1445         if (c == '\n')
1446             break;
1447         if (c == EOF) {
1448             if (ferror(fp)) {
1449                 PyErr_SetFromErrno(PyExc_IOError);
1450                 clearerr(fp);
1451                 Py_DECREF(v);
1452                 return NULL;
1453             }
1454             clearerr(fp);
1455             if (PyErr_CheckSignals()) {
1456                 Py_DECREF(v);
1457                 return NULL;
1458             }
1459             break;
1460         }
1461         /* Must be because buf == end */
1462         if (n > 0)
1463             break;
1464         used_v_size = total_v_size;
1465         increment = total_v_size >> 2; /* mild exponential growth */
1466         total_v_size += increment;
1467         if (total_v_size > PY_SSIZE_T_MAX) {
1468             PyErr_SetString(PyExc_OverflowError,
1469                 "line is longer than a Python string can hold");
1470             Py_DECREF(v);
1471             return NULL;
1472         }
1473         if (_PyString_Resize(&v, total_v_size) < 0)
1474             return NULL;
1475         buf = BUF(v) + used_v_size;
1476         end = BUF(v) + total_v_size;
1477     }
1478 
1479     used_v_size = buf - BUF(v);
1480     if (used_v_size != total_v_size && _PyString_Resize(&v, used_v_size))
1481         return NULL;
1482     return v;
1483 }
1484 
1485 /* External C interface */
1486 
1487 PyObject *
PyFile_GetLine(PyObject * f,int n)1488 PyFile_GetLine(PyObject *f, int n)
1489 {
1490     PyObject *result;
1491 
1492     if (f == NULL) {
1493         PyErr_BadInternalCall();
1494         return NULL;
1495     }
1496 
1497     if (PyFile_Check(f)) {
1498         PyFileObject *fo = (PyFileObject *)f;
1499         if (fo->f_fp == NULL)
1500             return err_closed();
1501         if (!fo->readable)
1502             return err_mode("reading");
1503         /* refuse to mix with f.next() */
1504         if (fo->f_buf != NULL &&
1505             (fo->f_bufend - fo->f_bufptr) > 0 &&
1506             fo->f_buf[0] != '\0')
1507             return err_iterbuffered();
1508         result = get_line(fo, n);
1509     }
1510     else {
1511         PyObject *reader;
1512         PyObject *args;
1513 
1514         reader = PyObject_GetAttrString(f, "readline");
1515         if (reader == NULL)
1516             return NULL;
1517         if (n <= 0)
1518             args = PyTuple_New(0);
1519         else
1520             args = Py_BuildValue("(i)", n);
1521         if (args == NULL) {
1522             Py_DECREF(reader);
1523             return NULL;
1524         }
1525         result = PyEval_CallObject(reader, args);
1526         Py_DECREF(reader);
1527         Py_DECREF(args);
1528         if (result != NULL && !PyString_Check(result) &&
1529             !PyUnicode_Check(result)) {
1530             Py_DECREF(result);
1531             result = NULL;
1532             PyErr_SetString(PyExc_TypeError,
1533                        "object.readline() returned non-string");
1534         }
1535     }
1536 
1537     if (n < 0 && result != NULL && PyString_Check(result)) {
1538         char *s = PyString_AS_STRING(result);
1539         Py_ssize_t len = PyString_GET_SIZE(result);
1540         if (len == 0) {
1541             Py_DECREF(result);
1542             result = NULL;
1543             PyErr_SetString(PyExc_EOFError,
1544                             "EOF when reading a line");
1545         }
1546         else if (s[len-1] == '\n') {
1547             if (result->ob_refcnt == 1) {
1548                 if (_PyString_Resize(&result, len-1))
1549                     return NULL;
1550             }
1551             else {
1552                 PyObject *v;
1553                 v = PyString_FromStringAndSize(s, len-1);
1554                 Py_DECREF(result);
1555                 result = v;
1556             }
1557         }
1558     }
1559 #ifdef Py_USING_UNICODE
1560     if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1561         Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1562         Py_ssize_t len = PyUnicode_GET_SIZE(result);
1563         if (len == 0) {
1564             Py_DECREF(result);
1565             result = NULL;
1566             PyErr_SetString(PyExc_EOFError,
1567                             "EOF when reading a line");
1568         }
1569         else if (s[len-1] == '\n') {
1570             if (result->ob_refcnt == 1)
1571                 PyUnicode_Resize(&result, len-1);
1572             else {
1573                 PyObject *v;
1574                 v = PyUnicode_FromUnicode(s, len-1);
1575                 Py_DECREF(result);
1576                 result = v;
1577             }
1578         }
1579     }
1580 #endif
1581     return result;
1582 }
1583 
1584 /* Python method */
1585 
1586 static PyObject *
file_readline(PyFileObject * f,PyObject * args)1587 file_readline(PyFileObject *f, PyObject *args)
1588 {
1589     int n = -1;
1590 
1591     if (f->f_fp == NULL)
1592         return err_closed();
1593     if (!f->readable)
1594         return err_mode("reading");
1595     /* refuse to mix with f.next() */
1596     if (f->f_buf != NULL &&
1597         (f->f_bufend - f->f_bufptr) > 0 &&
1598         f->f_buf[0] != '\0')
1599         return err_iterbuffered();
1600     if (!PyArg_ParseTuple(args, "|i:readline", &n))
1601         return NULL;
1602     if (n == 0)
1603         return PyString_FromString("");
1604     if (n < 0)
1605         n = 0;
1606     return get_line(f, n);
1607 }
1608 
1609 static PyObject *
file_readlines(PyFileObject * f,PyObject * args)1610 file_readlines(PyFileObject *f, PyObject *args)
1611 {
1612     long sizehint = 0;
1613     PyObject *list = NULL;
1614     PyObject *line;
1615     char small_buffer[SMALLCHUNK];
1616     char *buffer = small_buffer;
1617     size_t buffersize = SMALLCHUNK;
1618     PyObject *big_buffer = NULL;
1619     size_t nfilled = 0;
1620     size_t nread;
1621     size_t totalread = 0;
1622     char *p, *q, *end;
1623     int err;
1624     int shortread = 0;
1625 
1626     if (f->f_fp == NULL)
1627         return err_closed();
1628     if (!f->readable)
1629         return err_mode("reading");
1630     /* refuse to mix with f.next() */
1631     if (f->f_buf != NULL &&
1632         (f->f_bufend - f->f_bufptr) > 0 &&
1633         f->f_buf[0] != '\0')
1634         return err_iterbuffered();
1635     if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1636         return NULL;
1637     if ((list = PyList_New(0)) == NULL)
1638         return NULL;
1639     for (;;) {
1640         if (shortread)
1641             nread = 0;
1642         else {
1643             FILE_BEGIN_ALLOW_THREADS(f)
1644             errno = 0;
1645             nread = Py_UniversalNewlineFread(buffer+nfilled,
1646                 buffersize-nfilled, f->f_fp, (PyObject *)f);
1647             FILE_END_ALLOW_THREADS(f)
1648             shortread = (nread < buffersize-nfilled);
1649         }
1650         if (nread == 0) {
1651             sizehint = 0;
1652             if (!ferror(f->f_fp))
1653                 break;
1654             PyErr_SetFromErrno(PyExc_IOError);
1655             clearerr(f->f_fp);
1656             goto error;
1657         }
1658         totalread += nread;
1659         p = (char *)memchr(buffer+nfilled, '\n', nread);
1660         if (p == NULL) {
1661             /* Need a larger buffer to fit this line */
1662             nfilled += nread;
1663             buffersize *= 2;
1664             if (buffersize > PY_SSIZE_T_MAX) {
1665                 PyErr_SetString(PyExc_OverflowError,
1666                 "line is longer than a Python string can hold");
1667                 goto error;
1668             }
1669             if (big_buffer == NULL) {
1670                 /* Create the big buffer */
1671                 big_buffer = PyString_FromStringAndSize(
1672                     NULL, buffersize);
1673                 if (big_buffer == NULL)
1674                     goto error;
1675                 buffer = PyString_AS_STRING(big_buffer);
1676                 memcpy(buffer, small_buffer, nfilled);
1677             }
1678             else {
1679                 /* Grow the big buffer */
1680                 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1681                     goto error;
1682                 buffer = PyString_AS_STRING(big_buffer);
1683             }
1684             continue;
1685         }
1686         end = buffer+nfilled+nread;
1687         q = buffer;
1688         do {
1689             /* Process complete lines */
1690             p++;
1691             line = PyString_FromStringAndSize(q, p-q);
1692             if (line == NULL)
1693                 goto error;
1694             err = PyList_Append(list, line);
1695             Py_DECREF(line);
1696             if (err != 0)
1697                 goto error;
1698             q = p;
1699             p = (char *)memchr(q, '\n', end-q);
1700         } while (p != NULL);
1701         /* Move the remaining incomplete line to the start */
1702         nfilled = end-q;
1703         memmove(buffer, q, nfilled);
1704         if (sizehint > 0)
1705             if (totalread >= (size_t)sizehint)
1706                 break;
1707     }
1708     if (nfilled != 0) {
1709         /* Partial last line */
1710         line = PyString_FromStringAndSize(buffer, nfilled);
1711         if (line == NULL)
1712             goto error;
1713         if (sizehint > 0) {
1714             /* Need to complete the last line */
1715             PyObject *rest = get_line(f, 0);
1716             if (rest == NULL) {
1717                 Py_DECREF(line);
1718                 goto error;
1719             }
1720             PyString_Concat(&line, rest);
1721             Py_DECREF(rest);
1722             if (line == NULL)
1723                 goto error;
1724         }
1725         err = PyList_Append(list, line);
1726         Py_DECREF(line);
1727         if (err != 0)
1728             goto error;
1729     }
1730 
1731 cleanup:
1732     Py_XDECREF(big_buffer);
1733     return list;
1734 
1735 error:
1736     Py_CLEAR(list);
1737     goto cleanup;
1738 }
1739 
1740 static PyObject *
file_write(PyFileObject * f,PyObject * args)1741 file_write(PyFileObject *f, PyObject *args)
1742 {
1743     Py_buffer pbuf;
1744     const char *s;
1745     Py_ssize_t n, n2;
1746     PyObject *encoded = NULL;
1747 
1748     if (f->f_fp == NULL)
1749         return err_closed();
1750     if (!f->writable)
1751         return err_mode("writing");
1752     if (f->f_binary) {
1753         if (!PyArg_ParseTuple(args, "s*", &pbuf))
1754             return NULL;
1755         s = pbuf.buf;
1756         n = pbuf.len;
1757     }
1758     else {
1759         const char *encoding, *errors;
1760         PyObject *text;
1761         if (!PyArg_ParseTuple(args, "O", &text))
1762             return NULL;
1763 
1764         if (PyString_Check(text)) {
1765             s = PyString_AS_STRING(text);
1766             n = PyString_GET_SIZE(text);
1767         } else if (PyUnicode_Check(text)) {
1768             if (f->f_encoding != Py_None)
1769                 encoding = PyString_AS_STRING(f->f_encoding);
1770             else
1771                 encoding = PyUnicode_GetDefaultEncoding();
1772             if (f->f_errors != Py_None)
1773                 errors = PyString_AS_STRING(f->f_errors);
1774             else
1775                 errors = "strict";
1776             encoded = PyUnicode_AsEncodedString(text, encoding, errors);
1777             if (encoded == NULL)
1778                 return NULL;
1779             s = PyString_AS_STRING(encoded);
1780             n = PyString_GET_SIZE(encoded);
1781         } else {
1782             if (PyObject_AsCharBuffer(text, &s, &n))
1783                 return NULL;
1784         }
1785     }
1786     f->f_softspace = 0;
1787     FILE_BEGIN_ALLOW_THREADS(f)
1788     errno = 0;
1789     n2 = fwrite(s, 1, n, f->f_fp);
1790     FILE_END_ALLOW_THREADS(f)
1791     Py_XDECREF(encoded);
1792     if (f->f_binary)
1793         PyBuffer_Release(&pbuf);
1794     if (n2 != n) {
1795         PyErr_SetFromErrno(PyExc_IOError);
1796         clearerr(f->f_fp);
1797         return NULL;
1798     }
1799     Py_INCREF(Py_None);
1800     return Py_None;
1801 }
1802 
1803 static PyObject *
file_writelines(PyFileObject * f,PyObject * seq)1804 file_writelines(PyFileObject *f, PyObject *seq)
1805 {
1806 #define CHUNKSIZE 1000
1807     PyObject *list, *line;
1808     PyObject *it;       /* iter(seq) */
1809     PyObject *result;
1810     int index, islist;
1811     Py_ssize_t i, j, nwritten, len;
1812 
1813     assert(seq != NULL);
1814     if (f->f_fp == NULL)
1815         return err_closed();
1816     if (!f->writable)
1817         return err_mode("writing");
1818 
1819     result = NULL;
1820     list = NULL;
1821     islist = PyList_Check(seq);
1822     if  (islist)
1823         it = NULL;
1824     else {
1825         it = PyObject_GetIter(seq);
1826         if (it == NULL) {
1827             PyErr_SetString(PyExc_TypeError,
1828                 "writelines() requires an iterable argument");
1829             return NULL;
1830         }
1831         /* From here on, fail by going to error, to reclaim "it". */
1832         list = PyList_New(CHUNKSIZE);
1833         if (list == NULL)
1834             goto error;
1835     }
1836 
1837     /* Strategy: slurp CHUNKSIZE lines into a private list,
1838        checking that they are all strings, then write that list
1839        without holding the interpreter lock, then come back for more. */
1840     for (index = 0; ; index += CHUNKSIZE) {
1841         if (islist) {
1842             Py_XDECREF(list);
1843             list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1844             if (list == NULL)
1845                 goto error;
1846             j = PyList_GET_SIZE(list);
1847         }
1848         else {
1849             for (j = 0; j < CHUNKSIZE; j++) {
1850                 line = PyIter_Next(it);
1851                 if (line == NULL) {
1852                     if (PyErr_Occurred())
1853                         goto error;
1854                     break;
1855                 }
1856                 PyList_SetItem(list, j, line);
1857             }
1858             /* The iterator might have closed the file on us. */
1859             if (f->f_fp == NULL) {
1860                 err_closed();
1861                 goto error;
1862             }
1863         }
1864         if (j == 0)
1865             break;
1866 
1867         /* Check that all entries are indeed strings. If not,
1868            apply the same rules as for file.write() and
1869            convert the results to strings. This is slow, but
1870            seems to be the only way since all conversion APIs
1871            could potentially execute Python code. */
1872         for (i = 0; i < j; i++) {
1873             PyObject *v = PyList_GET_ITEM(list, i);
1874             if (!PyString_Check(v)) {
1875                 const char *buffer;
1876                 if (((f->f_binary &&
1877                       PyObject_AsReadBuffer(v,
1878                           (const void**)&buffer,
1879                                         &len)) ||
1880                      PyObject_AsCharBuffer(v,
1881                                            &buffer,
1882                                            &len))) {
1883                     PyErr_SetString(PyExc_TypeError,
1884             "writelines() argument must be a sequence of strings");
1885                             goto error;
1886                 }
1887                 line = PyString_FromStringAndSize(buffer,
1888                                                   len);
1889                 if (line == NULL)
1890                     goto error;
1891                 Py_DECREF(v);
1892                 PyList_SET_ITEM(list, i, line);
1893             }
1894         }
1895 
1896         /* Since we are releasing the global lock, the
1897            following code may *not* execute Python code. */
1898         f->f_softspace = 0;
1899         FILE_BEGIN_ALLOW_THREADS(f)
1900         errno = 0;
1901         for (i = 0; i < j; i++) {
1902             line = PyList_GET_ITEM(list, i);
1903             len = PyString_GET_SIZE(line);
1904             nwritten = fwrite(PyString_AS_STRING(line),
1905                               1, len, f->f_fp);
1906             if (nwritten != len) {
1907                 FILE_ABORT_ALLOW_THREADS(f)
1908                 PyErr_SetFromErrno(PyExc_IOError);
1909                 clearerr(f->f_fp);
1910                 goto error;
1911             }
1912         }
1913         FILE_END_ALLOW_THREADS(f)
1914 
1915         if (j < CHUNKSIZE)
1916             break;
1917     }
1918 
1919     Py_INCREF(Py_None);
1920     result = Py_None;
1921   error:
1922     Py_XDECREF(list);
1923     Py_XDECREF(it);
1924     return result;
1925 #undef CHUNKSIZE
1926 }
1927 
1928 static PyObject *
file_self(PyFileObject * f)1929 file_self(PyFileObject *f)
1930 {
1931     if (f->f_fp == NULL)
1932         return err_closed();
1933     Py_INCREF(f);
1934     return (PyObject *)f;
1935 }
1936 
1937 static PyObject *
file_xreadlines(PyFileObject * f)1938 file_xreadlines(PyFileObject *f)
1939 {
1940     if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
1941                        "try 'for line in f' instead", 1) < 0)
1942            return NULL;
1943     return file_self(f);
1944 }
1945 
1946 static PyObject *
file_exit(PyObject * f,PyObject * args)1947 file_exit(PyObject *f, PyObject *args)
1948 {
1949     PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1950     if (!ret)
1951         /* If error occurred, pass through */
1952         return NULL;
1953     Py_DECREF(ret);
1954     /* We cannot return the result of close since a true
1955      * value will be interpreted as "yes, swallow the
1956      * exception if one was raised inside the with block". */
1957     Py_RETURN_NONE;
1958 }
1959 
1960 PyDoc_STRVAR(readline_doc,
1961 "readline([size]) -> next line from the file, as a string.\n"
1962 "\n"
1963 "Retain newline.  A non-negative size argument limits the maximum\n"
1964 "number of bytes to return (an incomplete line may be returned then).\n"
1965 "Return an empty string at EOF.");
1966 
1967 PyDoc_STRVAR(read_doc,
1968 "read([size]) -> read at most size bytes, returned as a string.\n"
1969 "\n"
1970 "If the size argument is negative or omitted, read until EOF is reached.\n"
1971 "Notice that when in non-blocking mode, less data than what was requested\n"
1972 "may be returned, even if no size parameter was given.");
1973 
1974 PyDoc_STRVAR(write_doc,
1975 "write(str) -> None.  Write string str to file.\n"
1976 "\n"
1977 "Note that due to buffering, flush() or close() may be needed before\n"
1978 "the file on disk reflects the data written.");
1979 
1980 PyDoc_STRVAR(fileno_doc,
1981 "fileno() -> integer \"file descriptor\".\n"
1982 "\n"
1983 "This is needed for lower-level file interfaces, such os.read().");
1984 
1985 PyDoc_STRVAR(seek_doc,
1986 "seek(offset[, whence]) -> None.  Move to new file position.\n"
1987 "\n"
1988 "Argument offset is a byte count.  Optional argument whence defaults to\n"
1989 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1990 "(move relative to current position, positive or negative), and 2 (move\n"
1991 "relative to end of file, usually negative, although many platforms allow\n"
1992 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
1993 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
1994 "undefined behavior."
1995 "\n"
1996 "Note that not all file objects are seekable.");
1997 
1998 #ifdef HAVE_FTRUNCATE
1999 PyDoc_STRVAR(truncate_doc,
2000 "truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
2001 "\n"
2002 "Size defaults to the current file position, as returned by tell().");
2003 #endif
2004 
2005 PyDoc_STRVAR(tell_doc,
2006 "tell() -> current file position, an integer (may be a long integer).");
2007 
2008 PyDoc_STRVAR(readinto_doc,
2009 "readinto() -> Undocumented.  Don't use this; it may go away.");
2010 
2011 PyDoc_STRVAR(readlines_doc,
2012 "readlines([size]) -> list of strings, each a line from the file.\n"
2013 "\n"
2014 "Call readline() repeatedly and return a list of the lines so read.\n"
2015 "The optional size argument, if given, is an approximate bound on the\n"
2016 "total number of bytes in the lines returned.");
2017 
2018 PyDoc_STRVAR(xreadlines_doc,
2019 "xreadlines() -> returns self.\n"
2020 "\n"
2021 "For backward compatibility. File objects now include the performance\n"
2022 "optimizations previously implemented in the xreadlines module.");
2023 
2024 PyDoc_STRVAR(writelines_doc,
2025 "writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
2026 "\n"
2027 "Note that newlines are not added.  The sequence can be any iterable object\n"
2028 "producing strings. This is equivalent to calling write() for each string.");
2029 
2030 PyDoc_STRVAR(flush_doc,
2031 "flush() -> None.  Flush the internal I/O buffer.");
2032 
2033 PyDoc_STRVAR(close_doc,
2034 "close() -> None or (perhaps) an integer.  Close the file.\n"
2035 "\n"
2036 "Sets data attribute .closed to True.  A closed file cannot be used for\n"
2037 "further I/O operations.  close() may be called more than once without\n"
2038 "error.  Some kinds of file objects (for example, opened by popen())\n"
2039 "may return an exit status upon closing.");
2040 
2041 PyDoc_STRVAR(isatty_doc,
2042 "isatty() -> true or false.  True if the file is connected to a tty device.");
2043 
2044 PyDoc_STRVAR(enter_doc,
2045              "__enter__() -> self.");
2046 
2047 PyDoc_STRVAR(exit_doc,
2048              "__exit__(*excinfo) -> None.  Closes the file.");
2049 
2050 static PyMethodDef file_methods[] = {
2051     {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
2052     {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
2053     {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
2054     {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
2055     {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
2056 #ifdef HAVE_FTRUNCATE
2057     {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
2058 #endif
2059     {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
2060     {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
2061     {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
2062     {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
2063     {"writelines",(PyCFunction)file_writelines, METH_O,     writelines_doc},
2064     {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
2065     {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
2066     {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
2067     {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
2068     {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
2069     {NULL,            NULL}             /* sentinel */
2070 };
2071 
2072 #define OFF(x) offsetof(PyFileObject, x)
2073 
2074 static PyMemberDef file_memberlist[] = {
2075     {"mode",            T_OBJECT,       OFF(f_mode),    RO,
2076      "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
2077     {"name",            T_OBJECT,       OFF(f_name),    RO,
2078      "file name"},
2079     {"encoding",        T_OBJECT,       OFF(f_encoding),        RO,
2080      "file encoding"},
2081     {"errors",          T_OBJECT,       OFF(f_errors),  RO,
2082      "Unicode error handler"},
2083     /* getattr(f, "closed") is implemented without this table */
2084     {NULL}      /* Sentinel */
2085 };
2086 
2087 static PyObject *
get_closed(PyFileObject * f,void * closure)2088 get_closed(PyFileObject *f, void *closure)
2089 {
2090     return PyBool_FromLong((long)(f->f_fp == 0));
2091 }
2092 static PyObject *
get_newlines(PyFileObject * f,void * closure)2093 get_newlines(PyFileObject *f, void *closure)
2094 {
2095     switch (f->f_newlinetypes) {
2096     case NEWLINE_UNKNOWN:
2097         Py_INCREF(Py_None);
2098         return Py_None;
2099     case NEWLINE_CR:
2100         return PyString_FromString("\r");
2101     case NEWLINE_LF:
2102         return PyString_FromString("\n");
2103     case NEWLINE_CR|NEWLINE_LF:
2104         return Py_BuildValue("(ss)", "\r", "\n");
2105     case NEWLINE_CRLF:
2106         return PyString_FromString("\r\n");
2107     case NEWLINE_CR|NEWLINE_CRLF:
2108         return Py_BuildValue("(ss)", "\r", "\r\n");
2109     case NEWLINE_LF|NEWLINE_CRLF:
2110         return Py_BuildValue("(ss)", "\n", "\r\n");
2111     case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
2112         return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
2113     default:
2114         PyErr_Format(PyExc_SystemError,
2115                      "Unknown newlines value 0x%x\n",
2116                      f->f_newlinetypes);
2117         return NULL;
2118     }
2119 }
2120 
2121 static PyObject *
get_softspace(PyFileObject * f,void * closure)2122 get_softspace(PyFileObject *f, void *closure)
2123 {
2124     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2125         return NULL;
2126     return PyInt_FromLong(f->f_softspace);
2127 }
2128 
2129 static int
set_softspace(PyFileObject * f,PyObject * value)2130 set_softspace(PyFileObject *f, PyObject *value)
2131 {
2132     int new;
2133     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2134         return -1;
2135 
2136     if (value == NULL) {
2137         PyErr_SetString(PyExc_TypeError,
2138                         "can't delete softspace attribute");
2139         return -1;
2140     }
2141 
2142     new = PyInt_AsLong(value);
2143     if (new == -1 && PyErr_Occurred())
2144         return -1;
2145     f->f_softspace = new;
2146     return 0;
2147 }
2148 
2149 static PyGetSetDef file_getsetlist[] = {
2150     {"closed", (getter)get_closed, NULL, "True if the file is closed"},
2151     {"newlines", (getter)get_newlines, NULL,
2152      "end-of-line convention used in this file"},
2153     {"softspace", (getter)get_softspace, (setter)set_softspace,
2154      "flag indicating that a space needs to be printed; used by print"},
2155     {0},
2156 };
2157 
2158 static void
drop_readahead(PyFileObject * f)2159 drop_readahead(PyFileObject *f)
2160 {
2161     if (f->f_buf != NULL) {
2162         PyMem_Free(f->f_buf);
2163         f->f_buf = NULL;
2164     }
2165 }
2166 
2167 /* Make sure that file has a readahead buffer with at least one byte
2168    (unless at EOF) and no more than bufsize.  Returns negative value on
2169    error, will set MemoryError if bufsize bytes cannot be allocated. */
2170 static int
readahead(PyFileObject * f,int bufsize)2171 readahead(PyFileObject *f, int bufsize)
2172 {
2173     Py_ssize_t chunksize;
2174 
2175     if (f->f_buf != NULL) {
2176         if( (f->f_bufend - f->f_bufptr) >= 1)
2177             return 0;
2178         else
2179             drop_readahead(f);
2180     }
2181     if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2182         PyErr_NoMemory();
2183         return -1;
2184     }
2185     FILE_BEGIN_ALLOW_THREADS(f)
2186     errno = 0;
2187     chunksize = Py_UniversalNewlineFread(
2188         f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2189     FILE_END_ALLOW_THREADS(f)
2190     if (chunksize == 0) {
2191         if (ferror(f->f_fp)) {
2192             PyErr_SetFromErrno(PyExc_IOError);
2193             clearerr(f->f_fp);
2194             drop_readahead(f);
2195             return -1;
2196         }
2197     }
2198     f->f_bufptr = f->f_buf;
2199     f->f_bufend = f->f_buf + chunksize;
2200     return 0;
2201 }
2202 
2203 /* Used by file_iternext.  The returned string will start with 'skip'
2204    uninitialized bytes followed by the remainder of the line. Don't be
2205    horrified by the recursive call: maximum recursion depth is limited by
2206    logarithmic buffer growth to about 50 even when reading a 1gb line. */
2207 
2208 static PyStringObject *
readahead_get_line_skip(PyFileObject * f,int skip,int bufsize)2209 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
2210 {
2211     PyStringObject* s;
2212     char *bufptr;
2213     char *buf;
2214     Py_ssize_t len;
2215 
2216     if (f->f_buf == NULL)
2217         if (readahead(f, bufsize) < 0)
2218             return NULL;
2219 
2220     len = f->f_bufend - f->f_bufptr;
2221     if (len == 0)
2222         return (PyStringObject *)
2223             PyString_FromStringAndSize(NULL, skip);
2224     bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2225     if (bufptr != NULL) {
2226         bufptr++;                               /* Count the '\n' */
2227         len = bufptr - f->f_bufptr;
2228         s = (PyStringObject *)
2229             PyString_FromStringAndSize(NULL, skip+len);
2230         if (s == NULL)
2231             return NULL;
2232         memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
2233         f->f_bufptr = bufptr;
2234         if (bufptr == f->f_bufend)
2235             drop_readahead(f);
2236     } else {
2237         bufptr = f->f_bufptr;
2238         buf = f->f_buf;
2239         f->f_buf = NULL;                /* Force new readahead buffer */
2240         assert(skip+len < INT_MAX);
2241         s = readahead_get_line_skip(
2242             f, (int)(skip+len), bufsize + (bufsize>>2) );
2243         if (s == NULL) {
2244             PyMem_Free(buf);
2245             return NULL;
2246         }
2247         memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
2248         PyMem_Free(buf);
2249     }
2250     return s;
2251 }
2252 
2253 /* A larger buffer size may actually decrease performance. */
2254 #define READAHEAD_BUFSIZE 8192
2255 
2256 static PyObject *
file_iternext(PyFileObject * f)2257 file_iternext(PyFileObject *f)
2258 {
2259     PyStringObject* l;
2260 
2261     if (f->f_fp == NULL)
2262         return err_closed();
2263     if (!f->readable)
2264         return err_mode("reading");
2265 
2266     l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2267     if (l == NULL || PyString_GET_SIZE(l) == 0) {
2268         Py_XDECREF(l);
2269         return NULL;
2270     }
2271     return (PyObject *)l;
2272 }
2273 
2274 
2275 static PyObject *
file_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2276 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2277 {
2278     PyObject *self;
2279     static PyObject *not_yet_string;
2280 
2281     assert(type != NULL && type->tp_alloc != NULL);
2282 
2283     if (not_yet_string == NULL) {
2284         not_yet_string = PyString_InternFromString("<uninitialized file>");
2285         if (not_yet_string == NULL)
2286             return NULL;
2287     }
2288 
2289     self = type->tp_alloc(type, 0);
2290     if (self != NULL) {
2291         /* Always fill in the name and mode, so that nobody else
2292            needs to special-case NULLs there. */
2293         Py_INCREF(not_yet_string);
2294         ((PyFileObject *)self)->f_name = not_yet_string;
2295         Py_INCREF(not_yet_string);
2296         ((PyFileObject *)self)->f_mode = not_yet_string;
2297         Py_INCREF(Py_None);
2298         ((PyFileObject *)self)->f_encoding = Py_None;
2299         Py_INCREF(Py_None);
2300         ((PyFileObject *)self)->f_errors = Py_None;
2301         ((PyFileObject *)self)->weakreflist = NULL;
2302         ((PyFileObject *)self)->unlocked_count = 0;
2303     }
2304     return self;
2305 }
2306 
2307 static int
file_init(PyObject * self,PyObject * args,PyObject * kwds)2308 file_init(PyObject *self, PyObject *args, PyObject *kwds)
2309 {
2310     PyFileObject *foself = (PyFileObject *)self;
2311     int ret = 0;
2312     static char *kwlist[] = {"name", "mode", "buffering", 0};
2313     char *name = NULL;
2314     char *mode = "r";
2315     int bufsize = -1;
2316     int wideargument = 0;
2317 #ifdef MS_WINDOWS
2318     PyObject *po;
2319 #endif
2320 
2321     assert(PyFile_Check(self));
2322     if (foself->f_fp != NULL) {
2323         /* Have to close the existing file first. */
2324         PyObject *closeresult = file_close(foself);
2325         if (closeresult == NULL)
2326             return -1;
2327         Py_DECREF(closeresult);
2328     }
2329 
2330 #ifdef MS_WINDOWS
2331     if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2332                                     kwlist, &po, &mode, &bufsize)) {
2333         wideargument = 1;
2334         if (fill_file_fields(foself, NULL, po, mode,
2335                              fclose) == NULL)
2336             goto Error;
2337     } else {
2338         /* Drop the argument parsing error as narrow
2339            strings are also valid. */
2340         PyErr_Clear();
2341     }
2342 #endif
2343 
2344     if (!wideargument) {
2345         PyObject *o_name;
2346 
2347         if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2348                                          Py_FileSystemDefaultEncoding,
2349                                          &name,
2350                                          &mode, &bufsize))
2351             return -1;
2352 
2353         /* We parse again to get the name as a PyObject */
2354         if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2355                                          kwlist, &o_name, &mode,
2356                                          &bufsize))
2357             goto Error;
2358 
2359         if (fill_file_fields(foself, NULL, o_name, mode,
2360                              fclose) == NULL)
2361             goto Error;
2362     }
2363     if (open_the_file(foself, name, mode) == NULL)
2364         goto Error;
2365     foself->f_setbuf = NULL;
2366     PyFile_SetBufSize(self, bufsize);
2367     goto Done;
2368 
2369 Error:
2370     ret = -1;
2371     /* fall through */
2372 Done:
2373     PyMem_Free(name); /* free the encoded string */
2374     return ret;
2375 }
2376 
2377 PyDoc_VAR(file_doc) =
2378 PyDoc_STR(
2379 "file(name[, mode[, buffering]]) -> file object\n"
2380 "\n"
2381 "Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
2382 "writing or appending.  The file will be created if it doesn't exist\n"
2383 "when opened for writing or appending; it will be truncated when\n"
2384 "opened for writing.  Add a 'b' to the mode for binary files.\n"
2385 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2386 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2387 "buffered, and larger numbers specify the buffer size.  The preferred way\n"
2388 "to open a file is with the builtin open() function.\n"
2389 )
2390 PyDoc_STR(
2391 "Add a 'U' to mode to open the file for input with universal newline\n"
2392 "support.  Any line ending in the input file will be seen as a '\\n'\n"
2393 "in Python.  Also, a file so opened gains the attribute 'newlines';\n"
2394 "the value for this attribute is one of None (no newline read yet),\n"
2395 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2396 "\n"
2397 "'U' cannot be combined with 'w' or '+' mode.\n"
2398 );
2399 
2400 PyTypeObject PyFile_Type = {
2401     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2402     "file",
2403     sizeof(PyFileObject),
2404     0,
2405     (destructor)file_dealloc,                   /* tp_dealloc */
2406     0,                                          /* tp_print */
2407     0,                                          /* tp_getattr */
2408     0,                                          /* tp_setattr */
2409     0,                                          /* tp_compare */
2410     (reprfunc)file_repr,                        /* tp_repr */
2411     0,                                          /* tp_as_number */
2412     0,                                          /* tp_as_sequence */
2413     0,                                          /* tp_as_mapping */
2414     0,                                          /* tp_hash */
2415     0,                                          /* tp_call */
2416     0,                                          /* tp_str */
2417     PyObject_GenericGetAttr,                    /* tp_getattro */
2418     /* softspace is writable:  we must supply tp_setattro */
2419     PyObject_GenericSetAttr,                    /* tp_setattro */
2420     0,                                          /* tp_as_buffer */
2421     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2422     file_doc,                                   /* tp_doc */
2423     0,                                          /* tp_traverse */
2424     0,                                          /* tp_clear */
2425     0,                                          /* tp_richcompare */
2426     offsetof(PyFileObject, weakreflist),        /* tp_weaklistoffset */
2427     (getiterfunc)file_self,                     /* tp_iter */
2428     (iternextfunc)file_iternext,                /* tp_iternext */
2429     file_methods,                               /* tp_methods */
2430     file_memberlist,                            /* tp_members */
2431     file_getsetlist,                            /* tp_getset */
2432     0,                                          /* tp_base */
2433     0,                                          /* tp_dict */
2434     0,                                          /* tp_descr_get */
2435     0,                                          /* tp_descr_set */
2436     0,                                          /* tp_dictoffset */
2437     file_init,                                  /* tp_init */
2438     PyType_GenericAlloc,                        /* tp_alloc */
2439     file_new,                                   /* tp_new */
2440     PyObject_Del,                           /* tp_free */
2441 };
2442 
2443 /* Interface for the 'soft space' between print items. */
2444 
2445 int
PyFile_SoftSpace(PyObject * f,int newflag)2446 PyFile_SoftSpace(PyObject *f, int newflag)
2447 {
2448     long oldflag = 0;
2449     if (f == NULL) {
2450         /* Do nothing */
2451     }
2452     else if (PyFile_Check(f)) {
2453         oldflag = ((PyFileObject *)f)->f_softspace;
2454         ((PyFileObject *)f)->f_softspace = newflag;
2455     }
2456     else {
2457         PyObject *v;
2458         v = PyObject_GetAttrString(f, "softspace");
2459         if (v == NULL)
2460             PyErr_Clear();
2461         else {
2462             if (PyInt_Check(v))
2463                 oldflag = PyInt_AsLong(v);
2464             assert(oldflag < INT_MAX);
2465             Py_DECREF(v);
2466         }
2467         v = PyInt_FromLong((long)newflag);
2468         if (v == NULL)
2469             PyErr_Clear();
2470         else {
2471             if (PyObject_SetAttrString(f, "softspace", v) != 0)
2472                 PyErr_Clear();
2473             Py_DECREF(v);
2474         }
2475     }
2476     return (int)oldflag;
2477 }
2478 
2479 /* Interfaces to write objects/strings to file-like objects */
2480 
2481 int
PyFile_WriteObject(PyObject * v,PyObject * f,int flags)2482 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2483 {
2484     PyObject *writer, *value, *args, *result;
2485     if (f == NULL) {
2486         PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2487         return -1;
2488     }
2489     else if (PyFile_Check(f)) {
2490         PyFileObject *fobj = (PyFileObject *) f;
2491 #ifdef Py_USING_UNICODE
2492         PyObject *enc = fobj->f_encoding;
2493         int result;
2494 #endif
2495         if (fobj->f_fp == NULL) {
2496             err_closed();
2497             return -1;
2498         }
2499 #ifdef Py_USING_UNICODE
2500         if ((flags & Py_PRINT_RAW) &&
2501             PyUnicode_Check(v) && enc != Py_None) {
2502             char *cenc = PyString_AS_STRING(enc);
2503             char *errors = fobj->f_errors == Py_None ?
2504               "strict" : PyString_AS_STRING(fobj->f_errors);
2505             value = PyUnicode_AsEncodedString(v, cenc, errors);
2506             if (value == NULL)
2507                 return -1;
2508         } else {
2509             value = v;
2510             Py_INCREF(value);
2511         }
2512         result = file_PyObject_Print(value, fobj, flags);
2513         Py_DECREF(value);
2514         return result;
2515 #else
2516         return file_PyObject_Print(v, fobj, flags);
2517 #endif
2518     }
2519     writer = PyObject_GetAttrString(f, "write");
2520     if (writer == NULL)
2521         return -1;
2522     if (flags & Py_PRINT_RAW) {
2523         if (PyUnicode_Check(v)) {
2524             value = v;
2525             Py_INCREF(value);
2526         } else
2527             value = PyObject_Str(v);
2528     }
2529     else
2530         value = PyObject_Repr(v);
2531     if (value == NULL) {
2532         Py_DECREF(writer);
2533         return -1;
2534     }
2535     args = PyTuple_Pack(1, value);
2536     if (args == NULL) {
2537         Py_DECREF(value);
2538         Py_DECREF(writer);
2539         return -1;
2540     }
2541     result = PyEval_CallObject(writer, args);
2542     Py_DECREF(args);
2543     Py_DECREF(value);
2544     Py_DECREF(writer);
2545     if (result == NULL)
2546         return -1;
2547     Py_DECREF(result);
2548     return 0;
2549 }
2550 
2551 int
PyFile_WriteString(const char * s,PyObject * f)2552 PyFile_WriteString(const char *s, PyObject *f)
2553 {
2554 
2555     if (f == NULL) {
2556         /* Should be caused by a pre-existing error */
2557         if (!PyErr_Occurred())
2558             PyErr_SetString(PyExc_SystemError,
2559                             "null file for PyFile_WriteString");
2560         return -1;
2561     }
2562     else if (PyFile_Check(f)) {
2563         PyFileObject *fobj = (PyFileObject *) f;
2564         FILE *fp = PyFile_AsFile(f);
2565         if (fp == NULL) {
2566             err_closed();
2567             return -1;
2568         }
2569         FILE_BEGIN_ALLOW_THREADS(fobj)
2570         fputs(s, fp);
2571         FILE_END_ALLOW_THREADS(fobj)
2572         return 0;
2573     }
2574     else if (!PyErr_Occurred()) {
2575         PyObject *v = PyString_FromString(s);
2576         int err;
2577         if (v == NULL)
2578             return -1;
2579         err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2580         Py_DECREF(v);
2581         return err;
2582     }
2583     else
2584         return -1;
2585 }
2586 
2587 /* Try to get a file-descriptor from a Python object.  If the object
2588    is an integer or long integer, its value is returned.  If not, the
2589    object's fileno() method is called if it exists; the method must return
2590    an integer or long integer, which is returned as the file descriptor value.
2591    -1 is returned on failure.
2592 */
2593 
PyObject_AsFileDescriptor(PyObject * o)2594 int PyObject_AsFileDescriptor(PyObject *o)
2595 {
2596     int fd;
2597     PyObject *meth;
2598 
2599     if (PyInt_Check(o)) {
2600         fd = PyInt_AsLong(o);
2601     }
2602     else if (PyLong_Check(o)) {
2603         fd = PyLong_AsLong(o);
2604     }
2605     else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2606     {
2607         PyObject *fno = PyEval_CallObject(meth, NULL);
2608         Py_DECREF(meth);
2609         if (fno == NULL)
2610             return -1;
2611 
2612         if (PyInt_Check(fno)) {
2613             fd = PyInt_AsLong(fno);
2614             Py_DECREF(fno);
2615         }
2616         else if (PyLong_Check(fno)) {
2617             fd = PyLong_AsLong(fno);
2618             Py_DECREF(fno);
2619         }
2620         else {
2621             PyErr_SetString(PyExc_TypeError,
2622                             "fileno() returned a non-integer");
2623             Py_DECREF(fno);
2624             return -1;
2625         }
2626     }
2627     else {
2628         PyErr_SetString(PyExc_TypeError,
2629                         "argument must be an int, or have a fileno() method.");
2630         return -1;
2631     }
2632 
2633     if (fd < 0) {
2634         PyErr_Format(PyExc_ValueError,
2635                      "file descriptor cannot be a negative integer (%i)",
2636                      fd);
2637         return -1;
2638     }
2639     return fd;
2640 }
2641 
2642 /* From here on we need access to the real fgets and fread */
2643 #undef fgets
2644 #undef fread
2645 
2646 /*
2647 ** Py_UniversalNewlineFgets is an fgets variation that understands
2648 ** all of \r, \n and \r\n conventions.
2649 ** The stream should be opened in binary mode.
2650 ** If fobj is NULL the routine always does newline conversion, and
2651 ** it may peek one char ahead to gobble the second char in \r\n.
2652 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2653 ** is no readahead but in stead a flag is used to skip a following
2654 ** \n on the next read. Also, if the file is open in binary mode
2655 ** the whole conversion is skipped. Finally, the routine keeps track of
2656 ** the different types of newlines seen.
2657 ** Note that we need no error handling: fgets() treats error and eof
2658 ** identically.
2659 */
2660 char *
Py_UniversalNewlineFgets(char * buf,int n,FILE * stream,PyObject * fobj)2661 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2662 {
2663     char *p = buf;
2664     int c;
2665     int newlinetypes = 0;
2666     int skipnextlf = 0;
2667     int univ_newline = 1;
2668 
2669     if (fobj) {
2670         if (!PyFile_Check(fobj)) {
2671             errno = ENXIO;              /* What can you do... */
2672             return NULL;
2673         }
2674         univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2675         if ( !univ_newline )
2676             return fgets(buf, n, stream);
2677         newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2678         skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2679     }
2680     FLOCKFILE(stream);
2681     c = 'x'; /* Shut up gcc warning */
2682     while (--n > 0 && (c = GETC(stream)) != EOF ) {
2683         if (skipnextlf ) {
2684             skipnextlf = 0;
2685             if (c == '\n') {
2686                 /* Seeing a \n here with skipnextlf true
2687                 ** means we saw a \r before.
2688                 */
2689                 newlinetypes |= NEWLINE_CRLF;
2690                 c = GETC(stream);
2691                 if (c == EOF) break;
2692             } else {
2693                 /*
2694                 ** Note that c == EOF also brings us here,
2695                 ** so we're okay if the last char in the file
2696                 ** is a CR.
2697                 */
2698                 newlinetypes |= NEWLINE_CR;
2699             }
2700         }
2701         if (c == '\r') {
2702             /* A \r is translated into a \n, and we skip
2703             ** an adjacent \n, if any. We don't set the
2704             ** newlinetypes flag until we've seen the next char.
2705             */
2706             skipnextlf = 1;
2707             c = '\n';
2708         } else if ( c == '\n') {
2709             newlinetypes |= NEWLINE_LF;
2710         }
2711         *p++ = c;
2712         if (c == '\n') break;
2713     }
2714     if ( c == EOF && skipnextlf )
2715         newlinetypes |= NEWLINE_CR;
2716     FUNLOCKFILE(stream);
2717     *p = '\0';
2718     if (fobj) {
2719         ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2720         ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2721     } else if ( skipnextlf ) {
2722         /* If we have no file object we cannot save the
2723         ** skipnextlf flag. We have to readahead, which
2724         ** will cause a pause if we're reading from an
2725         ** interactive stream, but that is very unlikely
2726         ** unless we're doing something silly like
2727         ** execfile("/dev/tty").
2728         */
2729         c = GETC(stream);
2730         if ( c != '\n' )
2731             ungetc(c, stream);
2732     }
2733     if (p == buf)
2734         return NULL;
2735     return buf;
2736 }
2737 
2738 /*
2739 ** Py_UniversalNewlineFread is an fread variation that understands
2740 ** all of \r, \n and \r\n conventions.
2741 ** The stream should be opened in binary mode.
2742 ** fobj must be a PyFileObject. In this case there
2743 ** is no readahead but in stead a flag is used to skip a following
2744 ** \n on the next read. Also, if the file is open in binary mode
2745 ** the whole conversion is skipped. Finally, the routine keeps track of
2746 ** the different types of newlines seen.
2747 */
2748 size_t
Py_UniversalNewlineFread(char * buf,size_t n,FILE * stream,PyObject * fobj)2749 Py_UniversalNewlineFread(char *buf, size_t n,
2750                          FILE *stream, PyObject *fobj)
2751 {
2752     char *dst = buf;
2753     PyFileObject *f = (PyFileObject *)fobj;
2754     int newlinetypes, skipnextlf;
2755 
2756     assert(buf != NULL);
2757     assert(stream != NULL);
2758 
2759     if (!fobj || !PyFile_Check(fobj)) {
2760         errno = ENXIO;          /* What can you do... */
2761         return 0;
2762     }
2763     if (!f->f_univ_newline)
2764         return fread(buf, 1, n, stream);
2765     newlinetypes = f->f_newlinetypes;
2766     skipnextlf = f->f_skipnextlf;
2767     /* Invariant:  n is the number of bytes remaining to be filled
2768      * in the buffer.
2769      */
2770     while (n) {
2771         size_t nread;
2772         int shortread;
2773         char *src = dst;
2774 
2775         nread = fread(dst, 1, n, stream);
2776         assert(nread <= n);
2777         if (nread == 0)
2778             break;
2779 
2780         n -= nread; /* assuming 1 byte out for each in; will adjust */
2781         shortread = n != 0;             /* true iff EOF or error */
2782         while (nread--) {
2783             char c = *src++;
2784             if (c == '\r') {
2785                 /* Save as LF and set flag to skip next LF. */
2786                 *dst++ = '\n';
2787                 skipnextlf = 1;
2788             }
2789             else if (skipnextlf && c == '\n') {
2790                 /* Skip LF, and remember we saw CR LF. */
2791                 skipnextlf = 0;
2792                 newlinetypes |= NEWLINE_CRLF;
2793                 ++n;
2794             }
2795             else {
2796                 /* Normal char to be stored in buffer.  Also
2797                  * update the newlinetypes flag if either this
2798                  * is an LF or the previous char was a CR.
2799                  */
2800                 if (c == '\n')
2801                     newlinetypes |= NEWLINE_LF;
2802                 else if (skipnextlf)
2803                     newlinetypes |= NEWLINE_CR;
2804                 *dst++ = c;
2805                 skipnextlf = 0;
2806             }
2807         }
2808         if (shortread) {
2809             /* If this is EOF, update type flags. */
2810             if (skipnextlf && feof(stream))
2811                 newlinetypes |= NEWLINE_CR;
2812             break;
2813         }
2814     }
2815     f->f_newlinetypes = newlinetypes;
2816     f->f_skipnextlf = skipnextlf;
2817     return dst - buf;
2818 }
2819 
2820 #ifdef __cplusplus
2821 }
2822 #endif
2823