1 /* MD5 module */
2 
3 /* This module provides an interface to the MD5 algorithm */
4 
5 /* See below for information about the original code this module was
6    based upon. Additional work performed by:
7 
8    Andrew Kuchling (amk@amk.ca)
9    Greg Stein (gstein@lyra.org)
10    Trevor Perrin (trevp@trevp.net)
11 
12    Copyright (C) 2005-2007   Gregory P. Smith (greg@krypto.org)
13    Licensed to PSF under a Contributor Agreement.
14 
15 */
16 
17 /* MD5 objects */
18 
19 #include "Python.h"
20 #include "hashlib.h"
21 #include "pystrhex.h"
22 
23 /*[clinic input]
24 module _md5
25 class MD5Type "MD5object *" "&PyType_Type"
26 [clinic start generated code]*/
27 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/
28 
/* Fixed-width integer types used by the MD5 implementation */

#if SIZEOF_INT == 4
typedef unsigned int MD5_INT32; /* 32-bit unsigned integer */
/* 64-bit message bit counter.  It is unsigned so that the additions in
   md5_process()/md5_done() wrap modulo 2**64 (the length semantics MD5
   specifies) rather than being signed-overflow undefined behaviour, and so
   that the right shifts performed by STORE64L are always well defined. */
typedef unsigned long long MD5_INT64; /* 64-bit unsigned integer */
#else
/* not defined. compilation will die. */
#endif
37 
38 /* The MD5 block size and message digest sizes, in bytes */
39 
40 #define MD5_BLOCKSIZE    64
41 #define MD5_DIGESTSIZE   16
42 
43 /* The structure for storing MD5 info */
44 
45 struct md5_state {
46     MD5_INT64 length;
47     MD5_INT32 state[4], curlen;
48     unsigned char buf[MD5_BLOCKSIZE];
49 };
50 
51 typedef struct {
52     PyObject_HEAD
53 
54     struct md5_state hash_state;
55 } MD5object;
56 
57 #include "clinic/md5module.c.h"
58 
59 /* ------------------------------------------------------------------------
60  *
61  * This code for the MD5 algorithm was noted as public domain. The
62  * original headers are pasted below.
63  *
64  * Several changes have been made to make it more compatible with the
65  * Python environment and desired interface.
66  *
67  */
68 
69 /* LibTomCrypt, modular cryptographic library -- Tom St Denis
70  *
71  * LibTomCrypt is a library that provides various cryptographic
72  * algorithms in a highly modular and flexible manner.
73  *
74  * The library is free for all purposes without any express
75  * guarantee it works.
76  *
77  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
78  */
79 
/* Rotate the low 32 bits of x left by (y mod 32) bits; the result is masked
 * to 32 bits.  x is evaluated twice, so it must be free of side effects.
 * (platform optimizations could be done)
 */
#define ROLc(x, y) \
    ( (((unsigned long)(x)<<(unsigned long)((y)&31)) | \
       (((unsigned long)(x)&0xFFFFFFFFUL)>>(unsigned long)(32-((y)&31)))) & 0xFFFFFFFFUL)

/* Endian Neutral macros that work on all platforms.
 *
 * Each macro moves an integer to or from a byte array in little-endian
 * order (least significant byte at index 0).  They are wrapped in
 * do { ... } while (0) so that an invocation followed by ';' is exactly one
 * statement and can be used safely inside an unbraced if/else.
 * Both arguments are evaluated several times.
 */

/* Store the low 32 bits of x into y[0..3]. */
#define STORE32L(x, y)                                   \
    do {                                                 \
        (y)[3] = (unsigned char)(((x)>>24)&255);         \
        (y)[2] = (unsigned char)(((x)>>16)&255);         \
        (y)[1] = (unsigned char)(((x)>>8)&255);          \
        (y)[0] = (unsigned char)((x)&255);               \
    } while (0)

/* Load y[0..3] into the lvalue x. */
#define LOAD32L(x, y)                                    \
    do {                                                 \
        (x) = ((unsigned long)((y)[3] & 255)<<24) |      \
              ((unsigned long)((y)[2] & 255)<<16) |      \
              ((unsigned long)((y)[1] & 255)<<8)  |      \
              ((unsigned long)((y)[0] & 255));           \
    } while (0)

/* Store the 64-bit value x into y[0..7]. */
#define STORE64L(x, y)                                   \
    do {                                                 \
        (y)[7] = (unsigned char)(((x)>>56)&255);         \
        (y)[6] = (unsigned char)(((x)>>48)&255);         \
        (y)[5] = (unsigned char)(((x)>>40)&255);         \
        (y)[4] = (unsigned char)(((x)>>32)&255);         \
        (y)[3] = (unsigned char)(((x)>>24)&255);         \
        (y)[2] = (unsigned char)(((x)>>16)&255);         \
        (y)[1] = (unsigned char)(((x)>>8)&255);          \
        (y)[0] = (unsigned char)((x)&255);               \
    } while (0)
100 
101 
/* MD5 macros */

/* The four bitwise mixing functions, one per round.  Every argument is
   parenthesized so that compound expressions can be passed safely. */
#define F(x,y,z)  ((z) ^ ((x) & ((y) ^ (z))))
#define G(x,y,z)  ((y) ^ ((z) & ((y) ^ (x))))
#define H(x,y,z)  ((x) ^ (y) ^ (z))
#define I(x,y,z)  ((y) ^ ((x) | (~(z))))

/* One step of each round: add the mixing function, the message word M and
   the constant t into a, rotate a left by s bits, then add b.
   Each macro expands to two complete statements (including the trailing
   ';'), so it is invoked WITHOUT a semicolon and must not be used as the
   unbraced body of an if/else or loop. */
#define FF(a,b,c,d,M,s,t) \
    (a) = ((a) + F((b),(c),(d)) + (M) + (t)); (a) = ROLc((a), (s)) + (b);

#define GG(a,b,c,d,M,s,t) \
    (a) = ((a) + G((b),(c),(d)) + (M) + (t)); (a) = ROLc((a), (s)) + (b);

#define HH(a,b,c,d,M,s,t) \
    (a) = ((a) + H((b),(c),(d)) + (M) + (t)); (a) = ROLc((a), (s)) + (b);

#define II(a,b,c,d,M,s,t) \
    (a) = ((a) + I((b),(c),(d)) + (M) + (t)); (a) = ROLc((a), (s)) + (b);
120 
121 
md5_compress(struct md5_state * md5,const unsigned char * buf)122 static void md5_compress(struct md5_state *md5, const unsigned char *buf)
123 {
124     MD5_INT32 i, W[16], a, b, c, d;
125 
126     assert(md5 != NULL);
127     assert(buf != NULL);
128 
129     /* copy the state into 512-bits into W[0..15] */
130     for (i = 0; i < 16; i++) {
131         LOAD32L(W[i], buf + (4*i));
132     }
133 
134     /* copy state */
135     a = md5->state[0];
136     b = md5->state[1];
137     c = md5->state[2];
138     d = md5->state[3];
139 
140     FF(a,b,c,d,W[0],7,0xd76aa478UL)
141     FF(d,a,b,c,W[1],12,0xe8c7b756UL)
142     FF(c,d,a,b,W[2],17,0x242070dbUL)
143     FF(b,c,d,a,W[3],22,0xc1bdceeeUL)
144     FF(a,b,c,d,W[4],7,0xf57c0fafUL)
145     FF(d,a,b,c,W[5],12,0x4787c62aUL)
146     FF(c,d,a,b,W[6],17,0xa8304613UL)
147     FF(b,c,d,a,W[7],22,0xfd469501UL)
148     FF(a,b,c,d,W[8],7,0x698098d8UL)
149     FF(d,a,b,c,W[9],12,0x8b44f7afUL)
150     FF(c,d,a,b,W[10],17,0xffff5bb1UL)
151     FF(b,c,d,a,W[11],22,0x895cd7beUL)
152     FF(a,b,c,d,W[12],7,0x6b901122UL)
153     FF(d,a,b,c,W[13],12,0xfd987193UL)
154     FF(c,d,a,b,W[14],17,0xa679438eUL)
155     FF(b,c,d,a,W[15],22,0x49b40821UL)
156     GG(a,b,c,d,W[1],5,0xf61e2562UL)
157     GG(d,a,b,c,W[6],9,0xc040b340UL)
158     GG(c,d,a,b,W[11],14,0x265e5a51UL)
159     GG(b,c,d,a,W[0],20,0xe9b6c7aaUL)
160     GG(a,b,c,d,W[5],5,0xd62f105dUL)
161     GG(d,a,b,c,W[10],9,0x02441453UL)
162     GG(c,d,a,b,W[15],14,0xd8a1e681UL)
163     GG(b,c,d,a,W[4],20,0xe7d3fbc8UL)
164     GG(a,b,c,d,W[9],5,0x21e1cde6UL)
165     GG(d,a,b,c,W[14],9,0xc33707d6UL)
166     GG(c,d,a,b,W[3],14,0xf4d50d87UL)
167     GG(b,c,d,a,W[8],20,0x455a14edUL)
168     GG(a,b,c,d,W[13],5,0xa9e3e905UL)
169     GG(d,a,b,c,W[2],9,0xfcefa3f8UL)
170     GG(c,d,a,b,W[7],14,0x676f02d9UL)
171     GG(b,c,d,a,W[12],20,0x8d2a4c8aUL)
172     HH(a,b,c,d,W[5],4,0xfffa3942UL)
173     HH(d,a,b,c,W[8],11,0x8771f681UL)
174     HH(c,d,a,b,W[11],16,0x6d9d6122UL)
175     HH(b,c,d,a,W[14],23,0xfde5380cUL)
176     HH(a,b,c,d,W[1],4,0xa4beea44UL)
177     HH(d,a,b,c,W[4],11,0x4bdecfa9UL)
178     HH(c,d,a,b,W[7],16,0xf6bb4b60UL)
179     HH(b,c,d,a,W[10],23,0xbebfbc70UL)
180     HH(a,b,c,d,W[13],4,0x289b7ec6UL)
181     HH(d,a,b,c,W[0],11,0xeaa127faUL)
182     HH(c,d,a,b,W[3],16,0xd4ef3085UL)
183     HH(b,c,d,a,W[6],23,0x04881d05UL)
184     HH(a,b,c,d,W[9],4,0xd9d4d039UL)
185     HH(d,a,b,c,W[12],11,0xe6db99e5UL)
186     HH(c,d,a,b,W[15],16,0x1fa27cf8UL)
187     HH(b,c,d,a,W[2],23,0xc4ac5665UL)
188     II(a,b,c,d,W[0],6,0xf4292244UL)
189     II(d,a,b,c,W[7],10,0x432aff97UL)
190     II(c,d,a,b,W[14],15,0xab9423a7UL)
191     II(b,c,d,a,W[5],21,0xfc93a039UL)
192     II(a,b,c,d,W[12],6,0x655b59c3UL)
193     II(d,a,b,c,W[3],10,0x8f0ccc92UL)
194     II(c,d,a,b,W[10],15,0xffeff47dUL)
195     II(b,c,d,a,W[1],21,0x85845dd1UL)
196     II(a,b,c,d,W[8],6,0x6fa87e4fUL)
197     II(d,a,b,c,W[15],10,0xfe2ce6e0UL)
198     II(c,d,a,b,W[6],15,0xa3014314UL)
199     II(b,c,d,a,W[13],21,0x4e0811a1UL)
200     II(a,b,c,d,W[4],6,0xf7537e82UL)
201     II(d,a,b,c,W[11],10,0xbd3af235UL)
202     II(c,d,a,b,W[2],15,0x2ad7d2bbUL)
203     II(b,c,d,a,W[9],21,0xeb86d391UL)
204 
205     md5->state[0] = md5->state[0] + a;
206     md5->state[1] = md5->state[1] + b;
207     md5->state[2] = md5->state[2] + c;
208     md5->state[3] = md5->state[3] + d;
209 }
210 
211 
212 /**
213    Initialize the hash state
214    @param sha1   The hash state you wish to initialize
215 */
216 static void
md5_init(struct md5_state * md5)217 md5_init(struct md5_state *md5)
218 {
219     assert(md5 != NULL);
220     md5->state[0] = 0x67452301UL;
221     md5->state[1] = 0xefcdab89UL;
222     md5->state[2] = 0x98badcfeUL;
223     md5->state[3] = 0x10325476UL;
224     md5->curlen = 0;
225     md5->length = 0;
226 }
227 
228 /**
229    Process a block of memory though the hash
230    @param sha1   The hash state
231    @param in     The data to hash
232    @param inlen  The length of the data (octets)
233 */
234 static void
md5_process(struct md5_state * md5,const unsigned char * in,Py_ssize_t inlen)235 md5_process(struct md5_state *md5, const unsigned char *in, Py_ssize_t inlen)
236 {
237     Py_ssize_t n;
238 
239     assert(md5 != NULL);
240     assert(in != NULL);
241     assert(md5->curlen <= sizeof(md5->buf));
242 
243     while (inlen > 0) {
244         if (md5->curlen == 0 && inlen >= MD5_BLOCKSIZE) {
245            md5_compress(md5, in);
246            md5->length    += MD5_BLOCKSIZE * 8;
247            in             += MD5_BLOCKSIZE;
248            inlen          -= MD5_BLOCKSIZE;
249         } else {
250            n = Py_MIN(inlen, (Py_ssize_t)(MD5_BLOCKSIZE - md5->curlen));
251            memcpy(md5->buf + md5->curlen, in, (size_t)n);
252            md5->curlen    += (MD5_INT32)n;
253            in             += n;
254            inlen          -= n;
255            if (md5->curlen == MD5_BLOCKSIZE) {
256               md5_compress(md5, md5->buf);
257               md5->length += 8*MD5_BLOCKSIZE;
258               md5->curlen = 0;
259            }
260        }
261     }
262 }
263 
264 /**
265    Terminate the hash to get the digest
266    @param sha1  The hash state
267    @param out [out] The destination of the hash (16 bytes)
268 */
269 static void
md5_done(struct md5_state * md5,unsigned char * out)270 md5_done(struct md5_state *md5, unsigned char *out)
271 {
272     int i;
273 
274     assert(md5 != NULL);
275     assert(out != NULL);
276     assert(md5->curlen < sizeof(md5->buf));
277 
278     /* increase the length of the message */
279     md5->length += md5->curlen * 8;
280 
281     /* append the '1' bit */
282     md5->buf[md5->curlen++] = (unsigned char)0x80;
283 
284     /* if the length is currently above 56 bytes we append zeros
285      * then compress.  Then we can fall back to padding zeros and length
286      * encoding like normal.
287      */
288     if (md5->curlen > 56) {
289         while (md5->curlen < 64) {
290             md5->buf[md5->curlen++] = (unsigned char)0;
291         }
292         md5_compress(md5, md5->buf);
293         md5->curlen = 0;
294     }
295 
296     /* pad up to 56 bytes of zeroes */
297     while (md5->curlen < 56) {
298         md5->buf[md5->curlen++] = (unsigned char)0;
299     }
300 
301     /* store length */
302     STORE64L(md5->length, md5->buf+56);
303     md5_compress(md5, md5->buf);
304 
305     /* copy output */
306     for (i = 0; i < 4; i++) {
307         STORE32L(md5->state[i], out+(4*i));
308     }
309 }
310 
311 /* .Source: /cvs/libtom/libtomcrypt/src/hashes/md5.c,v $ */
312 /* .Revision: 1.10 $ */
313 /* .Date: 2007/05/12 14:25:28 $ */
314 
315 /*
316  * End of copied MD5 code.
317  *
318  * ------------------------------------------------------------------------
319  */
320 
321 static PyTypeObject MD5type;
322 
323 
324 static MD5object *
newMD5object(void)325 newMD5object(void)
326 {
327     return (MD5object *)PyObject_New(MD5object, &MD5type);
328 }
329 
330 
331 /* Internal methods for a hash object */
332 
333 static void
MD5_dealloc(PyObject * ptr)334 MD5_dealloc(PyObject *ptr)
335 {
336     PyObject_Del(ptr);
337 }
338 
339 
340 /* External methods for a hash object */
341 
342 /*[clinic input]
343 MD5Type.copy
344 
345 Return a copy of the hash object.
346 [clinic start generated code]*/
347 
348 static PyObject *
MD5Type_copy_impl(MD5object * self)349 MD5Type_copy_impl(MD5object *self)
350 /*[clinic end generated code: output=596eb36852f02071 input=2c09e6d2493f3079]*/
351 {
352     MD5object *newobj;
353 
354     if ((newobj = newMD5object())==NULL)
355         return NULL;
356 
357     newobj->hash_state = self->hash_state;
358     return (PyObject *)newobj;
359 }
360 
361 /*[clinic input]
362 MD5Type.digest
363 
364 Return the digest value as a bytes object.
365 [clinic start generated code]*/
366 
367 static PyObject *
MD5Type_digest_impl(MD5object * self)368 MD5Type_digest_impl(MD5object *self)
369 /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/
370 {
371     unsigned char digest[MD5_DIGESTSIZE];
372     struct md5_state temp;
373 
374     temp = self->hash_state;
375     md5_done(&temp, digest);
376     return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE);
377 }
378 
379 /*[clinic input]
380 MD5Type.hexdigest
381 
382 Return the digest value as a string of hexadecimal digits.
383 [clinic start generated code]*/
384 
385 static PyObject *
MD5Type_hexdigest_impl(MD5object * self)386 MD5Type_hexdigest_impl(MD5object *self)
387 /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/
388 {
389     unsigned char digest[MD5_DIGESTSIZE];
390     struct md5_state temp;
391 
392     /* Get the raw (binary) digest value */
393     temp = self->hash_state;
394     md5_done(&temp, digest);
395 
396     return _Py_strhex((const char*)digest, MD5_DIGESTSIZE);
397 }
398 
399 /*[clinic input]
400 MD5Type.update
401 
402     obj: object
403     /
404 
405 Update this hash object's state with the provided string.
406 [clinic start generated code]*/
407 
408 static PyObject *
MD5Type_update(MD5object * self,PyObject * obj)409 MD5Type_update(MD5object *self, PyObject *obj)
410 /*[clinic end generated code: output=f6ad168416338423 input=6e1efcd9ecf17032]*/
411 {
412     Py_buffer buf;
413 
414     GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
415 
416     md5_process(&self->hash_state, buf.buf, buf.len);
417 
418     PyBuffer_Release(&buf);
419     Py_RETURN_NONE;
420 }
421 
422 static PyMethodDef MD5_methods[] = {
423     MD5TYPE_COPY_METHODDEF
424     MD5TYPE_DIGEST_METHODDEF
425     MD5TYPE_HEXDIGEST_METHODDEF
426     MD5TYPE_UPDATE_METHODDEF
427     {NULL,        NULL}         /* sentinel */
428 };
429 
430 static PyObject *
MD5_get_block_size(PyObject * self,void * closure)431 MD5_get_block_size(PyObject *self, void *closure)
432 {
433     return PyLong_FromLong(MD5_BLOCKSIZE);
434 }
435 
436 static PyObject *
MD5_get_name(PyObject * self,void * closure)437 MD5_get_name(PyObject *self, void *closure)
438 {
439     return PyUnicode_FromStringAndSize("md5", 3);
440 }
441 
442 static PyObject *
md5_get_digest_size(PyObject * self,void * closure)443 md5_get_digest_size(PyObject *self, void *closure)
444 {
445     return PyLong_FromLong(MD5_DIGESTSIZE);
446 }
447 
448 
449 static PyGetSetDef MD5_getseters[] = {
450     {"block_size",
451      (getter)MD5_get_block_size, NULL,
452      NULL,
453      NULL},
454     {"name",
455      (getter)MD5_get_name, NULL,
456      NULL,
457      NULL},
458     {"digest_size",
459      (getter)md5_get_digest_size, NULL,
460      NULL,
461      NULL},
462     {NULL}  /* Sentinel */
463 };
464 
465 static PyTypeObject MD5type = {
466     PyVarObject_HEAD_INIT(NULL, 0)
467     "_md5.md5",         /*tp_name*/
468     sizeof(MD5object),  /*tp_basicsize*/
469     0,                  /*tp_itemsize*/
470     /* methods */
471     MD5_dealloc,        /*tp_dealloc*/
472     0,                  /*tp_vectorcall_offset*/
473     0,                  /*tp_getattr*/
474     0,                  /*tp_setattr*/
475     0,                  /*tp_as_async*/
476     0,                  /*tp_repr*/
477     0,                  /*tp_as_number*/
478     0,                  /*tp_as_sequence*/
479     0,                  /*tp_as_mapping*/
480     0,                  /*tp_hash*/
481     0,                  /*tp_call*/
482     0,                  /*tp_str*/
483     0,                  /*tp_getattro*/
484     0,                  /*tp_setattro*/
485     0,                  /*tp_as_buffer*/
486     Py_TPFLAGS_DEFAULT, /*tp_flags*/
487     0,                  /*tp_doc*/
488     0,                  /*tp_traverse*/
489     0,                  /*tp_clear*/
490     0,                  /*tp_richcompare*/
491     0,                  /*tp_weaklistoffset*/
492     0,                  /*tp_iter*/
493     0,                  /*tp_iternext*/
494     MD5_methods,        /* tp_methods */
495     NULL,               /* tp_members */
496     MD5_getseters,      /* tp_getset */
497 };
498 
499 
500 /* The single module-level function: new() */
501 
502 /*[clinic input]
503 _md5.md5
504 
505     string: object(c_default="NULL") = b''
506     *
507     usedforsecurity: bool = True
508 
509 Return a new MD5 hash object; optionally initialized with a string.
510 [clinic start generated code]*/
511 
512 static PyObject *
_md5_md5_impl(PyObject * module,PyObject * string,int usedforsecurity)513 _md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity)
514 /*[clinic end generated code: output=587071f76254a4ac input=7a144a1905636985]*/
515 {
516     MD5object *new;
517     Py_buffer buf;
518 
519     if (string)
520         GET_BUFFER_VIEW_OR_ERROUT(string, &buf);
521 
522     if ((new = newMD5object()) == NULL) {
523         if (string)
524             PyBuffer_Release(&buf);
525         return NULL;
526     }
527 
528     md5_init(&new->hash_state);
529 
530     if (PyErr_Occurred()) {
531         Py_DECREF(new);
532         if (string)
533             PyBuffer_Release(&buf);
534         return NULL;
535     }
536     if (string) {
537         md5_process(&new->hash_state, buf.buf, buf.len);
538         PyBuffer_Release(&buf);
539     }
540 
541     return (PyObject *)new;
542 }
543 
544 
545 /* List of functions exported by this module */
546 
547 static struct PyMethodDef MD5_functions[] = {
548     _MD5_MD5_METHODDEF
549     {NULL,      NULL}            /* Sentinel */
550 };
551 
552 
553 /* Initialize this module. */
554 
555 static struct PyModuleDef _md5module = {
556         PyModuleDef_HEAD_INIT,
557         "_md5",
558         NULL,
559         -1,
560         MD5_functions,
561         NULL,
562         NULL,
563         NULL,
564         NULL
565 };
566 
567 PyMODINIT_FUNC
PyInit__md5(void)568 PyInit__md5(void)
569 {
570     PyObject *m;
571 
572     Py_SET_TYPE(&MD5type, &PyType_Type);
573     if (PyType_Ready(&MD5type) < 0) {
574         return NULL;
575     }
576 
577     m = PyModule_Create(&_md5module);
578     if (m == NULL) {
579         return NULL;
580     }
581 
582     Py_INCREF((PyObject *)&MD5type);
583     PyModule_AddObject(m, "MD5Type", (PyObject *)&MD5type);
584     return m;
585 }
586