1 /*
2  * libwebsockets - small server side websockets and web server implementation
3  *
4  * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #if !defined(_GNU_SOURCE)
26 #define _GNU_SOURCE
27 #endif
28 #include <pthread.h>
29 
30 #include <libwebsockets.h>
31 #include "private-lib-core.h"
32 
33 #include <string.h>
34 #include <stdio.h>
35 #include <unistd.h>
36 #include <fcntl.h>
37 #include <dirent.h>
38 #include <time.h>
39 #include <errno.h>
40 #include <stdarg.h>
41 
42 #include <sys/stat.h>
43 #include <sys/time.h>
44 #include <sys/types.h>
45 
46 #if defined(__APPLE__)
47 #include <sys/dirent.h>
48 /* Travis OSX does not have DT_REG... */
49 #if !defined(DT_REG)
50 #define DT_REG 8
51 #endif
52 #endif
53 
54 struct file_entry {
55 	lws_list_ptr sorted;
56 	lws_list_ptr prev;
57 	char name[64];
58 	time_t modified;
59 	size_t size;
60 };
61 
62 struct lws_diskcache_scan {
63 	struct file_entry *batch;
64 	const char *cache_dir_base;
65 	lws_list_ptr head;
66 	time_t last_scan_completed;
67 	uint64_t agg_size;
68 	uint64_t cache_size_limit;
69 	uint64_t avg_size;
70 	uint64_t cache_tries;
71 	uint64_t cache_hits;
72 	int cache_subdir;
73 	int batch_in_use;
74 	int agg_file_count;
75 	int secs_waiting;
76 };
77 
78 #define KIB (1024)
79 #define MIB (KIB * KIB)
80 
81 #define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n)
82 
83 static const char *hex = "0123456789abcdef";
84 
85 #define BATCH_COUNT 128
86 
87 static int
fe_modified_sort(lws_list_ptr a,lws_list_ptr b)88 fe_modified_sort(lws_list_ptr a, lws_list_ptr b)
89 {
90 	struct file_entry *p1 = lp_to_fe(a, sorted), *p2 = lp_to_fe(b, sorted);
91 
92 	return p2->modified - p1->modified;
93 }
94 
95 struct lws_diskcache_scan *
lws_diskcache_create(const char * cache_dir_base,uint64_t cache_size_limit)96 lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit)
97 {
98 	struct lws_diskcache_scan *lds = lws_malloc(sizeof(*lds), "cachescan");
99 
100 	if (!lds)
101 		return NULL;
102 
103 	memset(lds, 0, sizeof(*lds));
104 
105 	lds->cache_dir_base = cache_dir_base;
106 	lds->cache_size_limit = cache_size_limit;
107 
108 	return lds;
109 }
110 
111 void
lws_diskcache_destroy(struct lws_diskcache_scan ** lds)112 lws_diskcache_destroy(struct lws_diskcache_scan **lds)
113 {
114 	if ((*lds)->batch)
115 		lws_free((*lds)->batch);
116 	lws_free(*lds);
117 	*lds = NULL;
118 }
119 
120 int
lws_diskcache_prepare(const char * cache_base_dir,int mode,int uid)121 lws_diskcache_prepare(const char *cache_base_dir, int mode, int uid)
122 {
123 	char dir[256];
124 	int n, m;
125 
126 	(void)mkdir(cache_base_dir, mode);
127 	if (chown(cache_base_dir, uid, -1))
128 		lwsl_err("%s: %s: unable to chown %d\n", __func__,
129 			 cache_base_dir, uid);
130 
131 	for (n = 0; n < 16; n++) {
132 		lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir, hex[n]);
133 		(void)mkdir(dir, mode);
134 		if (chown(dir, uid, -1))
135 			lwsl_err("%s: %s: unable to chown %d\n", __func__,
136 						 dir, uid);
137 		for (m = 0; m < 16; m++) {
138 			lws_snprintf(dir, sizeof(dir), "%s/%c/%c",
139 				     cache_base_dir, hex[n], hex[m]);
140 			(void)mkdir(dir, mode);
141 			if (chown(dir, uid, -1))
142 				lwsl_err("%s: %s: unable to chown %d\n",
143 					 __func__, dir, uid);
144 		}
145 	}
146 
147 	return 0;
148 }
149 
150 /* copies and then truncates the incoming name, and renames the file at the
151  * untruncated path to have the new truncated name */
152 
153 int
lws_diskcache_finalize_name(char * cache)154 lws_diskcache_finalize_name(char *cache)
155 {
156 	char ren[256], *p;
157 
158 	strncpy(ren, cache, sizeof(ren) - 1);
159 	ren[sizeof(ren) - 1] = '\0';
160 	p = strchr(cache, '~');
161 	if (p) {
162 		*p = '\0';
163 		if (rename(ren, cache)) {
164 			lwsl_err("%s: problem renaming %s to %s\n", __func__,
165 				 ren, cache);
166 			return 1;
167 		}
168 
169 		return 0;
170 	}
171 
172 	return 1;
173 }
174 
175 int
lws_diskcache_query(struct lws_diskcache_scan * lds,int is_bot,const char * hash_hex,int * _fd,char * cache,int cache_len,size_t * extant_cache_len)176 lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot,
177 		    const char *hash_hex, int *_fd, char *cache, int cache_len,
178 		    size_t *extant_cache_len)
179 {
180 	struct stat s;
181 	int n;
182 
183 	/* caching is disabled? */
184 	if (!lds->cache_dir_base)
185 		return LWS_DISKCACHE_QUERY_NO_CACHE;
186 
187 	if (!is_bot)
188 		lds->cache_tries++;
189 
190 	n = lws_snprintf(cache, cache_len, "%s/%c/%c/%s", lds->cache_dir_base,
191 			 hash_hex[0], hash_hex[1], hash_hex);
192 
193 	lwsl_info("%s: job cache %s\n", __func__, cache);
194 
195 	*_fd = open(cache, O_RDONLY);
196 	if (*_fd >= 0) {
197 		int fd;
198 
199 		if (!is_bot)
200 			lds->cache_hits++;
201 
202 		if (fstat(*_fd, &s)) {
203 			close(*_fd);
204 
205 			return LWS_DISKCACHE_QUERY_NO_CACHE;
206 		}
207 
208 		*extant_cache_len = (size_t)s.st_size;
209 
210 		/* "touch" the hit cache file so it's last for LRU now */
211 		fd = open(cache, O_RDWR);
212 		if (fd >= 0)
213 			close(fd);
214 
215 		return LWS_DISKCACHE_QUERY_EXISTS;
216 	}
217 
218 	/* bots are too random to pollute the cache with their antics */
219 	if (is_bot)
220 		return LWS_DISKCACHE_QUERY_NO_CACHE;
221 
222 	/* let's create it first with a unique temp name */
223 
224 	lws_snprintf(cache + n, cache_len - n, "~%d-%p", (int)getpid(),
225 		     extant_cache_len);
226 
227 	*_fd = open(cache, O_RDWR | O_CREAT | O_TRUNC, 0600);
228 	if (*_fd < 0) {
229 		/* well... ok... we will proceed without cache then... */
230 		lwsl_notice("%s: Problem creating cache %s: errno %d\n",
231 			    __func__, cache, errno);
232 		return LWS_DISKCACHE_QUERY_NO_CACHE;
233 	}
234 
235 	return LWS_DISKCACHE_QUERY_CREATING;
236 }
237 
238 int
lws_diskcache_secs_to_idle(struct lws_diskcache_scan * lds)239 lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds)
240 {
241 	return lds->secs_waiting;
242 }
243 
244 /*
245  * The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from
246  * the dirs under the cache dir.  Since we don't need or want a full list of
247  * files in there in memory at once, we restrict the linked-list size to
248  * BATCH_COUNT entries, and once it is full, simply ignore any further files
249  * that are newer than the newest one on that list.  Files older than the
250  * newest guy already on the list evict the newest guy already on the list
251  * and are sorted into the correct order.  In this way no matter the number
252  * of files to be processed the memory requirement is fixed at BATCH_COUNT
253  * struct file_entry-s.
254  *
255  * The oldest subset of BATCH_COUNT files are sorted into the cd->batch
256  * allocation in more recent -> least recent order.
257  *
258  * We want to track the total size of all files we saw as well, so we know if
259  * we need to actually do anything yet to restrict how much space it's taking
260  * up.
261  *
262  * And we want to do those things statefully and incrementally instead of one
263  * big atomic operation, since the user may want a huge cache, so we look in
264  * one cache dir at a time and track state in the repodir struct.
265  *
266  * When we have seen everything, we add the doubly-linked prev pointers and then
267  * if we are over the limit, start deleting up to BATCH_COUNT files working back
268  * from the end.
269  */
270 
271 int
lws_diskcache_trim(struct lws_diskcache_scan * lds)272 lws_diskcache_trim(struct lws_diskcache_scan *lds)
273 {
274 	size_t cache_size_limit = lds->cache_size_limit;
275 	char dirpath[132], filepath[132 + 32];
276 	lws_list_ptr lp, op = NULL;
277 	int files_trimmed = 0;
278 	struct file_entry *p;
279 	int fd, n, ret = -1;
280 	size_t trimmed = 0;
281 	struct dirent *de;
282 	struct stat s;
283 	DIR *dir;
284 
285 	if (!lds->cache_subdir) {
286 
287 		if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
288 			return 0;
289 
290 		lds->batch = lws_malloc(sizeof(struct file_entry) *
291 				BATCH_COUNT, "cache_trim");
292 		if (!lds->batch) {
293 			lwsl_err("%s: OOM\n", __func__);
294 
295 			return 1;
296 		}
297 		lds->agg_size = 0;
298 		lds->head = NULL;
299 		lds->batch_in_use = 0;
300 		lds->agg_file_count = 0;
301 	}
302 
303 	lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
304 		     lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
305 		     hex[lds->cache_subdir & 15]);
306 
307 	dir = opendir(dirpath);
308 	if (!dir) {
309 		lwsl_err("Unable to walk repo dir '%s'\n",
310 			 lds->cache_dir_base);
311 		return -1;
312 	}
313 
314 	do {
315 		de = readdir(dir);
316 		if (!de)
317 			break;
318 
319 		if (de->d_type != DT_REG)
320 			continue;
321 
322 		lds->agg_file_count++;
323 
324 		lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
325 			     de->d_name);
326 
327 		fd = open(filepath, O_RDONLY);
328 		if (fd < 0) {
329 			lwsl_err("%s: cannot open %s\n", __func__, filepath);
330 
331 			continue;
332 		}
333 
334 		n = fstat(fd, &s);
335 		close(fd);
336 		if (n) {
337 			lwsl_notice("%s: cannot stat %s\n", __func__, filepath);
338 			continue;
339 		}
340 
341 		lds->agg_size += s.st_size;
342 
343 		if (lds->batch_in_use == BATCH_COUNT) {
344 			/*
345 			 * once we filled up the batch with candidates, we don't
346 			 * need to consider any files newer than the newest guy
347 			 * on the list...
348 			 */
349 			if (lp_to_fe(lds->head, sorted)->modified < s.st_mtime)
350 				continue;
351 
352 			/*
353 			 * ... and if we find an older file later, we know it
354 			 * will be replacing the newest guy on the list, so use
355 			 * that directly...
356 			 */
357 			p = lds->head;
358 			lds->head = p->sorted;
359 		} else
360 			/* we are still accepting anything to fill the batch */
361 
362 			p = &lds->batch[lds->batch_in_use++];
363 
364 		p->sorted = NULL;
365 		strncpy(p->name, de->d_name, sizeof(p->name) - 1);
366 		p->name[sizeof(p->name) - 1] = '\0';
367 		p->modified = s.st_mtime;
368 		p->size = s.st_size;
369 
370 		lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
371 	} while (de);
372 
373 	ret = 0;
374 
375 	lds->cache_subdir++;
376 	if (lds->cache_subdir != 0x100)
377 		goto done;
378 
379 	/* we completed the whole scan... */
380 
381 	/* if really no guidence, then 256MiB */
382 	if (!cache_size_limit)
383 		cache_size_limit = 256 * 1024 * 1024;
384 
385 	if (lds->agg_size > cache_size_limit) {
386 
387 		/* apply prev pointers to make the list doubly-linked */
388 
389 		lp = lds->head;
390 		while (lp) {
391 			p = lp_to_fe(lp, sorted);
392 
393 			p->prev = op;
394 			op = &p->prev;
395 			lp = p->sorted;
396 		}
397 
398 		/*
399 		 * reverse the list (start from tail, now traverse using
400 		 * .prev)... it's oldest-first now...
401 		 */
402 
403 		lp = op;
404 
405 		while (lp && lds->agg_size > cache_size_limit) {
406 			p = lp_to_fe(lp, prev);
407 
408 			lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
409 				     lds->cache_dir_base, p->name[0],
410 				     p->name[1], p->name);
411 
412 			if (!unlink(filepath)) {
413 				lds->agg_size -= p->size;
414 				trimmed += p->size;
415 				files_trimmed++;
416 			} else
417 				lwsl_notice("%s: Failed to unlink %s\n",
418 					    __func__, filepath);
419 
420 			lp = p->prev;
421 		}
422 
423 		if (files_trimmed)
424 			lwsl_notice("%s: %s: trimmed %d files totalling "
425 				    "%lldKib, leaving %lldMiB\n", __func__,
426 				    lds->cache_dir_base, files_trimmed,
427 				    ((unsigned long long)trimmed) / KIB,
428 				    ((unsigned long long)lds->agg_size) / MIB);
429 	}
430 
431 	if (lds->agg_size && lds->agg_file_count)
432 		lds->avg_size = lds->agg_size / lds->agg_file_count;
433 
434 	/*
435 	 * estimate how long we can go before scanning again... default we need
436 	 * to start again immediately
437 	 */
438 
439 	lds->last_scan_completed = time(NULL);
440 	lds->secs_waiting = 1;
441 
442 	if (lds->agg_size < cache_size_limit) {
443 		uint64_t avg = 4096, capacity, projected;
444 
445 		/* let's use 80% of the real average for margin */
446 		if (lds->agg_size && lds->agg_file_count)
447 			avg = ((lds->agg_size * 8) / lds->agg_file_count) / 10;
448 
449 		/*
450 		 * if we collected BATCH_COUNT files of the average size,
451 		 * how much can we clean up in 256s?
452 		 */
453 
454 		capacity = avg * BATCH_COUNT;
455 
456 		/*
457 		 * if the cache grew by 10%, would we hit the limit even then?
458 		 */
459 		projected = (lds->agg_size * 11) / 10;
460 		if (projected < cache_size_limit)
461 			/* no... */
462 			lds->secs_waiting  = (256 / 2) * ((cache_size_limit -
463 						    projected) / capacity);
464 
465 		/*
466 		 * large waits imply we may not have enough info yet, so
467 		 * check once an hour at least.
468 		 */
469 
470 		if (lds->secs_waiting > 3600)
471 			lds->secs_waiting = 3600;
472 	} else
473 		lds->secs_waiting = 0;
474 
475 	lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
476 		  lds->cache_dir_base,
477 		  (unsigned long long)lds->agg_size / KIB,
478 		  (unsigned long long)cache_size_limit / KIB,
479 		  lds->secs_waiting);
480 
481 	lws_free(lds->batch);
482 	lds->batch = NULL;
483 
484 	lds->cache_subdir = 0;
485 
486 done:
487 	closedir(dir);
488 
489 	return ret;
490 }
491