1 /*
2  * punch.c --- deallocate blocks allocated to an inode
3  *
4  * Copyright (C) 2010 Theodore Ts'o.
5  *
6  * %Begin-Header%
7  * This file may be redistributed under the terms of the GNU Library
8  * General Public License, version 2.
9  * %End-Header%
10  */
11 
12 #include "config.h"
13 #include <stdio.h>
14 #include <string.h>
15 #if HAVE_UNISTD_H
16 #include <unistd.h>
17 #endif
18 #include <errno.h>
19 
20 #include "ext2_fs.h"
21 #include "ext2fs.h"
22 #include "ext2fsP.h"
23 
24 #undef PUNCH_DEBUG
25 
26 /*
27  * This function returns 1 if the specified block is all zeros
28  */
check_zero_block(char * buf,int blocksize)29 static int check_zero_block(char *buf, int blocksize)
30 {
31 	char	*cp = buf;
32 	int	left = blocksize;
33 
34 	while (left > 0) {
35 		if (*cp++)
36 			return 0;
37 		left--;
38 	}
39 	return 1;
40 }
41 
42 /*
43  * This clever recursive function handles i_blocks[] as well as
44  * indirect, double indirect, and triple indirect blocks.  It iterates
45  * over the entries in the i_blocks array or indirect blocks, and for
46  * each one, will recursively handle any indirect blocks and then
47  * frees and deallocates the blocks.
48  */
ind_punch(ext2_filsys fs,struct ext2_inode * inode,char * block_buf,blk_t * p,int level,blk64_t start,blk64_t count,int max)49 static errcode_t ind_punch(ext2_filsys fs, struct ext2_inode *inode,
50 			   char *block_buf, blk_t *p, int level,
51 			   blk64_t start, blk64_t count, int max)
52 {
53 	errcode_t	retval;
54 	blk_t		b;
55 	int		i;
56 	blk64_t		offset, incr;
57 	int		freed = 0;
58 
59 #ifdef PUNCH_DEBUG
60 	printf("Entering ind_punch, level %d, start %llu, count %llu, "
61 	       "max %d\n", level, start, count, max);
62 #endif
63 	incr = 1ULL << ((EXT2_BLOCK_SIZE_BITS(fs->super) - 2) * level);
64 	for (i = 0, offset = 0; i < max; i++, p++, offset += incr) {
65 		if (offset >= start + count)
66 			break;
67 		if (*p == 0 || (offset+incr) <= start)
68 			continue;
69 		b = *p;
70 		if (level > 0) {
71 			blk_t start2;
72 #ifdef PUNCH_DEBUG
73 			printf("Reading indirect block %u\n", b);
74 #endif
75 			retval = ext2fs_read_ind_block(fs, b, block_buf);
76 			if (retval)
77 				return retval;
78 			start2 = (start > offset) ? start - offset : 0;
79 			retval = ind_punch(fs, inode, block_buf + fs->blocksize,
80 					   (blk_t *) block_buf, level - 1,
81 					   start2, count - offset,
82 					   fs->blocksize >> 2);
83 			if (retval)
84 				return retval;
85 			retval = ext2fs_write_ind_block(fs, b, block_buf);
86 			if (retval)
87 				return retval;
88 			if (!check_zero_block(block_buf, fs->blocksize))
89 				continue;
90 		}
91 #ifdef PUNCH_DEBUG
92 		printf("Freeing block %u (offset %llu)\n", b, offset);
93 #endif
94 		ext2fs_block_alloc_stats(fs, b, -1);
95 		*p = 0;
96 		freed++;
97 	}
98 #ifdef PUNCH_DEBUG
99 	printf("Freed %d blocks\n", freed);
100 #endif
101 	return ext2fs_iblk_sub_blocks(fs, inode, freed);
102 }
103 
104 #define BLK_T_MAX ((blk_t)~0ULL)
ext2fs_punch_ind(ext2_filsys fs,struct ext2_inode * inode,char * block_buf,blk64_t start,blk64_t end)105 static errcode_t ext2fs_punch_ind(ext2_filsys fs, struct ext2_inode *inode,
106 				  char *block_buf, blk64_t start, blk64_t end)
107 {
108 	errcode_t		retval;
109 	char			*buf = 0;
110 	int			level;
111 	int			num = EXT2_NDIR_BLOCKS;
112 	blk_t			*bp = inode->i_block;
113 	blk_t			addr_per_block;
114 	blk64_t			max = EXT2_NDIR_BLOCKS;
115 	blk_t			count;
116 
117 	/* Check start/end don't overflow the 2^32-1 indirect block limit */
118 	if (start > BLK_T_MAX)
119 		return 0;
120 	if (end >= BLK_T_MAX || end - start + 1 >= BLK_T_MAX)
121 		count = BLK_T_MAX - start;
122 	else
123 		count = end - start + 1;
124 
125 	if (!block_buf) {
126 		retval = ext2fs_get_array(3, fs->blocksize, &buf);
127 		if (retval)
128 			return retval;
129 		block_buf = buf;
130 	}
131 
132 	addr_per_block = (blk_t)fs->blocksize >> 2;
133 
134 	for (level = 0; level < 4; level++, max *= (blk64_t)addr_per_block) {
135 #ifdef PUNCH_DEBUG
136 		printf("Main loop level %d, start %llu count %u "
137 		       "max %llu num %d\n", level, start, count, max, num);
138 #endif
139 		if (start < max) {
140 			retval = ind_punch(fs, inode, block_buf, bp, level,
141 					   start, count, num);
142 			if (retval)
143 				goto errout;
144 			if (count > max)
145 				count -= max - start;
146 			else
147 				break;
148 			start = 0;
149 		} else
150 			start -= max;
151 		bp += num;
152 		if (level == 0) {
153 			num = 1;
154 			max = 1;
155 		}
156 	}
157 	retval = 0;
158 errout:
159 	if (buf)
160 		ext2fs_free_mem(&buf);
161 	return retval;
162 }
163 #undef BLK_T_MAX
164 
165 #ifdef PUNCH_DEBUG
166 
167 #define dbg_printf(f, a...)  printf(f, ## a)
168 
dbg_print_extent(char * desc,struct ext2fs_extent * extent)169 static void dbg_print_extent(char *desc, struct ext2fs_extent *extent)
170 {
171 	if (desc)
172 		printf("%s: ", desc);
173 	printf("extent: lblk %llu--%llu, len %u, pblk %llu, flags: ",
174 	       extent->e_lblk, extent->e_lblk + extent->e_len - 1,
175 	       extent->e_len, extent->e_pblk);
176 	if (extent->e_flags & EXT2_EXTENT_FLAGS_LEAF)
177 		fputs("LEAF ", stdout);
178 	if (extent->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
179 		fputs("UNINIT ", stdout);
180 	if (extent->e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
181 		fputs("2ND_VISIT ", stdout);
182 	if (!extent->e_flags)
183 		fputs("(none)", stdout);
184 	fputc('\n', stdout);
185 
186 }
187 #else
188 #define dbg_print_extent(desc, ex)	do { } while (0)
189 #define dbg_printf(f, a...)		do { } while (0)
190 #endif
191 
192 /* Free a range of blocks, respecting cluster boundaries */
punch_extent_blocks(ext2_filsys fs,ext2_ino_t ino,struct ext2_inode * inode,blk64_t lfree_start,blk64_t free_start,__u32 free_count,int * freed)193 static errcode_t punch_extent_blocks(ext2_filsys fs, ext2_ino_t ino,
194 				     struct ext2_inode *inode,
195 				     blk64_t lfree_start, blk64_t free_start,
196 				     __u32 free_count, int *freed)
197 {
198 	blk64_t		pblk;
199 	int		freed_now = 0;
200 	__u32		cluster_freed;
201 	errcode_t	retval = 0;
202 
203 	/* No bigalloc?  Just free each block. */
204 	if (EXT2FS_CLUSTER_RATIO(fs) == 1) {
205 		*freed += free_count;
206 		while (free_count-- > 0)
207 			ext2fs_block_alloc_stats2(fs, free_start++, -1);
208 		return retval;
209 	}
210 
211 	/*
212 	 * Try to free up to the next cluster boundary.  We assume that all
213 	 * blocks in a logical cluster map to blocks from the same physical
214 	 * cluster, and that the offsets within the [pl]clusters match.
215 	 */
216 	if (free_start & EXT2FS_CLUSTER_MASK(fs)) {
217 		retval = ext2fs_map_cluster_block(fs, ino, inode,
218 						  lfree_start, &pblk);
219 		if (retval)
220 			goto errout;
221 		if (!pblk) {
222 			ext2fs_block_alloc_stats2(fs, free_start, -1);
223 			freed_now++;
224 		}
225 		cluster_freed = EXT2FS_CLUSTER_RATIO(fs) -
226 			(free_start & EXT2FS_CLUSTER_MASK(fs));
227 		if (cluster_freed > free_count)
228 			cluster_freed = free_count;
229 		free_count -= cluster_freed;
230 		free_start += cluster_freed;
231 		lfree_start += cluster_freed;
232 	}
233 
234 	/* Free whole clusters from the middle of the range. */
235 	while (free_count > 0 && free_count >= (unsigned) EXT2FS_CLUSTER_RATIO(fs)) {
236 		ext2fs_block_alloc_stats2(fs, free_start, -1);
237 		freed_now++;
238 		cluster_freed = EXT2FS_CLUSTER_RATIO(fs);
239 		free_count -= cluster_freed;
240 		free_start += cluster_freed;
241 		lfree_start += cluster_freed;
242 	}
243 
244 	/* Try to free the last cluster. */
245 	if (free_count > 0) {
246 		retval = ext2fs_map_cluster_block(fs, ino, inode,
247 						  lfree_start, &pblk);
248 		if (retval)
249 			goto errout;
250 		if (!pblk) {
251 			ext2fs_block_alloc_stats2(fs, free_start, -1);
252 			freed_now++;
253 		}
254 	}
255 
256 errout:
257 	*freed += freed_now;
258 	return retval;
259 }
260 
ext2fs_punch_extent(ext2_filsys fs,ext2_ino_t ino,struct ext2_inode * inode,blk64_t start,blk64_t end)261 static errcode_t ext2fs_punch_extent(ext2_filsys fs, ext2_ino_t ino,
262 				     struct ext2_inode *inode,
263 				     blk64_t start, blk64_t end)
264 {
265 	ext2_extent_handle_t	handle = 0;
266 	struct ext2fs_extent	extent;
267 	errcode_t		retval;
268 	blk64_t			free_start, next, lfree_start;
269 	__u32			free_count, newlen;
270 	int			freed = 0;
271 	int			op;
272 
273 	retval = ext2fs_extent_open2(fs, ino, inode, &handle);
274 	if (retval)
275 		return retval;
276 	/*
277 	 * Find the extent closest to the start of the punch range.  We don't
278 	 * check the return value because _goto() sets the current node to the
279 	 * next-lowest extent if 'start' is in a hole, and doesn't set a
280 	 * current node if there was a real error reading the extent tree.
281 	 * In that case, _get() will error out.
282 	 *
283 	 * Note: If _get() returns 'no current node', that simply means that
284 	 * there aren't any blocks mapped past this point in the file, so we're
285 	 * done.
286 	 */
287 	ext2fs_extent_goto(handle, start);
288 	retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &extent);
289 	if (retval == EXT2_ET_NO_CURRENT_NODE) {
290 		retval = 0;
291 		goto errout;
292 	} else if (retval)
293 		goto errout;
294 	while (1) {
295 		op = EXT2_EXTENT_NEXT_LEAF;
296 		dbg_print_extent("main loop", &extent);
297 		next = extent.e_lblk + extent.e_len;
298 		dbg_printf("start %llu, end %llu, next %llu\n",
299 			   (unsigned long long) start,
300 			   (unsigned long long) end,
301 			   (unsigned long long) next);
302 		if (start <= extent.e_lblk) {
303 			/*
304 			 * Have we iterated past the end of the punch region?
305 			 * If so, we can stop.
306 			 */
307 			if (end < extent.e_lblk)
308 				break;
309 			dbg_printf("Case #%d\n", 1);
310 			/* Start of deleted region before extent;
311 			   adjust beginning of extent */
312 			free_start = extent.e_pblk;
313 			lfree_start = extent.e_lblk;
314 			if (next > end)
315 				free_count = end - extent.e_lblk + 1;
316 			else
317 				free_count = extent.e_len;
318 			extent.e_len -= free_count;
319 			extent.e_lblk += free_count;
320 			extent.e_pblk += free_count;
321 		} else if (end >= next-1) {
322 			/*
323 			 * Is the punch region beyond this extent?  This can
324 			 * happen if start is already inside a hole.  Try to
325 			 * advance to the next extent if this is the case.
326 			 */
327 			if (start >= next)
328 				goto next_extent;
329 			/* End of deleted region after extent;
330 			   adjust end of extent */
331 			dbg_printf("Case #%d\n", 2);
332 			newlen = start - extent.e_lblk;
333 			free_start = extent.e_pblk + newlen;
334 			lfree_start = extent.e_lblk + newlen;
335 			free_count = extent.e_len - newlen;
336 			extent.e_len = newlen;
337 		} else {
338 			struct ext2fs_extent	newex;
339 
340 			dbg_printf("Case #%d\n", 3);
341 			/* The hard case; we need to split the extent */
342 			newex.e_pblk = extent.e_pblk +
343 				(end + 1 - extent.e_lblk);
344 			newex.e_lblk = end + 1;
345 			newex.e_len = next - end - 1;
346 			newex.e_flags = extent.e_flags;
347 
348 			extent.e_len = start - extent.e_lblk;
349 			free_start = extent.e_pblk + extent.e_len;
350 			lfree_start = extent.e_lblk + extent.e_len;
351 			free_count = end - start + 1;
352 
353 			dbg_print_extent("inserting", &newex);
354 			retval = ext2fs_extent_insert(handle,
355 					EXT2_EXTENT_INSERT_AFTER, &newex);
356 			if (retval)
357 				goto errout;
358 			retval = ext2fs_extent_fix_parents(handle);
359 			if (retval)
360 				goto errout;
361 			/*
362 			 * Now pointing at inserted extent; so go back.
363 			 *
364 			 * We cannot use EXT2_EXTENT_PREV to go back; note the
365 			 * subtlety in the comment for fix_parents().
366 			 */
367 			retval = ext2fs_extent_goto(handle, extent.e_lblk);
368 			if (retval)
369 				goto errout;
370 		}
371 		if (extent.e_len) {
372 			dbg_print_extent("replacing", &extent);
373 			retval = ext2fs_extent_replace(handle, 0, &extent);
374 			if (retval)
375 				goto errout;
376 			retval = ext2fs_extent_fix_parents(handle);
377 		} else {
378 			struct ext2fs_extent	newex;
379 			blk64_t			old_lblk, next_lblk;
380 			dbg_printf("deleting current extent%s\n", "");
381 
382 			/*
383 			 * Save the location of the next leaf, then slip
384 			 * back to the current extent.
385 			 */
386 			retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
387 						   &newex);
388 			if (retval)
389 				goto errout;
390 			old_lblk = newex.e_lblk;
391 
392 			retval = ext2fs_extent_get(handle,
393 						   EXT2_EXTENT_NEXT_LEAF,
394 						   &newex);
395 			if (retval == EXT2_ET_EXTENT_NO_NEXT)
396 				next_lblk = old_lblk;
397 			else if (retval)
398 				goto errout;
399 			else
400 				next_lblk = newex.e_lblk;
401 
402 			retval = ext2fs_extent_goto(handle, old_lblk);
403 			if (retval)
404 				goto errout;
405 
406 			/* Now delete the extent. */
407 			retval = ext2fs_extent_delete(handle, 0);
408 			if (retval)
409 				goto errout;
410 
411 			retval = ext2fs_extent_fix_parents(handle);
412 			if (retval && retval != EXT2_ET_NO_CURRENT_NODE)
413 				goto errout;
414 			retval = 0;
415 
416 			/*
417 			 * Jump forward to the next extent.  If there are
418 			 * errors, the ext2fs_extent_get down below will
419 			 * capture them for us.
420 			 */
421 			(void)ext2fs_extent_goto(handle, next_lblk);
422 			op = EXT2_EXTENT_CURRENT;
423 		}
424 		if (retval)
425 			goto errout;
426 		dbg_printf("Free start %llu, free count = %u\n",
427 		       free_start, free_count);
428 		retval = punch_extent_blocks(fs, ino, inode, lfree_start,
429 					     free_start, free_count, &freed);
430 		if (retval)
431 			goto errout;
432 	next_extent:
433 		retval = ext2fs_extent_get(handle, op,
434 					   &extent);
435 		if (retval == EXT2_ET_EXTENT_NO_NEXT ||
436 		    retval == EXT2_ET_NO_CURRENT_NODE)
437 			break;
438 		if (retval)
439 			goto errout;
440 	}
441 	dbg_printf("Freed %d blocks\n", freed);
442 	retval = ext2fs_iblk_sub_blocks(fs, inode, freed);
443 errout:
444 	ext2fs_extent_free(handle);
445 	return retval;
446 }
447 
ext2fs_punch_inline_data(ext2_filsys fs,ext2_ino_t ino,struct ext2_inode * inode,blk64_t start,blk64_t end EXT2FS_ATTR ((unused)))448 static errcode_t ext2fs_punch_inline_data(ext2_filsys fs, ext2_ino_t ino,
449 					  struct ext2_inode *inode,
450 					  blk64_t start,
451 					  blk64_t end EXT2FS_ATTR((unused)))
452 {
453 	errcode_t retval;
454 
455 	/*
456 	 * In libext2fs ext2fs_punch is based on block unit.  So that
457 	 * means that if start > 0 we don't need to do nothing.  Due
458 	 * to this we will remove all inline data in ext2fs_punch()
459 	 * now.
460 	 */
461 	if (start > 0)
462 		return 0;
463 
464 	memset((char *)inode->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE);
465 	inode->i_size = 0;
466 	retval = ext2fs_write_inode(fs, ino, inode);
467 	if (retval)
468 		return retval;
469 
470 	return ext2fs_inline_data_ea_remove(fs, ino);
471 }
472 
473 /*
474  * Deallocate all logical _blocks_ starting at start to end, inclusive.
475  * If end is ~0ULL, then this is effectively truncate.
476  */
ext2fs_punch(ext2_filsys fs,ext2_ino_t ino,struct ext2_inode * inode,char * block_buf,blk64_t start,blk64_t end)477 errcode_t ext2fs_punch(ext2_filsys fs, ext2_ino_t ino,
478 		       struct ext2_inode *inode,
479 		       char *block_buf, blk64_t start,
480 		       blk64_t end)
481 {
482 	errcode_t		retval;
483 	struct ext2_inode	inode_buf;
484 
485 	if (start > end)
486 		return EINVAL;
487 
488 	/* Read inode structure if necessary */
489 	if (!inode) {
490 		retval = ext2fs_read_inode(fs, ino, &inode_buf);
491 		if (retval)
492 			return retval;
493 		inode = &inode_buf;
494 	}
495 	if (inode->i_flags & EXT4_INLINE_DATA_FL)
496 		return ext2fs_punch_inline_data(fs, ino, inode, start, end);
497 	else if (inode->i_flags & EXT4_EXTENTS_FL)
498 		retval = ext2fs_punch_extent(fs, ino, inode, start, end);
499 	else
500 		retval = ext2fs_punch_ind(fs, inode, block_buf, start, end);
501 	if (retval)
502 		return retval;
503 
504 #ifdef PUNCH_DEBUG
505 	printf("%u: write inode size now %u blocks %u\n",
506 		ino, inode->i_size, inode->i_blocks);
507 #endif
508 	return ext2fs_write_inode(fs, ino, inode);
509 }
510