1 /*
2 * Copyright (C) 2015-2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <assert.h>
18 #include <inttypes.h>
19 #include <lk/compiler.h>
20 #include <stdbool.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <string.h>
24
25 #ifndef LOCAL_TRACE
26 #define LOCAL_TRACE TRACE_LEVEL_INIT
27 #endif
28 #ifndef LOCAL_TRACE_ERR
29 #define LOCAL_TRACE_ERR TRACE_LEVEL_INIT
30 #endif
31
32 #include "array.h"
33 #include "block_allocator.h"
34 #include "block_cache.h"
35 #include "block_set.h"
36 #include "checkpoint.h"
37 #include "debug.h"
38 #include "error_reporting.h"
39 #include "file.h"
40 #include "fs.h"
41 #include "transaction.h"
42
/* Magic value of a super block; the bytes spell "trustys" in little-endian */
#define SUPER_BLOCK_MAGIC (0x0073797473757274ULL)

/* Bits of the flags field that hold the wrapping two-bit version counter */
#define SUPER_BLOCK_FLAGS_VERSION_MASK (0x3U)
/* Low bit of the version, used as the index of the super block to write */
#define SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK (0x1U)
/* Set when the free and files roots are not provided (empty file system) */
#define SUPER_BLOCK_FLAGS_EMPTY (0x4U)
/* Set when the super block was written in alternate data mode */
#define SUPER_BLOCK_FLAGS_ALTERNATE (0x8U)
/* Every flag bit this implementation understands (evaluates to 0xfU) */
#define SUPER_BLOCK_FLAGS_SUPPORTED_MASK                        \
    (SUPER_BLOCK_FLAGS_VERSION_MASK | SUPER_BLOCK_FLAGS_EMPTY | \
     SUPER_BLOCK_FLAGS_ALTERNATE)

/* Newest file system version this implementation writes and accepts */
#define SUPER_BLOCK_FS_VERSION (0U)
50
/**
 * typedef super_block_opt_flags8_t - Optional flags, can be ORed together
 *
 * %SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3
 *   The superblock carries additional data after flags2, and flags3 must hold
 *   the same value as flags.
 * %SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT
 *   The superblock contains the @checkpoint field.
 * %SUPER_BLOCK_OPT_FLAGS_NEEDS_FULL_SCAN
 *   An error was detected in this file system; a full scan, and possibly a
 *   repair, should be started on the next mount. Reset after scanning.
 */
typedef uint8_t super_block_opt_flags8_t;
#define SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3 (1U << 0)
#define SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT (1U << 1)
#define SUPER_BLOCK_OPT_FLAGS_NEEDS_FULL_SCAN (1U << 2)
67
/**
 * typedef super_block_required_flags16_t - Required FS flags, can be ORed
 *                                          together
 *
 * Every flag set here must be supported by the running implementation; if any
 * unrecognized bit is set the file system must not be mounted. Versions of the
 * storage service that predate the @required_flags field read non-zero flags
 * as a high @fs_version and therefore also refuse to mount the file system.
 *
 * %SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED
 *   The main file system (i.e. flags does not contain
 *   %SUPER_BLOCK_FLAGS_ALTERNATE) has been repaired, since it was last
 *   cleared, in a way that effectively rolled it back to a previous state.
 *   Support for this flag is required so that an older version of the storage
 *   service cannot discard a repaired state. The flag is cleared when the main
 *   file system is cleared, so it only tracks repairs since the last clear.
 * %SUPER_BLOCK_REQUIRED_FLAGS_MASK
 *   Mask of the bits understood by the current storage implementation. Do not
 *   mount the file system if any bit outside this mask is set.
 */
typedef uint16_t super_block_required_flags16_t;
#define SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED (1U << 0)
#define SUPER_BLOCK_REQUIRED_FLAGS_MASK (SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED)
95
96 /**
97 * struct super_block - On-disk root block for file system state
98 * @iv: Initial value used for encrypt/decrypt.
99 * @magic: SUPER_BLOCK_MAGIC.
100 * @flags: Version in bottom two bits, other bits are reserved.
101 * @fs_version: Required file system version. If greater than
102 * %SUPER_BLOCK_FS_VERSION, do not mount or overwrite
103 * filesystem.
104 * @required_flags: Required file system flags. To mount this file system, any
105 * non-zero flag bits set must be supported by the storage
106 * implementation.
107 * @block_size: Block size of file system.
108 * @block_num_size: Number of bytes used to store block numbers.
109 * @mac_size: number of bytes used to store mac values.
110 * @opt_flags: Optional flags, any of &typedef super_block_opt_flags8_t
111 * ORed together.
112 * @res2: Reserved for future use. Write 0, read ignore.
113 * @block_count: Size of file system.
114 * @free: Block and mac of free set root node.
115 * @free_count: Currently unused.
116 * @files: Block and mac of files tree root node.
117 * @res3: Reserved for future use. Write 0, read ignore.
118 * @flags2: Copy of @flags. Allows storing the super-block in a device
119 * that does not support an atomic write of the entire
120 * super-block.
121 * @backup: Backup of previous super-block, used to support an alternate
122 * backing store. 0 if no backup has ever been written. Once a
123 * backup exists, it will only ever be swapped, not cleared.
124 * @checkpoint: Block and mac of checkpoint metadata block. 0 if a
125 * checkpoint does not exist.
126 * @res4: Reserved for future use. Write 0, read ignore.
127 * @flags3: Copy of @flags. Allows storing the super-block in a device
128 * that does not support an atomic write of the entire
129 * super-block. If SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3 is not set,
130 * @flags3 is not checked and fields after @flags2 are ignored.
131 *
132 * Block numbers and macs in @free and @files are packed as indicated by
133 * @block_num_size and @mac_size, but unlike other on-disk data, the size of the
134 * whole field is always the full 24 bytes needed for a 8 byte block number and
135 * 16 byte mac This allows the @flags2 and @flags3 to be validated before
136 * knowing @block_num_size and @mac_size.
137 */
138 struct super_block {
139 struct iv iv;
140 uint64_t magic;
141 uint32_t flags;
142 uint16_t fs_version;
143 super_block_required_flags16_t required_flags;
144 uint32_t block_size;
145 uint8_t block_num_size;
146 uint8_t mac_size;
147 super_block_opt_flags8_t opt_flags;
148 uint8_t res2;
149 data_block_t block_count;
150 struct block_mac free;
151 data_block_t free_count;
152 struct block_mac files;
153 uint32_t res3[5];
154 uint32_t flags2;
155 struct super_block_backup backup;
156 struct block_mac checkpoint;
157 uint32_t res4[6];
158 uint32_t flags3;
159 };
160 STATIC_ASSERT(offsetof(struct super_block, flags2) == 124);
161 STATIC_ASSERT(offsetof(struct super_block, flags3) == 252);
162 STATIC_ASSERT(sizeof(struct super_block) == 256);
163
164 /*
165 * We rely on these offsets in future_fs_version_test and
166 * unknown_required_flags_test in the storage_block_test to test that we will
167 * not mount or modify a super block with unknown version or fs flags.
168 */
169 STATIC_ASSERT(offsetof(struct super_block, fs_version) == 28);
170 STATIC_ASSERT(offsetof(struct super_block, required_flags) == 30);
171
172 /* block_device_tipc.c ensures that we have at least 256 bytes in RPMB blocks */
173 STATIC_ASSERT(sizeof(struct super_block) <= 256);
174
/*
 * File-scope list head set up with LIST_INITIAL_VALUE().
 * NOTE(review): presumably links the file systems known to this module; the
 * code that adds to and walks this list is outside this part of the file -
 * confirm.
 */
static struct list_node fs_list = LIST_INITIAL_VALUE(fs_list);
176
177 /**
178 * update_super_block_internal - Generate and write superblock
179 * @tr: Transaction object.
180 * @free: New free root.
181 * @files: New files root.
182 * @checkpoint: New checkpoint metadata block.
183 * @pinned: New block should not be reused in the block cache until
184 * it is successfully written.
185 *
186 * Return: %true if super block was updated (in cache), %false if transaction
187 * failed before super block was updated.
188 */
update_super_block_internal(struct transaction * tr,const struct block_mac * free,const struct block_mac * files,const struct block_mac * checkpoint,bool pinned)189 static bool update_super_block_internal(struct transaction* tr,
190 const struct block_mac* free,
191 const struct block_mac* files,
192 const struct block_mac* checkpoint,
193 bool pinned) {
194 struct super_block* super_rw;
195 struct obj_ref super_ref = OBJ_REF_INITIAL_VALUE(super_ref);
196 unsigned int ver;
197 unsigned int index;
198 super_block_required_flags16_t required_flags = 0;
199 uint32_t flags;
200 uint32_t block_size = tr->fs->super_dev->block_size;
201 super_block_opt_flags8_t opt_flags = SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3 |
202 SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT;
203
204 if (!tr->fs->writable) {
205 pr_err("Attempting to write superblock for read-only filesystem\n");
206 if (!tr->failed) {
207 transaction_fail(tr);
208 }
209 return false;
210 }
211
212 assert(block_size >= sizeof(struct super_block));
213 assert(tr->fs->initial_super_block_tr == NULL ||
214 tr->fs->initial_super_block_tr == tr);
215
216 ver = (tr->fs->super_block_version + 1) & SUPER_BLOCK_FLAGS_VERSION_MASK;
217 index = ver & SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK;
218 flags = ver;
219 if (!free && !files) {
220 /*
221 * If the free and files trees are not provided, the filesystem is in
222 * the initial empty state.
223 */
224 flags |= SUPER_BLOCK_FLAGS_EMPTY;
225 } else {
226 /* Non-empty filesystems must have both trees (with root node blocks) */
227 assert(free);
228 assert(files);
229 }
230 if (tr->fs->alternate_data) {
231 flags |= SUPER_BLOCK_FLAGS_ALTERNATE;
232 }
233 if (tr->repaired || tr->fs->main_repaired) {
234 /*
235 * We don't track repairs in alternate data mode, so we shouldn't do
236 * them - ensure the transaction does not include a repair if we are in
237 * alternate state. The FS flag is used to persist the state for the
238 * main FS.
239 */
240 assert(!tr->repaired || !tr->fs->alternate_data);
241 required_flags |= SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED;
242 /*
243 * TODO: We would like to track the number of repairs in addition to the
244 * current repair state. This may be up to three different counters: 1)
245 * the number of times this fs has been repaired over the device
246 * lifetime to report in metrics, 2) the number of repairs since last
247 * clear, and 3) the overall fs generation count (number of device
248 * lifetime repairs+clears). 2) and 3) would primarily be useful if we
249 * expose them to clients via a new query API, while 1) would mostly be
250 * for device metrics. We can implement some or all of these counters
251 * when we add an API that consumes them.
252 */
253 }
254 if (tr->fs->needs_full_scan) {
255 opt_flags |= SUPER_BLOCK_OPT_FLAGS_NEEDS_FULL_SCAN;
256 }
257
258 pr_write("write super block %" PRIu64 ", ver %d\n",
259 tr->fs->super_block[index], ver);
260
261 super_rw = block_get_cleared_super(tr, tr->fs->super_block[index],
262 &super_ref, pinned);
263 if (tr->failed) {
264 block_put_dirty_discard(super_rw, &super_ref);
265 return false;
266 }
267 super_rw->magic = SUPER_BLOCK_MAGIC;
268 super_rw->flags = flags;
269 /* TODO: keep existing fs version when possible */
270 super_rw->fs_version = SUPER_BLOCK_FS_VERSION;
271 super_rw->required_flags = required_flags;
272 super_rw->block_size = tr->fs->dev->block_size;
273 super_rw->block_num_size = tr->fs->block_num_size;
274 super_rw->mac_size = tr->fs->mac_size;
275 super_rw->opt_flags = opt_flags;
276 super_rw->block_count = tr->fs->dev->block_count;
277 if (free) {
278 super_rw->free = *free;
279 }
280 super_rw->free_count = 0; /* TODO: remove or update */
281 if (files) {
282 super_rw->files = *files;
283 }
284 if (checkpoint) {
285 super_rw->checkpoint = *checkpoint;
286 }
287 super_rw->flags2 = flags;
288 super_rw->backup = tr->fs->backup;
289 super_rw->flags3 = flags;
290 tr->fs->written_super_block_version = ver;
291
292 block_put_dirty_no_mac(super_rw, &super_ref, tr->fs->allow_tampering);
293
294 return true;
295 }
296
/**
 * update_super_block - Generate and write superblock
 * @tr:         Transaction object.
 * @free:       New free root.
 * @files:      New files root.
 * @checkpoint: New checkpoint metadata block.
 *
 * Same as update_super_block_internal() with the new block not pinned in the
 * block cache.
 *
 * Return: %true if super block was updated (in cache), %false if transaction
 * failed before super block was updated.
 */
bool update_super_block(struct transaction* tr,
                        const struct block_mac* free,
                        const struct block_mac* files,
                        const struct block_mac* checkpoint) {
    const bool pinned = false;

    return update_super_block_internal(tr, free, files, checkpoint, pinned);
}
313
314 /**
315 * write_initial_super_block - Write initial superblock to internal transaction
316 * @fs: File system state object.
317 *
318 * When needed, this must be called before creating any other transactions on
319 * this filesystem so we don't fill up the cache with entries that can't be
320 * flushed to make room for this block.
321 *
322 * Return: %true if the initial empty superblock was successfully written to the
323 * cache, or %false otherwise.
324 */
write_initial_super_block(struct fs * fs)325 static bool write_initial_super_block(struct fs* fs) {
326 struct transaction* tr;
327 tr = calloc(1, sizeof(*tr));
328 if (!tr) {
329 return false;
330 }
331 fs->initial_super_block_tr = tr;
332
333 transaction_init(tr, fs, true);
334 return update_super_block_internal(tr, NULL, NULL, NULL, true);
335 }
336
337 /**
338 * write_current_super_block - Write current superblock to internal transaction
339 * @fs: File system state object.
340 * @reinitialize: Allow the special transaction to be reinitialized if it has
341 * failed
342 *
343 * Write the current state of the super block to an internal transaction that
344 * will be written before any other block. This can be used to re-sync the
345 * in-memory fs-state with the on-disk state after detecting a write failure
346 * where no longer know the on-disk super block state.
347 *
348 * @fs must be writable when calling this function.
349 */
write_current_super_block(struct fs * fs,bool reinitialize)350 void write_current_super_block(struct fs* fs, bool reinitialize) {
351 bool super_block_updated;
352 struct transaction* tr;
353
354 assert(fs->writable);
355
356 if (fs->initial_super_block_tr) {
357 /*
358 * If initial_super_block_tr is already pending and not failed there is
359 * no need to allocate a new one so return early.
360 *
361 * If the special transaction has failed, we need to re-initialize it so
362 * that we can attempt to recover to a good state.
363 *
364 * We are only allowed to reinitialze if the @reinitialize parameter is
365 * true. We don't want to allow reinitialization while cleaning blocks
366 * (i.e. via fs_unknown_super_block_state_all()), as this would reset
367 * the special transaction to non-failed state and create a situation
368 * where transaction_initial_super_block_complete() cannot know if it
369 * successfully flushed the special transaction to disk. Therefore we
370 * only allow transaction_initial_super_block_complete() to reinitialize
371 * a failed special transaction after it attempts and fails to write the
372 * block to disk.
373 *
374 * Since we pin special superblock entries in the block cache and
375 * therefore cannot evict them with normal transactions,
376 * transaction_initial_super_block_complete() is the only place we can
377 * attempt a special transaction write, and if it fails the transaction
378 * is immediately reinitialized. Therefore we should only ever be in a
379 * failed state if reinitialize is true (i.e. we are being called from
380 * transaction_initial_super_block_complete()).
381 */
382
383 assert(reinitialize || !fs->initial_super_block_tr->failed);
384 if (!fs->initial_super_block_tr->failed || !reinitialize) {
385 return;
386 }
387
388 tr = fs->initial_super_block_tr;
389 transaction_activate(tr);
390 } else {
391 tr = calloc(1, sizeof(*tr));
392 if (!tr) {
393 /* Not safe to proceed. TODO: add flag to defer this allocation? */
394 abort();
395 }
396 transaction_init(tr, fs, true);
397 fs->initial_super_block_tr = tr;
398 }
399
400 /*
401 * Until the filesystem contains committed data, fs->free.block_tree.root
402 * will be zero, i.e. an invalid block mac. fs->free.block_tree.root is only
403 * updated in transaction_complete() after successfully writing a new
404 * superblock. If the filesystem is empty, we need to emit a cleared
405 * superblock with a special flag to prevent the superblock state from
406 * getting out of sync with the filesystem data if a reboot occurrs before
407 * committing a superblock with data.
408 *
409 * We can't use fs->files.root here because it may be invalid if there are
410 * no files in the filesystem. If the free node is zero, then the files node
411 * must be as well, so we assert this.
412 */
413 bool fs_is_cleared = !block_mac_valid(tr, &fs->free.block_tree.root);
414 if (fs_is_cleared) {
415 assert(!block_mac_valid(tr, &fs->files.root));
416 super_block_updated =
417 update_super_block_internal(tr, NULL, NULL, NULL, true);
418 } else {
419 super_block_updated = update_super_block_internal(
420 tr, &fs->free.block_tree.root, &fs->files.root, &fs->checkpoint,
421 true);
422 }
423 if (!super_block_updated) {
424 /* Not safe to proceed. TODO: add flag to try again? */
425 fprintf(stderr,
426 "Could not create pending write for current superblock state. "
427 "Not safe to proceed.\n");
428 abort();
429 }
430 }
431
432 /**
433 * fs_mark_scan_required - Require a full scan for invalid blocks the next time
434 * this FS is mounted
435 * @fs: File system object
436 *
437 * Marks the file system to require a full scan (and possibly repair) on the
438 * next mount. If @fs is writable, this function immediately writes a new copy
439 * of the current super block, so the flag will persist even with no further
440 * writes to the file system.
441 */
fs_mark_scan_required(struct fs * fs)442 void fs_mark_scan_required(struct fs* fs) {
443 fs->needs_full_scan = true;
444 if (!fs->writable) {
445 /* We can't write back the superblock until this FS is writable. */
446 return;
447 }
448 write_current_super_block(fs, false);
449 assert(fs->initial_super_block_tr);
450 transaction_initial_super_block_complete(fs->initial_super_block_tr);
451 }
452
453 /**
454 * super_block_valid - Check if superblock is valid
455 * @dev: Block device that supoer block was read from.
456 * @super: Super block data.
457 *
458 * Return: %true if @super is valid for @dev, %false otherwise.
459 */
super_block_valid(const struct block_device * dev,const struct super_block * super)460 static bool super_block_valid(const struct block_device* dev,
461 const struct super_block* super) {
462 if (super->magic != SUPER_BLOCK_MAGIC) {
463 pr_init("bad magic, 0x%" PRIx64 "\n", super->magic);
464 return false;
465 }
466 if (super->flags != super->flags2) {
467 pr_warn("flags, 0x%x, does not match flags2, 0x%x\n", super->flags,
468 super->flags2);
469 return false;
470 }
471 if ((super->opt_flags & SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3) &&
472 super->flags != super->flags3) {
473 pr_warn("flags, 0x%x, does not match flags3, 0x%x\n", super->flags,
474 super->flags3);
475 return false;
476 }
477 if (super->fs_version > SUPER_BLOCK_FS_VERSION) {
478 pr_warn("super block is from the future: 0x%x\n", super->fs_version);
479 return true;
480 }
481 if (super->flags & ~SUPER_BLOCK_FLAGS_SUPPORTED_MASK) {
482 pr_warn("unknown flags set, 0x%x\n", super->flags);
483 return false;
484 }
485 if (super->block_size != dev->block_size) {
486 pr_warn("bad block size 0x%x, expected 0x%zx\n", super->block_size,
487 dev->block_size);
488 return false;
489 }
490 if (super->block_num_size != dev->block_num_size) {
491 pr_warn("invalid block_num_size %d, expected %zd\n",
492 super->block_num_size, dev->block_num_size);
493 return false;
494 }
495 if (super->mac_size != dev->mac_size) {
496 pr_warn("invalid mac_size %d, expected %zd\n", super->mac_size,
497 dev->mac_size);
498 return false;
499 }
500 if (!dev->tamper_detecting && super->mac_size != sizeof(struct mac)) {
501 pr_warn("invalid mac_size %d != %zd\n", super->mac_size,
502 sizeof(data_block_t));
503 return false;
504 }
505
506 return true;
507 }
508
509 /**
510 * super_version_delta - Find the version delta between two superblocks
511 * @new_super: Candidate new superblock
512 * @old_super: Old superblock
513 *
514 * The overflow in this function is intentional as a way to use a wrapping
515 * two-bit counter.
516 *
517 * Return: Wrapped difference between the two bit version numbers in the two
518 * superblocks. This will be 1 when new is newer than old, 3 when old is
519 * newer than new, and any other number indicates an invalid/corrupt version.
520 */
521 __attribute__((no_sanitize("unsigned-integer-overflow"))) static inline uint8_t
super_version_delta(const struct super_block * new_super,const struct super_block * old_super)522 super_version_delta(const struct super_block* new_super,
523 const struct super_block* old_super) {
524 return (new_super->flags - old_super->flags) &
525 SUPER_BLOCK_FLAGS_VERSION_MASK;
526 }
527
528 /**
529 * use_new_super - Check if new superblock is valid and more recent than old
530 * @dev: Block device that super block was read from.
531 * @new_super: New super block data.
532 * @new_super_index: Index that @new_super was read from.
533 * @old_super: Old super block data, or %NULL.
534 *
535 * Return: %true if @new_super is valid for @dev, and more recent than
536 * @old_super (or @old_super is %NULL), %false otherwise.
537 */
use_new_super(const struct block_device * dev,const struct super_block * new_super,unsigned int new_super_index,const struct super_block * old_super)538 static bool use_new_super(const struct block_device* dev,
539 const struct super_block* new_super,
540 unsigned int new_super_index,
541 const struct super_block* old_super) {
542 uint8_t dv;
543 if (!super_block_valid(dev, new_super)) {
544 return false;
545 }
546 if ((new_super->flags & SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK) !=
547 new_super_index) {
548 pr_warn("block index, 0x%x, does not match flags, 0x%x\n",
549 new_super_index, new_super->flags);
550 return false;
551 }
552 if (!old_super) {
553 return true;
554 }
555 dv = super_version_delta(new_super, old_super);
556 pr_read("version delta, %d (new flags 0x%x, old flags 0x%x)\n", dv,
557 new_super->flags, old_super->flags);
558 if (dv == 1) {
559 return true;
560 }
561 if (dv == 3) {
562 return false;
563 }
564 pr_warn("bad version delta, %d (new flags 0x%x, old flags 0x%x)\n", dv,
565 new_super->flags, old_super->flags);
566 return false;
567 }
568
569 static void fs_init_free_set(struct fs* fs, struct block_set* set);
570
571 /**
572 * fs_set_roots - Initialize fs state from super block roots
573 * @fs: File system state object
574 * @free: Free set root node
575 * @files: Files tree root node
576 * @checkpoint: Checkpoint metadata block. May be NULL.
577 * @restore_checkpoint: If %true, restore files and free roots from @checkpoint
578 * (which must not be NULL).
579 *
580 * Unconditionally sets the filesystem roots to @free and @files respectively,
581 * then attempts to restore the checkpoint roots if @restore_checkpoint is
582 * %true. When attempting to restore from a checkpoint that exists but is not
583 * readable, return %false, leaving the filesystem roots initialized to @free
584 * and @files. If attempting to restore from checkpoint but no checkpoint was
585 * previously set, this function will clear the filesystem.
586 *
587 * Returns %true if fs roots were correctly initialized as requested, %false if
588 * a requested checkpoint restore failed (but roots were still initialized to
589 * the provided blocks).
590 */
fs_set_roots(struct fs * fs,const struct block_mac * free,const struct block_mac * files,const struct block_mac * checkpoint,bool restore_checkpoint)591 static bool fs_set_roots(struct fs* fs,
592 const struct block_mac* free,
593 const struct block_mac* files,
594 const struct block_mac* checkpoint,
595 bool restore_checkpoint) {
596 bool success = true;
597 struct transaction tr;
598 struct block_tree checkpoint_files =
599 BLOCK_TREE_INITIAL_VALUE(checkpoint_files);
600
601 assert(!restore_checkpoint || checkpoint);
602
603 fs->free.block_tree.root = *free;
604 fs->files.root = *files;
605
606 if (checkpoint) {
607 fs->checkpoint = *checkpoint;
608 transaction_init(&tr, fs, true);
609
610 /*
611 * fs->checkpoint_free is initialized to contain all blocks, so we
612 * don't have to initialize it if there is no checkpoint on disk
613 */
614 assert(!block_range_empty(fs->checkpoint_free.initial_range));
615
616 if (block_mac_valid(&tr, &fs->checkpoint)) {
617 success = checkpoint_read(&tr, &fs->checkpoint, &checkpoint_files,
618 &fs->checkpoint_free);
619 } else if (restore_checkpoint) {
620 /* We do not want to restore a non-existent checkpoint */
621 success = false;
622 }
623 if (success && restore_checkpoint) {
624 /*
625 * Checkpoint restore counts as a repair which must set the repaired
626 * flag. We disallow checkpoint restore in alternate mode in
627 * fs_init().
628 */
629 fs->main_repaired = true;
630 fs->files.root = checkpoint_files.root;
631 block_set_copy_ro(&tr, &fs->free, &fs->checkpoint_free);
632 /*
633 * block_set_copy_ro() clears the copy_on_write flag for the free
634 * set, so we have to reset it to allow modification.
635 */
636 fs->free.block_tree.copy_on_write = true;
637 }
638 if (!tr.failed) {
639 /* temporary transaction is only for reading, drop it */
640 transaction_fail(&tr);
641 }
642 transaction_free(&tr);
643 }
644
645 return success;
646 }
647
648 /**
649 * fs_init_free_set - Initialize an initial free set for a file system
650 * @fs: File system state object.
651 * @set: Block set to initialize
652 *
653 * Initializes @set to the entire range of @fs, i.e. all blocks are free.
654 */
fs_init_free_set(struct fs * fs,struct block_set * set)655 static void fs_init_free_set(struct fs* fs, struct block_set* set) {
656 struct block_range range = {
657 .start = fs->min_block_num,
658 .end = fs->dev->block_count,
659 };
660 block_set_add_initial_range(set, range);
661 }
662
663 /**
664 * fs_init_from_super - Initialize file system from super block
665 * @fs: File system state object.
666 * @super: Superblock data, or %NULL.
667 * @flags: Any of &typedef fs_init_flags32_t, ORed together.
668 *
669 * Return: 0 if super block was usable, -1 if a fatal error was encountered and
670 * initialization should not continue. The file system may not be readable, even
671 * if this function returns 0. Check @fs->readable before attempting to read
672 * from this file system.
673 */
fs_init_from_super(struct fs * fs,const struct super_block * super,fs_init_flags32_t flags)674 static int fs_init_from_super(struct fs* fs,
675 const struct super_block* super,
676 fs_init_flags32_t flags) {
677 bool is_clear = false;
678 bool do_clear = flags & FS_INIT_FLAGS_DO_CLEAR;
679 bool do_swap = false; /* Does the active superblock alternate mode match the
680 current mode? */
681 bool do_clear_backup = false;
682 bool has_backup_field =
683 super && (super->opt_flags & SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3);
684 bool has_checkpoint_field =
685 has_backup_field && super &&
686 (super->opt_flags & SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT);
687 bool recovery_allowed = flags & FS_INIT_FLAGS_RECOVERY_CLEAR_ALLOWED;
688 bool read_only = false;
689 const struct block_mac* new_files_root;
690 const struct block_mac* new_free_root;
691 const struct block_mac* new_checkpoint = NULL;
692
693 /*
694 * We check that the super-block matches these block device params in
695 * super_block_valid(). If these params change, the filesystem (and
696 * alternate backup) will be wiped and reset with the new params.
697 */
698 fs->block_num_size = fs->dev->block_num_size;
699 fs->mac_size = fs->dev->mac_size;
700
701 block_set_init(fs, &fs->free);
702 fs->free.block_tree.copy_on_write = true;
703 fs_file_tree_init(fs, &fs->files);
704 fs->files.copy_on_write = true;
705 fs->files.allow_copy_on_write = true;
706 fs->main_repaired = false;
707
708 memset(&fs->checkpoint, 0, sizeof(fs->checkpoint));
709 block_set_init(fs, &fs->checkpoint_free);
710 /*
711 * checkpoint_init() will clear the checkpoint initial range if a valid
712 * checkpoint exists.
713 */
714 fs_init_free_set(fs, &fs->checkpoint_free);
715
716 /* Reserve 1/4 for tmp blocks plus half of the remaining space */
717 fs->reserved_count = fs->dev->block_count / 8 * 5;
718
719 fs->alternate_data = flags & FS_INIT_FLAGS_ALTERNATE_DATA;
720
721 /*
722 * Check version and flags after initializing an empty FS, so that we can
723 * disallow writing and continue initializing other file systems. If we exit
724 * early here this file system will be inaccessible, but its fields are
725 * safely initialized.
726 */
727 if (super && super->fs_version > SUPER_BLOCK_FS_VERSION) {
728 pr_err("ERROR: super block is from the future 0x%x\n",
729 super->fs_version);
730 error_report_superblock_invalid(fs->name);
731 assert(!fs->readable);
732 assert(!fs->writable);
733 return 0;
734 }
735
736 if (super && (super->required_flags & ~SUPER_BLOCK_REQUIRED_FLAGS_MASK)) {
737 pr_err("ERROR: super block requires unrecognized fs features: 0x%x\n",
738 super->required_flags);
739 error_report_superblock_invalid(fs->name);
740 assert(!fs->readable);
741 assert(!fs->writable);
742 return 0;
743 }
744
745 if (super) {
746 fs->super_block_version = super->flags & SUPER_BLOCK_FLAGS_VERSION_MASK;
747 fs->needs_full_scan =
748 super->opt_flags & SUPER_BLOCK_OPT_FLAGS_NEEDS_FULL_SCAN;
749 fs->main_repaired = super->required_flags &
750 SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED;
751
752 do_swap = !(super->flags & SUPER_BLOCK_FLAGS_ALTERNATE) !=
753 !(flags & FS_INIT_FLAGS_ALTERNATE_DATA);
754
755 if (do_swap) {
756 pr_init("Swapping super-block with alternate\n");
757
758 fs->backup.flags = super->flags & (SUPER_BLOCK_FLAGS_EMPTY |
759 SUPER_BLOCK_FLAGS_ALTERNATE);
760 fs->backup.free = super->free;
761 fs->backup.files = super->files;
762 fs->backup.checkpoint = super->checkpoint;
763
764 if (!has_backup_field ||
765 super->backup.flags & SUPER_BLOCK_FLAGS_EMPTY) {
766 is_clear = true;
767 } else if (has_backup_field) {
768 new_files_root = &super->backup.files;
769 new_free_root = &super->backup.free;
770 if (has_checkpoint_field) {
771 new_checkpoint = &super->backup.checkpoint;
772 }
773 }
774 } else {
775 if (has_backup_field) {
776 fs->backup = super->backup;
777 }
778
779 if (super->flags & SUPER_BLOCK_FLAGS_EMPTY) {
780 is_clear = true;
781 } else {
782 new_files_root = &super->files;
783 new_free_root = &super->free;
784 if (has_checkpoint_field) {
785 new_checkpoint = &super->checkpoint;
786 }
787 }
788 }
789
790 if (!is_clear && !do_clear &&
791 (!block_probe(fs, new_files_root, true) ||
792 !block_probe(fs, new_free_root, false))) {
793 pr_init("Backing file probe failed, fs is corrupted.\n");
794 if (recovery_allowed) {
795 pr_init("Attempting to clear corrupted fs.\n");
796 do_clear = true;
797 }
798 }
799
800 /*
801 * Check that the block device has not shrunk. Shrinking is only allowed
802 * in limited circumstances if we are also clearing the filesystem.
803 */
804 if (super->block_count > fs->dev->block_count) {
805 if ((!do_clear) && (!is_clear)) {
806 /*
807 * If block device is smaller than super and we're not clearing
808 * the fs, we want to prevent write access to avoid losing data.
809 * Read-only access is still allowed, although blocks may be
810 * missing.
811 */
812 pr_err("bad block count 0x%" PRIx64 ", expected <= 0x%" PRIx64
813 "\n",
814 super->block_count, fs->dev->block_count);
815 read_only = true;
816 } else if (flags & FS_INIT_FLAGS_ALTERNATE_DATA) {
817 /*
818 * Either we are on main filesystem and switching to alternate
819 * or we are on alternate. Either case is an error. If we get
820 * here, then the alternate FS is not backed by a temp file,
821 * which should never happen. We want to error loudly in this
822 * case, but continue mounting other file systems.
823 */
824 pr_err("Can't clear fs if FS_INIT_FLAGS_ALTERNATE_DATA is"
825 " set .\n");
826 assert(!fs->readable);
827 assert(!fs->writable);
828 return 0;
829 } else {
830 /*
             * If we are on main filesystem and the backup is an
832 * alternate, clear the backup also.
833 */
834 do_clear_backup = true;
835 }
836 }
837 }
838
839 if (!fs->alternate_data && (flags & FS_INIT_FLAGS_RESTORE_CHECKPOINT)) {
840 fs->needs_full_scan = false;
841 }
842
843 /*
844 * If any of the following are true:
845 * - we are initializing a new fs
846 * - we are not swapping but detect an old superblock without the backup
847 * - filesystem device has shrunk and FS_INIT_FLAGS_DO_CLEAR is set
848 * then ensure that the backup slot is a valid empty filesystem in case we
849 * later switch filesystems without an explicit clear flag.
850 */
851 if (!super || (!do_swap && !has_backup_field) || do_clear_backup) {
852 fs->backup = (struct super_block_backup){
853 .flags = SUPER_BLOCK_FLAGS_EMPTY,
854 .files = {0},
855 .free = {0},
856 .checkpoint = {0},
857 };
858 }
859
860 if (super && !is_clear && !do_clear) {
861 if (!fs_set_roots(fs, new_free_root, new_files_root, new_checkpoint,
862 flags & FS_INIT_FLAGS_RESTORE_CHECKPOINT)) {
863 /*
864 * fs_set_roots() returns false if the checkpoint restore failed,
865 * but leaves the roots in a valid state to allow read-only access.
866 */
867 pr_err("fs %s: failed to initialize filesystem roots\n", fs->name);
868 read_only = true;
869 } else {
870 pr_init("fs %s: loaded super block version %d, checkpoint exists: %d\n",
871 fs->name, fs->super_block_version,
872 block_range_empty(fs->checkpoint_free.initial_range));
873 }
874 } else {
875 if (is_clear) {
876 pr_init("fs %s: superblock, version %d, is empty fs\n", fs->name,
877 fs->super_block_version);
878 } else if (do_clear) {
879 pr_init("fs %s: clear requested, create empty, version %d\n",
880 fs->name, fs->super_block_version);
881 if (!fs->alternate_data) {
882 fs->main_repaired = false;
883 fs->needs_full_scan = false;
884 }
885 } else {
886 pr_init("fs %s: no valid super-block found, create empty\n",
887 fs->name);
888 }
889 fs_init_free_set(fs, &fs->free);
890 }
891 assert(fs->block_num_size >= fs->dev->block_num_size);
892 assert(fs->block_num_size <= sizeof(data_block_t));
893 assert(fs->mac_size >= fs->dev->mac_size);
894 assert(fs->mac_size <= sizeof(struct mac));
895 assert(fs->mac_size == sizeof(struct mac) || fs->dev->tamper_detecting);
896
897 /*
898 * fs_set_roots() unconditionally set the files and free roots. If it fails,
899 * it failed to read the checkpoint block but that should only block
900 * modification, not reading.
901 */
902 fs->readable = true;
903
904 if (read_only) {
905 assert(!fs->writable);
906 return 0;
907 }
908
909 fs->writable = true;
910 if (do_clear && !is_clear) {
911 if (!write_initial_super_block(fs)) {
912 return -1;
913 }
914 } else if (flags & FS_INIT_FLAGS_RESTORE_CHECKPOINT) {
915 /*
916 * Flush the new restored checkpoint to superblock before overwriting
917 * any data blocks. We know that we can't already have a pending
918 * initial_super_block_tr yet because we just made the filesystem
919 * writable, and write_current_super_block() requires a writable
920 * filesystem.
921 */
922 assert(!fs->initial_super_block_tr);
923 write_current_super_block(fs, false);
924 }
925
926 return 0;
927 }
928
929 /**
930 * load_super_block - Find and load superblock and initialize file system state
931 * @fs: File system state object.
932 * @flags: Any of &typedef fs_init_flags32_t, ORed together.
933 *
934 * Return: 0 if super block was readable and not from a future file system
935 * version (regardless of its other content), -1 if not.
936 */
load_super_block(struct fs * fs,fs_init_flags32_t flags)937 static int load_super_block(struct fs* fs, fs_init_flags32_t flags) {
938 unsigned int i;
939 int ret;
940 const struct super_block* new_super;
941 struct obj_ref new_super_ref = OBJ_REF_INITIAL_VALUE(new_super_ref);
942 const struct super_block* old_super = NULL;
943 struct obj_ref old_super_ref = OBJ_REF_INITIAL_VALUE(old_super_ref);
944
945 assert(fs->super_dev->block_size >= sizeof(struct super_block));
946
947 for (i = 0; i < countof(fs->super_block); i++) {
948 new_super = block_get_super(fs, fs->super_block[i], &new_super_ref);
949 if (!new_super) {
950 if (fs->allow_tampering) {
951 /*
952 * Superblock may not exist yet in non-secure storage, proceed
953 * anyway
954 */
955 continue;
956 }
957 pr_err("failed to read super-block\n");
958 ret = -1; // -EIO ? ERR_IO?;
959 goto err;
960 }
961 if (use_new_super(fs->dev, new_super, i, old_super)) {
962 if (old_super) {
963 block_put(old_super, &old_super_ref);
964 }
965 old_super = new_super;
966 obj_ref_transfer(&old_super_ref, &new_super_ref);
967 } else {
968 block_put(new_super, &new_super_ref);
969 }
970 }
971
972 ret = fs_init_from_super(fs, old_super, flags);
973 err:
974 if (old_super) {
975 block_put(old_super, &old_super_ref);
976 }
977 return ret;
978 }
979
/**
 * struct fs_check_state - State shared across a full file system check
 * @iter:                   Embedded iteration state handed to file_iterate();
 *                          fs_check_file() recovers the enclosing struct from
 *                          it with containerof().
 * @delete_invalid_files:   Not read or written by the check functions in this
 *                          part of the file.
 *                          NOTE(review): presumably selects whether invalid
 *                          files get removed - confirm against other users.
 * @internal_state_valid:   Cleared when a file's info block cannot be read, a
 *                          file cannot be opened, file_check() fails, or a
 *                          tree could not be fully read.
 * @invalid_block_found:    Set when the transaction reported an invalid block
 *                          while checking the file tree or a file.
 */
struct fs_check_state {
    struct file_iterate_state iter;
    bool delete_invalid_files;

    bool internal_state_valid;
    bool invalid_block_found;
};
987
fs_check_file(struct file_iterate_state * iter,struct transaction * tr,const struct block_mac * block_mac,bool added,bool removed)988 static bool fs_check_file(struct file_iterate_state* iter,
989 struct transaction* tr,
990 const struct block_mac* block_mac,
991 bool added,
992 bool removed) {
993 struct fs_check_state* fs_check_state =
994 containerof(iter, struct fs_check_state, iter);
995 struct obj_ref info_ref = OBJ_REF_INITIAL_VALUE(info_ref);
996 struct storage_file_handle file;
997 char path[FS_PATH_MAX];
998
999 assert(!tr->failed);
1000 assert(!tr->invalid_block_found);
1001
1002 const struct file_info* info = file_get_info(tr, block_mac, &info_ref);
1003 if (!info) {
1004 pr_err("could not get file info at block %" PRIu64 "\n",
1005 block_mac_to_block(tr, block_mac));
1006 fs_check_state->internal_state_valid = false;
1007 goto err_file_info;
1008 }
1009 strncpy(path, info->path, sizeof(path));
1010 path[sizeof(path) - 1] = '\0';
1011 file_info_put(info, &info_ref);
1012
1013 enum file_op_result result =
1014 file_open(tr, path, &file, FILE_OPEN_NO_CREATE, true);
1015 if (result != FILE_OP_SUCCESS) {
1016 /* TODO: is it ok to leak the filename here? we do it elsewhere */
1017 pr_err("could not open file %s\n", path);
1018 fs_check_state->internal_state_valid = false;
1019 goto err_file_open;
1020 }
1021
1022 if (!file_check(tr, &file)) {
1023 fs_check_state->internal_state_valid = false;
1024 }
1025
1026 file_close(&file);
1027
1028 err_file_open:
1029 err_file_info:
1030 if (tr->invalid_block_found) {
1031 fs_check_state->invalid_block_found = true;
1032 /* We have noted the invalid block, reset for the next file. */
1033 tr->invalid_block_found = false;
1034 }
1035 if (tr->failed) {
1036 transaction_activate(tr);
1037 }
1038
1039 /* Continue iterating unconditionally */
1040 return false;
1041 }
1042
fs_check_full(struct fs * fs)1043 enum fs_check_result fs_check_full(struct fs* fs) {
1044 bool free_set_valid, file_tree_valid;
1045 enum fs_check_result res = FS_CHECK_NO_ERROR;
1046 struct transaction iterate_tr;
1047 struct fs_check_state state = {
1048 .iter.file = fs_check_file,
1049 .internal_state_valid = true,
1050 .invalid_block_found = false,
1051 };
1052
1053 transaction_init(&iterate_tr, fs, true);
1054
1055 /* Check the free list for consistency */
1056 free_set_valid = block_set_check(&iterate_tr, &fs->free);
1057 if (!free_set_valid || iterate_tr.invalid_block_found) {
1058 pr_err("free block set is invalid\n");
1059 res = FS_CHECK_INVALID_FREE_SET;
1060 /*
1061 * We can recover the free set non-destructively by rebuilding from the
1062 * file tree, so we don't need to report the invalid block.
1063 */
1064 iterate_tr.invalid_block_found = false;
1065 }
1066 if (iterate_tr.failed) {
1067 pr_err("free set tree not fully readable\n");
1068 state.internal_state_valid = false;
1069 transaction_activate(&iterate_tr);
1070 }
1071
1072 /* Check the file tree for consistency */
1073 file_tree_valid = block_tree_check(&iterate_tr, &fs->files);
1074 if (!file_tree_valid) {
1075 pr_err("file tree is invalid\n");
1076 res = FS_CHECK_INVALID_FILE_TREE;
1077 }
1078 if (iterate_tr.invalid_block_found) {
1079 pr_err("invalid block encountered in file tree\n");
1080 state.invalid_block_found = true;
1081 iterate_tr.invalid_block_found = false;
1082 }
1083 if (iterate_tr.failed) {
1084 pr_err("file tree not fully readable\n");
1085 state.internal_state_valid = false;
1086 transaction_activate(&iterate_tr);
1087 }
1088
1089 file_iterate(&iterate_tr, NULL, false, &state.iter, true);
1090
1091 /* Invalid blocks take precedence over internal consistency errors. */
1092 if (state.invalid_block_found) {
1093 res = FS_CHECK_INVALID_BLOCK;
1094 } else if (res == FS_CHECK_NO_ERROR && !state.internal_state_valid) {
1095 res = FS_CHECK_UNKNOWN;
1096 }
1097 if (!iterate_tr.failed) {
1098 transaction_fail(&iterate_tr);
1099 }
1100 transaction_free(&iterate_tr);
1101
1102 return res;
1103 }
1104
fs_check_quick(struct fs * fs)1105 enum fs_check_result fs_check_quick(struct fs* fs) {
1106 bool fs_is_clear = !block_range_empty(fs->free.initial_range);
1107 if (fs_is_clear || (block_probe(fs, &fs->files.root, true) &&
1108 block_probe(fs, &fs->free.block_tree.root, false))) {
1109 return FS_CHECK_NO_ERROR;
1110 } else {
1111 return FS_CHECK_INVALID_BLOCK;
1112 }
1113 }
1114
fs_check(struct fs * fs)1115 enum fs_check_result fs_check(struct fs* fs) {
1116 if (fs->needs_full_scan) {
1117 pr_warn("%s filesystem requires full scan on mount\n", fs->name);
1118 return fs_check_full(fs);
1119 } else {
1120 return fs_check_quick(fs);
1121 }
1122 }
1123
1124 /**
1125 * fs_file_tree_init - Initialize an empty file tree for a file system
1126 * @fs: File system state object.
1127 * @tree: Block tree to initialize as a file tree.
1128 */
fs_file_tree_init(const struct fs * fs,struct block_tree * tree)1129 void fs_file_tree_init(const struct fs* fs, struct block_tree* tree) {
1130 size_t block_num_size;
1131 size_t block_mac_size;
1132
1133 block_num_size = fs->block_num_size;
1134 block_mac_size = block_num_size + fs->mac_size;
1135 block_tree_init(tree, fs->dev->block_size, block_num_size, block_mac_size,
1136 block_mac_size);
1137 }
1138
1139 /**
1140 * fs_init - Initialize file system state
1141 * @fs: File system state object.
1142 * @name: File system name for error reporting. Must be a static string.
1143 * @key: Key pointer. Must not be freed while @fs is in use.
1144 * @dev: Main block device.
1145 * @super_dev: Block device for super block.
1146 * @flags: Any of &typedef fs_init_flags32_t, ORed together.
1147 */
fs_init(struct fs * fs,const char * name,const struct key * key,struct block_device * dev,struct block_device * super_dev,fs_init_flags32_t flags)1148 int fs_init(struct fs* fs,
1149 const char* name,
1150 const struct key* key,
1151 struct block_device* dev,
1152 struct block_device* super_dev,
1153 fs_init_flags32_t flags) {
1154 int ret;
1155
1156 if (super_dev->block_size < sizeof(struct super_block)) {
1157 pr_err("unsupported block size for super_dev, %zd < %zd\n",
1158 super_dev->block_size, sizeof(struct super_block));
1159 return -1; // ERR_NOT_VALID?
1160 }
1161
1162 if (super_dev->block_count < 2) {
1163 pr_err("unsupported block count for super_dev, %" PRIu64 "\n",
1164 super_dev->block_count);
1165 return -1; // ERR_NOT_VALID?
1166 }
1167
1168 if ((flags & FS_INIT_FLAGS_ALTERNATE_DATA) &&
1169 (flags & FS_INIT_FLAGS_RESTORE_CHECKPOINT)) {
1170 pr_err("Alternate file system cannot restore to a checkpoint\n");
1171 return -1;
1172 }
1173
1174 fs->name = name;
1175 fs->key = key;
1176 fs->dev = dev;
1177 fs->super_dev = super_dev;
1178 fs->readable = false;
1179 fs->writable = false;
1180 fs->allow_tampering = flags & FS_INIT_FLAGS_ALLOW_TAMPERING;
1181 fs->checkpoint_required = false;
1182 list_initialize(&fs->transactions);
1183 list_initialize(&fs->allocated);
1184 fs->initial_super_block_tr = NULL;
1185 list_add_tail(&fs_list, &fs->node);
1186
1187 if (dev == super_dev) {
1188 fs->min_block_num = 2;
1189 } else {
1190 /* TODO: use 0 when btree code allows it */
1191 fs->min_block_num = 1;
1192 }
1193 fs->super_block[0] = 0;
1194 fs->super_block[1] = 1;
1195 ret = load_super_block(fs, flags);
1196 if (ret) {
1197 fs_destroy(fs);
1198 fs->dev = NULL;
1199 fs->super_dev = NULL;
1200 return ret;
1201 }
1202
1203 if ((flags & FS_INIT_FLAGS_AUTO_CHECKPOINT) &&
1204 !block_mac_valid_fs(fs, &fs->checkpoint)) {
1205 if (fs_check_full(fs) == FS_CHECK_NO_ERROR) {
1206 fs->checkpoint_required = true;
1207 } else {
1208 pr_err("Not automatically creating a checkpoint; "
1209 "an error was found in filesystem %s\n",
1210 fs->name);
1211 }
1212 }
1213
1214 return 0;
1215 }
1216
1217 /**
1218 * fs_destroy - Destroy file system state
1219 * @fs: File system state object.
1220 *
1221 * Free any dynamically allocated state and check that @fs is not referenced by
1222 * any transactions.
1223 */
fs_destroy(struct fs * fs)1224 void fs_destroy(struct fs* fs) {
1225 if (fs->initial_super_block_tr) {
1226 if (!fs->initial_super_block_tr->failed) {
1227 transaction_fail(fs->initial_super_block_tr);
1228 }
1229 transaction_free(fs->initial_super_block_tr);
1230 free(fs->initial_super_block_tr);
1231 fs->initial_super_block_tr = NULL;
1232 }
1233 assert(list_is_empty(&fs->transactions));
1234 assert(list_is_empty(&fs->allocated));
1235 list_delete(&fs->node);
1236 fs->readable = false;
1237 fs->writable = false;
1238 }
1239
1240 /**
1241 * fs_unknown_super_block_state_all - Notify filesystems of unknown disk state
1242 *
1243 * Call from other layers when detecting write failues that can cause the
1244 * in-memory state of super blocks (or other block that we don't care about) to
1245 * be different from the on-disk state. Write in-memory state to disk before
1246 * writing any other block.
1247 */
fs_unknown_super_block_state_all(void)1248 void fs_unknown_super_block_state_all(void) {
1249 struct fs* fs;
1250 list_for_every_entry(&fs_list, fs, struct fs, node) {
1251 /* TODO: filter out filesystems that are not affected? */
1252 /*
1253 * We can't reinitialize an existing, failed special transaction here.
1254 * If a initial superblock write failed and triggered
1255 * fs_unknown_super_block_state_all() we need to leave that superblock
1256 * transaction in a failed state so that the transaction that that
1257 * triggered the failing write can also be failed further up the call
1258 * chain. If a special transaction already exists we are guaranteed that
1259 * it will be reinitialized and flushed to disk before any new writes to
1260 * that FS, so we don't need to reinitialize it here.
1261 *
1262 * If this file system is not writable, we should not try to re-write
1263 * the current super block state. A read-only file system cannot have
1264 * any modifications that we are allowed to save, and it does not need
1265 * to be re-synced here as we cannot have previously failed to write its
1266 * superblock.
1267 */
1268 if (fs->writable) {
1269 write_current_super_block(fs, false /* reinitialize */);
1270 }
1271 }
1272 }
1273
fs_fail_all_transactions(void)1274 void fs_fail_all_transactions(void) {
1275 struct transaction* tmp_tr;
1276 struct transaction* tr;
1277 struct fs* fs;
1278 list_for_every_entry(&fs_list, fs, struct fs, node) {
1279 list_for_every_entry_safe(&fs->transactions, tr, tmp_tr,
1280 struct transaction, node) {
1281 if (transaction_is_active(tr) && !tr->failed) {
1282 transaction_fail(tr);
1283 }
1284 }
1285 }
1286 }
1287