/*
 * Copyright (C) 2015-2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "array.h"
#include "block_allocator.h"
#include "block_set.h"
#include "checkpoint.h"
#include "debug.h"
#include "file.h"
#include "transaction.h"

bool print_merge_free;

/**
 * transaction_check_free - Check that a free set contains enough blocks
 * @tr: Transaction object.
 * @set: Free set to check.
 * @min_free: Number of blocks that must be in @set.
 *
 * Return: %true if @set contains @min_free or more blocks, %false otherwise.
 */
static bool transaction_check_free(struct transaction* tr,
                                   struct block_set* set,
                                   data_block_t min_free) {
    data_block_t next_block;
    struct block_range free_range;
    data_block_t count;

    next_block = 0;
    while (true) {
        free_range = block_set_find_next_range(tr, set, next_block);
        if (block_range_empty(free_range)) {
            return false;
        }
        count = free_range.end - free_range.start;
        if (count >= min_free) {
            return true;
        }
        min_free -= count;
        next_block = free_range.end;
    }
}

/**
 * transaction_merge_free_sets - Compute the new free set for a transaction
 * @tr: Transaction object.
 * @new_set: Output set.
 * @set_i: Initial set.
 * @set_d: Set of blocks to delete.
 * @set_a: Set of blocks to add.
 *
 * Helper function to update the free_set when committing a transaction.
 * @new_set = @set_i - @set_d - @new_set-blocks + @set_a + set_[ida]-blocks
 * @new_set must start empty and will be initialized and filled in this
 * function.
 */
static void transaction_merge_free_sets(struct transaction* tr,
                                        struct block_set* new_set,
                                        struct block_set* set_i,
                                        struct block_set* set_d,
                                        struct block_set* set_a) {
    data_block_t next_block;
    struct block_range delete_range = BLOCK_RANGE_INITIAL_VALUE(delete_range);
    struct block_range add_range = BLOCK_RANGE_INITIAL_VALUE(add_range);

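    /*
     * Worked example (illustrative only): if the old free set set_i is
     * {2-6, 8-9}, this transaction allocated set_d = {4-5} and freed
     * set_a = {7}, then the loop below produces new_set = {2-3, 6-9},
     * further adjusted by the "-blocks" terms in the formula above, which
     * account for tree nodes used by new_set and by the input sets
     * themselves.
     */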
    /* new_set should start empty. */
    assert(block_set_find_next_block(tr, new_set, 1, true) == 0);

    block_set_copy(tr, new_set, set_i);

    full_assert(block_set_check(tr, set_i));
    full_assert(block_set_check(tr, set_d));
    full_assert(block_set_check(tr, set_a));

    assert(!block_mac_valid(tr, &set_i->block_tree.root) ||
           transaction_block_need_copy(
                   tr, block_mac_to_block(tr, &set_i->block_tree.root)));

    if (print_merge_free) {
        printf("%s\n", __func__);
    }

    /* TODO: don't walk the whole tree each time */
    next_block = 1;
    while (next_block != 0) {
        tr->min_free_block = next_block;
        delete_range = block_set_find_next_range(tr, set_d, next_block);
        add_range = block_set_find_next_range(tr, set_a, next_block);
        if (print_merge_free) {
            printf("%s: add %" PRIu64 "-%" PRIu64 " or delete %" PRIu64
                   "-%" PRIu64 "\n",
                   __func__, add_range.start, add_range.end - 1,
                   delete_range.start, delete_range.end - 1);
        }
        assert(!block_range_overlap(delete_range, add_range));
        if (block_range_before(delete_range, add_range)) {
            assert(delete_range.start >= next_block);
            tr->min_free_block = delete_range.end;
            block_allocator_suspend_set_updates(tr);
            block_set_remove_range(tr, new_set, delete_range);
            block_allocator_process_queue(tr);
            next_block = delete_range.end;
        } else if (!block_range_empty(add_range)) {
            assert(add_range.start >= next_block);
            tr->min_free_block = add_range.end;
            block_allocator_suspend_set_updates(tr);
            block_set_add_range(tr, new_set, add_range);
            block_allocator_process_queue(tr);
            next_block = add_range.end;
        } else {
            assert(block_range_empty(delete_range));
            assert(block_range_empty(add_range));
            next_block = 0;
        }
        if (tr->failed) {
            pr_warn("transaction failed, abort\n");
            return;
        }
    }
    full_assert(block_set_check(tr, new_set));
}

/**
 * transaction_rebuild_free_set - Rebuild free set from referenced file blocks
 * @tr: Transaction object.
 * @new_free_set: Output free set.
 * @new_files: Root block and mac of the files tree.
 * @new_checkpoint: Checkpoint metadata block and mac.
 *
 * Rebuilds the file system free set by walking the current files tree and
 * ensuring that all referenced blocks are marked as not free. @new_free_set
 * will be initialized to contain all blocks not referenced from the files
 * root. The @new_checkpoint metadata block will also be removed from the free
 * set, but its children (checkpoint files tree and free set) will not be
 * checked. The blocks in the checkpoint (besides the metadata block) are
 * tracked as free/allocated by the checkpoint free set rather than the active
 * file system free set.
 *
 * We ignore tr->freed and tr->fs->free here because we are reconstructing the
 * entire free set. All blocks that were freed in this transaction will not be
 * referenced by @new_files.
 */
static void transaction_rebuild_free_set(struct transaction* tr,
                                         struct block_set* new_free_set,
                                         struct block_mac* new_files,
                                         struct block_mac* new_checkpoint) {
    struct block_range init_range = {
            .start = tr->fs->min_block_num,
            .end = tr->fs->dev->block_count,
    };
    struct block_range range;
    struct block_set previously_allocated =
            BLOCK_SET_INITIAL_VALUE(previously_allocated);

    /*
     * Copy and save tr->allocated so that we can keep track of the blocks
     * already allocated for the current transaction when performing
     * allocations for the new free set tree nodes. We then reset tr->allocated
     * so that it will only hold new blocks allocated for new_free_set. All
     * blocks allocated for files will already be referenced in new_files, so
     * we'll already be removing them from new_free_set.
     */
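    /*
     * Keeping previously_allocated on the tr->fs->allocated list below appears
     * to serve one purpose: while the new free set tree is built, blocks that
     * were already allocated earlier in this transaction remain visible to the
     * block allocator and cannot be handed out again.
     */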
    assert(list_in_list(&tr->allocated.node));
    list_delete(&tr->allocated.node);
    block_set_copy_ro(tr, &previously_allocated, &tr->allocated);
    list_add_tail(&tr->fs->allocated, &previously_allocated.node);

    block_set_init(tr->fs, &tr->allocated);
    list_add_tail(&tr->fs->allocated, &tr->allocated.node);

    block_set_init(tr->fs, new_free_set);
    new_free_set->block_tree.copy_on_write = true;
    new_free_set->block_tree.allow_copy_on_write = true;

    block_set_add_range(tr, new_free_set, init_range);

    if (block_mac_valid(tr, new_checkpoint)) {
        block_set_remove_block(tr, new_free_set,
                               block_mac_to_block(tr, new_checkpoint));
    }
    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        return;
    }

    if (block_mac_valid(tr, new_files)) {
        files_rebuild_free_set(tr, new_free_set, new_files);
        if (tr->failed) {
            pr_warn("transaction failed, abort\n");
            return;
        }
    }

    for (range = block_set_find_next_range(tr, &tr->allocated, 1);
         !block_range_empty(range);
         range = block_set_find_next_range(tr, &tr->allocated, range.end)) {
        tr->min_free_block = range.end;
        block_allocator_suspend_set_updates(tr);
        block_set_remove_range(tr, new_free_set, range);
        block_allocator_process_queue(tr);
    }

    /*
     * Copy the rest of the allocated blocks back to tr->allocated to maintain
     * a consistent state. We don't actually need to do this with the current
     * code calling this function, but this restores the transaction state to
     * what would be expected if it were to be used in the future.
     */
    for (range = block_set_find_next_range(tr, &previously_allocated, 1);
         !block_range_empty(range);
         range = block_set_find_next_range(tr, &previously_allocated,
                                           range.end)) {
        block_set_add_range(tr, &tr->allocated, range);
    }
    list_delete(&previously_allocated.node);

    full_assert(block_set_check(tr, new_free_set));
}

/**
 * transaction_block_need_copy - Check if block needs copy
 * @tr: Transaction object.
 * @block: Block number to check.
 *
 * Return: %true if block has not been allocated as a non-tmp block for @tr,
 * %false otherwise.
 */
bool transaction_block_need_copy(struct transaction* tr, data_block_t block) {
    assert(block);
    assert(!block_set_block_in_set(tr, &tr->tmp_allocated, block));
    assert(!block_allocator_allocation_queued(tr, block, true));

    return !block_set_block_in_set(tr, &tr->allocated, block) &&
           !block_allocator_allocation_queued(tr, block, false);
}

/**
 * transaction_delete_active - Remove transaction from active lists (internal)
 * @tr: Transaction object.
 */
static void transaction_delete_active(struct transaction* tr) {
    assert(list_in_list(&tr->allocated.node));
    assert(list_in_list(&tr->tmp_allocated.node));
    list_delete(&tr->allocated.node);
    list_delete(&tr->tmp_allocated.node);
}

/**
 * transaction_fail - Fail transaction
 * @tr: Transaction object.
 *
 * Marks the transaction as failed, removes it from the active list, discards
 * dirty cache entries and restores open files to their last committed state.
 */
void transaction_fail(struct transaction* tr) {
    assert(!tr->failed);

    tr->failed = true;
    if (tr->complete) {
        return;
    }

    block_cache_discard_transaction(tr, true);
    transaction_delete_active(tr);
    file_transaction_failed(tr);
}

/**
 * transaction_free - Free transaction
 * @tr: Transaction object.
 *
 * Prepare @tr for free. @tr must not be active and all open files must already
 * be closed.
 */
void transaction_free(struct transaction* tr) {
    assert(!transaction_is_active(tr));
    assert(list_is_empty(&tr->open_files));
    assert(list_in_list(&tr->node));
    list_delete(&tr->node);
}

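/*
 * Typical lifecycle of a transaction object (illustrative sketch only; the
 * exact calling sequence is up to the caller):
 *
 *   struct transaction tr;
 *
 *   transaction_init(&tr, fs, true);       // register with fs and activate
 *   ... create, modify or delete files ...
 *   transaction_complete_etc(&tr, false);  // commit, no checkpoint update
 *   if (tr.failed) {
 *       ... report the error, or re-activate and retry ...
 *   }
 *   transaction_free(&tr);                 // all open files must be closed
 */
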
/**
 * check_free_tree - Check tree of free set (internal)
 * @tr: Transaction object.
 * @free: Set object.
 *
 * Check that the blocks used by the tree for a free set are not in the same
 * set.
 */
static void check_free_tree(struct transaction* tr, struct block_set* free) {
    unsigned int i;
    struct block_tree_path path;

    block_tree_walk(tr, &free->block_tree, 0, true, &path);
    while (block_tree_path_get_key(&path)) {
        for (i = 0; i < path.count; i++) {
            assert(!block_set_block_in_set(
                    tr, free,
                    block_mac_to_block(tr, &path.entry[i].block_mac)));
        }
        block_tree_path_next(&path);
    }
}

/**
 * transaction_complete_etc - Complete transaction, optionally updating
 *                            checkpoint
 * @tr: Transaction object.
 * @update_checkpoint: If true, update checkpoint with the new file-system
 *                     state.
 */
void transaction_complete_etc(struct transaction* tr, bool update_checkpoint) {
    struct block_mac new_files;
    struct transaction* tmp_tr;
    struct transaction* other_tr;
    struct block_set new_free_set = BLOCK_SET_INITIAL_VALUE(new_free_set);
    struct checkpoint* new_checkpoint = NULL;
    struct block_mac new_checkpoint_mac;
    struct obj_ref new_checkpoint_ref =
            OBJ_REF_INITIAL_VALUE(new_checkpoint_ref);
    bool super_block_updated;

    assert(tr->fs);
    assert(!tr->complete);

    if (tr->fs->checkpoint_required) {
        tr->fs->checkpoint_required = false;
        if (!checkpoint_commit(tr->fs)) {
            /*
             * checkpoint creation failed, so we need to try again before we
             * commit the next transaction
             */
            tr->fs->checkpoint_required = true;
            transaction_fail(tr);
            pr_warn("auto-checkpoint failed, abort\n");
            goto err_transaction_failed;
        }
    }

    // printf("%s: %" PRIu64 "\n", __func__, tr->version);

    block_mac_copy(tr, &new_checkpoint_mac, &tr->fs->checkpoint);

    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }

    assert(transaction_is_active(tr));

    file_transaction_complete(tr, &new_files);

    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }

    if (update_checkpoint) {
        new_checkpoint = checkpoint_get_new_block(tr, &new_checkpoint_ref,
                                                  &new_checkpoint_mac);
        if (tr->failed) {
            pr_warn("transaction failed, abort\n");
            goto err_transaction_failed;
        }
        assert(new_checkpoint);
    }

    tr->new_free_set = &new_free_set;
    if (tr->rebuild_free_set) {
        transaction_rebuild_free_set(tr, &new_free_set, &new_files,
                                     &new_checkpoint_mac);
    } else {
        transaction_merge_free_sets(tr, &new_free_set, &tr->fs->free,
                                    &tr->allocated, &tr->freed);
    }
    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }

    if (!transaction_check_free(tr, &new_free_set, tr->fs->reserved_count)) {
        if (!tr->failed) {
            transaction_fail(tr);
        }
        pr_warn("transaction would leave fs too full, abort\n");
        goto err_transaction_failed;
    }

    if (tr->fs->alternate_data && tr->repaired) {
        if (!tr->failed) {
            transaction_fail(tr);
        }
        pr_warn("transaction cannot repair alternate fs, abort\n");
        goto err_transaction_failed;
    }

    if (0) {
        printf("%s: old free:\n", __func__);
        block_set_print(tr, &tr->fs->free);
        printf("%s: tmp_allocated:\n", __func__);
        block_set_print(tr, &tr->tmp_allocated);
        printf("%s: allocated:\n", __func__);
        block_set_print(tr, &tr->allocated);
        printf("%s: freed:\n", __func__);
        block_set_print(tr, &tr->freed);
        printf("%s: new free:\n", __func__);
        block_set_print(tr, &new_free_set);
    }

    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }

    if (update_checkpoint) {
        checkpoint_update_roots(tr, new_checkpoint, &new_files,
                                &new_free_set.block_tree.root);
        block_put_dirty(tr, new_checkpoint, &new_checkpoint_ref,
                        &new_checkpoint_mac, NULL);
        /*
         * We have now released the block reference new_checkpoint_ref, so make
         * sure we don't release it again in err_transaction_failed.
         */
        new_checkpoint = NULL;
    }

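    /*
     * Write back this transaction's remaining dirty blocks before the super
     * block is updated, so that the new file and free trees are on disk before
     * a super block that references them can be written.
     */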
    block_cache_clean_transaction(tr);

    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }

    assert(block_range_empty(new_free_set.initial_range));

    check_free_tree(tr, &new_free_set);

    if (block_mac_same_block(tr, &tr->fs->free.block_tree.root,
                             &new_free_set.block_tree.root)) {
        /*
         * If the root block of the free tree did not move, there can be no
         * other changes to the filesystem.
         */
        assert(block_mac_eq(tr, &tr->fs->free.block_tree.root,
                            &new_free_set.block_tree.root));
        assert(block_mac_eq(tr, &tr->fs->files.root, &new_files));
        /*
         * Skip super block write if there are no changes to the filesystem.
         * This is needed in case a previous write error has triggered a
         * request to write another copy of the old super block. There can only
         * be one copy of each block in the cache. If we try to write a new
         * super block here before cleaning the pending one, we get a conflict.
         * If there were changes to the filesystem, the pending super block has
         * already been cleaned at this point.
         */
        goto complete_nop_transaction;
    }

    super_block_updated = update_super_block(tr, &new_free_set.block_tree.root,
                                             &new_files, &new_checkpoint_mac);
    if (!super_block_updated) {
        assert(tr->failed);
        pr_warn("failed to update super block, abort\n");
        goto err_transaction_failed;
    }

    block_cache_clean_transaction(tr);

    /*
     * If an error was detected writing the super block, it is not safe to
     * continue as we do not know if the write completed. We need to rewrite a
     * known state over the unknown super block to avoid an inconsistent view
     * of the filesystem.
     *
     * At this point block_cache_complete_write has been called by the block
     * device, so the current superblock slot in the block cache is free and
     * not associated with the pending transaction.
     */
    if (tr->failed) {
        pr_warn("failed to write super block, notify fs and abort the transaction\n");
        /*
         * Superblock could have been written or not. Make sure no other blocks
         * are written to the filesystem before writing another copy of the
         * superblock with the existing file and free trees.
         *
         * TODO: Don't trigger a superblock write on unaffected filesystems.
         * We update all for now to simplify testing.
         */
        fs_unknown_super_block_state_all();
        goto err_transaction_failed;
    }

    tr->fs->free.block_tree.root = new_free_set.block_tree.root;
    block_range_clear(
            &tr->fs->free.initial_range); /* clear for initial file-system state */
    tr->fs->files.root = new_files;
    tr->fs->super_block_version = tr->fs->written_super_block_version;
    tr->fs->checkpoint = new_checkpoint_mac;
    if (tr->repaired) {
        assert(!tr->fs->alternate_data);
        tr->fs->main_repaired = true;
    }
    if (update_checkpoint) {
        tr->fs->checkpoint_free.block_tree.root = new_free_set.block_tree.root;
        block_range_clear(&tr->fs->checkpoint_free.initial_range);
    }

complete_nop_transaction:
    transaction_delete_active(tr);
    tr->complete = true;
    file_transaction_success(tr);
    assert(!tr->failed);

    check_free_tree(tr, &tr->fs->free);

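    /*
     * The new file-system state is now in place. Fail any other active
     * transaction whose freed set overlaps ours; with copy-on-write updates,
     * overlapping frees indicate that both transactions modified the same
     * original blocks. When the free set was rebuilt, all active transactions
     * are failed instead (see the TODO in the loop below).
     */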
    list_for_every_entry_safe(&tr->fs->transactions, other_tr, tmp_tr,
                              struct transaction, node) {
        if (tr->failed) {
            break;
        }
        if (!transaction_is_active(other_tr)) {
            continue;
        }
        if (tr->rebuild_free_set) {
            /*
             * TODO: only fail actually conflicting transactions when
             * rebuilding the free set. When rebuilding, tr->freed does not
             * contain all freed blocks if tree nodes were dropped. We could
             * rebuild a free set delta by subtracting the new free set from
             * the old one and then compare this delta against other
             * transactions.
             */
            pr_warn("Rebuilding free set requires failing all pending transactions\n");
            transaction_fail(other_tr);
        } else if (block_set_overlap(tr, &tr->freed, &other_tr->freed)) {
            pr_warn("fail conflicting transaction\n");
            transaction_fail(other_tr);
        }
    }

    if (tr->failed) {
        pr_warn("transaction failed while failing conflicting transactions\n");
        tr->failed = false;
        list_for_every_entry_safe(&tr->fs->transactions, other_tr, tmp_tr,
                                  struct transaction, node) {
            if (!transaction_is_active(other_tr)) {
                continue;
            }
            pr_warn("fail possibly conflicting transaction\n");
            transaction_fail(other_tr);
        }
    }
    assert(!tr->failed);

    block_cache_discard_transaction(tr, false);

err_transaction_failed:
    if (new_checkpoint) {
        block_put_dirty_discard(new_checkpoint, &new_checkpoint_ref);
    }
    if (tr->failed) {
        file_transaction_complete_failed(tr);
    }
    assert(!block_cache_debug_get_ref_block_count());
}

/**
 * transaction_initial_super_block_complete - Complete special transaction
 * @tr: Transaction object. Must match initial_super_block_tr in fs.
 *
 * Flush the initial superblock in @tr to disk. If the block could not be
 * written, re-initialize @tr and leave it in place for another attempt.
 * Otherwise, clear @tr->fs->initial_super_block_tr and free @tr.
 *
 * The initial superblock can only be flushed from the block cache by the
 * block_cache_clean_transaction() call here, as we do not allow initial
 * superblocks to be flushed to make room for other data. This ensures that we
 * don't run out of room to recreate the superblock write in case it fails.
 */
void transaction_initial_super_block_complete(struct transaction* tr) {
    assert(tr == tr->fs->initial_super_block_tr);

    block_cache_clean_transaction(tr);

    if (tr->failed) {
        /*
         * If we failed to write the superblock, re-initialize another attempt
         * to write it before the next time we write to this filesystem.
         */
        pr_err("%s: failed to write initial superblock, version %d.\n",
               __func__, tr->fs->written_super_block_version);
        write_current_super_block(tr->fs, true /* reinitialize */);
        return;
    }

    printf("%s: write initial superblock, version %d -> %d\n", __func__,
           tr->fs->super_block_version, tr->fs->written_super_block_version);

    assert(tr == tr->fs->initial_super_block_tr);
    tr->fs->super_block_version = tr->fs->written_super_block_version;
    tr->fs->initial_super_block_tr = NULL;

    /* not a real transaction, discard the state so it can be freed */
    transaction_fail(tr);
    transaction_free(tr);
    free(tr);
}

/**
 * transaction_activate - Activate transaction
 * @tr: Transaction object.
 */
void transaction_activate(struct transaction* tr) {
    assert(tr->fs);
    assert(!transaction_is_active(tr));
    tr->failed = false;
    tr->invalid_block_found = false;
    tr->complete = false;
    tr->rebuild_free_set = false;
    tr->repaired = false;
    tr->min_free_block = 0;
    tr->last_free_block = 0;
    tr->last_tmp_free_block = 0;
    tr->new_free_set = NULL;
    block_set_init(tr->fs, &tr->tmp_allocated);
    block_set_init(tr->fs, &tr->allocated);
    block_set_init(tr->fs, &tr->freed);
    fs_file_tree_init(tr->fs, &tr->files_added);
    fs_file_tree_init(tr->fs, &tr->files_updated);
    fs_file_tree_init(tr->fs, &tr->files_removed);
    list_add_tail(&tr->fs->allocated, &tr->allocated.node);
    list_add_tail(&tr->fs->allocated, &tr->tmp_allocated.node);
}

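/*
 * As the assert(!transaction_is_active(tr)) check above suggests, a
 * transaction object may be activated again after transaction_complete_etc()
 * or transaction_fail() has removed it from the active lists, allowing the
 * same object to be reused for a new transaction.
 */
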
/**
 * transaction_init - Initialize new transaction object
 * @tr: Transaction object.
 * @fs: File system state object.
 * @activate: If true, activate the transaction immediately.
 */
void transaction_init(struct transaction* tr, struct fs* fs, bool activate) {
    assert(fs);
    assert(fs->dev);

    memset(tr, 0, sizeof(*tr));
    tr->fs = fs;
    list_initialize(&tr->open_files);
    list_add_tail(&fs->transactions, &tr->node);

    if (activate) {
        transaction_activate(tr);
    }
}