From f05ae18d97bf036c1877658dbe38dcfb1e53c937 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 21 May 2025 19:28:20 +0930 Subject: [PATCH 01/15] btrfs-progs: mkfs: add --inode-flags option This new option allows end users to specify certain per-inode flags for specified file/directory inside rootdir. And mkfs will follow the kernel behavior by inheriting the inode flag from the parent. For example: rootdir |- file1 |- file2 |- dir1/ | |- file3 |- subv/ << will be created as a subvolume using --subvol option |- dir2/ | |- file4 |- file5 When running `mkfs.btrfs --rootdir rootdir --subvol subv --inode-flags nodatacow:dir1 --inode-flags nodatacow:subv`, the following files and directories will have the *nodatacow* flag set: - dir1 - file3 - subv - dir2 - file4 - file5 For now only two flags are supported: - nodatacow Disable data COW, implies *nodatasum* for regular files - nodatasum Disable data checksum only. This also works with --compress option, and files with nodatasum or nodatacow flag will skip compression. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- Documentation/mkfs.btrfs.rst | 35 ++++++++++ mkfs/main.c | 121 ++++++++++++++++++++++++++++++++++- mkfs/rootdir.c | 78 ++++++++++++++++++++-- mkfs/rootdir.h | 15 +++++ 4 files changed, 243 insertions(+), 6 deletions(-) diff --git a/Documentation/mkfs.btrfs.rst b/Documentation/mkfs.btrfs.rst index 119e18b47..82837e429 100644 --- a/Documentation/mkfs.btrfs.rst +++ b/Documentation/mkfs.btrfs.rst @@ -213,6 +213,41 @@ OPTIONS :file:`hardlink1` and :file:`hardlink2` because :file:`hardlink3` will be inside a new subvolume. +--inode-flags <flags>:<path> + Specify that *path* to have inode *flags*, other than the default one (which + implies data COW and data checksum). The option *--rootdir* must also be + specified. This option can be specified multiple times. + + The supported flag(s) are: + + * *nodatacow*: disable data COW, implies *nodatasum* for regular files. + * *nodatasum*: disable data checksum only.
+ + *flags* can be separated by comma (*,*). + + Children inodes will inherit the flags from their parent inodes, like the + following case: + + .. code-block:: none + + rootdir/ + |- file1 + |- file2 + |- dir/ + |- file3 + + In that case, if *--inode-flags nodatacow:dir* is specified, both + :file:`dir` and :file:`file3` will have the *nodatacow* flag. + + And this option also works with *--subvol* option, but the inode flag of + each subvolume is independent and will not inherit from the parent directory. + (The same as the kernel behavior.) + + .. note:: + Both *--inode-flags* and *--subvol* options are memory hungry, + will consume at least 8KiB for each option. Please keep the + usage of both options to minimum. + --shrink Shrink the filesystem to its minimal size, only works with *--rootdir* option. diff --git a/mkfs/main.c b/mkfs/main.c index 4c2ce98c7..4b4165dfb 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -1164,6 +1164,63 @@ static int parse_subvolume(const char *path, struct list_head *subvols, return 0; } +static int parse_inode_flags(const char *option, struct list_head *inode_flags_list) +{ + struct rootdir_inode_flags_entry *entry = NULL; + char *colon; + char *dumpped = NULL; + char *token; + int ret; + + dumpped = strdup(option); + if (!dumpped) { + ret = -ENOMEM; + error_msg(ERROR_MSG_MEMORY, NULL); + goto cleanup; + } + entry = calloc(1, sizeof(*entry)); + if (!entry) { + ret = -ENOMEM; + error_msg(ERROR_MSG_MEMORY, NULL); + goto cleanup; + } + colon = strstr(dumpped, ":"); + if (!colon) { + error("invalid inode flags: %s", option); + ret = -EINVAL; + goto cleanup; + } + *colon = '\0'; + + token = strtok(dumpped, ","); + while (token) { + if (token == NULL) + break; + if (strcmp(token, "nodatacow") == 0) { + entry->nodatacow = true; + } else if (strcmp(token, "nodatasum") == 0) { + entry->nodatasum = true; + } else { + error("unknown flag: %s", token); + ret = -EINVAL; + goto cleanup; + } + token = strtok(NULL, ","); + } + + if 
(arg_copy_path(entry->inode_path, colon + 1, sizeof(entry->inode_path))) { + error("--inode-flags path too long"); + ret = -E2BIG; + goto cleanup; + } + list_add_tail(&entry->list, inode_flags_list); + return 0; +cleanup: + free(dumpped); + free(entry); + return ret; +} + int BOX_MAIN(mkfs)(int argc, char **argv) { char *file; @@ -1206,10 +1263,12 @@ int BOX_MAIN(mkfs)(int argc, char **argv) int nr_global_roots = sysconf(_SC_NPROCESSORS_ONLN); char *source_dir = NULL; struct rootdir_subvol *rds; + struct rootdir_inode_flags_entry *rif; bool has_default_subvol = false; enum btrfs_compression_type compression = BTRFS_COMPRESS_NONE; unsigned int compression_level = 0; LIST_HEAD(subvols); + LIST_HEAD(inode_flags_list); cpu_detect_flags(); hash_init_accel(); @@ -1223,6 +1282,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv) GETOPT_VAL_CHECKSUM, GETOPT_VAL_GLOBAL_ROOTS, GETOPT_VAL_DEVICE_UUID, + GETOPT_VAL_INODE_FLAGS, GETOPT_VAL_COMPRESS, }; static const struct option long_options[] = { @@ -1241,6 +1301,7 @@ int BOX_MAIN(mkfs)(int argc, char **argv) { "version", no_argument, NULL, 'V' }, { "rootdir", required_argument, NULL, 'r' }, { "subvol", required_argument, NULL, 'u' }, + { "inode-flags", required_argument, NULL, GETOPT_VAL_INODE_FLAGS }, { "nodiscard", no_argument, NULL, 'K' }, { "features", required_argument, NULL, 'O' }, { "runtime-features", required_argument, NULL, 'R' }, @@ -1374,6 +1435,11 @@ int BOX_MAIN(mkfs)(int argc, char **argv) case 'q': bconf_be_quiet(); break; + case GETOPT_VAL_INODE_FLAGS: + ret = parse_inode_flags(optarg, &inode_flags_list); + if (ret) + goto error; + break; case GETOPT_VAL_COMPRESS: if (parse_compression(optarg, &compression, &compression_level)) { ret = 1; @@ -1438,6 +1504,11 @@ int BOX_MAIN(mkfs)(int argc, char **argv) ret = 1; goto error; } + if (!list_empty(&inode_flags_list) && source_dir == NULL) { + error("option --inode-flags must be used with --rootdir"); + ret = 1; + goto error; + } if (source_dir) { char *canonical = 
realpath(source_dir, NULL); @@ -1503,6 +1574,41 @@ int BOX_MAIN(mkfs)(int argc, char **argv) } } + list_for_each_entry(rif, &inode_flags_list, list) { + char path[PATH_MAX]; + struct rootdir_inode_flags_entry *rif2; + + if (path_cat_out(path, source_dir, rif->inode_path)) { + ret = -EINVAL; + error("path invalid: %s", path); + goto error; + } + if (!realpath(path, rif->full_path)) { + ret = -errno; + error("could not get canonical path: %s: %m", path); + goto error; + } + if (!path_exists(rif->full_path)) { + ret = -ENOENT; + error("inode path does not exist: %s", rif->full_path); + goto error; + } + list_for_each_entry(rif2, &inode_flags_list, list) { + /* + * Only compare entries before us. So we won't compare + * the same pair twice. + */ + if (rif2 == rif) + break; + if (strcmp(rif2->full_path, rif->full_path) == 0) { + error("duplicated inode flag entries for %s", + rif->full_path); + ret = -EEXIST; + goto error; + } + } + } + if (*fs_uuid) { uuid_t dummy_uuid; @@ -2084,10 +2190,15 @@ int BOX_MAIN(mkfs)(int argc, char **argv) rds->is_default ? "" : " ", rds->dir); } + list_for_each_entry(rif, &inode_flags_list, list) { + pr_verbose(LOG_DEFAULT, " Inode flags (%s): %s\n", + rif->nodatacow ? 
"NODATACOW" : "", + rif->inode_path); + } ret = btrfs_mkfs_fill_dir(trans, source_dir, root, - &subvols, compression, - compression_level); + &subvols, &inode_flags_list, + compression, compression_level); if (ret) { errno = -ret; error("error while filling filesystem: %m"); @@ -2229,6 +2340,12 @@ int BOX_MAIN(mkfs)(int argc, char **argv) list_del(&head->list); free(head); } + while (!list_empty(&inode_flags_list)) { + rif = list_entry(inode_flags_list.next, + struct rootdir_inode_flags_entry, list); + list_del(&rif->list); + free(rif); + } return !!ret; diff --git a/mkfs/rootdir.c b/mkfs/rootdir.c index 3cb507cb5..8d626f104 100644 --- a/mkfs/rootdir.c +++ b/mkfs/rootdir.c @@ -153,6 +153,7 @@ static struct rootdir_path current_path = { static struct btrfs_trans_handle *g_trans = NULL; static struct list_head *g_subvols; +static struct list_head *g_inode_flags_list; static u64 next_subvol_id = BTRFS_FIRST_FREE_OBJECTID; static u64 default_subvol_id; static enum btrfs_compression_type g_compression; @@ -1295,6 +1296,40 @@ static u8 ftype_to_btrfs_type(mode_t ftype) return BTRFS_FT_UNKNOWN; } +static void update_inode_flags(const struct rootdir_inode_flags_entry *rif, + struct btrfs_inode_item *stack_inode) +{ + u64 inode_flags; + + inode_flags = btrfs_stack_inode_flags(stack_inode); + if (rif->nodatacow) { + inode_flags |= BTRFS_INODE_NODATACOW; + + if (S_ISREG(btrfs_stack_inode_mode(stack_inode))) + inode_flags |= BTRFS_INODE_NODATASUM; + } + if (rif->nodatasum) + inode_flags |= BTRFS_INODE_NODATASUM; + + btrfs_set_stack_inode_flags(stack_inode, inode_flags); +} + +static void search_and_update_inode_flags(struct btrfs_inode_item *stack_inode, + const char *full_path) +{ + struct rootdir_inode_flags_entry *rif; + + list_for_each_entry(rif, g_inode_flags_list, list) { + if (strcmp(rif->full_path, full_path) == 0) { + update_inode_flags(rif, stack_inode); + + list_del(&rif->list); + free(rif); + return; + } + } +} + static int ftw_add_subvol(const char *full_path, 
const struct stat *st, int typeflag, struct FTW *ftwbuf, struct rootdir_subvol *subvol) @@ -1353,6 +1388,7 @@ static int ftw_add_subvol(const char *full_path, const struct stat *st, } stat_to_inode_item(&inode_item, st); + search_and_update_inode_flags(&inode_item, full_path); btrfs_set_stack_inode_nlink(&inode_item, 1); ret = update_inode_item(g_trans, new_root, &inode_item, ino); if (ret < 0) { @@ -1372,6 +1408,31 @@ static int ftw_add_subvol(const char *full_path, const struct stat *st, return 0; } +static int read_inode_item(struct btrfs_root *root, struct btrfs_inode_item *inode_item, + u64 ino) +{ + struct btrfs_path path = { 0 }; + struct btrfs_key key; + int ret; + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret > 0) + ret = -ENOENT; + if (ret < 0) + goto out; + + read_extent_buffer(path.nodes[0], inode_item, + btrfs_item_ptr_offset(path.nodes[0], path.slots[0]), + sizeof(*inode_item)); +out: + btrfs_release_path(&path); + return ret; +} + static int ftw_add_inode(const char *full_path, const struct stat *st, int typeflag, struct FTW *ftwbuf) { @@ -1519,6 +1580,7 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, return ret; } stat_to_inode_item(&inode_item, st); + search_and_update_inode_flags(&inode_item, full_path); ret = btrfs_insert_inode(g_trans, root, ino, &inode_item); if (ret < 0) { @@ -1551,11 +1613,17 @@ static int ftw_add_inode(const char *full_path, const struct stat *st, } /* - * btrfs_add_link() has increased the nlink to 1 in the metadata. - * Also update the value in case we need to update the inode item - * later. + * btrfs_add_link() has increased the nlink, and may even updated the + * inode flags (inherited from the parent). + * Read out the latest version of inode item. 
*/ - btrfs_set_stack_inode_nlink(&inode_item, 1); + ret = read_inode_item(root, &inode_item, ino); + if (ret < 0) { + errno = -ret; + error("failed to read inode item for subvol %llu inode %llu ('%s'): %m", + btrfs_root_id(root), ino, full_path); + return ret; + } ret = add_xattr_item(g_trans, root, ino, full_path); if (ret < 0) { @@ -1648,6 +1716,7 @@ static int set_default_subvolume(struct btrfs_trans_handle *trans) int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir, struct btrfs_root *root, struct list_head *subvols, + struct list_head *inode_flags_list, enum btrfs_compression_type compression, unsigned int compression_level) { @@ -1694,6 +1763,7 @@ int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir g_trans = trans; g_subvols = subvols; + g_inode_flags_list = inode_flags_list; g_compression = compression; g_compression_level = compression_level; INIT_LIST_HEAD(&current_path.inode_list); diff --git a/mkfs/rootdir.h b/mkfs/rootdir.h index b32fda5bf..f8b959f7a 100644 --- a/mkfs/rootdir.h +++ b/mkfs/rootdir.h @@ -45,8 +45,23 @@ struct rootdir_subvol { bool readonly; }; +/* + * Represent a flag for specified inode at @full_path. + */ +struct rootdir_inode_flags_entry { + struct list_head list; + /* Fully canonicalized path to the source file. */ + char full_path[PATH_MAX]; + /* Path inside the source directory.
*/ + char inode_path[PATH_MAX]; + + bool nodatacow; + bool nodatasum; +}; + int btrfs_mkfs_fill_dir(struct btrfs_trans_handle *trans, const char *source_dir, struct btrfs_root *root, struct list_head *subvols, + struct list_head *inode_flags_list, enum btrfs_compression_type compression, unsigned int compression_level); u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size, From 3eff85222fb83c7682c5d24c0a6351588594da9e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 21 May 2025 19:28:21 +0930 Subject: [PATCH 02/15] btrfs-progs: tests: new test case for mkfs.btrfs --inode-flags The simple test will create a layout like the following: rootdir |- file1 |- file2 |- subv/ << Regular subvolume | |- file3 |- nocow_subv/ << NODATACOW subvolume | |- file4 |- nocow_dir/ << NODATACOW directory | |- dir2 | | |- file5 | |- file6 |- nocow_file1 << NODATACOW file Any files under NODATACOW subvolume/directory should also be NODATACOW. The explicitly specified single file should also be NODATACOW. Issue: #984 Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- tests/mkfs-tests/038-inode-flags/test.sh | 58 ++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100755 tests/mkfs-tests/038-inode-flags/test.sh diff --git a/tests/mkfs-tests/038-inode-flags/test.sh b/tests/mkfs-tests/038-inode-flags/test.sh new file mode 100755 index 000000000..d646b2c99 --- /dev/null +++ b/tests/mkfs-tests/038-inode-flags/test.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Basic test for mkfs.btrfs --inode-flags --rootdir. Create a dataset and use it as +# rootdir, then various inode-flags and verify the flag is properly set. 
+ +source "$TEST_TOP/common" || exit + +check_prereq mkfs.btrfs +check_prereq btrfs +check_global_prereq lsattr + +setup_root_helper +prepare_test_dev + +tmp=$(_mktemp_dir mkfs-rootdir) + +write_file() +{ + local path="$1" + local size="$2" + + run_check dd if=/dev/zero of="$path" bs="$size" count=1 status=noxfer > /dev/null 2>&1 +} + +check_nodatacow() +{ + local path="$1" + + lsattr "$TEST_MNT/$path" | grep -q C || _fail "missing NODATACOW flag for $path" +} + +write_file "$tmp/file1" 64K +write_file "$tmp/file2" 64K +run_check mkdir -p "$tmp/subv" "$tmp/nocow_subv" "$tmp/nocow_dir/dir2" +write_file "$tmp/subv/file3" 64K +write_file "$tmp/nocow_subv/file4" 64K +write_file "$tmp/nocow_dir/dir2/file5" 64K +write_file "$tmp/nocow_dir/file6" 64K +write_file "$tmp/nocow_file1" 64K + +run_check_mkfs_test_dev --rootdir "$tmp" \ + --inode-flags "nodatacow:nocow_subv" \ + --subvol "nocow_subv" \ + --inode-flags "nodatacow:nocow_dir" \ + --inode-flags "nodatacow:nocow_file1" + +run_check $SUDO_HELPER "$TOP/btrfs" check "$TEST_DEV" + +run_check_mount_test_dev +check_nodatacow "nocow_subv" +check_nodatacow "nocow_subv/file4" +check_nodatacow "nocow_dir" +check_nodatacow "nocow_dir/file6" +check_nodatacow "nocow_dir/dir2/file5" +check_nodatacow "nocow_file1" +run_check lsattr -R "$TEST_MNT" +run_check_umount_test_dev + +run_check rm -rf -- "$tmp" From f96b088ac9d9db138b18a66c4b576eee2453aae8 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 30 May 2025 13:29:26 +0200 Subject: [PATCH 03/15] btrfs-progs: zoned: create a data block-group for relocation Create a second data block-group to be used for relocation, in case a zoned filesystem is created. This second data block-group will then be picked up by the kernel as the default data relocation block-group on mount. This ensures we always have a target to relocate good data to when we need to do garbage collection.
Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- mkfs/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mkfs/main.c b/mkfs/main.c index 4b4165dfb..9fa1f8d10 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -2039,6 +2039,15 @@ int BOX_MAIN(mkfs)(int argc, char **argv) goto error; } + if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_ZONED) { + ret = create_data_block_groups(trans, root, mixed, &allocation); + if (ret) { + errno = -ret; + error("failed to create data relocation block groups: %m"); + goto error; + } + } + if (features.incompat_flags & BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2) { ret = create_global_roots(trans, nr_global_roots); if (ret) { From 15d049b91b3f8ef03bb0137ab665d07086c63891 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Wed, 14 May 2025 16:16:28 +0100 Subject: [PATCH 04/15] btrfs-progs: print csum values on superblock mismatch If in btrfs_check_super() we find that the superblock has a csum mismatch, print the wanted and found values, just as we do for metadata in __csum_tree_block_size(). When hex-editing a btrfs image, it's useful to use btrfs check to calculate what the new csum should be. Unfortunately at present this only works for trees and not for the superblock, meaning you have to use the much more wordy `btrfs inspect-internal`. 
Pull-request: #985 Signed-off-by: Mark Harmstone Signed-off-by: David Sterba --- kernel-shared/disk-io.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel-shared/disk-io.c b/kernel-shared/disk-io.c index 95e1504fd..3bf1ff7a8 100644 --- a/kernel-shared/disk-io.c +++ b/kernel-shared/disk-io.c @@ -1771,8 +1771,14 @@ int btrfs_check_super(struct btrfs_super_block *sb, unsigned sbflags) btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE, result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - if (memcmp(result, sb->csum, csum_size)) { - error("superblock checksum mismatch"); + if (memcmp(result, sb->csum, csum_size) != 0) { + char found[BTRFS_CSUM_STRING_LEN]; + char wanted[BTRFS_CSUM_STRING_LEN]; + + btrfs_format_csum(csum_type, result, found); + btrfs_format_csum(csum_type, (u8 *)sb->csum, wanted); + + error("superblock checksum mismatch: wanted %s found %s", wanted, found); return -EIO; } if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { From 505196246ea50348583a4f1c13c0498bdab1a5de Mon Sep 17 00:00:00 2001 From: ozraru Date: Wed, 30 Apr 2025 03:06:03 +0900 Subject: [PATCH 05/15] btrfs-progs: docs: fix kernel version to set defrag compress level This feature is provided by commit of kernel fc5c0c58258748 ("btrfs: defrag: extend ioctl to accept compression levels") which is not included in 6.14 but 6.15. [skip ci] Pull-request: #983 Signed-off-by: David Sterba --- Documentation/btrfs-filesystem.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/btrfs-filesystem.rst b/Documentation/btrfs-filesystem.rst index 8b99df824..5852b4e6d 100644 --- a/Documentation/btrfs-filesystem.rst +++ b/Documentation/btrfs-filesystem.rst @@ -118,7 +118,7 @@ defragment [options] | [|...] compression. See also section *EXAMPLES*. 
-L|--level - Since kernel 6.14 the compresison can also take the level parameter which will be used + Since kernel 6.15 the compresison can also take the level parameter which will be used only for the defragmentation and overrides the eventual mount option compression level. Valid levels depend on the compression algorithms: *zlib* 1..9, *lzo* does not have any levels, *zstd* the standard levels 1..15 and also the From 97740f64ee60f9c820e489952bdd3b5d6a1daafd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 24 May 2025 11:38:06 +0930 Subject: [PATCH 06/15] btrfs-progs: convert: add feature dependency checks for bgt Block group tree requires no-holes and free-space-tree features, add such a check just like mkfs. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- convert/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/convert/main.c b/convert/main.c index 8784192fe..0db32ca42 100644 --- a/convert/main.c +++ b/convert/main.c @@ -1212,6 +1212,12 @@ static int do_convert(const char *devname, u32 convert_flags, u32 nodesize, if (btrfs_check_nodesize(nodesize, blocksize, features)) goto fail; + if ((features->compat_ro_flags & BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE) && + (!(features->incompat_flags & BTRFS_FEATURE_INCOMPAT_NO_HOLES) || + !(features->compat_ro_flags & BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))) { + error("block group tree requires no-holes and free-space-tree features"); + goto fail; + } fd = open(devname, O_RDWR); if (fd < 0) { error("unable to open %s: %m", devname); From 788324fe12023bc9ebb7cb41564b55518302ed5a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 24 May 2025 11:38:07 +0930 Subject: [PATCH 07/15] btrfs-progs: convert: replace the bytenrs check with a UASSERT() The bytenr sequence of all roots is controlled by our code, so if something went wrong with the sequence, it's a bug. A UASSERT() is more suitable for this case.
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- convert/common.c | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/convert/common.c b/convert/common.c index 8c64c2ce8..2ab88253c 100644 --- a/convert/common.c +++ b/convert/common.c @@ -260,19 +260,8 @@ static int setup_temp_root_tree(int fd, struct btrfs_mkfs_config *cfg, * Provided bytenr must in ascending order, or tree root will have a * bad key order. */ - if (!(root_bytenr < extent_bytenr && extent_bytenr < dev_bytenr && - dev_bytenr < fs_bytenr && fs_bytenr < csum_bytenr)) { - error("bad tree bytenr order: " - "root < extent %llu < %llu, " - "extent < dev %llu < %llu, " - "dev < fs %llu < %llu, " - "fs < csum %llu < %llu", - root_bytenr, extent_bytenr, - extent_bytenr, dev_bytenr, - dev_bytenr, fs_bytenr, - fs_bytenr, csum_bytenr); - return -EINVAL; - } + UASSERT(root_bytenr < extent_bytenr && extent_bytenr < dev_bytenr && + dev_bytenr < fs_bytenr && fs_bytenr < csum_bytenr); buf = malloc(sizeof(*buf) + cfg->nodesize); if (!buf) return -ENOMEM; @@ -703,22 +692,9 @@ static int setup_temp_extent_tree(int fd, struct btrfs_mkfs_config *cfg, * We must ensure provided bytenr are in ascending order, * or extent tree key order will be broken. 
*/ - if (!(chunk_bytenr < root_bytenr && root_bytenr < extent_bytenr && - extent_bytenr < dev_bytenr && dev_bytenr < fs_bytenr && - fs_bytenr < csum_bytenr)) { - error("bad tree bytenr order: " - "chunk < root %llu < %llu, " - "root < extent %llu < %llu, " - "extent < dev %llu < %llu, " - "dev < fs %llu < %llu, " - "fs < csum %llu < %llu", - chunk_bytenr, root_bytenr, - root_bytenr, extent_bytenr, - extent_bytenr, dev_bytenr, - dev_bytenr, fs_bytenr, - fs_bytenr, csum_bytenr); - return -EINVAL; - } + UASSERT(chunk_bytenr < root_bytenr && root_bytenr < extent_bytenr && + extent_bytenr < dev_bytenr && dev_bytenr < fs_bytenr && + fs_bytenr < csum_bytenr); buf = malloc(sizeof(*buf) + cfg->nodesize); if (!buf) return -ENOMEM; From b34a4dd6c5be10c9da167dc1a70ff8350fbdd76a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 24 May 2025 11:38:08 +0930 Subject: [PATCH 08/15] btrfs-progs: convert: simplify insert_temp_root_item() The function requires parameters @slot and @itemoff to record where the next item should land. But this is overkill, as after inserting an item, the temporary extent buffer will have its header nritems and the item pointer updated. We can use that header nritems and item pointer to get where the next item should land. This removes the external counter to record @slot and @itemoff.
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- convert/common.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/convert/common.c b/convert/common.c index 2ab88253c..3642c0762 100644 --- a/convert/common.c +++ b/convert/common.c @@ -189,17 +189,29 @@ static int setup_temp_extent_buffer(struct extent_buffer *buf, return 0; } +static u32 get_item_offset(const struct extent_buffer *eb, + const struct btrfs_mkfs_config *cfg) +{ + u32 slot = btrfs_header_nritems(eb); + + if (slot) + return btrfs_item_offset(eb, slot - 1); + else + return cfg->leaf_data_size; +} + static void insert_temp_root_item(struct extent_buffer *buf, struct btrfs_mkfs_config *cfg, - int *slot, u32 *itemoff, u64 objectid, - u64 bytenr) + u64 objectid, u64 bytenr) { struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; struct btrfs_disk_key disk_key; + u32 slot = btrfs_header_nritems(buf); + u32 itemoff = get_item_offset(buf, cfg); - btrfs_set_header_nritems(buf, *slot + 1); - (*itemoff) -= sizeof(root_item); + btrfs_set_header_nritems(buf, slot + 1); + itemoff -= sizeof(root_item); memset(&root_item, 0, sizeof(root_item)); inode_item = &root_item.inode; btrfs_set_stack_inode_generation(inode_item, 1); @@ -217,13 +229,12 @@ static void insert_temp_root_item(struct extent_buffer *buf, btrfs_set_disk_key_objectid(&disk_key, objectid); btrfs_set_disk_key_offset(&disk_key, 0); - btrfs_set_item_key(buf, &disk_key, *slot); - btrfs_set_item_offset(buf, *slot, *itemoff); - btrfs_set_item_size(buf, *slot, sizeof(root_item)); + btrfs_set_item_key(buf, &disk_key, slot); + btrfs_set_item_offset(buf, slot, itemoff); + btrfs_set_item_size(buf, slot, sizeof(root_item)); write_extent_buffer(buf, &root_item, - btrfs_item_ptr_offset(buf, *slot), + btrfs_item_ptr_offset(buf, slot), sizeof(root_item)); - (*slot)++; } /* @@ -252,8 +263,6 @@ static int setup_temp_root_tree(int fd, struct btrfs_mkfs_config *cfg, u64 dev_bytenr, u64 
fs_bytenr, u64 csum_bytenr) { struct extent_buffer *buf = NULL; - u32 itemoff = cfg->leaf_data_size; - int slot = 0; int ret; /* @@ -271,14 +280,10 @@ static int setup_temp_root_tree(int fd, struct btrfs_mkfs_config *cfg, if (ret < 0) goto out; - insert_temp_root_item(buf, cfg, &slot, &itemoff, - BTRFS_EXTENT_TREE_OBJECTID, extent_bytenr); - insert_temp_root_item(buf, cfg, &slot, &itemoff, - BTRFS_DEV_TREE_OBJECTID, dev_bytenr); - insert_temp_root_item(buf, cfg, &slot, &itemoff, - BTRFS_FS_TREE_OBJECTID, fs_bytenr); - insert_temp_root_item(buf, cfg, &slot, &itemoff, - BTRFS_CSUM_TREE_OBJECTID, csum_bytenr); + insert_temp_root_item(buf, cfg, BTRFS_EXTENT_TREE_OBJECTID, extent_bytenr); + insert_temp_root_item(buf, cfg, BTRFS_DEV_TREE_OBJECTID, dev_bytenr); + insert_temp_root_item(buf, cfg, BTRFS_FS_TREE_OBJECTID, fs_bytenr); + insert_temp_root_item(buf, cfg, BTRFS_CSUM_TREE_OBJECTID, csum_bytenr); ret = write_temp_extent_buffer(fd, buf, root_bytenr, cfg); out: From bc52579ab8a3337010fbbaa69bc3b9364f6a2cf5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 24 May 2025 11:38:09 +0930 Subject: [PATCH 09/15] btrfs-progs: convert: simplify insert_temp_dev_item() and insert_temp_chunk_item() These functions require parameters @slot and @itemoff to record where the next item should land. But this is overkill, as after inserting an item, the temporary extent buffer will have its header nritems and the item pointer updated. We can use that header nritems and item pointer to get where the next item should land. This removes the external counter to record @slot and @itemoff.
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- convert/common.c | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/convert/common.c b/convert/common.c index 3642c0762..ab16317ac 100644 --- a/convert/common.c +++ b/convert/common.c @@ -292,14 +292,15 @@ static int setup_temp_root_tree(int fd, struct btrfs_mkfs_config *cfg, } static int insert_temp_dev_item(int fd, struct extent_buffer *buf, - struct btrfs_mkfs_config *cfg, - int *slot, u32 *itemoff) + struct btrfs_mkfs_config *cfg) { struct btrfs_disk_key disk_key; struct btrfs_dev_item *dev_item; unsigned char dev_uuid[BTRFS_UUID_SIZE]; unsigned char fsid[BTRFS_FSID_SIZE]; struct btrfs_super_block super; + u32 slot = btrfs_header_nritems(buf); + u32 itemoff = get_item_offset(buf, cfg); int ret; ret = pread(fd, &super, BTRFS_SUPER_INFO_SIZE, cfg->super_bytenr); @@ -308,17 +309,17 @@ static int insert_temp_dev_item(int fd, struct extent_buffer *buf, goto out; } - btrfs_set_header_nritems(buf, *slot + 1); - (*itemoff) -= sizeof(*dev_item); + btrfs_set_header_nritems(buf, slot + 1); + itemoff -= sizeof(*dev_item); /* setup device item 1, 0 is for replace case */ btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY); btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID); btrfs_set_disk_key_offset(&disk_key, 1); - btrfs_set_item_key(buf, &disk_key, *slot); - btrfs_set_item_offset(buf, *slot, *itemoff); - btrfs_set_item_size(buf, *slot, sizeof(*dev_item)); + btrfs_set_item_key(buf, &disk_key, slot); + btrfs_set_item_offset(buf, slot, itemoff); + btrfs_set_item_size(buf, slot, sizeof(*dev_item)); - dev_item = btrfs_item_ptr(buf, *slot, struct btrfs_dev_item); + dev_item = btrfs_item_ptr(buf, slot, struct btrfs_dev_item); /* Generate device uuid */ uuid_generate(dev_uuid); write_extent_buffer(buf, dev_uuid, @@ -346,19 +347,19 @@ static int insert_temp_dev_item(int fd, struct extent_buffer *buf, read_extent_buffer(buf, &super.dev_item, 
(unsigned long)dev_item, sizeof(*dev_item)); ret = write_temp_super(fd, &super, cfg->super_bytenr); - (*slot)++; out: return ret; } static int insert_temp_chunk_item(int fd, struct extent_buffer *buf, struct btrfs_mkfs_config *cfg, - int *slot, u32 *itemoff, u64 start, u64 len, - u64 type) + u64 start, u64 len, u64 type) { struct btrfs_chunk *chunk; struct btrfs_disk_key disk_key; struct btrfs_super_block sb; + u32 slot = btrfs_header_nritems(buf); + u32 itemoff = get_item_offset(buf, cfg); int ret = 0; ret = pread(fd, &sb, BTRFS_SUPER_INFO_SIZE, cfg->super_bytenr); @@ -367,16 +368,16 @@ static int insert_temp_chunk_item(int fd, struct extent_buffer *buf, return ret; } - btrfs_set_header_nritems(buf, *slot + 1); - (*itemoff) -= btrfs_chunk_item_size(1); + btrfs_set_header_nritems(buf, slot + 1); + itemoff -= btrfs_chunk_item_size(1); btrfs_set_disk_key_type(&disk_key, BTRFS_CHUNK_ITEM_KEY); btrfs_set_disk_key_objectid(&disk_key, BTRFS_FIRST_CHUNK_TREE_OBJECTID); btrfs_set_disk_key_offset(&disk_key, start); - btrfs_set_item_key(buf, &disk_key, *slot); - btrfs_set_item_offset(buf, *slot, *itemoff); - btrfs_set_item_size(buf, *slot, btrfs_chunk_item_size(1)); + btrfs_set_item_key(buf, &disk_key, slot); + btrfs_set_item_offset(buf, slot, itemoff); + btrfs_set_item_size(buf, slot, btrfs_chunk_item_size(1)); - chunk = btrfs_item_ptr(buf, *slot, struct btrfs_chunk); + chunk = btrfs_item_ptr(buf, slot, struct btrfs_chunk); btrfs_set_chunk_length(buf, chunk, len); btrfs_set_chunk_owner(buf, chunk, BTRFS_EXTENT_TREE_OBJECTID); btrfs_set_chunk_stripe_len(buf, chunk, BTRFS_STRIPE_LEN); @@ -392,7 +393,6 @@ static int insert_temp_chunk_item(int fd, struct extent_buffer *buf, write_extent_buffer(buf, sb.dev_item.uuid, (unsigned long)btrfs_stripe_dev_uuid_nr(chunk, 0), BTRFS_UUID_SIZE); - (*slot)++; /* * If it's system chunk, also copy it to super block. 
@@ -422,8 +422,6 @@ static int setup_temp_chunk_tree(int fd, struct btrfs_mkfs_config *cfg, u64 chunk_bytenr) { struct extent_buffer *buf = NULL; - u32 itemoff = cfg->leaf_data_size; - int slot = 0; int ret; /* Must ensure SYS chunk starts before META chunk */ @@ -440,17 +438,15 @@ static int setup_temp_chunk_tree(int fd, struct btrfs_mkfs_config *cfg, if (ret < 0) goto out; - ret = insert_temp_dev_item(fd, buf, cfg, &slot, &itemoff); + ret = insert_temp_dev_item(fd, buf, cfg); if (ret < 0) goto out; - ret = insert_temp_chunk_item(fd, buf, cfg, &slot, &itemoff, - sys_chunk_start, + ret = insert_temp_chunk_item(fd, buf, cfg, sys_chunk_start, BTRFS_MKFS_SYSTEM_GROUP_SIZE, BTRFS_BLOCK_GROUP_SYSTEM); if (ret < 0) goto out; - ret = insert_temp_chunk_item(fd, buf, cfg, &slot, &itemoff, - meta_chunk_start, + ret = insert_temp_chunk_item(fd, buf, cfg, meta_chunk_start, BTRFS_CONVERT_META_GROUP_SIZE, BTRFS_BLOCK_GROUP_METADATA); if (ret < 0) From 68770572f5cda319aeb4767788d26e3c1bb6f15f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 24 May 2025 11:38:10 +0930 Subject: [PATCH 10/15] btrfs-progs: convert: simplify insert_temp_dev_extent() This function requires parameters @slot and @itemoff to record where the next item should land. But this is overkill, as after inserting an item, the temporary extent buffer will have its header nritems and the item pointer updated. We can use that header nritems and item pointer to get where the next item should land. This removes the external counter to record @slot and @itemoff. 
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- convert/common.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/convert/common.c b/convert/common.c index ab16317ac..ed34cd16d 100644 --- a/convert/common.c +++ b/convert/common.c @@ -459,28 +459,29 @@ static int setup_temp_chunk_tree(int fd, struct btrfs_mkfs_config *cfg, } static void insert_temp_dev_extent(struct extent_buffer *buf, - int *slot, u32 *itemoff, u64 start, u64 len) + struct btrfs_mkfs_config *cfg, u64 start, u64 len) { struct btrfs_dev_extent *dev_extent; struct btrfs_disk_key disk_key; + u32 slot = btrfs_header_nritems(buf); + u32 itemoff = get_item_offset(buf, cfg); - btrfs_set_header_nritems(buf, *slot + 1); - (*itemoff) -= sizeof(*dev_extent); + btrfs_set_header_nritems(buf, slot + 1); + itemoff -= sizeof(*dev_extent); btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_EXTENT_KEY); btrfs_set_disk_key_objectid(&disk_key, 1); btrfs_set_disk_key_offset(&disk_key, start); - btrfs_set_item_key(buf, &disk_key, *slot); - btrfs_set_item_offset(buf, *slot, *itemoff); - btrfs_set_item_size(buf, *slot, sizeof(*dev_extent)); + btrfs_set_item_key(buf, &disk_key, slot); + btrfs_set_item_offset(buf, slot, itemoff); + btrfs_set_item_size(buf, slot, sizeof(*dev_extent)); - dev_extent = btrfs_item_ptr(buf, *slot, struct btrfs_dev_extent); + dev_extent = btrfs_item_ptr(buf, slot, struct btrfs_dev_extent); btrfs_set_dev_extent_chunk_objectid(buf, dev_extent, BTRFS_FIRST_CHUNK_TREE_OBJECTID); btrfs_set_dev_extent_length(buf, dev_extent, len); btrfs_set_dev_extent_chunk_offset(buf, dev_extent, start); btrfs_set_dev_extent_chunk_tree(buf, dev_extent, BTRFS_CHUNK_TREE_OBJECTID); - (*slot)++; } static int setup_temp_dev_tree(int fd, struct btrfs_mkfs_config *cfg, @@ -488,8 +489,6 @@ static int setup_temp_dev_tree(int fd, struct btrfs_mkfs_config *cfg, u64 dev_bytenr) { struct extent_buffer *buf = NULL; - u32 itemoff = cfg->leaf_data_size; - int slot = 0; int ret; /* Must 
ensure SYS chunk starts before META chunk */ @@ -505,9 +504,9 @@ static int setup_temp_dev_tree(int fd, struct btrfs_mkfs_config *cfg, BTRFS_DEV_TREE_OBJECTID); if (ret < 0) goto out; - insert_temp_dev_extent(buf, &slot, &itemoff, sys_chunk_start, + insert_temp_dev_extent(buf, cfg, sys_chunk_start, BTRFS_MKFS_SYSTEM_GROUP_SIZE); - insert_temp_dev_extent(buf, &slot, &itemoff, meta_chunk_start, + insert_temp_dev_extent(buf, cfg, meta_chunk_start, BTRFS_CONVERT_META_GROUP_SIZE); ret = write_temp_extent_buffer(fd, buf, dev_bytenr, cfg); out: From 7e489c09c06aa2b3e81b9ca901296390e0e2a3b2 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 24 May 2025 11:38:11 +0930 Subject: [PATCH 11/15] btrfs-progs: convert: simplify insert_temp_extent_item() and insert_temp_block_group() These functions require parameters @slot and @itemoff to record where the next item should land. But this is overkill, as after inserting an item, the temporary extent buffer will have its header nritems and the item pointer updated. We can use that header nritems and item pointer to get where the next item should land. This removes the external counter to record @slot and @itemoff. 
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- convert/common.c | 72 +++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/convert/common.c b/convert/common.c index ed34cd16d..a4c451fd4 100644 --- a/convert/common.c +++ b/convert/common.c @@ -567,8 +567,7 @@ static int setup_temp_csum_tree(int fd, struct btrfs_mkfs_config *cfg, */ static int insert_temp_extent_item(int fd, struct extent_buffer *buf, struct btrfs_mkfs_config *cfg, - int *slot, u32 *itemoff, u64 bytenr, - u64 ref_root) + u64 bytenr, u64 ref_root) { struct extent_buffer *tmp; struct btrfs_extent_item *ei; @@ -576,6 +575,8 @@ static int insert_temp_extent_item(int fd, struct extent_buffer *buf, struct btrfs_disk_key disk_key; struct btrfs_disk_key tree_info_key; struct btrfs_tree_block_info *info; + u32 slot = btrfs_header_nritems(buf); + u32 itemoff = get_item_offset(buf, cfg); int itemsize; int skinny_metadata = cfg->features.incompat_flags & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA; @@ -587,8 +588,8 @@ static int insert_temp_extent_item(int fd, struct extent_buffer *buf, itemsize = sizeof(*ei) + sizeof(*iref) + sizeof(struct btrfs_tree_block_info); - btrfs_set_header_nritems(buf, *slot + 1); - *(itemoff) -= itemsize; + btrfs_set_header_nritems(buf, slot + 1); + itemoff -= itemsize; if (skinny_metadata) { btrfs_set_disk_key_type(&disk_key, BTRFS_METADATA_ITEM_KEY); @@ -599,11 +600,11 @@ static int insert_temp_extent_item(int fd, struct extent_buffer *buf, } btrfs_set_disk_key_objectid(&disk_key, bytenr); - btrfs_set_item_key(buf, &disk_key, *slot); - btrfs_set_item_offset(buf, *slot, *itemoff); - btrfs_set_item_size(buf, *slot, itemsize); + btrfs_set_item_key(buf, &disk_key, slot); + btrfs_set_item_offset(buf, slot, itemoff); + btrfs_set_item_size(buf, slot, itemsize); - ei = btrfs_item_ptr(buf, *slot, struct btrfs_extent_item); + ei = btrfs_item_ptr(buf, slot, struct btrfs_extent_item); btrfs_set_extent_refs(buf, ei, 1); 
btrfs_set_extent_generation(buf, ei, 1); btrfs_set_extent_flags(buf, ei, BTRFS_EXTENT_FLAG_TREE_BLOCK); @@ -618,7 +619,6 @@ static int insert_temp_extent_item(int fd, struct extent_buffer *buf, BTRFS_TREE_BLOCK_REF_KEY); btrfs_set_extent_inline_ref_offset(buf, iref, ref_root); - (*slot)++; if (skinny_metadata) return 0; @@ -654,28 +654,28 @@ static int insert_temp_extent_item(int fd, struct extent_buffer *buf, static void insert_temp_block_group(struct extent_buffer *buf, struct btrfs_mkfs_config *cfg, - int *slot, u32 *itemoff, u64 bytenr, u64 len, u64 used, u64 flag) { struct btrfs_block_group_item bgi; struct btrfs_disk_key disk_key; + u32 slot = btrfs_header_nritems(buf); + u32 itemoff = get_item_offset(buf, cfg); - btrfs_set_header_nritems(buf, *slot + 1); - (*itemoff) -= sizeof(bgi); + btrfs_set_header_nritems(buf, slot + 1); + itemoff -= sizeof(bgi); btrfs_set_disk_key_type(&disk_key, BTRFS_BLOCK_GROUP_ITEM_KEY); btrfs_set_disk_key_objectid(&disk_key, bytenr); btrfs_set_disk_key_offset(&disk_key, len); - btrfs_set_item_key(buf, &disk_key, *slot); - btrfs_set_item_offset(buf, *slot, *itemoff); - btrfs_set_item_size(buf, *slot, sizeof(bgi)); + btrfs_set_item_key(buf, &disk_key, slot); + btrfs_set_item_offset(buf, slot, itemoff); + btrfs_set_item_size(buf, slot, sizeof(bgi)); btrfs_set_stack_block_group_flags(&bgi, flag); btrfs_set_stack_block_group_used(&bgi, used); btrfs_set_stack_block_group_chunk_objectid(&bgi, BTRFS_FIRST_CHUNK_TREE_OBJECTID); - write_extent_buffer(buf, &bgi, btrfs_item_ptr_offset(buf, *slot), + write_extent_buffer(buf, &bgi, btrfs_item_ptr_offset(buf, slot), sizeof(bgi)); - (*slot)++; } static int setup_temp_extent_tree(int fd, struct btrfs_mkfs_config *cfg, @@ -684,8 +684,6 @@ static int setup_temp_extent_tree(int fd, struct btrfs_mkfs_config *cfg, u64 fs_bytenr, u64 csum_bytenr) { struct extent_buffer *buf = NULL; - u32 itemoff = cfg->leaf_data_size; - int slot = 0; int ret; /* @@ -704,39 +702,39 @@ static int setup_temp_extent_tree(int 
fd, struct btrfs_mkfs_config *cfg, if (ret < 0) goto out; - ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, - chunk_bytenr, BTRFS_CHUNK_TREE_OBJECTID); + ret = insert_temp_extent_item(fd, buf, cfg, chunk_bytenr, + BTRFS_CHUNK_TREE_OBJECTID); if (ret < 0) goto out; - insert_temp_block_group(buf, cfg, &slot, &itemoff, chunk_bytenr, - BTRFS_MKFS_SYSTEM_GROUP_SIZE, cfg->nodesize, - BTRFS_BLOCK_GROUP_SYSTEM); + insert_temp_block_group(buf, cfg, chunk_bytenr, + BTRFS_MKFS_SYSTEM_GROUP_SIZE, cfg->nodesize, + BTRFS_BLOCK_GROUP_SYSTEM); - ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, - root_bytenr, BTRFS_ROOT_TREE_OBJECTID); + ret = insert_temp_extent_item(fd, buf, cfg, root_bytenr, + BTRFS_ROOT_TREE_OBJECTID); if (ret < 0) goto out; /* 5 tree block used, root, extent, dev, fs and csum*/ - insert_temp_block_group(buf, cfg, &slot, &itemoff, root_bytenr, - BTRFS_CONVERT_META_GROUP_SIZE, cfg->nodesize * 5, - BTRFS_BLOCK_GROUP_METADATA); + insert_temp_block_group(buf, cfg, root_bytenr, + BTRFS_CONVERT_META_GROUP_SIZE, cfg->nodesize * 5, + BTRFS_BLOCK_GROUP_METADATA); - ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, - extent_bytenr, BTRFS_EXTENT_TREE_OBJECTID); + ret = insert_temp_extent_item(fd, buf, cfg, extent_bytenr, + BTRFS_EXTENT_TREE_OBJECTID); if (ret < 0) goto out; - ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, - dev_bytenr, BTRFS_DEV_TREE_OBJECTID); + ret = insert_temp_extent_item(fd, buf, cfg, dev_bytenr, + BTRFS_DEV_TREE_OBJECTID); if (ret < 0) goto out; - ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, - fs_bytenr, BTRFS_FS_TREE_OBJECTID); + ret = insert_temp_extent_item(fd, buf, cfg, fs_bytenr, + BTRFS_FS_TREE_OBJECTID); if (ret < 0) goto out; - ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, - csum_bytenr, BTRFS_CSUM_TREE_OBJECTID); + ret = insert_temp_extent_item(fd, buf, cfg, csum_bytenr, + BTRFS_CSUM_TREE_OBJECTID); if (ret < 0) goto out; From 
857e01f2bb7f50f07261b3cfde1f315f651857f7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 24 May 2025 11:38:13 +0930 Subject: [PATCH 12/15] btrfs-progs: convert: merge setup_temp_fs_tree() and setup_temp_csum_tree() Both fs and csum trees are empty at make_convert_btrfs(), no need to use two different functions to do that. Merge them into a common setup_temp_empty_tree() instead. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- convert/common.c | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/convert/common.c b/convert/common.c index a4c451fd4..40f04ebef 100644 --- a/convert/common.c +++ b/convert/common.c @@ -514,8 +514,8 @@ static int setup_temp_dev_tree(int fd, struct btrfs_mkfs_config *cfg, return ret; } -static int setup_temp_fs_tree(int fd, struct btrfs_mkfs_config *cfg, - u64 fs_bytenr) +static int setup_temp_empty_tree(int fd, struct btrfs_mkfs_config *cfg, + u64 root_bytenr, u64 owner) { struct extent_buffer *buf = NULL; int ret; @@ -523,36 +523,13 @@ static int setup_temp_fs_tree(int fd, struct btrfs_mkfs_config *cfg, buf = malloc(sizeof(*buf) + cfg->nodesize); if (!buf) return -ENOMEM; - ret = setup_temp_extent_buffer(buf, cfg, fs_bytenr, - BTRFS_FS_TREE_OBJECTID); + ret = setup_temp_extent_buffer(buf, cfg, root_bytenr, owner); if (ret < 0) goto out; /* * Temporary fs tree is completely empty. */ - ret = write_temp_extent_buffer(fd, buf, fs_bytenr, cfg); -out: - free(buf); - return ret; -} - -static int setup_temp_csum_tree(int fd, struct btrfs_mkfs_config *cfg, - u64 csum_bytenr) -{ - struct extent_buffer *buf = NULL; - int ret; - - buf = malloc(sizeof(*buf) + cfg->nodesize); - if (!buf) - return -ENOMEM; - ret = setup_temp_extent_buffer(buf, cfg, csum_bytenr, - BTRFS_CSUM_TREE_OBJECTID); - if (ret < 0) - goto out; - /* - * Temporary csum tree is completely empty. 
- */ - ret = write_temp_extent_buffer(fd, buf, csum_bytenr, cfg); + ret = write_temp_extent_buffer(fd, buf, root_bytenr, cfg); out: free(buf); return ret; @@ -867,10 +844,10 @@ int make_convert_btrfs(int fd, struct btrfs_mkfs_config *cfg, dev_bytenr); if (ret < 0) goto out; - ret = setup_temp_fs_tree(fd, cfg, fs_bytenr); + ret = setup_temp_empty_tree(fd, cfg, fs_bytenr, BTRFS_FS_TREE_OBJECTID); if (ret < 0) goto out; - ret = setup_temp_csum_tree(fd, cfg, csum_bytenr); + ret = setup_temp_empty_tree(fd, cfg, csum_bytenr, BTRFS_CSUM_TREE_OBJECTID); if (ret < 0) goto out; /* From 7da8fd0c5fd5f6d691d34b9ab3f824879c94c161 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 24 May 2025 11:38:14 +0930 Subject: [PATCH 13/15] btrfs-progs: convert: implement the block group tree support properly Previously there were some problems with btrfs-convert bgt support; it did not work at all, for the following reasons: - We never update the super block with extra compat ro flags Even if we set "-O bgt" flags, it will not set the compat ro flags, and everything just goes through the non-bgt routine. Meanwhile other compat ro flags are for free-space-tree, and free-space-tree is rebuilt after the full convert is done. Thus this bug won't cause any problem for fst features, and only affects bgt so far. - No extra handling to create block group tree Fix the above problems by: - Set the proper compat RO flag for the temporary super block We should set all the compat RO flags except the two FST related bits. As FST is handled after conversion, we should not set those flags at that time. - Add block group tree root item and its backrefs So the initial temporary fs will have a proper block group tree. The only tricky part is the extent tree population, where we have to put all block group items into the block group tree rather than the extent tree. With these two points addressed, block group tree can now be properly enabled for btrfs-convert. 
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- convert/common.c | 108 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 85 insertions(+), 23 deletions(-) diff --git a/convert/common.c b/convert/common.c index 40f04ebef..64484f7fe 100644 --- a/convert/common.c +++ b/convert/common.c @@ -150,6 +150,13 @@ static int setup_temp_super(int fd, struct btrfs_mkfs_config *cfg, btrfs_set_super_chunk_root(&super, chunk_bytenr); btrfs_set_super_cache_generation(&super, -1); btrfs_set_super_incompat_flags(&super, cfg->features.incompat_flags); + /* + * Do not set fst related flags yet, it will be handled after + * the fs is converted. + */ + btrfs_set_super_compat_ro_flags(&super, cfg->features.compat_ro_flags & + ~(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)); if (cfg->label) strncpy_null(super.label, cfg->label, BTRFS_LABEL_SIZE); @@ -200,6 +207,12 @@ static u32 get_item_offset(const struct extent_buffer *eb, return cfg->leaf_data_size; } +static bool btrfs_is_bgt(const struct btrfs_mkfs_config *cfg) +{ + return cfg->features.compat_ro_flags & + BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE; +} + static void insert_temp_root_item(struct extent_buffer *buf, struct btrfs_mkfs_config *cfg, u64 objectid, u64 bytenr) @@ -260,7 +273,8 @@ static inline int write_temp_extent_buffer(int fd, struct extent_buffer *buf, static int setup_temp_root_tree(int fd, struct btrfs_mkfs_config *cfg, u64 root_bytenr, u64 extent_bytenr, - u64 dev_bytenr, u64 fs_bytenr, u64 csum_bytenr) + u64 dev_bytenr, u64 fs_bytenr, u64 csum_bytenr, + u64 bgt_bytenr) { struct extent_buffer *buf = NULL; int ret; @@ -270,7 +284,8 @@ static int setup_temp_root_tree(int fd, struct btrfs_mkfs_config *cfg, * bad key order. 
*/ UASSERT(root_bytenr < extent_bytenr && extent_bytenr < dev_bytenr && - dev_bytenr < fs_bytenr && fs_bytenr < csum_bytenr); + dev_bytenr < fs_bytenr && fs_bytenr < csum_bytenr && + csum_bytenr < bgt_bytenr); buf = malloc(sizeof(*buf) + cfg->nodesize); if (!buf) return -ENOMEM; @@ -284,6 +299,9 @@ static int setup_temp_root_tree(int fd, struct btrfs_mkfs_config *cfg, insert_temp_root_item(buf, cfg, BTRFS_DEV_TREE_OBJECTID, dev_bytenr); insert_temp_root_item(buf, cfg, BTRFS_FS_TREE_OBJECTID, fs_bytenr); insert_temp_root_item(buf, cfg, BTRFS_CSUM_TREE_OBJECTID, csum_bytenr); + if (btrfs_is_bgt(cfg)) + insert_temp_root_item(buf, cfg, BTRFS_BLOCK_GROUP_TREE_OBJECTID, + bgt_bytenr); ret = write_temp_extent_buffer(fd, buf, root_bytenr, cfg); out: @@ -658,9 +676,12 @@ static void insert_temp_block_group(struct extent_buffer *buf, static int setup_temp_extent_tree(int fd, struct btrfs_mkfs_config *cfg, u64 chunk_bytenr, u64 root_bytenr, u64 extent_bytenr, u64 dev_bytenr, - u64 fs_bytenr, u64 csum_bytenr) + u64 fs_bytenr, u64 csum_bytenr, + u64 bgt_bytenr) { - struct extent_buffer *buf = NULL; + struct extent_buffer *extent_buf = NULL; + struct extent_buffer *bg_buf = NULL; + const bool is_bgt = btrfs_is_bgt(cfg); int ret; /* @@ -669,55 +690,85 @@ static int setup_temp_extent_tree(int fd, struct btrfs_mkfs_config *cfg, */ UASSERT(chunk_bytenr < root_bytenr && root_bytenr < extent_bytenr && extent_bytenr < dev_bytenr && dev_bytenr < fs_bytenr && - fs_bytenr < csum_bytenr); - buf = malloc(sizeof(*buf) + cfg->nodesize); - if (!buf) - return -ENOMEM; + fs_bytenr < csum_bytenr && csum_bytenr < bgt_bytenr); + extent_buf = malloc(sizeof(*extent_buf) + cfg->nodesize); + if (!extent_buf) { + ret = -ENOMEM; + goto out; + } - ret = setup_temp_extent_buffer(buf, cfg, extent_bytenr, + ret = setup_temp_extent_buffer(extent_buf, cfg, extent_bytenr, BTRFS_EXTENT_TREE_OBJECTID); if (ret < 0) goto out; - ret = insert_temp_extent_item(fd, buf, cfg, chunk_bytenr, + if (is_bgt) { + bg_buf = 
malloc(sizeof(*bg_buf) + cfg->nodesize); + if (!bg_buf) { + ret = -ENOMEM; + goto out; + } + ret = setup_temp_extent_buffer(bg_buf, cfg, bgt_bytenr, + BTRFS_BLOCK_GROUP_TREE_OBJECTID); + if (ret < 0) + goto out; + } + + ret = insert_temp_extent_item(fd, extent_buf, cfg, chunk_bytenr, BTRFS_CHUNK_TREE_OBJECTID); if (ret < 0) goto out; - insert_temp_block_group(buf, cfg, chunk_bytenr, + insert_temp_block_group(is_bgt ? bg_buf : extent_buf, cfg, chunk_bytenr, BTRFS_MKFS_SYSTEM_GROUP_SIZE, cfg->nodesize, BTRFS_BLOCK_GROUP_SYSTEM); - ret = insert_temp_extent_item(fd, buf, cfg, root_bytenr, + ret = insert_temp_extent_item(fd, extent_buf, cfg, root_bytenr, BTRFS_ROOT_TREE_OBJECTID); if (ret < 0) goto out; - /* 5 tree block used, root, extent, dev, fs and csum*/ - insert_temp_block_group(buf, cfg, root_bytenr, - BTRFS_CONVERT_META_GROUP_SIZE, cfg->nodesize * 5, + /* + * 5 tree block used, root, extent, dev, fs and csum. + * Plus bg tree if specified. + */ + insert_temp_block_group(is_bgt ? bg_buf : extent_buf, cfg, root_bytenr, + BTRFS_CONVERT_META_GROUP_SIZE, + is_bgt ? 
cfg->nodesize * 6 : cfg->nodesize * 5, BTRFS_BLOCK_GROUP_METADATA); - ret = insert_temp_extent_item(fd, buf, cfg, extent_bytenr, + ret = insert_temp_extent_item(fd, extent_buf, cfg, extent_bytenr, BTRFS_EXTENT_TREE_OBJECTID); if (ret < 0) goto out; - ret = insert_temp_extent_item(fd, buf, cfg, dev_bytenr, + ret = insert_temp_extent_item(fd, extent_buf, cfg, dev_bytenr, BTRFS_DEV_TREE_OBJECTID); if (ret < 0) goto out; - ret = insert_temp_extent_item(fd, buf, cfg, fs_bytenr, + ret = insert_temp_extent_item(fd, extent_buf, cfg, fs_bytenr, BTRFS_FS_TREE_OBJECTID); if (ret < 0) goto out; - ret = insert_temp_extent_item(fd, buf, cfg, csum_bytenr, + ret = insert_temp_extent_item(fd, extent_buf, cfg, csum_bytenr, BTRFS_CSUM_TREE_OBJECTID); if (ret < 0) goto out; + if (btrfs_is_bgt(cfg)) { + ret = insert_temp_extent_item(fd, extent_buf, cfg, bgt_bytenr, + BTRFS_BLOCK_GROUP_TREE_OBJECTID); + if (ret < 0) + goto out; + } + + ret = write_temp_extent_buffer(fd, extent_buf, extent_bytenr, cfg); + if (ret < 0) + goto out; + if (is_bgt) + ret = write_temp_extent_buffer(fd, bg_buf, bgt_bytenr, cfg); - ret = write_temp_extent_buffer(fd, buf, extent_bytenr, cfg); out: - free(buf); + free(extent_buf); + free(bg_buf); return ret; } @@ -751,6 +802,7 @@ int make_convert_btrfs(int fd, struct btrfs_mkfs_config *cfg, { struct cache_tree *free_space = &cctx->free_space; struct cache_tree *used_space = &cctx->used_space; + const bool is_bgt = btrfs_is_bgt(cfg); u64 sys_chunk_start; u64 meta_chunk_start; /* chunk tree bytenr, in system chunk */ @@ -761,6 +813,7 @@ int make_convert_btrfs(int fd, struct btrfs_mkfs_config *cfg, u64 dev_bytenr; u64 fs_bytenr; u64 csum_bytenr; + u64 bgt_bytenr = (u64)-1; int ret; /* Source filesystem must be opened, checked and analyzed in advance */ @@ -814,6 +867,7 @@ int make_convert_btrfs(int fd, struct btrfs_mkfs_config *cfg, * | +nodesize * 2 | device root | * | +nodesize * 3 | fs tree | * | +nodesize * 4 | csum tree | + * | +nodesize * 5 | bg tree | 
(Optional) * ------------------------------------- * Inside the allocated system chunk, the layout will be: * | offset | contents | @@ -827,13 +881,15 @@ int make_convert_btrfs(int fd, struct btrfs_mkfs_config *cfg, dev_bytenr = meta_chunk_start + cfg->nodesize * 2; fs_bytenr = meta_chunk_start + cfg->nodesize * 3; csum_bytenr = meta_chunk_start + cfg->nodesize * 4; + if (is_bgt) + bgt_bytenr = meta_chunk_start + cfg->nodesize * 5; ret = setup_temp_super(fd, cfg, root_bytenr, chunk_bytenr); if (ret < 0) goto out; ret = setup_temp_root_tree(fd, cfg, root_bytenr, extent_bytenr, - dev_bytenr, fs_bytenr, csum_bytenr); + dev_bytenr, fs_bytenr, csum_bytenr, bgt_bytenr); if (ret < 0) goto out; ret = setup_temp_chunk_tree(fd, cfg, sys_chunk_start, meta_chunk_start, @@ -850,13 +906,19 @@ int make_convert_btrfs(int fd, struct btrfs_mkfs_config *cfg, ret = setup_temp_empty_tree(fd, cfg, csum_bytenr, BTRFS_CSUM_TREE_OBJECTID); if (ret < 0) goto out; + if (is_bgt) { + ret = setup_temp_empty_tree(fd, cfg, bgt_bytenr, + BTRFS_BLOCK_GROUP_TREE_OBJECTID); + if (ret < 0) + goto out; + } /* * Setup extent tree last, since it may need to read tree block key * for non-skinny metadata case. */ ret = setup_temp_extent_tree(fd, cfg, chunk_bytenr, root_bytenr, extent_bytenr, dev_bytenr, fs_bytenr, - csum_bytenr); + csum_bytenr, bgt_bytenr); out: return ret; } From 6bf82ada57eb85d5201de625ee7e4c8633cd7665 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 24 May 2025 11:38:15 +0930 Subject: [PATCH 14/15] btrfs-progs: tests: tests: add a test case for convert with bgt feature Previously "btrfs-convert -O bgt" would not cause any error, but the resulting fs has no block-group-tree feature at all, making it no different than "btrfs-convert -O ^bgt". This is a big bug that was never caught by our existing convert runs. 001-ext2-basic and 003-ext4-basic all tested bgt feature, but don't really check if the resulting fs really have bgt flags set. 
To fix that, add a new test case, which will do the regular bgt convert, but at the end also do a super block dump and verify the BLOCK_GROUP_TREE flag is properly set. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- .../028-block-group-tree/test.sh | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100755 tests/convert-tests/028-block-group-tree/test.sh diff --git a/tests/convert-tests/028-block-group-tree/test.sh b/tests/convert-tests/028-block-group-tree/test.sh new file mode 100755 index 000000000..7b30ac7de --- /dev/null +++ b/tests/convert-tests/028-block-group-tree/test.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Make sure btrfs-convert can create a fs with bgt feature. + +source "$TEST_TOP/common" || exit +source "$TEST_TOP/common.convert" || exit + +setup_root_helper +prepare_test_dev + +check_global_prereq mkfs.ext4 +check_prereq btrfs-convert +check_prereq btrfs + +convert_test_prep_fs ext4 mke2fs -t ext4 -b 4096 +run_check_umount_test_dev +convert_test_do_convert bgt 16384 + +# Manually check the super block to make sure it has BGT flag. +run_check_stdout "$TOP/btrfs" inspect-internal dump-super "$TEST_DEV" |\ + grep -q "BLOCK_GROUP_TREE" || _fail "No block-group-tree feature enabled" From 4f3c2568663aa74c0721df503c2011347ff086ea Mon Sep 17 00:00:00 2001 From: Sidong Yang Date: Sun, 25 May 2025 11:49:59 +0000 Subject: [PATCH 15/15] btrfs-progs: subvolume: use BTRFS_IOC_SUBVOL_SYNC_WAIT for sync This patch uses the BTRFS_IOC_SUBVOL_SYNC_WAIT ioctl in the subvolume sync command before falling back to periodic checking, and adds an option to skip the sync wait ioctl call and force periodic checking. This patch calls a new function wait_for_subvolume_sync() that calls BTRFS_IOC_SUBVOL_SYNC_WAIT for each subvol. 
Issue: #953 Signed-off-by: Sidong Yang --- cmds/subvolume.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/cmds/subvolume.c b/cmds/subvolume.c index 6b33731d7..4c352e20d 100644 --- a/cmds/subvolume.c +++ b/cmds/subvolume.c @@ -122,6 +122,22 @@ static int wait_for_subvolume_cleaning(int fd, size_t count, uint64_t *ids, return 0; } +static int wait_for_subvolume_sync(int fd, size_t count, uint64_t *ids) { + int i, ret; + struct btrfs_ioctl_subvol_wait arg; + + for (i=0; i ", NULL @@ -1726,9 +1742,12 @@ static const char * const cmd_subvolume_sync_usage[] = { "after deletion.", "If no subvolume id is given, wait until all current deletion requests", "are completed, but do not wait for subvolumes deleted meanwhile.", - "The status of subvolume ids is checked periodically.", + "The status of subvolume IDs is first checked by attempting to wait" + "via ioctl. If the ioctl is not supported or fails, the status is checked" + "periodically as a fallback.", "", OPTLINE("-s ", "sleep N seconds between checks (default: 1)"), + OPTLINE("-p", "use periodic checking instead of waiting via ioctl"), NULL }; @@ -1740,6 +1759,7 @@ static int cmd_subvolume_sync(const struct cmd_struct *cmd, int argc, char **arg size_t id_count, i; int sleep_interval = 1; enum btrfs_util_error err; + bool periodic = false; optind = 0; while (1) { @@ -1757,6 +1777,9 @@ static int cmd_subvolume_sync(const struct cmd_struct *cmd, int argc, char **arg goto out; } break; + case 'p': + periodic = true; + break; default: usage_unknown_option(cmd, argv); } @@ -1814,7 +1837,16 @@ static int cmd_subvolume_sync(const struct cmd_struct *cmd, int argc, char **arg } } - ret = wait_for_subvolume_cleaning(fd, id_count, ids, sleep_interval); + if (periodic) { + ret = wait_for_subvolume_cleaning(fd, id_count, ids, sleep_interval); + } else { + ret = wait_for_subvolume_sync(fd, id_count, ids); + if (ret) { + if (ret == -ENOTTY) + error("subvolume sync ioctl not 
supported in this kernel version, 6.13 and newer is required"); + ret = wait_for_subvolume_cleaning(fd, id_count, ids, sleep_interval); + } + } out: free(ids);