]>
Commit | Line | Data |
---|---|---|
565a4147 QW |
1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | #include <common.h> | |
3 | #include <fs_internal.h> | |
f337fb9e | 4 | #include <log.h> |
4aebb994 QW |
5 | #include <uuid.h> |
6 | #include <memalign.h> | |
7 | #include "kernel-shared/btrfs_tree.h" | |
f06bfcf5 | 8 | #include "common/rbtree-utils.h" |
565a4147 | 9 | #include "disk-io.h" |
4aebb994 QW |
10 | #include "ctree.h" |
11 | #include "btrfs.h" | |
75b0817b QW |
12 | #include "volumes.h" |
13 | #include "extent-io.h" | |
565a4147 QW |
14 | #include "crypto/hash.h" |
15 | ||
75b0817b QW |
16 | /* specified errno for check_tree_block */ |
17 | #define BTRFS_BAD_BYTENR (-1) | |
18 | #define BTRFS_BAD_FSID (-2) | |
19 | #define BTRFS_BAD_LEVEL (-3) | |
20 | #define BTRFS_BAD_NRITEMS (-4) | |
21 | ||
22 | /* Calculate max possible nritems for a leaf/node */ | |
23 | static u32 max_nritems(u8 level, u32 nodesize) | |
24 | { | |
25 | ||
26 | if (level == 0) | |
27 | return ((nodesize - sizeof(struct btrfs_header)) / | |
28 | sizeof(struct btrfs_item)); | |
29 | return ((nodesize - sizeof(struct btrfs_header)) / | |
30 | sizeof(struct btrfs_key_ptr)); | |
31 | } | |
32 | ||
33 | static int check_tree_block(struct btrfs_fs_info *fs_info, | |
34 | struct extent_buffer *buf) | |
35 | { | |
36 | ||
37 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | |
38 | u32 nodesize = fs_info->nodesize; | |
39 | bool fsid_match = false; | |
40 | int ret = BTRFS_BAD_FSID; | |
41 | ||
42 | if (buf->start != btrfs_header_bytenr(buf)) | |
43 | return BTRFS_BAD_BYTENR; | |
44 | if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL) | |
45 | return BTRFS_BAD_LEVEL; | |
46 | if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf), | |
47 | nodesize)) | |
48 | return BTRFS_BAD_NRITEMS; | |
49 | ||
50 | /* Only leaf can be empty */ | |
51 | if (btrfs_header_nritems(buf) == 0 && | |
52 | btrfs_header_level(buf) != 0) | |
53 | return BTRFS_BAD_NRITEMS; | |
54 | ||
55 | while (fs_devices) { | |
56 | /* | |
57 | * Checking the incompat flag is only valid for the current | |
58 | * fs. For seed devices it's forbidden to have their uuid | |
59 | * changed so reading ->fsid in this case is fine | |
60 | */ | |
61 | if (fs_devices == fs_info->fs_devices && | |
62 | btrfs_fs_incompat(fs_info, METADATA_UUID)) | |
63 | fsid_match = !memcmp_extent_buffer(buf, | |
64 | fs_devices->metadata_uuid, | |
65 | btrfs_header_fsid(), | |
66 | BTRFS_FSID_SIZE); | |
67 | else | |
68 | fsid_match = !memcmp_extent_buffer(buf, | |
69 | fs_devices->fsid, | |
70 | btrfs_header_fsid(), | |
71 | BTRFS_FSID_SIZE); | |
72 | ||
73 | ||
74 | if (fsid_match) { | |
75 | ret = 0; | |
76 | break; | |
77 | } | |
78 | fs_devices = fs_devices->seed; | |
79 | } | |
80 | return ret; | |
81 | } | |
82 | ||
83 | static void print_tree_block_error(struct btrfs_fs_info *fs_info, | |
84 | struct extent_buffer *eb, | |
85 | int err) | |
86 | { | |
87 | char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; | |
88 | char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; | |
89 | u8 buf[BTRFS_UUID_SIZE]; | |
90 | ||
91 | if (!err) | |
92 | return; | |
93 | ||
94 | fprintf(stderr, "bad tree block %llu, ", eb->start); | |
95 | switch (err) { | |
96 | case BTRFS_BAD_FSID: | |
97 | read_extent_buffer(eb, buf, btrfs_header_fsid(), | |
98 | BTRFS_UUID_SIZE); | |
99 | uuid_unparse(buf, found_uuid); | |
100 | uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid); | |
101 | fprintf(stderr, "fsid mismatch, want=%s, have=%s\n", | |
102 | fs_uuid, found_uuid); | |
103 | break; | |
104 | case BTRFS_BAD_BYTENR: | |
105 | fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n", | |
106 | eb->start, btrfs_header_bytenr(eb)); | |
107 | break; | |
108 | case BTRFS_BAD_LEVEL: | |
109 | fprintf(stderr, "bad level, %u > %d\n", | |
110 | btrfs_header_level(eb), BTRFS_MAX_LEVEL); | |
111 | break; | |
112 | case BTRFS_BAD_NRITEMS: | |
113 | fprintf(stderr, "invalid nr_items: %u\n", | |
114 | btrfs_header_nritems(eb)); | |
115 | break; | |
116 | } | |
117 | } | |
118 | ||
565a4147 QW |
119 | int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len) |
120 | { | |
121 | memset(out, 0, BTRFS_CSUM_SIZE); | |
122 | ||
123 | switch (csum_type) { | |
124 | case BTRFS_CSUM_TYPE_CRC32: | |
125 | return hash_crc32c(data, len, out); | |
126 | case BTRFS_CSUM_TYPE_XXHASH: | |
127 | return hash_xxhash(data, len, out); | |
128 | case BTRFS_CSUM_TYPE_SHA256: | |
129 | return hash_sha256(data, len, out); | |
1617165a QW |
130 | case BTRFS_CSUM_TYPE_BLAKE2: |
131 | return hash_blake2(data, len, out); | |
565a4147 QW |
132 | default: |
133 | printf("Unknown csum type %d\n", csum_type); | |
134 | return -EINVAL; | |
135 | } | |
136 | } | |
4aebb994 QW |
137 | |
138 | /* | |
139 | * Check if the super is valid: | |
140 | * - nodesize/sectorsize - minimum, maximum, alignment | |
141 | * - tree block starts - alignment | |
142 | * - number of devices - something sane | |
143 | * - sys array size - maximum | |
144 | */ | |
145 | static int btrfs_check_super(struct btrfs_super_block *sb) | |
146 | { | |
147 | u8 result[BTRFS_CSUM_SIZE]; | |
148 | u16 csum_type; | |
149 | int csum_size; | |
150 | u8 *metadata_uuid; | |
151 | ||
152 | if (btrfs_super_magic(sb) != BTRFS_MAGIC) | |
153 | return -EIO; | |
154 | ||
155 | csum_type = btrfs_super_csum_type(sb); | |
156 | if (csum_type >= btrfs_super_num_csums()) { | |
157 | error("unsupported checksum algorithm %u", csum_type); | |
158 | return -EIO; | |
159 | } | |
160 | csum_size = btrfs_super_csum_size(sb); | |
161 | ||
162 | btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE, | |
163 | result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); | |
164 | ||
165 | if (memcmp(result, sb->csum, csum_size)) { | |
166 | error("superblock checksum mismatch"); | |
167 | return -EIO; | |
168 | } | |
169 | if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { | |
170 | error("tree_root level too big: %d >= %d", | |
171 | btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); | |
172 | goto error_out; | |
173 | } | |
174 | if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { | |
175 | error("chunk_root level too big: %d >= %d", | |
176 | btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); | |
177 | goto error_out; | |
178 | } | |
179 | if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { | |
180 | error("log_root level too big: %d >= %d", | |
181 | btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); | |
182 | goto error_out; | |
183 | } | |
184 | ||
185 | if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) { | |
186 | error("tree_root block unaligned: %llu", btrfs_super_root(sb)); | |
187 | goto error_out; | |
188 | } | |
189 | if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) { | |
190 | error("chunk_root block unaligned: %llu", | |
191 | btrfs_super_chunk_root(sb)); | |
192 | goto error_out; | |
193 | } | |
194 | if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) { | |
195 | error("log_root block unaligned: %llu", | |
196 | btrfs_super_log_root(sb)); | |
197 | goto error_out; | |
198 | } | |
199 | if (btrfs_super_nodesize(sb) < 4096) { | |
200 | error("nodesize too small: %u < 4096", | |
201 | btrfs_super_nodesize(sb)); | |
202 | goto error_out; | |
203 | } | |
204 | if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) { | |
205 | error("nodesize unaligned: %u", btrfs_super_nodesize(sb)); | |
206 | goto error_out; | |
207 | } | |
208 | if (btrfs_super_sectorsize(sb) < 4096) { | |
209 | error("sectorsize too small: %u < 4096", | |
210 | btrfs_super_sectorsize(sb)); | |
211 | goto error_out; | |
212 | } | |
213 | if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) { | |
214 | error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb)); | |
215 | goto error_out; | |
216 | } | |
217 | if (btrfs_super_total_bytes(sb) == 0) { | |
218 | error("invalid total_bytes 0"); | |
219 | goto error_out; | |
220 | } | |
221 | if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { | |
222 | error("invalid bytes_used %llu", btrfs_super_bytes_used(sb)); | |
223 | goto error_out; | |
224 | } | |
225 | if ((btrfs_super_stripesize(sb) != 4096) | |
226 | && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) { | |
227 | error("invalid stripesize %u", btrfs_super_stripesize(sb)); | |
228 | goto error_out; | |
229 | } | |
230 | ||
231 | if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID) | |
232 | metadata_uuid = sb->metadata_uuid; | |
233 | else | |
234 | metadata_uuid = sb->fsid; | |
235 | ||
236 | if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) { | |
237 | char fsid[BTRFS_UUID_UNPARSED_SIZE]; | |
238 | char dev_fsid[BTRFS_UUID_UNPARSED_SIZE]; | |
239 | ||
240 | uuid_unparse(sb->metadata_uuid, fsid); | |
241 | uuid_unparse(sb->dev_item.fsid, dev_fsid); | |
242 | error("dev_item UUID does not match fsid: %s != %s", | |
243 | dev_fsid, fsid); | |
244 | goto error_out; | |
245 | } | |
246 | ||
247 | /* | |
248 | * Hint to catch really bogus numbers, bitflips or so | |
249 | */ | |
250 | if (btrfs_super_num_devices(sb) > (1UL << 31)) { | |
251 | error("suspicious number of devices: %llu", | |
252 | btrfs_super_num_devices(sb)); | |
253 | } | |
254 | ||
255 | if (btrfs_super_num_devices(sb) == 0) { | |
256 | error("number of devices is 0"); | |
257 | goto error_out; | |
258 | } | |
259 | ||
260 | /* | |
261 | * Obvious sys_chunk_array corruptions, it must hold at least one key | |
262 | * and one chunk | |
263 | */ | |
264 | if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { | |
265 | error("system chunk array too big %u > %u", | |
266 | btrfs_super_sys_array_size(sb), | |
267 | BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); | |
268 | goto error_out; | |
269 | } | |
270 | if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) | |
271 | + sizeof(struct btrfs_chunk)) { | |
272 | error("system chunk array too small %u < %zu", | |
273 | btrfs_super_sys_array_size(sb), | |
274 | sizeof(struct btrfs_disk_key) + | |
275 | sizeof(struct btrfs_chunk)); | |
276 | goto error_out; | |
277 | } | |
278 | ||
279 | return 0; | |
280 | ||
281 | error_out: | |
282 | error("superblock checksum matches but it has invalid members"); | |
283 | return -EIO; | |
284 | } | |
285 | ||
286 | /* | |
287 | * btrfs_read_dev_super - read a valid primary superblock from a block device | |
288 | * @desc,@part: file descriptor of the device | |
289 | * @sb: buffer where the superblock is going to be read in | |
290 | * | |
291 | * Unlike the btrfs-progs/kernel version, here we only care about the first |
292 | * super block, thus it's much simpler. | |
293 | */ | |
294 | int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part, | |
295 | struct btrfs_super_block *sb) | |
296 | { | |
9e8bb078 | 297 | ALLOC_CACHE_ALIGN_BUFFER(char, tmp, BTRFS_SUPER_INFO_SIZE); |
4aebb994 QW |
298 | struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp; |
299 | int ret; | |
300 | ||
301 | ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE, | |
302 | BTRFS_SUPER_INFO_OFFSET); | |
303 | if (ret < BTRFS_SUPER_INFO_SIZE) | |
304 | return -EIO; | |
305 | ||
306 | if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET) | |
307 | return -EIO; | |
308 | ||
309 | if (btrfs_check_super(buf)) | |
310 | return -EIO; | |
311 | ||
312 | memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); | |
313 | return 0; | |
314 | } | |
315 | ||
75b0817b QW |
316 | static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, |
317 | int verify, int silent, u16 csum_type) | |
318 | { | |
319 | u8 result[BTRFS_CSUM_SIZE]; | |
320 | u32 len; | |
321 | ||
322 | len = buf->len - BTRFS_CSUM_SIZE; | |
323 | btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE, | |
324 | result, len); | |
325 | ||
326 | if (verify) { | |
327 | if (memcmp_extent_buffer(buf, result, 0, csum_size)) { | |
328 | /* FIXME: format */ | |
329 | if (!silent) | |
330 | printk("checksum verify failed on %llu found %08X wanted %08X\n", | |
331 | (unsigned long long)buf->start, | |
332 | result[0], | |
333 | buf->data[0]); | |
334 | return 1; | |
335 | } | |
336 | } else { | |
337 | write_extent_buffer(buf, result, 0, csum_size); | |
338 | } | |
339 | return 0; | |
340 | } | |
341 | ||
342 | int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify, | |
343 | u16 csum_type) | |
344 | { | |
345 | return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type); | |
346 | } | |
347 | ||
348 | static int csum_tree_block(struct btrfs_fs_info *fs_info, | |
349 | struct extent_buffer *buf, int verify) | |
350 | { | |
351 | u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); | |
352 | u16 csum_type = btrfs_super_csum_type(fs_info->super_copy); | |
353 | ||
354 | return csum_tree_block_size(buf, csum_size, verify, csum_type); | |
355 | } | |
356 | ||
357 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, | |
358 | u64 bytenr, u32 blocksize) | |
359 | { | |
360 | return find_extent_buffer(&fs_info->extent_cache, | |
361 | bytenr, blocksize); | |
362 | } | |
363 | ||
364 | struct extent_buffer* btrfs_find_create_tree_block( | |
365 | struct btrfs_fs_info *fs_info, u64 bytenr) | |
366 | { | |
367 | return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize); | |
368 | } | |
369 | ||
370 | static int verify_parent_transid(struct extent_io_tree *io_tree, | |
371 | struct extent_buffer *eb, u64 parent_transid, | |
372 | int ignore) | |
373 | { | |
374 | int ret; | |
375 | ||
376 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | |
377 | return 0; | |
378 | ||
379 | if (extent_buffer_uptodate(eb) && | |
380 | btrfs_header_generation(eb) == parent_transid) { | |
381 | ret = 0; | |
382 | goto out; | |
383 | } | |
384 | printk("parent transid verify failed on %llu wanted %llu found %llu\n", | |
385 | (unsigned long long)eb->start, | |
386 | (unsigned long long)parent_transid, | |
387 | (unsigned long long)btrfs_header_generation(eb)); | |
388 | if (ignore) { | |
389 | eb->flags |= EXTENT_BAD_TRANSID; | |
390 | printk("Ignoring transid failure\n"); | |
391 | return 0; | |
392 | } | |
393 | ||
394 | ret = 1; | |
395 | out: | |
396 | clear_extent_buffer_uptodate(eb); | |
397 | return ret; | |
398 | ||
399 | } | |
400 | ||
75b0817b QW |
401 | int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror) |
402 | { | |
403 | unsigned long offset = 0; | |
404 | struct btrfs_multi_bio *multi = NULL; | |
405 | struct btrfs_device *device; | |
406 | int ret = 0; | |
407 | u64 read_len; | |
408 | unsigned long bytes_left = eb->len; | |
409 | ||
410 | while (bytes_left) { | |
411 | read_len = bytes_left; | |
412 | device = NULL; | |
413 | ||
414 | ret = btrfs_map_block(info, READ, eb->start + offset, | |
415 | &read_len, &multi, mirror, NULL); | |
416 | if (ret) { | |
417 | printk("Couldn't map the block %Lu\n", eb->start + offset); | |
418 | kfree(multi); | |
419 | return -EIO; | |
420 | } | |
421 | device = multi->stripes[0].dev; | |
422 | ||
423 | if (!device->desc || !device->part) { | |
424 | kfree(multi); | |
425 | return -EIO; | |
426 | } | |
427 | ||
428 | if (read_len > bytes_left) | |
429 | read_len = bytes_left; | |
430 | ||
431 | ret = read_extent_from_disk(device->desc, device->part, | |
432 | multi->stripes[0].physical, eb, | |
433 | offset, read_len); | |
434 | kfree(multi); | |
435 | multi = NULL; | |
436 | ||
437 | if (ret) | |
438 | return -EIO; | |
439 | offset += read_len; | |
440 | bytes_left -= read_len; | |
441 | } | |
442 | return 0; | |
443 | } | |
444 | ||
445 | struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, | |
446 | u64 parent_transid) | |
447 | { | |
448 | int ret; | |
449 | struct extent_buffer *eb; | |
450 | u64 best_transid = 0; | |
451 | u32 sectorsize = fs_info->sectorsize; | |
452 | int mirror_num = 1; | |
453 | int good_mirror = 0; | |
454 | int candidate_mirror = 0; | |
455 | int num_copies; | |
456 | int ignore = 0; | |
457 | ||
458 | /* | |
459 | * Don't even try to create tree block for unaligned tree block | |
460 | * bytenr. | |
461 | * Such unaligned tree block will free overlapping extent buffer, | |
462 | * causing use-after-free bugs for fuzzed images. | |
463 | */ | |
464 | if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) { | |
465 | error("tree block bytenr %llu is not aligned to sectorsize %u", | |
466 | bytenr, sectorsize); | |
467 | return ERR_PTR(-EIO); | |
468 | } | |
469 | ||
470 | eb = btrfs_find_create_tree_block(fs_info, bytenr); | |
471 | if (!eb) | |
472 | return ERR_PTR(-ENOMEM); | |
473 | ||
474 | if (btrfs_buffer_uptodate(eb, parent_transid)) | |
475 | return eb; | |
476 | ||
477 | num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); | |
478 | while (1) { | |
479 | ret = read_whole_eb(fs_info, eb, mirror_num); | |
480 | if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 && | |
481 | check_tree_block(fs_info, eb) == 0 && | |
482 | verify_parent_transid(&fs_info->extent_cache, eb, | |
483 | parent_transid, ignore) == 0) { | |
484 | /* | |
485 | * check_tree_block() is less strict to allow btrfs | |
486 | * check to get raw eb with bad key order and fix it. | |
487 | * But we still need to try to get a good copy if | |
488 | * possible, or bad key order can go into tools like | |
489 | * btrfs ins dump-tree. | |
490 | */ | |
491 | if (btrfs_header_level(eb)) | |
492 | ret = btrfs_check_node(fs_info, NULL, eb); | |
493 | else | |
494 | ret = btrfs_check_leaf(fs_info, NULL, eb); | |
495 | if (!ret || candidate_mirror == mirror_num) { | |
496 | btrfs_set_buffer_uptodate(eb); | |
497 | return eb; | |
498 | } | |
499 | if (candidate_mirror <= 0) | |
500 | candidate_mirror = mirror_num; | |
501 | } | |
502 | if (ignore) { | |
503 | if (candidate_mirror > 0) { | |
504 | mirror_num = candidate_mirror; | |
505 | continue; | |
506 | } | |
507 | if (check_tree_block(fs_info, eb)) | |
508 | print_tree_block_error(fs_info, eb, | |
509 | check_tree_block(fs_info, eb)); | |
510 | else | |
511 | fprintf(stderr, "Csum didn't match\n"); | |
512 | ret = -EIO; | |
513 | break; | |
514 | } | |
515 | if (num_copies == 1) { | |
516 | ignore = 1; | |
517 | continue; | |
518 | } | |
519 | if (btrfs_header_generation(eb) > best_transid) { | |
520 | best_transid = btrfs_header_generation(eb); | |
521 | good_mirror = mirror_num; | |
522 | } | |
523 | mirror_num++; | |
524 | if (mirror_num > num_copies) { | |
525 | if (candidate_mirror > 0) | |
526 | mirror_num = candidate_mirror; | |
527 | else | |
528 | mirror_num = good_mirror; | |
529 | ignore = 1; | |
530 | continue; | |
531 | } | |
532 | } | |
533 | /* | |
534 | * We failed to read this tree block, it be should deleted right now | |
535 | * to avoid stale cache populate the cache. | |
536 | */ | |
537 | free_extent_buffer(eb); | |
538 | return ERR_PTR(ret); | |
539 | } | |
f06bfcf5 | 540 | |
a26a6bed QW |
541 | int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical, |
542 | u64 *len, int mirror) | |
543 | { | |
544 | u64 offset = 0; | |
545 | struct btrfs_multi_bio *multi = NULL; | |
546 | struct btrfs_device *device; | |
547 | int ret = 0; | |
548 | u64 max_len = *len; | |
549 | ||
550 | ret = btrfs_map_block(fs_info, READ, logical, len, &multi, mirror, | |
551 | NULL); | |
552 | if (ret) { | |
553 | fprintf(stderr, "Couldn't map the block %llu\n", | |
554 | logical + offset); | |
555 | goto err; | |
556 | } | |
557 | device = multi->stripes[0].dev; | |
558 | ||
559 | if (*len > max_len) | |
560 | *len = max_len; | |
561 | if (!device->desc || !device->part) { | |
562 | ret = -EIO; | |
563 | goto err; | |
564 | } | |
565 | ||
566 | ret = __btrfs_devread(device->desc, device->part, data, *len, | |
567 | multi->stripes[0].physical); | |
568 | if (ret != *len) | |
569 | ret = -EIO; | |
570 | else | |
571 | ret = 0; | |
572 | err: | |
573 | kfree(multi); | |
574 | return ret; | |
575 | } | |
576 | ||
f06bfcf5 QW |
577 | void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, |
578 | u64 objectid) | |
579 | { | |
580 | root->node = NULL; | |
581 | root->track_dirty = 0; | |
582 | ||
583 | root->fs_info = fs_info; | |
584 | root->objectid = objectid; | |
585 | root->last_trans = 0; | |
586 | root->last_inode_alloc = 0; | |
587 | ||
588 | memset(&root->root_key, 0, sizeof(root->root_key)); | |
589 | memset(&root->root_item, 0, sizeof(root->root_item)); | |
590 | root->root_key.objectid = objectid; | |
591 | } | |
592 | ||
593 | static int find_and_setup_root(struct btrfs_root *tree_root, | |
594 | struct btrfs_fs_info *fs_info, | |
595 | u64 objectid, struct btrfs_root *root) | |
596 | { | |
597 | int ret; | |
598 | u64 generation; | |
599 | ||
600 | btrfs_setup_root(root, fs_info, objectid); | |
601 | ret = btrfs_find_last_root(tree_root, objectid, | |
602 | &root->root_item, &root->root_key); | |
603 | if (ret) | |
604 | return ret; | |
605 | ||
606 | generation = btrfs_root_generation(&root->root_item); | |
607 | root->node = read_tree_block(fs_info, | |
608 | btrfs_root_bytenr(&root->root_item), generation); | |
609 | if (!extent_buffer_uptodate(root->node)) | |
610 | return -EIO; | |
611 | ||
612 | return 0; | |
613 | } | |
614 | ||
615 | int btrfs_free_fs_root(struct btrfs_root *root) | |
616 | { | |
617 | if (root->node) | |
618 | free_extent_buffer(root->node); | |
619 | kfree(root); | |
620 | return 0; | |
621 | } | |
622 | ||
623 | static void __free_fs_root(struct rb_node *node) | |
624 | { | |
625 | struct btrfs_root *root; | |
626 | ||
627 | root = container_of(node, struct btrfs_root, rb_node); | |
628 | btrfs_free_fs_root(root); | |
629 | } | |
630 | ||
631 | FREE_RB_BASED_TREE(fs_roots, __free_fs_root); | |
632 | ||
633 | struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, | |
634 | struct btrfs_key *location) | |
635 | { | |
636 | struct btrfs_root *root; | |
637 | struct btrfs_root *tree_root = fs_info->tree_root; | |
638 | struct btrfs_path *path; | |
639 | struct extent_buffer *l; | |
640 | u64 generation; | |
641 | int ret = 0; | |
642 | ||
643 | root = calloc(1, sizeof(*root)); | |
644 | if (!root) | |
645 | return ERR_PTR(-ENOMEM); | |
646 | if (location->offset == (u64)-1) { | |
647 | ret = find_and_setup_root(tree_root, fs_info, | |
648 | location->objectid, root); | |
649 | if (ret) { | |
650 | free(root); | |
651 | return ERR_PTR(ret); | |
652 | } | |
653 | goto insert; | |
654 | } | |
655 | ||
656 | btrfs_setup_root(root, fs_info, | |
657 | location->objectid); | |
658 | ||
659 | path = btrfs_alloc_path(); | |
660 | if (!path) { | |
661 | free(root); | |
662 | return ERR_PTR(-ENOMEM); | |
663 | } | |
664 | ||
665 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | |
666 | if (ret != 0) { | |
667 | if (ret > 0) | |
668 | ret = -ENOENT; | |
669 | goto out; | |
670 | } | |
671 | l = path->nodes[0]; | |
672 | read_extent_buffer(l, &root->root_item, | |
673 | btrfs_item_ptr_offset(l, path->slots[0]), | |
674 | sizeof(root->root_item)); | |
675 | memcpy(&root->root_key, location, sizeof(*location)); | |
676 | ||
677 | /* If this root is already an orphan, no need to read */ | |
678 | if (btrfs_root_refs(&root->root_item) == 0) { | |
679 | ret = -ENOENT; | |
680 | goto out; | |
681 | } | |
682 | ret = 0; | |
683 | out: | |
684 | btrfs_free_path(path); | |
685 | if (ret) { | |
686 | free(root); | |
687 | return ERR_PTR(ret); | |
688 | } | |
689 | generation = btrfs_root_generation(&root->root_item); | |
690 | root->node = read_tree_block(fs_info, | |
691 | btrfs_root_bytenr(&root->root_item), generation); | |
692 | if (!extent_buffer_uptodate(root->node)) { | |
693 | free(root); | |
694 | return ERR_PTR(-EIO); | |
695 | } | |
696 | insert: | |
697 | root->ref_cows = 1; | |
698 | return root; | |
699 | } | |
700 | ||
701 | static int btrfs_fs_roots_compare_objectids(struct rb_node *node, | |
702 | void *data) | |
703 | { | |
704 | u64 objectid = *((u64 *)data); | |
705 | struct btrfs_root *root; | |
706 | ||
707 | root = rb_entry(node, struct btrfs_root, rb_node); | |
708 | if (objectid > root->objectid) | |
709 | return 1; | |
710 | else if (objectid < root->objectid) | |
711 | return -1; | |
712 | else | |
713 | return 0; | |
714 | } | |
715 | ||
716 | int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2) | |
717 | { | |
718 | struct btrfs_root *root; | |
719 | ||
720 | root = rb_entry(node2, struct btrfs_root, rb_node); | |
721 | return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid); | |
722 | } | |
723 | ||
724 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |
725 | struct btrfs_key *location) | |
726 | { | |
727 | struct btrfs_root *root; | |
728 | struct rb_node *node; | |
729 | int ret; | |
730 | u64 objectid = location->objectid; | |
731 | ||
732 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | |
733 | return fs_info->tree_root; | |
734 | if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) | |
735 | return fs_info->chunk_root; | |
736 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) | |
737 | return fs_info->csum_root; | |
1afb9f22 | 738 | BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID); |
f06bfcf5 QW |
739 | |
740 | node = rb_search(&fs_info->fs_root_tree, (void *)&objectid, | |
741 | btrfs_fs_roots_compare_objectids, NULL); | |
742 | if (node) | |
743 | return container_of(node, struct btrfs_root, rb_node); | |
744 | ||
745 | root = btrfs_read_fs_root_no_cache(fs_info, location); | |
746 | if (IS_ERR(root)) | |
747 | return root; | |
748 | ||
749 | ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node, | |
750 | btrfs_fs_roots_compare_roots); | |
751 | BUG_ON(ret); | |
752 | return root; | |
753 | } | |
754 | ||
755 | void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) | |
756 | { | |
757 | free(fs_info->tree_root); | |
758 | free(fs_info->chunk_root); | |
759 | free(fs_info->csum_root); | |
760 | free(fs_info->super_copy); | |
761 | free(fs_info); | |
762 | } | |
763 | ||
764 | struct btrfs_fs_info *btrfs_new_fs_info(void) | |
765 | { | |
766 | struct btrfs_fs_info *fs_info; | |
767 | ||
768 | fs_info = calloc(1, sizeof(struct btrfs_fs_info)); | |
769 | if (!fs_info) | |
770 | return NULL; | |
771 | ||
772 | fs_info->tree_root = calloc(1, sizeof(struct btrfs_root)); | |
773 | fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root)); | |
774 | fs_info->csum_root = calloc(1, sizeof(struct btrfs_root)); | |
775 | fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE); | |
776 | ||
777 | if (!fs_info->tree_root || !fs_info->chunk_root || | |
778 | !fs_info->csum_root || !fs_info->super_copy) | |
779 | goto free_all; | |
780 | ||
781 | extent_io_tree_init(&fs_info->extent_cache); | |
782 | ||
783 | fs_info->fs_root_tree = RB_ROOT; | |
784 | cache_tree_init(&fs_info->mapping_tree.cache_tree); | |
785 | ||
f06bfcf5 QW |
786 | return fs_info; |
787 | free_all: | |
788 | btrfs_free_fs_info(fs_info); | |
789 | return NULL; | |
790 | } | |
791 | ||
792 | static int setup_root_or_create_block(struct btrfs_fs_info *fs_info, | |
793 | struct btrfs_root *info_root, | |
794 | u64 objectid, char *str) | |
795 | { | |
796 | struct btrfs_root *root = fs_info->tree_root; | |
797 | int ret; | |
798 | ||
799 | ret = find_and_setup_root(root, fs_info, objectid, info_root); | |
800 | if (ret) { | |
801 | error("could not setup %s tree", str); | |
802 | return -EIO; | |
803 | } | |
804 | ||
805 | return 0; | |
806 | } | |
807 | ||
94509b79 MK |
808 | static int get_default_subvolume(struct btrfs_fs_info *fs_info, |
809 | struct btrfs_key *key_ret) | |
810 | { | |
811 | struct btrfs_root *root = fs_info->tree_root; | |
812 | struct btrfs_dir_item *dir_item; | |
813 | struct btrfs_path path; | |
814 | int ret = 0; | |
815 | ||
816 | btrfs_init_path(&path); | |
817 | ||
818 | dir_item = btrfs_lookup_dir_item(NULL, root, &path, | |
819 | BTRFS_ROOT_TREE_DIR_OBJECTID, | |
820 | "default", 7, 0); | |
821 | if (IS_ERR(dir_item)) { | |
822 | ret = PTR_ERR(dir_item); | |
823 | goto out; | |
824 | } | |
825 | ||
826 | btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret); | |
827 | out: | |
828 | btrfs_release_path(&path); | |
829 | return ret; | |
830 | } | |
831 | ||
f06bfcf5 QW |
832 | int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info) |
833 | { | |
834 | struct btrfs_super_block *sb = fs_info->super_copy; | |
835 | struct btrfs_root *root; | |
836 | struct btrfs_key key; | |
837 | u64 root_tree_bytenr; | |
838 | u64 generation; | |
839 | int ret; | |
840 | ||
841 | root = fs_info->tree_root; | |
842 | btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID); | |
843 | generation = btrfs_super_generation(sb); | |
844 | ||
845 | root_tree_bytenr = btrfs_super_root(sb); | |
846 | ||
847 | root->node = read_tree_block(fs_info, root_tree_bytenr, generation); | |
848 | if (!extent_buffer_uptodate(root->node)) { | |
849 | fprintf(stderr, "Couldn't read tree root\n"); | |
850 | return -EIO; | |
851 | } | |
852 | ||
853 | ret = setup_root_or_create_block(fs_info, fs_info->csum_root, | |
854 | BTRFS_CSUM_TREE_OBJECTID, "csum"); | |
855 | if (ret) | |
856 | return ret; | |
857 | fs_info->csum_root->track_dirty = 1; | |
858 | ||
859 | fs_info->last_trans_committed = generation; | |
860 | ||
94509b79 MK |
861 | ret = get_default_subvolume(fs_info, &key); |
862 | if (ret) { | |
863 | /* | |
864 | * The default dir item isn't there. Linux kernel behaviour is | |
865 | * to silently use the top-level subvolume in this case. | |
866 | */ | |
867 | key.objectid = BTRFS_FS_TREE_OBJECTID; | |
868 | key.type = BTRFS_ROOT_ITEM_KEY; | |
869 | key.offset = (u64)-1; | |
870 | } | |
871 | ||
f06bfcf5 QW |
872 | fs_info->fs_root = btrfs_read_fs_root(fs_info, &key); |
873 | ||
874 | if (IS_ERR(fs_info->fs_root)) | |
875 | return -EIO; | |
876 | return 0; | |
877 | } | |
878 | ||
879 | void btrfs_release_all_roots(struct btrfs_fs_info *fs_info) | |
880 | { | |
881 | if (fs_info->csum_root) | |
882 | free_extent_buffer(fs_info->csum_root->node); | |
883 | if (fs_info->tree_root) | |
884 | free_extent_buffer(fs_info->tree_root->node); | |
885 | if (fs_info->chunk_root) | |
886 | free_extent_buffer(fs_info->chunk_root->node); | |
887 | } | |
888 | ||
889 | static void free_map_lookup(struct cache_extent *ce) | |
890 | { | |
891 | struct map_lookup *map; | |
892 | ||
893 | map = container_of(ce, struct map_lookup, ce); | |
894 | kfree(map); | |
895 | } | |
896 | ||
897 | FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup); | |
898 | ||
899 | void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info) | |
900 | { | |
901 | free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree); | |
902 | extent_io_tree_cleanup(&fs_info->extent_cache); | |
903 | } | |
904 | ||
905 | static int btrfs_scan_fs_devices(struct blk_desc *desc, | |
906 | struct disk_partition *part, | |
907 | struct btrfs_fs_devices **fs_devices) | |
908 | { | |
909 | u64 total_devs; | |
910 | int ret; | |
911 | ||
912 | if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET, | |
913 | desc->blksz) > (part->size << desc->log2blksz)) { | |
f337fb9e SG |
914 | log_debug("superblock end %u is larger than device size " LBAFU, |
915 | BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET, | |
916 | part->size << desc->log2blksz); | |
f06bfcf5 QW |
917 | return -EINVAL; |
918 | } | |
919 | ||
920 | ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs); | |
921 | if (ret) { | |
64acd46a SG |
922 | /* |
923 | * Avoid showing this when probing for a possible Btrfs | |
924 | * | |
925 | * fprintf(stderr, "No valid Btrfs found\n"); | |
926 | */ | |
f06bfcf5 QW |
927 | return ret; |
928 | } | |
929 | return 0; | |
930 | } | |
931 | ||
932 | int btrfs_check_fs_compatibility(struct btrfs_super_block *sb) | |
933 | { | |
934 | u64 features; | |
935 | ||
936 | features = btrfs_super_incompat_flags(sb) & | |
937 | ~BTRFS_FEATURE_INCOMPAT_SUPP; | |
938 | if (features) { | |
939 | printk("couldn't open because of unsupported " | |
940 | "option features (%llx).\n", | |
941 | (unsigned long long)features); | |
942 | return -ENOTSUPP; | |
943 | } | |
944 | ||
945 | features = btrfs_super_incompat_flags(sb); | |
946 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | |
947 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | |
948 | btrfs_set_super_incompat_flags(sb, features); | |
949 | } | |
950 | ||
951 | return 0; | |
952 | } | |
953 | ||
954 | static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info) | |
955 | { | |
956 | struct btrfs_super_block *sb = fs_info->super_copy; | |
957 | u64 chunk_root_bytenr; | |
958 | u64 generation; | |
959 | int ret; | |
960 | ||
961 | btrfs_setup_root(fs_info->chunk_root, fs_info, | |
962 | BTRFS_CHUNK_TREE_OBJECTID); | |
963 | ||
964 | ret = btrfs_read_sys_array(fs_info); | |
965 | if (ret) | |
966 | return ret; | |
967 | ||
968 | generation = btrfs_super_chunk_root_generation(sb); | |
969 | chunk_root_bytenr = btrfs_super_chunk_root(sb); | |
970 | ||
971 | fs_info->chunk_root->node = read_tree_block(fs_info, | |
972 | chunk_root_bytenr, | |
973 | generation); | |
974 | if (!extent_buffer_uptodate(fs_info->chunk_root->node)) { | |
975 | error("cannot read chunk root"); | |
976 | return -EIO; | |
977 | } | |
978 | ||
979 | ret = btrfs_read_chunk_tree(fs_info); | |
980 | if (ret) { | |
981 | fprintf(stderr, "Couldn't read chunk tree\n"); | |
982 | return ret; | |
983 | } | |
984 | return 0; | |
985 | } | |
986 | ||
987 | struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc, | |
988 | struct disk_partition *part) | |
989 | { | |
990 | struct btrfs_fs_info *fs_info; | |
991 | struct btrfs_super_block *disk_super; | |
992 | struct btrfs_fs_devices *fs_devices = NULL; | |
993 | struct extent_buffer *eb; | |
994 | int ret; | |
995 | ||
996 | fs_info = btrfs_new_fs_info(); | |
997 | if (!fs_info) { | |
998 | fprintf(stderr, "Failed to allocate memory for fs_info\n"); | |
999 | return NULL; | |
1000 | } | |
1001 | ||
1002 | ret = btrfs_scan_fs_devices(desc, part, &fs_devices); | |
1003 | if (ret) | |
1004 | goto out; | |
1005 | ||
1006 | fs_info->fs_devices = fs_devices; | |
1007 | ||
1008 | ret = btrfs_open_devices(fs_devices); | |
1009 | if (ret) | |
1010 | goto out; | |
1011 | ||
1012 | disk_super = fs_info->super_copy; | |
1013 | ret = btrfs_read_dev_super(desc, part, disk_super); | |
1014 | if (ret) { | |
64acd46a | 1015 | debug("No valid btrfs found\n"); |
f06bfcf5 QW |
1016 | goto out_devices; |
1017 | } | |
1018 | ||
1019 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) { | |
1020 | fprintf(stderr, "ERROR: Filesystem UUID change in progress\n"); | |
1021 | goto out_devices; | |
1022 | } | |
1023 | ||
1024 | ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE)); | |
1025 | if (btrfs_fs_incompat(fs_info, METADATA_UUID)) | |
1026 | ASSERT(!memcmp(disk_super->metadata_uuid, | |
1027 | fs_devices->metadata_uuid, BTRFS_FSID_SIZE)); | |
1028 | ||
1029 | fs_info->sectorsize = btrfs_super_sectorsize(disk_super); | |
1030 | fs_info->nodesize = btrfs_super_nodesize(disk_super); | |
1031 | fs_info->stripesize = btrfs_super_stripesize(disk_super); | |
1032 | ||
1033 | ret = btrfs_check_fs_compatibility(fs_info->super_copy); | |
1034 | if (ret) | |
1035 | goto out_devices; | |
1036 | ||
1037 | ret = btrfs_setup_chunk_tree_and_device_map(fs_info); | |
1038 | if (ret) | |
1039 | goto out_chunk; | |
1040 | ||
1041 | /* Chunk tree root is unable to read, return directly */ | |
1042 | if (!fs_info->chunk_root) | |
1043 | return fs_info; | |
1044 | ||
1045 | eb = fs_info->chunk_root->node; | |
1046 | read_extent_buffer(eb, fs_info->chunk_tree_uuid, | |
1047 | btrfs_header_chunk_tree_uuid(eb), | |
1048 | BTRFS_UUID_SIZE); | |
1049 | ||
1050 | ret = btrfs_setup_all_roots(fs_info); | |
1051 | if (ret) | |
1052 | goto out_chunk; | |
1053 | ||
1054 | return fs_info; | |
1055 | ||
1056 | out_chunk: | |
1057 | btrfs_release_all_roots(fs_info); | |
1058 | btrfs_cleanup_all_caches(fs_info); | |
1059 | out_devices: | |
1060 | btrfs_close_devices(fs_devices); | |
1061 | out: | |
1062 | btrfs_free_fs_info(fs_info); | |
1063 | return NULL; | |
1064 | } | |
1065 | ||
1066 | int close_ctree_fs_info(struct btrfs_fs_info *fs_info) | |
1067 | { | |
1068 | int ret; | |
f06bfcf5 QW |
1069 | |
1070 | free_fs_roots_tree(&fs_info->fs_root_tree); | |
1071 | ||
1072 | btrfs_release_all_roots(fs_info); | |
1073 | ret = btrfs_close_devices(fs_info->fs_devices); | |
1074 | btrfs_cleanup_all_caches(fs_info); | |
1075 | btrfs_free_fs_info(fs_info); | |
3b00a6ba | 1076 | return ret; |
f06bfcf5 QW |
1077 | } |
1078 | ||
1079 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) | |
1080 | { | |
1081 | int ret; | |
1082 | ||
1083 | ret = extent_buffer_uptodate(buf); | |
1084 | if (!ret) | |
1085 | return ret; | |
1086 | ||
1087 | ret = verify_parent_transid(&buf->fs_info->extent_cache, buf, | |
1088 | parent_transid, 1); | |
1089 | return !ret; | |
1090 | } | |
1091 | ||
/*
 * Mark an extent buffer as up to date.
 *
 * @eb: extent buffer to mark
 *
 * Return: whatever set_extent_buffer_uptodate() returns.
 */
int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
{
	int status = set_extent_buffer_uptodate(eb);

	return status;
}