]>
Commit | Line | Data |
---|---|---|
565a4147 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
565a4147 | 2 | #include <fs_internal.h> |
f337fb9e | 3 | #include <log.h> |
58d825fb | 4 | #include <u-boot/uuid.h> |
4aebb994 QW |
5 | #include <memalign.h> |
6 | #include "kernel-shared/btrfs_tree.h" | |
f06bfcf5 | 7 | #include "common/rbtree-utils.h" |
565a4147 | 8 | #include "disk-io.h" |
4aebb994 QW |
9 | #include "ctree.h" |
10 | #include "btrfs.h" | |
75b0817b QW |
11 | #include "volumes.h" |
12 | #include "extent-io.h" | |
565a4147 QW |
13 | #include "crypto/hash.h" |
14 | ||
75b0817b QW |
/*
 * Private failure codes returned by check_tree_block() and decoded by
 * print_tree_block_error().
 */
enum {
	BTRFS_BAD_BYTENR	= -1,
	BTRFS_BAD_FSID		= -2,
	BTRFS_BAD_LEVEL		= -3,
	BTRFS_BAD_NRITEMS	= -4,
};
20 | ||
21 | /* Calculate max possible nritems for a leaf/node */ | |
22 | static u32 max_nritems(u8 level, u32 nodesize) | |
23 | { | |
24 | ||
25 | if (level == 0) | |
26 | return ((nodesize - sizeof(struct btrfs_header)) / | |
27 | sizeof(struct btrfs_item)); | |
28 | return ((nodesize - sizeof(struct btrfs_header)) / | |
29 | sizeof(struct btrfs_key_ptr)); | |
30 | } | |
31 | ||
32 | static int check_tree_block(struct btrfs_fs_info *fs_info, | |
33 | struct extent_buffer *buf) | |
34 | { | |
35 | ||
36 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | |
37 | u32 nodesize = fs_info->nodesize; | |
38 | bool fsid_match = false; | |
39 | int ret = BTRFS_BAD_FSID; | |
40 | ||
41 | if (buf->start != btrfs_header_bytenr(buf)) | |
42 | return BTRFS_BAD_BYTENR; | |
43 | if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL) | |
44 | return BTRFS_BAD_LEVEL; | |
45 | if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf), | |
46 | nodesize)) | |
47 | return BTRFS_BAD_NRITEMS; | |
48 | ||
49 | /* Only leaf can be empty */ | |
50 | if (btrfs_header_nritems(buf) == 0 && | |
51 | btrfs_header_level(buf) != 0) | |
52 | return BTRFS_BAD_NRITEMS; | |
53 | ||
54 | while (fs_devices) { | |
55 | /* | |
56 | * Checking the incompat flag is only valid for the current | |
57 | * fs. For seed devices it's forbidden to have their uuid | |
58 | * changed so reading ->fsid in this case is fine | |
59 | */ | |
60 | if (fs_devices == fs_info->fs_devices && | |
61 | btrfs_fs_incompat(fs_info, METADATA_UUID)) | |
62 | fsid_match = !memcmp_extent_buffer(buf, | |
63 | fs_devices->metadata_uuid, | |
64 | btrfs_header_fsid(), | |
65 | BTRFS_FSID_SIZE); | |
66 | else | |
67 | fsid_match = !memcmp_extent_buffer(buf, | |
68 | fs_devices->fsid, | |
69 | btrfs_header_fsid(), | |
70 | BTRFS_FSID_SIZE); | |
71 | ||
75b0817b QW |
72 | if (fsid_match) { |
73 | ret = 0; | |
74 | break; | |
75 | } | |
76 | fs_devices = fs_devices->seed; | |
77 | } | |
78 | return ret; | |
79 | } | |
80 | ||
81 | static void print_tree_block_error(struct btrfs_fs_info *fs_info, | |
82 | struct extent_buffer *eb, | |
83 | int err) | |
84 | { | |
85 | char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; | |
86 | char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'}; | |
87 | u8 buf[BTRFS_UUID_SIZE]; | |
88 | ||
89 | if (!err) | |
90 | return; | |
91 | ||
92 | fprintf(stderr, "bad tree block %llu, ", eb->start); | |
93 | switch (err) { | |
94 | case BTRFS_BAD_FSID: | |
95 | read_extent_buffer(eb, buf, btrfs_header_fsid(), | |
96 | BTRFS_UUID_SIZE); | |
97 | uuid_unparse(buf, found_uuid); | |
98 | uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid); | |
99 | fprintf(stderr, "fsid mismatch, want=%s, have=%s\n", | |
100 | fs_uuid, found_uuid); | |
101 | break; | |
102 | case BTRFS_BAD_BYTENR: | |
103 | fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n", | |
104 | eb->start, btrfs_header_bytenr(eb)); | |
105 | break; | |
106 | case BTRFS_BAD_LEVEL: | |
107 | fprintf(stderr, "bad level, %u > %d\n", | |
108 | btrfs_header_level(eb), BTRFS_MAX_LEVEL); | |
109 | break; | |
110 | case BTRFS_BAD_NRITEMS: | |
111 | fprintf(stderr, "invalid nr_items: %u\n", | |
112 | btrfs_header_nritems(eb)); | |
113 | break; | |
114 | } | |
115 | } | |
116 | ||
565a4147 QW |
117 | int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len) |
118 | { | |
119 | memset(out, 0, BTRFS_CSUM_SIZE); | |
120 | ||
121 | switch (csum_type) { | |
122 | case BTRFS_CSUM_TYPE_CRC32: | |
123 | return hash_crc32c(data, len, out); | |
124 | case BTRFS_CSUM_TYPE_XXHASH: | |
125 | return hash_xxhash(data, len, out); | |
126 | case BTRFS_CSUM_TYPE_SHA256: | |
127 | return hash_sha256(data, len, out); | |
1617165a QW |
128 | case BTRFS_CSUM_TYPE_BLAKE2: |
129 | return hash_blake2(data, len, out); | |
565a4147 QW |
130 | default: |
131 | printf("Unknown csum type %d\n", csum_type); | |
132 | return -EINVAL; | |
133 | } | |
134 | } | |
4aebb994 QW |
135 | |
136 | /* | |
137 | * Check if the super is valid: | |
138 | * - nodesize/sectorsize - minimum, maximum, alignment | |
139 | * - tree block starts - alignment | |
140 | * - number of devices - something sane | |
141 | * - sys array size - maximum | |
142 | */ | |
143 | static int btrfs_check_super(struct btrfs_super_block *sb) | |
144 | { | |
145 | u8 result[BTRFS_CSUM_SIZE]; | |
146 | u16 csum_type; | |
147 | int csum_size; | |
148 | u8 *metadata_uuid; | |
149 | ||
150 | if (btrfs_super_magic(sb) != BTRFS_MAGIC) | |
151 | return -EIO; | |
152 | ||
153 | csum_type = btrfs_super_csum_type(sb); | |
154 | if (csum_type >= btrfs_super_num_csums()) { | |
155 | error("unsupported checksum algorithm %u", csum_type); | |
156 | return -EIO; | |
157 | } | |
158 | csum_size = btrfs_super_csum_size(sb); | |
159 | ||
160 | btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE, | |
161 | result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); | |
162 | ||
163 | if (memcmp(result, sb->csum, csum_size)) { | |
164 | error("superblock checksum mismatch"); | |
165 | return -EIO; | |
166 | } | |
167 | if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { | |
168 | error("tree_root level too big: %d >= %d", | |
169 | btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); | |
170 | goto error_out; | |
171 | } | |
172 | if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { | |
173 | error("chunk_root level too big: %d >= %d", | |
174 | btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); | |
175 | goto error_out; | |
176 | } | |
177 | if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { | |
178 | error("log_root level too big: %d >= %d", | |
179 | btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); | |
180 | goto error_out; | |
181 | } | |
182 | ||
183 | if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) { | |
184 | error("tree_root block unaligned: %llu", btrfs_super_root(sb)); | |
185 | goto error_out; | |
186 | } | |
187 | if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) { | |
188 | error("chunk_root block unaligned: %llu", | |
189 | btrfs_super_chunk_root(sb)); | |
190 | goto error_out; | |
191 | } | |
192 | if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) { | |
193 | error("log_root block unaligned: %llu", | |
194 | btrfs_super_log_root(sb)); | |
195 | goto error_out; | |
196 | } | |
197 | if (btrfs_super_nodesize(sb) < 4096) { | |
198 | error("nodesize too small: %u < 4096", | |
199 | btrfs_super_nodesize(sb)); | |
200 | goto error_out; | |
201 | } | |
202 | if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) { | |
203 | error("nodesize unaligned: %u", btrfs_super_nodesize(sb)); | |
204 | goto error_out; | |
205 | } | |
206 | if (btrfs_super_sectorsize(sb) < 4096) { | |
207 | error("sectorsize too small: %u < 4096", | |
208 | btrfs_super_sectorsize(sb)); | |
209 | goto error_out; | |
210 | } | |
211 | if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) { | |
212 | error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb)); | |
213 | goto error_out; | |
214 | } | |
215 | if (btrfs_super_total_bytes(sb) == 0) { | |
216 | error("invalid total_bytes 0"); | |
217 | goto error_out; | |
218 | } | |
219 | if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { | |
220 | error("invalid bytes_used %llu", btrfs_super_bytes_used(sb)); | |
221 | goto error_out; | |
222 | } | |
223 | if ((btrfs_super_stripesize(sb) != 4096) | |
224 | && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) { | |
225 | error("invalid stripesize %u", btrfs_super_stripesize(sb)); | |
226 | goto error_out; | |
227 | } | |
228 | ||
229 | if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID) | |
230 | metadata_uuid = sb->metadata_uuid; | |
231 | else | |
232 | metadata_uuid = sb->fsid; | |
233 | ||
234 | if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) { | |
235 | char fsid[BTRFS_UUID_UNPARSED_SIZE]; | |
236 | char dev_fsid[BTRFS_UUID_UNPARSED_SIZE]; | |
237 | ||
238 | uuid_unparse(sb->metadata_uuid, fsid); | |
239 | uuid_unparse(sb->dev_item.fsid, dev_fsid); | |
240 | error("dev_item UUID does not match fsid: %s != %s", | |
241 | dev_fsid, fsid); | |
242 | goto error_out; | |
243 | } | |
244 | ||
245 | /* | |
246 | * Hint to catch really bogus numbers, bitflips or so | |
247 | */ | |
248 | if (btrfs_super_num_devices(sb) > (1UL << 31)) { | |
249 | error("suspicious number of devices: %llu", | |
250 | btrfs_super_num_devices(sb)); | |
251 | } | |
252 | ||
253 | if (btrfs_super_num_devices(sb) == 0) { | |
254 | error("number of devices is 0"); | |
255 | goto error_out; | |
256 | } | |
257 | ||
258 | /* | |
259 | * Obvious sys_chunk_array corruptions, it must hold at least one key | |
260 | * and one chunk | |
261 | */ | |
262 | if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { | |
263 | error("system chunk array too big %u > %u", | |
264 | btrfs_super_sys_array_size(sb), | |
265 | BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); | |
266 | goto error_out; | |
267 | } | |
268 | if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) | |
269 | + sizeof(struct btrfs_chunk)) { | |
270 | error("system chunk array too small %u < %zu", | |
271 | btrfs_super_sys_array_size(sb), | |
272 | sizeof(struct btrfs_disk_key) + | |
273 | sizeof(struct btrfs_chunk)); | |
274 | goto error_out; | |
275 | } | |
276 | ||
277 | return 0; | |
278 | ||
279 | error_out: | |
280 | error("superblock checksum matches but it has invalid members"); | |
281 | return -EIO; | |
282 | } | |
283 | ||
284 | /* | |
285 | * btrfs_read_dev_super - read a valid primary superblock from a block device | |
286 | * @desc,@part: file descriptor of the device | |
287 | * @sb: buffer where the superblock is going to be read in | |
288 | * | |
289 | * Unlike the btrfs-progs/kernel version, here we ony care about the first | |
290 | * super block, thus it's much simpler. | |
291 | */ | |
292 | int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part, | |
293 | struct btrfs_super_block *sb) | |
294 | { | |
9e8bb078 | 295 | ALLOC_CACHE_ALIGN_BUFFER(char, tmp, BTRFS_SUPER_INFO_SIZE); |
4aebb994 QW |
296 | struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp; |
297 | int ret; | |
298 | ||
299 | ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE, | |
300 | BTRFS_SUPER_INFO_OFFSET); | |
301 | if (ret < BTRFS_SUPER_INFO_SIZE) | |
302 | return -EIO; | |
303 | ||
304 | if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET) | |
305 | return -EIO; | |
306 | ||
307 | if (btrfs_check_super(buf)) | |
308 | return -EIO; | |
309 | ||
310 | memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE); | |
311 | return 0; | |
312 | } | |
313 | ||
75b0817b QW |
314 | static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, |
315 | int verify, int silent, u16 csum_type) | |
316 | { | |
317 | u8 result[BTRFS_CSUM_SIZE]; | |
318 | u32 len; | |
319 | ||
320 | len = buf->len - BTRFS_CSUM_SIZE; | |
321 | btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE, | |
322 | result, len); | |
323 | ||
324 | if (verify) { | |
325 | if (memcmp_extent_buffer(buf, result, 0, csum_size)) { | |
326 | /* FIXME: format */ | |
327 | if (!silent) | |
328 | printk("checksum verify failed on %llu found %08X wanted %08X\n", | |
329 | (unsigned long long)buf->start, | |
330 | result[0], | |
331 | buf->data[0]); | |
332 | return 1; | |
333 | } | |
334 | } else { | |
335 | write_extent_buffer(buf, result, 0, csum_size); | |
336 | } | |
337 | return 0; | |
338 | } | |
339 | ||
340 | int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify, | |
341 | u16 csum_type) | |
342 | { | |
343 | return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type); | |
344 | } | |
345 | ||
346 | static int csum_tree_block(struct btrfs_fs_info *fs_info, | |
347 | struct extent_buffer *buf, int verify) | |
348 | { | |
349 | u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); | |
350 | u16 csum_type = btrfs_super_csum_type(fs_info->super_copy); | |
351 | ||
352 | return csum_tree_block_size(buf, csum_size, verify, csum_type); | |
353 | } | |
354 | ||
355 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, | |
356 | u64 bytenr, u32 blocksize) | |
357 | { | |
358 | return find_extent_buffer(&fs_info->extent_cache, | |
359 | bytenr, blocksize); | |
360 | } | |
361 | ||
362 | struct extent_buffer* btrfs_find_create_tree_block( | |
363 | struct btrfs_fs_info *fs_info, u64 bytenr) | |
364 | { | |
365 | return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize); | |
366 | } | |
367 | ||
368 | static int verify_parent_transid(struct extent_io_tree *io_tree, | |
369 | struct extent_buffer *eb, u64 parent_transid, | |
370 | int ignore) | |
371 | { | |
372 | int ret; | |
373 | ||
374 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | |
375 | return 0; | |
376 | ||
377 | if (extent_buffer_uptodate(eb) && | |
378 | btrfs_header_generation(eb) == parent_transid) { | |
379 | ret = 0; | |
380 | goto out; | |
381 | } | |
382 | printk("parent transid verify failed on %llu wanted %llu found %llu\n", | |
383 | (unsigned long long)eb->start, | |
384 | (unsigned long long)parent_transid, | |
385 | (unsigned long long)btrfs_header_generation(eb)); | |
386 | if (ignore) { | |
387 | eb->flags |= EXTENT_BAD_TRANSID; | |
388 | printk("Ignoring transid failure\n"); | |
389 | return 0; | |
390 | } | |
391 | ||
392 | ret = 1; | |
393 | out: | |
394 | clear_extent_buffer_uptodate(eb); | |
395 | return ret; | |
396 | ||
397 | } | |
398 | ||
75b0817b QW |
399 | int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror) |
400 | { | |
401 | unsigned long offset = 0; | |
402 | struct btrfs_multi_bio *multi = NULL; | |
403 | struct btrfs_device *device; | |
404 | int ret = 0; | |
405 | u64 read_len; | |
406 | unsigned long bytes_left = eb->len; | |
407 | ||
408 | while (bytes_left) { | |
409 | read_len = bytes_left; | |
410 | device = NULL; | |
411 | ||
412 | ret = btrfs_map_block(info, READ, eb->start + offset, | |
413 | &read_len, &multi, mirror, NULL); | |
414 | if (ret) { | |
415 | printk("Couldn't map the block %Lu\n", eb->start + offset); | |
416 | kfree(multi); | |
417 | return -EIO; | |
418 | } | |
419 | device = multi->stripes[0].dev; | |
420 | ||
421 | if (!device->desc || !device->part) { | |
422 | kfree(multi); | |
423 | return -EIO; | |
424 | } | |
425 | ||
426 | if (read_len > bytes_left) | |
427 | read_len = bytes_left; | |
428 | ||
429 | ret = read_extent_from_disk(device->desc, device->part, | |
430 | multi->stripes[0].physical, eb, | |
431 | offset, read_len); | |
432 | kfree(multi); | |
433 | multi = NULL; | |
434 | ||
435 | if (ret) | |
436 | return -EIO; | |
437 | offset += read_len; | |
438 | bytes_left -= read_len; | |
439 | } | |
440 | return 0; | |
441 | } | |
442 | ||
443 | struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, | |
444 | u64 parent_transid) | |
445 | { | |
446 | int ret; | |
447 | struct extent_buffer *eb; | |
448 | u64 best_transid = 0; | |
449 | u32 sectorsize = fs_info->sectorsize; | |
450 | int mirror_num = 1; | |
451 | int good_mirror = 0; | |
452 | int candidate_mirror = 0; | |
453 | int num_copies; | |
454 | int ignore = 0; | |
455 | ||
456 | /* | |
457 | * Don't even try to create tree block for unaligned tree block | |
458 | * bytenr. | |
459 | * Such unaligned tree block will free overlapping extent buffer, | |
460 | * causing use-after-free bugs for fuzzed images. | |
461 | */ | |
462 | if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) { | |
463 | error("tree block bytenr %llu is not aligned to sectorsize %u", | |
464 | bytenr, sectorsize); | |
465 | return ERR_PTR(-EIO); | |
466 | } | |
467 | ||
468 | eb = btrfs_find_create_tree_block(fs_info, bytenr); | |
469 | if (!eb) | |
470 | return ERR_PTR(-ENOMEM); | |
471 | ||
472 | if (btrfs_buffer_uptodate(eb, parent_transid)) | |
473 | return eb; | |
474 | ||
475 | num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); | |
476 | while (1) { | |
477 | ret = read_whole_eb(fs_info, eb, mirror_num); | |
478 | if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 && | |
479 | check_tree_block(fs_info, eb) == 0 && | |
480 | verify_parent_transid(&fs_info->extent_cache, eb, | |
481 | parent_transid, ignore) == 0) { | |
482 | /* | |
483 | * check_tree_block() is less strict to allow btrfs | |
484 | * check to get raw eb with bad key order and fix it. | |
485 | * But we still need to try to get a good copy if | |
486 | * possible, or bad key order can go into tools like | |
487 | * btrfs ins dump-tree. | |
488 | */ | |
489 | if (btrfs_header_level(eb)) | |
490 | ret = btrfs_check_node(fs_info, NULL, eb); | |
491 | else | |
492 | ret = btrfs_check_leaf(fs_info, NULL, eb); | |
493 | if (!ret || candidate_mirror == mirror_num) { | |
494 | btrfs_set_buffer_uptodate(eb); | |
495 | return eb; | |
496 | } | |
497 | if (candidate_mirror <= 0) | |
498 | candidate_mirror = mirror_num; | |
499 | } | |
500 | if (ignore) { | |
501 | if (candidate_mirror > 0) { | |
502 | mirror_num = candidate_mirror; | |
503 | continue; | |
504 | } | |
505 | if (check_tree_block(fs_info, eb)) | |
506 | print_tree_block_error(fs_info, eb, | |
507 | check_tree_block(fs_info, eb)); | |
508 | else | |
509 | fprintf(stderr, "Csum didn't match\n"); | |
510 | ret = -EIO; | |
511 | break; | |
512 | } | |
513 | if (num_copies == 1) { | |
514 | ignore = 1; | |
515 | continue; | |
516 | } | |
517 | if (btrfs_header_generation(eb) > best_transid) { | |
518 | best_transid = btrfs_header_generation(eb); | |
519 | good_mirror = mirror_num; | |
520 | } | |
521 | mirror_num++; | |
522 | if (mirror_num > num_copies) { | |
523 | if (candidate_mirror > 0) | |
524 | mirror_num = candidate_mirror; | |
525 | else | |
526 | mirror_num = good_mirror; | |
527 | ignore = 1; | |
528 | continue; | |
529 | } | |
530 | } | |
531 | /* | |
532 | * We failed to read this tree block, it be should deleted right now | |
533 | * to avoid stale cache populate the cache. | |
534 | */ | |
535 | free_extent_buffer(eb); | |
536 | return ERR_PTR(ret); | |
537 | } | |
f06bfcf5 | 538 | |
a26a6bed QW |
539 | int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical, |
540 | u64 *len, int mirror) | |
541 | { | |
11d56701 QW |
542 | u64 orig_len = *len; |
543 | u64 cur = logical; | |
a26a6bed QW |
544 | struct btrfs_multi_bio *multi = NULL; |
545 | struct btrfs_device *device; | |
546 | int ret = 0; | |
a26a6bed | 547 | |
11d56701 QW |
548 | while (cur < logical + orig_len) { |
549 | u64 cur_len = logical + orig_len - cur; | |
a26a6bed | 550 | |
11d56701 QW |
551 | ret = btrfs_map_block(fs_info, READ, cur, &cur_len, &multi, |
552 | mirror, NULL); | |
553 | if (ret) { | |
554 | error("Couldn't map the block %llu", cur); | |
555 | goto err; | |
556 | } | |
557 | device = multi->stripes[0].dev; | |
558 | if (!device->desc || !device->part) { | |
559 | error("devid %llu is missing", device->devid); | |
560 | ret = -EIO; | |
561 | goto err; | |
562 | } | |
563 | ret = __btrfs_devread(device->desc, device->part, | |
564 | data + (cur - logical), cur_len, | |
565 | multi->stripes[0].physical); | |
566 | if (ret != cur_len) { | |
567 | error("read failed on devid %llu physical %llu", | |
568 | device->devid, multi->stripes[0].physical); | |
569 | ret = -EIO; | |
570 | goto err; | |
571 | } | |
572 | cur += cur_len; | |
a26a6bed | 573 | ret = 0; |
11d56701 | 574 | } |
a26a6bed QW |
575 | err: |
576 | kfree(multi); | |
577 | return ret; | |
578 | } | |
579 | ||
f06bfcf5 QW |
580 | void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, |
581 | u64 objectid) | |
582 | { | |
583 | root->node = NULL; | |
584 | root->track_dirty = 0; | |
585 | ||
586 | root->fs_info = fs_info; | |
587 | root->objectid = objectid; | |
588 | root->last_trans = 0; | |
589 | root->last_inode_alloc = 0; | |
590 | ||
591 | memset(&root->root_key, 0, sizeof(root->root_key)); | |
592 | memset(&root->root_item, 0, sizeof(root->root_item)); | |
593 | root->root_key.objectid = objectid; | |
594 | } | |
595 | ||
596 | static int find_and_setup_root(struct btrfs_root *tree_root, | |
597 | struct btrfs_fs_info *fs_info, | |
598 | u64 objectid, struct btrfs_root *root) | |
599 | { | |
600 | int ret; | |
601 | u64 generation; | |
602 | ||
603 | btrfs_setup_root(root, fs_info, objectid); | |
604 | ret = btrfs_find_last_root(tree_root, objectid, | |
605 | &root->root_item, &root->root_key); | |
606 | if (ret) | |
607 | return ret; | |
608 | ||
609 | generation = btrfs_root_generation(&root->root_item); | |
610 | root->node = read_tree_block(fs_info, | |
611 | btrfs_root_bytenr(&root->root_item), generation); | |
612 | if (!extent_buffer_uptodate(root->node)) | |
613 | return -EIO; | |
614 | ||
615 | return 0; | |
616 | } | |
617 | ||
618 | int btrfs_free_fs_root(struct btrfs_root *root) | |
619 | { | |
620 | if (root->node) | |
621 | free_extent_buffer(root->node); | |
622 | kfree(root); | |
623 | return 0; | |
624 | } | |
625 | ||
626 | static void __free_fs_root(struct rb_node *node) | |
627 | { | |
628 | struct btrfs_root *root; | |
629 | ||
630 | root = container_of(node, struct btrfs_root, rb_node); | |
631 | btrfs_free_fs_root(root); | |
632 | } | |
633 | ||
634 | FREE_RB_BASED_TREE(fs_roots, __free_fs_root); | |
635 | ||
636 | struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info, | |
637 | struct btrfs_key *location) | |
638 | { | |
639 | struct btrfs_root *root; | |
640 | struct btrfs_root *tree_root = fs_info->tree_root; | |
641 | struct btrfs_path *path; | |
642 | struct extent_buffer *l; | |
643 | u64 generation; | |
644 | int ret = 0; | |
645 | ||
646 | root = calloc(1, sizeof(*root)); | |
647 | if (!root) | |
648 | return ERR_PTR(-ENOMEM); | |
649 | if (location->offset == (u64)-1) { | |
650 | ret = find_and_setup_root(tree_root, fs_info, | |
651 | location->objectid, root); | |
652 | if (ret) { | |
653 | free(root); | |
654 | return ERR_PTR(ret); | |
655 | } | |
656 | goto insert; | |
657 | } | |
658 | ||
659 | btrfs_setup_root(root, fs_info, | |
660 | location->objectid); | |
661 | ||
662 | path = btrfs_alloc_path(); | |
663 | if (!path) { | |
664 | free(root); | |
665 | return ERR_PTR(-ENOMEM); | |
666 | } | |
667 | ||
668 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | |
669 | if (ret != 0) { | |
670 | if (ret > 0) | |
671 | ret = -ENOENT; | |
672 | goto out; | |
673 | } | |
674 | l = path->nodes[0]; | |
675 | read_extent_buffer(l, &root->root_item, | |
676 | btrfs_item_ptr_offset(l, path->slots[0]), | |
677 | sizeof(root->root_item)); | |
678 | memcpy(&root->root_key, location, sizeof(*location)); | |
679 | ||
680 | /* If this root is already an orphan, no need to read */ | |
681 | if (btrfs_root_refs(&root->root_item) == 0) { | |
682 | ret = -ENOENT; | |
683 | goto out; | |
684 | } | |
685 | ret = 0; | |
686 | out: | |
687 | btrfs_free_path(path); | |
688 | if (ret) { | |
689 | free(root); | |
690 | return ERR_PTR(ret); | |
691 | } | |
692 | generation = btrfs_root_generation(&root->root_item); | |
693 | root->node = read_tree_block(fs_info, | |
694 | btrfs_root_bytenr(&root->root_item), generation); | |
695 | if (!extent_buffer_uptodate(root->node)) { | |
696 | free(root); | |
697 | return ERR_PTR(-EIO); | |
698 | } | |
699 | insert: | |
700 | root->ref_cows = 1; | |
701 | return root; | |
702 | } | |
703 | ||
704 | static int btrfs_fs_roots_compare_objectids(struct rb_node *node, | |
705 | void *data) | |
706 | { | |
707 | u64 objectid = *((u64 *)data); | |
708 | struct btrfs_root *root; | |
709 | ||
710 | root = rb_entry(node, struct btrfs_root, rb_node); | |
711 | if (objectid > root->objectid) | |
712 | return 1; | |
713 | else if (objectid < root->objectid) | |
714 | return -1; | |
715 | else | |
716 | return 0; | |
717 | } | |
718 | ||
719 | int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2) | |
720 | { | |
721 | struct btrfs_root *root; | |
722 | ||
723 | root = rb_entry(node2, struct btrfs_root, rb_node); | |
724 | return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid); | |
725 | } | |
726 | ||
727 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |
728 | struct btrfs_key *location) | |
729 | { | |
730 | struct btrfs_root *root; | |
731 | struct rb_node *node; | |
732 | int ret; | |
733 | u64 objectid = location->objectid; | |
734 | ||
735 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | |
736 | return fs_info->tree_root; | |
737 | if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) | |
738 | return fs_info->chunk_root; | |
739 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) | |
740 | return fs_info->csum_root; | |
1afb9f22 | 741 | BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID); |
f06bfcf5 QW |
742 | |
743 | node = rb_search(&fs_info->fs_root_tree, (void *)&objectid, | |
744 | btrfs_fs_roots_compare_objectids, NULL); | |
745 | if (node) | |
746 | return container_of(node, struct btrfs_root, rb_node); | |
747 | ||
748 | root = btrfs_read_fs_root_no_cache(fs_info, location); | |
749 | if (IS_ERR(root)) | |
750 | return root; | |
751 | ||
752 | ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node, | |
753 | btrfs_fs_roots_compare_roots); | |
754 | BUG_ON(ret); | |
755 | return root; | |
756 | } | |
757 | ||
758 | void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) | |
759 | { | |
760 | free(fs_info->tree_root); | |
761 | free(fs_info->chunk_root); | |
762 | free(fs_info->csum_root); | |
763 | free(fs_info->super_copy); | |
764 | free(fs_info); | |
765 | } | |
766 | ||
767 | struct btrfs_fs_info *btrfs_new_fs_info(void) | |
768 | { | |
769 | struct btrfs_fs_info *fs_info; | |
770 | ||
771 | fs_info = calloc(1, sizeof(struct btrfs_fs_info)); | |
772 | if (!fs_info) | |
773 | return NULL; | |
774 | ||
775 | fs_info->tree_root = calloc(1, sizeof(struct btrfs_root)); | |
776 | fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root)); | |
777 | fs_info->csum_root = calloc(1, sizeof(struct btrfs_root)); | |
778 | fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE); | |
779 | ||
780 | if (!fs_info->tree_root || !fs_info->chunk_root || | |
781 | !fs_info->csum_root || !fs_info->super_copy) | |
782 | goto free_all; | |
783 | ||
784 | extent_io_tree_init(&fs_info->extent_cache); | |
785 | ||
786 | fs_info->fs_root_tree = RB_ROOT; | |
787 | cache_tree_init(&fs_info->mapping_tree.cache_tree); | |
788 | ||
f06bfcf5 QW |
789 | return fs_info; |
790 | free_all: | |
791 | btrfs_free_fs_info(fs_info); | |
792 | return NULL; | |
793 | } | |
794 | ||
795 | static int setup_root_or_create_block(struct btrfs_fs_info *fs_info, | |
796 | struct btrfs_root *info_root, | |
797 | u64 objectid, char *str) | |
798 | { | |
799 | struct btrfs_root *root = fs_info->tree_root; | |
800 | int ret; | |
801 | ||
802 | ret = find_and_setup_root(root, fs_info, objectid, info_root); | |
803 | if (ret) { | |
804 | error("could not setup %s tree", str); | |
805 | return -EIO; | |
806 | } | |
807 | ||
808 | return 0; | |
809 | } | |
810 | ||
94509b79 MK |
811 | static int get_default_subvolume(struct btrfs_fs_info *fs_info, |
812 | struct btrfs_key *key_ret) | |
813 | { | |
814 | struct btrfs_root *root = fs_info->tree_root; | |
815 | struct btrfs_dir_item *dir_item; | |
816 | struct btrfs_path path; | |
817 | int ret = 0; | |
818 | ||
819 | btrfs_init_path(&path); | |
820 | ||
821 | dir_item = btrfs_lookup_dir_item(NULL, root, &path, | |
822 | BTRFS_ROOT_TREE_DIR_OBJECTID, | |
823 | "default", 7, 0); | |
824 | if (IS_ERR(dir_item)) { | |
825 | ret = PTR_ERR(dir_item); | |
826 | goto out; | |
827 | } | |
828 | ||
829 | btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret); | |
830 | out: | |
831 | btrfs_release_path(&path); | |
832 | return ret; | |
833 | } | |
834 | ||
f06bfcf5 QW |
835 | int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info) |
836 | { | |
837 | struct btrfs_super_block *sb = fs_info->super_copy; | |
838 | struct btrfs_root *root; | |
839 | struct btrfs_key key; | |
840 | u64 root_tree_bytenr; | |
841 | u64 generation; | |
842 | int ret; | |
843 | ||
844 | root = fs_info->tree_root; | |
845 | btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID); | |
846 | generation = btrfs_super_generation(sb); | |
847 | ||
848 | root_tree_bytenr = btrfs_super_root(sb); | |
849 | ||
850 | root->node = read_tree_block(fs_info, root_tree_bytenr, generation); | |
851 | if (!extent_buffer_uptodate(root->node)) { | |
852 | fprintf(stderr, "Couldn't read tree root\n"); | |
853 | return -EIO; | |
854 | } | |
855 | ||
856 | ret = setup_root_or_create_block(fs_info, fs_info->csum_root, | |
857 | BTRFS_CSUM_TREE_OBJECTID, "csum"); | |
858 | if (ret) | |
859 | return ret; | |
860 | fs_info->csum_root->track_dirty = 1; | |
861 | ||
862 | fs_info->last_trans_committed = generation; | |
863 | ||
94509b79 MK |
864 | ret = get_default_subvolume(fs_info, &key); |
865 | if (ret) { | |
866 | /* | |
867 | * The default dir item isn't there. Linux kernel behaviour is | |
868 | * to silently use the top-level subvolume in this case. | |
869 | */ | |
870 | key.objectid = BTRFS_FS_TREE_OBJECTID; | |
871 | key.type = BTRFS_ROOT_ITEM_KEY; | |
872 | key.offset = (u64)-1; | |
873 | } | |
874 | ||
f06bfcf5 QW |
875 | fs_info->fs_root = btrfs_read_fs_root(fs_info, &key); |
876 | ||
877 | if (IS_ERR(fs_info->fs_root)) | |
878 | return -EIO; | |
879 | return 0; | |
880 | } | |
881 | ||
882 | void btrfs_release_all_roots(struct btrfs_fs_info *fs_info) | |
883 | { | |
884 | if (fs_info->csum_root) | |
885 | free_extent_buffer(fs_info->csum_root->node); | |
886 | if (fs_info->tree_root) | |
887 | free_extent_buffer(fs_info->tree_root->node); | |
888 | if (fs_info->chunk_root) | |
889 | free_extent_buffer(fs_info->chunk_root->node); | |
890 | } | |
891 | ||
892 | static void free_map_lookup(struct cache_extent *ce) | |
893 | { | |
894 | struct map_lookup *map; | |
895 | ||
896 | map = container_of(ce, struct map_lookup, ce); | |
897 | kfree(map); | |
898 | } | |
899 | ||
900 | FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup); | |
901 | ||
902 | void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info) | |
903 | { | |
904 | free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree); | |
905 | extent_io_tree_cleanup(&fs_info->extent_cache); | |
906 | } | |
907 | ||
908 | static int btrfs_scan_fs_devices(struct blk_desc *desc, | |
909 | struct disk_partition *part, | |
910 | struct btrfs_fs_devices **fs_devices) | |
911 | { | |
912 | u64 total_devs; | |
913 | int ret; | |
914 | ||
915 | if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET, | |
916 | desc->blksz) > (part->size << desc->log2blksz)) { | |
f337fb9e SG |
917 | log_debug("superblock end %u is larger than device size " LBAFU, |
918 | BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET, | |
919 | part->size << desc->log2blksz); | |
f06bfcf5 QW |
920 | return -EINVAL; |
921 | } | |
922 | ||
923 | ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs); | |
924 | if (ret) { | |
64acd46a SG |
925 | /* |
926 | * Avoid showing this when probing for a possible Btrfs | |
927 | * | |
928 | * fprintf(stderr, "No valid Btrfs found\n"); | |
929 | */ | |
f06bfcf5 QW |
930 | return ret; |
931 | } | |
932 | return 0; | |
933 | } | |
934 | ||
935 | int btrfs_check_fs_compatibility(struct btrfs_super_block *sb) | |
936 | { | |
937 | u64 features; | |
938 | ||
939 | features = btrfs_super_incompat_flags(sb) & | |
940 | ~BTRFS_FEATURE_INCOMPAT_SUPP; | |
941 | if (features) { | |
942 | printk("couldn't open because of unsupported " | |
943 | "option features (%llx).\n", | |
944 | (unsigned long long)features); | |
945 | return -ENOTSUPP; | |
946 | } | |
947 | ||
948 | features = btrfs_super_incompat_flags(sb); | |
949 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | |
950 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | |
951 | btrfs_set_super_incompat_flags(sb, features); | |
952 | } | |
953 | ||
954 | return 0; | |
955 | } | |
956 | ||
957 | static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info) | |
958 | { | |
959 | struct btrfs_super_block *sb = fs_info->super_copy; | |
960 | u64 chunk_root_bytenr; | |
961 | u64 generation; | |
962 | int ret; | |
963 | ||
964 | btrfs_setup_root(fs_info->chunk_root, fs_info, | |
965 | BTRFS_CHUNK_TREE_OBJECTID); | |
966 | ||
967 | ret = btrfs_read_sys_array(fs_info); | |
968 | if (ret) | |
969 | return ret; | |
970 | ||
971 | generation = btrfs_super_chunk_root_generation(sb); | |
972 | chunk_root_bytenr = btrfs_super_chunk_root(sb); | |
973 | ||
974 | fs_info->chunk_root->node = read_tree_block(fs_info, | |
975 | chunk_root_bytenr, | |
976 | generation); | |
977 | if (!extent_buffer_uptodate(fs_info->chunk_root->node)) { | |
978 | error("cannot read chunk root"); | |
979 | return -EIO; | |
980 | } | |
981 | ||
982 | ret = btrfs_read_chunk_tree(fs_info); | |
983 | if (ret) { | |
984 | fprintf(stderr, "Couldn't read chunk tree\n"); | |
985 | return ret; | |
986 | } | |
987 | return 0; | |
988 | } | |
989 | ||
990 | struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc, | |
991 | struct disk_partition *part) | |
992 | { | |
993 | struct btrfs_fs_info *fs_info; | |
994 | struct btrfs_super_block *disk_super; | |
995 | struct btrfs_fs_devices *fs_devices = NULL; | |
996 | struct extent_buffer *eb; | |
997 | int ret; | |
998 | ||
999 | fs_info = btrfs_new_fs_info(); | |
1000 | if (!fs_info) { | |
1001 | fprintf(stderr, "Failed to allocate memory for fs_info\n"); | |
1002 | return NULL; | |
1003 | } | |
1004 | ||
1005 | ret = btrfs_scan_fs_devices(desc, part, &fs_devices); | |
1006 | if (ret) | |
1007 | goto out; | |
1008 | ||
1009 | fs_info->fs_devices = fs_devices; | |
1010 | ||
1011 | ret = btrfs_open_devices(fs_devices); | |
1012 | if (ret) | |
1013 | goto out; | |
1014 | ||
1015 | disk_super = fs_info->super_copy; | |
1016 | ret = btrfs_read_dev_super(desc, part, disk_super); | |
1017 | if (ret) { | |
64acd46a | 1018 | debug("No valid btrfs found\n"); |
f06bfcf5 QW |
1019 | goto out_devices; |
1020 | } | |
1021 | ||
1022 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) { | |
1023 | fprintf(stderr, "ERROR: Filesystem UUID change in progress\n"); | |
1024 | goto out_devices; | |
1025 | } | |
1026 | ||
1027 | ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE)); | |
1028 | if (btrfs_fs_incompat(fs_info, METADATA_UUID)) | |
1029 | ASSERT(!memcmp(disk_super->metadata_uuid, | |
1030 | fs_devices->metadata_uuid, BTRFS_FSID_SIZE)); | |
1031 | ||
1032 | fs_info->sectorsize = btrfs_super_sectorsize(disk_super); | |
1033 | fs_info->nodesize = btrfs_super_nodesize(disk_super); | |
1034 | fs_info->stripesize = btrfs_super_stripesize(disk_super); | |
1035 | ||
1036 | ret = btrfs_check_fs_compatibility(fs_info->super_copy); | |
1037 | if (ret) | |
1038 | goto out_devices; | |
1039 | ||
1040 | ret = btrfs_setup_chunk_tree_and_device_map(fs_info); | |
1041 | if (ret) | |
1042 | goto out_chunk; | |
1043 | ||
1044 | /* Chunk tree root is unable to read, return directly */ | |
1045 | if (!fs_info->chunk_root) | |
1046 | return fs_info; | |
1047 | ||
1048 | eb = fs_info->chunk_root->node; | |
1049 | read_extent_buffer(eb, fs_info->chunk_tree_uuid, | |
1050 | btrfs_header_chunk_tree_uuid(eb), | |
1051 | BTRFS_UUID_SIZE); | |
1052 | ||
1053 | ret = btrfs_setup_all_roots(fs_info); | |
1054 | if (ret) | |
1055 | goto out_chunk; | |
1056 | ||
1057 | return fs_info; | |
1058 | ||
1059 | out_chunk: | |
1060 | btrfs_release_all_roots(fs_info); | |
1061 | btrfs_cleanup_all_caches(fs_info); | |
1062 | out_devices: | |
1063 | btrfs_close_devices(fs_devices); | |
1064 | out: | |
1065 | btrfs_free_fs_info(fs_info); | |
1066 | return NULL; | |
1067 | } | |
1068 | ||
1069 | int close_ctree_fs_info(struct btrfs_fs_info *fs_info) | |
1070 | { | |
1071 | int ret; | |
f06bfcf5 QW |
1072 | |
1073 | free_fs_roots_tree(&fs_info->fs_root_tree); | |
1074 | ||
1075 | btrfs_release_all_roots(fs_info); | |
1076 | ret = btrfs_close_devices(fs_info->fs_devices); | |
1077 | btrfs_cleanup_all_caches(fs_info); | |
1078 | btrfs_free_fs_info(fs_info); | |
3b00a6ba | 1079 | return ret; |
f06bfcf5 QW |
1080 | } |
1081 | ||
1082 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) | |
1083 | { | |
1084 | int ret; | |
1085 | ||
1086 | ret = extent_buffer_uptodate(buf); | |
1087 | if (!ret) | |
1088 | return ret; | |
1089 | ||
1090 | ret = verify_parent_transid(&buf->fs_info->extent_cache, buf, | |
1091 | parent_transid, 1); | |
1092 | return !ret; | |
1093 | } | |
1094 | ||
/*
 * Mark extent buffer @eb uptodate.
 *
 * Thin wrapper: forwards to set_extent_buffer_uptodate() and returns its
 * result unchanged.
 */
int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
{
	return set_extent_buffer_uptodate(eb);
}