]> Git Repo - linux.git/blob - drivers/md/dm-vdo/indexer/index-layout.c
Merge tag 'kbuild-v6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy...
[linux.git] / drivers / md / dm-vdo / indexer / index-layout.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2023 Red Hat
4  */
5
6 #include "index-layout.h"
7
8 #include <linux/random.h>
9
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "murmurhash3.h"
13 #include "numeric.h"
14 #include "time-utils.h"
15
16 #include "config.h"
17 #include "open-chapter.h"
18 #include "volume-index.h"
19
20 /*
21  * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
22  * which are computed when the index is created. Every header and region begins on 4K block
23  * boundary. Save regions are further sub-divided into regions of their own.
24  *
25  * Each region has a kind and an instance number. Some kinds only have one instance and therefore
26  * use RL_SOLE_INSTANCE (65535, i.e. -1 as a u16) as the instance number. The RL_KIND_INDEX used to
27  * use instances to represent sub-indices; now, however, there is only ever one sub-index and therefore one instance.
28  * The RL_KIND_VOLUME_INDEX uses instances to record which zone is being saved.
29  *
30  * Every region header has a type and version.
31  *
32  *     +-+-+---------+--------+--------+-+
33  *     | | |   I N D E X  0   101, 0   | |
34  *     |H|C+---------+--------+--------+S|
35  *     |D|f| Volume  | Save   | Save   |e|
36  *     |R|g| Region  | Region | Region |a|
37  *     | | | 201, -1 | 202, 0 | 202, 1 |l|
38  *     +-+-+---------+--------+--------+-+
39  *
40  * The header contains the encoded region layout table as well as some index configuration data.
41  * The sub-index region and its subdivisions are maintained in the same table.
42  *
43  * There are two save regions to preserve the old state in case saving the new state is incomplete.
44  * They are used in alternation. Each save region is further divided into sub-regions.
45  *
46  *     +-+-----+------+------+-----+-----+
47  *     |H| IPM | MI   | MI   |     | OC  |
48  *     |D|     | zone | zone | ... |     |
49  *     |R| 301 | 302  | 302  |     | 303 |
50  *     | | -1  |  0   |  1   |     | -1  |
51  *     +-+-----+------+------+-----+-----+
52  *
53  * The header contains the encoded region layout table as well as index state data for that save.
54  * Each save also has a unique nonce.
55  */
56
/* Size in bytes of the magic_label field of the super block. */
#define MAGIC_SIZE 32
/* Size in bytes of the nonce_info field of the super block. */
#define NONCE_INFO_SIZE 32
/* Number of save regions laid out in a newly created index. */
#define MAX_SAVES 2
60
/* Values stored in the kind field of a struct layout_region. */
enum region_kind {
        RL_KIND_EMPTY = 0,
        RL_KIND_HEADER = 1,
        /* Top-level regions of the index layout */
        RL_KIND_CONFIG = 100,
        RL_KIND_INDEX = 101,
        RL_KIND_SEAL = 102,
        /* Subdivisions of the sub-index region */
        RL_KIND_VOLUME = 201,
        RL_KIND_SAVE = 202,
        /* Subdivisions of a save region */
        RL_KIND_INDEX_PAGE_MAP = 301,
        RL_KIND_VOLUME_INDEX = 302,
        RL_KIND_OPEN_CHAPTER = 303,
};
73
/*
 * Values stored in the type field of a struct region_header. Some region types are historical
 * and are no longer used.
 */
enum region_type {
        RH_TYPE_FREE = 0, /* unused */
        RH_TYPE_SUPER = 1, /* header of the whole index layout */
        RH_TYPE_SAVE = 2, /* header of a save region that holds a save */
        RH_TYPE_CHECKPOINT = 3, /* unused */
        RH_TYPE_UNSAVED = 4, /* header of an empty save region */
};
82
/* Instance number for region kinds that only ever have one instance (all bits set in a u16). */
#define RL_SOLE_INSTANCE 65535
84
/*
 * Super block version 2 is the first released version.
 *
 * Super block version 3 is the normal version used from RHEL 8.2 onwards.
 *
 * Super block versions 4 through 6 were incremental development versions and
 * are not supported.
 *
 * Super block version 7 is used for volumes which have been reduced in size by one chapter in
 * order to make room to prepend LVM metadata to a volume originally created without LVM. This
 * allows the index to retain most of its deduplication records.
 */
#define SUPER_VERSION_MINIMUM 3
#define SUPER_VERSION_CURRENT 3
#define SUPER_VERSION_MAXIMUM 7
100
/* Label copied into the magic_label field of a newly generated super block. */
static const u8 LAYOUT_MAGIC[MAGIC_SIZE] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
/* Magic number written at the start of every encoded region table. */
static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
103
/* Header of a region table; encode_region_table() writes the fields in this order. */
struct region_header {
        /* Set to REGION_MAGIC */
        u64 magic;
        /* Total number of blocks in the region the table describes */
        u64 region_blocks;
        /* An enum region_type value */
        u16 type;
        /* Currently always version 1 */
        u16 version;
        /* Number of layout_region entries following the header */
        u16 region_count;
        /* Size in bytes of the extra data encoded after the region entries */
        u16 payload;
};
113
/* One entry of a region table, describing a contiguous run of blocks. */
struct layout_region {
        /* First block of the region */
        u64 start_block;
        /* Length of the region in blocks */
        u64 block_count;
        /* Encoded as zero */
        u32 __unused;
        /* An enum region_kind value */
        u16 kind;
        /* Instance number, or RL_SOLE_INSTANCE */
        u16 instance;
};
121
/* In-memory region table: a header followed by header.region_count regions. */
struct region_table {
        /* Bytes needed for the encoded header, the regions, and the payload */
        size_t encoded_size;
        struct region_header header;
        struct layout_region regions[];
};
127
/* Per-save data written after the region table in a save header. */
struct index_save_data {
        /* Zero for a save slot that holds no valid save */
        u64 timestamp;
        /* Compared against generate_index_save_nonce() when validating a save */
        u64 nonce;
        /* Currently always version 1 */
        u32 version;
        /* Encoded as zero */
        u32 unused__;
};
135
/* Signature and version pair that begins the index state data of a save. */
struct index_state_version {
        s32 signature;
        s32 version_id;
};
140
/* The version values written ahead of the chapter numbers in a save header. */
static const struct index_state_version INDEX_STATE_VERSION_301 = {
        .signature  = -1,
        .version_id = 301,
};
145
/* Index state stored in a save header when the save has at least one zone. */
struct index_state_data301 {
        struct index_state_version version;
        u64 newest_chapter;
        u64 oldest_chapter;
        u64 last_save;
        /* The final two fields are encoded together as a single zero u64 */
        u32 unused;
        u32 padding;
};
154
/* Layout of one save region and the data recorded in its header. */
struct index_save_layout {
        /* Number of volume index zones saved; zero marks an empty save */
        unsigned int zone_count;
        /* The whole save region */
        struct layout_region index_save;
        /* The first block of the save region */
        struct layout_region header;
        struct layout_region index_page_map;
        /* Unused blocks; only added to the region table when block_count > 0 */
        struct layout_region free_space;
        /* Only the first zone_count entries are used */
        struct layout_region volume_index_zones[MAX_ZONES];
        struct layout_region open_chapter;
        struct index_save_data save_data;
        struct index_state_data301 state_data;
};
166
/* Layout of the single sub-index: its volume followed by its save regions. */
struct sub_index_layout {
        /* Secondary nonce derived from the super block nonce and the sub-index start block */
        u64 nonce;
        /* The whole sub-index region */
        struct layout_region sub_index;
        struct layout_region volume;
        /* Array of save layouts, one per save slot */
        struct index_save_layout *saves;
};
173
/* Super block contents, encoded after the region table in the layout header. */
struct super_block_data {
        /* Copy of LAYOUT_MAGIC */
        u8 magic_label[MAGIC_SIZE];
        /* Time- and random-based bytes from which the nonce is hashed */
        u8 nonce_info[NONCE_INFO_SIZE];
        /* Primary nonce generated from nonce_info */
        u64 nonce;
        u32 version;
        u32 block_size;
        u16 index_count;
        /* Number of save slots in the sub-index */
        u16 max_saves;
        /* Padding reflects a blank field on permanent storage */
        u8 padding[4];
        u64 open_chapter_blocks;
        u64 page_map_blocks;
        /* The last two fields are only encoded for a converted (version 7) super block */
        u64 volume_offset;
        u64 start_offset;
};
189
/* The complete layout of an index on storage. */
struct index_layout {
        /* Used to create the buffered readers and writers for regions */
        struct io_factory *factory;
        size_t factory_size;
        /* Divided by the block size to find the first block of the layout */
        off_t offset;
        struct super_block_data super;
        /* Single-block region holding the region table and super block */
        struct layout_region header;
        /* Single-block region holding the index configuration */
        struct layout_region config;
        struct sub_index_layout index;
        /* Single-block region following the sub-index */
        struct layout_region seal;
        u64 total_blocks;
};
201
/* Region sizes for an index configuration, as filled in by compute_sizes(). */
struct save_layout_sizes {
        unsigned int save_count;
        /* Block size in bytes */
        size_t block_size;
        u64 volume_blocks;
        u64 volume_index_blocks;
        u64 page_map_blocks;
        u64 open_chapter_blocks;
        /* One header block plus the volume index, page map, and open chapter blocks */
        u64 save_blocks;
        /* The volume blocks plus save_count save regions */
        u64 sub_index_blocks;
        /* The sub-index blocks plus three more blocks */
        u64 total_blocks;
        /* total_blocks expressed in bytes */
        size_t total_size;
};
214
215 static inline bool is_converted_super_block(struct super_block_data *super)
216 {
217         return super->version == 7;
218 }
219
220 static int __must_check compute_sizes(const struct uds_configuration *config,
221                                       struct save_layout_sizes *sls)
222 {
223         int result;
224         struct index_geometry *geometry = config->geometry;
225
226         memset(sls, 0, sizeof(*sls));
227         sls->save_count = MAX_SAVES;
228         sls->block_size = UDS_BLOCK_SIZE;
229         sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
230
231         result = uds_compute_volume_index_save_blocks(config, sls->block_size,
232                                                       &sls->volume_index_blocks);
233         if (result != UDS_SUCCESS)
234                 return vdo_log_error_strerror(result, "cannot compute index save size");
235
236         sls->page_map_blocks =
237                 DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
238                              sls->block_size);
239         sls->open_chapter_blocks =
240                 DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
241                              sls->block_size);
242         sls->save_blocks =
243                 1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
244         sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
245         sls->total_blocks = 3 + sls->sub_index_blocks;
246         sls->total_size = sls->total_blocks * sls->block_size;
247
248         return UDS_SUCCESS;
249 }
250
251 int uds_compute_index_size(const struct uds_parameters *parameters, u64 *index_size)
252 {
253         int result;
254         struct uds_configuration *index_config;
255         struct save_layout_sizes sizes;
256
257         if (index_size == NULL) {
258                 vdo_log_error("Missing output size pointer");
259                 return -EINVAL;
260         }
261
262         result = uds_make_configuration(parameters, &index_config);
263         if (result != UDS_SUCCESS) {
264                 vdo_log_error_strerror(result, "cannot compute index size");
265                 return uds_status_to_errno(result);
266         }
267
268         result = compute_sizes(index_config, &sizes);
269         uds_free_configuration(index_config);
270         if (result != UDS_SUCCESS)
271                 return uds_status_to_errno(result);
272
273         *index_size = sizes.total_size;
274         return UDS_SUCCESS;
275 }
276
277 /* Create unique data using the current time and a pseudorandom number. */
278 static void create_unique_nonce_data(u8 *buffer)
279 {
280         ktime_t now = current_time_ns(CLOCK_REALTIME);
281         u32 rand;
282         size_t offset = 0;
283
284         get_random_bytes(&rand, sizeof(u32));
285         memcpy(buffer + offset, &now, sizeof(now));
286         offset += sizeof(now);
287         memcpy(buffer + offset, &rand, sizeof(rand));
288         offset += sizeof(rand);
289         while (offset < NONCE_INFO_SIZE) {
290                 size_t len = min(NONCE_INFO_SIZE - offset, offset);
291
292                 memcpy(buffer + offset, buffer, len);
293                 offset += len;
294         }
295 }
296
297 static u64 hash_stuff(u64 start, const void *data, size_t len)
298 {
299         u32 seed = start ^ (start >> 27);
300         u8 hash_buffer[16];
301
302         murmurhash3_128(data, len, seed, hash_buffer);
303         return get_unaligned_le64(hash_buffer + 4);
304 }
305
306 /* Generate a primary nonce from the provided data. */
307 static u64 generate_primary_nonce(const void *data, size_t len)
308 {
309         return hash_stuff(0xa1b1e0fc, data, len);
310 }
311
312 /*
313  * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
314  * hashing the original nonce and the data to produce a new nonce.
315  */
316 static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
317 {
318         return hash_stuff(nonce + 1, data, len);
319 }
320
321 static int __must_check open_layout_reader(struct index_layout *layout,
322                                            struct layout_region *lr, off_t offset,
323                                            struct buffered_reader **reader_ptr)
324 {
325         return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
326                                         lr->block_count, reader_ptr);
327 }
328
329 static int open_region_reader(struct index_layout *layout, struct layout_region *region,
330                               struct buffered_reader **reader_ptr)
331 {
332         return open_layout_reader(layout, region, -layout->super.start_offset,
333                                   reader_ptr);
334 }
335
336 static int __must_check open_layout_writer(struct index_layout *layout,
337                                            struct layout_region *lr, off_t offset,
338                                            struct buffered_writer **writer_ptr)
339 {
340         return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
341                                         lr->block_count, writer_ptr);
342 }
343
344 static int open_region_writer(struct index_layout *layout, struct layout_region *region,
345                               struct buffered_writer **writer_ptr)
346 {
347         return open_layout_writer(layout, region, -layout->super.start_offset,
348                                   writer_ptr);
349 }
350
351 static void generate_super_block_data(struct save_layout_sizes *sls,
352                                       struct super_block_data *super)
353 {
354         memset(super, 0, sizeof(*super));
355         memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
356         create_unique_nonce_data(super->nonce_info);
357
358         super->nonce = generate_primary_nonce(super->nonce_info,
359                                               sizeof(super->nonce_info));
360         super->version = SUPER_VERSION_CURRENT;
361         super->block_size = sls->block_size;
362         super->index_count = 1;
363         super->max_saves = sls->save_count;
364         super->open_chapter_blocks = sls->open_chapter_blocks;
365         super->page_map_blocks = sls->page_map_blocks;
366         super->volume_offset = 0;
367         super->start_offset = 0;
368 }
369
370 static void define_sub_index_nonce(struct index_layout *layout)
371 {
372         struct sub_index_nonce_data {
373                 u64 offset;
374                 u16 index_id;
375         };
376         struct sub_index_layout *sil = &layout->index;
377         u64 primary_nonce = layout->super.nonce;
378         u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
379         size_t offset = 0;
380
381         encode_u64_le(buffer, &offset, sil->sub_index.start_block);
382         encode_u16_le(buffer, &offset, 0);
383         sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
384         if (sil->nonce == 0) {
385                 sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
386                                                       sizeof(buffer));
387         }
388 }
389
390 static void setup_sub_index(struct index_layout *layout, u64 start_block,
391                             struct save_layout_sizes *sls)
392 {
393         struct sub_index_layout *sil = &layout->index;
394         u64 next_block = start_block;
395         unsigned int i;
396
397         sil->sub_index = (struct layout_region) {
398                 .start_block = start_block,
399                 .block_count = sls->sub_index_blocks,
400                 .kind = RL_KIND_INDEX,
401                 .instance = 0,
402         };
403
404         sil->volume = (struct layout_region) {
405                 .start_block = next_block,
406                 .block_count = sls->volume_blocks,
407                 .kind = RL_KIND_VOLUME,
408                 .instance = RL_SOLE_INSTANCE,
409         };
410
411         next_block += sls->volume_blocks;
412
413         for (i = 0; i < sls->save_count; i++) {
414                 sil->saves[i].index_save = (struct layout_region) {
415                         .start_block = next_block,
416                         .block_count = sls->save_blocks,
417                         .kind = RL_KIND_SAVE,
418                         .instance = i,
419                 };
420
421                 next_block += sls->save_blocks;
422         }
423
424         define_sub_index_nonce(layout);
425 }
426
427 static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
428 {
429         u64 next_block = layout->offset / sls->block_size;
430
431         layout->total_blocks = sls->total_blocks;
432         generate_super_block_data(sls, &layout->super);
433         layout->header = (struct layout_region) {
434                 .start_block = next_block++,
435                 .block_count = 1,
436                 .kind = RL_KIND_HEADER,
437                 .instance = RL_SOLE_INSTANCE,
438         };
439
440         layout->config = (struct layout_region) {
441                 .start_block = next_block++,
442                 .block_count = 1,
443                 .kind = RL_KIND_CONFIG,
444                 .instance = RL_SOLE_INSTANCE,
445         };
446
447         setup_sub_index(layout, next_block, sls);
448         next_block += sls->sub_index_blocks;
449
450         layout->seal = (struct layout_region) {
451                 .start_block = next_block,
452                 .block_count = 1,
453                 .kind = RL_KIND_SEAL,
454                 .instance = RL_SOLE_INSTANCE,
455         };
456 }
457
458 static int __must_check make_index_save_region_table(struct index_save_layout *isl,
459                                                      struct region_table **table_ptr)
460 {
461         int result;
462         unsigned int z;
463         struct region_table *table;
464         struct layout_region *lr;
465         u16 region_count;
466         size_t payload;
467         size_t type;
468
469         if (isl->zone_count > 0) {
470                 /*
471                  * Normal save regions: header, page map, volume index zones,
472                  * open chapter, and possibly free space.
473                  */
474                 region_count = 3 + isl->zone_count;
475                 if (isl->free_space.block_count > 0)
476                         region_count++;
477
478                 payload = sizeof(isl->save_data) + sizeof(isl->state_data);
479                 type = RH_TYPE_SAVE;
480         } else {
481                 /* Empty save regions: header, page map, free space. */
482                 region_count = 3;
483                 payload = sizeof(isl->save_data);
484                 type = RH_TYPE_UNSAVED;
485         }
486
487         result = vdo_allocate_extended(struct region_table, region_count,
488                                        struct layout_region,
489                                        "layout region table for ISL", &table);
490         if (result != VDO_SUCCESS)
491                 return result;
492
493         lr = &table->regions[0];
494         *lr++ = isl->header;
495         *lr++ = isl->index_page_map;
496         for (z = 0; z < isl->zone_count; z++)
497                 *lr++ = isl->volume_index_zones[z];
498
499         if (isl->zone_count > 0)
500                 *lr++ = isl->open_chapter;
501
502         if (isl->free_space.block_count > 0)
503                 *lr++ = isl->free_space;
504
505         table->header = (struct region_header) {
506                 .magic = REGION_MAGIC,
507                 .region_blocks = isl->index_save.block_count,
508                 .type = type,
509                 .version = 1,
510                 .region_count = region_count,
511                 .payload = payload,
512         };
513
514         table->encoded_size = (sizeof(struct region_header) + payload +
515                                region_count * sizeof(struct layout_region));
516         *table_ptr = table;
517         return UDS_SUCCESS;
518 }
519
520 static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
521 {
522         unsigned int i;
523
524         encode_u64_le(buffer, offset, REGION_MAGIC);
525         encode_u64_le(buffer, offset, table->header.region_blocks);
526         encode_u16_le(buffer, offset, table->header.type);
527         encode_u16_le(buffer, offset, table->header.version);
528         encode_u16_le(buffer, offset, table->header.region_count);
529         encode_u16_le(buffer, offset, table->header.payload);
530
531         for (i = 0; i < table->header.region_count; i++) {
532                 encode_u64_le(buffer, offset, table->regions[i].start_block);
533                 encode_u64_le(buffer, offset, table->regions[i].block_count);
534                 encode_u32_le(buffer, offset, 0);
535                 encode_u16_le(buffer, offset, table->regions[i].kind);
536                 encode_u16_le(buffer, offset, table->regions[i].instance);
537         }
538 }
539
540 static int __must_check write_index_save_header(struct index_save_layout *isl,
541                                                 struct region_table *table,
542                                                 struct buffered_writer *writer)
543 {
544         int result;
545         u8 *buffer;
546         size_t offset = 0;
547
548         result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
549         if (result != VDO_SUCCESS)
550                 return result;
551
552         encode_region_table(buffer, &offset, table);
553         encode_u64_le(buffer, &offset, isl->save_data.timestamp);
554         encode_u64_le(buffer, &offset, isl->save_data.nonce);
555         encode_u32_le(buffer, &offset, isl->save_data.version);
556         encode_u32_le(buffer, &offset, 0);
557         if (isl->zone_count > 0) {
558                 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
559                 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
560                 encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
561                 encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
562                 encode_u64_le(buffer, &offset, isl->state_data.last_save);
563                 encode_u64_le(buffer, &offset, 0);
564         }
565
566         result = uds_write_to_buffered_writer(writer, buffer, offset);
567         vdo_free(buffer);
568         if (result != UDS_SUCCESS)
569                 return result;
570
571         return uds_flush_buffered_writer(writer);
572 }
573
574 static int write_index_save_layout(struct index_layout *layout,
575                                    struct index_save_layout *isl)
576 {
577         int result;
578         struct region_table *table;
579         struct buffered_writer *writer;
580
581         result = make_index_save_region_table(isl, &table);
582         if (result != UDS_SUCCESS)
583                 return result;
584
585         result = open_region_writer(layout, &isl->header, &writer);
586         if (result != UDS_SUCCESS) {
587                 vdo_free(table);
588                 return result;
589         }
590
591         result = write_index_save_header(isl, table, writer);
592         vdo_free(table);
593         uds_free_buffered_writer(writer);
594
595         return result;
596 }
597
598 static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
599 {
600         u64 free_blocks;
601         u64 next_block = isl->index_save.start_block;
602
603         isl->zone_count = 0;
604         memset(&isl->save_data, 0, sizeof(isl->save_data));
605
606         isl->header = (struct layout_region) {
607                 .start_block = next_block++,
608                 .block_count = 1,
609                 .kind = RL_KIND_HEADER,
610                 .instance = RL_SOLE_INSTANCE,
611         };
612
613         isl->index_page_map = (struct layout_region) {
614                 .start_block = next_block,
615                 .block_count = page_map_blocks,
616                 .kind = RL_KIND_INDEX_PAGE_MAP,
617                 .instance = RL_SOLE_INSTANCE,
618         };
619
620         next_block += page_map_blocks;
621
622         free_blocks = isl->index_save.block_count - page_map_blocks - 1;
623         isl->free_space = (struct layout_region) {
624                 .start_block = next_block,
625                 .block_count = free_blocks,
626                 .kind = RL_KIND_EMPTY,
627                 .instance = RL_SOLE_INSTANCE,
628         };
629 }
630
631 static int __must_check invalidate_old_save(struct index_layout *layout,
632                                             struct index_save_layout *isl)
633 {
634         reset_index_save_layout(isl, layout->super.page_map_blocks);
635         return write_index_save_layout(layout, isl);
636 }
637
638 static int discard_index_state_data(struct index_layout *layout)
639 {
640         int result;
641         int saved_result = UDS_SUCCESS;
642         unsigned int i;
643
644         for (i = 0; i < layout->super.max_saves; i++) {
645                 result = invalidate_old_save(layout, &layout->index.saves[i]);
646                 if (result != UDS_SUCCESS)
647                         saved_result = result;
648         }
649
650         if (saved_result != UDS_SUCCESS) {
651                 return vdo_log_error_strerror(result,
652                                               "%s: cannot destroy all index saves",
653                                               __func__);
654         }
655
656         return UDS_SUCCESS;
657 }
658
659 static int __must_check make_layout_region_table(struct index_layout *layout,
660                                                  struct region_table **table_ptr)
661 {
662         int result;
663         unsigned int i;
664         /* Regions: header, config, index, volume, saves, seal */
665         u16 region_count = 5 + layout->super.max_saves;
666         u16 payload;
667         struct region_table *table;
668         struct layout_region *lr;
669
670         result = vdo_allocate_extended(struct region_table, region_count,
671                                        struct layout_region, "layout region table",
672                                        &table);
673         if (result != VDO_SUCCESS)
674                 return result;
675
676         lr = &table->regions[0];
677         *lr++ = layout->header;
678         *lr++ = layout->config;
679         *lr++ = layout->index.sub_index;
680         *lr++ = layout->index.volume;
681
682         for (i = 0; i < layout->super.max_saves; i++)
683                 *lr++ = layout->index.saves[i].index_save;
684
685         *lr++ = layout->seal;
686
687         if (is_converted_super_block(&layout->super)) {
688                 payload = sizeof(struct super_block_data);
689         } else {
690                 payload = (sizeof(struct super_block_data) -
691                            sizeof(layout->super.volume_offset) -
692                            sizeof(layout->super.start_offset));
693         }
694
695         table->header = (struct region_header) {
696                 .magic = REGION_MAGIC,
697                 .region_blocks = layout->total_blocks,
698                 .type = RH_TYPE_SUPER,
699                 .version = 1,
700                 .region_count = region_count,
701                 .payload = payload,
702         };
703
704         table->encoded_size = (sizeof(struct region_header) + payload +
705                                region_count * sizeof(struct layout_region));
706         *table_ptr = table;
707         return UDS_SUCCESS;
708 }
709
710 static int __must_check write_layout_header(struct index_layout *layout,
711                                             struct region_table *table,
712                                             struct buffered_writer *writer)
713 {
714         int result;
715         u8 *buffer;
716         size_t offset = 0;
717
718         result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
719         if (result != VDO_SUCCESS)
720                 return result;
721
722         encode_region_table(buffer, &offset, table);
723         memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
724         offset += MAGIC_SIZE;
725         memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
726         offset += NONCE_INFO_SIZE;
727         encode_u64_le(buffer, &offset, layout->super.nonce);
728         encode_u32_le(buffer, &offset, layout->super.version);
729         encode_u32_le(buffer, &offset, layout->super.block_size);
730         encode_u16_le(buffer, &offset, layout->super.index_count);
731         encode_u16_le(buffer, &offset, layout->super.max_saves);
732         encode_u32_le(buffer, &offset, 0);
733         encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
734         encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
735
736         if (is_converted_super_block(&layout->super)) {
737                 encode_u64_le(buffer, &offset, layout->super.volume_offset);
738                 encode_u64_le(buffer, &offset, layout->super.start_offset);
739         }
740
741         result = uds_write_to_buffered_writer(writer, buffer, offset);
742         vdo_free(buffer);
743         if (result != UDS_SUCCESS)
744                 return result;
745
746         return uds_flush_buffered_writer(writer);
747 }
748
749 static int __must_check write_uds_index_config(struct index_layout *layout,
750                                                struct uds_configuration *config,
751                                                off_t offset)
752 {
753         int result;
754         struct buffered_writer *writer = NULL;
755
756         result = open_layout_writer(layout, &layout->config, offset, &writer);
757         if (result != UDS_SUCCESS)
758                 return vdo_log_error_strerror(result, "failed to open config region");
759
760         result = uds_write_config_contents(writer, config, layout->super.version);
761         if (result != UDS_SUCCESS) {
762                 uds_free_buffered_writer(writer);
763                 return vdo_log_error_strerror(result, "failed to write config region");
764         }
765
766         result = uds_flush_buffered_writer(writer);
767         if (result != UDS_SUCCESS) {
768                 uds_free_buffered_writer(writer);
769                 return vdo_log_error_strerror(result, "cannot flush config writer");
770         }
771
772         uds_free_buffered_writer(writer);
773         return UDS_SUCCESS;
774 }
775
776 static int __must_check save_layout(struct index_layout *layout, off_t offset)
777 {
778         int result;
779         struct buffered_writer *writer = NULL;
780         struct region_table *table;
781
782         result = make_layout_region_table(layout, &table);
783         if (result != UDS_SUCCESS)
784                 return result;
785
786         result = open_layout_writer(layout, &layout->header, offset, &writer);
787         if (result != UDS_SUCCESS) {
788                 vdo_free(table);
789                 return result;
790         }
791
792         result = write_layout_header(layout, table, writer);
793         vdo_free(table);
794         uds_free_buffered_writer(writer);
795
796         return result;
797 }
798
799 static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
800 {
801         int result;
802         struct save_layout_sizes sizes;
803
804         result = compute_sizes(config, &sizes);
805         if (result != UDS_SUCCESS)
806                 return result;
807
808         result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
809                               &layout->index.saves);
810         if (result != VDO_SUCCESS)
811                 return result;
812
813         initialize_layout(layout, &sizes);
814
815         result = discard_index_state_data(layout);
816         if (result != UDS_SUCCESS)
817                 return result;
818
819         result = write_uds_index_config(layout, config, 0);
820         if (result != UDS_SUCCESS)
821                 return result;
822
823         return save_layout(layout, 0);
824 }
825
826 static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
827 {
828         struct save_nonce_data {
829                 struct index_save_data data;
830                 u64 offset;
831         } nonce_data;
832         u8 buffer[sizeof(nonce_data)];
833         size_t offset = 0;
834
835         encode_u64_le(buffer, &offset, isl->save_data.timestamp);
836         encode_u64_le(buffer, &offset, 0);
837         encode_u32_le(buffer, &offset, isl->save_data.version);
838         encode_u32_le(buffer, &offset, 0U);
839         encode_u64_le(buffer, &offset, isl->index_save.start_block);
840         VDO_ASSERT_LOG_ONLY(offset == sizeof(nonce_data),
841                             "%zu bytes encoded of %zu expected",
842                             offset, sizeof(nonce_data));
843         return generate_secondary_nonce(volume_nonce, buffer, sizeof(buffer));
844 }
845
846 static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
847 {
848         if ((isl->zone_count == 0) || (isl->save_data.timestamp == 0))
849                 return 0;
850
851         if (isl->save_data.nonce != generate_index_save_nonce(volume_nonce, isl))
852                 return 0;
853
854         return isl->save_data.timestamp;
855 }
856
857 static int find_latest_uds_index_save_slot(struct index_layout *layout,
858                                            struct index_save_layout **isl_ptr)
859 {
860         struct index_save_layout *latest = NULL;
861         struct index_save_layout *isl;
862         unsigned int i;
863         u64 save_time = 0;
864         u64 latest_time = 0;
865
866         for (i = 0; i < layout->super.max_saves; i++) {
867                 isl = &layout->index.saves[i];
868                 save_time = validate_index_save_layout(isl, layout->index.nonce);
869                 if (save_time > latest_time) {
870                         latest = isl;
871                         latest_time = save_time;
872                 }
873         }
874
875         if (latest == NULL) {
876                 vdo_log_error("No valid index save found");
877                 return UDS_INDEX_NOT_SAVED_CLEANLY;
878         }
879
880         *isl_ptr = latest;
881         return UDS_SUCCESS;
882 }
883
884 int uds_discard_open_chapter(struct index_layout *layout)
885 {
886         int result;
887         struct index_save_layout *isl;
888         struct buffered_writer *writer;
889
890         result = find_latest_uds_index_save_slot(layout, &isl);
891         if (result != UDS_SUCCESS)
892                 return result;
893
894         result = open_region_writer(layout, &isl->open_chapter, &writer);
895         if (result != UDS_SUCCESS)
896                 return result;
897
898         result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
899         if (result != UDS_SUCCESS) {
900                 uds_free_buffered_writer(writer);
901                 return result;
902         }
903
904         result = uds_flush_buffered_writer(writer);
905         uds_free_buffered_writer(writer);
906         return result;
907 }
908
909 int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
910 {
911         int result;
912         unsigned int zone;
913         struct index_save_layout *isl;
914         struct buffered_reader *readers[MAX_ZONES];
915
916         result = find_latest_uds_index_save_slot(layout, &isl);
917         if (result != UDS_SUCCESS)
918                 return result;
919
920         index->newest_virtual_chapter = isl->state_data.newest_chapter;
921         index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
922         index->last_save = isl->state_data.last_save;
923
924         result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
925         if (result != UDS_SUCCESS)
926                 return result;
927
928         result = uds_load_open_chapter(index, readers[0]);
929         uds_free_buffered_reader(readers[0]);
930         if (result != UDS_SUCCESS)
931                 return result;
932
933         for (zone = 0; zone < isl->zone_count; zone++) {
934                 result = open_region_reader(layout, &isl->volume_index_zones[zone],
935                                             &readers[zone]);
936                 if (result != UDS_SUCCESS) {
937                         for (; zone > 0; zone--)
938                                 uds_free_buffered_reader(readers[zone - 1]);
939
940                         return result;
941                 }
942         }
943
944         result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
945         for (zone = 0; zone < isl->zone_count; zone++)
946                 uds_free_buffered_reader(readers[zone]);
947         if (result != UDS_SUCCESS)
948                 return result;
949
950         result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
951         if (result != UDS_SUCCESS)
952                 return result;
953
954         result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
955         uds_free_buffered_reader(readers[0]);
956
957         return result;
958 }
959
960 static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
961 {
962         struct index_save_layout *oldest = NULL;
963         struct index_save_layout *isl;
964         unsigned int i;
965         u64 save_time = 0;
966         u64 oldest_time = 0;
967
968         for (i = 0; i < layout->super.max_saves; i++) {
969                 isl = &layout->index.saves[i];
970                 save_time = validate_index_save_layout(isl, layout->index.nonce);
971                 if (oldest == NULL || save_time < oldest_time) {
972                         oldest = isl;
973                         oldest_time = save_time;
974                 }
975         }
976
977         return oldest;
978 }
979
980 static void instantiate_index_save_layout(struct index_save_layout *isl,
981                                           struct super_block_data *super,
982                                           u64 volume_nonce, unsigned int zone_count)
983 {
984         unsigned int z;
985         u64 next_block;
986         u64 free_blocks;
987         u64 volume_index_blocks;
988
989         isl->zone_count = zone_count;
990         memset(&isl->save_data, 0, sizeof(isl->save_data));
991         isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
992         isl->save_data.version = 1;
993         isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);
994
995         next_block = isl->index_save.start_block;
996         isl->header = (struct layout_region) {
997                 .start_block = next_block++,
998                 .block_count = 1,
999                 .kind = RL_KIND_HEADER,
1000                 .instance = RL_SOLE_INSTANCE,
1001         };
1002
1003         isl->index_page_map = (struct layout_region) {
1004                 .start_block = next_block,
1005                 .block_count = super->page_map_blocks,
1006                 .kind = RL_KIND_INDEX_PAGE_MAP,
1007                 .instance = RL_SOLE_INSTANCE,
1008         };
1009         next_block += super->page_map_blocks;
1010
1011         free_blocks = (isl->index_save.block_count - 1 -
1012                        super->page_map_blocks -
1013                        super->open_chapter_blocks);
1014         volume_index_blocks = free_blocks / isl->zone_count;
1015         for (z = 0; z < isl->zone_count; z++) {
1016                 isl->volume_index_zones[z] = (struct layout_region) {
1017                         .start_block = next_block,
1018                         .block_count = volume_index_blocks,
1019                         .kind = RL_KIND_VOLUME_INDEX,
1020                         .instance = z,
1021                 };
1022
1023                 next_block += volume_index_blocks;
1024                 free_blocks -= volume_index_blocks;
1025         }
1026
1027         isl->open_chapter = (struct layout_region) {
1028                 .start_block = next_block,
1029                 .block_count = super->open_chapter_blocks,
1030                 .kind = RL_KIND_OPEN_CHAPTER,
1031                 .instance = RL_SOLE_INSTANCE,
1032         };
1033
1034         next_block += super->open_chapter_blocks;
1035
1036         isl->free_space = (struct layout_region) {
1037                 .start_block = next_block,
1038                 .block_count = free_blocks,
1039                 .kind = RL_KIND_EMPTY,
1040                 .instance = RL_SOLE_INSTANCE,
1041         };
1042 }
1043
1044 static int setup_uds_index_save_slot(struct index_layout *layout,
1045                                      unsigned int zone_count,
1046                                      struct index_save_layout **isl_ptr)
1047 {
1048         int result;
1049         struct index_save_layout *isl;
1050
1051         isl = select_oldest_index_save_layout(layout);
1052         result = invalidate_old_save(layout, isl);
1053         if (result != UDS_SUCCESS)
1054                 return result;
1055
1056         instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1057                                       zone_count);
1058
1059         *isl_ptr = isl;
1060         return UDS_SUCCESS;
1061 }
1062
1063 static void cancel_uds_index_save(struct index_save_layout *isl)
1064 {
1065         memset(&isl->save_data, 0, sizeof(isl->save_data));
1066         memset(&isl->state_data, 0, sizeof(isl->state_data));
1067         isl->zone_count = 0;
1068 }
1069
1070 int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1071 {
1072         int result;
1073         unsigned int zone;
1074         struct index_save_layout *isl;
1075         struct buffered_writer *writers[MAX_ZONES];
1076
1077         result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1078         if (result != UDS_SUCCESS)
1079                 return result;
1080
1081         isl->state_data = (struct index_state_data301) {
1082                 .newest_chapter = index->newest_virtual_chapter,
1083                 .oldest_chapter = index->oldest_virtual_chapter,
1084                 .last_save = index->last_save,
1085         };
1086
1087         result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1088         if (result != UDS_SUCCESS) {
1089                 cancel_uds_index_save(isl);
1090                 return result;
1091         }
1092
1093         result = uds_save_open_chapter(index, writers[0]);
1094         uds_free_buffered_writer(writers[0]);
1095         if (result != UDS_SUCCESS) {
1096                 cancel_uds_index_save(isl);
1097                 return result;
1098         }
1099
1100         for (zone = 0; zone < index->zone_count; zone++) {
1101                 result = open_region_writer(layout, &isl->volume_index_zones[zone],
1102                                             &writers[zone]);
1103                 if (result != UDS_SUCCESS) {
1104                         for (; zone > 0; zone--)
1105                                 uds_free_buffered_writer(writers[zone - 1]);
1106
1107                         cancel_uds_index_save(isl);
1108                         return result;
1109                 }
1110         }
1111
1112         result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1113         for (zone = 0; zone < index->zone_count; zone++)
1114                 uds_free_buffered_writer(writers[zone]);
1115         if (result != UDS_SUCCESS) {
1116                 cancel_uds_index_save(isl);
1117                 return result;
1118         }
1119
1120         result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1121         if (result != UDS_SUCCESS) {
1122                 cancel_uds_index_save(isl);
1123                 return result;
1124         }
1125
1126         result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1127         uds_free_buffered_writer(writers[0]);
1128         if (result != UDS_SUCCESS) {
1129                 cancel_uds_index_save(isl);
1130                 return result;
1131         }
1132
1133         return write_index_save_layout(layout, isl);
1134 }
1135
1136 static int __must_check load_region_table(struct buffered_reader *reader,
1137                                           struct region_table **table_ptr)
1138 {
1139         int result;
1140         unsigned int i;
1141         struct region_header header;
1142         struct region_table *table;
1143         u8 buffer[sizeof(struct region_header)];
1144         size_t offset = 0;
1145
1146         result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1147         if (result != UDS_SUCCESS)
1148                 return vdo_log_error_strerror(result, "cannot read region table header");
1149
1150         decode_u64_le(buffer, &offset, &header.magic);
1151         decode_u64_le(buffer, &offset, &header.region_blocks);
1152         decode_u16_le(buffer, &offset, &header.type);
1153         decode_u16_le(buffer, &offset, &header.version);
1154         decode_u16_le(buffer, &offset, &header.region_count);
1155         decode_u16_le(buffer, &offset, &header.payload);
1156
1157         if (header.magic != REGION_MAGIC)
1158                 return UDS_NO_INDEX;
1159
1160         if (header.version != 1) {
1161                 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1162                                               "unknown region table version %hu",
1163                                               header.version);
1164         }
1165
1166         result = vdo_allocate_extended(struct region_table, header.region_count,
1167                                        struct layout_region,
1168                                        "single file layout region table", &table);
1169         if (result != VDO_SUCCESS)
1170                 return result;
1171
1172         table->header = header;
1173         for (i = 0; i < header.region_count; i++) {
1174                 u8 region_buffer[sizeof(struct layout_region)];
1175
1176                 offset = 0;
1177                 result = uds_read_from_buffered_reader(reader, region_buffer,
1178                                                        sizeof(region_buffer));
1179                 if (result != UDS_SUCCESS) {
1180                         vdo_free(table);
1181                         return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1182                                                       "cannot read region table layouts");
1183                 }
1184
1185                 decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1186                 decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1187                 offset += sizeof(u32);
1188                 decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1189                 decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1190         }
1191
1192         *table_ptr = table;
1193         return UDS_SUCCESS;
1194 }
1195
1196 static int __must_check read_super_block_data(struct buffered_reader *reader,
1197                                               struct index_layout *layout,
1198                                               size_t saved_size)
1199 {
1200         int result;
1201         struct super_block_data *super = &layout->super;
1202         u8 *buffer;
1203         size_t offset = 0;
1204
1205         result = vdo_allocate(saved_size, u8, "super block data", &buffer);
1206         if (result != VDO_SUCCESS)
1207                 return result;
1208
1209         result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1210         if (result != UDS_SUCCESS) {
1211                 vdo_free(buffer);
1212                 return vdo_log_error_strerror(result, "cannot read region table header");
1213         }
1214
1215         memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1216         offset += MAGIC_SIZE;
1217         memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1218         offset += NONCE_INFO_SIZE;
1219         decode_u64_le(buffer, &offset, &super->nonce);
1220         decode_u32_le(buffer, &offset, &super->version);
1221         decode_u32_le(buffer, &offset, &super->block_size);
1222         decode_u16_le(buffer, &offset, &super->index_count);
1223         decode_u16_le(buffer, &offset, &super->max_saves);
1224         offset += sizeof(u32);
1225         decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1226         decode_u64_le(buffer, &offset, &super->page_map_blocks);
1227
1228         if (is_converted_super_block(super)) {
1229                 decode_u64_le(buffer, &offset, &super->volume_offset);
1230                 decode_u64_le(buffer, &offset, &super->start_offset);
1231         } else {
1232                 super->volume_offset = 0;
1233                 super->start_offset = 0;
1234         }
1235
1236         vdo_free(buffer);
1237
1238         if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1239                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1240                                               "unknown superblock magic label");
1241
1242         if ((super->version < SUPER_VERSION_MINIMUM) ||
1243             (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1244             (super->version > SUPER_VERSION_MAXIMUM)) {
1245                 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1246                                               "unknown superblock version number %u",
1247                                               super->version);
1248         }
1249
1250         if (super->volume_offset < super->start_offset) {
1251                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1252                                               "inconsistent offsets (start %llu, volume %llu)",
1253                                               (unsigned long long) super->start_offset,
1254                                               (unsigned long long) super->volume_offset);
1255         }
1256
1257         /* Sub-indexes are no longer used but the layout retains this field. */
1258         if (super->index_count != 1) {
1259                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1260                                               "invalid subindex count %u",
1261                                               super->index_count);
1262         }
1263
1264         if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1265                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1266                                               "inconsistent superblock nonce");
1267         }
1268
1269         return UDS_SUCCESS;
1270 }
1271
1272 static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1273                                       enum region_kind kind, unsigned int instance)
1274 {
1275         if (lr->start_block != start_block)
1276                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1277                                               "incorrect layout region offset");
1278
1279         if (lr->kind != kind)
1280                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1281                                               "incorrect layout region kind");
1282
1283         if (lr->instance != instance) {
1284                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1285                                               "incorrect layout region instance");
1286         }
1287
1288         return UDS_SUCCESS;
1289 }
1290
1291 static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1292                                          struct region_table *table)
1293 {
1294         int result;
1295         unsigned int i;
1296         struct sub_index_layout *sil = &layout->index;
1297         u64 next_block = start_block;
1298
1299         sil->sub_index = table->regions[2];
1300         result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1301         if (result != UDS_SUCCESS)
1302                 return result;
1303
1304         define_sub_index_nonce(layout);
1305
1306         sil->volume = table->regions[3];
1307         result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1308                                RL_SOLE_INSTANCE);
1309         if (result != UDS_SUCCESS)
1310                 return result;
1311
1312         next_block += sil->volume.block_count + layout->super.volume_offset;
1313
1314         for (i = 0; i < layout->super.max_saves; i++) {
1315                 sil->saves[i].index_save = table->regions[i + 4];
1316                 result = verify_region(&sil->saves[i].index_save, next_block,
1317                                        RL_KIND_SAVE, i);
1318                 if (result != UDS_SUCCESS)
1319                         return result;
1320
1321                 next_block += sil->saves[i].index_save.block_count;
1322         }
1323
1324         next_block -= layout->super.volume_offset;
1325         if (next_block != start_block + sil->sub_index.block_count) {
1326                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1327                                               "sub index region does not span all saves");
1328         }
1329
1330         return UDS_SUCCESS;
1331 }
1332
1333 static int __must_check reconstitute_layout(struct index_layout *layout,
1334                                             struct region_table *table, u64 first_block)
1335 {
1336         int result;
1337         u64 next_block = first_block;
1338
1339         result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
1340                               __func__, &layout->index.saves);
1341         if (result != VDO_SUCCESS)
1342                 return result;
1343
1344         layout->total_blocks = table->header.region_blocks;
1345
1346         layout->header = table->regions[0];
1347         result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1348                                RL_SOLE_INSTANCE);
1349         if (result != UDS_SUCCESS)
1350                 return result;
1351
1352         layout->config = table->regions[1];
1353         result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1354                                RL_SOLE_INSTANCE);
1355         if (result != UDS_SUCCESS)
1356                 return result;
1357
1358         result = verify_sub_index(layout, next_block, table);
1359         if (result != UDS_SUCCESS)
1360                 return result;
1361
1362         next_block += layout->index.sub_index.block_count;
1363
1364         layout->seal = table->regions[table->header.region_count - 1];
1365         result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1366                                RL_KIND_SEAL, RL_SOLE_INSTANCE);
1367         if (result != UDS_SUCCESS)
1368                 return result;
1369
1370         if (++next_block != (first_block + layout->total_blocks)) {
1371                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1372                                               "layout table does not span total blocks");
1373         }
1374
1375         return UDS_SUCCESS;
1376 }
1377
1378 static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1379                                          u64 first_block, struct buffered_reader *reader)
1380 {
1381         int result;
1382         struct region_table *table = NULL;
1383         struct super_block_data *super = &layout->super;
1384
1385         result = load_region_table(reader, &table);
1386         if (result != UDS_SUCCESS)
1387                 return result;
1388
1389         if (table->header.type != RH_TYPE_SUPER) {
1390                 vdo_free(table);
1391                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1392                                               "not a superblock region table");
1393         }
1394
1395         result = read_super_block_data(reader, layout, table->header.payload);
1396         if (result != UDS_SUCCESS) {
1397                 vdo_free(table);
1398                 return vdo_log_error_strerror(result, "unknown superblock format");
1399         }
1400
1401         if (super->block_size != block_size) {
1402                 vdo_free(table);
1403                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1404                                               "superblock saved block_size %u differs from supplied block_size %zu",
1405                                               super->block_size, block_size);
1406         }
1407
1408         first_block -= (super->volume_offset - super->start_offset);
1409         result = reconstitute_layout(layout, table, first_block);
1410         vdo_free(table);
1411         return result;
1412 }
1413
1414 static int __must_check read_index_save_data(struct buffered_reader *reader,
1415                                              struct index_save_layout *isl,
1416                                              size_t saved_size)
1417 {
1418         int result;
1419         struct index_state_version file_version;
1420         u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1421         size_t offset = 0;
1422
1423         if (saved_size != sizeof(buffer)) {
1424                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1425                                               "unexpected index save data size %zu",
1426                                               saved_size);
1427         }
1428
1429         result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1430         if (result != UDS_SUCCESS)
1431                 return vdo_log_error_strerror(result, "cannot read index save data");
1432
1433         decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1434         decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1435         decode_u32_le(buffer, &offset, &isl->save_data.version);
1436         offset += sizeof(u32);
1437
1438         if (isl->save_data.version > 1) {
1439                 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1440                                               "unknown index save version number %u",
1441                                               isl->save_data.version);
1442         }
1443
1444         decode_s32_le(buffer, &offset, &file_version.signature);
1445         decode_s32_le(buffer, &offset, &file_version.version_id);
1446
1447         if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1448             (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1449                 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1450                                               "index state version %d,%d is unsupported",
1451                                               file_version.signature,
1452                                               file_version.version_id);
1453         }
1454
1455         decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1456         decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1457         decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1458         /* Skip past some historical fields that are now unused */
1459         offset += sizeof(u32) + sizeof(u32);
1460         return UDS_SUCCESS;
1461 }
1462
1463 static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1464                                                struct region_table *table)
1465 {
1466         int result;
1467         unsigned int z;
1468         struct layout_region *last_region;
1469         u64 next_block = isl->index_save.start_block;
1470         u64 last_block = next_block + isl->index_save.block_count;
1471
1472         isl->zone_count = table->header.region_count - 3;
1473
1474         last_region = &table->regions[table->header.region_count - 1];
1475         if (last_region->kind == RL_KIND_EMPTY) {
1476                 isl->free_space = *last_region;
1477                 isl->zone_count--;
1478         } else {
1479                 isl->free_space = (struct layout_region) {
1480                         .start_block = last_block,
1481                         .block_count = 0,
1482                         .kind = RL_KIND_EMPTY,
1483                         .instance = RL_SOLE_INSTANCE,
1484                 };
1485         }
1486
1487         isl->header = table->regions[0];
1488         result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1489                                RL_SOLE_INSTANCE);
1490         if (result != UDS_SUCCESS)
1491                 return result;
1492
1493         isl->index_page_map = table->regions[1];
1494         result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1495                                RL_SOLE_INSTANCE);
1496         if (result != UDS_SUCCESS)
1497                 return result;
1498
1499         next_block += isl->index_page_map.block_count;
1500
1501         for (z = 0; z < isl->zone_count; z++) {
1502                 isl->volume_index_zones[z] = table->regions[z + 2];
1503                 result = verify_region(&isl->volume_index_zones[z], next_block,
1504                                        RL_KIND_VOLUME_INDEX, z);
1505                 if (result != UDS_SUCCESS)
1506                         return result;
1507
1508                 next_block += isl->volume_index_zones[z].block_count;
1509         }
1510
1511         isl->open_chapter = table->regions[isl->zone_count + 2];
1512         result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1513                                RL_SOLE_INSTANCE);
1514         if (result != UDS_SUCCESS)
1515                 return result;
1516
1517         next_block += isl->open_chapter.block_count;
1518
1519         result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1520                                RL_SOLE_INSTANCE);
1521         if (result != UDS_SUCCESS)
1522                 return result;
1523
1524         next_block += isl->free_space.block_count;
1525         if (next_block != last_block) {
1526                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1527                                               "index save layout table incomplete");
1528         }
1529
1530         return UDS_SUCCESS;
1531 }
1532
1533 static int __must_check load_index_save(struct index_save_layout *isl,
1534                                         struct buffered_reader *reader,
1535                                         unsigned int instance)
1536 {
1537         int result;
1538         struct region_table *table = NULL;
1539
1540         result = load_region_table(reader, &table);
1541         if (result != UDS_SUCCESS) {
1542                 return vdo_log_error_strerror(result, "cannot read index save %u header",
1543                                               instance);
1544         }
1545
1546         if (table->header.region_blocks != isl->index_save.block_count) {
1547                 u64 region_blocks = table->header.region_blocks;
1548
1549                 vdo_free(table);
1550                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1551                                               "unexpected index save %u region block count %llu",
1552                                               instance,
1553                                               (unsigned long long) region_blocks);
1554         }
1555
1556         if (table->header.type == RH_TYPE_UNSAVED) {
1557                 vdo_free(table);
1558                 reset_index_save_layout(isl, 0);
1559                 return UDS_SUCCESS;
1560         }
1561
1562
1563         if (table->header.type != RH_TYPE_SAVE) {
1564                 vdo_log_error_strerror(UDS_CORRUPT_DATA,
1565                                        "unexpected index save %u header type %u",
1566                                        instance, table->header.type);
1567                 vdo_free(table);
1568                 return UDS_CORRUPT_DATA;
1569         }
1570
1571         result = read_index_save_data(reader, isl, table->header.payload);
1572         if (result != UDS_SUCCESS) {
1573                 vdo_free(table);
1574                 return vdo_log_error_strerror(result,
1575                                               "unknown index save %u data format",
1576                                               instance);
1577         }
1578
1579         result = reconstruct_index_save(isl, table);
1580         vdo_free(table);
1581         if (result != UDS_SUCCESS) {
1582                 return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1583                                               instance);
1584         }
1585
1586         return UDS_SUCCESS;
1587 }
1588
1589 static int __must_check load_sub_index_regions(struct index_layout *layout)
1590 {
1591         int result;
1592         unsigned int j;
1593         struct index_save_layout *isl;
1594         struct buffered_reader *reader;
1595
1596         for (j = 0; j < layout->super.max_saves; j++) {
1597                 isl = &layout->index.saves[j];
1598                 result = open_region_reader(layout, &isl->index_save, &reader);
1599
1600                 if (result != UDS_SUCCESS) {
1601                         vdo_log_error_strerror(result,
1602                                                "cannot get reader for index 0 save %u",
1603                                                j);
1604                         return result;
1605                 }
1606
1607                 result = load_index_save(isl, reader, j);
1608                 uds_free_buffered_reader(reader);
1609                 if (result != UDS_SUCCESS) {
1610                         /* Another save slot might be valid. */
1611                         reset_index_save_layout(isl, 0);
1612                         continue;
1613                 }
1614         }
1615
1616         return UDS_SUCCESS;
1617 }
1618
1619 static int __must_check verify_uds_index_config(struct index_layout *layout,
1620                                                 struct uds_configuration *config)
1621 {
1622         int result;
1623         struct buffered_reader *reader = NULL;
1624         u64 offset;
1625
1626         offset = layout->super.volume_offset - layout->super.start_offset;
1627         result = open_layout_reader(layout, &layout->config, offset, &reader);
1628         if (result != UDS_SUCCESS)
1629                 return vdo_log_error_strerror(result, "failed to open config reader");
1630
1631         result = uds_validate_config_contents(reader, config);
1632         if (result != UDS_SUCCESS) {
1633                 uds_free_buffered_reader(reader);
1634                 return vdo_log_error_strerror(result, "failed to read config region");
1635         }
1636
1637         uds_free_buffered_reader(reader);
1638         return UDS_SUCCESS;
1639 }
1640
1641 static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1642 {
1643         int result;
1644         struct buffered_reader *reader;
1645
1646         result = uds_make_buffered_reader(layout->factory,
1647                                           layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1648         if (result != UDS_SUCCESS)
1649                 return vdo_log_error_strerror(result, "unable to read superblock");
1650
1651         result = load_super_block(layout, UDS_BLOCK_SIZE,
1652                                   layout->offset / UDS_BLOCK_SIZE, reader);
1653         uds_free_buffered_reader(reader);
1654         if (result != UDS_SUCCESS)
1655                 return result;
1656
1657         result = verify_uds_index_config(layout, config);
1658         if (result != UDS_SUCCESS)
1659                 return result;
1660
1661         return load_sub_index_regions(layout);
1662 }
1663
1664 static int create_layout_factory(struct index_layout *layout,
1665                                  const struct uds_configuration *config)
1666 {
1667         int result;
1668         size_t writable_size;
1669         struct io_factory *factory = NULL;
1670
1671         result = uds_make_io_factory(config->bdev, &factory);
1672         if (result != UDS_SUCCESS)
1673                 return result;
1674
1675         writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1676         if (writable_size < config->size + config->offset) {
1677                 uds_put_io_factory(factory);
1678                 vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1679                               writable_size, config->size + config->offset);
1680                 return -ENOSPC;
1681         }
1682
1683         layout->factory = factory;
1684         layout->factory_size = (config->size > 0) ? config->size : writable_size;
1685         layout->offset = config->offset;
1686         return UDS_SUCCESS;
1687 }
1688
1689 int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
1690                           struct index_layout **layout_ptr)
1691 {
1692         int result;
1693         struct index_layout *layout = NULL;
1694         struct save_layout_sizes sizes;
1695
1696         result = compute_sizes(config, &sizes);
1697         if (result != UDS_SUCCESS)
1698                 return result;
1699
1700         result = vdo_allocate(1, struct index_layout, __func__, &layout);
1701         if (result != VDO_SUCCESS)
1702                 return result;
1703
1704         result = create_layout_factory(layout, config);
1705         if (result != UDS_SUCCESS) {
1706                 uds_free_index_layout(layout);
1707                 return result;
1708         }
1709
1710         if (layout->factory_size < sizes.total_size) {
1711                 vdo_log_error("index storage (%zu) is smaller than the required size %llu",
1712                               layout->factory_size,
1713                               (unsigned long long) sizes.total_size);
1714                 uds_free_index_layout(layout);
1715                 return -ENOSPC;
1716         }
1717
1718         if (new_layout)
1719                 result = create_index_layout(layout, config);
1720         else
1721                 result = load_index_layout(layout, config);
1722         if (result != UDS_SUCCESS) {
1723                 uds_free_index_layout(layout);
1724                 return result;
1725         }
1726
1727         *layout_ptr = layout;
1728         return UDS_SUCCESS;
1729 }
1730
1731 void uds_free_index_layout(struct index_layout *layout)
1732 {
1733         if (layout == NULL)
1734                 return;
1735
1736         vdo_free(layout->index.saves);
1737         if (layout->factory != NULL)
1738                 uds_put_io_factory(layout->factory);
1739
1740         vdo_free(layout);
1741 }
1742
1743 int uds_replace_index_layout_storage(struct index_layout *layout,
1744                                      struct block_device *bdev)
1745 {
1746         return uds_replace_storage(layout->factory, bdev);
1747 }
1748
1749 /* Obtain a dm_bufio_client for the volume region. */
1750 int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1751                           unsigned int reserved_buffers,
1752                           struct dm_bufio_client **client_ptr)
1753 {
1754         off_t offset = (layout->index.volume.start_block +
1755                         layout->super.volume_offset -
1756                         layout->super.start_offset);
1757
1758         return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1759                               client_ptr);
1760 }
1761
1762 u64 uds_get_volume_nonce(struct index_layout *layout)
1763 {
1764         return layout->index.nonce;
1765 }
This page took 0.141501 seconds and 4 git commands to generate.