]> Git Repo - linux.git/blob - drivers/md/dm-vdo/indexer/index-layout.c
Linux 6.14-rc3
[linux.git] / drivers / md / dm-vdo / indexer / index-layout.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2023 Red Hat
4  */
5
6 #include "index-layout.h"
7
8 #include <linux/random.h>
9
10 #include "logger.h"
11 #include "memory-alloc.h"
12 #include "murmurhash3.h"
13 #include "numeric.h"
14 #include "time-utils.h"
15
16 #include "config.h"
17 #include "open-chapter.h"
18 #include "volume-index.h"
19
20 /*
21  * The UDS layout on storage media is divided into a number of fixed-size regions, the sizes of
22  * which are computed when the index is created. Every header and region begins on a 4K block
23  * boundary. Save regions are further sub-divided into regions of their own.
24  *
25  * Each region has a kind and an instance number. Some kinds only have one instance and therefore
26  * use RL_SOLE_INSTANCE (-1) as the instance number. The RL_KIND_INDEX used to use instances to
27  * represent sub-indices; now, however, there is only ever one sub-index and therefore one instance.
28  * The RL_KIND_VOLUME_INDEX uses instances to record which zone is being saved.
29  *
30  * Every region header has a type and version.
31  *
32  *     +-+-+---------+--------+--------+-+
33  *     | | |   I N D E X  0   101, 0   | |
34  *     |H|C+---------+--------+--------+S|
35  *     |D|f| Volume  | Save   | Save   |e|
36  *     |R|g| Region  | Region | Region |a|
37  *     | | | 201, -1 | 202, 0 | 202, 1 |l|
38  *     +-+-+---------+--------+--------+-+
39  *
40  * The header contains the encoded region layout table as well as some index configuration data.
41  * The sub-index region and its subdivisions are maintained in the same table.
42  *
43  * There are two save regions to preserve the old state in case saving the new state is incomplete.
44  * They are used in alternation. Each save region is further divided into sub-regions.
45  *
46  *     +-+-----+------+------+-----+-----+
47  *     |H| IPM | MI   | MI   |     | OC  |
48  *     |D|     | zone | zone | ... |     |
49  *     |R| 301 | 302  | 302  |     | 303 |
50  *     | | -1  |  0   |  1   |     | -1  |
51  *     +-+-----+------+------+-----+-----+
52  *
53  * The header contains the encoded region layout table as well as index state data for that save.
54  * Each save also has a unique nonce.
55  */
56
/* Length in bytes of the magic label kept in the super block. */
#define MAGIC_SIZE      32
/* Length in bytes of the raw data the primary nonce is hashed from. */
#define NONCE_INFO_SIZE 32
/* Number of save regions in the index; they are used in alternation. */
#define MAX_SAVES       2
60
/*
 * The kind of area a layout_region describes. Following the layout diagrams above, kinds in the
 * 100s are top-level regions, kinds in the 200s subdivide the index region, and kinds in the 300s
 * subdivide a save region.
 */
enum region_kind {
        /* Unused space, such as the free space of a save region */
        RL_KIND_EMPTY = 0,
        /* The block holding an encoded region table */
        RL_KIND_HEADER = 1,

        /* Top-level regions */
        RL_KIND_CONFIG = 100,
        RL_KIND_INDEX = 101,
        RL_KIND_SEAL = 102,

        /* Regions within the index region */
        RL_KIND_VOLUME = 201,
        RL_KIND_SAVE = 202,

        /* Regions within a save region */
        RL_KIND_INDEX_PAGE_MAP = 301,
        RL_KIND_VOLUME_INDEX = 302,
        RL_KIND_OPEN_CHAPTER = 303,
};
73
/*
 * The type recorded in a region header. Some of these values are historical: they are kept so
 * the numbering stays stable, but nothing writes them any more.
 */
enum region_type {
        /* No longer used */
        RH_TYPE_FREE = 0,
        /* The region table of the whole layout */
        RH_TYPE_SUPER = 1,
        /* The region table of a save that holds index state */
        RH_TYPE_SAVE = 2,
        /* No longer used */
        RH_TYPE_CHECKPOINT = 3,
        /* The region table of an empty save */
        RH_TYPE_UNSAVED = 4,
};
82
/* Instance number for kinds with a single instance; this is -1 as stored in a u16 field. */
#define RL_SOLE_INSTANCE 0xffff
84
/*
 * Super block versions:
 *
 *   2       The first released version.
 *   3       The normal version, used from RHEL 8.2 onwards.
 *   4 - 6   Incremental development versions, which are not supported.
 *   7       Used for volumes which have been reduced in size by one chapter in order to make
 *           room to prepend LVM metadata to a volume originally created without LVM. This
 *           allows the index to retain most of its deduplication records.
 */
#define SUPER_VERSION_MINIMUM 3
#define SUPER_VERSION_CURRENT 3
#define SUPER_VERSION_MAXIMUM 7
100
101 static const u8 LAYOUT_MAGIC[MAGIC_SIZE] = "*ALBIREO*SINGLE*FILE*LAYOUT*001*";
102 static const u64 REGION_MAGIC = 0x416c6252676e3031; /* 'AlbRgn01' */
103
104 struct region_header {
105         u64 magic;
106         u64 region_blocks;
107         u16 type;
108         /* Currently always version 1 */
109         u16 version;
110         u16 region_count;
111         u16 payload;
112 };
113
114 struct layout_region {
115         u64 start_block;
116         u64 block_count;
117         u32 __unused;
118         u16 kind;
119         u16 instance;
120 };
121
122 struct region_table {
123         size_t encoded_size;
124         struct region_header header;
125         struct layout_region regions[];
126 };
127
128 struct index_save_data {
129         u64 timestamp;
130         u64 nonce;
131         /* Currently always version 1 */
132         u32 version;
133         u32 unused__;
134 };
135
136 struct index_state_version {
137         s32 signature;
138         s32 version_id;
139 };
140
141 static const struct index_state_version INDEX_STATE_VERSION_301 = {
142         .signature  = -1,
143         .version_id = 301,
144 };
145
146 struct index_state_data301 {
147         struct index_state_version version;
148         u64 newest_chapter;
149         u64 oldest_chapter;
150         u64 last_save;
151         u32 unused;
152         u32 padding;
153 };
154
155 struct index_save_layout {
156         unsigned int zone_count;
157         struct layout_region index_save;
158         struct layout_region header;
159         struct layout_region index_page_map;
160         struct layout_region free_space;
161         struct layout_region volume_index_zones[MAX_ZONES];
162         struct layout_region open_chapter;
163         struct index_save_data save_data;
164         struct index_state_data301 state_data;
165 };
166
167 struct sub_index_layout {
168         u64 nonce;
169         struct layout_region sub_index;
170         struct layout_region volume;
171         struct index_save_layout *saves;
172 };
173
174 struct super_block_data {
175         u8 magic_label[MAGIC_SIZE];
176         u8 nonce_info[NONCE_INFO_SIZE];
177         u64 nonce;
178         u32 version;
179         u32 block_size;
180         u16 index_count;
181         u16 max_saves;
182         /* Padding reflects a blank field on permanent storage */
183         u8 padding[4];
184         u64 open_chapter_blocks;
185         u64 page_map_blocks;
186         u64 volume_offset;
187         u64 start_offset;
188 };
189
190 struct index_layout {
191         struct io_factory *factory;
192         size_t factory_size;
193         off_t offset;
194         struct super_block_data super;
195         struct layout_region header;
196         struct layout_region config;
197         struct sub_index_layout index;
198         struct layout_region seal;
199         u64 total_blocks;
200 };
201
202 struct save_layout_sizes {
203         unsigned int save_count;
204         size_t block_size;
205         u64 volume_blocks;
206         u64 volume_index_blocks;
207         u64 page_map_blocks;
208         u64 open_chapter_blocks;
209         u64 save_blocks;
210         u64 sub_index_blocks;
211         u64 total_blocks;
212         size_t total_size;
213 };
214
215 static inline bool is_converted_super_block(struct super_block_data *super)
216 {
217         return super->version == 7;
218 }
219
220 static int __must_check compute_sizes(const struct uds_configuration *config,
221                                       struct save_layout_sizes *sls)
222 {
223         int result;
224         struct index_geometry *geometry = config->geometry;
225
226         memset(sls, 0, sizeof(*sls));
227         sls->save_count = MAX_SAVES;
228         sls->block_size = UDS_BLOCK_SIZE;
229         sls->volume_blocks = geometry->bytes_per_volume / sls->block_size;
230
231         result = uds_compute_volume_index_save_blocks(config, sls->block_size,
232                                                       &sls->volume_index_blocks);
233         if (result != UDS_SUCCESS)
234                 return vdo_log_error_strerror(result, "cannot compute index save size");
235
236         sls->page_map_blocks =
237                 DIV_ROUND_UP(uds_compute_index_page_map_save_size(geometry),
238                              sls->block_size);
239         sls->open_chapter_blocks =
240                 DIV_ROUND_UP(uds_compute_saved_open_chapter_size(geometry),
241                              sls->block_size);
242         sls->save_blocks =
243                 1 + (sls->volume_index_blocks + sls->page_map_blocks + sls->open_chapter_blocks);
244         sls->sub_index_blocks = sls->volume_blocks + (sls->save_count * sls->save_blocks);
245         sls->total_blocks = 3 + sls->sub_index_blocks;
246         sls->total_size = sls->total_blocks * sls->block_size;
247
248         return UDS_SUCCESS;
249 }
250
251 /* Create unique data using the current time and a pseudorandom number. */
252 static void create_unique_nonce_data(u8 *buffer)
253 {
254         ktime_t now = current_time_ns(CLOCK_REALTIME);
255         u32 rand;
256         size_t offset = 0;
257
258         get_random_bytes(&rand, sizeof(u32));
259         memcpy(buffer + offset, &now, sizeof(now));
260         offset += sizeof(now);
261         memcpy(buffer + offset, &rand, sizeof(rand));
262         offset += sizeof(rand);
263         while (offset < NONCE_INFO_SIZE) {
264                 size_t len = min(NONCE_INFO_SIZE - offset, offset);
265
266                 memcpy(buffer + offset, buffer, len);
267                 offset += len;
268         }
269 }
270
271 static u64 hash_stuff(u64 start, const void *data, size_t len)
272 {
273         u32 seed = start ^ (start >> 27);
274         u8 hash_buffer[16];
275
276         murmurhash3_128(data, len, seed, hash_buffer);
277         return get_unaligned_le64(hash_buffer + 4);
278 }
279
280 /* Generate a primary nonce from the provided data. */
281 static u64 generate_primary_nonce(const void *data, size_t len)
282 {
283         return hash_stuff(0xa1b1e0fc, data, len);
284 }
285
286 /*
287  * Deterministically generate a secondary nonce from an existing nonce and some arbitrary data by
288  * hashing the original nonce and the data to produce a new nonce.
289  */
290 static u64 generate_secondary_nonce(u64 nonce, const void *data, size_t len)
291 {
292         return hash_stuff(nonce + 1, data, len);
293 }
294
295 static int __must_check open_layout_reader(struct index_layout *layout,
296                                            struct layout_region *lr, off_t offset,
297                                            struct buffered_reader **reader_ptr)
298 {
299         return uds_make_buffered_reader(layout->factory, lr->start_block + offset,
300                                         lr->block_count, reader_ptr);
301 }
302
303 static int open_region_reader(struct index_layout *layout, struct layout_region *region,
304                               struct buffered_reader **reader_ptr)
305 {
306         return open_layout_reader(layout, region, -layout->super.start_offset,
307                                   reader_ptr);
308 }
309
310 static int __must_check open_layout_writer(struct index_layout *layout,
311                                            struct layout_region *lr, off_t offset,
312                                            struct buffered_writer **writer_ptr)
313 {
314         return uds_make_buffered_writer(layout->factory, lr->start_block + offset,
315                                         lr->block_count, writer_ptr);
316 }
317
318 static int open_region_writer(struct index_layout *layout, struct layout_region *region,
319                               struct buffered_writer **writer_ptr)
320 {
321         return open_layout_writer(layout, region, -layout->super.start_offset,
322                                   writer_ptr);
323 }
324
325 static void generate_super_block_data(struct save_layout_sizes *sls,
326                                       struct super_block_data *super)
327 {
328         memset(super, 0, sizeof(*super));
329         memcpy(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE);
330         create_unique_nonce_data(super->nonce_info);
331
332         super->nonce = generate_primary_nonce(super->nonce_info,
333                                               sizeof(super->nonce_info));
334         super->version = SUPER_VERSION_CURRENT;
335         super->block_size = sls->block_size;
336         super->index_count = 1;
337         super->max_saves = sls->save_count;
338         super->open_chapter_blocks = sls->open_chapter_blocks;
339         super->page_map_blocks = sls->page_map_blocks;
340         super->volume_offset = 0;
341         super->start_offset = 0;
342 }
343
344 static void define_sub_index_nonce(struct index_layout *layout)
345 {
346         struct sub_index_nonce_data {
347                 u64 offset;
348                 u16 index_id;
349         };
350         struct sub_index_layout *sil = &layout->index;
351         u64 primary_nonce = layout->super.nonce;
352         u8 buffer[sizeof(struct sub_index_nonce_data)] = { 0 };
353         size_t offset = 0;
354
355         encode_u64_le(buffer, &offset, sil->sub_index.start_block);
356         encode_u16_le(buffer, &offset, 0);
357         sil->nonce = generate_secondary_nonce(primary_nonce, buffer, sizeof(buffer));
358         if (sil->nonce == 0) {
359                 sil->nonce = generate_secondary_nonce(~primary_nonce + 1, buffer,
360                                                       sizeof(buffer));
361         }
362 }
363
364 static void setup_sub_index(struct index_layout *layout, u64 start_block,
365                             struct save_layout_sizes *sls)
366 {
367         struct sub_index_layout *sil = &layout->index;
368         u64 next_block = start_block;
369         unsigned int i;
370
371         sil->sub_index = (struct layout_region) {
372                 .start_block = start_block,
373                 .block_count = sls->sub_index_blocks,
374                 .kind = RL_KIND_INDEX,
375                 .instance = 0,
376         };
377
378         sil->volume = (struct layout_region) {
379                 .start_block = next_block,
380                 .block_count = sls->volume_blocks,
381                 .kind = RL_KIND_VOLUME,
382                 .instance = RL_SOLE_INSTANCE,
383         };
384
385         next_block += sls->volume_blocks;
386
387         for (i = 0; i < sls->save_count; i++) {
388                 sil->saves[i].index_save = (struct layout_region) {
389                         .start_block = next_block,
390                         .block_count = sls->save_blocks,
391                         .kind = RL_KIND_SAVE,
392                         .instance = i,
393                 };
394
395                 next_block += sls->save_blocks;
396         }
397
398         define_sub_index_nonce(layout);
399 }
400
401 static void initialize_layout(struct index_layout *layout, struct save_layout_sizes *sls)
402 {
403         u64 next_block = layout->offset / sls->block_size;
404
405         layout->total_blocks = sls->total_blocks;
406         generate_super_block_data(sls, &layout->super);
407         layout->header = (struct layout_region) {
408                 .start_block = next_block++,
409                 .block_count = 1,
410                 .kind = RL_KIND_HEADER,
411                 .instance = RL_SOLE_INSTANCE,
412         };
413
414         layout->config = (struct layout_region) {
415                 .start_block = next_block++,
416                 .block_count = 1,
417                 .kind = RL_KIND_CONFIG,
418                 .instance = RL_SOLE_INSTANCE,
419         };
420
421         setup_sub_index(layout, next_block, sls);
422         next_block += sls->sub_index_blocks;
423
424         layout->seal = (struct layout_region) {
425                 .start_block = next_block,
426                 .block_count = 1,
427                 .kind = RL_KIND_SEAL,
428                 .instance = RL_SOLE_INSTANCE,
429         };
430 }
431
432 static int __must_check make_index_save_region_table(struct index_save_layout *isl,
433                                                      struct region_table **table_ptr)
434 {
435         int result;
436         unsigned int z;
437         struct region_table *table;
438         struct layout_region *lr;
439         u16 region_count;
440         size_t payload;
441         size_t type;
442
443         if (isl->zone_count > 0) {
444                 /*
445                  * Normal save regions: header, page map, volume index zones,
446                  * open chapter, and possibly free space.
447                  */
448                 region_count = 3 + isl->zone_count;
449                 if (isl->free_space.block_count > 0)
450                         region_count++;
451
452                 payload = sizeof(isl->save_data) + sizeof(isl->state_data);
453                 type = RH_TYPE_SAVE;
454         } else {
455                 /* Empty save regions: header, page map, free space. */
456                 region_count = 3;
457                 payload = sizeof(isl->save_data);
458                 type = RH_TYPE_UNSAVED;
459         }
460
461         result = vdo_allocate_extended(struct region_table, region_count,
462                                        struct layout_region,
463                                        "layout region table for ISL", &table);
464         if (result != VDO_SUCCESS)
465                 return result;
466
467         lr = &table->regions[0];
468         *lr++ = isl->header;
469         *lr++ = isl->index_page_map;
470         for (z = 0; z < isl->zone_count; z++)
471                 *lr++ = isl->volume_index_zones[z];
472
473         if (isl->zone_count > 0)
474                 *lr++ = isl->open_chapter;
475
476         if (isl->free_space.block_count > 0)
477                 *lr++ = isl->free_space;
478
479         table->header = (struct region_header) {
480                 .magic = REGION_MAGIC,
481                 .region_blocks = isl->index_save.block_count,
482                 .type = type,
483                 .version = 1,
484                 .region_count = region_count,
485                 .payload = payload,
486         };
487
488         table->encoded_size = (sizeof(struct region_header) + payload +
489                                region_count * sizeof(struct layout_region));
490         *table_ptr = table;
491         return UDS_SUCCESS;
492 }
493
494 static void encode_region_table(u8 *buffer, size_t *offset, struct region_table *table)
495 {
496         unsigned int i;
497
498         encode_u64_le(buffer, offset, REGION_MAGIC);
499         encode_u64_le(buffer, offset, table->header.region_blocks);
500         encode_u16_le(buffer, offset, table->header.type);
501         encode_u16_le(buffer, offset, table->header.version);
502         encode_u16_le(buffer, offset, table->header.region_count);
503         encode_u16_le(buffer, offset, table->header.payload);
504
505         for (i = 0; i < table->header.region_count; i++) {
506                 encode_u64_le(buffer, offset, table->regions[i].start_block);
507                 encode_u64_le(buffer, offset, table->regions[i].block_count);
508                 encode_u32_le(buffer, offset, 0);
509                 encode_u16_le(buffer, offset, table->regions[i].kind);
510                 encode_u16_le(buffer, offset, table->regions[i].instance);
511         }
512 }
513
514 static int __must_check write_index_save_header(struct index_save_layout *isl,
515                                                 struct region_table *table,
516                                                 struct buffered_writer *writer)
517 {
518         int result;
519         u8 *buffer;
520         size_t offset = 0;
521
522         result = vdo_allocate(table->encoded_size, u8, "index save data", &buffer);
523         if (result != VDO_SUCCESS)
524                 return result;
525
526         encode_region_table(buffer, &offset, table);
527         encode_u64_le(buffer, &offset, isl->save_data.timestamp);
528         encode_u64_le(buffer, &offset, isl->save_data.nonce);
529         encode_u32_le(buffer, &offset, isl->save_data.version);
530         encode_u32_le(buffer, &offset, 0);
531         if (isl->zone_count > 0) {
532                 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.signature);
533                 encode_u32_le(buffer, &offset, INDEX_STATE_VERSION_301.version_id);
534                 encode_u64_le(buffer, &offset, isl->state_data.newest_chapter);
535                 encode_u64_le(buffer, &offset, isl->state_data.oldest_chapter);
536                 encode_u64_le(buffer, &offset, isl->state_data.last_save);
537                 encode_u64_le(buffer, &offset, 0);
538         }
539
540         result = uds_write_to_buffered_writer(writer, buffer, offset);
541         vdo_free(buffer);
542         if (result != UDS_SUCCESS)
543                 return result;
544
545         return uds_flush_buffered_writer(writer);
546 }
547
548 static int write_index_save_layout(struct index_layout *layout,
549                                    struct index_save_layout *isl)
550 {
551         int result;
552         struct region_table *table;
553         struct buffered_writer *writer;
554
555         result = make_index_save_region_table(isl, &table);
556         if (result != UDS_SUCCESS)
557                 return result;
558
559         result = open_region_writer(layout, &isl->header, &writer);
560         if (result != UDS_SUCCESS) {
561                 vdo_free(table);
562                 return result;
563         }
564
565         result = write_index_save_header(isl, table, writer);
566         vdo_free(table);
567         uds_free_buffered_writer(writer);
568
569         return result;
570 }
571
572 static void reset_index_save_layout(struct index_save_layout *isl, u64 page_map_blocks)
573 {
574         u64 free_blocks;
575         u64 next_block = isl->index_save.start_block;
576
577         isl->zone_count = 0;
578         memset(&isl->save_data, 0, sizeof(isl->save_data));
579
580         isl->header = (struct layout_region) {
581                 .start_block = next_block++,
582                 .block_count = 1,
583                 .kind = RL_KIND_HEADER,
584                 .instance = RL_SOLE_INSTANCE,
585         };
586
587         isl->index_page_map = (struct layout_region) {
588                 .start_block = next_block,
589                 .block_count = page_map_blocks,
590                 .kind = RL_KIND_INDEX_PAGE_MAP,
591                 .instance = RL_SOLE_INSTANCE,
592         };
593
594         next_block += page_map_blocks;
595
596         free_blocks = isl->index_save.block_count - page_map_blocks - 1;
597         isl->free_space = (struct layout_region) {
598                 .start_block = next_block,
599                 .block_count = free_blocks,
600                 .kind = RL_KIND_EMPTY,
601                 .instance = RL_SOLE_INSTANCE,
602         };
603 }
604
605 static int __must_check invalidate_old_save(struct index_layout *layout,
606                                             struct index_save_layout *isl)
607 {
608         reset_index_save_layout(isl, layout->super.page_map_blocks);
609         return write_index_save_layout(layout, isl);
610 }
611
612 static int discard_index_state_data(struct index_layout *layout)
613 {
614         int result;
615         int saved_result = UDS_SUCCESS;
616         unsigned int i;
617
618         for (i = 0; i < layout->super.max_saves; i++) {
619                 result = invalidate_old_save(layout, &layout->index.saves[i]);
620                 if (result != UDS_SUCCESS)
621                         saved_result = result;
622         }
623
624         if (saved_result != UDS_SUCCESS) {
625                 return vdo_log_error_strerror(result,
626                                               "%s: cannot destroy all index saves",
627                                               __func__);
628         }
629
630         return UDS_SUCCESS;
631 }
632
633 static int __must_check make_layout_region_table(struct index_layout *layout,
634                                                  struct region_table **table_ptr)
635 {
636         int result;
637         unsigned int i;
638         /* Regions: header, config, index, volume, saves, seal */
639         u16 region_count = 5 + layout->super.max_saves;
640         u16 payload;
641         struct region_table *table;
642         struct layout_region *lr;
643
644         result = vdo_allocate_extended(struct region_table, region_count,
645                                        struct layout_region, "layout region table",
646                                        &table);
647         if (result != VDO_SUCCESS)
648                 return result;
649
650         lr = &table->regions[0];
651         *lr++ = layout->header;
652         *lr++ = layout->config;
653         *lr++ = layout->index.sub_index;
654         *lr++ = layout->index.volume;
655
656         for (i = 0; i < layout->super.max_saves; i++)
657                 *lr++ = layout->index.saves[i].index_save;
658
659         *lr++ = layout->seal;
660
661         if (is_converted_super_block(&layout->super)) {
662                 payload = sizeof(struct super_block_data);
663         } else {
664                 payload = (sizeof(struct super_block_data) -
665                            sizeof(layout->super.volume_offset) -
666                            sizeof(layout->super.start_offset));
667         }
668
669         table->header = (struct region_header) {
670                 .magic = REGION_MAGIC,
671                 .region_blocks = layout->total_blocks,
672                 .type = RH_TYPE_SUPER,
673                 .version = 1,
674                 .region_count = region_count,
675                 .payload = payload,
676         };
677
678         table->encoded_size = (sizeof(struct region_header) + payload +
679                                region_count * sizeof(struct layout_region));
680         *table_ptr = table;
681         return UDS_SUCCESS;
682 }
683
684 static int __must_check write_layout_header(struct index_layout *layout,
685                                             struct region_table *table,
686                                             struct buffered_writer *writer)
687 {
688         int result;
689         u8 *buffer;
690         size_t offset = 0;
691
692         result = vdo_allocate(table->encoded_size, u8, "layout data", &buffer);
693         if (result != VDO_SUCCESS)
694                 return result;
695
696         encode_region_table(buffer, &offset, table);
697         memcpy(buffer + offset, &layout->super.magic_label, MAGIC_SIZE);
698         offset += MAGIC_SIZE;
699         memcpy(buffer + offset, &layout->super.nonce_info, NONCE_INFO_SIZE);
700         offset += NONCE_INFO_SIZE;
701         encode_u64_le(buffer, &offset, layout->super.nonce);
702         encode_u32_le(buffer, &offset, layout->super.version);
703         encode_u32_le(buffer, &offset, layout->super.block_size);
704         encode_u16_le(buffer, &offset, layout->super.index_count);
705         encode_u16_le(buffer, &offset, layout->super.max_saves);
706         encode_u32_le(buffer, &offset, 0);
707         encode_u64_le(buffer, &offset, layout->super.open_chapter_blocks);
708         encode_u64_le(buffer, &offset, layout->super.page_map_blocks);
709
710         if (is_converted_super_block(&layout->super)) {
711                 encode_u64_le(buffer, &offset, layout->super.volume_offset);
712                 encode_u64_le(buffer, &offset, layout->super.start_offset);
713         }
714
715         result = uds_write_to_buffered_writer(writer, buffer, offset);
716         vdo_free(buffer);
717         if (result != UDS_SUCCESS)
718                 return result;
719
720         return uds_flush_buffered_writer(writer);
721 }
722
723 static int __must_check write_uds_index_config(struct index_layout *layout,
724                                                struct uds_configuration *config,
725                                                off_t offset)
726 {
727         int result;
728         struct buffered_writer *writer = NULL;
729
730         result = open_layout_writer(layout, &layout->config, offset, &writer);
731         if (result != UDS_SUCCESS)
732                 return vdo_log_error_strerror(result, "failed to open config region");
733
734         result = uds_write_config_contents(writer, config, layout->super.version);
735         if (result != UDS_SUCCESS) {
736                 uds_free_buffered_writer(writer);
737                 return vdo_log_error_strerror(result, "failed to write config region");
738         }
739
740         result = uds_flush_buffered_writer(writer);
741         if (result != UDS_SUCCESS) {
742                 uds_free_buffered_writer(writer);
743                 return vdo_log_error_strerror(result, "cannot flush config writer");
744         }
745
746         uds_free_buffered_writer(writer);
747         return UDS_SUCCESS;
748 }
749
750 static int __must_check save_layout(struct index_layout *layout, off_t offset)
751 {
752         int result;
753         struct buffered_writer *writer = NULL;
754         struct region_table *table;
755
756         result = make_layout_region_table(layout, &table);
757         if (result != UDS_SUCCESS)
758                 return result;
759
760         result = open_layout_writer(layout, &layout->header, offset, &writer);
761         if (result != UDS_SUCCESS) {
762                 vdo_free(table);
763                 return result;
764         }
765
766         result = write_layout_header(layout, table, writer);
767         vdo_free(table);
768         uds_free_buffered_writer(writer);
769
770         return result;
771 }
772
773 static int create_index_layout(struct index_layout *layout, struct uds_configuration *config)
774 {
775         int result;
776         struct save_layout_sizes sizes;
777
778         result = compute_sizes(config, &sizes);
779         if (result != UDS_SUCCESS)
780                 return result;
781
782         result = vdo_allocate(sizes.save_count, struct index_save_layout, __func__,
783                               &layout->index.saves);
784         if (result != VDO_SUCCESS)
785                 return result;
786
787         initialize_layout(layout, &sizes);
788
789         result = discard_index_state_data(layout);
790         if (result != UDS_SUCCESS)
791                 return result;
792
793         result = write_uds_index_config(layout, config, 0);
794         if (result != UDS_SUCCESS)
795                 return result;
796
797         return save_layout(layout, 0);
798 }
799
800 static u64 generate_index_save_nonce(u64 volume_nonce, struct index_save_layout *isl)
801 {
802         struct save_nonce_data {
803                 struct index_save_data data;
804                 u64 offset;
805         } nonce_data;
806         u8 buffer[sizeof(nonce_data)];
807         size_t offset = 0;
808
809         encode_u64_le(buffer, &offset, isl->save_data.timestamp);
810         encode_u64_le(buffer, &offset, 0);
811         encode_u32_le(buffer, &offset, isl->save_data.version);
812         encode_u32_le(buffer, &offset, 0U);
813         encode_u64_le(buffer, &offset, isl->index_save.start_block);
814         VDO_ASSERT_LOG_ONLY(offset == sizeof(nonce_data),
815                             "%zu bytes encoded of %zu expected",
816                             offset, sizeof(nonce_data));
817         return generate_secondary_nonce(volume_nonce, buffer, sizeof(buffer));
818 }
819
820 static u64 validate_index_save_layout(struct index_save_layout *isl, u64 volume_nonce)
821 {
822         if ((isl->zone_count == 0) || (isl->save_data.timestamp == 0))
823                 return 0;
824
825         if (isl->save_data.nonce != generate_index_save_nonce(volume_nonce, isl))
826                 return 0;
827
828         return isl->save_data.timestamp;
829 }
830
831 static int find_latest_uds_index_save_slot(struct index_layout *layout,
832                                            struct index_save_layout **isl_ptr)
833 {
834         struct index_save_layout *latest = NULL;
835         struct index_save_layout *isl;
836         unsigned int i;
837         u64 save_time = 0;
838         u64 latest_time = 0;
839
840         for (i = 0; i < layout->super.max_saves; i++) {
841                 isl = &layout->index.saves[i];
842                 save_time = validate_index_save_layout(isl, layout->index.nonce);
843                 if (save_time > latest_time) {
844                         latest = isl;
845                         latest_time = save_time;
846                 }
847         }
848
849         if (latest == NULL) {
850                 vdo_log_error("No valid index save found");
851                 return UDS_INDEX_NOT_SAVED_CLEANLY;
852         }
853
854         *isl_ptr = latest;
855         return UDS_SUCCESS;
856 }
857
858 int uds_discard_open_chapter(struct index_layout *layout)
859 {
860         int result;
861         struct index_save_layout *isl;
862         struct buffered_writer *writer;
863
864         result = find_latest_uds_index_save_slot(layout, &isl);
865         if (result != UDS_SUCCESS)
866                 return result;
867
868         result = open_region_writer(layout, &isl->open_chapter, &writer);
869         if (result != UDS_SUCCESS)
870                 return result;
871
872         result = uds_write_to_buffered_writer(writer, NULL, UDS_BLOCK_SIZE);
873         if (result != UDS_SUCCESS) {
874                 uds_free_buffered_writer(writer);
875                 return result;
876         }
877
878         result = uds_flush_buffered_writer(writer);
879         uds_free_buffered_writer(writer);
880         return result;
881 }
882
883 int uds_load_index_state(struct index_layout *layout, struct uds_index *index)
884 {
885         int result;
886         unsigned int zone;
887         struct index_save_layout *isl;
888         struct buffered_reader *readers[MAX_ZONES];
889
890         result = find_latest_uds_index_save_slot(layout, &isl);
891         if (result != UDS_SUCCESS)
892                 return result;
893
894         index->newest_virtual_chapter = isl->state_data.newest_chapter;
895         index->oldest_virtual_chapter = isl->state_data.oldest_chapter;
896         index->last_save = isl->state_data.last_save;
897
898         result = open_region_reader(layout, &isl->open_chapter, &readers[0]);
899         if (result != UDS_SUCCESS)
900                 return result;
901
902         result = uds_load_open_chapter(index, readers[0]);
903         uds_free_buffered_reader(readers[0]);
904         if (result != UDS_SUCCESS)
905                 return result;
906
907         for (zone = 0; zone < isl->zone_count; zone++) {
908                 result = open_region_reader(layout, &isl->volume_index_zones[zone],
909                                             &readers[zone]);
910                 if (result != UDS_SUCCESS) {
911                         for (; zone > 0; zone--)
912                                 uds_free_buffered_reader(readers[zone - 1]);
913
914                         return result;
915                 }
916         }
917
918         result = uds_load_volume_index(index->volume_index, readers, isl->zone_count);
919         for (zone = 0; zone < isl->zone_count; zone++)
920                 uds_free_buffered_reader(readers[zone]);
921         if (result != UDS_SUCCESS)
922                 return result;
923
924         result = open_region_reader(layout, &isl->index_page_map, &readers[0]);
925         if (result != UDS_SUCCESS)
926                 return result;
927
928         result = uds_read_index_page_map(index->volume->index_page_map, readers[0]);
929         uds_free_buffered_reader(readers[0]);
930
931         return result;
932 }
933
934 static struct index_save_layout *select_oldest_index_save_layout(struct index_layout *layout)
935 {
936         struct index_save_layout *oldest = NULL;
937         struct index_save_layout *isl;
938         unsigned int i;
939         u64 save_time = 0;
940         u64 oldest_time = 0;
941
942         for (i = 0; i < layout->super.max_saves; i++) {
943                 isl = &layout->index.saves[i];
944                 save_time = validate_index_save_layout(isl, layout->index.nonce);
945                 if (oldest == NULL || save_time < oldest_time) {
946                         oldest = isl;
947                         oldest_time = save_time;
948                 }
949         }
950
951         return oldest;
952 }
953
954 static void instantiate_index_save_layout(struct index_save_layout *isl,
955                                           struct super_block_data *super,
956                                           u64 volume_nonce, unsigned int zone_count)
957 {
958         unsigned int z;
959         u64 next_block;
960         u64 free_blocks;
961         u64 volume_index_blocks;
962
963         isl->zone_count = zone_count;
964         memset(&isl->save_data, 0, sizeof(isl->save_data));
965         isl->save_data.timestamp = ktime_to_ms(current_time_ns(CLOCK_REALTIME));
966         isl->save_data.version = 1;
967         isl->save_data.nonce = generate_index_save_nonce(volume_nonce, isl);
968
969         next_block = isl->index_save.start_block;
970         isl->header = (struct layout_region) {
971                 .start_block = next_block++,
972                 .block_count = 1,
973                 .kind = RL_KIND_HEADER,
974                 .instance = RL_SOLE_INSTANCE,
975         };
976
977         isl->index_page_map = (struct layout_region) {
978                 .start_block = next_block,
979                 .block_count = super->page_map_blocks,
980                 .kind = RL_KIND_INDEX_PAGE_MAP,
981                 .instance = RL_SOLE_INSTANCE,
982         };
983         next_block += super->page_map_blocks;
984
985         free_blocks = (isl->index_save.block_count - 1 -
986                        super->page_map_blocks -
987                        super->open_chapter_blocks);
988         volume_index_blocks = free_blocks / isl->zone_count;
989         for (z = 0; z < isl->zone_count; z++) {
990                 isl->volume_index_zones[z] = (struct layout_region) {
991                         .start_block = next_block,
992                         .block_count = volume_index_blocks,
993                         .kind = RL_KIND_VOLUME_INDEX,
994                         .instance = z,
995                 };
996
997                 next_block += volume_index_blocks;
998                 free_blocks -= volume_index_blocks;
999         }
1000
1001         isl->open_chapter = (struct layout_region) {
1002                 .start_block = next_block,
1003                 .block_count = super->open_chapter_blocks,
1004                 .kind = RL_KIND_OPEN_CHAPTER,
1005                 .instance = RL_SOLE_INSTANCE,
1006         };
1007
1008         next_block += super->open_chapter_blocks;
1009
1010         isl->free_space = (struct layout_region) {
1011                 .start_block = next_block,
1012                 .block_count = free_blocks,
1013                 .kind = RL_KIND_EMPTY,
1014                 .instance = RL_SOLE_INSTANCE,
1015         };
1016 }
1017
1018 static int setup_uds_index_save_slot(struct index_layout *layout,
1019                                      unsigned int zone_count,
1020                                      struct index_save_layout **isl_ptr)
1021 {
1022         int result;
1023         struct index_save_layout *isl;
1024
1025         isl = select_oldest_index_save_layout(layout);
1026         result = invalidate_old_save(layout, isl);
1027         if (result != UDS_SUCCESS)
1028                 return result;
1029
1030         instantiate_index_save_layout(isl, &layout->super, layout->index.nonce,
1031                                       zone_count);
1032
1033         *isl_ptr = isl;
1034         return UDS_SUCCESS;
1035 }
1036
1037 static void cancel_uds_index_save(struct index_save_layout *isl)
1038 {
1039         memset(&isl->save_data, 0, sizeof(isl->save_data));
1040         memset(&isl->state_data, 0, sizeof(isl->state_data));
1041         isl->zone_count = 0;
1042 }
1043
1044 int uds_save_index_state(struct index_layout *layout, struct uds_index *index)
1045 {
1046         int result;
1047         unsigned int zone;
1048         struct index_save_layout *isl;
1049         struct buffered_writer *writers[MAX_ZONES];
1050
1051         result = setup_uds_index_save_slot(layout, index->zone_count, &isl);
1052         if (result != UDS_SUCCESS)
1053                 return result;
1054
1055         isl->state_data = (struct index_state_data301) {
1056                 .newest_chapter = index->newest_virtual_chapter,
1057                 .oldest_chapter = index->oldest_virtual_chapter,
1058                 .last_save = index->last_save,
1059         };
1060
1061         result = open_region_writer(layout, &isl->open_chapter, &writers[0]);
1062         if (result != UDS_SUCCESS) {
1063                 cancel_uds_index_save(isl);
1064                 return result;
1065         }
1066
1067         result = uds_save_open_chapter(index, writers[0]);
1068         uds_free_buffered_writer(writers[0]);
1069         if (result != UDS_SUCCESS) {
1070                 cancel_uds_index_save(isl);
1071                 return result;
1072         }
1073
1074         for (zone = 0; zone < index->zone_count; zone++) {
1075                 result = open_region_writer(layout, &isl->volume_index_zones[zone],
1076                                             &writers[zone]);
1077                 if (result != UDS_SUCCESS) {
1078                         for (; zone > 0; zone--)
1079                                 uds_free_buffered_writer(writers[zone - 1]);
1080
1081                         cancel_uds_index_save(isl);
1082                         return result;
1083                 }
1084         }
1085
1086         result = uds_save_volume_index(index->volume_index, writers, index->zone_count);
1087         for (zone = 0; zone < index->zone_count; zone++)
1088                 uds_free_buffered_writer(writers[zone]);
1089         if (result != UDS_SUCCESS) {
1090                 cancel_uds_index_save(isl);
1091                 return result;
1092         }
1093
1094         result = open_region_writer(layout, &isl->index_page_map, &writers[0]);
1095         if (result != UDS_SUCCESS) {
1096                 cancel_uds_index_save(isl);
1097                 return result;
1098         }
1099
1100         result = uds_write_index_page_map(index->volume->index_page_map, writers[0]);
1101         uds_free_buffered_writer(writers[0]);
1102         if (result != UDS_SUCCESS) {
1103                 cancel_uds_index_save(isl);
1104                 return result;
1105         }
1106
1107         return write_index_save_layout(layout, isl);
1108 }
1109
1110 static int __must_check load_region_table(struct buffered_reader *reader,
1111                                           struct region_table **table_ptr)
1112 {
1113         int result;
1114         unsigned int i;
1115         struct region_header header;
1116         struct region_table *table;
1117         u8 buffer[sizeof(struct region_header)];
1118         size_t offset = 0;
1119
1120         result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1121         if (result != UDS_SUCCESS)
1122                 return vdo_log_error_strerror(result, "cannot read region table header");
1123
1124         decode_u64_le(buffer, &offset, &header.magic);
1125         decode_u64_le(buffer, &offset, &header.region_blocks);
1126         decode_u16_le(buffer, &offset, &header.type);
1127         decode_u16_le(buffer, &offset, &header.version);
1128         decode_u16_le(buffer, &offset, &header.region_count);
1129         decode_u16_le(buffer, &offset, &header.payload);
1130
1131         if (header.magic != REGION_MAGIC)
1132                 return UDS_NO_INDEX;
1133
1134         if (header.version != 1) {
1135                 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1136                                               "unknown region table version %hu",
1137                                               header.version);
1138         }
1139
1140         result = vdo_allocate_extended(struct region_table, header.region_count,
1141                                        struct layout_region,
1142                                        "single file layout region table", &table);
1143         if (result != VDO_SUCCESS)
1144                 return result;
1145
1146         table->header = header;
1147         for (i = 0; i < header.region_count; i++) {
1148                 u8 region_buffer[sizeof(struct layout_region)];
1149
1150                 offset = 0;
1151                 result = uds_read_from_buffered_reader(reader, region_buffer,
1152                                                        sizeof(region_buffer));
1153                 if (result != UDS_SUCCESS) {
1154                         vdo_free(table);
1155                         return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1156                                                       "cannot read region table layouts");
1157                 }
1158
1159                 decode_u64_le(region_buffer, &offset, &table->regions[i].start_block);
1160                 decode_u64_le(region_buffer, &offset, &table->regions[i].block_count);
1161                 offset += sizeof(u32);
1162                 decode_u16_le(region_buffer, &offset, &table->regions[i].kind);
1163                 decode_u16_le(region_buffer, &offset, &table->regions[i].instance);
1164         }
1165
1166         *table_ptr = table;
1167         return UDS_SUCCESS;
1168 }
1169
1170 static int __must_check read_super_block_data(struct buffered_reader *reader,
1171                                               struct index_layout *layout,
1172                                               size_t saved_size)
1173 {
1174         int result;
1175         struct super_block_data *super = &layout->super;
1176         u8 *buffer;
1177         size_t offset = 0;
1178
1179         result = vdo_allocate(saved_size, u8, "super block data", &buffer);
1180         if (result != VDO_SUCCESS)
1181                 return result;
1182
1183         result = uds_read_from_buffered_reader(reader, buffer, saved_size);
1184         if (result != UDS_SUCCESS) {
1185                 vdo_free(buffer);
1186                 return vdo_log_error_strerror(result, "cannot read region table header");
1187         }
1188
1189         memcpy(&super->magic_label, buffer, MAGIC_SIZE);
1190         offset += MAGIC_SIZE;
1191         memcpy(&super->nonce_info, buffer + offset, NONCE_INFO_SIZE);
1192         offset += NONCE_INFO_SIZE;
1193         decode_u64_le(buffer, &offset, &super->nonce);
1194         decode_u32_le(buffer, &offset, &super->version);
1195         decode_u32_le(buffer, &offset, &super->block_size);
1196         decode_u16_le(buffer, &offset, &super->index_count);
1197         decode_u16_le(buffer, &offset, &super->max_saves);
1198         offset += sizeof(u32);
1199         decode_u64_le(buffer, &offset, &super->open_chapter_blocks);
1200         decode_u64_le(buffer, &offset, &super->page_map_blocks);
1201
1202         if (is_converted_super_block(super)) {
1203                 decode_u64_le(buffer, &offset, &super->volume_offset);
1204                 decode_u64_le(buffer, &offset, &super->start_offset);
1205         } else {
1206                 super->volume_offset = 0;
1207                 super->start_offset = 0;
1208         }
1209
1210         vdo_free(buffer);
1211
1212         if (memcmp(super->magic_label, LAYOUT_MAGIC, MAGIC_SIZE) != 0)
1213                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1214                                               "unknown superblock magic label");
1215
1216         if ((super->version < SUPER_VERSION_MINIMUM) ||
1217             (super->version == 4) || (super->version == 5) || (super->version == 6) ||
1218             (super->version > SUPER_VERSION_MAXIMUM)) {
1219                 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1220                                               "unknown superblock version number %u",
1221                                               super->version);
1222         }
1223
1224         if (super->volume_offset < super->start_offset) {
1225                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1226                                               "inconsistent offsets (start %llu, volume %llu)",
1227                                               (unsigned long long) super->start_offset,
1228                                               (unsigned long long) super->volume_offset);
1229         }
1230
1231         /* Sub-indexes are no longer used but the layout retains this field. */
1232         if (super->index_count != 1) {
1233                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1234                                               "invalid subindex count %u",
1235                                               super->index_count);
1236         }
1237
1238         if (generate_primary_nonce(super->nonce_info, sizeof(super->nonce_info)) != super->nonce) {
1239                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1240                                               "inconsistent superblock nonce");
1241         }
1242
1243         return UDS_SUCCESS;
1244 }
1245
1246 static int __must_check verify_region(struct layout_region *lr, u64 start_block,
1247                                       enum region_kind kind, unsigned int instance)
1248 {
1249         if (lr->start_block != start_block)
1250                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1251                                               "incorrect layout region offset");
1252
1253         if (lr->kind != kind)
1254                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1255                                               "incorrect layout region kind");
1256
1257         if (lr->instance != instance) {
1258                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1259                                               "incorrect layout region instance");
1260         }
1261
1262         return UDS_SUCCESS;
1263 }
1264
1265 static int __must_check verify_sub_index(struct index_layout *layout, u64 start_block,
1266                                          struct region_table *table)
1267 {
1268         int result;
1269         unsigned int i;
1270         struct sub_index_layout *sil = &layout->index;
1271         u64 next_block = start_block;
1272
1273         sil->sub_index = table->regions[2];
1274         result = verify_region(&sil->sub_index, next_block, RL_KIND_INDEX, 0);
1275         if (result != UDS_SUCCESS)
1276                 return result;
1277
1278         define_sub_index_nonce(layout);
1279
1280         sil->volume = table->regions[3];
1281         result = verify_region(&sil->volume, next_block, RL_KIND_VOLUME,
1282                                RL_SOLE_INSTANCE);
1283         if (result != UDS_SUCCESS)
1284                 return result;
1285
1286         next_block += sil->volume.block_count + layout->super.volume_offset;
1287
1288         for (i = 0; i < layout->super.max_saves; i++) {
1289                 sil->saves[i].index_save = table->regions[i + 4];
1290                 result = verify_region(&sil->saves[i].index_save, next_block,
1291                                        RL_KIND_SAVE, i);
1292                 if (result != UDS_SUCCESS)
1293                         return result;
1294
1295                 next_block += sil->saves[i].index_save.block_count;
1296         }
1297
1298         next_block -= layout->super.volume_offset;
1299         if (next_block != start_block + sil->sub_index.block_count) {
1300                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1301                                               "sub index region does not span all saves");
1302         }
1303
1304         return UDS_SUCCESS;
1305 }
1306
1307 static int __must_check reconstitute_layout(struct index_layout *layout,
1308                                             struct region_table *table, u64 first_block)
1309 {
1310         int result;
1311         u64 next_block = first_block;
1312
1313         result = vdo_allocate(layout->super.max_saves, struct index_save_layout,
1314                               __func__, &layout->index.saves);
1315         if (result != VDO_SUCCESS)
1316                 return result;
1317
1318         layout->total_blocks = table->header.region_blocks;
1319
1320         layout->header = table->regions[0];
1321         result = verify_region(&layout->header, next_block++, RL_KIND_HEADER,
1322                                RL_SOLE_INSTANCE);
1323         if (result != UDS_SUCCESS)
1324                 return result;
1325
1326         layout->config = table->regions[1];
1327         result = verify_region(&layout->config, next_block++, RL_KIND_CONFIG,
1328                                RL_SOLE_INSTANCE);
1329         if (result != UDS_SUCCESS)
1330                 return result;
1331
1332         result = verify_sub_index(layout, next_block, table);
1333         if (result != UDS_SUCCESS)
1334                 return result;
1335
1336         next_block += layout->index.sub_index.block_count;
1337
1338         layout->seal = table->regions[table->header.region_count - 1];
1339         result = verify_region(&layout->seal, next_block + layout->super.volume_offset,
1340                                RL_KIND_SEAL, RL_SOLE_INSTANCE);
1341         if (result != UDS_SUCCESS)
1342                 return result;
1343
1344         if (++next_block != (first_block + layout->total_blocks)) {
1345                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1346                                               "layout table does not span total blocks");
1347         }
1348
1349         return UDS_SUCCESS;
1350 }
1351
1352 static int __must_check load_super_block(struct index_layout *layout, size_t block_size,
1353                                          u64 first_block, struct buffered_reader *reader)
1354 {
1355         int result;
1356         struct region_table *table = NULL;
1357         struct super_block_data *super = &layout->super;
1358
1359         result = load_region_table(reader, &table);
1360         if (result != UDS_SUCCESS)
1361                 return result;
1362
1363         if (table->header.type != RH_TYPE_SUPER) {
1364                 vdo_free(table);
1365                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1366                                               "not a superblock region table");
1367         }
1368
1369         result = read_super_block_data(reader, layout, table->header.payload);
1370         if (result != UDS_SUCCESS) {
1371                 vdo_free(table);
1372                 return vdo_log_error_strerror(result, "unknown superblock format");
1373         }
1374
1375         if (super->block_size != block_size) {
1376                 vdo_free(table);
1377                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1378                                               "superblock saved block_size %u differs from supplied block_size %zu",
1379                                               super->block_size, block_size);
1380         }
1381
1382         first_block -= (super->volume_offset - super->start_offset);
1383         result = reconstitute_layout(layout, table, first_block);
1384         vdo_free(table);
1385         return result;
1386 }
1387
1388 static int __must_check read_index_save_data(struct buffered_reader *reader,
1389                                              struct index_save_layout *isl,
1390                                              size_t saved_size)
1391 {
1392         int result;
1393         struct index_state_version file_version;
1394         u8 buffer[sizeof(struct index_save_data) + sizeof(struct index_state_data301)];
1395         size_t offset = 0;
1396
1397         if (saved_size != sizeof(buffer)) {
1398                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1399                                               "unexpected index save data size %zu",
1400                                               saved_size);
1401         }
1402
1403         result = uds_read_from_buffered_reader(reader, buffer, sizeof(buffer));
1404         if (result != UDS_SUCCESS)
1405                 return vdo_log_error_strerror(result, "cannot read index save data");
1406
1407         decode_u64_le(buffer, &offset, &isl->save_data.timestamp);
1408         decode_u64_le(buffer, &offset, &isl->save_data.nonce);
1409         decode_u32_le(buffer, &offset, &isl->save_data.version);
1410         offset += sizeof(u32);
1411
1412         if (isl->save_data.version > 1) {
1413                 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1414                                               "unknown index save version number %u",
1415                                               isl->save_data.version);
1416         }
1417
1418         decode_s32_le(buffer, &offset, &file_version.signature);
1419         decode_s32_le(buffer, &offset, &file_version.version_id);
1420
1421         if ((file_version.signature != INDEX_STATE_VERSION_301.signature) ||
1422             (file_version.version_id != INDEX_STATE_VERSION_301.version_id)) {
1423                 return vdo_log_error_strerror(UDS_UNSUPPORTED_VERSION,
1424                                               "index state version %d,%d is unsupported",
1425                                               file_version.signature,
1426                                               file_version.version_id);
1427         }
1428
1429         decode_u64_le(buffer, &offset, &isl->state_data.newest_chapter);
1430         decode_u64_le(buffer, &offset, &isl->state_data.oldest_chapter);
1431         decode_u64_le(buffer, &offset, &isl->state_data.last_save);
1432         /* Skip past some historical fields that are now unused */
1433         offset += sizeof(u32) + sizeof(u32);
1434         return UDS_SUCCESS;
1435 }
1436
1437 static int __must_check reconstruct_index_save(struct index_save_layout *isl,
1438                                                struct region_table *table)
1439 {
1440         int result;
1441         unsigned int z;
1442         struct layout_region *last_region;
1443         u64 next_block = isl->index_save.start_block;
1444         u64 last_block = next_block + isl->index_save.block_count;
1445
1446         isl->zone_count = table->header.region_count - 3;
1447
1448         last_region = &table->regions[table->header.region_count - 1];
1449         if (last_region->kind == RL_KIND_EMPTY) {
1450                 isl->free_space = *last_region;
1451                 isl->zone_count--;
1452         } else {
1453                 isl->free_space = (struct layout_region) {
1454                         .start_block = last_block,
1455                         .block_count = 0,
1456                         .kind = RL_KIND_EMPTY,
1457                         .instance = RL_SOLE_INSTANCE,
1458                 };
1459         }
1460
1461         isl->header = table->regions[0];
1462         result = verify_region(&isl->header, next_block++, RL_KIND_HEADER,
1463                                RL_SOLE_INSTANCE);
1464         if (result != UDS_SUCCESS)
1465                 return result;
1466
1467         isl->index_page_map = table->regions[1];
1468         result = verify_region(&isl->index_page_map, next_block, RL_KIND_INDEX_PAGE_MAP,
1469                                RL_SOLE_INSTANCE);
1470         if (result != UDS_SUCCESS)
1471                 return result;
1472
1473         next_block += isl->index_page_map.block_count;
1474
1475         for (z = 0; z < isl->zone_count; z++) {
1476                 isl->volume_index_zones[z] = table->regions[z + 2];
1477                 result = verify_region(&isl->volume_index_zones[z], next_block,
1478                                        RL_KIND_VOLUME_INDEX, z);
1479                 if (result != UDS_SUCCESS)
1480                         return result;
1481
1482                 next_block += isl->volume_index_zones[z].block_count;
1483         }
1484
1485         isl->open_chapter = table->regions[isl->zone_count + 2];
1486         result = verify_region(&isl->open_chapter, next_block, RL_KIND_OPEN_CHAPTER,
1487                                RL_SOLE_INSTANCE);
1488         if (result != UDS_SUCCESS)
1489                 return result;
1490
1491         next_block += isl->open_chapter.block_count;
1492
1493         result = verify_region(&isl->free_space, next_block, RL_KIND_EMPTY,
1494                                RL_SOLE_INSTANCE);
1495         if (result != UDS_SUCCESS)
1496                 return result;
1497
1498         next_block += isl->free_space.block_count;
1499         if (next_block != last_block) {
1500                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1501                                               "index save layout table incomplete");
1502         }
1503
1504         return UDS_SUCCESS;
1505 }
1506
1507 static int __must_check load_index_save(struct index_save_layout *isl,
1508                                         struct buffered_reader *reader,
1509                                         unsigned int instance)
1510 {
1511         int result;
1512         struct region_table *table = NULL;
1513
1514         result = load_region_table(reader, &table);
1515         if (result != UDS_SUCCESS) {
1516                 return vdo_log_error_strerror(result, "cannot read index save %u header",
1517                                               instance);
1518         }
1519
1520         if (table->header.region_blocks != isl->index_save.block_count) {
1521                 u64 region_blocks = table->header.region_blocks;
1522
1523                 vdo_free(table);
1524                 return vdo_log_error_strerror(UDS_CORRUPT_DATA,
1525                                               "unexpected index save %u region block count %llu",
1526                                               instance,
1527                                               (unsigned long long) region_blocks);
1528         }
1529
1530         if (table->header.type == RH_TYPE_UNSAVED) {
1531                 vdo_free(table);
1532                 reset_index_save_layout(isl, 0);
1533                 return UDS_SUCCESS;
1534         }
1535
1536
1537         if (table->header.type != RH_TYPE_SAVE) {
1538                 vdo_log_error_strerror(UDS_CORRUPT_DATA,
1539                                        "unexpected index save %u header type %u",
1540                                        instance, table->header.type);
1541                 vdo_free(table);
1542                 return UDS_CORRUPT_DATA;
1543         }
1544
1545         result = read_index_save_data(reader, isl, table->header.payload);
1546         if (result != UDS_SUCCESS) {
1547                 vdo_free(table);
1548                 return vdo_log_error_strerror(result,
1549                                               "unknown index save %u data format",
1550                                               instance);
1551         }
1552
1553         result = reconstruct_index_save(isl, table);
1554         vdo_free(table);
1555         if (result != UDS_SUCCESS) {
1556                 return vdo_log_error_strerror(result, "cannot reconstruct index save %u",
1557                                               instance);
1558         }
1559
1560         return UDS_SUCCESS;
1561 }
1562
1563 static int __must_check load_sub_index_regions(struct index_layout *layout)
1564 {
1565         int result;
1566         unsigned int j;
1567         struct index_save_layout *isl;
1568         struct buffered_reader *reader;
1569
1570         for (j = 0; j < layout->super.max_saves; j++) {
1571                 isl = &layout->index.saves[j];
1572                 result = open_region_reader(layout, &isl->index_save, &reader);
1573
1574                 if (result != UDS_SUCCESS) {
1575                         vdo_log_error_strerror(result,
1576                                                "cannot get reader for index 0 save %u",
1577                                                j);
1578                         return result;
1579                 }
1580
1581                 result = load_index_save(isl, reader, j);
1582                 uds_free_buffered_reader(reader);
1583                 if (result != UDS_SUCCESS) {
1584                         /* Another save slot might be valid. */
1585                         reset_index_save_layout(isl, 0);
1586                         continue;
1587                 }
1588         }
1589
1590         return UDS_SUCCESS;
1591 }
1592
1593 static int __must_check verify_uds_index_config(struct index_layout *layout,
1594                                                 struct uds_configuration *config)
1595 {
1596         int result;
1597         struct buffered_reader *reader = NULL;
1598         u64 offset;
1599
1600         offset = layout->super.volume_offset - layout->super.start_offset;
1601         result = open_layout_reader(layout, &layout->config, offset, &reader);
1602         if (result != UDS_SUCCESS)
1603                 return vdo_log_error_strerror(result, "failed to open config reader");
1604
1605         result = uds_validate_config_contents(reader, config);
1606         if (result != UDS_SUCCESS) {
1607                 uds_free_buffered_reader(reader);
1608                 return vdo_log_error_strerror(result, "failed to read config region");
1609         }
1610
1611         uds_free_buffered_reader(reader);
1612         return UDS_SUCCESS;
1613 }
1614
1615 static int load_index_layout(struct index_layout *layout, struct uds_configuration *config)
1616 {
1617         int result;
1618         struct buffered_reader *reader;
1619
1620         result = uds_make_buffered_reader(layout->factory,
1621                                           layout->offset / UDS_BLOCK_SIZE, 1, &reader);
1622         if (result != UDS_SUCCESS)
1623                 return vdo_log_error_strerror(result, "unable to read superblock");
1624
1625         result = load_super_block(layout, UDS_BLOCK_SIZE,
1626                                   layout->offset / UDS_BLOCK_SIZE, reader);
1627         uds_free_buffered_reader(reader);
1628         if (result != UDS_SUCCESS)
1629                 return result;
1630
1631         result = verify_uds_index_config(layout, config);
1632         if (result != UDS_SUCCESS)
1633                 return result;
1634
1635         return load_sub_index_regions(layout);
1636 }
1637
1638 static int create_layout_factory(struct index_layout *layout,
1639                                  const struct uds_configuration *config)
1640 {
1641         int result;
1642         size_t writable_size;
1643         struct io_factory *factory = NULL;
1644
1645         result = uds_make_io_factory(config->bdev, &factory);
1646         if (result != UDS_SUCCESS)
1647                 return result;
1648
1649         writable_size = uds_get_writable_size(factory) & -UDS_BLOCK_SIZE;
1650         if (writable_size < config->size + config->offset) {
1651                 uds_put_io_factory(factory);
1652                 vdo_log_error("index storage (%zu) is smaller than the requested size %zu",
1653                               writable_size, config->size + config->offset);
1654                 return -ENOSPC;
1655         }
1656
1657         layout->factory = factory;
1658         layout->factory_size = (config->size > 0) ? config->size : writable_size;
1659         layout->offset = config->offset;
1660         return UDS_SUCCESS;
1661 }
1662
1663 int uds_make_index_layout(struct uds_configuration *config, bool new_layout,
1664                           struct index_layout **layout_ptr)
1665 {
1666         int result;
1667         struct index_layout *layout = NULL;
1668         struct save_layout_sizes sizes;
1669
1670         result = compute_sizes(config, &sizes);
1671         if (result != UDS_SUCCESS)
1672                 return result;
1673
1674         result = vdo_allocate(1, struct index_layout, __func__, &layout);
1675         if (result != VDO_SUCCESS)
1676                 return result;
1677
1678         result = create_layout_factory(layout, config);
1679         if (result != UDS_SUCCESS) {
1680                 uds_free_index_layout(layout);
1681                 return result;
1682         }
1683
1684         if (layout->factory_size < sizes.total_size) {
1685                 vdo_log_error("index storage (%zu) is smaller than the required size %llu",
1686                               layout->factory_size,
1687                               (unsigned long long) sizes.total_size);
1688                 uds_free_index_layout(layout);
1689                 return -ENOSPC;
1690         }
1691
1692         if (new_layout)
1693                 result = create_index_layout(layout, config);
1694         else
1695                 result = load_index_layout(layout, config);
1696         if (result != UDS_SUCCESS) {
1697                 uds_free_index_layout(layout);
1698                 return result;
1699         }
1700
1701         *layout_ptr = layout;
1702         return UDS_SUCCESS;
1703 }
1704
1705 void uds_free_index_layout(struct index_layout *layout)
1706 {
1707         if (layout == NULL)
1708                 return;
1709
1710         vdo_free(layout->index.saves);
1711         if (layout->factory != NULL)
1712                 uds_put_io_factory(layout->factory);
1713
1714         vdo_free(layout);
1715 }
1716
1717 int uds_replace_index_layout_storage(struct index_layout *layout,
1718                                      struct block_device *bdev)
1719 {
1720         return uds_replace_storage(layout->factory, bdev);
1721 }
1722
1723 /* Obtain a dm_bufio_client for the volume region. */
1724 int uds_open_volume_bufio(struct index_layout *layout, size_t block_size,
1725                           unsigned int reserved_buffers,
1726                           struct dm_bufio_client **client_ptr)
1727 {
1728         off_t offset = (layout->index.volume.start_block +
1729                         layout->super.volume_offset -
1730                         layout->super.start_offset);
1731
1732         return uds_make_bufio(layout->factory, offset, block_size, reserved_buffers,
1733                               client_ptr);
1734 }
1735
1736 u64 uds_get_volume_nonce(struct index_layout *layout)
1737 {
1738         return layout->index.nonce;
1739 }
This page took 0.133821 seconds and 4 git commands to generate.