1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2016-2017 Red Hat, Inc. All rights reserved.
4  * Copyright (C) 2016-2017 Milan Broz
5  * Copyright (C) 2016-2017 Mikulas Patocka
6  *
7  * This file is released under the GPL.
8  */
9
10 #include "dm-bio-record.h"
11
12 #include <linux/compiler.h>
13 #include <linux/module.h>
14 #include <linux/device-mapper.h>
15 #include <linux/dm-io.h>
16 #include <linux/vmalloc.h>
17 #include <linux/sort.h>
18 #include <linux/rbtree.h>
19 #include <linux/delay.h>
20 #include <linux/random.h>
21 #include <linux/reboot.h>
22 #include <crypto/hash.h>
23 #include <crypto/skcipher.h>
24 #include <linux/async_tx.h>
25 #include <linux/dm-bufio.h>
26
27 #include "dm-audit.h"
28
29 #define DM_MSG_PREFIX "integrity"
30
31 #define DEFAULT_INTERLEAVE_SECTORS      32768
32 #define DEFAULT_JOURNAL_SIZE_FACTOR     7
33 #define DEFAULT_SECTORS_PER_BITMAP_BIT  32768
34 #define DEFAULT_BUFFER_SECTORS          128
35 #define DEFAULT_JOURNAL_WATERMARK       50
36 #define DEFAULT_SYNC_MSEC               10000
37 #define DEFAULT_MAX_JOURNAL_SECTORS     (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192)
38 #define MIN_LOG2_INTERLEAVE_SECTORS     3
39 #define MAX_LOG2_INTERLEAVE_SECTORS     31
40 #define METADATA_WORKQUEUE_MAX_ACTIVE   16
41 #define RECALC_SECTORS                  (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048)
42 #define RECALC_WRITE_SUPER              16
43 #define BITMAP_BLOCK_SIZE               4096    /* don't change it */
44 #define BITMAP_FLUSH_INTERVAL           (10 * HZ)
45 #define DISCARD_FILLER                  0xf6
46 #define SALT_SIZE                       16
47 #define RECHECK_POOL_SIZE               256
48
49 /*
50  * Warning - DEBUG_PRINT prints security-sensitive data to the log,
51  * so it should not be enabled in the official kernel
52  */
53 //#define DEBUG_PRINT
54 //#define INTERNAL_VERIFY
55
56 /*
57  * On disk structures
58  */
59
60 #define SB_MAGIC                        "integrt"
61 #define SB_VERSION_1                    1
62 #define SB_VERSION_2                    2
63 #define SB_VERSION_3                    3
64 #define SB_VERSION_4                    4
65 #define SB_VERSION_5                    5
66 #define SB_VERSION_6                    6
67 #define SB_SECTORS                      8
68 #define MAX_SECTORS_PER_BLOCK           8
69
70 struct superblock {
71         __u8 magic[8];
72         __u8 version;
73         __u8 log2_interleave_sectors;
74         __le16 integrity_tag_size;
75         __le32 journal_sections;
76         __le64 provided_data_sectors;   /* userspace uses this value */
77         __le32 flags;
78         __u8 log2_sectors_per_block;
79         __u8 log2_blocks_per_bitmap_bit;
80         __u8 pad[2];
81         __le64 recalc_sector;
82         __u8 pad2[8];
83         __u8 salt[SALT_SIZE];
84 };
85
86 #define SB_FLAG_HAVE_JOURNAL_MAC        0x1
87 #define SB_FLAG_RECALCULATING           0x2
88 #define SB_FLAG_DIRTY_BITMAP            0x4
89 #define SB_FLAG_FIXED_PADDING           0x8
90 #define SB_FLAG_FIXED_HMAC              0x10
91 #define SB_FLAG_INLINE                  0x20
92
93 #define JOURNAL_ENTRY_ROUNDUP           8
94
95 typedef __le64 commit_id_t;
96 #define JOURNAL_MAC_PER_SECTOR          8
97
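/*
 * One journal entry describes one data block.  u.sector is the target
 * logical sector (stored as two 32-bit halves so that 32-bit machines can
 * update it in a defined order, see journal_entry_set_sector() below);
 * last_bytes[] holds one 8-byte value per 512-byte sector of the block
 * (the original last bytes of each data sector, whose place in the journal
 * is taken by the per-sector commit_id), and the integrity tag, if any,
 * follows the array (see journal_entry_tag()).
 */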
98 struct journal_entry {
99         union {
100                 struct {
101                         __le32 sector_lo;
102                         __le32 sector_hi;
103                 } s;
104                 __le64 sector;
105         } u;
106         commit_id_t last_bytes[];
107         /* __u8 tag[0]; */
108 };
109
110 #define journal_entry_tag(ic, je)               ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])
111
112 #if BITS_PER_LONG == 64
113 #define journal_entry_set_sector(je, x)         do { smp_wmb(); WRITE_ONCE((je)->u.sector, cpu_to_le64(x)); } while (0)
114 #else
115 #define journal_entry_set_sector(je, x)         do { (je)->u.s.sector_lo = cpu_to_le32(x); smp_wmb(); WRITE_ONCE((je)->u.s.sector_hi, cpu_to_le32((x) >> 32)); } while (0)
116 #endif
117 #define journal_entry_get_sector(je)            le64_to_cpu((je)->u.sector)
118 #define journal_entry_is_unused(je)             ((je)->u.s.sector_hi == cpu_to_le32(-1))
119 #define journal_entry_set_unused(je)            ((je)->u.s.sector_hi = cpu_to_le32(-1))
120 #define journal_entry_is_inprogress(je)         ((je)->u.s.sector_hi == cpu_to_le32(-2))
121 #define journal_entry_set_inprogress(je)        ((je)->u.s.sector_hi = cpu_to_le32(-2))
122
123 #define JOURNAL_BLOCK_SECTORS           8
124 #define JOURNAL_SECTOR_DATA             ((1 << SECTOR_SHIFT) - sizeof(commit_id_t))
125 #define JOURNAL_MAC_SIZE                (JOURNAL_MAC_PER_SECTOR * JOURNAL_BLOCK_SECTORS)
126
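/*
 * View of one 512-byte journal sector: payload bytes, an 8-byte slice of the
 * section MAC (filled in by rw_section_mac() for the first
 * JOURNAL_BLOCK_SECTORS sectors of each section), and a trailing commit_id
 * that lets replay detect sections that were not written completely.
 */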
127 struct journal_sector {
128         struct_group(sectors,
129                 __u8 entries[JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR];
130                 __u8 mac[JOURNAL_MAC_PER_SECTOR];
131         );
132         commit_id_t commit_id;
133 };
134
135 #define MAX_TAG_SIZE                    (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK]))
136
137 #define METADATA_PADDING_SECTORS        8
138
139 #define N_COMMIT_IDS                    4
140
141 static unsigned char prev_commit_seq(unsigned char seq)
142 {
143         return (seq + N_COMMIT_IDS - 1) % N_COMMIT_IDS;
144 }
145
146 static unsigned char next_commit_seq(unsigned char seq)
147 {
148         return (seq + 1) % N_COMMIT_IDS;
149 }
150
151 /*
152  * In-memory structures
153  */
154
155 struct journal_node {
156         struct rb_node node;
157         sector_t sector;
158 };
159
160 struct alg_spec {
161         char *alg_string;
162         char *key_string;
163         __u8 *key;
164         unsigned int key_size;
165 };
166
167 struct dm_integrity_c {
168         struct dm_dev *dev;
169         struct dm_dev *meta_dev;
170         unsigned int tag_size;
171         __s8 log2_tag_size;
172         unsigned int tuple_size;
173         sector_t start;
174         mempool_t journal_io_mempool;
175         struct dm_io_client *io;
176         struct dm_bufio_client *bufio;
177         struct workqueue_struct *metadata_wq;
178         struct superblock *sb;
179         unsigned int journal_pages;
180         unsigned int n_bitmap_blocks;
181
182         struct page_list *journal;
183         struct page_list *journal_io;
184         struct page_list *journal_xor;
185         struct page_list *recalc_bitmap;
186         struct page_list *may_write_bitmap;
187         struct bitmap_block_status *bbs;
188         unsigned int bitmap_flush_interval;
189         int synchronous_mode;
190         struct bio_list synchronous_bios;
191         struct delayed_work bitmap_flush_work;
192
193         struct crypto_skcipher *journal_crypt;
194         struct scatterlist **journal_scatterlist;
195         struct scatterlist **journal_io_scatterlist;
196         struct skcipher_request **sk_requests;
197
198         struct crypto_shash *journal_mac;
199
200         struct journal_node *journal_tree;
201         struct rb_root journal_tree_root;
202
203         sector_t provided_data_sectors;
204
205         unsigned short journal_entry_size;
206         unsigned char journal_entries_per_sector;
207         unsigned char journal_section_entries;
208         unsigned short journal_section_sectors;
209         unsigned int journal_sections;
210         unsigned int journal_entries;
211         sector_t data_device_sectors;
212         sector_t meta_device_sectors;
213         unsigned int initial_sectors;
214         unsigned int metadata_run;
215         __s8 log2_metadata_run;
216         __u8 log2_buffer_sectors;
217         __u8 sectors_per_block;
218         __u8 log2_blocks_per_bitmap_bit;
219
220         unsigned char mode;
221
222         int failed;
223
224         struct crypto_shash *internal_hash;
225
226         struct dm_target *ti;
227
228         /* these variables are locked with endio_wait.lock */
229         struct rb_root in_progress;
230         struct list_head wait_list;
231         wait_queue_head_t endio_wait;
232         struct workqueue_struct *wait_wq;
233         struct workqueue_struct *offload_wq;
234
235         unsigned char commit_seq;
236         commit_id_t commit_ids[N_COMMIT_IDS];
237
238         unsigned int committed_section;
239         unsigned int n_committed_sections;
240
241         unsigned int uncommitted_section;
242         unsigned int n_uncommitted_sections;
243
244         unsigned int free_section;
245         unsigned char free_section_entry;
246         unsigned int free_sectors;
247
248         unsigned int free_sectors_threshold;
249
250         struct workqueue_struct *commit_wq;
251         struct work_struct commit_work;
252
253         struct workqueue_struct *writer_wq;
254         struct work_struct writer_work;
255
256         struct workqueue_struct *recalc_wq;
257         struct work_struct recalc_work;
258
259         struct bio_list flush_bio_list;
260
261         unsigned long autocommit_jiffies;
262         struct timer_list autocommit_timer;
263         unsigned int autocommit_msec;
264
265         wait_queue_head_t copy_to_journal_wait;
266
267         struct completion crypto_backoff;
268
269         bool wrote_to_journal;
270         bool journal_uptodate;
271         bool just_formatted;
272         bool recalculate_flag;
273         bool reset_recalculate_flag;
274         bool discard;
275         bool fix_padding;
276         bool fix_hmac;
277         bool legacy_recalculate;
278
279         struct alg_spec internal_hash_alg;
280         struct alg_spec journal_crypt_alg;
281         struct alg_spec journal_mac_alg;
282
283         atomic64_t number_of_mismatches;
284
285         mempool_t recheck_pool;
286         struct bio_set recheck_bios;
287
288         struct notifier_block reboot_notifier;
289 };
290
291 struct dm_integrity_range {
292         sector_t logical_sector;
293         sector_t n_sectors;
294         bool waiting;
295         union {
296                 struct rb_node node;
297                 struct {
298                         struct task_struct *task;
299                         struct list_head wait_entry;
300                 };
301         };
302 };
303
304 struct dm_integrity_io {
305         struct work_struct work;
306
307         struct dm_integrity_c *ic;
308         enum req_op op;
309         bool fua;
310
311         struct dm_integrity_range range;
312
313         sector_t metadata_block;
314         unsigned int metadata_offset;
315
316         atomic_t in_flight;
317         blk_status_t bi_status;
318
319         struct completion *completion;
320
321         struct dm_bio_details bio_details;
322
323         char *integrity_payload;
324         bool integrity_payload_from_mempool;
325 };
326
327 struct journal_completion {
328         struct dm_integrity_c *ic;
329         atomic_t in_flight;
330         struct completion comp;
331 };
332
333 struct journal_io {
334         struct dm_integrity_range range;
335         struct journal_completion *comp;
336 };
337
338 struct bitmap_block_status {
339         struct work_struct work;
340         struct dm_integrity_c *ic;
341         unsigned int idx;
342         unsigned long *bitmap;
343         struct bio_list bio_queue;
344         spinlock_t bio_queue_lock;
345
346 };
347
348 static struct kmem_cache *journal_io_cache;
349
350 #define JOURNAL_IO_MEMPOOL      32
351
352 #ifdef DEBUG_PRINT
353 #define DEBUG_print(x, ...)                     printk(KERN_DEBUG x, ##__VA_ARGS__)
354 #define DEBUG_bytes(bytes, len, msg, ...)       printk(KERN_DEBUG msg "%s%*ph\n", ##__VA_ARGS__, \
355                                                        len ? ": " : "", len, bytes)
356 #else
357 #define DEBUG_print(x, ...)                     do { } while (0)
358 #define DEBUG_bytes(bytes, len, msg, ...)       do { } while (0)
359 #endif
360
361 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
362 static int dm_integrity_map_inline(struct dm_integrity_io *dio);
363 static void integrity_bio_wait(struct work_struct *w);
364 static void dm_integrity_dtr(struct dm_target *ti);
365
366 static void dm_integrity_io_error(struct dm_integrity_c *ic, const char *msg, int err)
367 {
368         if (err == -EILSEQ)
369                 atomic64_inc(&ic->number_of_mismatches);
370         if (!cmpxchg(&ic->failed, 0, err))
371                 DMERR("Error on %s: %d", msg, err);
372 }
373
374 static int dm_integrity_failed(struct dm_integrity_c *ic)
375 {
376         return READ_ONCE(ic->failed);
377 }
378
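/*
 * Automatic recalculation is refused for keyed configurations where it could
 * be abused to regenerate valid tags for tampered data, unless the user
 * explicitly asked for it with the "legacy_recalculate" option.
 */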
379 static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
380 {
381         if (ic->legacy_recalculate)
382                 return false;
383         if (!(ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) ?
384             ic->internal_hash_alg.key || ic->journal_mac_alg.key :
385             ic->internal_hash_alg.key && !ic->journal_mac_alg.key)
386                 return true;
387         return false;
388 }
389
390 static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned int i,
391                                           unsigned int j, unsigned char seq)
392 {
393         /*
394          * Xor the commit id with the section and sector numbers, so that if
395          * a piece of journal is written to the wrong place, it is detected.
396          */
397         return ic->commit_ids[seq] ^ cpu_to_le64(((__u64)i << 32) ^ j);
398 }
399
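/*
 * Interleaved layout (no separate metadata device): the data device is split
 * into areas of 2^log2_interleave_sectors sectors, and each area's data is
 * preceded by a metadata run holding its tags.  get_area_and_offset() splits
 * a logical sector into (area, offset); with a separate metadata device there
 * is a single area and the offset is the logical sector itself.
 */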
400 static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
401                                 sector_t *area, sector_t *offset)
402 {
403         if (!ic->meta_dev) {
404                 __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors;
405                 *area = data_sector >> log2_interleave_sectors;
406                 *offset = (unsigned int)data_sector & ((1U << log2_interleave_sectors) - 1);
407         } else {
408                 *area = 0;
409                 *offset = data_sector;
410         }
411 }
412
413 #define sector_to_block(ic, n)                                          \
414 do {                                                                    \
415         BUG_ON((n) & (unsigned int)((ic)->sectors_per_block - 1));              \
416         (n) >>= (ic)->sb->log2_sectors_per_block;                       \
417 } while (0)
418
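/*
 * Map (area, offset) to the dm-bufio block that holds the corresponding tag
 * and return it; *metadata_offset is set to the byte offset of the tag within
 * that block.  The power-of-two tag-size case avoids the multiplications.
 */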
419 static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area,
420                                             sector_t offset, unsigned int *metadata_offset)
421 {
422         __u64 ms;
423         unsigned int mo;
424
425         ms = area << ic->sb->log2_interleave_sectors;
426         if (likely(ic->log2_metadata_run >= 0))
427                 ms += area << ic->log2_metadata_run;
428         else
429                 ms += area * ic->metadata_run;
430         ms >>= ic->log2_buffer_sectors;
431
432         sector_to_block(ic, offset);
433
434         if (likely(ic->log2_tag_size >= 0)) {
435                 ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size);
436                 mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
437         } else {
438                 ms += (__u64)offset * ic->tag_size >> (SECTOR_SHIFT + ic->log2_buffer_sectors);
439                 mo = (offset * ic->tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
440         }
441         *metadata_offset = mo;
442         return ms;
443 }
444
445 static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector_t offset)
446 {
447         sector_t result;
448
449         if (ic->meta_dev)
450                 return offset;
451
452         result = area << ic->sb->log2_interleave_sectors;
453         if (likely(ic->log2_metadata_run >= 0))
454                 result += (area + 1) << ic->log2_metadata_run;
455         else
456                 result += (area + 1) * ic->metadata_run;
457
458         result += (sector_t)ic->initial_sectors + offset;
459         result += ic->start;
460
461         return result;
462 }
463
464 static void wraparound_section(struct dm_integrity_c *ic, unsigned int *sec_ptr)
465 {
466         if (unlikely(*sec_ptr >= ic->journal_sections))
467                 *sec_ptr -= ic->journal_sections;
468 }
469
470 static void sb_set_version(struct dm_integrity_c *ic)
471 {
472         if (ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE))
473                 ic->sb->version = SB_VERSION_6;
474         else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC))
475                 ic->sb->version = SB_VERSION_5;
476         else if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING))
477                 ic->sb->version = SB_VERSION_4;
478         else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
479                 ic->sb->version = SB_VERSION_3;
480         else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
481                 ic->sb->version = SB_VERSION_2;
482         else
483                 ic->sb->version = SB_VERSION_1;
484 }
485
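/*
 * Compute (wr == true) or verify (wr == false) the superblock MAC, which is
 * stored in the last bytes of the 512-byte superblock sector when the
 * SB_FLAG_FIXED_HMAC format is in use.
 */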
486 static int sb_mac(struct dm_integrity_c *ic, bool wr)
487 {
488         SHASH_DESC_ON_STACK(desc, ic->journal_mac);
489         int r;
490         unsigned int mac_size = crypto_shash_digestsize(ic->journal_mac);
491         __u8 *sb = (__u8 *)ic->sb;
492         __u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size;
493
494         if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) {
495                 dm_integrity_io_error(ic, "digest is too long", -EINVAL);
496                 return -EINVAL;
497         }
498
499         desc->tfm = ic->journal_mac;
500
501         if (likely(wr)) {
502                 r = crypto_shash_digest(desc, sb, mac - sb, mac);
503                 if (unlikely(r < 0)) {
504                         dm_integrity_io_error(ic, "crypto_shash_digest", r);
505                         return r;
506                 }
507         } else {
508                 __u8 actual_mac[HASH_MAX_DIGESTSIZE];
509
510                 r = crypto_shash_digest(desc, sb, mac - sb, actual_mac);
511                 if (unlikely(r < 0)) {
512                         dm_integrity_io_error(ic, "crypto_shash_digest", r);
513                         return r;
514                 }
515                 if (memcmp(mac, actual_mac, mac_size)) {
516                         dm_integrity_io_error(ic, "superblock mac", -EILSEQ);
517                         dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0);
518                         return -EILSEQ;
519                 }
520         }
521
522         return 0;
523 }
524
525 static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf)
526 {
527         struct dm_io_request io_req;
528         struct dm_io_region io_loc;
529         const enum req_op op = opf & REQ_OP_MASK;
530         int r;
531
532         io_req.bi_opf = opf;
533         io_req.mem.type = DM_IO_KMEM;
534         io_req.mem.ptr.addr = ic->sb;
535         io_req.notify.fn = NULL;
536         io_req.client = ic->io;
537         io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
538         io_loc.sector = ic->start;
539         io_loc.count = SB_SECTORS;
540
541         if (op == REQ_OP_WRITE) {
542                 sb_set_version(ic);
543                 if (ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
544                         r = sb_mac(ic, true);
545                         if (unlikely(r))
546                                 return r;
547                 }
548         }
549
550         r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
551         if (unlikely(r))
552                 return r;
553
554         if (op == REQ_OP_READ) {
555                 if (ic->mode != 'R' && ic->journal_mac && ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
556                         r = sb_mac(ic, false);
557                         if (unlikely(r))
558                                 return r;
559                 }
560         }
561
562         return 0;
563 }
564
565 #define BITMAP_OP_TEST_ALL_SET          0
566 #define BITMAP_OP_TEST_ALL_CLEAR        1
567 #define BITMAP_OP_SET                   2
568 #define BITMAP_OP_CLEAR                 3
569
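/*
 * Apply one of the BITMAP_OP_* operations to the bitmap bits covering
 * [sector, sector + n_sectors).  One bit covers
 * 2^(log2_sectors_per_block + log2_blocks_per_bitmap_bit) sectors; the TEST
 * variants bail out with false as soon as a bit in the wrong state is seen.
 */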
570 static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap,
571                             sector_t sector, sector_t n_sectors, int mode)
572 {
573         unsigned long bit, end_bit, this_end_bit, page, end_page;
574         unsigned long *data;
575
576         if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) {
577                 DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)",
578                         sector,
579                         n_sectors,
580                         ic->sb->log2_sectors_per_block,
581                         ic->log2_blocks_per_bitmap_bit,
582                         mode);
583                 BUG();
584         }
585
586         if (unlikely(!n_sectors))
587                 return true;
588
589         bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
590         end_bit = (sector + n_sectors - 1) >>
591                 (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
592
593         page = bit / (PAGE_SIZE * 8);
594         bit %= PAGE_SIZE * 8;
595
596         end_page = end_bit / (PAGE_SIZE * 8);
597         end_bit %= PAGE_SIZE * 8;
598
599 repeat:
600         if (page < end_page)
601                 this_end_bit = PAGE_SIZE * 8 - 1;
602         else
603                 this_end_bit = end_bit;
604
605         data = lowmem_page_address(bitmap[page].page);
606
607         if (mode == BITMAP_OP_TEST_ALL_SET) {
608                 while (bit <= this_end_bit) {
609                         if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
610                                 do {
611                                         if (data[bit / BITS_PER_LONG] != -1)
612                                                 return false;
613                                         bit += BITS_PER_LONG;
614                                 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
615                                 continue;
616                         }
617                         if (!test_bit(bit, data))
618                                 return false;
619                         bit++;
620                 }
621         } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) {
622                 while (bit <= this_end_bit) {
623                         if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
624                                 do {
625                                         if (data[bit / BITS_PER_LONG] != 0)
626                                                 return false;
627                                         bit += BITS_PER_LONG;
628                                 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
629                                 continue;
630                         }
631                         if (test_bit(bit, data))
632                                 return false;
633                         bit++;
634                 }
635         } else if (mode == BITMAP_OP_SET) {
636                 while (bit <= this_end_bit) {
637                         if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
638                                 do {
639                                         data[bit / BITS_PER_LONG] = -1;
640                                         bit += BITS_PER_LONG;
641                                 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
642                                 continue;
643                         }
644                         __set_bit(bit, data);
645                         bit++;
646                 }
647         } else if (mode == BITMAP_OP_CLEAR) {
648                 if (!bit && this_end_bit == PAGE_SIZE * 8 - 1)
649                         clear_page(data);
650                 else {
651                         while (bit <= this_end_bit) {
652                                 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
653                                         do {
654                                                 data[bit / BITS_PER_LONG] = 0;
655                                                 bit += BITS_PER_LONG;
656                                         } while (this_end_bit >= bit + BITS_PER_LONG - 1);
657                                         continue;
658                                 }
659                                 __clear_bit(bit, data);
660                                 bit++;
661                         }
662                 }
663         } else {
664                 BUG();
665         }
666
667         if (unlikely(page < end_page)) {
668                 bit = 0;
669                 page++;
670                 goto repeat;
671         }
672
673         return true;
674 }
675
676 static void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src)
677 {
678         unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
679         unsigned int i;
680
681         for (i = 0; i < n_bitmap_pages; i++) {
682                 unsigned long *dst_data = lowmem_page_address(dst[i].page);
683                 unsigned long *src_data = lowmem_page_address(src[i].page);
684
685                 copy_page(dst_data, src_data);
686         }
687 }
688
689 static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector)
690 {
691         unsigned int bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
692         unsigned int bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8);
693
694         BUG_ON(bitmap_block >= ic->n_bitmap_blocks);
695         return &ic->bbs[bitmap_block];
696 }
697
698 static void access_journal_check(struct dm_integrity_c *ic, unsigned int section, unsigned int offset,
699                                  bool e, const char *function)
700 {
701 #if defined(CONFIG_DM_DEBUG) || defined(INTERNAL_VERIFY)
702         unsigned int limit = e ? ic->journal_section_entries : ic->journal_section_sectors;
703
704         if (unlikely(section >= ic->journal_sections) ||
705             unlikely(offset >= limit)) {
706                 DMCRIT("%s: invalid access at (%u,%u), limit (%u,%u)",
707                        function, section, offset, ic->journal_sections, limit);
708                 BUG();
709         }
710 #endif
711 }
712
713 static void page_list_location(struct dm_integrity_c *ic, unsigned int section, unsigned int offset,
714                                unsigned int *pl_index, unsigned int *pl_offset)
715 {
716         unsigned int sector;
717
718         access_journal_check(ic, section, offset, false, "page_list_location");
719
720         sector = section * ic->journal_section_sectors + offset;
721
722         *pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
723         *pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
724 }
725
726 static struct journal_sector *access_page_list(struct dm_integrity_c *ic, struct page_list *pl,
727                                                unsigned int section, unsigned int offset, unsigned int *n_sectors)
728 {
729         unsigned int pl_index, pl_offset;
730         char *va;
731
732         page_list_location(ic, section, offset, &pl_index, &pl_offset);
733
734         if (n_sectors)
735                 *n_sectors = (PAGE_SIZE - pl_offset) >> SECTOR_SHIFT;
736
737         va = lowmem_page_address(pl[pl_index].page);
738
739         return (struct journal_sector *)(va + pl_offset);
740 }
741
742 static struct journal_sector *access_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset)
743 {
744         return access_page_list(ic, ic->journal, section, offset, NULL);
745 }
746
747 static struct journal_entry *access_journal_entry(struct dm_integrity_c *ic, unsigned int section, unsigned int n)
748 {
749         unsigned int rel_sector, offset;
750         struct journal_sector *js;
751
752         access_journal_check(ic, section, n, true, "access_journal_entry");
753
754         rel_sector = n % JOURNAL_BLOCK_SECTORS;
755         offset = n / JOURNAL_BLOCK_SECTORS;
756
757         js = access_journal(ic, section, rel_sector);
758         return (struct journal_entry *)((char *)js + offset * ic->journal_entry_size);
759 }
760
761 static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned int section, unsigned int n)
762 {
763         n <<= ic->sb->log2_sectors_per_block;
764
765         n += JOURNAL_BLOCK_SECTORS;
766
767         access_journal_check(ic, section, n, false, "access_journal_data");
768
769         return access_journal(ic, section, n);
770 }
771
772 static void section_mac(struct dm_integrity_c *ic, unsigned int section, __u8 result[JOURNAL_MAC_SIZE])
773 {
774         SHASH_DESC_ON_STACK(desc, ic->journal_mac);
775         int r;
776         unsigned int j, size;
777
778         desc->tfm = ic->journal_mac;
779
780         r = crypto_shash_init(desc);
781         if (unlikely(r < 0)) {
782                 dm_integrity_io_error(ic, "crypto_shash_init", r);
783                 goto err;
784         }
785
786         if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
787                 __le64 section_le;
788
789                 r = crypto_shash_update(desc, (__u8 *)&ic->sb->salt, SALT_SIZE);
790                 if (unlikely(r < 0)) {
791                         dm_integrity_io_error(ic, "crypto_shash_update", r);
792                         goto err;
793                 }
794
795                 section_le = cpu_to_le64(section);
796                 r = crypto_shash_update(desc, (__u8 *)&section_le, sizeof(section_le));
797                 if (unlikely(r < 0)) {
798                         dm_integrity_io_error(ic, "crypto_shash_update", r);
799                         goto err;
800                 }
801         }
802
803         for (j = 0; j < ic->journal_section_entries; j++) {
804                 struct journal_entry *je = access_journal_entry(ic, section, j);
805
806                 r = crypto_shash_update(desc, (__u8 *)&je->u.sector, sizeof(je->u.sector));
807                 if (unlikely(r < 0)) {
808                         dm_integrity_io_error(ic, "crypto_shash_update", r);
809                         goto err;
810                 }
811         }
812
813         size = crypto_shash_digestsize(ic->journal_mac);
814
815         if (likely(size <= JOURNAL_MAC_SIZE)) {
816                 r = crypto_shash_final(desc, result);
817                 if (unlikely(r < 0)) {
818                         dm_integrity_io_error(ic, "crypto_shash_final", r);
819                         goto err;
820                 }
821                 memset(result + size, 0, JOURNAL_MAC_SIZE - size);
822         } else {
823                 __u8 digest[HASH_MAX_DIGESTSIZE];
824
825                 if (WARN_ON(size > sizeof(digest))) {
826                         dm_integrity_io_error(ic, "digest_size", -EINVAL);
827                         goto err;
828                 }
829                 r = crypto_shash_final(desc, digest);
830                 if (unlikely(r < 0)) {
831                         dm_integrity_io_error(ic, "crypto_shash_final", r);
832                         goto err;
833                 }
834                 memcpy(result, digest, JOURNAL_MAC_SIZE);
835         }
836
837         return;
838 err:
839         memset(result, 0, JOURNAL_MAC_SIZE);
840 }
841
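/*
 * Write (wr == true) or verify (wr == false) the MAC of one journal section:
 * section_mac() produces JOURNAL_MAC_SIZE bytes that are spread over the
 * section's JOURNAL_BLOCK_SECTORS metadata sectors, JOURNAL_MAC_PER_SECTOR
 * bytes in each.
 */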
842 static void rw_section_mac(struct dm_integrity_c *ic, unsigned int section, bool wr)
843 {
844         __u8 result[JOURNAL_MAC_SIZE];
845         unsigned int j;
846
847         if (!ic->journal_mac)
848                 return;
849
850         section_mac(ic, section, result);
851
852         for (j = 0; j < JOURNAL_BLOCK_SECTORS; j++) {
853                 struct journal_sector *js = access_journal(ic, section, j);
854
855                 if (likely(wr))
856                         memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
857                 else {
858                         if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) {
859                                 dm_integrity_io_error(ic, "journal mac", -EILSEQ);
860                                 dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0);
861                         }
862                 }
863         }
864 }
865
866 static void complete_journal_op(void *context)
867 {
868         struct journal_completion *comp = context;
869
870         BUG_ON(!atomic_read(&comp->in_flight));
871         if (likely(atomic_dec_and_test(&comp->in_flight)))
872                 complete(&comp->comp);
873 }
874
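/*
 * XOR-obscured journal: ic->journal_io is produced by XOR-ing the plaintext
 * journal pages with the pad pages kept in ic->journal_xor (the same XOR
 * recovers the plaintext, so encryption and decryption are identical).  The
 * XOR is offloaded through the async_tx API one page at a time, and section
 * MACs are written out as each section boundary is crossed.
 */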
875 static void xor_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
876                         unsigned int n_sections, struct journal_completion *comp)
877 {
878         struct async_submit_ctl submit;
879         size_t n_bytes = (size_t)(n_sections * ic->journal_section_sectors) << SECTOR_SHIFT;
880         unsigned int pl_index, pl_offset, section_index;
881         struct page_list *source_pl, *target_pl;
882
883         if (likely(encrypt)) {
884                 source_pl = ic->journal;
885                 target_pl = ic->journal_io;
886         } else {
887                 source_pl = ic->journal_io;
888                 target_pl = ic->journal;
889         }
890
891         page_list_location(ic, section, 0, &pl_index, &pl_offset);
892
893         atomic_add(roundup(pl_offset + n_bytes, PAGE_SIZE) >> PAGE_SHIFT, &comp->in_flight);
894
895         init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, complete_journal_op, comp, NULL);
896
897         section_index = pl_index;
898
899         do {
900                 size_t this_step;
901                 struct page *src_pages[2];
902                 struct page *dst_page;
903
904                 while (unlikely(pl_index == section_index)) {
905                         unsigned int dummy;
906
907                         if (likely(encrypt))
908                                 rw_section_mac(ic, section, true);
909                         section++;
910                         n_sections--;
911                         if (!n_sections)
912                                 break;
913                         page_list_location(ic, section, 0, &section_index, &dummy);
914                 }
915
916                 this_step = min(n_bytes, (size_t)PAGE_SIZE - pl_offset);
917                 dst_page = target_pl[pl_index].page;
918                 src_pages[0] = source_pl[pl_index].page;
919                 src_pages[1] = ic->journal_xor[pl_index].page;
920
921                 async_xor(dst_page, src_pages, pl_offset, 2, this_step, &submit);
922
923                 pl_index++;
924                 pl_offset = 0;
925                 n_bytes -= this_step;
926         } while (n_bytes);
927
928         BUG_ON(n_sections);
929
930         async_tx_issue_pending_all();
931 }
932
933 static void complete_journal_encrypt(void *data, int err)
934 {
935         struct journal_completion *comp = data;
936
937         if (unlikely(err)) {
938                 if (likely(err == -EINPROGRESS)) {
939                         complete(&comp->ic->crypto_backoff);
940                         return;
941                 }
942                 dm_integrity_io_error(comp->ic, "asynchronous encrypt", err);
943         }
944         complete_journal_op(comp);
945 }
946
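/*
 * Submit one skcipher request for the journal.  Returns true if completion
 * will arrive asynchronously through complete_journal_encrypt() (the caller
 * then bumps comp->in_flight); a backlogged request (-EBUSY) is waited for
 * via ic->crypto_backoff before returning.
 */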
947 static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp)
948 {
949         int r;
950
951         skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
952                                       complete_journal_encrypt, comp);
953         if (likely(encrypt))
954                 r = crypto_skcipher_encrypt(req);
955         else
956                 r = crypto_skcipher_decrypt(req);
957         if (likely(!r))
958                 return false;
959         if (likely(r == -EINPROGRESS))
960                 return true;
961         if (likely(r == -EBUSY)) {
962                 wait_for_completion(&comp->ic->crypto_backoff);
963                 reinit_completion(&comp->ic->crypto_backoff);
964                 return true;
965         }
966         dm_integrity_io_error(comp->ic, "encrypt", r);
967         return false;
968 }
969
970 static void crypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
971                           unsigned int n_sections, struct journal_completion *comp)
972 {
973         struct scatterlist **source_sg;
974         struct scatterlist **target_sg;
975
976         atomic_add(2, &comp->in_flight);
977
978         if (likely(encrypt)) {
979                 source_sg = ic->journal_scatterlist;
980                 target_sg = ic->journal_io_scatterlist;
981         } else {
982                 source_sg = ic->journal_io_scatterlist;
983                 target_sg = ic->journal_scatterlist;
984         }
985
986         do {
987                 struct skcipher_request *req;
988                 unsigned int ivsize;
989                 char *iv;
990
991                 if (likely(encrypt))
992                         rw_section_mac(ic, section, true);
993
994                 req = ic->sk_requests[section];
995                 ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
996                 iv = req->iv;
997
998                 memcpy(iv, iv + ivsize, ivsize);
999
1000                 req->src = source_sg[section];
1001                 req->dst = target_sg[section];
1002
1003                 if (unlikely(do_crypt(encrypt, req, comp)))
1004                         atomic_inc(&comp->in_flight);
1005
1006                 section++;
1007                 n_sections--;
1008         } while (n_sections);
1009
1010         atomic_dec(&comp->in_flight);
1011         complete_journal_op(comp);
1012 }
1013
1014 static void encrypt_journal(struct dm_integrity_c *ic, bool encrypt, unsigned int section,
1015                             unsigned int n_sections, struct journal_completion *comp)
1016 {
1017         if (ic->journal_xor)
1018                 return xor_journal(ic, encrypt, section, n_sections, comp);
1019         else
1020                 return crypt_journal(ic, encrypt, section, n_sections, comp);
1021 }
1022
1023 static void complete_journal_io(unsigned long error, void *context)
1024 {
1025         struct journal_completion *comp = context;
1026
1027         if (unlikely(error != 0))
1028                 dm_integrity_io_error(comp->ic, "writing journal", -EIO);
1029         complete_journal_op(comp);
1030 }
1031
1032 static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf,
1033                                unsigned int sector, unsigned int n_sectors,
1034                                struct journal_completion *comp)
1035 {
1036         struct dm_io_request io_req;
1037         struct dm_io_region io_loc;
1038         unsigned int pl_index, pl_offset;
1039         int r;
1040
1041         if (unlikely(dm_integrity_failed(ic))) {
1042                 if (comp)
1043                         complete_journal_io(-1UL, comp);
1044                 return;
1045         }
1046
1047         pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
1048         pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
1049
1050         io_req.bi_opf = opf;
1051         io_req.mem.type = DM_IO_PAGE_LIST;
1052         if (ic->journal_io)
1053                 io_req.mem.ptr.pl = &ic->journal_io[pl_index];
1054         else
1055                 io_req.mem.ptr.pl = &ic->journal[pl_index];
1056         io_req.mem.offset = pl_offset;
1057         if (likely(comp != NULL)) {
1058                 io_req.notify.fn = complete_journal_io;
1059                 io_req.notify.context = comp;
1060         } else {
1061                 io_req.notify.fn = NULL;
1062         }
1063         io_req.client = ic->io;
1064         io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
1065         io_loc.sector = ic->start + SB_SECTORS + sector;
1066         io_loc.count = n_sectors;
1067
1068         r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
1069         if (unlikely(r)) {
1070                 dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ?
1071                                       "reading journal" : "writing journal", r);
1072                 if (comp) {
1073                         WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
1074                         complete_journal_io(-1UL, comp);
1075                 }
1076         }
1077 }
1078
1079 static void rw_journal(struct dm_integrity_c *ic, blk_opf_t opf,
1080                        unsigned int section, unsigned int n_sections,
1081                        struct journal_completion *comp)
1082 {
1083         unsigned int sector, n_sectors;
1084
1085         sector = section * ic->journal_section_sectors;
1086         n_sectors = n_sections * ic->journal_section_sectors;
1087
1088         rw_journal_sectors(ic, opf, sector, n_sectors, comp);
1089 }
1090
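/*
 * Write commit_sections journal sections starting at commit_start to the
 * device with FUA.  If the range wraps around the end of the journal it is
 * issued as two writes, and with an encrypted journal the encryption of the
 * two halves is overlapped with the I/O where possible.
 */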
1091 static void write_journal(struct dm_integrity_c *ic, unsigned int commit_start, unsigned int commit_sections)
1092 {
1093         struct journal_completion io_comp;
1094         struct journal_completion crypt_comp_1;
1095         struct journal_completion crypt_comp_2;
1096         unsigned int i;
1097
1098         io_comp.ic = ic;
1099         init_completion(&io_comp.comp);
1100
1101         if (commit_start + commit_sections <= ic->journal_sections) {
1102                 io_comp.in_flight = (atomic_t)ATOMIC_INIT(1);
1103                 if (ic->journal_io) {
1104                         crypt_comp_1.ic = ic;
1105                         init_completion(&crypt_comp_1.comp);
1106                         crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
1107                         encrypt_journal(ic, true, commit_start, commit_sections, &crypt_comp_1);
1108                         wait_for_completion_io(&crypt_comp_1.comp);
1109                 } else {
1110                         for (i = 0; i < commit_sections; i++)
1111                                 rw_section_mac(ic, commit_start + i, true);
1112                 }
1113                 rw_journal(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, commit_start,
1114                            commit_sections, &io_comp);
1115         } else {
1116                 unsigned int to_end;
1117
1118                 io_comp.in_flight = (atomic_t)ATOMIC_INIT(2);
1119                 to_end = ic->journal_sections - commit_start;
1120                 if (ic->journal_io) {
1121                         crypt_comp_1.ic = ic;
1122                         init_completion(&crypt_comp_1.comp);
1123                         crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
1124                         encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1);
1125                         if (try_wait_for_completion(&crypt_comp_1.comp)) {
1126                                 rw_journal(ic, REQ_OP_WRITE | REQ_FUA,
1127                                            commit_start, to_end, &io_comp);
1128                                 reinit_completion(&crypt_comp_1.comp);
1129                                 crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0);
1130                                 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1);
1131                                 wait_for_completion_io(&crypt_comp_1.comp);
1132                         } else {
1133                                 crypt_comp_2.ic = ic;
1134                                 init_completion(&crypt_comp_2.comp);
1135                                 crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0);
1136                                 encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2);
1137                                 wait_for_completion_io(&crypt_comp_1.comp);
1138                                 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
1139                                 wait_for_completion_io(&crypt_comp_2.comp);
1140                         }
1141                 } else {
1142                         for (i = 0; i < to_end; i++)
1143                                 rw_section_mac(ic, commit_start + i, true);
1144                         rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp);
1145                         for (i = 0; i < commit_sections - to_end; i++)
1146                                 rw_section_mac(ic, i, true);
1147                 }
1148                 rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 0, commit_sections - to_end, &io_comp);
1149         }
1150
1151         wait_for_completion_io(&io_comp.comp);
1152 }
1153
1154 static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, unsigned int offset,
1155                               unsigned int n_sectors, sector_t target, io_notify_fn fn, void *data)
1156 {
1157         struct dm_io_request io_req;
1158         struct dm_io_region io_loc;
1159         int r;
1160         unsigned int sector, pl_index, pl_offset;
1161
1162         BUG_ON((target | n_sectors | offset) & (unsigned int)(ic->sectors_per_block - 1));
1163
1164         if (unlikely(dm_integrity_failed(ic))) {
1165                 fn(-1UL, data);
1166                 return;
1167         }
1168
1169         sector = section * ic->journal_section_sectors + JOURNAL_BLOCK_SECTORS + offset;
1170
1171         pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
1172         pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
1173
1174         io_req.bi_opf = REQ_OP_WRITE;
1175         io_req.mem.type = DM_IO_PAGE_LIST;
1176         io_req.mem.ptr.pl = &ic->journal[pl_index];
1177         io_req.mem.offset = pl_offset;
1178         io_req.notify.fn = fn;
1179         io_req.notify.context = data;
1180         io_req.client = ic->io;
1181         io_loc.bdev = ic->dev->bdev;
1182         io_loc.sector = target;
1183         io_loc.count = n_sectors;
1184
1185         r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
1186         if (unlikely(r)) {
1187                 WARN_ONCE(1, "asynchronous dm_io failed: %d", r);
1188                 fn(-1UL, data);
1189         }
1190 }
1191
1192 static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
1193 {
1194         return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
1195                range1->logical_sector + range1->n_sectors > range2->logical_sector;
1196 }
1197
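/*
 * In-flight I/O ranges are kept in the ic->in_progress rb-tree (protected by
 * endio_wait.lock).  add_new_range() returns false if the new range overlaps
 * an existing one (or, if check_waiting, a range already queued on
 * wait_list); the caller then sleeps in wait_and_add_new_range() until
 * remove_range_unlocked() manages to insert the range and wakes the task.
 */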
1198 static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
1199 {
1200         struct rb_node **n = &ic->in_progress.rb_node;
1201         struct rb_node *parent;
1202
1203         BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned int)(ic->sectors_per_block - 1));
1204
1205         if (likely(check_waiting)) {
1206                 struct dm_integrity_range *range;
1207
1208                 list_for_each_entry(range, &ic->wait_list, wait_entry) {
1209                         if (unlikely(ranges_overlap(range, new_range)))
1210                                 return false;
1211                 }
1212         }
1213
1214         parent = NULL;
1215
1216         while (*n) {
1217                 struct dm_integrity_range *range = container_of(*n, struct dm_integrity_range, node);
1218
1219                 parent = *n;
1220                 if (new_range->logical_sector + new_range->n_sectors <= range->logical_sector)
1221                         n = &range->node.rb_left;
1222                 else if (new_range->logical_sector >= range->logical_sector + range->n_sectors)
1223                         n = &range->node.rb_right;
1224                 else
1225                         return false;
1226         }
1227
1228         rb_link_node(&new_range->node, parent, n);
1229         rb_insert_color(&new_range->node, &ic->in_progress);
1230
1231         return true;
1232 }
1233
1234 static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range)
1235 {
1236         rb_erase(&range->node, &ic->in_progress);
1237         while (unlikely(!list_empty(&ic->wait_list))) {
1238                 struct dm_integrity_range *last_range =
1239                         list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
1240                 struct task_struct *last_range_task;
1241
1242                 last_range_task = last_range->task;
1243                 list_del(&last_range->wait_entry);
1244                 if (!add_new_range(ic, last_range, false)) {
1245                         last_range->task = last_range_task;
1246                         list_add(&last_range->wait_entry, &ic->wait_list);
1247                         break;
1248                 }
1249                 last_range->waiting = false;
1250                 wake_up_process(last_range_task);
1251         }
1252 }
1253
1254 static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range)
1255 {
1256         unsigned long flags;
1257
1258         spin_lock_irqsave(&ic->endio_wait.lock, flags);
1259         remove_range_unlocked(ic, range);
1260         spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1261 }
1262
1263 static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
1264 {
1265         new_range->waiting = true;
1266         list_add_tail(&new_range->wait_entry, &ic->wait_list);
1267         new_range->task = current;
1268         do {
1269                 __set_current_state(TASK_UNINTERRUPTIBLE);
1270                 spin_unlock_irq(&ic->endio_wait.lock);
1271                 io_schedule();
1272                 spin_lock_irq(&ic->endio_wait.lock);
1273         } while (unlikely(new_range->waiting));
1274 }
1275
1276 static void add_new_range_and_wait(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
1277 {
1278         if (unlikely(!add_new_range(ic, new_range, true)))
1279                 wait_and_add_new_range(ic, new_range);
1280 }
1281
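/*
 * There is one journal_node per journal entry (ic->journal_tree).  Nodes of
 * entries that currently hold valid data are linked into journal_tree_root,
 * an rb-tree keyed by logical sector, so lookups can find the in-journal
 * copy of a block and tell which copy is the newest.
 */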
1282 static void init_journal_node(struct journal_node *node)
1283 {
1284         RB_CLEAR_NODE(&node->node);
1285         node->sector = (sector_t)-1;
1286 }
1287
1288 static void add_journal_node(struct dm_integrity_c *ic, struct journal_node *node, sector_t sector)
1289 {
1290         struct rb_node **link;
1291         struct rb_node *parent;
1292
1293         node->sector = sector;
1294         BUG_ON(!RB_EMPTY_NODE(&node->node));
1295
1296         link = &ic->journal_tree_root.rb_node;
1297         parent = NULL;
1298
1299         while (*link) {
1300                 struct journal_node *j;
1301
1302                 parent = *link;
1303                 j = container_of(parent, struct journal_node, node);
1304                 if (sector < j->sector)
1305                         link = &j->node.rb_left;
1306                 else
1307                         link = &j->node.rb_right;
1308         }
1309
1310         rb_link_node(&node->node, parent, link);
1311         rb_insert_color(&node->node, &ic->journal_tree_root);
1312 }
1313
1314 static void remove_journal_node(struct dm_integrity_c *ic, struct journal_node *node)
1315 {
1316         BUG_ON(RB_EMPTY_NODE(&node->node));
1317         rb_erase(&node->node, &ic->journal_tree_root);
1318         init_journal_node(node);
1319 }
1320
1321 #define NOT_FOUND       (-1U)
1322
1323 static unsigned int find_journal_node(struct dm_integrity_c *ic, sector_t sector, sector_t *next_sector)
1324 {
1325         struct rb_node *n = ic->journal_tree_root.rb_node;
1326         unsigned int found = NOT_FOUND;
1327
1328         *next_sector = (sector_t)-1;
1329         while (n) {
1330                 struct journal_node *j = container_of(n, struct journal_node, node);
1331
1332                 if (sector == j->sector)
1333                         found = j - ic->journal_tree;
1334
1335                 if (sector < j->sector) {
1336                         *next_sector = j->sector;
1337                         n = j->node.rb_left;
1338                 } else
1339                         n = j->node.rb_right;
1340         }
1341
1342         return found;
1343 }
1344
1345 static bool test_journal_node(struct dm_integrity_c *ic, unsigned int pos, sector_t sector)
1346 {
1347         struct journal_node *node, *next_node;
1348         struct rb_node *next;
1349
1350         if (unlikely(pos >= ic->journal_entries))
1351                 return false;
1352         node = &ic->journal_tree[pos];
1353         if (unlikely(RB_EMPTY_NODE(&node->node)))
1354                 return false;
1355         if (unlikely(node->sector != sector))
1356                 return false;
1357
1358         next = rb_next(&node->node);
1359         if (unlikely(!next))
1360                 return true;
1361
1362         next_node = container_of(next, struct journal_node, node);
1363         return next_node->sector != sector;
1364 }
1365
1366 static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_node *node)
1367 {
1368         struct rb_node *next;
1369         struct journal_node *next_node;
1370         unsigned int next_section;
1371
1372         BUG_ON(RB_EMPTY_NODE(&node->node));
1373
1374         next = rb_next(&node->node);
1375         if (unlikely(!next))
1376                 return false;
1377
1378         next_node = container_of(next, struct journal_node, node);
1379
1380         if (next_node->sector != node->sector)
1381                 return false;
1382
1383         next_section = (unsigned int)(next_node - ic->journal_tree) / ic->journal_section_entries;
1384         if (next_section >= ic->committed_section &&
1385             next_section < ic->committed_section + ic->n_committed_sections)
1386                 return true;
1387         if (next_section + ic->journal_sections < ic->committed_section + ic->n_committed_sections)
1388                 return true;
1389
1390         return false;
1391 }
1392
1393 #define TAG_READ        0
1394 #define TAG_WRITE       1
1395 #define TAG_CMP         2
1396
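/*
 * Read, write or compare total_size bytes of tag data starting at
 * (*metadata_block, *metadata_offset), advancing both across dm-bufio blocks.
 * TAG_WRITE only dirties the buffer when the tag actually changes.  TAG_CMP
 * returns 0 when the tags match (or may still match the DISCARD_FILLER
 * pattern) and otherwise the number of bytes that remained to be processed
 * at the first definite mismatch.
 */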
1397 static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
1398                                unsigned int *metadata_offset, unsigned int total_size, int op)
1399 {
1400 #define MAY_BE_FILLER           1
1401 #define MAY_BE_HASH             2
1402         unsigned int hash_offset = 0;
1403         unsigned int may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
1404
1405         do {
1406                 unsigned char *data, *dp;
1407                 struct dm_buffer *b;
1408                 unsigned int to_copy;
1409                 int r;
1410
1411                 r = dm_integrity_failed(ic);
1412                 if (unlikely(r))
1413                         return r;
1414
1415                 data = dm_bufio_read(ic->bufio, *metadata_block, &b);
1416                 if (IS_ERR(data))
1417                         return PTR_ERR(data);
1418
1419                 to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
1420                 dp = data + *metadata_offset;
1421                 if (op == TAG_READ) {
1422                         memcpy(tag, dp, to_copy);
1423                 } else if (op == TAG_WRITE) {
1424                         if (memcmp(dp, tag, to_copy)) {
1425                                 memcpy(dp, tag, to_copy);
1426                                 dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
1427                         }
1428                 } else {
1429                         /* e.g.: op == TAG_CMP */
1430
1431                         if (likely(is_power_of_2(ic->tag_size))) {
1432                                 if (unlikely(memcmp(dp, tag, to_copy)))
1433                                         if (unlikely(!ic->discard) ||
1434                                             unlikely(memchr_inv(dp, DISCARD_FILLER, to_copy) != NULL)) {
1435                                                 goto thorough_test;
1436                                 }
1437                         } else {
1438                                 unsigned int i, ts;
1439 thorough_test:
1440                                 ts = total_size;
1441
1442                                 for (i = 0; i < to_copy; i++, ts--) {
1443                                         if (unlikely(dp[i] != tag[i]))
1444                                                 may_be &= ~MAY_BE_HASH;
1445                                         if (likely(dp[i] != DISCARD_FILLER))
1446                                                 may_be &= ~MAY_BE_FILLER;
1447                                         hash_offset++;
1448                                         if (unlikely(hash_offset == ic->tag_size)) {
1449                                                 if (unlikely(!may_be)) {
1450                                                         dm_bufio_release(b);
1451                                                         return ts;
1452                                                 }
1453                                                 hash_offset = 0;
1454                                                 may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
1455                                         }
1456                                 }
1457                         }
1458                 }
1459                 dm_bufio_release(b);
1460
1461                 tag += to_copy;
1462                 *metadata_offset += to_copy;
1463                 if (unlikely(*metadata_offset == 1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
1464                         (*metadata_block)++;
1465                         *metadata_offset = 0;
1466                 }
1467
1468                 if (unlikely(!is_power_of_2(ic->tag_size)))
1469                         hash_offset = (hash_offset + to_copy) % ic->tag_size;
1470
1471                 total_size -= to_copy;
1472         } while (unlikely(total_size));
1473
1474         return 0;
1475 #undef MAY_BE_FILLER
1476 #undef MAY_BE_HASH
1477 }
1478
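/*
 * Flushing of dirty metadata buffers and the data device cache.
 *
 * The zero-length REQ_PREFLUSH below is only issued when a separate metadata
 * device is used (flush_data is forced off otherwise); it targets the data
 * device, runs asynchronously next to dm_bufio_write_dirty_buffers(), and is
 * waited for at the end of dm_integrity_flush_buffers().
 */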
1479 struct flush_request {
1480         struct dm_io_request io_req;
1481         struct dm_io_region io_reg;
1482         struct dm_integrity_c *ic;
1483         struct completion comp;
1484 };
1485
1486 static void flush_notify(unsigned long error, void *fr_)
1487 {
1488         struct flush_request *fr = fr_;
1489
1490         if (unlikely(error != 0))
1491                 dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO);
1492         complete(&fr->comp);
1493 }
1494
1495 static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
1496 {
1497         int r;
1498         struct flush_request fr;
1499
1500         if (!ic->meta_dev)
1501                 flush_data = false;
1502         if (flush_data) {
1503                 fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
1504                 fr.io_req.mem.type = DM_IO_KMEM;
1505                 fr.io_req.mem.ptr.addr = NULL;
1506                 fr.io_req.notify.fn = flush_notify;
1507                 fr.io_req.notify.context = &fr;
1508                 fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio);
1509                 fr.io_reg.bdev = ic->dev->bdev;
1510                 fr.io_reg.sector = 0;
1511                 fr.io_reg.count = 0;
1512                 fr.ic = ic;
1513                 init_completion(&fr.comp);
1514                 r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL, IOPRIO_DEFAULT);
1515                 BUG_ON(r);
1516         }
1517
1518         r = dm_bufio_write_dirty_buffers(ic->bufio);
1519         if (unlikely(r))
1520                 dm_integrity_io_error(ic, "writing tags", r);
1521
1522         if (flush_data)
1523                 wait_for_completion(&fr.comp);
1524 }
1525
1526 static void sleep_on_endio_wait(struct dm_integrity_c *ic)
1527 {
1528         DECLARE_WAITQUEUE(wait, current);
1529
1530         __add_wait_queue(&ic->endio_wait, &wait);
1531         __set_current_state(TASK_UNINTERRUPTIBLE);
1532         spin_unlock_irq(&ic->endio_wait.lock);
1533         io_schedule();
1534         spin_lock_irq(&ic->endio_wait.lock);
1535         __remove_wait_queue(&ic->endio_wait, &wait);
1536 }
1537
1538 static void autocommit_fn(struct timer_list *t)
1539 {
1540         struct dm_integrity_c *ic = from_timer(ic, t, autocommit_timer);
1541
1542         if (likely(!dm_integrity_failed(ic)))
1543                 queue_work(ic->commit_wq, &ic->commit_work);
1544 }
1545
1546 static void schedule_autocommit(struct dm_integrity_c *ic)
1547 {
1548         if (!timer_pending(&ic->autocommit_timer))
1549                 mod_timer(&ic->autocommit_timer, jiffies + ic->autocommit_jiffies);
1550 }
1551
1552 static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
1553 {
1554         struct bio *bio;
1555         unsigned long flags;
1556
1557         spin_lock_irqsave(&ic->endio_wait.lock, flags);
1558         bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1559         bio_list_add(&ic->flush_bio_list, bio);
1560         spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1561
1562         queue_work(ic->commit_wq, &ic->commit_work);
1563 }
1564
1565 static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
1566 {
1567         int r;
1568
1569         r = dm_integrity_failed(ic);
1570         if (unlikely(r) && !bio->bi_status)
1571                 bio->bi_status = errno_to_blk_status(r);
1572         if (unlikely(ic->synchronous_mode) && bio_op(bio) == REQ_OP_WRITE) {
1573                 unsigned long flags;
1574
1575                 spin_lock_irqsave(&ic->endio_wait.lock, flags);
1576                 bio_list_add(&ic->synchronous_bios, bio);
1577                 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
1578                 spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
1579                 return;
1580         }
1581         bio_endio(bio);
1582 }
1583
1584 static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
1585 {
1586         struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1587
1588         if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
1589                 submit_flush_bio(ic, dio);
1590         else
1591                 do_endio(ic, bio);
1592 }
1593
1594 static void dec_in_flight(struct dm_integrity_io *dio)
1595 {
1596         if (atomic_dec_and_test(&dio->in_flight)) {
1597                 struct dm_integrity_c *ic = dio->ic;
1598                 struct bio *bio;
1599
1600                 remove_range(ic, &dio->range);
1601
1602                 if (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))
1603                         schedule_autocommit(ic);
1604
1605                 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1606                 if (unlikely(dio->bi_status) && !bio->bi_status)
1607                         bio->bi_status = dio->bi_status;
1608                 if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
1609                         dio->range.logical_sector += dio->range.n_sectors;
1610                         bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
1611                         INIT_WORK(&dio->work, integrity_bio_wait);
1612                         queue_work(ic->offload_wq, &dio->work);
1613                         return;
1614                 }
1615                 do_endio_flush(ic, dio);
1616         }
1617 }
1618
1619 static void integrity_end_io(struct bio *bio)
1620 {
1621         struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
1622
1623         dm_bio_restore(&dio->bio_details, bio);
1624         if (bio->bi_integrity)
1625                 bio->bi_opf |= REQ_INTEGRITY;
1626
1627         if (dio->completion)
1628                 complete(dio->completion);
1629
1630         dec_in_flight(dio);
1631 }
1632
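/*
 * Compute the integrity tag of one block.  The hash input is the superblock
 * salt (only with SB_FLAG_FIXED_HMAC), the little-endian sector number and
 * the block data.  A digest shorter than the tag size is zero-padded; if
 * hashing fails (which should not happen), the result is randomized so that
 * a bogus tag cannot verify successfully by accident.
 */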
1633 static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector,
1634                                       const char *data, char *result)
1635 {
1636         __le64 sector_le = cpu_to_le64(sector);
1637         SHASH_DESC_ON_STACK(req, ic->internal_hash);
1638         int r;
1639         unsigned int digest_size;
1640
1641         req->tfm = ic->internal_hash;
1642
1643         r = crypto_shash_init(req);
1644         if (unlikely(r < 0)) {
1645                 dm_integrity_io_error(ic, "crypto_shash_init", r);
1646                 goto failed;
1647         }
1648
1649         if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
1650                 r = crypto_shash_update(req, (__u8 *)&ic->sb->salt, SALT_SIZE);
1651                 if (unlikely(r < 0)) {
1652                         dm_integrity_io_error(ic, "crypto_shash_update", r);
1653                         goto failed;
1654                 }
1655         }
1656
1657         r = crypto_shash_update(req, (const __u8 *)&sector_le, sizeof(sector_le));
1658         if (unlikely(r < 0)) {
1659                 dm_integrity_io_error(ic, "crypto_shash_update", r);
1660                 goto failed;
1661         }
1662
1663         r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT);
1664         if (unlikely(r < 0)) {
1665                 dm_integrity_io_error(ic, "crypto_shash_update", r);
1666                 goto failed;
1667         }
1668
1669         r = crypto_shash_final(req, result);
1670         if (unlikely(r < 0)) {
1671                 dm_integrity_io_error(ic, "crypto_shash_final", r);
1672                 goto failed;
1673         }
1674
1675         digest_size = crypto_shash_digestsize(ic->internal_hash);
1676         if (unlikely(digest_size < ic->tag_size))
1677                 memset(result + digest_size, 0, ic->tag_size - digest_size);
1678
1679         return;
1680
1681 failed:
1682         /* this shouldn't happen anyway, the hash functions have no reason to fail */
1683         get_random_bytes(result, ic->tag_size);
1684 }
1685
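/*
 * Slow path for a failed checksum: re-read the data synchronously, block by
 * block, straight from the data device and verify it again against the
 * stored tags, so that a read racing with a concurrent write is not reported
 * as corruption.  Only if the re-read data still mismatches is the bio failed
 * with -EILSEQ; otherwise the freshly read data is copied back into the bio.
 */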
1686 static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum)
1687 {
1688         struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1689         struct dm_integrity_c *ic = dio->ic;
1690         struct bvec_iter iter;
1691         struct bio_vec bv;
1692         sector_t sector, logical_sector, area, offset;
1693         struct page *page;
1694
1695         get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
1696         dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset,
1697                                                              &dio->metadata_offset);
1698         sector = get_data_sector(ic, area, offset);
1699         logical_sector = dio->range.logical_sector;
1700
1701         page = mempool_alloc(&ic->recheck_pool, GFP_NOIO);
1702
1703         __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
1704                 unsigned pos = 0;
1705
1706                 do {
1707                         sector_t alignment;
1708                         char *mem;
1709                         char *buffer = page_to_virt(page);
1710                         int r;
1711                         struct dm_io_request io_req;
1712                         struct dm_io_region io_loc;
1713                         io_req.bi_opf = REQ_OP_READ;
1714                         io_req.mem.type = DM_IO_KMEM;
1715                         io_req.mem.ptr.addr = buffer;
1716                         io_req.notify.fn = NULL;
1717                         io_req.client = ic->io;
1718                         io_loc.bdev = ic->dev->bdev;
1719                         io_loc.sector = sector;
1720                         io_loc.count = ic->sectors_per_block;
1721
1722                         /* Align the bio to logical block size */
1723                         alignment = dio->range.logical_sector | bio_sectors(bio) | (PAGE_SIZE >> SECTOR_SHIFT);
1724                         alignment &= -alignment;
1725                         io_loc.sector = round_down(io_loc.sector, alignment);
1726                         io_loc.count += sector - io_loc.sector;
1727                         buffer += (sector - io_loc.sector) << SECTOR_SHIFT;
1728                         io_loc.count = round_up(io_loc.count, alignment);
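                        /*
                         * Hypothetical example (4 KiB pages, 512-byte
                         * sectors): a single-sector bio at logical sector 9
                         * gives alignment = lowest set bit of (9 | 1 | 8) = 1,
                         * so nothing is widened; a bio covering logical
                         * sectors 8..15 gives alignment = lowest set bit of
                         * (8 | 8 | 8) = 8, so the re-read is rounded out to an
                         * 8-sector (4 KiB) boundary of the underlying device.
                         */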
1729
1730                         r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
1731                         if (unlikely(r)) {
1732                                 dio->bi_status = errno_to_blk_status(r);
1733                                 goto free_ret;
1734                         }
1735
1736                         integrity_sector_checksum(ic, logical_sector, buffer, checksum);
1737                         r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block,
1738                                                 &dio->metadata_offset, ic->tag_size, TAG_CMP);
1739                         if (r) {
1740                                 if (r > 0) {
1741                                         DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
1742                                                     bio->bi_bdev, logical_sector);
1743                                         atomic64_inc(&ic->number_of_mismatches);
1744                                         dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
1745                                                          bio, logical_sector, 0);
1746                                         r = -EILSEQ;
1747                                 }
1748                                 dio->bi_status = errno_to_blk_status(r);
1749                                 goto free_ret;
1750                         }
1751
1752                         mem = bvec_kmap_local(&bv);
1753                         memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT);
1754                         kunmap_local(mem);
1755
1756                         pos += ic->sectors_per_block << SECTOR_SHIFT;
1757                         sector += ic->sectors_per_block;
1758                         logical_sector += ic->sectors_per_block;
1759                 } while (pos < bv.bv_len);
1760         }
1761 free_ret:
1762         mempool_free(page, &ic->recheck_pool);
1763 }
1764
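/*
 * Work item that handles the metadata of one bio.  With an internal hash the
 * per-block checksums are computed into a scratch buffer (falling back to an
 * on-stack buffer under memory pressure) and then written (TAG_WRITE) or
 * compared (TAG_CMP) against the stored tags; a compare failure is handed to
 * integrity_recheck().  Discards overwrite the tags with DISCARD_FILLER.
 * Without an internal hash, the tags are copied to or from the bio's own
 * integrity payload instead.
 */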
1765 static void integrity_metadata(struct work_struct *w)
1766 {
1767         struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
1768         struct dm_integrity_c *ic = dio->ic;
1769
1770         int r;
1771
1772         if (ic->internal_hash) {
1773                 struct bvec_iter iter;
1774                 struct bio_vec bv;
1775                 unsigned int digest_size = crypto_shash_digestsize(ic->internal_hash);
1776                 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1777                 char *checksums;
1778                 unsigned int extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
1779                 char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
1780                 sector_t sector;
1781                 unsigned int sectors_to_process;
1782
1783                 if (unlikely(ic->mode == 'R'))
1784                         goto skip_io;
1785
1786                 if (likely(dio->op != REQ_OP_DISCARD))
1787                         checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
1788                                             GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
1789                 else
1790                         checksums = kmalloc(PAGE_SIZE, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
1791                 if (!checksums) {
1792                         checksums = checksums_onstack;
1793                         if (WARN_ON(extra_space &&
1794                                     digest_size > sizeof(checksums_onstack))) {
1795                                 r = -EINVAL;
1796                                 goto error;
1797                         }
1798                 }
1799
1800                 if (unlikely(dio->op == REQ_OP_DISCARD)) {
1801                         unsigned int bi_size = dio->bio_details.bi_iter.bi_size;
1802                         unsigned int max_size = likely(checksums != checksums_onstack) ? PAGE_SIZE : HASH_MAX_DIGESTSIZE;
1803                         unsigned int max_blocks = max_size / ic->tag_size;
1804
1805                         memset(checksums, DISCARD_FILLER, max_size);
1806
1807                         while (bi_size) {
1808                                 unsigned int this_step_blocks = bi_size >> (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
1809
1810                                 this_step_blocks = min(this_step_blocks, max_blocks);
1811                                 r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
1812                                                         this_step_blocks * ic->tag_size, TAG_WRITE);
1813                                 if (unlikely(r)) {
1814                                         if (likely(checksums != checksums_onstack))
1815                                                 kfree(checksums);
1816                                         goto error;
1817                                 }
1818
1819                                 bi_size -= this_step_blocks << (SECTOR_SHIFT + ic->sb->log2_sectors_per_block);
1820                         }
1821
1822                         if (likely(checksums != checksums_onstack))
1823                                 kfree(checksums);
1824                         goto skip_io;
1825                 }
1826
1827                 sector = dio->range.logical_sector;
1828                 sectors_to_process = dio->range.n_sectors;
1829
1830                 __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
1831                         struct bio_vec bv_copy = bv;
1832                         unsigned int pos;
1833                         char *mem, *checksums_ptr;
1834
1835 again:
1836                         mem = bvec_kmap_local(&bv_copy);
1837                         pos = 0;
1838                         checksums_ptr = checksums;
1839                         do {
1840                                 integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
1841                                 checksums_ptr += ic->tag_size;
1842                                 sectors_to_process -= ic->sectors_per_block;
1843                                 pos += ic->sectors_per_block << SECTOR_SHIFT;
1844                                 sector += ic->sectors_per_block;
1845                         } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack);
1846                         kunmap_local(mem);
1847
1848                         r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
1849                                                 checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
1850                         if (unlikely(r)) {
1851                                 if (likely(checksums != checksums_onstack))
1852                                         kfree(checksums);
1853                                 if (r > 0) {
1854                                         integrity_recheck(dio, checksums_onstack);
1855                                         goto skip_io;
1856                                 }
1857                                 goto error;
1858                         }
1859
1860                         if (!sectors_to_process)
1861                                 break;
1862
1863                         if (unlikely(pos < bv_copy.bv_len)) {
1864                                 bv_copy.bv_offset += pos;
1865                                 bv_copy.bv_len -= pos;
1866                                 goto again;
1867                         }
1868                 }
1869
1870                 if (likely(checksums != checksums_onstack))
1871                         kfree(checksums);
1872         } else {
1873                 struct bio_integrity_payload *bip = dio->bio_details.bi_integrity;
1874
1875                 if (bip) {
1876                         struct bio_vec biv;
1877                         struct bvec_iter iter;
1878                         unsigned int data_to_process = dio->range.n_sectors;
1879
1880                         sector_to_block(ic, data_to_process);
1881                         data_to_process *= ic->tag_size;
1882
1883                         bip_for_each_vec(biv, bip, iter) {
1884                                 unsigned char *tag;
1885                                 unsigned int this_len;
1886
1887                                 BUG_ON(PageHighMem(biv.bv_page));
1888                                 tag = bvec_virt(&biv);
1889                                 this_len = min(biv.bv_len, data_to_process);
1890                                 r = dm_integrity_rw_tag(ic, tag, &dio->metadata_block, &dio->metadata_offset,
1891                                                         this_len, dio->op == REQ_OP_READ ? TAG_READ : TAG_WRITE);
1892                                 if (unlikely(r))
1893                                         goto error;
1894                                 data_to_process -= this_len;
1895                                 if (!data_to_process)
1896                                         break;
1897                         }
1898                 }
1899         }
1900 skip_io:
1901         dec_in_flight(dio);
1902         return;
1903 error:
1904         dio->bi_status = errno_to_blk_status(r);
1905         dec_in_flight(dio);
1906 }
1907
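/*
 * Reject bios that reach past provided_data_sectors or that are not aligned
 * on the integrity block size (both the starting sector and every bio vector
 * are checked).
 */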
1908 static inline bool dm_integrity_check_limits(struct dm_integrity_c *ic, sector_t logical_sector, struct bio *bio)
1909 {
1910         if (unlikely(logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) {
1911                 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
1912                       logical_sector, bio_sectors(bio),
1913                       ic->provided_data_sectors);
1914                 return false;
1915         }
1916         if (unlikely((logical_sector | bio_sectors(bio)) & (unsigned int)(ic->sectors_per_block - 1))) {
1917                 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
1918                       ic->sectors_per_block,
1919                       logical_sector, bio_sectors(bio));
1920                 return false;
1921         }
1922         if (ic->sectors_per_block > 1 && likely(bio_op(bio) != REQ_OP_DISCARD)) {
1923                 struct bvec_iter iter;
1924                 struct bio_vec bv;
1925
1926                 bio_for_each_segment(bv, bio, iter) {
1927                         if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
1928                                 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
1929                                         bv.bv_offset, bv.bv_len, ic->sectors_per_block);
1930                                 return false;
1931                         }
1932                 }
1933         }
1934         return true;
1935 }
1936
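/*
 * Target map function: flushes are queued for the commit worker, inline mode
 * ('I') is handled separately, discards are split on max_io_len boundaries,
 * and everything else is remapped to the data area and passed on to
 * dm_integrity_map_continue().
 */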
1937 static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1938 {
1939         struct dm_integrity_c *ic = ti->private;
1940         struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
1941         struct bio_integrity_payload *bip;
1942
1943         sector_t area, offset;
1944
1945         dio->ic = ic;
1946         dio->bi_status = 0;
1947         dio->op = bio_op(bio);
1948
1949         if (ic->mode == 'I')
1950                 return dm_integrity_map_inline(dio);
1951
1952         if (unlikely(dio->op == REQ_OP_DISCARD)) {
1953                 if (ti->max_io_len) {
1954                         sector_t sec = dm_target_offset(ti, bio->bi_iter.bi_sector);
1955                         unsigned int log2_max_io_len = __fls(ti->max_io_len);
1956                         sector_t start_boundary = sec >> log2_max_io_len;
1957                         sector_t end_boundary = (sec + bio_sectors(bio) - 1) >> log2_max_io_len;
1958
1959                         if (start_boundary < end_boundary) {
1960                                 sector_t len = ti->max_io_len - (sec & (ti->max_io_len - 1));
1961
1962                                 dm_accept_partial_bio(bio, len);
1963                         }
1964                 }
1965         }
1966
1967         if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
1968                 submit_flush_bio(ic, dio);
1969                 return DM_MAPIO_SUBMITTED;
1970         }
1971
1972         dio->range.logical_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
1973         dio->fua = dio->op == REQ_OP_WRITE && bio->bi_opf & REQ_FUA;
1974         if (unlikely(dio->fua)) {
1975                 /*
1976                  * Don't pass down the FUA flag because we have to flush
1977                  * disk cache anyway.
1978                  */
1979                 bio->bi_opf &= ~REQ_FUA;
1980         }
1981         if (unlikely(!dm_integrity_check_limits(ic, dio->range.logical_sector, bio)))
1982                 return DM_MAPIO_KILL;
1983
1984         bip = bio_integrity(bio);
1985         if (!ic->internal_hash) {
1986                 if (bip) {
1987                         unsigned int wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block;
1988
1989                         if (ic->log2_tag_size >= 0)
1990                                 wanted_tag_size <<= ic->log2_tag_size;
1991                         else
1992                                 wanted_tag_size *= ic->tag_size;
1993                         if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
1994                                 DMERR("Invalid integrity data size %u, expected %u",
1995                                       bip->bip_iter.bi_size, wanted_tag_size);
1996                                 return DM_MAPIO_KILL;
1997                         }
1998                 }
1999         } else {
2000                 if (unlikely(bip != NULL)) {
2001                         DMERR("Unexpected integrity data when using internal hash");
2002                         return DM_MAPIO_KILL;
2003                 }
2004         }
2005
2006         if (unlikely(ic->mode == 'R') && unlikely(dio->op != REQ_OP_READ))
2007                 return DM_MAPIO_KILL;
2008
2009         get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
2010         dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
2011         bio->bi_iter.bi_sector = get_data_sector(ic, area, offset);
2012
2013         dm_integrity_map_continue(dio, true);
2014         return DM_MAPIO_SUBMITTED;
2015 }
2016
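/*
 * Copy bio data into the in-memory journal (writes) or out of it (reads that
 * hit data still sitting in the journal).  Returns true if the bio is larger
 * than the journal entries reserved for it, in which case the caller has to
 * go back and reserve more.
 */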
2017 static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio,
2018                                  unsigned int journal_section, unsigned int journal_entry)
2019 {
2020         struct dm_integrity_c *ic = dio->ic;
2021         sector_t logical_sector;
2022         unsigned int n_sectors;
2023
2024         logical_sector = dio->range.logical_sector;
2025         n_sectors = dio->range.n_sectors;
2026         do {
2027                 struct bio_vec bv = bio_iovec(bio);
2028                 char *mem;
2029
2030                 if (unlikely(bv.bv_len >> SECTOR_SHIFT > n_sectors))
2031                         bv.bv_len = n_sectors << SECTOR_SHIFT;
2032                 n_sectors -= bv.bv_len >> SECTOR_SHIFT;
2033                 bio_advance_iter(bio, &bio->bi_iter, bv.bv_len);
2034 retry_kmap:
2035                 mem = kmap_local_page(bv.bv_page);
2036                 if (likely(dio->op == REQ_OP_WRITE))
2037                         flush_dcache_page(bv.bv_page);
2038
2039                 do {
2040                         struct journal_entry *je = access_journal_entry(ic, journal_section, journal_entry);
2041
2042                         if (unlikely(dio->op == REQ_OP_READ)) {
2043                                 struct journal_sector *js;
2044                                 char *mem_ptr;
2045                                 unsigned int s;
2046
2047                                 if (unlikely(journal_entry_is_inprogress(je))) {
2048                                         flush_dcache_page(bv.bv_page);
2049                                         kunmap_local(mem);
2050
2051                                         __io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
2052                                         goto retry_kmap;
2053                                 }
2054                                 smp_rmb();
2055                                 BUG_ON(journal_entry_get_sector(je) != logical_sector);
2056                                 js = access_journal_data(ic, journal_section, journal_entry);
2057                                 mem_ptr = mem + bv.bv_offset;
2058                                 s = 0;
2059                                 do {
2060                                         memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA);
2061                                         *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s];
2062                                         js++;
2063                                         mem_ptr += 1 << SECTOR_SHIFT;
2064                                 } while (++s < ic->sectors_per_block);
2065 #ifdef INTERNAL_VERIFY
2066                                 if (ic->internal_hash) {
2067                                         char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
2068
2069                                         integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
2070                                         if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
2071                                                 DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx",
2072                                                             logical_sector);
2073                                                 dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum",
2074                                                                  bio, logical_sector, 0);
2075                                         }
2076                                 }
2077 #endif
2078                         }
2079
2080                         if (!ic->internal_hash) {
2081                                 struct bio_integrity_payload *bip = bio_integrity(bio);
2082                                 unsigned int tag_todo = ic->tag_size;
2083                                 char *tag_ptr = journal_entry_tag(ic, je);
2084
2085                                 if (bip) {
2086                                         do {
2087                                                 struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
2088                                                 unsigned int tag_now = min(biv.bv_len, tag_todo);
2089                                                 char *tag_addr;
2090
2091                                                 BUG_ON(PageHighMem(biv.bv_page));
2092                                                 tag_addr = bvec_virt(&biv);
2093                                                 if (likely(dio->op == REQ_OP_WRITE))
2094                                                         memcpy(tag_ptr, tag_addr, tag_now);
2095                                                 else
2096                                                         memcpy(tag_addr, tag_ptr, tag_now);
2097                                                 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, tag_now);
2098                                                 tag_ptr += tag_now;
2099                                                 tag_todo -= tag_now;
2100                                         } while (unlikely(tag_todo));
2101                                 } else if (likely(dio->op == REQ_OP_WRITE))
2102                                         memset(tag_ptr, 0, tag_todo);
2103                         }
2104
2105                         if (likely(dio->op == REQ_OP_WRITE)) {
2106                                 struct journal_sector *js;
2107                                 unsigned int s;
2108
2109                                 js = access_journal_data(ic, journal_section, journal_entry);
2110                                 memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT);
2111
2112                                 s = 0;
2113                                 do {
2114                                         je->last_bytes[s] = js[s].commit_id;
2115                                 } while (++s < ic->sectors_per_block);
2116
2117                                 if (ic->internal_hash) {
2118                                         unsigned int digest_size = crypto_shash_digestsize(ic->internal_hash);
2119
2120                                         if (unlikely(digest_size > ic->tag_size)) {
2121                                                 char checksums_onstack[HASH_MAX_DIGESTSIZE];
2122
2123                                                 integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack);
2124                                                 memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
2125                                         } else
2126                                                 integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je));
2127                                 }
2128
2129                                 journal_entry_set_sector(je, logical_sector);
2130                         }
2131                         logical_sector += ic->sectors_per_block;
2132
2133                         journal_entry++;
2134                         if (unlikely(journal_entry == ic->journal_section_entries)) {
2135                                 journal_entry = 0;
2136                                 journal_section++;
2137                                 wraparound_section(ic, &journal_section);
2138                         }
2139
2140                         bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT;
2141                 } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT);
2142
2143                 if (unlikely(dio->op == REQ_OP_READ))
2144                         flush_dcache_page(bv.bv_page);
2145                 kunmap_local(mem);
2146         } while (n_sectors);
2147
2148         if (likely(dio->op == REQ_OP_WRITE)) {
2149                 smp_mb();
2150                 if (unlikely(waitqueue_active(&ic->copy_to_journal_wait)))
2151                         wake_up(&ic->copy_to_journal_wait);
2152                 if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold)
2153                         queue_work(ic->commit_wq, &ic->commit_work);
2154                 else
2155                         schedule_autocommit(ic);
2156         } else
2157                 remove_range(ic, &dio->range);
2158
2159         if (unlikely(bio->bi_iter.bi_size)) {
2160                 sector_t area, offset;
2161
2162                 dio->range.logical_sector = logical_sector;
2163                 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
2164                 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
2165                 return true;
2166         }
2167
2168         return false;
2169 }
2170
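/*
 * Main I/O path for the journalled modes.  Under endio_wait.lock the bio's
 * range is registered (add_new_range()) and, for journal-mode writes, journal
 * entries are reserved; reads that find their data in the journal are served
 * from it.  Because this path may sleep, it re-queues itself onto a workqueue
 * instead of sleeping when it was called directly from the map function
 * (from_map).
 */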
2171 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map)
2172 {
2173         struct dm_integrity_c *ic = dio->ic;
2174         struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2175         unsigned int journal_section, journal_entry;
2176         unsigned int journal_read_pos;
2177         struct completion read_comp;
2178         bool discard_retried = false;
2179         bool need_sync_io = ic->internal_hash && dio->op == REQ_OP_READ;
2180
2181         if (unlikely(dio->op == REQ_OP_DISCARD) && ic->mode != 'D')
2182                 need_sync_io = true;
2183
2184         if (need_sync_io && from_map) {
2185                 INIT_WORK(&dio->work, integrity_bio_wait);
2186                 queue_work(ic->offload_wq, &dio->work);
2187                 return;
2188         }
2189
2190 lock_retry:
2191         spin_lock_irq(&ic->endio_wait.lock);
2192 retry:
2193         if (unlikely(dm_integrity_failed(ic))) {
2194                 spin_unlock_irq(&ic->endio_wait.lock);
2195                 do_endio(ic, bio);
2196                 return;
2197         }
2198         dio->range.n_sectors = bio_sectors(bio);
2199         journal_read_pos = NOT_FOUND;
2200         if (ic->mode == 'J' && likely(dio->op != REQ_OP_DISCARD)) {
2201                 if (dio->op == REQ_OP_WRITE) {
2202                         unsigned int next_entry, i, pos;
2203                         unsigned int ws, we, range_sectors;
2204
2205                         dio->range.n_sectors = min(dio->range.n_sectors,
2206                                                    (sector_t)ic->free_sectors << ic->sb->log2_sectors_per_block);
2207                         if (unlikely(!dio->range.n_sectors)) {
2208                                 if (from_map)
2209                                         goto offload_to_thread;
2210                                 sleep_on_endio_wait(ic);
2211                                 goto retry;
2212                         }
2213                         range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block;
2214                         ic->free_sectors -= range_sectors;
2215                         journal_section = ic->free_section;
2216                         journal_entry = ic->free_section_entry;
2217
2218                         next_entry = ic->free_section_entry + range_sectors;
2219                         ic->free_section_entry = next_entry % ic->journal_section_entries;
2220                         ic->free_section += next_entry / ic->journal_section_entries;
2221                         ic->n_uncommitted_sections += next_entry / ic->journal_section_entries;
2222                         wraparound_section(ic, &ic->free_section);
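                        /*
                         * Hypothetical example: with 8 entries per section,
                         * free_section_entry == 6 and range_sectors == 5 give
                         * next_entry == 11, so the reservation ends at entry 3
                         * of the following section and one more section
                         * becomes uncommitted.
                         */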
2223
2224                         pos = journal_section * ic->journal_section_entries + journal_entry;
2225                         ws = journal_section;
2226                         we = journal_entry;
2227                         i = 0;
2228                         do {
2229                                 struct journal_entry *je;
2230
2231                                 add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i);
2232                                 pos++;
2233                                 if (unlikely(pos >= ic->journal_entries))
2234                                         pos = 0;
2235
2236                                 je = access_journal_entry(ic, ws, we);
2237                                 BUG_ON(!journal_entry_is_unused(je));
2238                                 journal_entry_set_inprogress(je);
2239                                 we++;
2240                                 if (unlikely(we == ic->journal_section_entries)) {
2241                                         we = 0;
2242                                         ws++;
2243                                         wraparound_section(ic, &ws);
2244                                 }
2245                         } while ((i += ic->sectors_per_block) < dio->range.n_sectors);
2246
2247                         spin_unlock_irq(&ic->endio_wait.lock);
2248                         goto journal_read_write;
2249                 } else {
2250                         sector_t next_sector;
2251
2252                         journal_read_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2253                         if (likely(journal_read_pos == NOT_FOUND)) {
2254                                 if (unlikely(dio->range.n_sectors > next_sector - dio->range.logical_sector))
2255                                         dio->range.n_sectors = next_sector - dio->range.logical_sector;
2256                         } else {
2257                                 unsigned int i;
2258                                 unsigned int jp = journal_read_pos + 1;
2259
2260                                 for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) {
2261                                         if (!test_journal_node(ic, jp, dio->range.logical_sector + i))
2262                                                 break;
2263                                 }
2264                                 dio->range.n_sectors = i;
2265                         }
2266                 }
2267         }
2268         if (unlikely(!add_new_range(ic, &dio->range, true))) {
2269                 /*
2270                  * We must not sleep in the request routine because it could
2271                  * stall bios on current->bio_list.
2272                  * So, we offload the bio to a workqueue if we have to sleep.
2273                  */
2274                 if (from_map) {
2275 offload_to_thread:
2276                         spin_unlock_irq(&ic->endio_wait.lock);
2277                         INIT_WORK(&dio->work, integrity_bio_wait);
2278                         queue_work(ic->wait_wq, &dio->work);
2279                         return;
2280                 }
2281                 if (journal_read_pos != NOT_FOUND)
2282                         dio->range.n_sectors = ic->sectors_per_block;
2283                 wait_and_add_new_range(ic, &dio->range);
2284                 /*
2285                  * wait_and_add_new_range drops the spinlock, so the journal
2286                  * may have been changed arbitrarily. We need to recheck.
2287                  * To simplify the code, we restrict I/O size to just one block.
2288                  */
2289                 if (journal_read_pos != NOT_FOUND) {
2290                         sector_t next_sector;
2291                         unsigned int new_pos;
2292
2293                         new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2294                         if (unlikely(new_pos != journal_read_pos)) {
2295                                 remove_range_unlocked(ic, &dio->range);
2296                                 goto retry;
2297                         }
2298                 }
2299         }
2300         if (ic->mode == 'J' && likely(dio->op == REQ_OP_DISCARD) && !discard_retried) {
2301                 sector_t next_sector;
2302                 unsigned int new_pos;
2303
2304                 new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
2305                 if (unlikely(new_pos != NOT_FOUND) ||
2306                     unlikely(next_sector < dio->range.logical_sector - dio->range.n_sectors)) {
2307                         remove_range_unlocked(ic, &dio->range);
2308                         spin_unlock_irq(&ic->endio_wait.lock);
2309                         queue_work(ic->commit_wq, &ic->commit_work);
2310                         flush_workqueue(ic->commit_wq);
2311                         queue_work(ic->writer_wq, &ic->writer_work);
2312                         flush_workqueue(ic->writer_wq);
2313                         discard_retried = true;
2314                         goto lock_retry;
2315                 }
2316         }
2317         spin_unlock_irq(&ic->endio_wait.lock);
2318
2319         if (unlikely(journal_read_pos != NOT_FOUND)) {
2320                 journal_section = journal_read_pos / ic->journal_section_entries;
2321                 journal_entry = journal_read_pos % ic->journal_section_entries;
2322                 goto journal_read_write;
2323         }
2324
2325         if (ic->mode == 'B' && (dio->op == REQ_OP_WRITE || unlikely(dio->op == REQ_OP_DISCARD))) {
2326                 if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
2327                                      dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
2328                         struct bitmap_block_status *bbs;
2329
2330                         bbs = sector_to_bitmap_block(ic, dio->range.logical_sector);
2331                         spin_lock(&bbs->bio_queue_lock);
2332                         bio_list_add(&bbs->bio_queue, bio);
2333                         spin_unlock(&bbs->bio_queue_lock);
2334                         queue_work(ic->writer_wq, &bbs->work);
2335                         return;
2336                 }
2337         }
2338
2339         dio->in_flight = (atomic_t)ATOMIC_INIT(2);
2340
2341         if (need_sync_io) {
2342                 init_completion(&read_comp);
2343                 dio->completion = &read_comp;
2344         } else
2345                 dio->completion = NULL;
2346
2347         dm_bio_record(&dio->bio_details, bio);
2348         bio_set_dev(bio, ic->dev->bdev);
2349         bio->bi_integrity = NULL;
2350         bio->bi_opf &= ~REQ_INTEGRITY;
2351         bio->bi_end_io = integrity_end_io;
2352         bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
2353
2354         if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
2355                 integrity_metadata(&dio->work);
2356                 dm_integrity_flush_buffers(ic, false);
2357
2358                 dio->in_flight = (atomic_t)ATOMIC_INIT(1);
2359                 dio->completion = NULL;
2360
2361                 submit_bio_noacct(bio);
2362
2363                 return;
2364         }
2365
2366         submit_bio_noacct(bio);
2367
2368         if (need_sync_io) {
2369                 wait_for_completion_io(&read_comp);
2370                 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
2371                     dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector))
2372                         goto skip_check;
2373                 if (ic->mode == 'B') {
2374                         if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector,
2375                                              dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR))
2376                                 goto skip_check;
2377                 }
2378
2379                 if (likely(!bio->bi_status))
2380                         integrity_metadata(&dio->work);
2381                 else
2382 skip_check:
2383                         dec_in_flight(dio);
2384         } else {
2385                 INIT_WORK(&dio->work, integrity_metadata);
2386                 queue_work(ic->metadata_wq, &dio->work);
2387         }
2388
2389         return;
2390
2391 journal_read_write:
2392         if (unlikely(__journal_read_write(dio, bio, journal_section, journal_entry)))
2393                 goto lock_retry;
2394
2395         do_endio_flush(ic, dio);
2396 }
2397
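/*
 * Map path for inline mode ('I'): the tags travel with the bio as a
 * bio-integrity payload instead of going through the journal.  A payload
 * buffer is allocated (or taken from the recheck mempool, splitting the bio
 * first if the payload would not fit in two pages), checksums are computed
 * here for writes, and reads are verified later in dm_integrity_end_io().
 */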
2398 static int dm_integrity_map_inline(struct dm_integrity_io *dio)
2399 {
2400         struct dm_integrity_c *ic = dio->ic;
2401         struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2402         struct bio_integrity_payload *bip;
2403         unsigned payload_len, digest_size, extra_size, ret;
2404
2405         dio->integrity_payload = NULL;
2406         dio->integrity_payload_from_mempool = false;
2407
2408         if (unlikely(bio_integrity(bio))) {
2409                 bio->bi_status = BLK_STS_NOTSUPP;
2410                 bio_endio(bio);
2411                 return DM_MAPIO_SUBMITTED;
2412         }
2413
2414         bio_set_dev(bio, ic->dev->bdev);
2415         if (unlikely((bio->bi_opf & REQ_PREFLUSH) != 0))
2416                 return DM_MAPIO_REMAPPED;
2417
2418 retry:
2419         payload_len = ic->tuple_size * (bio_sectors(bio) >> ic->sb->log2_sectors_per_block);
2420         digest_size = crypto_shash_digestsize(ic->internal_hash);
2421         extra_size = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
2422         payload_len += extra_size;
2423         dio->integrity_payload = kmalloc(payload_len, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
2424         if (unlikely(!dio->integrity_payload)) {
2425                 const unsigned x_size = PAGE_SIZE << 1;
2426                 if (payload_len > x_size) {
2427                         unsigned sectors = ((x_size - extra_size) / ic->tuple_size) << ic->sb->log2_sectors_per_block;
2428                         if (WARN_ON(!sectors || sectors >= bio_sectors(bio))) {
2429                                 bio->bi_status = BLK_STS_NOTSUPP;
2430                                 bio_endio(bio);
2431                                 return DM_MAPIO_SUBMITTED;
2432                         }
2433                         dm_accept_partial_bio(bio, sectors);
2434                         goto retry;
2435                 }
2436                 dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO));
2437                 dio->integrity_payload_from_mempool = true;
2438         }
2439
2440         bio->bi_iter.bi_sector = dm_target_offset(ic->ti, bio->bi_iter.bi_sector);
2441         dio->bio_details.bi_iter = bio->bi_iter;
2442
2443         if (unlikely(!dm_integrity_check_limits(ic, bio->bi_iter.bi_sector, bio))) {
2444                 return DM_MAPIO_KILL;
2445         }
2446
2447         bio->bi_iter.bi_sector += ic->start + SB_SECTORS;
2448
2449         bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
2450         if (unlikely(IS_ERR(bip))) {
2451                 bio->bi_status = errno_to_blk_status(PTR_ERR(bip));
2452                 bio_endio(bio);
2453                 return DM_MAPIO_SUBMITTED;
2454         }
2455
2456         if (dio->op == REQ_OP_WRITE) {
2457                 unsigned pos = 0;
2458                 while (dio->bio_details.bi_iter.bi_size) {
2459                         struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
2460                         const char *mem = bvec_kmap_local(&bv);
2461                         if (ic->tag_size < ic->tuple_size)
2462                                 memset(dio->integrity_payload + pos + ic->tag_size, 0, ic->tuple_size - ic->tag_size);
2463                         integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, dio->integrity_payload + pos);
2464                         kunmap_local(mem);
2465                         pos += ic->tuple_size;
2466                         bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
2467                 }
2468         }
2469
2470         ret = bio_integrity_add_page(bio, virt_to_page(dio->integrity_payload),
2471                                         payload_len, offset_in_page(dio->integrity_payload));
2472         if (unlikely(ret != payload_len)) {
2473                 bio->bi_status = BLK_STS_RESOURCE;
2474                 bio_endio(bio);
2475                 return DM_MAPIO_SUBMITTED;
2476         }
2477
2478         return DM_MAPIO_REMAPPED;
2479 }
2480
2481 static inline void dm_integrity_free_payload(struct dm_integrity_io *dio)
2482 {
2483         struct dm_integrity_c *ic = dio->ic;
2484         if (unlikely(dio->integrity_payload_from_mempool))
2485                 mempool_free(virt_to_page(dio->integrity_payload), &ic->recheck_pool);
2486         else
2487                 kfree(dio->integrity_payload);
2488         dio->integrity_payload = NULL;
2489         dio->integrity_payload_from_mempool = false;
2490 }
2491
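/*
 * Slow path for inline mode: on a tag mismatch the affected blocks are
 * re-read one at a time with their stored tag attached, verified again and
 * copied back into the original bio; a persistent mismatch fails the bio
 * with BLK_STS_PROTECTION.
 */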
2492 static void dm_integrity_inline_recheck(struct work_struct *w)
2493 {
2494         struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
2495         struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
2496         struct dm_integrity_c *ic = dio->ic;
2497         struct bio *outgoing_bio;
2498         void *outgoing_data;
2499
2500         dio->integrity_payload = page_to_virt((struct page *)mempool_alloc(&ic->recheck_pool, GFP_NOIO));
2501         dio->integrity_payload_from_mempool = true;
2502
2503         outgoing_data = dio->integrity_payload + PAGE_SIZE;
2504
2505         while (dio->bio_details.bi_iter.bi_size) {
2506                 char digest[HASH_MAX_DIGESTSIZE];
2507                 int r;
2508                 struct bio_integrity_payload *bip;
2509                 struct bio_vec bv;
2510                 char *mem;
2511
2512                 outgoing_bio = bio_alloc_bioset(ic->dev->bdev, 1, REQ_OP_READ, GFP_NOIO, &ic->recheck_bios);
2513
2514                 r = bio_add_page(outgoing_bio, virt_to_page(outgoing_data), ic->sectors_per_block << SECTOR_SHIFT, 0);
2515                 if (unlikely(r != (ic->sectors_per_block << SECTOR_SHIFT))) {
2516                         bio_put(outgoing_bio);
2517                         bio->bi_status = BLK_STS_RESOURCE;
2518                         bio_endio(bio);
2519                         return;
2520                 }
2521
2522                 bip = bio_integrity_alloc(outgoing_bio, GFP_NOIO, 1);
2523                 if (unlikely(IS_ERR(bip))) {
2524                         bio_put(outgoing_bio);
2525                         bio->bi_status = errno_to_blk_status(PTR_ERR(bip));
2526                         bio_endio(bio);
2527                         return;
2528                 }
2529
2530                 r = bio_integrity_add_page(outgoing_bio, virt_to_page(dio->integrity_payload), ic->tuple_size, 0);
2531                 if (unlikely(r != ic->tuple_size)) {
2532                         bio_put(outgoing_bio);
2533                         bio->bi_status = BLK_STS_RESOURCE;
2534                         bio_endio(bio);
2535                         return;
2536                 }
2537
2538                 outgoing_bio->bi_iter.bi_sector = dio->bio_details.bi_iter.bi_sector + ic->start + SB_SECTORS;
2539
2540                 r = submit_bio_wait(outgoing_bio);
2541                 if (unlikely(r != 0)) {
2542                         bio_put(outgoing_bio);
2543                         bio->bi_status = errno_to_blk_status(r);
2544                         bio_endio(bio);
2545                         return;
2546                 }
2547                 bio_put(outgoing_bio);
2548
2549                 integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, outgoing_data, digest);
2550                 if (unlikely(memcmp(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
2551                         DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
2552                                 ic->dev->bdev, dio->bio_details.bi_iter.bi_sector);
2553                         atomic64_inc(&ic->number_of_mismatches);
2554                         dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",
2555                                 bio, dio->bio_details.bi_iter.bi_sector, 0);
2556
2557                         bio->bi_status = BLK_STS_PROTECTION;
2558                         bio_endio(bio);
2559                         return;
2560                 }
2561
2562                 bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
2563                 mem = bvec_kmap_local(&bv);
2564                 memcpy(mem, outgoing_data, ic->sectors_per_block << SECTOR_SHIFT);
2565                 kunmap_local(mem);
2566
2567                 bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
2568         }
2569
2570         bio_endio(bio);
2571 }
2572
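/*
 * End-io hook, used mainly by inline mode: successfully completed reads are
 * verified against the integrity payload here; a mismatch defers the bio to
 * dm_integrity_inline_recheck() and returns DM_ENDIO_INCOMPLETE.
 */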
2573 static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
2574 {
2575         struct dm_integrity_c *ic = ti->private;
2576         if (ic->mode == 'I') {
2577                 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
2578                 if (dio->op == REQ_OP_READ && likely(*status == BLK_STS_OK)) {
2579                         unsigned pos = 0;
2580                         while (dio->bio_details.bi_iter.bi_size) {
2581                                 char digest[HASH_MAX_DIGESTSIZE];
2582                                 struct bio_vec bv = bio_iter_iovec(bio, dio->bio_details.bi_iter);
2583                                 char *mem = bvec_kmap_local(&bv);
2584                                 //memset(mem, 0xff, ic->sectors_per_block << SECTOR_SHIFT);
2585                                 integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, digest);
2586                                 if (unlikely(memcmp(digest, dio->integrity_payload + pos,
2587                                                 min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
2588                                         kunmap_local(mem);
2589                                         dm_integrity_free_payload(dio);
2590                                         INIT_WORK(&dio->work, dm_integrity_inline_recheck);
2591                                         queue_work(ic->offload_wq, &dio->work);
2592                                         return DM_ENDIO_INCOMPLETE;
2593                                 }
2594                                 kunmap_local(mem);
2595                                 pos += ic->tuple_size;
2596                                 bio_advance_iter_single(bio, &dio->bio_details.bi_iter, ic->sectors_per_block << SECTOR_SHIFT);
2597                         }
2598                 }
2599                 if (likely(dio->op == REQ_OP_READ) || likely(dio->op == REQ_OP_WRITE)) {
2600                         dm_integrity_free_payload(dio);
2601                 }
2602         }
2603         return DM_ENDIO_DONE;
2604 }
2605
2606 static void integrity_bio_wait(struct work_struct *w)
2607 {
2608         struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
2609
2610         dm_integrity_map_continue(dio, false);
2611 }
2612
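/*
 * If the current journal section is only partially filled, account the
 * remaining entries as used and advance to the next section, so that a
 * commit always covers whole sections.  Warn if the journal accounting
 * no longer adds up.
 */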
2613 static void pad_uncommitted(struct dm_integrity_c *ic)
2614 {
2615         if (ic->free_section_entry) {
2616                 ic->free_sectors -= ic->journal_section_entries - ic->free_section_entry;
2617                 ic->free_section_entry = 0;
2618                 ic->free_section++;
2619                 wraparound_section(ic, &ic->free_section);
2620                 ic->n_uncommitted_sections++;
2621         }
2622         if (WARN_ON(ic->journal_sections * ic->journal_section_entries !=
2623                     (ic->n_uncommitted_sections + ic->n_committed_sections) *
2624                     ic->journal_section_entries + ic->free_sectors)) {
2625                 DMCRIT("journal_sections %u, journal_section_entries %u, "
2626                        "n_uncommitted_sections %u, n_committed_sections %u, "
2627                        "journal_section_entries %u, free_sectors %u",
2628                        ic->journal_sections, ic->journal_section_entries,
2629                        ic->n_uncommitted_sections, ic->n_committed_sections,
2630                        ic->journal_section_entries, ic->free_sectors);
2631         }
2632 }
2633
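/*
 * Commit work: pad the partially filled journal section, wait until no
 * journal entry is in progress, stamp commit ids into the journal sectors,
 * write the journal to disk and complete the queued flush bios.  Kicks the
 * writer work when free journal space drops below the threshold.
 */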
2634 static void integrity_commit(struct work_struct *w)
2635 {
2636         struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, commit_work);
2637         unsigned int commit_start, commit_sections;
2638         unsigned int i, j, n;
2639         struct bio *flushes;
2640
2641         del_timer(&ic->autocommit_timer);
2642
2643         if (ic->mode == 'I')
2644                 return;
2645
2646         spin_lock_irq(&ic->endio_wait.lock);
2647         flushes = bio_list_get(&ic->flush_bio_list);
2648         if (unlikely(ic->mode != 'J')) {
2649                 spin_unlock_irq(&ic->endio_wait.lock);
2650                 dm_integrity_flush_buffers(ic, true);
2651                 goto release_flush_bios;
2652         }
2653
2654         pad_uncommitted(ic);
2655         commit_start = ic->uncommitted_section;
2656         commit_sections = ic->n_uncommitted_sections;
2657         spin_unlock_irq(&ic->endio_wait.lock);
2658
2659         if (!commit_sections)
2660                 goto release_flush_bios;
2661
2662         ic->wrote_to_journal = true;
2663
2664         i = commit_start;
2665         for (n = 0; n < commit_sections; n++) {
2666                 for (j = 0; j < ic->journal_section_entries; j++) {
2667                         struct journal_entry *je;
2668
2669                         je = access_journal_entry(ic, i, j);
2670                         io_wait_event(ic->copy_to_journal_wait, !journal_entry_is_inprogress(je));
2671                 }
2672                 for (j = 0; j < ic->journal_section_sectors; j++) {
2673                         struct journal_sector *js;
2674
2675                         js = access_journal(ic, i, j);
2676                         js->commit_id = dm_integrity_commit_id(ic, i, j, ic->commit_seq);
2677                 }
2678                 i++;
2679                 if (unlikely(i >= ic->journal_sections))
2680                         ic->commit_seq = next_commit_seq(ic->commit_seq);
2681                 wraparound_section(ic, &i);
2682         }
2683         smp_rmb();
2684
2685         write_journal(ic, commit_start, commit_sections);
2686
2687         spin_lock_irq(&ic->endio_wait.lock);
2688         ic->uncommitted_section += commit_sections;
2689         wraparound_section(ic, &ic->uncommitted_section);
2690         ic->n_uncommitted_sections -= commit_sections;
2691         ic->n_committed_sections += commit_sections;
2692         spin_unlock_irq(&ic->endio_wait.lock);
2693
2694         if (READ_ONCE(ic->free_sectors) <= ic->free_sectors_threshold)
2695                 queue_work(ic->writer_wq, &ic->writer_work);
2696
2697 release_flush_bios:
2698         while (flushes) {
2699                 struct bio *next = flushes->bi_next;
2700
2701                 flushes->bi_next = NULL;
2702                 do_endio(ic, flushes);
2703                 flushes = next;
2704         }
2705 }
2706
2707 static void complete_copy_from_journal(unsigned long error, void *context)
2708 {
2709         struct journal_io *io = context;
2710         struct journal_completion *comp = io->comp;
2711         struct dm_integrity_c *ic = comp->ic;
2712
2713         remove_range(ic, &io->range);
2714         mempool_free(io, &ic->journal_io_mempool);
2715         if (unlikely(error != 0))
2716                 dm_integrity_io_error(ic, "copying from journal", -EIO);
2717         complete_journal_op(comp);
2718 }
2719
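/*
 * The commit_id of a journal sector occupies the last 8 bytes of the data
 * stored in that sector; restore those bytes from the journal entry before
 * the data is written to its final location.
 */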
2720 static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js,
2721                                struct journal_entry *je)
2722 {
2723         unsigned int s = 0;
2724
2725         do {
2726                 js->commit_id = je->last_bytes[s];
2727                 js++;
2728         } while (++s < ic->sectors_per_block);
2729 }
2730
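/*
 * Write committed journal sections to their final location on the data
 * device.  Adjacent entries of a section are merged into a single range;
 * when not replaying, entries that have a newer committed copy in the
 * journal are skipped.  During replay, sector numbers (and, with an internal
 * hash, the checksums) are validated first.  The tags are written to the
 * metadata area and the data is copied back with copy_from_journal().
 */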
2731 static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start,
2732                              unsigned int write_sections, bool from_replay)
2733 {
2734         unsigned int i, j, n;
2735         struct journal_completion comp;
2736         struct blk_plug plug;
2737
2738         blk_start_plug(&plug);
2739
2740         comp.ic = ic;
2741         comp.in_flight = (atomic_t)ATOMIC_INIT(1);
2742         init_completion(&comp.comp);
2743
2744         i = write_start;
2745         for (n = 0; n < write_sections; n++, i++, wraparound_section(ic, &i)) {
2746 #ifndef INTERNAL_VERIFY
2747                 if (unlikely(from_replay))
2748 #endif
2749                         rw_section_mac(ic, i, false);
2750                 for (j = 0; j < ic->journal_section_entries; j++) {
2751                         struct journal_entry *je = access_journal_entry(ic, i, j);
2752                         sector_t sec, area, offset;
2753                         unsigned int k, l, next_loop;
2754                         sector_t metadata_block;
2755                         unsigned int metadata_offset;
2756                         struct journal_io *io;
2757
2758                         if (journal_entry_is_unused(je))
2759                                 continue;
2760                         BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay);
2761                         sec = journal_entry_get_sector(je);
2762                         if (unlikely(from_replay)) {
2763                                 if (unlikely(sec & (unsigned int)(ic->sectors_per_block - 1))) {
2764                                         dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
2765                                         sec &= ~(sector_t)(ic->sectors_per_block - 1);
2766                                 }
2767                                 if (unlikely(sec >= ic->provided_data_sectors)) {
2768                                         journal_entry_set_unused(je);
2769                                         continue;
2770                                 }
2771                         }
2772                         get_area_and_offset(ic, sec, &area, &offset);
2773                         restore_last_bytes(ic, access_journal_data(ic, i, j), je);
2774                         for (k = j + 1; k < ic->journal_section_entries; k++) {
2775                                 struct journal_entry *je2 = access_journal_entry(ic, i, k);
2776                                 sector_t sec2, area2, offset2;
2777
2778                                 if (journal_entry_is_unused(je2))
2779                                         break;
2780                                 BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
2781                                 sec2 = journal_entry_get_sector(je2);
2782                                 if (unlikely(sec2 >= ic->provided_data_sectors))
2783                                         break;
2784                                 get_area_and_offset(ic, sec2, &area2, &offset2);
2785                                 if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
2786                                         break;
2787                                 restore_last_bytes(ic, access_journal_data(ic, i, k), je2);
2788                         }
2789                         next_loop = k - 1;
2790
2791                         io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO);
2792                         io->comp = &comp;
2793                         io->range.logical_sector = sec;
2794                         io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
2795
2796                         spin_lock_irq(&ic->endio_wait.lock);
2797                         add_new_range_and_wait(ic, &io->range);
2798
2799                         if (likely(!from_replay)) {
2800                                 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];
2801
2802                         /* don't write if there is a newer committed sector */
2803                                 while (j < k && find_newer_committed_node(ic, &section_node[j])) {
2804                                         struct journal_entry *je2 = access_journal_entry(ic, i, j);
2805
2806                                         journal_entry_set_unused(je2);
2807                                         remove_journal_node(ic, &section_node[j]);
2808                                         j++;
2809                                         sec += ic->sectors_per_block;
2810                                         offset += ic->sectors_per_block;
2811                                 }
2812                                 while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) {
2813                                         struct journal_entry *je2 = access_journal_entry(ic, i, k - 1);
2814
2815                                         journal_entry_set_unused(je2);
2816                                         remove_journal_node(ic, &section_node[k - 1]);
2817                                         k--;
2818                                 }
2819                                 if (j == k) {
2820                                         remove_range_unlocked(ic, &io->range);
2821                                         spin_unlock_irq(&ic->endio_wait.lock);
2822                                         mempool_free(io, &ic->journal_io_mempool);
2823                                         goto skip_io;
2824                                 }
2825                                 for (l = j; l < k; l++)
2826                                         remove_journal_node(ic, &section_node[l]);
2827                         }
2828                         spin_unlock_irq(&ic->endio_wait.lock);
2829
2830                         metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
2831                         for (l = j; l < k; l++) {
2832                                 int r;
2833                                 struct journal_entry *je2 = access_journal_entry(ic, i, l);
2834
2835                                 if (
2836 #ifndef INTERNAL_VERIFY
2837                                     unlikely(from_replay) &&
2838 #endif
2839                                     ic->internal_hash) {
2840                                         char test_tag[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
2841
2842                                         integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
2843                                                                   (char *)access_journal_data(ic, i, l), test_tag);
2844                                         if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) {
2845                                                 dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
2846                                                 dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0);
2847                                         }
2848                                 }
2849
2850                                 journal_entry_set_unused(je2);
2851                                 r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset,
2852                                                         ic->tag_size, TAG_WRITE);
2853                                 if (unlikely(r))
2854                                         dm_integrity_io_error(ic, "reading tags", r);
2855                         }
2856
2857                         atomic_inc(&comp.in_flight);
2858                         copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block,
2859                                           (k - j) << ic->sb->log2_sectors_per_block,
2860                                           get_data_sector(ic, area, offset),
2861                                           complete_copy_from_journal, io);
2862 skip_io:
2863                         j = next_loop;
2864                 }
2865         }
2866
2867         dm_bufio_write_dirty_buffers_async(ic->bufio);
2868
2869         blk_finish_plug(&plug);
2870
2871         complete_journal_op(&comp);
2872         wait_for_completion_io(&comp.comp);
2873
2874         dm_integrity_flush_buffers(ic, true);
2875 }
2876
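/*
 * Writer work: write all committed journal sections to the data device with
 * do_journal_write() and return their entries to the free pool, waking up
 * writers that were waiting for journal space.
 */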
2877 static void integrity_writer(struct work_struct *w)
2878 {
2879         struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, writer_work);
2880         unsigned int write_start, write_sections;
2881         unsigned int prev_free_sectors;
2882
2883         spin_lock_irq(&ic->endio_wait.lock);
2884         write_start = ic->committed_section;
2885         write_sections = ic->n_committed_sections;
2886         spin_unlock_irq(&ic->endio_wait.lock);
2887
2888         if (!write_sections)
2889                 return;
2890
2891         do_journal_write(ic, write_start, write_sections, false);
2892
2893         spin_lock_irq(&ic->endio_wait.lock);
2894
2895         ic->committed_section += write_sections;
2896         wraparound_section(ic, &ic->committed_section);
2897         ic->n_committed_sections -= write_sections;
2898
2899         prev_free_sectors = ic->free_sectors;
2900         ic->free_sectors += write_sections * ic->journal_section_entries;
2901         if (unlikely(!prev_free_sectors))
2902                 wake_up_locked(&ic->endio_wait);
2903
2904         spin_unlock_irq(&ic->endio_wait.lock);
2905 }
2906
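/*
 * Flush dirty metadata buffers and write the superblock so that the current
 * recalculation position is persisted.
 */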
2907 static void recalc_write_super(struct dm_integrity_c *ic)
2908 {
2909         int r;
2910
2911         dm_integrity_flush_buffers(ic, false);
2912         if (dm_integrity_failed(ic))
2913                 return;
2914
2915         r = sync_rw_sb(ic, REQ_OP_WRITE);
2916         if (unlikely(r))
2917                 dm_integrity_io_error(ic, "writing superblock", r);
2918 }
2919
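/*
 * Background recalculation of integrity tags.  Data is read in chunks of up
 * to RECALC_SECTORS sectors, a checksum is computed for every block and the
 * tags are written to the metadata area; sb->recalc_sector tracks the
 * progress.  In bitmap mode, regions whose bits in the recalc bitmap are
 * clear are skipped and the bits are cleared once the tags are written.
 * The superblock is written out every RECALC_WRITE_SUPER chunks.
 */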
2920 static void integrity_recalc(struct work_struct *w)
2921 {
2922         struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work);
2923         size_t recalc_tags_size;
2924         u8 *recalc_buffer = NULL;
2925         u8 *recalc_tags = NULL;
2926         struct dm_integrity_range range;
2927         struct dm_io_request io_req;
2928         struct dm_io_region io_loc;
2929         sector_t area, offset;
2930         sector_t metadata_block;
2931         unsigned int metadata_offset;
2932         sector_t logical_sector, n_sectors;
2933         __u8 *t;
2934         unsigned int i;
2935         int r;
2936         unsigned int super_counter = 0;
2937         unsigned recalc_sectors = RECALC_SECTORS;
2938
2939 retry:
2940         recalc_buffer = __vmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO);
2941         if (!recalc_buffer) {
2942 oom:
2943                 recalc_sectors >>= 1;
2944                 if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block)
2945                         goto retry;
2946                 DMCRIT("out of memory for recalculate buffer - recalculation disabled");
2947                 goto free_ret;
2948         }
2949         recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
2950         if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
2951                 recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
2952         recalc_tags = kvmalloc(recalc_tags_size, GFP_NOIO);
2953         if (!recalc_tags) {
2954                 vfree(recalc_buffer);
2955                 recalc_buffer = NULL;
2956                 goto oom;
2957         }
2958
2959         DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector));
2960
2961         spin_lock_irq(&ic->endio_wait.lock);
2962
2963 next_chunk:
2964
2965         if (unlikely(dm_post_suspending(ic->ti)))
2966                 goto unlock_ret;
2967
2968         range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
2969         if (unlikely(range.logical_sector >= ic->provided_data_sectors)) {
2970                 if (ic->mode == 'B') {
2971                         block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
2972                         DEBUG_print("queue_delayed_work: bitmap_flush_work\n");
2973                         queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
2974                 }
2975                 goto unlock_ret;
2976         }
2977
2978         get_area_and_offset(ic, range.logical_sector, &area, &offset);
2979         range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector);
2980         if (!ic->meta_dev)
2981                 range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned int)offset);
2982
2983         add_new_range_and_wait(ic, &range);
2984         spin_unlock_irq(&ic->endio_wait.lock);
2985         logical_sector = range.logical_sector;
2986         n_sectors = range.n_sectors;
2987
2988         if (ic->mode == 'B') {
2989                 if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR))
2990                         goto advance_and_next;
2991
2992                 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector,
2993                                        ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
2994                         logical_sector += ic->sectors_per_block;
2995                         n_sectors -= ic->sectors_per_block;
2996                         cond_resched();
2997                 }
2998                 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block,
2999                                        ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
3000                         n_sectors -= ic->sectors_per_block;
3001                         cond_resched();
3002                 }
3003                 get_area_and_offset(ic, logical_sector, &area, &offset);
3004         }
3005
3006         DEBUG_print("recalculating: %llx, %llx\n", logical_sector, n_sectors);
3007
3008         if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
3009                 recalc_write_super(ic);
3010                 if (ic->mode == 'B')
3011                         queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
3012
3013                 super_counter = 0;
3014         }
3015
3016         if (unlikely(dm_integrity_failed(ic)))
3017                 goto err;
3018
3019         io_req.bi_opf = REQ_OP_READ;
3020         io_req.mem.type = DM_IO_VMA;
3021         io_req.mem.ptr.addr = recalc_buffer;
3022         io_req.notify.fn = NULL;
3023         io_req.client = ic->io;
3024         io_loc.bdev = ic->dev->bdev;
3025         io_loc.sector = get_data_sector(ic, area, offset);
3026         io_loc.count = n_sectors;
3027
3028         r = dm_io(&io_req, 1, &io_loc, NULL, IOPRIO_DEFAULT);
3029         if (unlikely(r)) {
3030                 dm_integrity_io_error(ic, "reading data", r);
3031                 goto err;
3032         }
3033
3034         t = recalc_tags;
3035         for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
3036                 integrity_sector_checksum(ic, logical_sector + i, recalc_buffer + (i << SECTOR_SHIFT), t);
3037                 t += ic->tag_size;
3038         }
3039
3040         metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
3041
3042         r = dm_integrity_rw_tag(ic, recalc_tags, &metadata_block, &metadata_offset, t - recalc_tags, TAG_WRITE);
3043         if (unlikely(r)) {
3044                 dm_integrity_io_error(ic, "writing tags", r);
3045                 goto err;
3046         }
3047
3048         if (ic->mode == 'B') {
3049                 sector_t start, end;
3050
3051                 start = (range.logical_sector >>
3052                          (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) <<
3053                         (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
3054                 end = ((range.logical_sector + range.n_sectors) >>
3055                        (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)) <<
3056                         (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
3057                 block_bitmap_op(ic, ic->recalc_bitmap, start, end - start, BITMAP_OP_CLEAR);
3058         }
3059
3060 advance_and_next:
3061         cond_resched();
3062
3063         spin_lock_irq(&ic->endio_wait.lock);
3064         remove_range_unlocked(ic, &range);
3065         ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
3066         goto next_chunk;
3067
3068 err:
3069         remove_range(ic, &range);
3070         goto free_ret;
3071
3072 unlock_ret:
3073         spin_unlock_irq(&ic->endio_wait.lock);
3074
3075         recalc_write_super(ic);
3076
3077 free_ret:
3078         vfree(recalc_buffer);
3079         kvfree(recalc_tags);
3080 }
3081
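/*
 * Work item for one bitmap block.  Bios whose region is already marked in
 * the may_write bitmap are dispatched immediately; for the remaining bios
 * the on-disk bitmap is updated and written with FUA before they are allowed
 * to proceed.
 */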
3082 static void bitmap_block_work(struct work_struct *w)
3083 {
3084         struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work);
3085         struct dm_integrity_c *ic = bbs->ic;
3086         struct bio *bio;
3087         struct bio_list bio_queue;
3088         struct bio_list waiting;
3089
3090         bio_list_init(&waiting);
3091
3092         spin_lock(&bbs->bio_queue_lock);
3093         bio_queue = bbs->bio_queue;
3094         bio_list_init(&bbs->bio_queue);
3095         spin_unlock(&bbs->bio_queue_lock);
3096
3097         while ((bio = bio_list_pop(&bio_queue))) {
3098                 struct dm_integrity_io *dio;
3099
3100                 dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
3101
3102                 if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
3103                                     dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
3104                         remove_range(ic, &dio->range);
3105                         INIT_WORK(&dio->work, integrity_bio_wait);
3106                         queue_work(ic->offload_wq, &dio->work);
3107                 } else {
3108                         block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
3109                                         dio->range.n_sectors, BITMAP_OP_SET);
3110                         bio_list_add(&waiting, bio);
3111                 }
3112         }
3113
3114         if (bio_list_empty(&waiting))
3115                 return;
3116
3117         rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC,
3118                            bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT),
3119                            BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL);
3120
3121         while ((bio = bio_list_pop(&waiting))) {
3122                 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
3123
3124                 block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
3125                                 dio->range.n_sectors, BITMAP_OP_SET);
3126
3127                 remove_range(ic, &dio->range);
3128                 INIT_WORK(&dio->work, integrity_bio_wait);
3129                 queue_work(ic->offload_wq, &dio->work);
3130         }
3131
3132         queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
3133 }
3134
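/*
 * Delayed bitmap flush.  Lock the whole device range to wait for outstanding
 * I/O, flush data and metadata, clear the in-memory bitmaps up to the
 * already recalculated limit, write the bitmap back with FUA and complete
 * any bios queued for synchronous mode.
 */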
3135 static void bitmap_flush_work(struct work_struct *work)
3136 {
3137         struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work);
3138         struct dm_integrity_range range;
3139         unsigned long limit;
3140         struct bio *bio;
3141
3142         dm_integrity_flush_buffers(ic, false);
3143
3144         range.logical_sector = 0;
3145         range.n_sectors = ic->provided_data_sectors;
3146
3147         spin_lock_irq(&ic->endio_wait.lock);
3148         add_new_range_and_wait(ic, &range);
3149         spin_unlock_irq(&ic->endio_wait.lock);
3150
3151         dm_integrity_flush_buffers(ic, true);
3152
3153         limit = ic->provided_data_sectors;
3154         if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3155                 limit = le64_to_cpu(ic->sb->recalc_sector)
3156                         >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)
3157                         << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
3158         }
3159         /*DEBUG_print("zeroing journal\n");*/
3160         block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR);
3161         block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR);
3162
3163         rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3164                            ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3165
3166         spin_lock_irq(&ic->endio_wait.lock);
3167         remove_range_unlocked(ic, &range);
3168         while (unlikely((bio = bio_list_pop(&ic->synchronous_bios)) != NULL)) {
3169                 bio_endio(bio);
3170                 spin_unlock_irq(&ic->endio_wait.lock);
3171                 spin_lock_irq(&ic->endio_wait.lock);
3172         }
3173         spin_unlock_irq(&ic->endio_wait.lock);
3174 }
3175
3176
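/*
 * Initialize (erase) the given journal sections: zero the sector payloads,
 * stamp them with commit ids for commit_seq, mark every journal entry unused
 * and write the sections to disk.
 */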
3177 static void init_journal(struct dm_integrity_c *ic, unsigned int start_section,
3178                          unsigned int n_sections, unsigned char commit_seq)
3179 {
3180         unsigned int i, j, n;
3181
3182         if (!n_sections)
3183                 return;
3184
3185         for (n = 0; n < n_sections; n++) {
3186                 i = start_section + n;
3187                 wraparound_section(ic, &i);
3188                 for (j = 0; j < ic->journal_section_sectors; j++) {
3189                         struct journal_sector *js = access_journal(ic, i, j);
3190
3191                         BUILD_BUG_ON(sizeof(js->sectors) != JOURNAL_SECTOR_DATA);
3192                         memset(&js->sectors, 0, sizeof(js->sectors));
3193                         js->commit_id = dm_integrity_commit_id(ic, i, j, commit_seq);
3194                 }
3195                 for (j = 0; j < ic->journal_section_entries; j++) {
3196                         struct journal_entry *je = access_journal_entry(ic, i, j);
3197
3198                         journal_entry_set_unused(je);
3199                 }
3200         }
3201
3202         write_journal(ic, start_section, n_sections);
3203 }
3204
3205 static int find_commit_seq(struct dm_integrity_c *ic, unsigned int i, unsigned int j, commit_id_t id)
3206 {
3207         unsigned char k;
3208
3209         for (k = 0; k < N_COMMIT_IDS; k++) {
3210                 if (dm_integrity_commit_id(ic, i, j, k) == id)
3211                         return k;
3212         }
3213         dm_integrity_io_error(ic, "journal commit id", -EIO);
3214         return -EIO;
3215 }
3216
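/*
 * Called on resume.  Read (and decrypt) the journal, use the commit ids to
 * find the last consistently written run of sections and replay it with
 * do_journal_write().  If the commit ids are inconsistent, the journal is
 * erased instead.  Finally, reset the in-memory journal state so that new
 * writes continue where the replay ended.
 */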
3217 static void replay_journal(struct dm_integrity_c *ic)
3218 {
3219         unsigned int i, j;
3220         bool used_commit_ids[N_COMMIT_IDS];
3221         unsigned int max_commit_id_sections[N_COMMIT_IDS];
3222         unsigned int write_start, write_sections;
3223         unsigned int continue_section;
3224         bool journal_empty;
3225         unsigned char unused, last_used, want_commit_seq;
3226
3227         if (ic->mode == 'R')
3228                 return;
3229
3230         if (ic->journal_uptodate)
3231                 return;
3232
3233         last_used = 0;
3234         write_start = 0;
3235
3236         if (!ic->just_formatted) {
3237                 DEBUG_print("reading journal\n");
3238                 rw_journal(ic, REQ_OP_READ, 0, ic->journal_sections, NULL);
3239                 if (ic->journal_io)
3240                         DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal");
3241                 if (ic->journal_io) {
3242                         struct journal_completion crypt_comp;
3243
3244                         crypt_comp.ic = ic;
3245                         init_completion(&crypt_comp.comp);
3246                         crypt_comp.in_flight = (atomic_t)ATOMIC_INIT(0);
3247                         encrypt_journal(ic, false, 0, ic->journal_sections, &crypt_comp);
3248                         wait_for_completion(&crypt_comp.comp);
3249                 }
3250                 DEBUG_bytes(lowmem_page_address(ic->journal[0].page), 64, "decrypted journal");
3251         }
3252
3253         if (dm_integrity_failed(ic))
3254                 goto clear_journal;
3255
3256         journal_empty = true;
3257         memset(used_commit_ids, 0, sizeof(used_commit_ids));
3258         memset(max_commit_id_sections, 0, sizeof(max_commit_id_sections));
3259         for (i = 0; i < ic->journal_sections; i++) {
3260                 for (j = 0; j < ic->journal_section_sectors; j++) {
3261                         int k;
3262                         struct journal_sector *js = access_journal(ic, i, j);
3263
3264                         k = find_commit_seq(ic, i, j, js->commit_id);
3265                         if (k < 0)
3266                                 goto clear_journal;
3267                         used_commit_ids[k] = true;
3268                         max_commit_id_sections[k] = i;
3269                 }
3270                 if (journal_empty) {
3271                         for (j = 0; j < ic->journal_section_entries; j++) {
3272                                 struct journal_entry *je = access_journal_entry(ic, i, j);
3273
3274                                 if (!journal_entry_is_unused(je)) {
3275                                         journal_empty = false;
3276                                         break;
3277                                 }
3278                         }
3279                 }
3280         }
3281
3282         if (!used_commit_ids[N_COMMIT_IDS - 1]) {
3283                 unused = N_COMMIT_IDS - 1;
3284                 while (unused && !used_commit_ids[unused - 1])
3285                         unused--;
3286         } else {
3287                 for (unused = 0; unused < N_COMMIT_IDS; unused++)
3288                         if (!used_commit_ids[unused])
3289                                 break;
3290                 if (unused == N_COMMIT_IDS) {
3291                         dm_integrity_io_error(ic, "journal commit ids", -EIO);
3292                         goto clear_journal;
3293                 }
3294         }
3295         DEBUG_print("first unused commit seq %d [%d,%d,%d,%d]\n",
3296                     unused, used_commit_ids[0], used_commit_ids[1],
3297                     used_commit_ids[2], used_commit_ids[3]);
3298
3299         last_used = prev_commit_seq(unused);
3300         want_commit_seq = prev_commit_seq(last_used);
3301
3302         if (!used_commit_ids[want_commit_seq] && used_commit_ids[prev_commit_seq(want_commit_seq)])
3303                 journal_empty = true;
3304
3305         write_start = max_commit_id_sections[last_used] + 1;
3306         if (unlikely(write_start >= ic->journal_sections))
3307                 want_commit_seq = next_commit_seq(want_commit_seq);
3308         wraparound_section(ic, &write_start);
3309
3310         i = write_start;
3311         for (write_sections = 0; write_sections < ic->journal_sections; write_sections++) {
3312                 for (j = 0; j < ic->journal_section_sectors; j++) {
3313                         struct journal_sector *js = access_journal(ic, i, j);
3314
3315                         if (js->commit_id != dm_integrity_commit_id(ic, i, j, want_commit_seq)) {
3316                                 /*
3317                                  * This could be caused by a crash during writing.
3318                                  * We won't replay the inconsistent part of the
3319                                  * journal.
3320                                  */
3321                                 DEBUG_print("commit id mismatch at position (%u, %u): %d != %d\n",
3322                                             i, j, find_commit_seq(ic, i, j, js->commit_id), want_commit_seq);
3323                                 goto brk;
3324                         }
3325                 }
3326                 i++;
3327                 if (unlikely(i >= ic->journal_sections))
3328                         want_commit_seq = next_commit_seq(want_commit_seq);
3329                 wraparound_section(ic, &i);
3330         }
3331 brk:
3332
3333         if (!journal_empty) {
3334                 DEBUG_print("replaying %u sections, starting at %u, commit seq %d\n",
3335                             write_sections, write_start, want_commit_seq);
3336                 do_journal_write(ic, write_start, write_sections, true);
3337         }
3338
3339         if (write_sections == ic->journal_sections && (ic->mode == 'J' || journal_empty)) {
3340                 continue_section = write_start;
3341                 ic->commit_seq = want_commit_seq;
3342                 DEBUG_print("continuing from section %u, commit seq %d\n", write_start, ic->commit_seq);
3343         } else {
3344                 unsigned int s;
3345                 unsigned char erase_seq;
3346
3347 clear_journal:
3348                 DEBUG_print("clearing journal\n");
3349
3350                 erase_seq = prev_commit_seq(prev_commit_seq(last_used));
3351                 s = write_start;
3352                 init_journal(ic, s, 1, erase_seq);
3353                 s++;
3354                 wraparound_section(ic, &s);
3355                 if (ic->journal_sections >= 2) {
3356                         init_journal(ic, s, ic->journal_sections - 2, erase_seq);
3357                         s += ic->journal_sections - 2;
3358                         wraparound_section(ic, &s);
3359                         init_journal(ic, s, 1, erase_seq);
3360                 }
3361
3362                 continue_section = 0;
3363                 ic->commit_seq = next_commit_seq(erase_seq);
3364         }
3365
3366         ic->committed_section = continue_section;
3367         ic->n_committed_sections = 0;
3368
3369         ic->uncommitted_section = continue_section;
3370         ic->n_uncommitted_sections = 0;
3371
3372         ic->free_section = continue_section;
3373         ic->free_section_entry = 0;
3374         ic->free_sectors = ic->journal_entries;
3375
3376         ic->journal_tree_root = RB_ROOT;
3377         for (i = 0; i < ic->journal_entries; i++)
3378                 init_journal_node(&ic->journal_tree[i]);
3379 }
3380
3381 static void dm_integrity_enter_synchronous_mode(struct dm_integrity_c *ic)
3382 {
3383         DEBUG_print("%s\n", __func__);
3384
3385         if (ic->mode == 'B') {
3386                 ic->bitmap_flush_interval = msecs_to_jiffies(10) + 1;
3387                 ic->synchronous_mode = 1;
3388
3389                 cancel_delayed_work_sync(&ic->bitmap_flush_work);
3390                 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
3391                 flush_workqueue(ic->commit_wq);
3392         }
3393 }
3394
3395 static int dm_integrity_reboot(struct notifier_block *n, unsigned long code, void *x)
3396 {
3397         struct dm_integrity_c *ic = container_of(n, struct dm_integrity_c, reboot_notifier);
3398
3399         DEBUG_print("%s\n", __func__);
3400
3401         dm_integrity_enter_synchronous_mode(ic);
3402
3403         return NOTIFY_DONE;
3404 }
3405
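/*
 * Postsuspend: stop the autocommit timer, drain the background work queues,
 * flush committed data and metadata and leave the on-disk journal (or, in
 * bitmap mode, the bitmap area) in a clean state.
 */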
3406 static void dm_integrity_postsuspend(struct dm_target *ti)
3407 {
3408         struct dm_integrity_c *ic = ti->private;
3409         int r;
3410
3411         WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier));
3412
3413         del_timer_sync(&ic->autocommit_timer);
3414
3415         if (ic->recalc_wq)
3416                 drain_workqueue(ic->recalc_wq);
3417
3418         if (ic->mode == 'B')
3419                 cancel_delayed_work_sync(&ic->bitmap_flush_work);
3420
3421         queue_work(ic->commit_wq, &ic->commit_work);
3422         drain_workqueue(ic->commit_wq);
3423
3424         if (ic->mode == 'J') {
3425                 queue_work(ic->writer_wq, &ic->writer_work);
3426                 drain_workqueue(ic->writer_wq);
3427                 dm_integrity_flush_buffers(ic, true);
3428                 if (ic->wrote_to_journal) {
3429                         init_journal(ic, ic->free_section,
3430                                      ic->journal_sections - ic->free_section, ic->commit_seq);
3431                         if (ic->free_section) {
3432                                 init_journal(ic, 0, ic->free_section,
3433                                              next_commit_seq(ic->commit_seq));
3434                         }
3435                 }
3436         }
3437
3438         if (ic->mode == 'B') {
3439                 dm_integrity_flush_buffers(ic, true);
3440 #if 1
3441                 /* set to 0 to test bitmap replay code */
3442                 init_journal(ic, 0, ic->journal_sections, 0);
3443                 ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3444                 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3445                 if (unlikely(r))
3446                         dm_integrity_io_error(ic, "writing superblock", r);
3447 #endif
3448         }
3449
3450         BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
3451
3452         ic->journal_uptodate = true;
3453 }
3454
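/*
 * Resume: handle a change of the device size, recover from a dirty bitmap or
 * replay the journal, schedule tag recalculation if it has not finished yet,
 * write the superblock and register the reboot notifier that switches bitmap
 * mode to synchronous operation on shutdown.
 */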
3455 static void dm_integrity_resume(struct dm_target *ti)
3456 {
3457         struct dm_integrity_c *ic = ti->private;
3458         __u64 old_provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
3459         int r;
3460
3461         DEBUG_print("resume\n");
3462
3463         ic->wrote_to_journal = false;
3464
3465         if (ic->provided_data_sectors != old_provided_data_sectors) {
3466                 if (ic->provided_data_sectors > old_provided_data_sectors &&
3467                     ic->mode == 'B' &&
3468                     ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
3469                         rw_journal_sectors(ic, REQ_OP_READ, 0,
3470                                            ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3471                         block_bitmap_op(ic, ic->journal, old_provided_data_sectors,
3472                                         ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET);
3473                         rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3474                                            ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3475                 }
3476
3477                 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
3478                 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3479                 if (unlikely(r))
3480                         dm_integrity_io_error(ic, "writing superblock", r);
3481         }
3482
3483         if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) {
3484                 DEBUG_print("resume dirty_bitmap\n");
3485                 rw_journal_sectors(ic, REQ_OP_READ, 0,
3486                                    ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3487                 if (ic->mode == 'B') {
3488                         if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
3489                             !ic->reset_recalculate_flag) {
3490                                 block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal);
3491                                 block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal);
3492                                 if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors,
3493                                                      BITMAP_OP_TEST_ALL_CLEAR)) {
3494                                         ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3495                                         ic->sb->recalc_sector = cpu_to_le64(0);
3496                                 }
3497                         } else {
3498                                 DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n",
3499                                             ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit);
3500                                 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
3501                                 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3502                                 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3503                                 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET);
3504                                 rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3505                                                    ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3506                                 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3507                                 ic->sb->recalc_sector = cpu_to_le64(0);
3508                         }
3509                 } else {
3510                         if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
3511                               block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR)) ||
3512                             ic->reset_recalculate_flag) {
3513                                 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3514                                 ic->sb->recalc_sector = cpu_to_le64(0);
3515                         }
3516                         init_journal(ic, 0, ic->journal_sections, 0);
3517                         replay_journal(ic);
3518                         ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3519                 }
3520                 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3521                 if (unlikely(r))
3522                         dm_integrity_io_error(ic, "writing superblock", r);
3523         } else {
3524                 replay_journal(ic);
3525                 if (ic->reset_recalculate_flag) {
3526                         ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3527                         ic->sb->recalc_sector = cpu_to_le64(0);
3528                 }
3529                 if (ic->mode == 'B') {
3530                         ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
3531                         ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
3532                         r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
3533                         if (unlikely(r))
3534                                 dm_integrity_io_error(ic, "writing superblock", r);
3535
3536                         block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3537                         block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3538                         block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR);
3539                         if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
3540                             le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) {
3541                                 block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector),
3542                                                 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3543                                 block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector),
3544                                                 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3545                                 block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector),
3546                                                 ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET);
3547                         }
3548                         rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0,
3549                                            ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
3550                 }
3551         }
3552
3553         DEBUG_print("testing recalc: %x\n", ic->sb->flags);
3554         if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3555                 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
3556
3557                 DEBUG_print("recalc pos: %llx / %llx\n", recalc_pos, ic->provided_data_sectors);
3558                 if (recalc_pos < ic->provided_data_sectors) {
3559                         queue_work(ic->recalc_wq, &ic->recalc_work);
3560                 } else if (recalc_pos > ic->provided_data_sectors) {
3561                         ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors);
3562                         recalc_write_super(ic);
3563                 }
3564         }
3565
3566         ic->reboot_notifier.notifier_call = dm_integrity_reboot;
3567         ic->reboot_notifier.next = NULL;
3568         ic->reboot_notifier.priority = INT_MAX - 1;     /* be notified after md and before hardware drivers */
3569         WARN_ON(register_reboot_notifier(&ic->reboot_notifier));
3570
3571 #if 0
3572         /* set to 1 to stress test synchronous mode */
3573         dm_integrity_enter_synchronous_mode(ic);
3574 #endif
3575 }
3576
3577 static void dm_integrity_status(struct dm_target *ti, status_type_t type,
3578                                 unsigned int status_flags, char *result, unsigned int maxlen)
3579 {
3580         struct dm_integrity_c *ic = ti->private;
3581         unsigned int arg_count;
3582         size_t sz = 0;
3583
3584         switch (type) {
3585         case STATUSTYPE_INFO:
3586                 DMEMIT("%llu %llu",
3587                         (unsigned long long)atomic64_read(&ic->number_of_mismatches),
3588                         ic->provided_data_sectors);
3589                 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
3590                         DMEMIT(" %llu", le64_to_cpu(ic->sb->recalc_sector));
3591                 else
3592                         DMEMIT(" -");
3593                 break;
3594
3595         case STATUSTYPE_TABLE: {
3596                 __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100;
3597
3598                 watermark_percentage += ic->journal_entries / 2;
3599                 do_div(watermark_percentage, ic->journal_entries);
3600                 arg_count = 3;
3601                 arg_count += !!ic->meta_dev;
3602                 arg_count += ic->sectors_per_block != 1;
3603                 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
3604                 arg_count += ic->reset_recalculate_flag;
3605                 arg_count += ic->discard;
3606                 arg_count += ic->mode == 'J';
3607                 arg_count += ic->mode == 'J';
3608                 arg_count += ic->mode == 'B';
3609                 arg_count += ic->mode == 'B';
3610                 arg_count += !!ic->internal_hash_alg.alg_string;
3611                 arg_count += !!ic->journal_crypt_alg.alg_string;
3612                 arg_count += !!ic->journal_mac_alg.alg_string;
3613                 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0;
3614                 arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0;
3615                 arg_count += ic->legacy_recalculate;
3616                 DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start,
3617                        ic->tag_size, ic->mode, arg_count);
3618                 if (ic->meta_dev)
3619                         DMEMIT(" meta_device:%s", ic->meta_dev->name);
3620                 if (ic->sectors_per_block != 1)
3621                         DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
3622                 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
3623                         DMEMIT(" recalculate");
3624                 if (ic->reset_recalculate_flag)
3625                         DMEMIT(" reset_recalculate");
3626                 if (ic->discard)
3627                         DMEMIT(" allow_discards");
3628                 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
3629                 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
3630                 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
3631                 if (ic->mode == 'J') {
3632                         DMEMIT(" journal_watermark:%u", (unsigned int)watermark_percentage);
3633                         DMEMIT(" commit_time:%u", ic->autocommit_msec);
3634                 }
3635                 if (ic->mode == 'B') {
3636                         DMEMIT(" sectors_per_bit:%llu", (sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
3637                         DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
3638                 }
3639                 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0)
3640                         DMEMIT(" fix_padding");
3641                 if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0)
3642                         DMEMIT(" fix_hmac");
3643                 if (ic->legacy_recalculate)
3644                         DMEMIT(" legacy_recalculate");
3645
3646 #define EMIT_ALG(a, n)                                                  \
3647                 do {                                                    \
3648                         if (ic->a.alg_string) {                         \
3649                                 DMEMIT(" %s:%s", n, ic->a.alg_string);  \
3650                                 if (ic->a.key_string)                   \
3651                                         DMEMIT(":%s", ic->a.key_string);\
3652                         }                                               \
3653                 } while (0)
3654                 EMIT_ALG(internal_hash_alg, "internal_hash");
3655                 EMIT_ALG(journal_crypt_alg, "journal_crypt");
3656                 EMIT_ALG(journal_mac_alg, "journal_mac");
3657                 break;
3658         }
3659         case STATUSTYPE_IMA:
3660                 DMEMIT_TARGET_NAME_VERSION(ti->type);
3661                 DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c",
3662                         ic->dev->name, ic->start, ic->tag_size, ic->mode);
3663
3664                 if (ic->meta_dev)
3665                         DMEMIT(",meta_device=%s", ic->meta_dev->name);
3666                 if (ic->sectors_per_block != 1)
3667                         DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT);
3668
3669                 DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ?
3670                        'y' : 'n');
3671                 DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n');
3672                 DMEMIT(",fix_padding=%c",
3673                        ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n');
3674                 DMEMIT(",fix_hmac=%c",
3675                        ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 'y' : 'n');
3676                 DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ? 'y' : 'n');
3677
3678                 DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS);
3679                 DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors);
3680                 DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors);
3681                 DMEMIT(";");
3682                 break;
3683         }
3684 }
3685
3686 static int dm_integrity_iterate_devices(struct dm_target *ti,
3687                                         iterate_devices_callout_fn fn, void *data)
3688 {
3689         struct dm_integrity_c *ic = ti->private;
3690
3691         if (!ic->meta_dev)
3692                 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
3693         else
3694                 return fn(ti, ic->dev, 0, ti->len, data);
3695 }
3696
3697 static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits)
3698 {
3699         struct dm_integrity_c *ic = ti->private;
3700
3701         if (ic->sectors_per_block > 1) {
3702                 limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
3703                 limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
3704                 limits->io_min = ic->sectors_per_block << SECTOR_SHIFT;
3705                 limits->dma_alignment = limits->logical_block_size - 1;
3706                 limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT;
3707         }
3708
3709         if (!ic->internal_hash) {
3710                 struct blk_integrity *bi = &limits->integrity;
3711
3712                 memset(bi, 0, sizeof(*bi));
3713                 bi->tuple_size = ic->tag_size;
3714                 bi->tag_size = bi->tuple_size;
3715                 bi->interval_exp =
3716                         ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
3717         }
3718
3719         limits->max_integrity_segments = USHRT_MAX;
3720 }
3721
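/*
 * Compute the journal geometry: the journal entry size (rounded up to
 * JOURNAL_ENTRY_ROUNDUP), the number of entries per sector and per section
 * (reserving JOURNAL_MAC_PER_SECTOR bytes when a journal MAC is used), the
 * number of sectors per section and the total number of journal entries.
 */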
3722 static void calculate_journal_section_size(struct dm_integrity_c *ic)
3723 {
3724         unsigned int sector_space = JOURNAL_SECTOR_DATA;
3725
3726         ic->journal_sections = le32_to_cpu(ic->sb->journal_sections);
3727         ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size,
3728                                          JOURNAL_ENTRY_ROUNDUP);
3729
3730         if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC))
3731                 sector_space -= JOURNAL_MAC_PER_SECTOR;
3732         ic->journal_entries_per_sector = sector_space / ic->journal_entry_size;
3733         ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS;
3734         ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS;
3735         ic->journal_entries = ic->journal_section_entries * ic->journal_sections;
3736 }
3737
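/*
 * Check that the superblock, journal, metadata and provided_data_sectors fit
 * on the device(s) and compute initial_sectors, metadata_run and
 * log2_metadata_run.  Returns -EINVAL if the layout does not fit.
 */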
3738 static int calculate_device_limits(struct dm_integrity_c *ic)
3739 {
3740         __u64 initial_sectors;
3741
3742         calculate_journal_section_size(ic);
3743         initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections;
3744         if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX)
3745                 return -EINVAL;
3746         ic->initial_sectors = initial_sectors;
3747
3748         if (ic->mode == 'I') {
3749                 if (ic->initial_sectors + ic->provided_data_sectors > ic->meta_device_sectors)
3750                         return -EINVAL;
3751         } else if (!ic->meta_dev) {
3752                 sector_t last_sector, last_area, last_offset;
3753
3754                 /* we have to maintain excessive padding for compatibility with existing volumes */
3755                 __u64 metadata_run_padding =
3756                         ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ?
3757                         (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) :
3758                         (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS);
3759
3760                 ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
3761                                             metadata_run_padding) >> SECTOR_SHIFT;
3762                 if (!(ic->metadata_run & (ic->metadata_run - 1)))
3763                         ic->log2_metadata_run = __ffs(ic->metadata_run);
3764                 else
3765                         ic->log2_metadata_run = -1;
3766
3767                 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset);
3768                 last_sector = get_data_sector(ic, last_area, last_offset);
3769                 if (last_sector < ic->start || last_sector >= ic->meta_device_sectors)
3770                         return -EINVAL;
3771         } else {
3772                 __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
3773
3774                 meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1))
3775                                 >> (ic->log2_buffer_sectors + SECTOR_SHIFT);
3776                 meta_size <<= ic->log2_buffer_sectors;
3777                 if (ic->initial_sectors + meta_size < ic->initial_sectors ||
3778                     ic->initial_sectors + meta_size > ic->meta_device_sectors)
3779                         return -EINVAL;
3780                 ic->metadata_run = 1;
3781                 ic->log2_metadata_run = 0;
3782         }
3783
3784         return 0;
3785 }
3786
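/*
 * Find how many data sectors the target can provide.  Without a metadata
 * device, each bit of provided_data_sectors is tried from the highest
 * down and kept only if calculate_device_limits() still succeeds; with a
 * metadata device, the whole data device is used, rounded down to a
 * whole block.
 */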
3787 static void get_provided_data_sectors(struct dm_integrity_c *ic)
3788 {
3789         if (!ic->meta_dev) {
3790                 int test_bit;
3791
3792                 ic->provided_data_sectors = 0;
3793                 for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) {
3794                         __u64 prev_data_sectors = ic->provided_data_sectors;
3795
3796                         ic->provided_data_sectors |= (sector_t)1 << test_bit;
3797                         if (calculate_device_limits(ic))
3798                                 ic->provided_data_sectors = prev_data_sectors;
3799                 }
3800         } else {
3801                 ic->provided_data_sectors = ic->data_device_sectors;
3802                 ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1);
3803         }
3804 }
3805
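/*
 * Format a fresh superblock for a device that has not been initialized
 * yet: magic, flags, tag size, block size, journal size, interleaving
 * and the salt for the fixed HMAC mode, then compute provided_data_sectors.
 * With a separate metadata device, the largest journal section count that
 * still fits is searched for, shrinking the buffer size if necessary.
 */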
3806 static int initialize_superblock(struct dm_integrity_c *ic,
3807                                  unsigned int journal_sectors, unsigned int interleave_sectors)
3808 {
3809         unsigned int journal_sections;
3810         int test_bit;
3811
3812         memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT);
3813         memcpy(ic->sb->magic, SB_MAGIC, 8);
3814         if (ic->mode == 'I')
3815                 ic->sb->flags |= cpu_to_le32(SB_FLAG_INLINE);
3816         ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size);
3817         ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block);
3818         if (ic->journal_mac_alg.alg_string)
3819                 ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC);
3820
3821         calculate_journal_section_size(ic);
3822         journal_sections = journal_sectors / ic->journal_section_sectors;
3823         if (!journal_sections)
3824                 journal_sections = 1;
3825         if (ic->mode == 'I')
3826                 journal_sections = 0;
3827
3828         if (ic->fix_hmac && (ic->internal_hash_alg.alg_string || ic->journal_mac_alg.alg_string)) {
3829                 ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_HMAC);
3830                 get_random_bytes(ic->sb->salt, SALT_SIZE);
3831         }
3832
3833         if (!ic->meta_dev) {
3834                 if (ic->fix_padding)
3835                         ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING);
3836                 ic->sb->journal_sections = cpu_to_le32(journal_sections);
3837                 if (!interleave_sectors)
3838                         interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
3839                 ic->sb->log2_interleave_sectors = __fls(interleave_sectors);
3840                 ic->sb->log2_interleave_sectors = max_t(__u8, MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
3841                 ic->sb->log2_interleave_sectors = min_t(__u8, MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
3842
3843                 get_provided_data_sectors(ic);
3844                 if (!ic->provided_data_sectors)
3845                         return -EINVAL;
3846         } else {
3847                 ic->sb->log2_interleave_sectors = 0;
3848
3849                 get_provided_data_sectors(ic);
3850                 if (!ic->provided_data_sectors)
3851                         return -EINVAL;
3852
3853 try_smaller_buffer:
3854                 ic->sb->journal_sections = cpu_to_le32(0);
3855                 for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) {
3856                         __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections);
3857                         __u32 test_journal_sections = prev_journal_sections | (1U << test_bit);
3858
3859                         if (test_journal_sections > journal_sections)
3860                                 continue;
3861                         ic->sb->journal_sections = cpu_to_le32(test_journal_sections);
3862                         if (calculate_device_limits(ic))
3863                                 ic->sb->journal_sections = cpu_to_le32(prev_journal_sections);
3864
3865                 }
3866                 if (!le32_to_cpu(ic->sb->journal_sections)) {
3867                         if (ic->log2_buffer_sectors > 3) {
3868                                 ic->log2_buffer_sectors--;
3869                                 goto try_smaller_buffer;
3870                         }
3871                         return -EINVAL;
3872                 }
3873         }
3874
3875         ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
3876
3877         sb_set_version(ic);
3878
3879         return 0;
3880 }
3881
3882 static void dm_integrity_free_page_list(struct page_list *pl)
3883 {
3884         unsigned int i;
3885
3886         if (!pl)
3887                 return;
3888         for (i = 0; pl[i].page; i++)
3889                 __free_page(pl[i].page);
3890         kvfree(pl);
3891 }
3892
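/*
 * Allocate an array of n_pages pages chained as a page_list and
 * terminated by a NULL page.  Freed with dm_integrity_free_page_list().
 */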
3893 static struct page_list *dm_integrity_alloc_page_list(unsigned int n_pages)
3894 {
3895         struct page_list *pl;
3896         unsigned int i;
3897
3898         pl = kvmalloc_array(n_pages + 1, sizeof(struct page_list), GFP_KERNEL | __GFP_ZERO);
3899         if (!pl)
3900                 return NULL;
3901
3902         for (i = 0; i < n_pages; i++) {
3903                 pl[i].page = alloc_page(GFP_KERNEL);
3904                 if (!pl[i].page) {
3905                         dm_integrity_free_page_list(pl);
3906                         return NULL;
3907                 }
3908                 if (i)
3909                         pl[i - 1].next = &pl[i];
3910         }
3911         pl[i].page = NULL;
3912         pl[i].next = NULL;
3913
3914         return pl;
3915 }
3916
3917 static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, struct scatterlist **sl)
3918 {
3919         unsigned int i;
3920
3921         for (i = 0; i < ic->journal_sections; i++)
3922                 kvfree(sl[i]);
3923         kvfree(sl);
3924 }
3925
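/*
 * Build one scatterlist per journal section, covering the pages of the
 * supplied page list that hold that section's sectors.  The result is
 * freed with dm_integrity_free_journal_scatterlist().
 */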
3926 static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic,
3927                                                                    struct page_list *pl)
3928 {
3929         struct scatterlist **sl;
3930         unsigned int i;
3931
3932         sl = kvmalloc_array(ic->journal_sections,
3933                             sizeof(struct scatterlist *),
3934                             GFP_KERNEL | __GFP_ZERO);
3935         if (!sl)
3936                 return NULL;
3937
3938         for (i = 0; i < ic->journal_sections; i++) {
3939                 struct scatterlist *s;
3940                 unsigned int start_index, start_offset;
3941                 unsigned int end_index, end_offset;
3942                 unsigned int n_pages;
3943                 unsigned int idx;
3944
3945                 page_list_location(ic, i, 0, &start_index, &start_offset);
3946                 page_list_location(ic, i, ic->journal_section_sectors - 1,
3947                                    &end_index, &end_offset);
3948
3949                 n_pages = (end_index - start_index + 1);
3950
3951                 s = kvmalloc_array(n_pages, sizeof(struct scatterlist),
3952                                    GFP_KERNEL);
3953                 if (!s) {
3954                         dm_integrity_free_journal_scatterlist(ic, sl);
3955                         return NULL;
3956                 }
3957
3958                 sg_init_table(s, n_pages);
3959                 for (idx = start_index; idx <= end_index; idx++) {
3960                         char *va = lowmem_page_address(pl[idx].page);
3961                         unsigned int start = 0, end = PAGE_SIZE;
3962
3963                         if (idx == start_index)
3964                                 start = start_offset;
3965                         if (idx == end_index)
3966                                 end = end_offset + (1 << SECTOR_SHIFT);
3967                         sg_set_buf(&s[idx - start_index], va + start, end - start);
3968                 }
3969
3970                 sl[i] = s;
3971         }
3972
3973         return sl;
3974 }
3975
3976 static void free_alg(struct alg_spec *a)
3977 {
3978         kfree_sensitive(a->alg_string);
3979         kfree_sensitive(a->key);
3980         memset(a, 0, sizeof(*a));
3981 }
3982
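/*
 * Parse an "option:algorithm[:key]" argument: the text after the first
 * colon is the algorithm name and an optional second colon introduces a
 * hex-encoded key, which is decoded into a->key.
 */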
3983 static int get_alg_and_key(const char *arg, struct alg_spec *a, char **error, char *error_inval)
3984 {
3985         char *k;
3986
3987         free_alg(a);
3988
3989         a->alg_string = kstrdup(strchr(arg, ':') + 1, GFP_KERNEL);
3990         if (!a->alg_string)
3991                 goto nomem;
3992
3993         k = strchr(a->alg_string, ':');
3994         if (k) {
3995                 *k = 0;
3996                 a->key_string = k + 1;
3997                 if (strlen(a->key_string) & 1)
3998                         goto inval;
3999
4000                 a->key_size = strlen(a->key_string) / 2;
4001                 a->key = kmalloc(a->key_size, GFP_KERNEL);
4002                 if (!a->key)
4003                         goto nomem;
4004                 if (hex2bin(a->key, a->key_string, a->key_size))
4005                         goto inval;
4006         }
4007
4008         return 0;
4009 inval:
4010         *error = error_inval;
4011         return -EINVAL;
4012 nomem:
4013         *error = "Out of memory for an argument";
4014         return -ENOMEM;
4015 }
4016
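/*
 * Allocate the hash transform named in the alg_spec and set its key.
 * Fails with -ENOKEY if the algorithm requires a key and none was given.
 */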
4017 static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error,
4018                    char *error_alg, char *error_key)
4019 {
4020         int r;
4021
4022         if (a->alg_string) {
4023                 *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
4024                 if (IS_ERR(*hash)) {
4025                         *error = error_alg;
4026                         r = PTR_ERR(*hash);
4027                         *hash = NULL;
4028                         return r;
4029                 }
4030
4031                 if (a->key) {
4032                         r = crypto_shash_setkey(*hash, a->key, a->key_size);
4033                         if (r) {
4034                                 *error = error_key;
4035                                 return r;
4036                         }
4037                 } else if (crypto_shash_get_flags(*hash) & CRYPTO_TFM_NEED_KEY) {
4038                         *error = error_key;
4039                         return -ENOKEY;
4040                 }
4041         }
4042
4043         return 0;
4044 }
4045
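/*
 * Allocate the in-memory journal and, if journal_crypt is configured,
 * set up its encryption: for a stream cipher (block size 1) the cipher
 * is run once over zeroed pages to precompute the journal_xor keystream
 * and is then freed; otherwise a per-section IV and skcipher request are
 * prepared.  The commit IDs are made unique and the journal node tree is
 * allocated.
 */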
4046 static int create_journal(struct dm_integrity_c *ic, char **error)
4047 {
4048         int r = 0;
4049         unsigned int i;
4050         __u64 journal_pages, journal_desc_size, journal_tree_size;
4051         unsigned char *crypt_data = NULL, *crypt_iv = NULL;
4052         struct skcipher_request *req = NULL;
4053
4054         ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL);
4055         ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL);
4056         ic->commit_ids[2] = cpu_to_le64(0x3333333333333333ULL);
4057         ic->commit_ids[3] = cpu_to_le64(0x4444444444444444ULL);
4058
4059         journal_pages = roundup((__u64)ic->journal_sections * ic->journal_section_sectors,
4060                                 PAGE_SIZE >> SECTOR_SHIFT) >> (PAGE_SHIFT - SECTOR_SHIFT);
4061         journal_desc_size = journal_pages * sizeof(struct page_list);
4062         if (journal_pages >= totalram_pages() - totalhigh_pages() || journal_desc_size > ULONG_MAX) {
4063                 *error = "Journal doesn't fit into memory";
4064                 r = -ENOMEM;
4065                 goto bad;
4066         }
4067         ic->journal_pages = journal_pages;
4068
4069         ic->journal = dm_integrity_alloc_page_list(ic->journal_pages);
4070         if (!ic->journal) {
4071                 *error = "Could not allocate memory for journal";
4072                 r = -ENOMEM;
4073                 goto bad;
4074         }
4075         if (ic->journal_crypt_alg.alg_string) {
4076                 unsigned int ivsize, blocksize;
4077                 struct journal_completion comp;
4078
4079                 comp.ic = ic;
4080                 ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
4081                 if (IS_ERR(ic->journal_crypt)) {
4082                         *error = "Invalid journal cipher";
4083                         r = PTR_ERR(ic->journal_crypt);
4084                         ic->journal_crypt = NULL;
4085                         goto bad;
4086                 }
4087                 ivsize = crypto_skcipher_ivsize(ic->journal_crypt);
4088                 blocksize = crypto_skcipher_blocksize(ic->journal_crypt);
4089
4090                 if (ic->journal_crypt_alg.key) {
4091                         r = crypto_skcipher_setkey(ic->journal_crypt, ic->journal_crypt_alg.key,
4092                                                    ic->journal_crypt_alg.key_size);
4093                         if (r) {
4094                                 *error = "Error setting encryption key";
4095                                 goto bad;
4096                         }
4097                 }
4098                 DEBUG_print("cipher %s, block size %u iv size %u\n",
4099                             ic->journal_crypt_alg.alg_string, blocksize, ivsize);
4100
4101                 ic->journal_io = dm_integrity_alloc_page_list(ic->journal_pages);
4102                 if (!ic->journal_io) {
4103                         *error = "Could not allocate memory for journal io";
4104                         r = -ENOMEM;
4105                         goto bad;
4106                 }
4107
4108                 if (blocksize == 1) {
4109                         struct scatterlist *sg;
4110
4111                         req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
4112                         if (!req) {
4113                                 *error = "Could not allocate crypt request";
4114                                 r = -ENOMEM;
4115                                 goto bad;
4116                         }
4117
4118                         crypt_iv = kzalloc(ivsize, GFP_KERNEL);
4119                         if (!crypt_iv) {
4120                                 *error = "Could not allocate iv";
4121                                 r = -ENOMEM;
4122                                 goto bad;
4123                         }
4124
4125                         ic->journal_xor = dm_integrity_alloc_page_list(ic->journal_pages);
4126                         if (!ic->journal_xor) {
4127                                 *error = "Could not allocate memory for journal xor";
4128                                 r = -ENOMEM;
4129                                 goto bad;
4130                         }
4131
4132                         sg = kvmalloc_array(ic->journal_pages + 1,
4133                                             sizeof(struct scatterlist),
4134                                             GFP_KERNEL);
4135                         if (!sg) {
4136                                 *error = "Unable to allocate sg list";
4137                                 r = -ENOMEM;
4138                                 goto bad;
4139                         }
4140                         sg_init_table(sg, ic->journal_pages + 1);
4141                         for (i = 0; i < ic->journal_pages; i++) {
4142                                 char *va = lowmem_page_address(ic->journal_xor[i].page);
4143
4144                                 clear_page(va);
4145                                 sg_set_buf(&sg[i], va, PAGE_SIZE);
4146                         }
4147                         sg_set_buf(&sg[i], &ic->commit_ids, sizeof(ic->commit_ids));
4148
4149                         skcipher_request_set_crypt(req, sg, sg,
4150                                                    PAGE_SIZE * ic->journal_pages + sizeof(ic->commit_ids), crypt_iv);
4151                         init_completion(&comp.comp);
4152                         comp.in_flight = (atomic_t)ATOMIC_INIT(1);
4153                         if (do_crypt(true, req, &comp))
4154                                 wait_for_completion(&comp.comp);
4155                         kvfree(sg);
4156                         r = dm_integrity_failed(ic);
4157                         if (r) {
4158                                 *error = "Unable to encrypt journal";
4159                                 goto bad;
4160                         }
4161                         DEBUG_bytes(lowmem_page_address(ic->journal_xor[0].page), 64, "xor data");
4162
4163                         crypto_free_skcipher(ic->journal_crypt);
4164                         ic->journal_crypt = NULL;
4165                 } else {
4166                         unsigned int crypt_len = roundup(ivsize, blocksize);
4167
4168                         req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
4169                         if (!req) {
4170                                 *error = "Could not allocate crypt request";
4171                                 r = -ENOMEM;
4172                                 goto bad;
4173                         }
4174
4175                         crypt_iv = kmalloc(ivsize, GFP_KERNEL);
4176                         if (!crypt_iv) {
4177                                 *error = "Could not allocate iv";
4178                                 r = -ENOMEM;
4179                                 goto bad;
4180                         }
4181
4182                         crypt_data = kmalloc(crypt_len, GFP_KERNEL);
4183                         if (!crypt_data) {
4184                                 *error = "Unable to allocate crypt data";
4185                                 r = -ENOMEM;
4186                                 goto bad;
4187                         }
4188
4189                         ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal);
4190                         if (!ic->journal_scatterlist) {
4191                                 *error = "Unable to allocate sg list";
4192                                 r = -ENOMEM;
4193                                 goto bad;
4194                         }
4195                         ic->journal_io_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal_io);
4196                         if (!ic->journal_io_scatterlist) {
4197                                 *error = "Unable to allocate sg list";
4198                                 r = -ENOMEM;
4199                                 goto bad;
4200                         }
4201                         ic->sk_requests = kvmalloc_array(ic->journal_sections,
4202                                                          sizeof(struct skcipher_request *),
4203                                                          GFP_KERNEL | __GFP_ZERO);
4204                         if (!ic->sk_requests) {
4205                                 *error = "Unable to allocate sk requests";
4206                                 r = -ENOMEM;
4207                                 goto bad;
4208                         }
4209                         for (i = 0; i < ic->journal_sections; i++) {
4210                                 struct scatterlist sg;
4211                                 struct skcipher_request *section_req;
4212                                 __le32 section_le = cpu_to_le32(i);
4213
4214                                 memset(crypt_iv, 0x00, ivsize);
4215                                 memset(crypt_data, 0x00, crypt_len);
4216                                 memcpy(crypt_data, &section_le, min_t(size_t, crypt_len, sizeof(section_le)));
4217
4218                                 sg_init_one(&sg, crypt_data, crypt_len);
4219                                 skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv);
4220                                 init_completion(&comp.comp);
4221                                 comp.in_flight = (atomic_t)ATOMIC_INIT(1);
4222                                 if (do_crypt(true, req, &comp))
4223                                         wait_for_completion(&comp.comp);
4224
4225                                 r = dm_integrity_failed(ic);
4226                                 if (r) {
4227                                         *error = "Unable to generate iv";
4228                                         goto bad;
4229                                 }
4230
4231                                 section_req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
4232                                 if (!section_req) {
4233                                         *error = "Unable to allocate crypt request";
4234                                         r = -ENOMEM;
4235                                         goto bad;
4236                                 }
4237                                 section_req->iv = kmalloc_array(ivsize, 2,
4238                                                                 GFP_KERNEL);
4239                                 if (!section_req->iv) {
4240                                         skcipher_request_free(section_req);
4241                                         *error = "Unable to allocate iv";
4242                                         r = -ENOMEM;
4243                                         goto bad;
4244                                 }
4245                                 memcpy(section_req->iv + ivsize, crypt_data, ivsize);
4246                                 section_req->cryptlen = (size_t)ic->journal_section_sectors << SECTOR_SHIFT;
4247                                 ic->sk_requests[i] = section_req;
4248                                 DEBUG_bytes(crypt_data, ivsize, "iv(%u)", i);
4249                         }
4250                 }
4251         }
4252
4253         for (i = 0; i < N_COMMIT_IDS; i++) {
4254                 unsigned int j;
4255
4256 retest_commit_id:
4257                 for (j = 0; j < i; j++) {
4258                         if (ic->commit_ids[j] == ic->commit_ids[i]) {
4259                                 ic->commit_ids[i] = cpu_to_le64(le64_to_cpu(ic->commit_ids[i]) + 1);
4260                                 goto retest_commit_id;
4261                         }
4262                 }
4263                 DEBUG_print("commit id %u: %016llx\n", i, ic->commit_ids[i]);
4264         }
4265
4266         journal_tree_size = (__u64)ic->journal_entries * sizeof(struct journal_node);
4267         if (journal_tree_size > ULONG_MAX) {
4268                 *error = "Journal doesn't fit into memory";
4269                 r = -ENOMEM;
4270                 goto bad;
4271         }
4272         ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL);
4273         if (!ic->journal_tree) {
4274                 *error = "Could not allocate memory for journal tree";
4275                 r = -ENOMEM;
4276         }
4277 bad:
4278         kfree(crypt_data);
4279         kfree(crypt_iv);
4280         skcipher_request_free(req);
4281
4282         return r;
4283 }
4284
4285 /*
 4286  * Construct an integrity mapping
4287  *
4288  * Arguments:
4289  *      device
4290  *      offset from the start of the device
4291  *      tag size
 4292  *      D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode, I - inline mode
4293  *      number of optional arguments
4294  *      optional arguments:
4295  *              journal_sectors
4296  *              interleave_sectors
4297  *              buffer_sectors
4298  *              journal_watermark
4299  *              commit_time
4300  *              meta_device
4301  *              block_size
4302  *              sectors_per_bit
4303  *              bitmap_flush_interval
4304  *              internal_hash
4305  *              journal_crypt
4306  *              journal_mac
 4307  *              recalculate
 *              reset_recalculate
 *              allow_discards
 *              fix_padding
 *              fix_hmac
 *              legacy_recalculate
4308  */
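/*
 * Hypothetical example of a table line using the arguments above
 * (device name, length and option values are illustrative only; the
 * tag size "-" lets it default to the internal hash digest size):
 *
 *   dmsetup create integ --table \
 *     "0 1953792 integrity /dev/sdb 0 - J 2 journal_sectors:1024 internal_hash:crc32c"
 */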
4309 static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
4310 {
4311         struct dm_integrity_c *ic;
4312         char dummy;
4313         int r;
4314         unsigned int extra_args;
4315         struct dm_arg_set as;
4316         static const struct dm_arg _args[] = {
4317                 {0, 18, "Invalid number of feature args"},
4318         };
4319         unsigned int journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
4320         bool should_write_sb;
4321         __u64 threshold;
4322         unsigned long long start;
4323         __s8 log2_sectors_per_bitmap_bit = -1;
4324         __s8 log2_blocks_per_bitmap_bit;
4325         __u64 bits_in_journal;
4326         __u64 n_bitmap_bits;
4327
4328 #define DIRECT_ARGUMENTS        4
4329
4330         if (argc <= DIRECT_ARGUMENTS) {
4331                 ti->error = "Invalid argument count";
4332                 return -EINVAL;
4333         }
4334
4335         ic = kzalloc(sizeof(struct dm_integrity_c), GFP_KERNEL);
4336         if (!ic) {
4337                 ti->error = "Cannot allocate integrity context";
4338                 return -ENOMEM;
4339         }
4340         ti->private = ic;
4341         ti->per_io_data_size = sizeof(struct dm_integrity_io);
4342         ic->ti = ti;
4343
4344         ic->in_progress = RB_ROOT;
4345         INIT_LIST_HEAD(&ic->wait_list);
4346         init_waitqueue_head(&ic->endio_wait);
4347         bio_list_init(&ic->flush_bio_list);
4348         init_waitqueue_head(&ic->copy_to_journal_wait);
4349         init_completion(&ic->crypto_backoff);
4350         atomic64_set(&ic->number_of_mismatches, 0);
4351         ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL;
4352
4353         r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
4354         if (r) {
4355                 ti->error = "Device lookup failed";
4356                 goto bad;
4357         }
4358
4359         if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) {
4360                 ti->error = "Invalid starting offset";
4361                 r = -EINVAL;
4362                 goto bad;
4363         }
4364         ic->start = start;
4365
4366         if (strcmp(argv[2], "-")) {
4367                 if (sscanf(argv[2], "%u%c", &ic->tag_size, &dummy) != 1 || !ic->tag_size) {
4368                         ti->error = "Invalid tag size";
4369                         r = -EINVAL;
4370                         goto bad;
4371                 }
4372         }
4373
4374         if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") ||
4375             !strcmp(argv[3], "D") || !strcmp(argv[3], "R") ||
4376             !strcmp(argv[3], "I")) {
4377                 ic->mode = argv[3][0];
4378         } else {
4379                 ti->error = "Invalid mode (expecting J, B, D, R, I)";
4380                 r = -EINVAL;
4381                 goto bad;
4382         }
4383
4384         journal_sectors = 0;
4385         interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
4386         buffer_sectors = DEFAULT_BUFFER_SECTORS;
4387         journal_watermark = DEFAULT_JOURNAL_WATERMARK;
4388         sync_msec = DEFAULT_SYNC_MSEC;
4389         ic->sectors_per_block = 1;
4390
4391         as.argc = argc - DIRECT_ARGUMENTS;
4392         as.argv = argv + DIRECT_ARGUMENTS;
4393         r = dm_read_arg_group(_args, &as, &extra_args, &ti->error);
4394         if (r)
4395                 goto bad;
4396
4397         while (extra_args--) {
4398                 const char *opt_string;
4399                 unsigned int val;
4400                 unsigned long long llval;
4401
4402                 opt_string = dm_shift_arg(&as);
4403                 if (!opt_string) {
4404                         r = -EINVAL;
4405                         ti->error = "Not enough feature arguments";
4406                         goto bad;
4407                 }
4408                 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1)
4409                         journal_sectors = val ? val : 1;
4410                 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1)
4411                         interleave_sectors = val;
4412                 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1)
4413                         buffer_sectors = val;
4414                 else if (sscanf(opt_string, "journal_watermark:%u%c", &val, &dummy) == 1 && val <= 100)
4415                         journal_watermark = val;
4416                 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
4417                         sync_msec = val;
4418                 else if (!strncmp(opt_string, "meta_device:", strlen("meta_device:"))) {
4419                         if (ic->meta_dev) {
4420                                 dm_put_device(ti, ic->meta_dev);
4421                                 ic->meta_dev = NULL;
4422                         }
4423                         r = dm_get_device(ti, strchr(opt_string, ':') + 1,
4424                                           dm_table_get_mode(ti->table), &ic->meta_dev);
4425                         if (r) {
4426                                 ti->error = "Device lookup failed";
4427                                 goto bad;
4428                         }
4429                 } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
4430                         if (val < 1 << SECTOR_SHIFT ||
4431                             val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT ||
4432                             (val & (val - 1))) {
4433                                 r = -EINVAL;
4434                                 ti->error = "Invalid block_size argument";
4435                                 goto bad;
4436                         }
4437                         ic->sectors_per_block = val >> SECTOR_SHIFT;
4438                 } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
4439                         log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
4440                 } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
4441                         if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) {
4442                                 r = -EINVAL;
4443                                 ti->error = "Invalid bitmap_flush_interval argument";
4444                                 goto bad;
4445                         }
4446                         ic->bitmap_flush_interval = msecs_to_jiffies(val);
4447                 } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
4448                         r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
4449                                             "Invalid internal_hash argument");
4450                         if (r)
4451                                 goto bad;
4452                 } else if (!strncmp(opt_string, "journal_crypt:", strlen("journal_crypt:"))) {
4453                         r = get_alg_and_key(opt_string, &ic->journal_crypt_alg, &ti->error,
4454                                             "Invalid journal_crypt argument");
4455                         if (r)
4456                                 goto bad;
4457                 } else if (!strncmp(opt_string, "journal_mac:", strlen("journal_mac:"))) {
4458                         r = get_alg_and_key(opt_string, &ic->journal_mac_alg, &ti->error,
4459                                             "Invalid journal_mac argument");
4460                         if (r)
4461                                 goto bad;
4462                 } else if (!strcmp(opt_string, "recalculate")) {
4463                         ic->recalculate_flag = true;
4464                 } else if (!strcmp(opt_string, "reset_recalculate")) {
4465                         ic->recalculate_flag = true;
4466                         ic->reset_recalculate_flag = true;
4467                 } else if (!strcmp(opt_string, "allow_discards")) {
4468                         ic->discard = true;
4469                 } else if (!strcmp(opt_string, "fix_padding")) {
4470                         ic->fix_padding = true;
4471                 } else if (!strcmp(opt_string, "fix_hmac")) {
4472                         ic->fix_hmac = true;
4473                 } else if (!strcmp(opt_string, "legacy_recalculate")) {
4474                         ic->legacy_recalculate = true;
4475                 } else {
4476                         r = -EINVAL;
4477                         ti->error = "Invalid argument";
4478                         goto bad;
4479                 }
4480         }
4481
4482         ic->data_device_sectors = bdev_nr_sectors(ic->dev->bdev);
4483         if (!ic->meta_dev)
4484                 ic->meta_device_sectors = ic->data_device_sectors;
4485         else
4486                 ic->meta_device_sectors = bdev_nr_sectors(ic->meta_dev->bdev);
4487
4488         if (!journal_sectors) {
4489                 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
4490                                       ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
4491         }
4492
4493         if (!buffer_sectors)
4494                 buffer_sectors = 1;
4495         ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT);
4496
4497         r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error,
4498                     "Invalid internal hash", "Error setting internal hash key");
4499         if (r)
4500                 goto bad;
4501
4502         r = get_mac(&ic->journal_mac, &ic->journal_mac_alg, &ti->error,
4503                     "Invalid journal mac", "Error setting journal mac key");
4504         if (r)
4505                 goto bad;
4506
4507         if (!ic->tag_size) {
4508                 if (!ic->internal_hash) {
4509                         ti->error = "Unknown tag size";
4510                         r = -EINVAL;
4511                         goto bad;
4512                 }
4513                 ic->tag_size = crypto_shash_digestsize(ic->internal_hash);
4514         }
4515         if (ic->tag_size > MAX_TAG_SIZE) {
4516                 ti->error = "Too big tag size";
4517                 r = -EINVAL;
4518                 goto bad;
4519         }
4520         if (!(ic->tag_size & (ic->tag_size - 1)))
4521                 ic->log2_tag_size = __ffs(ic->tag_size);
4522         else
4523                 ic->log2_tag_size = -1;
4524
4525         if (ic->mode == 'I') {
4526                 struct blk_integrity *bi;
4527                 if (ic->meta_dev) {
4528                         r = -EINVAL;
4529                         ti->error = "Metadata device not supported in inline mode";
4530                         goto bad;
4531                 }
4532                 if (!ic->internal_hash_alg.alg_string) {
4533                         r = -EINVAL;
4534                         ti->error = "Internal hash not set in inline mode";
4535                         goto bad;
4536                 }
4537                 if (ic->journal_crypt_alg.alg_string || ic->journal_mac_alg.alg_string) {
4538                         r = -EINVAL;
4539                         ti->error = "Journal crypt not supported in inline mode";
4540                         goto bad;
4541                 }
4542                 if (ic->discard) {
4543                         r = -EINVAL;
4544                         ti->error = "Discards not supported in inline mode";
4545                         goto bad;
4546                 }
4547                 bi = blk_get_integrity(ic->dev->bdev->bd_disk);
4548                 if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) {
4549                         r = -EINVAL;
4550                         ti->error = "Integrity profile not supported";
4551                         goto bad;
4552                 }
4553                 /*printk("tag_size: %u, tuple_size: %u\n", bi->tag_size, bi->tuple_size);*/
4554                 if (bi->tuple_size < ic->tag_size) {
4555                         r = -EINVAL;
4556                         ti->error = "The integrity profile is smaller than tag size";
4557                         goto bad;
4558                 }
4559                 if ((unsigned long)bi->tuple_size > PAGE_SIZE / 2) {
4560                         r = -EINVAL;
4561                         ti->error = "Too big tuple size";
4562                         goto bad;
4563                 }
4564                 ic->tuple_size = bi->tuple_size;
4565                 if (1 << bi->interval_exp != ic->sectors_per_block << SECTOR_SHIFT) {
4566                         r = -EINVAL;
4567                         ti->error = "Integrity profile sector size mismatch";
4568                         goto bad;
4569                 }
4570         }
4571
4572         if (ic->mode == 'B' && !ic->internal_hash) {
4573                 r = -EINVAL;
4574                 ti->error = "Bitmap mode can be only used with internal hash";
4575                 goto bad;
4576         }
4577
4578         if (ic->discard && !ic->internal_hash) {
4579                 r = -EINVAL;
4580                 ti->error = "Discard can be only used with internal hash";
4581                 goto bad;
4582         }
4583
4584         ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
4585         ic->autocommit_msec = sync_msec;
4586         timer_setup(&ic->autocommit_timer, autocommit_fn, 0);
4587
4588         ic->io = dm_io_client_create();
4589         if (IS_ERR(ic->io)) {
4590                 r = PTR_ERR(ic->io);
4591                 ic->io = NULL;
4592                 ti->error = "Cannot allocate dm io";
4593                 goto bad;
4594         }
4595
4596         r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache);
4597         if (r) {
4598                 ti->error = "Cannot allocate mempool";
4599                 goto bad;
4600         }
4601
4602         r = mempool_init_page_pool(&ic->recheck_pool, 1, ic->mode == 'I' ? 1 : 0);
4603         if (r) {
4604                 ti->error = "Cannot allocate mempool";
4605                 goto bad;
4606         }
4607
4608         if (ic->mode == 'I') {
4609                 r = bioset_init(&ic->recheck_bios, RECHECK_POOL_SIZE, 0, BIOSET_NEED_BVECS);
4610                 if (r) {
4611                         ti->error = "Cannot allocate bio set";
4612                         goto bad;
4613                 }
4614                 r = bioset_integrity_create(&ic->recheck_bios, RECHECK_POOL_SIZE);
4615                 if (r) {
4616                         ti->error = "Cannot allocate bio integrity set";
4617                         r = -ENOMEM;
4618                         goto bad;
4619                 }
4620         }
4621
4622         ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
4623                                           WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE);
4624         if (!ic->metadata_wq) {
4625                 ti->error = "Cannot allocate workqueue";
4626                 r = -ENOMEM;
4627                 goto bad;
4628         }
4629
4630         /*
4631          * If this workqueue weren't ordered, it would cause bio reordering
4632          * and reduced performance.
4633          */
4634         ic->wait_wq = alloc_ordered_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM);
4635         if (!ic->wait_wq) {
4636                 ti->error = "Cannot allocate workqueue";
4637                 r = -ENOMEM;
4638                 goto bad;
4639         }
4640
4641         ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM,
4642                                           METADATA_WORKQUEUE_MAX_ACTIVE);
4643         if (!ic->offload_wq) {
4644                 ti->error = "Cannot allocate workqueue";
4645                 r = -ENOMEM;
4646                 goto bad;
4647         }
4648
4649         ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1);
4650         if (!ic->commit_wq) {
4651                 ti->error = "Cannot allocate workqueue";
4652                 r = -ENOMEM;
4653                 goto bad;
4654         }
4655         INIT_WORK(&ic->commit_work, integrity_commit);
4656
4657         if (ic->mode == 'J' || ic->mode == 'B') {
4658                 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1);
4659                 if (!ic->writer_wq) {
4660                         ti->error = "Cannot allocate workqueue";
4661                         r = -ENOMEM;
4662                         goto bad;
4663                 }
4664                 INIT_WORK(&ic->writer_work, integrity_writer);
4665         }
4666
4667         ic->sb = alloc_pages_exact(SB_SECTORS << SECTOR_SHIFT, GFP_KERNEL);
4668         if (!ic->sb) {
4669                 r = -ENOMEM;
4670                 ti->error = "Cannot allocate superblock area";
4671                 goto bad;
4672         }
4673
4674         r = sync_rw_sb(ic, REQ_OP_READ);
4675         if (r) {
4676                 ti->error = "Error reading superblock";
4677                 goto bad;
4678         }
4679         should_write_sb = false;
4680         if (memcmp(ic->sb->magic, SB_MAGIC, 8)) {
4681                 if (ic->mode != 'R') {
4682                         if (memchr_inv(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT)) {
4683                                 r = -EINVAL;
4684                                 ti->error = "The device is not initialized";
4685                                 goto bad;
4686                         }
4687                 }
4688
4689                 r = initialize_superblock(ic, journal_sectors, interleave_sectors);
4690                 if (r) {
4691                         ti->error = "Could not initialize superblock";
4692                         goto bad;
4693                 }
4694                 if (ic->mode != 'R')
4695                         should_write_sb = true;
4696         }
4697
4698         if (!ic->sb->version || ic->sb->version > SB_VERSION_6) {
4699                 r = -EINVAL;
4700                 ti->error = "Unknown version";
4701                 goto bad;
4702         }
4703         if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_INLINE)) != (ic->mode == 'I')) {
4704                 r = -EINVAL;
4705                 ti->error = "Inline flag mismatch";
4706                 goto bad;
4707         }
4708         if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) {
4709                 r = -EINVAL;
4710                 ti->error = "Tag size doesn't match the information in superblock";
4711                 goto bad;
4712         }
4713         if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) {
4714                 r = -EINVAL;
4715                 ti->error = "Block size doesn't match the information in superblock";
4716                 goto bad;
4717         }
4718         if (!le32_to_cpu(ic->sb->journal_sections) != (ic->mode == 'I')) {
4719                 r = -EINVAL;
4720                 if (ic->mode != 'I')
4721                         ti->error = "Corrupted superblock, journal_sections is 0";
4722                 else
4723                         ti->error = "Corrupted superblock, journal_sections is not 0";
4724                 goto bad;
4725         }
4726         /* make sure that ti->max_io_len doesn't overflow */
4727         if (!ic->meta_dev) {
4728                 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
4729                     ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {
4730                         r = -EINVAL;
4731                         ti->error = "Invalid interleave_sectors in the superblock";
4732                         goto bad;
4733                 }
4734         } else {
4735                 if (ic->sb->log2_interleave_sectors) {
4736                         r = -EINVAL;
4737                         ti->error = "Invalid interleave_sectors in the superblock";
4738                         goto bad;
4739                 }
4740         }
4741         if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) {
4742                 r = -EINVAL;
4743                 ti->error = "Journal mac mismatch";
4744                 goto bad;
4745         }
4746
4747         get_provided_data_sectors(ic);
4748         if (!ic->provided_data_sectors) {
4749                 r = -EINVAL;
4750                 ti->error = "The device is too small";
4751                 goto bad;
4752         }
4753
4754 try_smaller_buffer:
4755         r = calculate_device_limits(ic);
4756         if (r) {
4757                 if (ic->meta_dev) {
4758                         if (ic->log2_buffer_sectors > 3) {
4759                                 ic->log2_buffer_sectors--;
4760                                 goto try_smaller_buffer;
4761                         }
4762                 }
4763                 ti->error = "The device is too small";
4764                 goto bad;
4765         }
4766
4767         if (log2_sectors_per_bitmap_bit < 0)
4768                 log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT);
4769         if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block)
4770                 log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block;
4771
4772         bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3);
4773         if (bits_in_journal > UINT_MAX)
4774                 bits_in_journal = UINT_MAX;
4775         if (bits_in_journal)
4776                 while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit)
4777                         log2_sectors_per_bitmap_bit++;
4778
4779         log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block;
4780         ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
4781         if (should_write_sb)
4782                 ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
4783
4784         n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block)
4785                                 + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit;
4786         ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8);
4787
4788         if (!ic->meta_dev)
4789                 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
4790
4791         if (ti->len > ic->provided_data_sectors) {
4792                 r = -EINVAL;
4793                 ti->error = "Not enough provided sectors for requested mapping size";
4794                 goto bad;
4795         }
4796
4797         threshold = (__u64)ic->journal_entries * (100 - journal_watermark);
4798         threshold += 50;
4799         do_div(threshold, 100);
4800         ic->free_sectors_threshold = threshold;
4801
4802         DEBUG_print("initialized:\n");
4803         DEBUG_print("   integrity_tag_size %u\n", le16_to_cpu(ic->sb->integrity_tag_size));
4804         DEBUG_print("   journal_entry_size %u\n", ic->journal_entry_size);
4805         DEBUG_print("   journal_entries_per_sector %u\n", ic->journal_entries_per_sector);
4806         DEBUG_print("   journal_section_entries %u\n", ic->journal_section_entries);
4807         DEBUG_print("   journal_section_sectors %u\n", ic->journal_section_sectors);
4808         DEBUG_print("   journal_sections %u\n", (unsigned int)le32_to_cpu(ic->sb->journal_sections));
4809         DEBUG_print("   journal_entries %u\n", ic->journal_entries);
4810         DEBUG_print("   log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
4811         DEBUG_print("   data_device_sectors 0x%llx\n", bdev_nr_sectors(ic->dev->bdev));
4812         DEBUG_print("   initial_sectors 0x%x\n", ic->initial_sectors);
4813         DEBUG_print("   metadata_run 0x%x\n", ic->metadata_run);
4814         DEBUG_print("   log2_metadata_run %d\n", ic->log2_metadata_run);
4815         DEBUG_print("   provided_data_sectors 0x%llx (%llu)\n", ic->provided_data_sectors, ic->provided_data_sectors);
4816         DEBUG_print("   log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
4817         DEBUG_print("   bits_in_journal %llu\n", bits_in_journal);
4818
4819         if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
4820                 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
4821                 ic->sb->recalc_sector = cpu_to_le64(0);
4822         }
4823
4824         if (ic->internal_hash) {
4825                 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
4826                 if (!ic->recalc_wq) {
4827                         ti->error = "Cannot allocate workqueue";
4828                         r = -ENOMEM;
4829                         goto bad;
4830                 }
4831                 INIT_WORK(&ic->recalc_work, integrity_recalc);
4832         } else {
4833                 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
4834                         ti->error = "Recalculate can only be specified with internal_hash";
4835                         r = -EINVAL;
4836                         goto bad;
4837                 }
4838         }
4839
4840         if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
4841             le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors &&
4842             dm_integrity_disable_recalculate(ic)) {
4843                 ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\"";
4844                 r = -EOPNOTSUPP;
4845                 goto bad;
4846         }
4847
4848         if (ic->mode != 'I') {
4849                 ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
4850                                 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL, 0);
4851                 if (IS_ERR(ic->bufio)) {
4852                         r = PTR_ERR(ic->bufio);
4853                         ti->error = "Cannot initialize dm-bufio";
4854                         ic->bufio = NULL;
4855                         goto bad;
4856                 }
4857                 dm_bufio_set_sector_offset(ic->bufio, ic->start + ic->initial_sectors);
4858         }
4859
4860         if (ic->mode != 'R' && ic->mode != 'I') {
4861                 r = create_journal(ic, &ti->error);
4862                 if (r)
4863                         goto bad;
4864
4865         }
4866
4867         if (ic->mode == 'B') {
4868                 unsigned int i;
4869                 unsigned int n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
4870
4871                 ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
4872                 if (!ic->recalc_bitmap) {
4873                         r = -ENOMEM;
4874                         goto bad;
4875                 }
4876                 ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
4877                 if (!ic->may_write_bitmap) {
4878                         r = -ENOMEM;
4879                         goto bad;
4880                 }
4881                 ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL);
4882                 if (!ic->bbs) {
4883                         r = -ENOMEM;
4884                         goto bad;
4885                 }
4886                 INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work);
4887                 for (i = 0; i < ic->n_bitmap_blocks; i++) {
4888                         struct bitmap_block_status *bbs = &ic->bbs[i];
4889                         unsigned int sector, pl_index, pl_offset;
4890
4891                         INIT_WORK(&bbs->work, bitmap_block_work);
4892                         bbs->ic = ic;
4893                         bbs->idx = i;
4894                         bio_list_init(&bbs->bio_queue);
4895                         spin_lock_init(&bbs->bio_queue_lock);
4896
4897                         sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT);
4898                         pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
4899                         pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
4900
4901                         bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset;
4902                 }
4903         }
4904
4905         if (should_write_sb) {
4906                 init_journal(ic, 0, ic->journal_sections, 0);
4907                 r = dm_integrity_failed(ic);
4908                 if (unlikely(r)) {
4909                         ti->error = "Error initializing journal";
4910                         goto bad;
4911                 }
4912                 r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA);
4913                 if (r) {
4914                         ti->error = "Error initializing superblock";
4915                         goto bad;
4916                 }
4917                 ic->just_formatted = true;
4918         }
4919
4920         if (!ic->meta_dev && ic->mode != 'I') {
4921                 r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
4922                 if (r)
4923                         goto bad;
4924         }
4925         if (ic->mode == 'B') {
4926                 unsigned int max_io_len;
4927
4928                 max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8);
4929                 if (!max_io_len)
4930                         max_io_len = 1U << 31;
4931                 DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len);
4932                 if (!ti->max_io_len || ti->max_io_len > max_io_len) {
4933                         r = dm_set_target_max_io_len(ti, max_io_len);
4934                         if (r)
4935                                 goto bad;
4936                 }
4937         }
4938
4939         ti->num_flush_bios = 1;
4940         ti->flush_supported = true;
4941         if (ic->discard)
4942                 ti->num_discard_bios = 1;
4943
4944         if (ic->mode == 'I')
4945                 ti->mempool_needs_integrity = true;
4946
4947         dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1);
4948         return 0;
4949
4950 bad:
4951         dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0);
4952         dm_integrity_dtr(ti);
4953         return r;
4954 }
4955
4956 static void dm_integrity_dtr(struct dm_target *ti)
4957 {
4958         struct dm_integrity_c *ic = ti->private;
4959
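        /* No in-flight ranges or waiters may remain when the target is destroyed */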
4960         BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
4961         BUG_ON(!list_empty(&ic->wait_list));
4962
4963         if (ic->mode == 'B')
4964                 cancel_delayed_work_sync(&ic->bitmap_flush_work);
4965         if (ic->metadata_wq)
4966                 destroy_workqueue(ic->metadata_wq);
4967         if (ic->wait_wq)
4968                 destroy_workqueue(ic->wait_wq);
4969         if (ic->offload_wq)
4970                 destroy_workqueue(ic->offload_wq);
4971         if (ic->commit_wq)
4972                 destroy_workqueue(ic->commit_wq);
4973         if (ic->writer_wq)
4974                 destroy_workqueue(ic->writer_wq);
4975         if (ic->recalc_wq)
4976                 destroy_workqueue(ic->recalc_wq);
4977         kvfree(ic->bbs);
4978         if (ic->bufio)
4979                 dm_bufio_client_destroy(ic->bufio);
4980         bioset_exit(&ic->recheck_bios);
4981         mempool_exit(&ic->recheck_pool);
4982         mempool_exit(&ic->journal_io_mempool);
4983         if (ic->io)
4984                 dm_io_client_destroy(ic->io);
4985         if (ic->dev)
4986                 dm_put_device(ti, ic->dev);
4987         if (ic->meta_dev)
4988                 dm_put_device(ti, ic->meta_dev);
4989         dm_integrity_free_page_list(ic->journal);
4990         dm_integrity_free_page_list(ic->journal_io);
4991         dm_integrity_free_page_list(ic->journal_xor);
4992         dm_integrity_free_page_list(ic->recalc_bitmap);
4993         dm_integrity_free_page_list(ic->may_write_bitmap);
4994         if (ic->journal_scatterlist)
4995                 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
4996         if (ic->journal_io_scatterlist)
4997                 dm_integrity_free_journal_scatterlist(ic, ic->journal_io_scatterlist);
4998         if (ic->sk_requests) {
4999                 unsigned int i;
5000
5001                 for (i = 0; i < ic->journal_sections; i++) {
5002                         struct skcipher_request *req;
5003
5004                         req = ic->sk_requests[i];
5005                         if (req) {
5006                                 kfree_sensitive(req->iv);
5007                                 skcipher_request_free(req);
5008                         }
5009                 }
5010                 kvfree(ic->sk_requests);
5011         }
5012         kvfree(ic->journal_tree);
5013         if (ic->sb)
5014                 free_pages_exact(ic->sb, SB_SECTORS << SECTOR_SHIFT);
5015
5016         if (ic->internal_hash)
5017                 crypto_free_shash(ic->internal_hash);
5018         free_alg(&ic->internal_hash_alg);
5019
5020         if (ic->journal_crypt)
5021                 crypto_free_skcipher(ic->journal_crypt);
5022         free_alg(&ic->journal_crypt_alg);
5023
5024         if (ic->journal_mac)
5025                 crypto_free_shash(ic->journal_mac);
5026         free_alg(&ic->journal_mac_alg);
5027
5028         kfree(ic);
5029         dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1);
5030 }
5031
5032 static struct target_type integrity_target = {
5033         .name                   = "integrity",
5034         .version                = {1, 12, 0},
5035         .module                 = THIS_MODULE,
5036         .features               = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
5037         .ctr                    = dm_integrity_ctr,
5038         .dtr                    = dm_integrity_dtr,
5039         .map                    = dm_integrity_map,
5040         .end_io                 = dm_integrity_end_io,
5041         .postsuspend            = dm_integrity_postsuspend,
5042         .resume                 = dm_integrity_resume,
5043         .status                 = dm_integrity_status,
5044         .iterate_devices        = dm_integrity_iterate_devices,
5045         .io_hints               = dm_integrity_io_hints,
5046 };
5047
5048 static int __init dm_integrity_init(void)
5049 {
5050         int r;
5051
5052         journal_io_cache = kmem_cache_create("integrity_journal_io",
5053                                              sizeof(struct journal_io), 0, 0, NULL);
5054         if (!journal_io_cache) {
5055                 DMERR("can't allocate journal io cache");
5056                 return -ENOMEM;
5057         }
5058
5059         r = dm_register_target(&integrity_target);
5060         if (r < 0) {
5061                 kmem_cache_destroy(journal_io_cache);
5062                 return r;
5063         }
5064
5065         return 0;
5066 }
5067
5068 static void __exit dm_integrity_exit(void)
5069 {
5070         dm_unregister_target(&integrity_target);
5071         kmem_cache_destroy(journal_io_cache);
5072 }
5073
5074 module_init(dm_integrity_init);
5075 module_exit(dm_integrity_exit);
5076
5077 MODULE_AUTHOR("Milan Broz");
5078 MODULE_AUTHOR("Mikulas Patocka");
5079 MODULE_DESCRIPTION(DM_NAME " target for integrity tags extension");
5080 MODULE_LICENSE("GPL");