// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>

#include "dm-core.h"

#define DM_MSG_PREFIX "zone"

#define DM_ZONE_INVALID_WP_OFST UINT_MAX

/*
 * For internal zone reports bypassing the top BIO submission path.
 */
static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t,
                                  sector_t sector, unsigned int nr_zones,
                                  report_zones_cb cb, void *data)
{
        struct gendisk *disk = md->disk;
        int ret;
        struct dm_report_zones_args args = {
                .next_sector = sector,
                .orig_data = data,
                .orig_cb = cb,
        };

        do {
                struct dm_target *tgt;

                tgt = dm_table_find_target(t, args.next_sector);
                if (WARN_ON_ONCE(!tgt->type->report_zones))
                        return -EIO;

                args.tgt = tgt;
                ret = tgt->type->report_zones(tgt, &args,
                                              nr_zones - args.zone_idx);
                if (ret < 0)
                        return ret;
        } while (args.zone_idx < nr_zones &&
                 args.next_sector < get_capacity(disk));

        return args.zone_idx;
}

/*
 * User facing block device report zones operation of a DM device. This calls
 * the report_zones operation of each target in the device table. Targets
 * generally implement that operation using dm_report_zones().
 */
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
                        unsigned int nr_zones, report_zones_cb cb, void *data)
{
        struct mapped_device *md = disk->private_data;
        struct dm_table *map;
        int srcu_idx, ret;

        if (dm_suspended_md(md))
                return -EAGAIN;

        map = dm_get_live_table(md, &srcu_idx);
        if (!map)
                return -EIO;

        ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);

        dm_put_live_table(md, srcu_idx);

        return ret;
}

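/*
 * Zone report callback passed to blkdev_report_zones() by dm_report_zones():
 * remap the zone start sector and write pointer position from the underlying
 * device to the position of the target range in the DM device before calling
 * the original user callback.
 */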
static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx,
                              void *data)
{
        struct dm_report_zones_args *args = data;
        sector_t sector_diff = args->tgt->begin - args->start;

        /*
         * Ignore zones beyond the target range.
         */
        if (zone->start >= args->start + args->tgt->len)
                return 0;

        /*
         * Remap the start sector and write pointer position of the zone
         * to match its position in the target range.
         */
        zone->start += sector_diff;
        if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
                if (zone->cond == BLK_ZONE_COND_FULL)
                        zone->wp = zone->start + zone->len;
                else if (zone->cond == BLK_ZONE_COND_EMPTY)
                        zone->wp = zone->start;
                else
                        zone->wp += sector_diff;
        }

        args->next_sector = zone->start + zone->len;
        return args->orig_cb(zone, args->zone_idx++, args->orig_data);
}

/*
 * Helper for drivers of zoned targets to implement struct target_type
 * report_zones operation.
 */
int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
                    struct dm_report_zones_args *args, unsigned int nr_zones)
{
        /*
         * Set the target mapping start sector first so that
         * dm_report_zones_cb() can correctly remap zone information.
         */
        args->start = start;

        return blkdev_report_zones(bdev, sector, nr_zones,
                                   dm_report_zones_cb, args);
}
EXPORT_SYMBOL_GPL(dm_report_zones);
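
/*
 * Example (illustrative sketch only): a zoned target report_zones method
 * would typically remap the report start sector to the underlying device and
 * call dm_report_zones(). The linear_report_zones(), struct linear_c, lc->dev,
 * lc->start and linear_map_sector() names below are hypothetical and shown
 * only to illustrate the calling convention:
 *
 *	static int linear_report_zones(struct dm_target *ti,
 *			struct dm_report_zones_args *args,
 *			unsigned int nr_zones)
 *	{
 *		struct linear_c *lc = ti->private;
 *
 *		return dm_report_zones(lc->dev->bdev, lc->start,
 *				       linear_map_sector(ti, args->next_sector),
 *				       args, nr_zones);
 *	}
 */
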
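/*
 * Return true if @bio is a write type operation (write, write zeroes or
 * write same) with data and without a flush/FUA flag, directed at a zoned
 * mapped device.
 */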
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
        struct request_queue *q = md->queue;

        if (!blk_queue_is_zoned(q))
                return false;

        switch (bio_op(bio)) {
        case REQ_OP_WRITE_ZEROES:
        case REQ_OP_WRITE_SAME:
        case REQ_OP_WRITE:
                return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
        default:
                return false;
        }
}

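/*
 * Free the zone resources of a mapped device: the conventional zone bitmap,
 * the sequential zone write lock bitmap and the zone write pointer offset
 * array used for zone append emulation.
 */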
void dm_cleanup_zoned_dev(struct mapped_device *md)
{
        struct request_queue *q = md->queue;

        if (q) {
                kfree(q->conv_zones_bitmap);
                q->conv_zones_bitmap = NULL;
                kfree(q->seq_zones_wlock);
                q->seq_zones_wlock = NULL;
        }

        kvfree(md->zwp_offset);
        md->zwp_offset = NULL;
        md->nr_zones = 0;
}

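/*
 * Return the offset of the zone write pointer relative to the zone start
 * sector, in sectors: 0 for zones without a valid write pointer and the zone
 * length for full zones.
 */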
static unsigned int dm_get_zone_wp_offset(struct blk_zone *zone)
{
        switch (zone->cond) {
        case BLK_ZONE_COND_IMP_OPEN:
        case BLK_ZONE_COND_EXP_OPEN:
        case BLK_ZONE_COND_CLOSED:
                return zone->wp - zone->start;
        case BLK_ZONE_COND_FULL:
                return zone->len;
        case BLK_ZONE_COND_EMPTY:
        case BLK_ZONE_COND_NOT_WP:
        case BLK_ZONE_COND_OFFLINE:
        case BLK_ZONE_COND_READONLY:
        default:
                /*
                 * Conventional, offline and read-only zones do not have a valid
                 * write pointer. Use 0 as for an empty zone.
                 */
                return 0;
        }
}

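/*
 * Report zones callback used by dm_revalidate_zones() to allocate and
 * initialize the conventional zone bitmap, the sequential zone write lock
 * bitmap and the zone write pointer offset array of a mapped device.
 */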
static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
                                 void *data)
{
        struct mapped_device *md = data;
        struct request_queue *q = md->queue;

        switch (zone->type) {
        case BLK_ZONE_TYPE_CONVENTIONAL:
                if (!q->conv_zones_bitmap) {
                        q->conv_zones_bitmap =
                                kcalloc(BITS_TO_LONGS(q->nr_zones),
                                        sizeof(unsigned long), GFP_NOIO);
                        if (!q->conv_zones_bitmap)
                                return -ENOMEM;
                }
                set_bit(idx, q->conv_zones_bitmap);
                break;
        case BLK_ZONE_TYPE_SEQWRITE_REQ:
        case BLK_ZONE_TYPE_SEQWRITE_PREF:
                if (!q->seq_zones_wlock) {
                        q->seq_zones_wlock =
                                kcalloc(BITS_TO_LONGS(q->nr_zones),
                                        sizeof(unsigned long), GFP_NOIO);
                        if (!q->seq_zones_wlock)
                                return -ENOMEM;
                }
                if (!md->zwp_offset) {
                        md->zwp_offset =
                                kvcalloc(q->nr_zones, sizeof(unsigned int),
                                         GFP_KERNEL);
                        if (!md->zwp_offset)
                                return -ENOMEM;
                }
                md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);

                break;
        default:
                DMERR("Invalid zone type 0x%x at sectors %llu",
                      (int)zone->type, zone->start);
                return -ENODEV;
        }

        return 0;
}

/*
 * Revalidate the zones of a mapped device to initialize the resources
 * necessary for zone append emulation. Note that we cannot simply use the
 * block layer blk_revalidate_disk_zones() function here as the mapped device
 * is suspended (this is called from __bind() context).
 */
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
{
        struct request_queue *q = md->queue;
        unsigned int noio_flag;
        int ret;

        /*
         * Check if something changed. If yes, cleanup the current resources
         * and reallocate everything.
         */
        if (!q->nr_zones || q->nr_zones != md->nr_zones)
                dm_cleanup_zoned_dev(md);
        if (md->nr_zones)
                return 0;

        /*
         * Scan all zones to initialize everything. Ensure that all vmalloc
         * operations in this context are done as if GFP_NOIO was specified.
         */
        noio_flag = memalloc_noio_save();
        ret = dm_blk_do_report_zones(md, t, 0, q->nr_zones,
                                     dm_zone_revalidate_cb, md);
        memalloc_noio_restore(noio_flag);
        if (ret < 0)
                goto err;
        if (ret != q->nr_zones) {
                ret = -EIO;
                goto err;
        }

        md->nr_zones = q->nr_zones;

        return 0;

err:
        DMERR("Revalidate zones failed %d", ret);
        dm_cleanup_zoned_dev(md);
        return ret;
}

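/*
 * dm_table iterate_devices callout: report a device as not capable of zone
 * append if it is not a zoned block device (only zoned devices natively
 * support REQ_OP_ZONE_APPEND).
 */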
static int device_not_zone_append_capable(struct dm_target *ti,
                                          struct dm_dev *dev, sector_t start,
                                          sector_t len, void *data)
{
        return !blk_queue_is_zoned(bdev_get_queue(dev->bdev));
}

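/*
 * Check if all the targets of a table and all their underlying devices can
 * natively handle zone append operations. If not, zone append emulation is
 * needed.
 */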
static bool dm_table_supports_zone_append(struct dm_table *t)
{
        struct dm_target *ti;
        unsigned int i;

        for (i = 0; i < dm_table_get_num_targets(t); i++) {
                ti = dm_table_get_target(t, i);

                if (ti->emulate_zone_append)
                        return false;

                if (!ti->type->iterate_devices ||
                    ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL))
                        return false;
        }

        return true;
}

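/*
 * Set up the zone related queue information of a mapped device and, if
 * needed, the flag and resources for emulating zone append operations.
 */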
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
{
        struct mapped_device *md = t->md;

        /*
         * For a zoned target, the number of zones should be updated for the
         * correct value to be exposed in sysfs queue/nr_zones.
         */
        WARN_ON_ONCE(queue_is_mq(q));
        q->nr_zones = blkdev_nr_zones(md->disk);

        /* Check if zone append is natively supported */
        if (dm_table_supports_zone_append(t)) {
                clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
                dm_cleanup_zoned_dev(md);
                return 0;
        }

        /*
         * Mark the mapped device as needing zone append emulation and
         * initialize the emulation resources once the capacity is set.
         */
        set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
        if (!get_capacity(md->disk))
                return 0;

        return dm_revalidate_zones(md, t);
}

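/*
 * Report zones callback used by dm_update_zone_wp_offset() to retrieve the
 * write pointer offset of a single zone.
 */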
static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
                                       void *data)
{
        unsigned int *wp_offset = data;

        *wp_offset = dm_get_zone_wp_offset(zone);

        return 0;
}

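/*
 * Issue a zone report for a single zone to read back its write pointer
 * offset, e.g. to recover from an invalid write pointer offset state.
 */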
static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
                                    unsigned int *wp_ofst)
{
        sector_t sector = zno * blk_queue_zone_sectors(md->queue);
        unsigned int noio_flag;
        struct dm_table *t;
        int srcu_idx, ret;

        t = dm_get_live_table(md, &srcu_idx);
        if (!t)
                return -EIO;

        /*
         * Ensure that all memory allocations in this context are done as if
         * GFP_NOIO was specified.
         */
        noio_flag = memalloc_noio_save();
        ret = dm_blk_do_report_zones(md, t, sector, 1,
                                     dm_update_zone_wp_offset_cb, wp_ofst);
        memalloc_noio_restore(noio_flag);

        dm_put_live_table(md, srcu_idx);

        if (ret != 1)
                return -EIO;

        return 0;
}

/*
 * First phase of BIO mapping for targets with zone append emulation:
 * check all BIOs that change a zone write pointer and change zone
 * append operations into regular write operations.
 */
static bool dm_zone_map_bio_begin(struct mapped_device *md,
                                  struct bio *orig_bio, struct bio *clone)
{
        sector_t zsectors = blk_queue_zone_sectors(md->queue);
        unsigned int zno = bio_zone_no(orig_bio);
        unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

        /*
         * If the target zone is in an error state, recover by inspecting the
         * zone to get its current write pointer position. Note that since the
         * target zone is already locked, a BIO issuing context should never
         * see the zone write in the DM_ZONE_UPDATING_WP_OFST state.
         */
        if (zwp_offset == DM_ZONE_INVALID_WP_OFST) {
                if (dm_update_zone_wp_offset(md, zno, &zwp_offset))
                        return false;
                WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
        }

        switch (bio_op(orig_bio)) {
        case REQ_OP_ZONE_RESET:
        case REQ_OP_ZONE_FINISH:
                return true;
        case REQ_OP_WRITE_ZEROES:
        case REQ_OP_WRITE_SAME:
        case REQ_OP_WRITE:
                /* Writes must be aligned to the zone write pointer */
                if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset)
                        return false;
                break;
        case REQ_OP_ZONE_APPEND:
                /*
                 * Change a zone append operation into a non-mergeable regular
                 * write directed at the current write pointer position of the
                 * target zone.
                 */
                clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
                        (orig_bio->bi_opf & (~REQ_OP_MASK));
                clone->bi_iter.bi_sector =
                        orig_bio->bi_iter.bi_sector + zwp_offset;
                break;
        default:
                DMWARN_LIMIT("Invalid BIO operation");
                return false;
        }

        /* Cannot write to a full zone */
        if (zwp_offset >= zsectors)
                return false;

        return true;
}

/*
 * Second phase of BIO mapping for targets with zone append emulation:
 * update the zone write pointer offset array to account for the additional
 * data written to a zone. Note that at this point, the remapped clone BIO
 * may already have completed, so we do not touch it.
 */
static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
                                        struct bio *orig_bio,
                                        unsigned int nr_sectors)
{
        unsigned int zno = bio_zone_no(orig_bio);
        unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

        /* The clone BIO may already have been completed and failed */
        if (zwp_offset == DM_ZONE_INVALID_WP_OFST)
                return BLK_STS_IOERR;

        /* Update the zone wp offset */
        switch (bio_op(orig_bio)) {
        case REQ_OP_ZONE_RESET:
                WRITE_ONCE(md->zwp_offset[zno], 0);
                return BLK_STS_OK;
        case REQ_OP_ZONE_FINISH:
                WRITE_ONCE(md->zwp_offset[zno],
                           blk_queue_zone_sectors(md->queue));
                return BLK_STS_OK;
        case REQ_OP_WRITE_ZEROES:
        case REQ_OP_WRITE_SAME:
        case REQ_OP_WRITE:
                WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
                return BLK_STS_OK;
        case REQ_OP_ZONE_APPEND:
                /*
                 * Check that the target did not truncate the write operation
                 * emulating a zone append.
                 */
                if (nr_sectors != bio_sectors(orig_bio)) {
                        DMWARN_LIMIT("Truncated write for zone append");
                        return BLK_STS_IOERR;
                }
                WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
                return BLK_STS_OK;
        default:
                DMWARN_LIMIT("Invalid BIO operation");
                return BLK_STS_IOERR;
        }
}

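/*
 * Take the write lock of the zone of a BIO using the sequential zone write
 * lock bitmap and flag the clone BIO as owning that lock.
 */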
static inline void dm_zone_lock(struct request_queue *q,
                                unsigned int zno, struct bio *clone)
{
        if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
                return;

        wait_on_bit_lock_io(q->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
        bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

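/*
 * Release the zone write lock taken with dm_zone_lock() and clear the clone
 * BIO zone write locked flag.
 */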
static inline void dm_zone_unlock(struct request_queue *q,
                                  unsigned int zno, struct bio *clone)
{
        if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
                return;

        WARN_ON_ONCE(!test_bit(zno, q->seq_zones_wlock));
        clear_bit_unlock(zno, q->seq_zones_wlock);
        smp_mb__after_atomic();
        wake_up_bit(q->seq_zones_wlock, zno);

        bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

static bool dm_need_zone_wp_tracking(struct bio *orig_bio)
{
        /*
         * Special processing is not needed for operations that do not need the
         * zone write lock, that is, all operations that target conventional
         * zones and all operations that do not directly modify a sequential
         * zone write pointer.
         */
        if (op_is_flush(orig_bio->bi_opf) && !bio_sectors(orig_bio))
                return false;
        switch (bio_op(orig_bio)) {
        case REQ_OP_WRITE_ZEROES:
        case REQ_OP_WRITE_SAME:
        case REQ_OP_WRITE:
        case REQ_OP_ZONE_RESET:
        case REQ_OP_ZONE_FINISH:
        case REQ_OP_ZONE_APPEND:
                return bio_zone_is_seq(orig_bio);
        default:
                return false;
        }
}

/*
 * Special IO mapping for targets needing zone append emulation.
 */
int dm_zone_map_bio(struct dm_target_io *tio)
{
        struct dm_io *io = tio->io;
        struct dm_target *ti = tio->ti;
        struct mapped_device *md = io->md;
        struct request_queue *q = md->queue;
        struct bio *orig_bio = io->orig_bio;
        struct bio *clone = &tio->clone;
        unsigned int zno;
        blk_status_t sts;
        int r;

        /*
         * IOs that do not change a zone write pointer do not need
         * any additional special processing.
         */
        if (!dm_need_zone_wp_tracking(orig_bio))
                return ti->type->map(ti, clone);

        /* Lock the target zone */
        zno = bio_zone_no(orig_bio);
        dm_zone_lock(q, zno, clone);

        /*
         * Check that the bio and the target zone write pointer offset are
         * both valid, and if the bio is a zone append, remap it to a write.
         */
        if (!dm_zone_map_bio_begin(md, orig_bio, clone)) {
                dm_zone_unlock(q, zno, clone);
                return DM_MAPIO_KILL;
        }

        /*
         * The target map function may issue and complete the IO quickly.
         * Take an extra reference on the IO to make sure it does not
         * disappear until we run dm_zone_map_bio_end().
         */
        dm_io_inc_pending(io);

        /* Let the target do its work */
        r = ti->type->map(ti, clone);
        switch (r) {
        case DM_MAPIO_SUBMITTED:
                /*
                 * The target submitted the clone BIO. The target zone will
                 * be unlocked on completion of the clone.
                 */
                sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr);
                break;
        case DM_MAPIO_REMAPPED:
                /*
                 * The target only remapped the clone BIO. In case of error,
                 * unlock the target zone here as the clone will not be
                 * submitted.
                 */
                sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr);
                if (sts != BLK_STS_OK)
                        dm_zone_unlock(q, zno, clone);
                break;
        case DM_MAPIO_REQUEUE:
        case DM_MAPIO_KILL:
        default:
                dm_zone_unlock(q, zno, clone);
                sts = BLK_STS_IOERR;
                break;
        }

        /* Drop the extra reference on the IO */
        dm_io_dec_pending(io, sts);

        if (sts != BLK_STS_OK)
                return DM_MAPIO_KILL;

        return r;
}

/*
 * IO completion callback called from clone_endio().
 */
void dm_zone_endio(struct dm_io *io, struct bio *clone)
{
        struct mapped_device *md = io->md;
        struct request_queue *q = md->queue;
        struct bio *orig_bio = io->orig_bio;
        unsigned int zwp_offset;
        unsigned int zno;

        /*
         * For targets that do not emulate zone append, we only need to
         * handle native zone-append bios.
         */
        if (!dm_emulate_zone_append(md)) {
                /*
                 * Get the offset within the zone of the written sector
                 * and add that to the original bio sector position.
                 */
                if (clone->bi_status == BLK_STS_OK &&
                    bio_op(clone) == REQ_OP_ZONE_APPEND) {
                        sector_t mask = (sector_t)blk_queue_zone_sectors(q) - 1;

                        orig_bio->bi_iter.bi_sector +=
                                clone->bi_iter.bi_sector & mask;
                }

                return;
        }

        /*
         * For targets that do emulate zone append, if the clone BIO does not
         * own the target zone write lock, we have nothing to do.
         */
        if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
                return;

        zno = bio_zone_no(orig_bio);

        if (clone->bi_status != BLK_STS_OK) {
                /*
                 * BIOs that modify a zone write pointer may leave the zone
                 * in an unknown state in case of failure (e.g. the write
                 * pointer was only partially advanced). In this case, set
                 * the target zone write pointer as invalid unless it is
                 * already being updated.
                 */
                WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST);
        } else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
                /*
                 * Get the written sector for zone append operations that were
                 * emulated using regular write operations.
                 */
                zwp_offset = READ_ONCE(md->zwp_offset[zno]);
                if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio)))
                        WRITE_ONCE(md->zwp_offset[zno],
                                   DM_ZONE_INVALID_WP_OFST);
                else
                        orig_bio->bi_iter.bi_sector +=
                                zwp_offset - bio_sectors(orig_bio);
        }

        dm_zone_unlock(q, zno, clone);
}