// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
#include <linux/writeback.h>

/*
 * structure owned by writepages passed to individual writepage calls
 */
struct xfs_writepage_ctx {
	struct xfs_bmbt_irec	imap;
	unsigned int		io_type;
	struct xfs_ioend	*ioend;
};

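/*
 * Return the block device backing this inode: the realtime device for
 * realtime inodes, the main data device otherwise.
 */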
struct block_device *
xfs_find_bdev_for_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;

	if (XFS_IS_REALTIME_INODE(ip))
		return mp->m_rtdev_targp->bt_bdev;
	else
		return mp->m_ddev_targp->bt_bdev;
}

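/*
 * Return the DAX device backing this inode, again picking the realtime
 * or data device based on the inode flags.
 */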
struct dax_device *
xfs_find_daxdev_for_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;

	if (XFS_IS_REALTIME_INODE(ip))
		return mp->m_rtdev_targp->bt_daxdev;
	else
		return mp->m_ddev_targp->bt_daxdev;
}

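/*
 * Per-segment writeback completion: note any error on the page and end
 * page writeback once the last block write tracked by the iomap_page
 * write_count has finished.
 */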
static void
xfs_finish_page_writeback(
	struct inode		*inode,
	struct bio_vec		*bvec,
	int			error)
{
	struct iomap_page	*iop = to_iomap_page(bvec->bv_page);

	if (error) {
		SetPageError(bvec->bv_page);
		mapping_set_error(inode->i_mapping, -EIO);
	}

	ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
	ASSERT(!iop || atomic_read(&iop->write_count) > 0);

	if (!iop || atomic_dec_and_test(&iop->write_count))
		end_page_writeback(bvec->bv_page);
}

/*
 * We're now finished for good with this ioend structure.  Update the page
 * state, release holds on bios, and finally free up memory.  Do not use the
 * ioend after this.
 */
STATIC void
xfs_destroy_ioend(
	struct xfs_ioend	*ioend,
	int			error)
{
	struct inode		*inode = ioend->io_inode;
	struct bio		*bio = &ioend->io_inline_bio;
	struct bio		*last = ioend->io_bio, *next;
	u64			start = bio->bi_iter.bi_sector;
	bool			quiet = bio_flagged(bio, BIO_QUIET);

	for (bio = &ioend->io_inline_bio; bio; bio = next) {
		struct bio_vec	*bvec;
		int		i;

		/*
		 * For the last bio, bi_private points to the ioend, so we
		 * need to explicitly end the iteration here.
		 */
		if (bio == last)
			next = NULL;
		else
			next = bio->bi_private;

		/* walk each page on bio, ending page IO on them */
		bio_for_each_segment_all(bvec, bio, i)
			xfs_finish_page_writeback(inode, bvec, error);
		bio_put(bio);
	}

	if (unlikely(error && !quiet)) {
		xfs_err_ratelimited(XFS_I(inode)->i_mount,
			"writeback error on sector %llu", start);
	}
}

/*
 * Fast and loose check if this write could update the on-disk inode size.
 */
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
{
	return ioend->io_offset + ioend->io_size >
		XFS_I(ioend->io_inode)->i_d.di_size;
}

STATIC int
xfs_setfilesize_trans_alloc(
	struct xfs_ioend	*ioend)
{
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
				XFS_TRANS_NOFS, &tp);
	if (error)
		return error;

	ioend->io_append_trans = tp;

	/*
	 * We may pass freeze protection with a transaction.  So tell lockdep
	 * we released it.
	 */
	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
	/*
	 * We hand off the transaction to the completion thread now, so
	 * clear the flag here.
	 */
	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	return 0;
}

/*
 * Update on-disk file size now that data has been written to disk.
 */
STATIC int
__xfs_setfilesize(
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	xfs_off_t		offset,
	size_t			size)
{
	xfs_fsize_t		isize;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	isize = xfs_new_eof(ip, offset + size);
	if (!isize) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp);
		return 0;
	}

	trace_xfs_setfilesize(ip, offset, size);

	ip->i_d.di_size = isize;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	return xfs_trans_commit(tp);
}

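/*
 * Allocate a size-update transaction and push the new on-disk file size
 * out for a completed write.
 */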
int
xfs_setfilesize(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	size_t			size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
	if (error)
		return error;

	return __xfs_setfilesize(ip, tp, offset, size);
}

STATIC int
xfs_setfilesize_ioend(
	struct xfs_ioend	*ioend,
	int			error)
{
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	struct xfs_trans	*tp = ioend->io_append_trans;

	/*
	 * The transaction may have been allocated in the I/O submission thread,
	 * thus we need to mark ourselves as being in a transaction manually.
	 * Similarly for freeze protection.
	 */
	current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);

	/* we abort the update if there was an IO error */
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}

	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}

/*
 * IO write completion.
 */
STATIC void
xfs_end_io(
	struct work_struct	*work)
{
	struct xfs_ioend	*ioend =
		container_of(work, struct xfs_ioend, io_work);
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	xfs_off_t		offset = ioend->io_offset;
	size_t			size = ioend->io_size;
	int			error;

	/*
	 * Just clean up the in-memory structures if the fs has been shut down.
	 */
	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		error = -EIO;
		goto done;
	}

	/*
	 * Clean up any COW blocks on an I/O error.
	 */
	error = blk_status_to_errno(ioend->io_bio->bi_status);
	if (unlikely(error)) {
		switch (ioend->io_type) {
		case XFS_IO_COW:
			xfs_reflink_cancel_cow_range(ip, offset, size, true);
			break;
		}

		goto done;
	}

	/*
	 * Success: commit the COW or unwritten blocks if needed.
	 */
	switch (ioend->io_type) {
	case XFS_IO_COW:
		error = xfs_reflink_end_cow(ip, offset, size);
		break;
	case XFS_IO_UNWRITTEN:
		/* writeback should never update isize */
		error = xfs_iomap_write_unwritten(ip, offset, size, false);
		break;
	default:
		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
		break;
	}

done:
	if (ioend->io_append_trans)
		error = xfs_setfilesize_ioend(ioend, error);
	xfs_destroy_ioend(ioend, error);
}

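/*
 * Writeback bio completion handler.  Completions that need transaction
 * context (unwritten extent conversion, COW remapping, or an on-disk size
 * update) are punted to a workqueue; everything else is finished here.
 */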
STATIC void
xfs_end_bio(
	struct bio		*bio)
{
	struct xfs_ioend	*ioend = bio->bi_private;
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;

	if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
	else if (ioend->io_append_trans)
		queue_work(mp->m_data_workqueue, &ioend->io_work);
	else
		xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
}

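/*
 * Find or allocate the extent that backs @offset for writeback, caching
 * the result in wpc->imap and classifying it in wpc->io_type (hole,
 * delalloc, unwritten, overwrite or COW).
 */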
STATIC int
xfs_map_blocks(
	struct xfs_writepage_ctx *wpc,
	struct inode		*inode,
	loff_t			offset)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			count = i_blocksize(inode);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
	struct xfs_bmbt_irec	imap;
	int			whichfork = XFS_DATA_FORK;
	struct xfs_iext_cursor	icur;
	bool			imap_valid;
	int			error = 0;

	/*
	 * We have to make sure the cached mapping is within EOF to protect
	 * against eofblocks trimming on file release leaving us with a stale
	 * mapping. Otherwise, a page for a subsequent file extending buffered
	 * write could get picked up by this writeback cycle and written to the
	 * wrong blocks.
	 *
	 * Note that what we really want here is a generic mapping invalidation
	 * mechanism to protect us from arbitrary extent modifying contexts, not
	 * just eofblocks.
	 */
	xfs_trim_extent_eof(&wpc->imap, ip);

	/*
	 * COW fork blocks can overlap data fork blocks even if the blocks
	 * aren't shared.  COW I/O always takes precedence, so we must always
	 * check for overlap on reflink inodes unless the mapping is already a
	 * COW one.
	 */
	imap_valid = offset_fsb >= wpc->imap.br_startoff &&
		offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
	if (imap_valid &&
	    (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
		return 0;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * If we don't have a valid map, now it's time to get a new one for this
	 * offset.  This will convert delayed allocations (including COW ones)
	 * into real extents.  If we return without a valid map, it means we
	 * landed in a hole and we skip the block.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       (ip->i_df.if_flags & XFS_IFEXTENTS));
	ASSERT(offset <= mp->m_super->s_maxbytes);

	if (offset > mp->m_super->s_maxbytes - count)
		count = mp->m_super->s_maxbytes - offset;
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);

	/*
	 * Check if this offset is covered by a COW extent, and if so use it
	 * directly instead of looking up anything in the data fork.
	 */
	if (xfs_inode_has_cow_data(ip) &&
	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
	    imap.br_startoff <= offset_fsb) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		/*
		 * Truncate can race with writeback since writeback doesn't
		 * take the iolock and truncate decreases the file size before
		 * it starts truncating the pages between new_size and old_size.
		 * Therefore, we can end up in the situation where writeback
		 * gets a CoW fork mapping but the truncate makes the mapping
		 * invalid and we end up in here trying to get a new mapping.
		 * Bail out here so that we simply never get a valid mapping
		 * and so we drop the write altogether.  The page truncation
		 * will kill the contents anyway.
		 */
		if (offset > i_size_read(inode)) {
			wpc->io_type = XFS_IO_HOLE;
			return 0;
		}
		whichfork = XFS_COW_FORK;
		wpc->io_type = XFS_IO_COW;
		goto allocate_blocks;
	}

	/*
	 * Map valid and no COW extent in the way?  We're done.
	 */
	if (imap_valid) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return 0;
	}

	/*
	 * If we don't have a valid map, now it's time to get a new one for this
	 * offset.  This will convert delayed allocations (including COW ones)
	 * into real extents.
	 */
	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (imap.br_startoff > offset_fsb) {
		/* landed in a hole or beyond EOF */
		imap.br_blockcount = imap.br_startoff - offset_fsb;
		imap.br_startoff = offset_fsb;
		imap.br_startblock = HOLESTARTBLOCK;
		wpc->io_type = XFS_IO_HOLE;
	} else {
		if (isnullstartblock(imap.br_startblock)) {
			/* got a delalloc extent */
			wpc->io_type = XFS_IO_DELALLOC;
			goto allocate_blocks;
		}

		if (imap.br_state == XFS_EXT_UNWRITTEN)
			wpc->io_type = XFS_IO_UNWRITTEN;
		else
			wpc->io_type = XFS_IO_OVERWRITE;
	}

	wpc->imap = imap;
	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
	return 0;
allocate_blocks:
	error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
	if (error)
		return error;
	wpc->imap = imap;
	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
	return 0;
}

/*
 * Submit the bio for an ioend. We are passed an ioend with a bio attached to
 * it, and we submit that bio. The ioend may be used for multiple bio
 * submissions, so we only want to allocate an append transaction for the ioend
 * once. In the case of multiple bio submission, each bio will take an IO
 * reference to the ioend to ensure that the ioend completion is only done once
 * all bios have been submitted and the ioend is really done.
 *
 * If @status is non-zero, it means that we have a situation where some part of
 * the submission process has failed after we have marked pages for writeback
 * and unlocked them. In this situation, we need to fail the bio and ioend
 * rather than submit it to IO. This typically only happens on a filesystem
 * shutdown.
 */
STATIC int
xfs_submit_ioend(
	struct writeback_control *wbc,
	struct xfs_ioend	*ioend,
	int			status)
{
	/* Convert CoW extents to regular */
	if (!status && ioend->io_type == XFS_IO_COW) {
		/*
		 * Yuk. This can do memory allocation, but is not a
		 * transactional operation so everything is done in GFP_KERNEL
		 * context. That can deadlock, because we hold pages in
		 * writeback state and GFP_KERNEL allocations can block on them.
		 * Hence we must operate in nofs conditions here.
		 */
		unsigned nofs_flag;

		nofs_flag = memalloc_nofs_save();
		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
				ioend->io_offset, ioend->io_size);
		memalloc_nofs_restore(nofs_flag);
	}

	/* Reserve log space if we might write beyond the on-disk inode size. */
	if (!status &&
	    ioend->io_type != XFS_IO_UNWRITTEN &&
	    xfs_ioend_is_append(ioend) &&
	    !ioend->io_append_trans)
		status = xfs_setfilesize_trans_alloc(ioend);

	ioend->io_bio->bi_private = ioend;
	ioend->io_bio->bi_end_io = xfs_end_bio;
	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);

	/*
	 * If we are failing the IO now, just mark the ioend with an
	 * error and finish it. This will run IO completion immediately
	 * as there is only one reference to the ioend at this point in
	 * time.
	 */
	if (status) {
		ioend->io_bio->bi_status = errno_to_blk_status(status);
		bio_endio(ioend->io_bio);
		return status;
	}

	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
	submit_bio(ioend->io_bio);
	return 0;
}

static struct xfs_ioend *
xfs_alloc_ioend(
	struct inode		*inode,
	unsigned int		type,
	xfs_off_t		offset,
	struct block_device	*bdev,
	sector_t		sector)
{
	struct xfs_ioend	*ioend;
	struct bio		*bio;

	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = sector;

	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
	INIT_LIST_HEAD(&ioend->io_list);
	ioend->io_type = type;
	ioend->io_inode = inode;
	ioend->io_size = 0;
	ioend->io_offset = offset;
	INIT_WORK(&ioend->io_work, xfs_end_io);
	ioend->io_append_trans = NULL;
	ioend->io_bio = bio;
	return ioend;
}

/*
 * Allocate a new bio, and chain the old bio to the new one.
 *
 * Note that we have to perform the chaining in this unintuitive order
 * so that the bi_private linkage is set up in the right direction for the
 * traversal in xfs_destroy_ioend().
 */
static void
xfs_chain_bio(
	struct xfs_ioend	*ioend,
	struct writeback_control *wbc,
	struct block_device	*bdev,
	sector_t		sector)
{
	struct bio *new;

	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
	bio_set_dev(new, bdev);
	new->bi_iter.bi_sector = sector;
	bio_chain(ioend->io_bio, new);
	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
	submit_bio(ioend->io_bio);
	ioend->io_bio = new;
}

/*
 * Test to see if we have an existing ioend structure that we could append to
 * first, otherwise finish off the current ioend and start another.
 */
STATIC void
xfs_add_to_ioend(
	struct inode		*inode,
	xfs_off_t		offset,
	struct page		*page,
	struct iomap_page	*iop,
	struct xfs_writepage_ctx *wpc,
	struct writeback_control *wbc,
	struct list_head	*iolist)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
	unsigned		len = i_blocksize(inode);
	unsigned		poff = offset & (PAGE_SIZE - 1);
	sector_t		sector;

	sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
		((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9);

	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
	    sector != bio_end_sector(wpc->ioend->io_bio) ||
	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
		if (wpc->ioend)
			list_add(&wpc->ioend->io_list, iolist);
		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset,
				bdev, sector);
	}

	if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
		if (iop)
			atomic_inc(&iop->write_count);
		if (bio_full(wpc->ioend->io_bio))
			xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
		__bio_add_page(wpc->ioend->io_bio, page, len, poff);
	}

	wpc->ioend->io_size += len;
}

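/*
 * Trace the invalidation and let the generic iomap code do the actual
 * page invalidation work.
 */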
STATIC void
xfs_vm_invalidatepage(
	struct page		*page,
	unsigned int		offset,
	unsigned int		length)
{
	trace_xfs_invalidatepage(page->mapping->host, page, offset, length);
	iomap_invalidatepage(page, offset, length);
}

/*
 * If the page has delalloc blocks on it, we need to punch them out before we
 * invalidate the page.  If we don't, we leave a stale delalloc mapping on the
 * inode that can trip up a later direct I/O read operation on the same region.
 *
 * We prevent this by truncating away the delalloc regions on the page.  Because
 * they are delalloc, we can do this without needing a transaction.  Indeed - if
 * we get ENOSPC errors, we have to be able to do this truncation without a
 * transaction as there is no space left for block reservation (typically why we
 * see an ENOSPC in writeback).
 */
STATIC void
xfs_aops_discard_page(
	struct page		*page)
{
	struct inode		*inode = page->mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	loff_t			offset = page_offset(page);
	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, offset);
	int			error;

	if (XFS_FORCED_SHUTDOWN(mp))
		goto out_invalidate;

	xfs_alert(mp,
		"page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
			page, ip->i_ino, offset);

	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
			PAGE_SIZE / i_blocksize(inode));
	if (error && !XFS_FORCED_SHUTDOWN(mp))
		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
out_invalidate:
	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
}

/*
 * We implement an immediate ioend submission policy here to avoid needing to
 * chain multiple ioends and hence nest mempool allocations which can violate
 * forward progress guarantees we need to provide. The current ioend we are
 * adding blocks to is cached on the writepage context, and if the new block
 * does not append to the cached ioend it will create a new ioend and cache that
 * instead.
 *
 * If a new ioend is created and cached, the old ioend is returned and queued
 * locally for submission once the entire page is processed or an error has been
 * detected. While ioends are submitted immediately after they are completed,
 * batching optimisations are provided by higher level block plugging.
 *
 * At the end of a writeback pass, there will be a cached ioend remaining on the
 * writepage context that the caller will need to submit.
 */
static int
xfs_writepage_map(
	struct xfs_writepage_ctx *wpc,
	struct writeback_control *wbc,
	struct inode		*inode,
	struct page		*page,
	uint64_t		end_offset)
{
	LIST_HEAD(submit_list);
	struct iomap_page	*iop = to_iomap_page(page);
	unsigned		len = i_blocksize(inode);
	struct xfs_ioend	*ioend, *next;
	uint64_t		file_offset;	/* file offset of page */
	int			error = 0, count = 0, i;

	ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
	ASSERT(!iop || atomic_read(&iop->write_count) == 0);

	/*
	 * Walk through the page to find areas to write back. If we run off the
	 * end of the current map or find the current map invalid, grab a new
	 * one.
	 */
	for (i = 0, file_offset = page_offset(page);
	     i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
	     i++, file_offset += len) {
		if (iop && !test_bit(i, iop->uptodate))
			continue;

		error = xfs_map_blocks(wpc, inode, file_offset);
		if (error)
			break;
		if (wpc->io_type == XFS_IO_HOLE)
			continue;
		xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
				&submit_list);
		count++;
	}

	ASSERT(wpc->ioend || list_empty(&submit_list));
	ASSERT(PageLocked(page));
	ASSERT(!PageWriteback(page));

	/*
	 * On error, we have to fail the ioend here because we may have set
	 * pages under writeback, we have to make sure we run IO completion to
	 * mark the error state of the IO appropriately, so we can't cancel the
	 * ioend directly here.  That means we have to mark this page as under
	 * writeback if we included any blocks from it in the ioend chain so
	 * that completion treats it correctly.
	 *
	 * If we didn't include the page in the ioend, then on error we can
	 * simply discard and unlock it as there are no other users of the page
	 * now.  The caller will still need to trigger submission of outstanding
	 * ioends on the writepage context so they are treated correctly on
	 * error.
	 */
	if (unlikely(error)) {
		if (!count) {
			xfs_aops_discard_page(page);
			ClearPageUptodate(page);
			unlock_page(page);
			goto done;
		}

		/*
		 * If the page was not fully cleaned, we need to ensure that the
		 * higher layers come back to it correctly.  That means we need
		 * to keep the page dirty, and for WB_SYNC_ALL writeback we need
		 * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
		 * so another attempt to write this page in this writeback sweep
		 * will be made.
		 */
		set_page_writeback_keepwrite(page);
	} else {
		clear_page_dirty_for_io(page);
		set_page_writeback(page);
	}

	unlock_page(page);

	/*
	 * Preserve the original error if there was one, otherwise catch
	 * submission errors here and propagate into subsequent ioend
	 * submissions.
	 */
	list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
		int error2;

		list_del_init(&ioend->io_list);
		error2 = xfs_submit_ioend(wbc, ioend, error);
		if (error2 && !error)
			error = error2;
	}

	/*
	 * We can end up here with no error and nothing to write only if we race
	 * with a partial page truncate on a sub-page block sized filesystem.
	 */
	if (!count)
		end_page_writeback(page);
done:
	mapping_set_error(page->mapping, error);
	return error;
}

/*
 * Write out a dirty page.
 *
 * For delalloc space on the page we need to allocate space and flush it.
 * For unwritten space on the page we need to start the conversion to
 * regular allocated space.
 */
STATIC int
xfs_do_writepage(
	struct page		*page,
	struct writeback_control *wbc,
	void			*data)
{
	struct xfs_writepage_ctx *wpc = data;
	struct inode		*inode = page->mapping->host;
	loff_t			offset;
	uint64_t		end_offset;
	pgoff_t			end_index;

	trace_xfs_writepage(inode, page, 0, 0);

	/*
	 * Refuse to write the page out if we are called from reclaim context.
	 *
	 * This avoids stack overflows when called from deeply used stacks in
	 * random callers for direct reclaim or memcg reclaim.  We explicitly
	 * allow reclaim from kswapd as the stack usage there is relatively low.
	 *
	 * This should never happen except in the case of a VM regression so
	 * warn about it.
	 */
	if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
			PF_MEMALLOC))
		goto redirty;

	/*
	 * Given that we do not allow direct reclaim to call us, we should
	 * never be called while in a filesystem transaction.
	 */
	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
		goto redirty;

	/*
	 * Is this page beyond the end of the file?
	 *
	 * The page index is less than the end_index, adjust the end_offset
	 * to the highest offset that this page should represent.
	 * -----------------------------------------------------
	 * |			file mapping	       | <EOF> |
	 * -----------------------------------------------------
	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
	 * ^--------------------------------^----------|--------
	 * |     desired writeback range    |      see else    |
	 * ---------------------------------^------------------|
	 */
	offset = i_size_read(inode);
	end_index = offset >> PAGE_SHIFT;
	if (page->index < end_index)
		end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
	else {
		/*
		 * Check whether the page to write out is beyond or straddles
		 * i_size or not.
		 * -------------------------------------------------------
		 * |		file mapping		        | <EOF>  |
		 * -------------------------------------------------------
		 * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
		 * ^--------------------------------^-----------|---------
		 * |				    | Straddles  |
		 * ---------------------------------^-----------|--------|
		 */
		unsigned offset_into_page = offset & (PAGE_SIZE - 1);

		/*
		 * Skip the page if it is fully outside i_size, e.g. due to a
		 * truncate operation that is in progress.  We must redirty the
		 * page so that reclaim stops reclaiming it.  Otherwise
		 * xfs_vm_releasepage() is called on it and gets confused.
		 *
		 * Note that the end_index is unsigned long.  It would overflow
		 * if the given offset is greater than 16TB on a 32-bit system,
		 * and if we checked the page is fully outside i_size via
		 * "if (page->index >= end_index + 1)", then "end_index + 1"
		 * would be evaluated to 0.  Hence this page would be redirtied
		 * and written out repeatedly, resulting in an infinite loop;
		 * the user program that performs this operation would hang.
		 * Instead, we can verify this situation by checking if the
		 * page to write is totally beyond the i_size or if its offset
		 * is just equal to the EOF.
		 */
		if (page->index > end_index ||
		    (page->index == end_index && offset_into_page == 0))
			goto redirty;

		/*
		 * The page straddles i_size.  It must be zeroed out on each
		 * and every writepage invocation because it may be mmapped.
		 * "A file is mapped in multiples of the page size.  For a file
		 * that is not a multiple of the page size, the remaining
		 * memory is zeroed when mapped, and writes to that region are
		 * not written out to the file."
		 */
		zero_user_segment(page, offset_into_page, PAGE_SIZE);

		/* Adjust the end_offset to the end of file */
		end_offset = offset;
	}

	return xfs_writepage_map(wpc, wbc, inode, page, end_offset);

redirty:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
}

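/*
 * Write a single dirty page using a local writepage context, then submit
 * any ioend left cached on that context.
 */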
STATIC int
xfs_vm_writepage(
	struct page		*page,
	struct writeback_control *wbc)
{
	struct xfs_writepage_ctx wpc = {
		.io_type = XFS_IO_INVALID,
	};
	int			ret;

	ret = xfs_do_writepage(page, wbc, &wpc);
	if (wpc.ioend)
		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
	return ret;
}

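/*
 * Write back a range of the mapping via write_cache_pages(), sharing one
 * writepage context across all pages, and submit the final cached ioend.
 */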
STATIC int
xfs_vm_writepages(
	struct address_space	*mapping,
	struct writeback_control *wbc)
{
	struct xfs_writepage_ctx wpc = {
		.io_type = XFS_IO_INVALID,
	};
	int			ret;

	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
	if (wpc.ioend)
		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
	return ret;
}

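/*
 * DAX inodes have no page cache to write back; hand the mapping straight
 * to dax_writeback_mapping_range() to flush dirty DAX entries.
 */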
STATIC int
xfs_dax_writepages(
	struct address_space	*mapping,
	struct writeback_control *wbc)
{
	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
	return dax_writeback_mapping_range(mapping,
			xfs_find_bdev_for_inode(mapping->host), wbc);
}

STATIC int
xfs_vm_releasepage(
	struct page		*page,
	gfp_t			gfp_mask)
{
	trace_xfs_releasepage(page->mapping->host, page, 0, 0);
	return iomap_releasepage(page, gfp_mask);
}

STATIC sector_t
xfs_vm_bmap(
	struct address_space	*mapping,
	sector_t		block)
{
	struct xfs_inode	*ip = XFS_I(mapping->host);

	trace_xfs_vm_bmap(ip);

	/*
	 * The swap code (ab-)uses ->bmap to get a block mapping and then
	 * bypasses the file system for actual I/O.  We really can't allow
	 * that on reflink inodes, so we have to skip out here.  And yes,
	 * 0 is the magic code for a bmap error.
	 *
	 * Since we don't pass back blockdev info, we can't return bmap
	 * information for rt files either.
	 */
	if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
		return 0;
	return iomap_bmap(mapping, block, &xfs_iomap_ops);
}

STATIC int
xfs_vm_readpage(
	struct file		*unused,
	struct page		*page)
{
	trace_xfs_vm_readpage(page->mapping->host, 1);
	return iomap_readpage(page, &xfs_iomap_ops);
}

STATIC int
xfs_vm_readpages(
	struct file		*unused,
	struct address_space	*mapping,
	struct list_head	*pages,
	unsigned		nr_pages)
{
	trace_xfs_vm_readpages(mapping->host, nr_pages);
	return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
}

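/*
 * Tell the swap code which block device backs this file and let the
 * generic iomap helper build the swap extent list.
 */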
static int
xfs_iomap_swapfile_activate(
	struct swap_info_struct		*sis,
	struct file			*swap_file,
	sector_t			*span)
{
	sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
	return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
}

const struct address_space_operations xfs_address_space_operations = {
	.readpage		= xfs_vm_readpage,
	.readpages		= xfs_vm_readpages,
	.writepage		= xfs_vm_writepage,
	.writepages		= xfs_vm_writepages,
	.set_page_dirty		= iomap_set_page_dirty,
	.releasepage		= xfs_vm_releasepage,
	.invalidatepage		= xfs_vm_invalidatepage,
	.bmap			= xfs_vm_bmap,
	.direct_IO		= noop_direct_IO,
	.migratepage		= iomap_migrate_page,
	.is_partially_uptodate	= iomap_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
	.swap_activate		= xfs_iomap_swapfile_activate,
};

const struct address_space_operations xfs_dax_aops = {
	.writepages		= xfs_dax_writepages,
	.direct_IO		= noop_direct_IO,
	.set_page_dirty		= noop_set_page_dirty,
	.invalidatepage		= noop_invalidatepage,
	.swap_activate		= xfs_iomap_swapfile_activate,
};