fs/netfs/write_issue.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Network filesystem high-level (buffered) writeback.
   3  *
   4  * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
   5  * Written by David Howells (dhowells@redhat.com)
   6  *
   7  *
   8  * To support network filesystems with local caching, we manage a situation
   9  * that can be envisioned like the following:
  10  *
  11  *               +---+---+-----+-----+---+----------+
  12  *    Folios:    |   |   |     |     |   |          |
  13  *               +---+---+-----+-----+---+----------+
  14  *
  15  *                 +------+------+     +----+----+
  16  *    Upload:      |      |      |.....|    |    |
  17  *  (Stream 0)     +------+------+     +----+----+
  18  *
  19  *               +------+------+------+------+------+
  20  *    Cache:     |      |      |      |      |      |
  21  *  (Stream 1)   +------+------+------+------+------+
  22  *
  23  * Where we have a sequence of folios of varying sizes that we need to overlay
  24  * with multiple parallel streams of I/O requests, where the I/O requests in a
  25  * stream may also be of various sizes (in cifs, for example, the sizes are
  26  * negotiated with the server; in something like ceph, they may represent the
  27  * sizes of storage objects).
  28  *
  29  * The sequence in each stream may contain gaps and noncontiguous subrequests
  30  * may be glued together into single vectored write RPCs.
  31  */
  32
  33 #include <linux/export.h>
  34 #include <linux/fs.h>
  35 #include <linux/mm.h>
  36 #include <linux/pagemap.h>
  37 #include "internal.h"
  38
  39 /*
  40  * Kill all dirty folios in the event of an unrecoverable error, starting with
  41  * a locked folio we've already obtained from writeback_iter().
  42  */
  43 static void netfs_kill_dirty_pages(struct address_space *mapping,
  44                                    struct writeback_control *wbc,
  45                                    struct folio *folio)
  46 {
  47         int error = 0;
  48
  49         do {
  50                 enum netfs_folio_trace why = netfs_folio_trace_kill;
  51                 struct netfs_group *group = NULL;
  52                 struct netfs_folio *finfo = NULL;
  53                 void *priv;
  54
  55                 priv = folio_detach_private(folio);
  56                 if (priv) {
  57                         finfo = __netfs_folio_info(priv);
  58                         if (finfo) {
  59                                 /* Kill folio from streaming write. */
  60                                 group = finfo->netfs_group;
  61                                 why = netfs_folio_trace_kill_s;
  62                         } else {
  63                                 group = priv;
  64                                 if (group == NETFS_FOLIO_COPY_TO_CACHE) {
  65                                         /* Kill copy-to-cache folio */
  66                                         why = netfs_folio_trace_kill_cc;
  67                                         group = NULL;
  68                                 } else {
  69                                         /* Kill folio with group */
  70                                         why = netfs_folio_trace_kill_g;
  71                                 }
  72                         }
  73                 }
  74
  75                 trace_netfs_folio(folio, why);
  76
  77                 folio_start_writeback(folio);
  78                 folio_unlock(folio);
  79                 folio_end_writeback(folio);
  80
  81                 netfs_put_group(group);
  82                 kfree(finfo);
  83
  84         } while ((folio = writeback_iter(mapping, wbc, folio, &error)));
  85 }
  86
  87 /*
  88  * Create a write request and set it up appropriately for the origin type.
  89  */
  90 struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
  91                                                 struct file *file,
  92                                                 loff_t start,
  93                                                 enum netfs_io_origin origin)
  94 {
  95         struct netfs_io_request *wreq;
  96         struct netfs_inode *ictx;
  97         bool is_buffered = (origin == NETFS_WRITEBACK ||
  98                             origin == NETFS_WRITETHROUGH ||
  99                             origin == NETFS_PGPRIV2_COPY_TO_CACHE);
 100
 101         wreq = netfs_alloc_request(mapping, file, start, 0, origin);
 102         if (IS_ERR(wreq))
 103                 return wreq;
 104
 105         _enter("R=%x", wreq->debug_id);
 106
 107         ictx = netfs_inode(wreq->inode);
 108         if (is_buffered && netfs_is_cache_enabled(ictx))
 109                 fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
 110
 111         wreq->cleaned_to = wreq->start;
 112
 113         wreq->io_streams[0].stream_nr           = 0;
 114         wreq->io_streams[0].source              = NETFS_UPLOAD_TO_SERVER;
 115         wreq->io_streams[0].prepare_write       = ictx->ops->prepare_write;
 116         wreq->io_streams[0].issue_write         = ictx->ops->issue_write;
 117         wreq->io_streams[0].collected_to        = start;
 118         wreq->io_streams[0].transferred         = LONG_MAX;
 119
 120         wreq->io_streams[1].stream_nr           = 1;
 121         wreq->io_streams[1].source              = NETFS_WRITE_TO_CACHE;
 122         wreq->io_streams[1].collected_to        = start;
 123         wreq->io_streams[1].transferred         = LONG_MAX;
 124         if (fscache_resources_valid(&wreq->cache_resources)) {
 125                 wreq->io_streams[1].avail       = true;
 126                 wreq->io_streams[1].active      = true;
 127                 wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
 128                 wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
 129         }
 130
 131         return wreq;
 132 }
 133
 134 /**
 135  * netfs_prepare_write_failed - Note write preparation failed
 136  * @subreq: The subrequest to mark
 137  *
 138  * Mark a subrequest to note that preparation for write failed.
 139  */
 140 void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq)
 141 {
 142         __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
 143         trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed);
 144 }
 145 EXPORT_SYMBOL(netfs_prepare_write_failed);
 146
 147 /*
 148  * Prepare a write subrequest.  We need to allocate a new subrequest
 149  * if we don't have one.
 150  */
 151 static void netfs_prepare_write(struct netfs_io_request *wreq,
 152                                 struct netfs_io_stream *stream,
 153                                 loff_t start)
 154 {
 155         struct netfs_io_subrequest *subreq;
 156         struct iov_iter *wreq_iter = &wreq->io_iter;
 157
 158         /* Make sure we don't point the iterator at a used-up folio_queue
 159          * struct being used as a placeholder to prevent the queue from
 160          * collapsing.  In such a case, extend the queue.
 161          */
 162         if (iov_iter_is_folioq(wreq_iter) &&
 163             wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq)) {
 164                 netfs_buffer_make_space(wreq);
 165         }
 166
 167         subreq = netfs_alloc_subrequest(wreq);
 168         subreq->source          = stream->source;
 169         subreq->start           = start;
 170         subreq->stream_nr       = stream->stream_nr;
 171         subreq->io_iter         = *wreq_iter;
 172
 173         _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
 174
 175         trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
 176
 177         stream->sreq_max_len    = UINT_MAX;
 178         stream->sreq_max_segs   = INT_MAX;
 179         switch (stream->source) {
 180         case NETFS_UPLOAD_TO_SERVER:
 181                 netfs_stat(&netfs_n_wh_upload);
 182                 stream->sreq_max_len = wreq->wsize;
 183                 break;
 184         case NETFS_WRITE_TO_CACHE:
 185                 netfs_stat(&netfs_n_wh_write);
 186                 break;
 187         default:
 188                 WARN_ON_ONCE(1);
 189                 break;
 190         }
 191
 192         if (stream->prepare_write)
 193                 stream->prepare_write(subreq);
 194
 195         __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
 196
 197         /* We add to the end of the list whilst the collector may be walking
 198          * the list.  The collector only goes nextwards and uses the lock to
 199          * remove entries off of the front.
 200          */
 201         spin_lock_bh(&wreq->lock);
 202         list_add_tail(&subreq->rreq_link, &stream->subrequests);
 203         if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
 204                 stream->front = subreq;
 205                 if (!stream->active) {
 206                         stream->collected_to = stream->front->start;
 207                         /* Write list pointers before active flag */
 208                         smp_store_release(&stream->active, true);
 209                 }
 210         }
 211
 212         spin_unlock_bh(&wreq->lock);
 213
 214         stream->construct = subreq;
 215 }
 216
 217 /*
 218  * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O
 219  * operation.  The operation may be asynchronous and should call
 220  * netfs_write_subrequest_terminated() when complete.
 221  */
 222 static void netfs_do_issue_write(struct netfs_io_stream *stream,
 223                                  struct netfs_io_subrequest *subreq)
 224 {
 225         struct netfs_io_request *wreq = subreq->rreq;
 226
 227         _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
 228
 229         if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
 230                 return netfs_write_subrequest_terminated(subreq, subreq->error, false);
 231
 232         trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
 233         stream->issue_write(subreq);
 234 }
 235
 236 void netfs_reissue_write(struct netfs_io_stream *stream,
 237                          struct netfs_io_subrequest *subreq,
 238                          struct iov_iter *source)
 239 {
 240         size_t size = subreq->len - subreq->transferred;
 241
 242         // TODO: Use encrypted buffer
 243         subreq->io_iter = *source;
 244         iov_iter_advance(source, size);
 245         iov_iter_truncate(&subreq->io_iter, size);
 246
 247         subreq->retry_count++;
 248         __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
 249         __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
 250         netfs_do_issue_write(stream, subreq);
 251 }
 252
 253 void netfs_issue_write(struct netfs_io_request *wreq,
 254                        struct netfs_io_stream *stream)
 255 {
 256         struct netfs_io_subrequest *subreq = stream->construct;
 257
 258         if (!subreq)
 259                 return;
 260         stream->construct = NULL;
 261         subreq->io_iter.count = subreq->len;
 262         netfs_do_issue_write(stream, subreq);
 263 }
 264
 265 /*
 266  * Add data to the write subrequest, dispatching each as we fill it up or if it
 267  * is discontiguous with the previous.  We only fill one part at a time so that
 268  * we can avoid overrunning the credits obtained (cifs) and try to parallelise
 269  * content-crypto preparation with network writes.
 270  */
 271 int netfs_advance_write(struct netfs_io_request *wreq,
 272                         struct netfs_io_stream *stream,
 273                         loff_t start, size_t len, bool to_eof)
 274 {
 275         struct netfs_io_subrequest *subreq = stream->construct;
 276         size_t part;
 277
 278         if (!stream->avail) {
 279                 _leave("no write");
 280                 return len;
 281         }
 282
 283         _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
 284
 285         if (subreq && start != subreq->start + subreq->len) {
 286                 netfs_issue_write(wreq, stream);
 287                 subreq = NULL;
 288         }
 289
 290         if (!stream->construct)
 291                 netfs_prepare_write(wreq, stream, start);
 292         subreq = stream->construct;
 293
 294         part = umin(stream->sreq_max_len - subreq->len, len);
 295         _debug("part %zx/%zx %zx/%zx", subreq->len, stream->sreq_max_len, part, len);
 296         subreq->len += part;
 297         subreq->nr_segs++;
 298         stream->submit_extendable_to -= part;
 299
 300         if (subreq->len >= stream->sreq_max_len ||
 301             subreq->nr_segs >= stream->sreq_max_segs ||
 302             to_eof) {
 303                 netfs_issue_write(wreq, stream);
 304                 subreq = NULL;
 305         }
 306
 307         return part;
 308 }
 309
 310 /*
 311  * Write some of a pending folio data back to the server.
 312  */
 313 static int netfs_write_folio(struct netfs_io_request *wreq,
 314                              struct writeback_control *wbc,
 315                              struct folio *folio)
 316 {
 317         struct netfs_io_stream *upload = &wreq->io_streams[0];
 318         struct netfs_io_stream *cache  = &wreq->io_streams[1];
 319         struct netfs_io_stream *stream;
 320         struct netfs_group *fgroup; /* TODO: Use this with ceph */
 321         struct netfs_folio *finfo;
 322         size_t iter_off = 0;
 323         size_t fsize = folio_size(folio), flen = fsize, foff = 0;
 324         loff_t fpos = folio_pos(folio), i_size;
 325         bool to_eof = false, streamw = false;
 326         bool debug = false;
 327
 328         _enter("");
 329
 330         /* netfs_perform_write() may shift i_size around the page or from out
 331          * of the page to beyond it, but cannot move i_size into or through the
 332          * page since we have it locked.
 333          */
 334         i_size = i_size_read(wreq->inode);
 335
 336         if (fpos >= i_size) {
 337                 /* mmap beyond eof. */
 338                 _debug("beyond eof");
 339                 folio_start_writeback(folio);
 340                 folio_unlock(folio);
 341                 wreq->nr_group_rel += netfs_folio_written_back(folio);
 342                 netfs_put_group_many(wreq->group, wreq->nr_group_rel);
 343                 wreq->nr_group_rel = 0;
 344                 return 0;
 345         }
 346
 347         if (fpos + fsize > wreq->i_size)
 348                 wreq->i_size = i_size;
 349
 350         fgroup = netfs_folio_group(folio);
 351         finfo = netfs_folio_info(folio);
 352         if (finfo) {
 353                 foff = finfo->dirty_offset;
 354                 flen = foff + finfo->dirty_len;
 355                 streamw = true;
 356         }
 357
 358         if (wreq->origin == NETFS_WRITETHROUGH) {
 359                 to_eof = false;
 360                 if (flen > i_size - fpos)
 361                         flen = i_size - fpos;
 362         } else if (flen > i_size - fpos) {
 363                 flen = i_size - fpos;
 364                 if (!streamw)
 365                         folio_zero_segment(folio, flen, fsize);
 366                 to_eof = true;
 367         } else if (flen == i_size - fpos) {
 368                 to_eof = true;
 369         }
 370         flen -= foff;
 371
 372         _debug("folio %zx %zx %zx", foff, flen, fsize);
 373
 374         /* Deal with discontinuities in the stream of dirty pages.  These can
 375          * arise from a number of sources:
 376          *
 377          * (1) Intervening non-dirty pages from random-access writes, multiple
 378          *     flushers writing back different parts simultaneously and manual
 379          *     syncing.
 380          *
 381          * (2) Partially-written pages from write-streaming.
 382          *
 383          * (3) Pages that belong to a different write-back group (eg.  Ceph
 384          *     snapshots).
 385          *
 386          * (4) Actually-clean pages that were marked for write to the cache
 387          *     when they were read.  Note that these appear as a special
 388          *     write-back group.
 389          */
 390         if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
 391                 netfs_issue_write(wreq, upload);
 392         } else if (fgroup != wreq->group) {
 393                 /* We can't write this page to the server yet. */
 394                 kdebug("wrong group");
 395                 folio_redirty_for_writepage(wbc, folio);
 396                 folio_unlock(folio);
 397                 netfs_issue_write(wreq, upload);
 398                 netfs_issue_write(wreq, cache);
 399                 return 0;
 400         }
 401
 402         if (foff > 0)
 403                 netfs_issue_write(wreq, upload);
 404         if (streamw)
 405                 netfs_issue_write(wreq, cache);
 406
 407         /* Flip the page to the writeback state and unlock.  If we're called
 408          * from write-through, then the page has already been put into the wb
 409          * state.
 410          */
 411         if (wreq->origin == NETFS_WRITEBACK)
 412                 folio_start_writeback(folio);
 413         folio_unlock(folio);
 414
 415         if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
 416                 if (!cache->avail) {
 417                         trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
 418                         netfs_issue_write(wreq, upload);
 419                         netfs_folio_written_back(folio);
 420                         return 0;
 421                 }
 422                 trace_netfs_folio(folio, netfs_folio_trace_store_copy);
 423         } else if (!upload->avail && !cache->avail) {
 424                 trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
 425                 netfs_folio_written_back(folio);
 426                 return 0;
 427         } else if (!upload->construct) {
 428                 trace_netfs_folio(folio, netfs_folio_trace_store);
 429         } else {
 430                 trace_netfs_folio(folio, netfs_folio_trace_store_plus);
 431         }
 432
 433         /* Attach the folio to the rolling buffer. */
 434         netfs_buffer_append_folio(wreq, folio, false);
 435
 436         /* Move the submission point forward to allow for write-streaming data
 437          * not starting at the front of the page.  We don't do write-streaming
 438          * with the cache as the cache requires DIO alignment.
 439          *
 440          * Also skip uploading for data that's been read and just needs copying
 441          * to the cache.
 442          */
 443         for (int s = 0; s < NR_IO_STREAMS; s++) {
 444                 stream = &wreq->io_streams[s];
 445                 stream->submit_off = foff;
 446                 stream->submit_len = flen;
 447                 if ((stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
 448                     (stream->source == NETFS_UPLOAD_TO_SERVER &&
 449                      fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
 450                         stream->submit_off = UINT_MAX;
 451                         stream->submit_len = 0;
 452                 }
 453         }
 454
 455         /* Attach the folio to one or more subrequests.  For a big folio, we
 456          * could end up with thousands of subrequests if the wsize is small -
 457          * but we might need to wait during the creation of subrequests for
 458          * network resources (eg. SMB credits).
 459          */
 460         for (;;) {
 461                 ssize_t part;
 462                 size_t lowest_off = ULONG_MAX;
 463                 int choose_s = -1;
 464
 465                 /* Always add to the lowest-submitted stream first. */
 466                 for (int s = 0; s < NR_IO_STREAMS; s++) {
 467                         stream = &wreq->io_streams[s];
 468                         if (stream->submit_len > 0 &&
 469                             stream->submit_off < lowest_off) {
 470                                 lowest_off = stream->submit_off;
 471                                 choose_s = s;
 472                         }
 473                 }
 474
 475                 if (choose_s < 0)
 476                         break;
 477                 stream = &wreq->io_streams[choose_s];
 478
 479                 /* Advance the iterator(s). */
 480                 if (stream->submit_off > iter_off) {
 481                         iov_iter_advance(&wreq->io_iter, stream->submit_off - iter_off);
 482                         iter_off = stream->submit_off;
 483                 }
 484
 485                 atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
 486                 stream->submit_extendable_to = fsize - stream->submit_off;
 487                 part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
 488                                            stream->submit_len, to_eof);
 489                 stream->submit_off += part;
 490                 if (part > stream->submit_len)
 491                         stream->submit_len = 0;
 492                 else
 493                         stream->submit_len -= part;
 494                 if (part > 0)
 495                         debug = true;
 496         }
 497
 498         if (fsize > iter_off)
 499                 iov_iter_advance(&wreq->io_iter, fsize - iter_off);
 500         atomic64_set(&wreq->issued_to, fpos + fsize);
 501
 502         if (!debug)
 503                 kdebug("R=%x: No submit", wreq->debug_id);
 504
 505         if (foff + flen < fsize)
 506                 for (int s = 0; s < NR_IO_STREAMS; s++)
 507                         netfs_issue_write(wreq, &wreq->io_streams[s]);
 508
 509         _leave(" = 0");
 510         return 0;
 511 }
 512
 513 /*
 514  * End the issuing of writes, letting the collector know we're done.
 515  */
 516 static void netfs_end_issue_write(struct netfs_io_request *wreq)
 517 {
 518         bool needs_poke = true;
 519
 520         smp_wmb(); /* Write subreq lists before ALL_QUEUED. */
 521         set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
 522
 523         for (int s = 0; s < NR_IO_STREAMS; s++) {
 524                 struct netfs_io_stream *stream = &wreq->io_streams[s];
 525
 526                 if (!stream->active)
 527                         continue;
 528                 if (!list_empty(&stream->subrequests))
 529                         needs_poke = false;
 530                 netfs_issue_write(wreq, stream);
 531         }
 532
 533         if (needs_poke)
 534                 netfs_wake_write_collector(wreq, false);
 535 }
 536
 537 /*
 538  * Write some of the pending data back to the server
 539  */
 540 int netfs_writepages(struct address_space *mapping,
 541                      struct writeback_control *wbc)
 542 {
 543         struct netfs_inode *ictx = netfs_inode(mapping->host);
 544         struct netfs_io_request *wreq = NULL;
 545         struct folio *folio;
 546         int error = 0;
 547
 548         if (!mutex_trylock(&ictx->wb_lock)) {
 549                 if (wbc->sync_mode == WB_SYNC_NONE) {
 550                         netfs_stat(&netfs_n_wb_lock_skip);
 551                         return 0;
 552                 }
 553                 netfs_stat(&netfs_n_wb_lock_wait);
 554                 mutex_lock(&ictx->wb_lock);
 555         }
 556
 557         /* Need the first folio to be able to set up the op. */
 558         folio = writeback_iter(mapping, wbc, NULL, &error);
 559         if (!folio)
 560                 goto out;
 561
 562         wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
 563         if (IS_ERR(wreq)) {
 564                 error = PTR_ERR(wreq);
 565                 goto couldnt_start;
 566         }
 567
 568         trace_netfs_write(wreq, netfs_write_trace_writeback);
 569         netfs_stat(&netfs_n_wh_writepages);
 570
 571         do {
 572                 _debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));
 573
 574                 /* It appears we don't have to handle cyclic writeback wrapping. */
 575                 WARN_ON_ONCE(wreq && folio_pos(folio) < atomic64_read(&wreq->issued_to));
 576
 577                 if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE &&
 578                     unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) {
 579                         set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
 580                         wreq->netfs_ops->begin_writeback(wreq);
 581                 }
 582
 583                 error = netfs_write_folio(wreq, wbc, folio);
 584                 if (error < 0)
 585                         break;
 586         } while ((folio = writeback_iter(mapping, wbc, folio, &error)));
 587
 588         netfs_end_issue_write(wreq);
 589
 590         mutex_unlock(&ictx->wb_lock);
 591
 592         netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
 593         _leave(" = %d", error);
 594         return error;
 595
 596 couldnt_start:
 597         netfs_kill_dirty_pages(mapping, wbc, folio);
 598 out:
 599         mutex_unlock(&ictx->wb_lock);
 600         _leave(" = %d", error);
 601         return error;
 602 }
 603 EXPORT_SYMBOL(netfs_writepages);
 604
 605 /*
 606  * Begin a write operation for writing through the pagecache.
 607  */
 608 struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
 609 {
 610         struct netfs_io_request *wreq = NULL;
 611         struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));
 612
 613         mutex_lock(&ictx->wb_lock);
 614
 615         wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp,
 616                                       iocb->ki_pos, NETFS_WRITETHROUGH);
 617         if (IS_ERR(wreq)) {
 618                 mutex_unlock(&ictx->wb_lock);
 619                 return wreq;
 620         }
 621
 622         wreq->io_streams[0].avail = true;
 623         trace_netfs_write(wreq, netfs_write_trace_writethrough);
 624         return wreq;
 625 }
 626
 627 /*
 628  * Advance the state of the write operation used when writing through the
 629  * pagecache.  Data has been copied into the pagecache that we need to append
 630  * to the request.  If we've added more than wsize then we need to create a new
 631  * subrequest.
 632  */
 633 int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
 634                                struct folio *folio, size_t copied, bool to_page_end,
 635                                struct folio **writethrough_cache)
 636 {
 637         _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
 638                wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
 639
 640         if (!*writethrough_cache) {
 641                 if (folio_test_dirty(folio))
 642                         /* Sigh.  mmap. */
 643                         folio_clear_dirty_for_io(folio);
 644
 645                 /* We can make multiple writes to the folio... */
 646                 folio_start_writeback(folio);
 647                 if (wreq->len == 0)
 648                         trace_netfs_folio(folio, netfs_folio_trace_wthru);
 649                 else
 650                         trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
 651                 *writethrough_cache = folio;
 652         }
 653
 654         wreq->len += copied;
 655         if (!to_page_end)
 656                 return 0;
 657
 658         *writethrough_cache = NULL;
 659         return netfs_write_folio(wreq, wbc, folio);
 660 }
 661
 662 /*
 663  * End a write operation used when writing through the pagecache.
 664  */
 665 int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
 666                            struct folio *writethrough_cache)
 667 {
 668         struct netfs_inode *ictx = netfs_inode(wreq->inode);
 669         int ret;
 670
 671         _enter("R=%x", wreq->debug_id);
 672
 673         if (writethrough_cache)
 674                 netfs_write_folio(wreq, wbc, writethrough_cache);
 675
 676         netfs_end_issue_write(wreq);
 677
 678         mutex_unlock(&ictx->wb_lock);
 679
 680         if (wreq->iocb) {
 681                 ret = -EIOCBQUEUED;
 682         } else {
 683                 wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
 684                 ret = wreq->error;
 685         }
 686         netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
 687         return ret;
 688 }
 689
 690 /*
 691  * Write data to the server without going through the pagecache and without
 692  * writing it to the local cache.
 693  */
 694 int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
 695 {
 696         struct netfs_io_stream *upload = &wreq->io_streams[0];
 697         ssize_t part;
 698         loff_t start = wreq->start;
 699         int error = 0;
 700
 701         _enter("%zx", len);
 702
 703         if (wreq->origin == NETFS_DIO_WRITE)
 704                 inode_dio_begin(wreq->inode);
 705
 706         while (len) {
 707                 // TODO: Prepare content encryption
 708
 709                 _debug("unbuffered %zx", len);
 710                 part = netfs_advance_write(wreq, upload, start, len, false);
 711                 start += part;
 712                 len -= part;
 713                 iov_iter_advance(&wreq->io_iter, part);
 714                 if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
 715                         trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
 716                         wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE);
 717                 }
 718                 if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
 719                         break;
 720         }
 721
 722         netfs_end_issue_write(wreq);
 723         _leave(" = %d", error);
 724         return error;
 725 }