]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README | |
3 | */ | |
4 | ||
1da177e4 LT |
5 | #include <linux/time.h> |
6 | #include <linux/reiserfs_fs.h> | |
7 | #include <linux/reiserfs_acl.h> | |
8 | #include <linux/reiserfs_xattr.h> | |
1da177e4 LT |
9 | #include <asm/uaccess.h> |
10 | #include <linux/pagemap.h> | |
11 | #include <linux/swap.h> | |
12 | #include <linux/writeback.h> | |
13 | #include <linux/blkdev.h> | |
14 | #include <linux/buffer_head.h> | |
15 | #include <linux/quotaops.h> | |
16 | ||
17 | /* | |
18 | ** We pack the tails of files on file close, not at the time they are written. | |
19 | ** This implies an unnecessary copy of the tail and an unnecessary indirect item | |
20 | ** insertion/balancing, for files that are written in one write. | |
21 | ** It avoids unnecessary tail packings (balances) for files that are written in | |
22 | ** multiple writes and are small enough to have tails. | |
0222e657 | 23 | ** |
1da177e4 LT |
24 | ** file_release is called by the VFS layer when the file is closed. If |
25 | ** this is the last open file descriptor, and the file is | |
26 | ** small enough to have a tail, and the tail is currently in an | |
27 | ** unformatted node, the tail is converted back into a direct item. | |
0222e657 | 28 | ** |
1da177e4 | 29 | ** We use reiserfs_truncate_file to pack the tail, since it already has |
0222e657 | 30 | ** all the conditions coded. |
1da177e4 | 31 | */ |
bd4c625c | 32 | static int reiserfs_file_release(struct inode *inode, struct file *filp)
1da177e4 LT |
33 | { /* Returns 0 when nothing had to be done, otherwise the result of the journal calls or of reiserfs_truncate_file(). */ |
34 | ||
bd4c625c LT |
35 | struct reiserfs_transaction_handle th; |
36 | int err; | |
37 | int jbegin_failure = 0; /* error from journal_begin, kept so it can be reported after cleanup */ | |
1da177e4 | 38 | |
14a61442 | 39 | BUG_ON(!S_ISREG(inode->i_mode)); |
1da177e4 | 40 | |
0e4f6a79 AV |
41 | if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1)) /* not the last opener: just drop our count, no lock needed */ |
42 | return 0; | |
43 | ||
44 | mutex_lock(&(REISERFS_I(inode)->tailpack)); | |
45 | ||
46 | if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) { /* re-check under the mutex; reiserfs_file_open may have raised the count since the test above */ | |
47 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | |
48 | return 0; | |
49 | } | |
50 | ||
bd4c625c | 51 | /* fast out for when nothing needs to be done: no tail to pack on close and no preallocation left to free */ |
0e4f6a79 | 52 | if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || |
bd4c625c LT |
53 | !tail_has_to_be_packed(inode)) && |
54 | REISERFS_I(inode)->i_prealloc_count <= 0) { | |
0e4f6a79 | 55 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); |
bd4c625c LT |
56 | return 0; |
57 | } | |
1da177e4 | 58 | |
b5f3953c | 59 | reiserfs_write_lock(inode->i_sb); |
bd4c625c LT |
60 | /* freeing preallocation only involves relogging blocks that |
61 | * are already in the current transaction. preallocation gets | |
62 | * freed at the end of each transaction, so it is impossible for | |
63 | * us to log any additional blocks (including quota blocks) | |
64 | */ | |
65 | err = journal_begin(&th, inode->i_sb, 1); | |
1da177e4 | 66 | if (err) { |
bd4c625c LT |
67 | /* uh oh, we can't allow the inode to go away while there |
68 | * are still preallocation blocks pending. Try to join the | |
69 | * aborted transaction | |
70 | */ | |
71 | jbegin_failure = err; | |
72 | err = journal_join_abort(&th, inode->i_sb, 1); | |
73 | ||
74 | if (err) { | |
75 | /* hmpf, our choices here aren't good. We can pin the inode | |
76 | * which will disallow unmount from ever happening, we can | |
77 | * do nothing, which will corrupt random memory on unmount, | |
78 | * or we can forcibly remove the file from the preallocation | |
79 | * list, which will leak blocks on disk. Let's pin the inode | |
80 | * and let the admin know what is going on. | |
81 | */ | |
82 | igrab(inode); | |
45b03d5e | 83 | reiserfs_warning(inode->i_sb, "clm-9001", |
bd4c625c | 84 | "pinning inode %lu because the " |
533221fb AD |
85 | "preallocation can't be freed", |
86 | inode->i_ino); | |
bd4c625c LT |
87 | goto out; |
88 | } | |
1da177e4 | 89 | } |
bd4c625c | 90 | reiserfs_update_inode_transaction(inode); |
1da177e4 LT |
91 | |
92 | #ifdef REISERFS_PREALLOCATE | |
bd4c625c | 93 | reiserfs_discard_prealloc(&th, inode); |
1da177e4 | 94 | #endif |
bd4c625c LT |
95 | err = journal_end(&th, inode->i_sb, 1); |
96 | ||
97 | /* copy back the error code from journal_begin (non-zero only when journal_begin failed but journal_join_abort succeeded) */ | |
98 | if (!err) | |
99 | err = jbegin_failure; | |
100 | ||
0e4f6a79 | 101 | if (!err && |
bd4c625c LT |
102 | (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && |
103 | tail_has_to_be_packed(inode)) { | |
0e4f6a79 | 104 | |
bd4c625c LT |
105 | /* if regular file is released by last holder and it has been |
106 | appended (we append by unformatted node only) or its direct | |
107 | item(s) had to be converted, then it may have to be | |
108 | indirect2direct converted */ | |
109 | err = reiserfs_truncate_file(inode, 0); | |
110 | } | |
111 | out: /* common exit: drop the write lock and the tailpack mutex taken above */ | |
bd4c625c | 112 | reiserfs_write_unlock(inode->i_sb); |
0e4f6a79 | 113 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); |
bd4c625c | 114 | return err; |
1da177e4 LT |
115 | } |
116 | ||
0e4f6a79 | 117 | static int reiserfs_file_open(struct inode *inode, struct file *file)
de14569f | 118 | { /* Open hook: calls dquot_file_open() and counts this opener in REISERFS_I(inode)->openers. Returns the dquot_file_open() result. */ |
0e4f6a79 AV |
119 | int err = dquot_file_open(inode, file); |
120 | if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { /* lock-free increment when the count is already non-zero */ | |
121 | /* count was zero: somebody might be tailpacking on final close; wait for it by taking the tailpack mutex before counting ourselves */ | |
122 | mutex_lock(&(REISERFS_I(inode)->tailpack)); | |
123 | atomic_inc(&REISERFS_I(inode)->openers); | |
124 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | |
125 | } | |
126 | return err; /* NOTE(review): openers is incremented even when dquot_file_open() returned an error - confirm a failed open is balanced by a release call */ | |
de14569f VS |
127 | } |
128 | ||
bd4c625c LT |
129 | static void reiserfs_vfs_truncate_file(struct inode *inode) |
130 | { /* .truncate hook: runs reiserfs_truncate_file() while holding the tailpack mutex, the same mutex reiserfs_file_release holds around its tail packing */ | |
0e4f6a79 | 131 | mutex_lock(&(REISERFS_I(inode)->tailpack)); |
bd4c625c | 132 | reiserfs_truncate_file(inode, 1); /* return value is discarded; this hook returns void */ |
0e4f6a79 | 133 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); |
1da177e4 LT |
134 | } |
135 | ||
136 | /* Sync a reiserfs file: write back and wait on the byte range start..end, call sync_mapping_buffers(), then reiserfs_commit_for_inode(), and issue a block-device flush if still required. The datasync argument is not used by this function. */ | |
137 | ||
138 | /* | |
139 | * FIXME: sync_mapping_buffers() never has anything to sync. Can | |
140 | * be removed... | |
141 | */ | |
142 | ||
02c24a82 JB |
143 | static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end, |
144 | int datasync) | |
bd4c625c | 145 | { |
7ea80859 | 146 | struct inode *inode = filp->f_mapping->host; |
ee93961b | 147 | int err; |
bd4c625c LT |
148 | int barrier_done; |
149 | ||
02c24a82 JB |
150 | err = filemap_write_and_wait_range(inode->i_mapping, start, end); |
151 | if (err) | |
152 | return err; /* writeback failed: return before taking i_mutex */ | |
153 | ||
154 | mutex_lock(&inode->i_mutex); | |
995c762e | 155 | BUG_ON(!S_ISREG(inode->i_mode)); |
ee93961b | 156 | err = sync_mapping_buffers(inode->i_mapping); |
995c762e JM |
157 | reiserfs_write_lock(inode->i_sb); |
158 | barrier_done = reiserfs_commit_for_inode(inode); | |
159 | reiserfs_write_unlock(inode->i_sb); | |
160 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) /* presumably a return of 1 means the commit already issued a barrier - TODO confirm */ | |
dd3932ed | 161 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
02c24a82 | 162 | mutex_unlock(&inode->i_mutex); |
bd4c625c LT |
163 | if (barrier_done < 0) /* a negative commit result is returned as the error */ |
164 | return barrier_done; | |
ee93961b | 165 | return (err < 0) ? -EIO : 0; /* any negative sync_mapping_buffers() result is reported as -EIO */ |
1da177e4 LT |
166 | } |
167 | ||
1da177e4 LT |
168 | /* taken from fs/buffer.c:__block_commit_write. Marks the buffers of page that overlap the byte range from..to uptodate and either logs them in a transaction (when reiserfs_file_data_log(inode) is non-zero) or marks them dirty. Returns 0 or the error from journal_begin/journal_end. */ |
169 | int reiserfs_commit_page(struct inode *inode, struct page *page, | |
bd4c625c | 170 | unsigned from, unsigned to) |
1da177e4 | 171 | { |
bd4c625c LT |
172 | unsigned block_start, block_end; |
173 | int partial = 0; /* set when a buffer outside from..to is not uptodate */ | |
174 | unsigned blocksize; | |
175 | struct buffer_head *bh, *head; | |
176 | unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; /* page index that contains i_size */ | |
177 | int new; | |
178 | int logit = reiserfs_file_data_log(inode); | |
179 | struct super_block *s = inode->i_sb; | |
180 | int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; | |
181 | struct reiserfs_transaction_handle th; | |
182 | int ret = 0; | |
183 | ||
184 | th.t_trans_id = 0; | |
185 | blocksize = 1 << inode->i_blkbits; | |
186 | ||
187 | if (logit) { | |
188 | reiserfs_write_lock(s); | |
189 | ret = journal_begin(&th, s, bh_per_page + 1); /* same block count is passed to journal_end below */ | |
190 | if (ret) | |
191 | goto drop_write_lock; | |
192 | reiserfs_update_inode_transaction(inode); | |
193 | } | |
194 | for (bh = head = page_buffers(page), block_start = 0; /* walk the circular buffer list of the page exactly once */ | |
195 | bh != head || !block_start; /* the first pass has bh == head, so block_start == 0 lets it through */ | |
196 | block_start = block_end, bh = bh->b_this_page) { | |
197 | ||
198 | new = buffer_new(bh); /* remember the new flag before clearing it */ | |
199 | clear_buffer_new(bh); | |
200 | block_end = block_start + blocksize; | |
201 | if (block_end <= from || block_start >= to) { /* buffer lies entirely outside the written range */ | |
202 | if (!buffer_uptodate(bh)) | |
203 | partial = 1; | |
204 | } else { | |
205 | set_buffer_uptodate(bh); | |
206 | if (logit) { | |
207 | reiserfs_prepare_for_journal(s, bh, 1); | |
208 | journal_mark_dirty(&th, s, bh); | |
209 | } else if (!buffer_dirty(bh)) { | |
210 | mark_buffer_dirty(bh); | |
211 | /* do data=ordered on any page past the end | |
212 | * of file and any buffer marked BH_New. | |
213 | */ | |
214 | if (reiserfs_data_ordered(inode->i_sb) && | |
215 | (new || page->index >= i_size_index)) { | |
216 | reiserfs_add_ordered_list(inode, bh); | |
217 | } | |
218 | } | |
219 | } | |
1da177e4 | 220 | } |
bd4c625c LT |
221 | if (logit) { |
222 | ret = journal_end(&th, s, bh_per_page + 1); | |
223 | drop_write_lock: /* also reached by goto when journal_begin failed, skipping the loop and journal_end */ | |
224 | reiserfs_write_unlock(s); | |
225 | } | |
226 | /* | |
227 | * If this is a partial write which happened to make all buffers | |
228 | * uptodate then we can optimize away a bogus readpage() for | |
229 | * the next read(). Here we 'discover' whether the page went | |
230 | * uptodate as a result of this (potentially partial) write. | |
231 | * NOTE(review): on the journal_begin failure path partial is still 0, so the page is marked uptodate without the loop having run - confirm this is intended. | |
232 | */ | |
232 | if (!partial) | |
233 | SetPageUptodate(page); | |
234 | return ret; | |
1da177e4 LT |
235 | } |
236 | ||
1da177e4 | 237 | /* Write @count bytes at position @ppos in a file indicated by @file |
0222e657 | 238 | from the buffer @buf. |
1da177e4 LT |
239 | |
240 | generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want | |
241 | something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was | |
242 | written for (ext2/3). This is for several reasons: | |
243 | ||
244 | * It has no understanding of any filesystem specific optimizations. | |
245 | ||
246 | * It enters the filesystem repeatedly for each page that is written. | |
247 | ||
248 | * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key | |
249 | * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time | |
250 | * to reiserfs which allows for fewer tree traversals. | |
251 | ||
252 | * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks. | |
253 | ||
254 | * Asking the block allocation code for blocks one at a time is slightly less efficient. | |
255 | ||
256 | All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to | |
257 | use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make | |
258 | things right finally. | |
259 | ||
260 | Future Features: providing search_by_key with hints. | |
261 | ||
262 | */ | |
bd4c625c LT |
263 | static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */ |
264 | const char __user * buf, /* pointer to user supplied data | |
265 | (in userspace) */ | |
266 | size_t count, /* amount of bytes to write; may be clamped below for v3.5 items */ | |
267 | loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to | |
268 | * new current position before returning. */ | |
269 | ) | |
1da177e4 | 270 | { |
1fc5adbd | 271 | struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. |
bd4c625c LT |
272 | /* To simplify coding at this time, we store |
273 | locked pages in array for now. NOTE(review): no page array is declared in this function; this comment looks stale. */ | |
bd4c625c LT |
274 | struct reiserfs_transaction_handle th; |
275 | th.t_trans_id = 0; /* NOTE(review): th is only initialised here and is not used again in this function */ | |
276 | ||
fa385bef JM |
277 | /* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items |
278 | * lying around (most of the disk, in fact). Despite the filesystem | |
279 | * now being a v3.6 format, the old items still can't support large | |
280 | * file sizes. Catch this case here, as the rest of the VFS layer is | |
281 | * oblivious to the different limitations between old and new items. | |
282 | * reiserfs_setattr catches this for truncates. This chunk is lifted | |
283 | * from generic_write_checks. */ | |
284 | if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && | |
285 | *ppos + count > MAX_NON_LFS) { | |
286 | if (*ppos >= MAX_NON_LFS) { /* start position already at or past the limit: nothing can be written */ | |
fa385bef JM |
287 | return -EFBIG; |
288 | } | |
289 | if (count > MAX_NON_LFS - (unsigned long)*ppos) /* clamp so the write ends at MAX_NON_LFS */ | |
290 | count = MAX_NON_LFS - (unsigned long)*ppos; | |
291 | } | |
292 | ||
797b4cff | 293 | return do_sync_write(file, buf, count, ppos); /* the write itself is delegated to do_sync_write() */ |
1da177e4 LT |
294 | } |
295 | ||
4b6f5d20 | 296 | const struct file_operations reiserfs_file_operations = { /* file method table; .write, .open, .release and .fsync point at functions defined in this file, the other entries at helpers defined elsewhere */ |
027445c3 | 297 | .read = do_sync_read, |
bd4c625c | 298 | .write = reiserfs_file_write, /* applies the v3.5 size limit, then calls do_sync_write() */ |
205cb37b | 299 | .unlocked_ioctl = reiserfs_ioctl, |
52b499c4 DH |
300 | #ifdef CONFIG_COMPAT |
301 | .compat_ioctl = reiserfs_compat_ioctl, | |
302 | #endif | |
0e4f6a79 AV |
303 | .mmap = generic_file_mmap, |
304 | .open = reiserfs_file_open, /* counts openers; paired with .release below */ | |
bd4c625c LT |
305 | .release = reiserfs_file_release, |
306 | .fsync = reiserfs_sync_file, | |
bd4c625c | 307 | .aio_read = generic_file_aio_read, |
9637f28f | 308 | .aio_write = generic_file_aio_write, |
5274f052 JA |
309 | .splice_read = generic_file_splice_read, |
310 | .splice_write = generic_file_splice_write, | |
91efc167 | 311 | .llseek = generic_file_llseek, |
1da177e4 LT |
312 | }; |
313 | ||
c5ef1c42 | 314 | const struct inode_operations reiserfs_file_inode_operations = { /* inode method table; only .truncate is defined in this file, the other handlers are defined elsewhere */ |
bd4c625c LT |
315 | .truncate = reiserfs_vfs_truncate_file, /* wrapper that holds the tailpack mutex around reiserfs_truncate_file() */ |
316 | .setattr = reiserfs_setattr, | |
317 | .setxattr = reiserfs_setxattr, | |
318 | .getxattr = reiserfs_getxattr, | |
319 | .listxattr = reiserfs_listxattr, | |
320 | .removexattr = reiserfs_removexattr, | |
321 | .permission = reiserfs_permission, | |
4e34e719 | 322 | .get_acl = reiserfs_get_acl, |
1da177e4 | 323 | }; |