]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2005 | |
3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of the GNU General Public License as published by | |
63f83c9f | 7 | * the Free Software Foundation; either version 2 of the License, or |
1da177e4 | 8 | * (at your option) any later version. |
63f83c9f | 9 | * |
1da177e4 LT |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
13 | * the GNU General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License | |
63f83c9f | 16 | * along with this program; if not, write to the Free Software |
1da177e4 LT |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | |
19 | ||
20 | /* | |
f720e3ba | 21 | * jfs_txnmgr.c: transaction manager |
1da177e4 LT |
22 | * |
23 | * notes: | |
24 | * transaction starts with txBegin() and ends with txCommit() | |
25 | * or txAbort(). | |
26 | * | |
27 | * tlock is acquired at the time of update; | |
28 | * (obviate scan at commit time for xtree and dtree) | |
29 | * tlock and mp points to each other; | |
30 | * (no hashlist for mp -> tlock). | |
31 | * | |
32 | * special cases: | |
33 | * tlock on in-memory inode: | |
34 | * in-place tlock in the in-memory inode itself; | |
35 | * converted to page lock by iWrite() at commit time. | |
36 | * | |
37 | * tlock during write()/mmap() under anonymous transaction (tid = 0): | |
38 | * transferred (?) to transaction at commit time. | |
39 | * | |
40 | * use the page itself to update allocation maps | |
41 | * (obviate intermediate replication of allocation/deallocation data) | |
42 | * hold on to mp+lock thru update of maps | |
43 | */ | |
44 | ||
1da177e4 LT |
45 | #include <linux/fs.h> |
46 | #include <linux/vmalloc.h> | |
1da177e4 | 47 | #include <linux/completion.h> |
7dfb7103 | 48 | #include <linux/freezer.h> |
1da177e4 LT |
49 | #include <linux/module.h> |
50 | #include <linux/moduleparam.h> | |
91dbb4de | 51 | #include <linux/kthread.h> |
b2e03ca7 | 52 | #include <linux/seq_file.h> |
1da177e4 | 53 | #include "jfs_incore.h" |
1868f4aa | 54 | #include "jfs_inode.h" |
1da177e4 LT |
55 | #include "jfs_filsys.h" |
56 | #include "jfs_metapage.h" | |
57 | #include "jfs_dinode.h" | |
58 | #include "jfs_imap.h" | |
59 | #include "jfs_dmap.h" | |
60 | #include "jfs_superblock.h" | |
61 | #include "jfs_debug.h" | |
62 | ||
/*
 *	transaction management structures
 *
 * TxAnchor is the single anchor for the transaction manager: it holds the
 * heads of the free tblock/tlock index lists, the wait queues used when
 * those resources run short, and the lists of inodes/transactions that
 * still need processing.  Index 0 is never a valid tid or lid, so a zero
 * freetid/freelock means "nothing free".
 */
static struct {
	int freetid;		/* index of a free tid structure */
	int freelock;		/* index first free lock word */
	wait_queue_head_t freewait;	/* eventlist of free tblock */
	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
	int tlocksInUse;	/* Number of tlocks in use */
	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
/*	struct tblock *sync_queue; * Transactions waiting for data sync */
	struct list_head unlock_queue;	/* Txns waiting to be released */
	struct list_head anon_list;	/* inodes having anonymous txns */
	struct list_head anon_list2;	/* inodes having anonymous txns
					   that couldn't be sync'ed */
} TxAnchor;

/*
 * Set to 1 by txLockAlloc() when tlocksInUse rises above TxLockHWM and
 * cleared by txLockFree() once it drops below TxLockLWM.
 */
int jfs_tlocks_low;		/* Indicates low number of available tlocks */
82 | ||
#ifdef CONFIG_JFS_STATISTICS
/*
 * Event counters, bumped through INCREMENT() at the matching points in
 * txBegin(), txBeginAnon() and txLockAlloc().
 */
static struct {
	uint txBegin;			/* calls to txBegin() */
	uint txBegin_barrier;		/* txBegin() sleeps on log->syncwait */
	uint txBegin_lockslow;		/* txBegin() sleeps on lowlockwait */
	uint txBegin_freetid;		/* txBegin() sleeps on freewait */
	uint txBeginAnon;		/* calls to txBeginAnon() */
	uint txBeginAnon_barrier;	/* txBeginAnon() sleeps on log->syncwait */
	uint txBeginAnon_lockslow;	/* txBeginAnon() sleeps on lowlockwait */
	uint txLockAlloc;		/* calls to txLockAlloc() */
	uint txLockAlloc_freelock;	/* txLockAlloc() found no free tlock */
} TxStat;
#endif
96 | ||
/*
 * Tunables.  -1 means "unset"; txInit() then derives a default and clamps
 * both values (nTxBlock to 16..65536, nTxLock to 256..65536).
 */
static int nTxBlock = -1;	/* number of transaction blocks */
module_param(nTxBlock, int, 0);
MODULE_PARM_DESC(nTxBlock,
		 "Number of transaction blocks (max:65536)");

static int nTxLock = -1;	/* number of transaction locks */
module_param(nTxLock, int, 0);
MODULE_PARM_DESC(nTxLock,
		 "Number of transaction locks (max:65536)");

/*
 * Tables allocated in txInit() and released in txExit().  The water marks
 * are set in txInit() to 40%, 70% and 80% of nTxLock respectively.
 */
struct tblock *TxBlock;	/* transaction block table */
static int TxLockLWM;	/* Low water mark for number of txLocks used */
static int TxLockHWM;	/* High water mark for number of txLocks used */
static int TxLockVHWM;	/* Very High water mark */
struct tlock *TxLock;	/* transaction lock table */
1da177e4 | 112 | |
/*
 *	transaction management lock
 *
 * jfsTxnLock is the spinlock taken by TXN_LOCK()/TXN_UNLOCK() around
 * manipulation of the tblock/tlock free lists and TxAnchor counters.
 */
static DEFINE_SPINLOCK(jfsTxnLock);

#define TXN_LOCK()		spin_lock(&jfsTxnLock)
#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)

/*
 * LazyLock helpers.  None of these expand to a trailing semicolon, so each
 * use is a single statement and remains safe inside an unbraced if/else.
 */
#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock)
#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)

static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
static int jfs_commit_thread_waking;
127 | ||
128 | /* | |
129 | * Retry logic exist outside these macros to protect from spurrious wakeups. | |
130 | */ | |
131 | static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) | |
132 | { | |
133 | DECLARE_WAITQUEUE(wait, current); | |
134 | ||
135 | add_wait_queue(event, &wait); | |
136 | set_current_state(TASK_UNINTERRUPTIBLE); | |
137 | TXN_UNLOCK(); | |
4aa0d230 | 138 | io_schedule(); |
3cbb1c8e | 139 | __set_current_state(TASK_RUNNING); |
1da177e4 LT |
140 | remove_wait_queue(event, &wait); |
141 | } | |
142 | ||
/*
 * TXN_SLEEP: sleep on @event with TXN_LOCK dropped, then take TXN_LOCK
 * again before continuing.  Wrapped in do { } while (0) so that
 * "TXN_SLEEP(x);" is exactly one statement and can be used as the body of
 * an unbraced if/else or loop.
 */
#define TXN_SLEEP(event)\
do {\
	TXN_SLEEP_DROP_LOCK(event);\
	TXN_LOCK();\
} while (0)

/* Wake every task sleeping on @event. */
#define TXN_WAKEUP(event) wake_up_all(event)
150 | ||
/*
 *	statistics
 *
 * Unlike TxStat these counters are not conditional on
 * CONFIG_JFS_STATISTICS at their definition; they are updated through the
 * HIGHWATERMARK()/INCREMENT() helpers in txBegin(), txLockAlloc() and
 * txLock().
 */
static struct {
	tid_t maxtid;		/* 4: biggest tid ever used */
	lid_t maxlid;		/* 4: biggest lid ever used */
	int ntid;		/* 4: # of transactions performed */
	int nlid;		/* 4: # of tlocks acquired */
	int waitlock;		/* 4: # of tlock wait */
} stattx;
161 | ||
1da177e4 LT |
/*
 * Prototypes for file-local helpers that are used before they are defined.
 */
static int diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
		 struct tlock *tlck, struct commit *cd);
static int dataLog(struct jfs_log *log, struct tblock *tblk,
		   struct lrd *lrd, struct tlock *tlck);
static void dtLog(struct jfs_log *log, struct tblock *tblk,
		  struct lrd *lrd, struct tlock *tlck);
static void mapLog(struct jfs_log *log, struct tblock *tblk,
		   struct lrd *lrd, struct tlock *tlck);
static void txAllocPMap(struct inode *ip, struct maplock *maplock,
			struct tblock *tblk);
static void txForce(struct tblock *tblk);
static int txLog(struct jfs_log *log, struct tblock *tblk,
		 struct commit *cd);
static void txUpdateMap(struct tblock *tblk);
static void txRelease(struct tblock *tblk);
static void xtLog(struct jfs_log *log, struct tblock *tblk,
		  struct lrd *lrd, struct tlock *tlck);
static void LogSyncRelease(struct metapage *mp);
183 | ||
184 | /* | |
f720e3ba DK |
185 | * transaction block/lock management |
186 | * --------------------------------- | |
1da177e4 LT |
187 | */ |
188 | ||
189 | /* | |
190 | * Get a transaction lock from the free list. If the number in use is | |
191 | * greater than the high water mark, wake up the sync daemon. This should | |
192 | * free some anonymous transaction locks. (TXN_LOCK must be held.) | |
193 | */ | |
194 | static lid_t txLockAlloc(void) | |
195 | { | |
196 | lid_t lid; | |
197 | ||
198 | INCREMENT(TxStat.txLockAlloc); | |
199 | if (!TxAnchor.freelock) { | |
200 | INCREMENT(TxStat.txLockAlloc_freelock); | |
201 | } | |
202 | ||
203 | while (!(lid = TxAnchor.freelock)) | |
204 | TXN_SLEEP(&TxAnchor.freelockwait); | |
205 | TxAnchor.freelock = TxLock[lid].next; | |
206 | HIGHWATERMARK(stattx.maxlid, lid); | |
207 | if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { | |
208 | jfs_info("txLockAlloc tlocks low"); | |
209 | jfs_tlocks_low = 1; | |
91dbb4de | 210 | wake_up_process(jfsSyncThread); |
1da177e4 LT |
211 | } |
212 | ||
213 | return lid; | |
214 | } | |
215 | ||
216 | static void txLockFree(lid_t lid) | |
217 | { | |
7fab479b | 218 | TxLock[lid].tid = 0; |
1da177e4 LT |
219 | TxLock[lid].next = TxAnchor.freelock; |
220 | TxAnchor.freelock = lid; | |
221 | TxAnchor.tlocksInUse--; | |
222 | if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { | |
223 | jfs_info("txLockFree jfs_tlocks_low no more"); | |
224 | jfs_tlocks_low = 0; | |
225 | TXN_WAKEUP(&TxAnchor.lowlockwait); | |
226 | } | |
227 | TXN_WAKEUP(&TxAnchor.freelockwait); | |
228 | } | |
229 | ||
230 | /* | |
f720e3ba | 231 | * NAME: txInit() |
1da177e4 | 232 | * |
f720e3ba | 233 | * FUNCTION: initialize transaction management structures |
1da177e4 LT |
234 | * |
235 | * RETURN: | |
236 | * | |
237 | * serialization: single thread at jfs_init() | |
238 | */ | |
239 | int txInit(void) | |
240 | { | |
241 | int k, size; | |
242 | struct sysinfo si; | |
243 | ||
244 | /* Set defaults for nTxLock and nTxBlock if unset */ | |
245 | ||
246 | if (nTxLock == -1) { | |
247 | if (nTxBlock == -1) { | |
248 | /* Base default on memory size */ | |
249 | si_meminfo(&si); | |
250 | if (si.totalram > (256 * 1024)) /* 1 GB */ | |
251 | nTxLock = 64 * 1024; | |
252 | else | |
253 | nTxLock = si.totalram >> 2; | |
254 | } else if (nTxBlock > (8 * 1024)) | |
255 | nTxLock = 64 * 1024; | |
256 | else | |
257 | nTxLock = nTxBlock << 3; | |
258 | } | |
259 | if (nTxBlock == -1) | |
260 | nTxBlock = nTxLock >> 3; | |
261 | ||
262 | /* Verify tunable parameters */ | |
263 | if (nTxBlock < 16) | |
264 | nTxBlock = 16; /* No one should set it this low */ | |
265 | if (nTxBlock > 65536) | |
266 | nTxBlock = 65536; | |
267 | if (nTxLock < 256) | |
268 | nTxLock = 256; /* No one should set it this low */ | |
269 | if (nTxLock > 65536) | |
270 | nTxLock = 65536; | |
271 | ||
272 | printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", | |
273 | nTxBlock, nTxLock); | |
274 | /* | |
275 | * initialize transaction block (tblock) table | |
276 | * | |
277 | * transaction id (tid) = tblock index | |
278 | * tid = 0 is reserved. | |
279 | */ | |
280 | TxLockLWM = (nTxLock * 4) / 10; | |
281 | TxLockHWM = (nTxLock * 7) / 10; | |
282 | TxLockVHWM = (nTxLock * 8) / 10; | |
283 | ||
284 | size = sizeof(struct tblock) * nTxBlock; | |
f52720ca | 285 | TxBlock = vmalloc(size); |
1da177e4 LT |
286 | if (TxBlock == NULL) |
287 | return -ENOMEM; | |
288 | ||
289 | for (k = 1; k < nTxBlock - 1; k++) { | |
290 | TxBlock[k].next = k + 1; | |
291 | init_waitqueue_head(&TxBlock[k].gcwait); | |
292 | init_waitqueue_head(&TxBlock[k].waitor); | |
293 | } | |
294 | TxBlock[k].next = 0; | |
295 | init_waitqueue_head(&TxBlock[k].gcwait); | |
296 | init_waitqueue_head(&TxBlock[k].waitor); | |
297 | ||
298 | TxAnchor.freetid = 1; | |
299 | init_waitqueue_head(&TxAnchor.freewait); | |
300 | ||
301 | stattx.maxtid = 1; /* statistics */ | |
302 | ||
303 | /* | |
304 | * initialize transaction lock (tlock) table | |
305 | * | |
306 | * transaction lock id = tlock index | |
307 | * tlock id = 0 is reserved. | |
308 | */ | |
309 | size = sizeof(struct tlock) * nTxLock; | |
f52720ca | 310 | TxLock = vmalloc(size); |
1da177e4 LT |
311 | if (TxLock == NULL) { |
312 | vfree(TxBlock); | |
313 | return -ENOMEM; | |
314 | } | |
315 | ||
316 | /* initialize tlock table */ | |
317 | for (k = 1; k < nTxLock - 1; k++) | |
318 | TxLock[k].next = k + 1; | |
319 | TxLock[k].next = 0; | |
320 | init_waitqueue_head(&TxAnchor.freelockwait); | |
321 | init_waitqueue_head(&TxAnchor.lowlockwait); | |
322 | ||
323 | TxAnchor.freelock = 1; | |
324 | TxAnchor.tlocksInUse = 0; | |
325 | INIT_LIST_HEAD(&TxAnchor.anon_list); | |
326 | INIT_LIST_HEAD(&TxAnchor.anon_list2); | |
327 | ||
328 | LAZY_LOCK_INIT(); | |
329 | INIT_LIST_HEAD(&TxAnchor.unlock_queue); | |
330 | ||
331 | stattx.maxlid = 1; /* statistics */ | |
332 | ||
333 | return 0; | |
334 | } | |
335 | ||
336 | /* | |
f720e3ba | 337 | * NAME: txExit() |
1da177e4 | 338 | * |
f720e3ba | 339 | * FUNCTION: clean up when module is unloaded |
1da177e4 LT |
340 | */ |
341 | void txExit(void) | |
342 | { | |
343 | vfree(TxLock); | |
344 | TxLock = NULL; | |
345 | vfree(TxBlock); | |
346 | TxBlock = NULL; | |
347 | } | |
348 | ||
1da177e4 | 349 | /* |
f720e3ba | 350 | * NAME: txBegin() |
1da177e4 | 351 | * |
f720e3ba | 352 | * FUNCTION: start a transaction. |
1da177e4 | 353 | * |
f720e3ba DK |
354 | * PARAMETER: sb - superblock |
355 | * flag - force for nested tx; | |
1da177e4 LT |
356 | * |
357 | * RETURN: tid - transaction id | |
358 | * | |
359 | * note: flag force allows to start tx for nested tx | |
360 | * to prevent deadlock on logsync barrier; | |
361 | */ | |
362 | tid_t txBegin(struct super_block *sb, int flag) | |
363 | { | |
364 | tid_t t; | |
365 | struct tblock *tblk; | |
366 | struct jfs_log *log; | |
367 | ||
368 | jfs_info("txBegin: flag = 0x%x", flag); | |
369 | log = JFS_SBI(sb)->log; | |
370 | ||
371 | TXN_LOCK(); | |
372 | ||
373 | INCREMENT(TxStat.txBegin); | |
374 | ||
375 | retry: | |
376 | if (!(flag & COMMIT_FORCE)) { | |
377 | /* | |
378 | * synchronize with logsync barrier | |
379 | */ | |
380 | if (test_bit(log_SYNCBARRIER, &log->flag) || | |
381 | test_bit(log_QUIESCE, &log->flag)) { | |
382 | INCREMENT(TxStat.txBegin_barrier); | |
383 | TXN_SLEEP(&log->syncwait); | |
384 | goto retry; | |
385 | } | |
386 | } | |
387 | if (flag == 0) { | |
388 | /* | |
389 | * Don't begin transaction if we're getting starved for tlocks | |
390 | * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately | |
391 | * free tlocks) | |
392 | */ | |
393 | if (TxAnchor.tlocksInUse > TxLockVHWM) { | |
394 | INCREMENT(TxStat.txBegin_lockslow); | |
395 | TXN_SLEEP(&TxAnchor.lowlockwait); | |
396 | goto retry; | |
397 | } | |
398 | } | |
399 | ||
400 | /* | |
401 | * allocate transaction id/block | |
402 | */ | |
403 | if ((t = TxAnchor.freetid) == 0) { | |
404 | jfs_info("txBegin: waiting for free tid"); | |
405 | INCREMENT(TxStat.txBegin_freetid); | |
406 | TXN_SLEEP(&TxAnchor.freewait); | |
407 | goto retry; | |
408 | } | |
409 | ||
410 | tblk = tid_to_tblock(t); | |
411 | ||
412 | if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { | |
413 | /* Don't let a non-forced transaction take the last tblk */ | |
414 | jfs_info("txBegin: waiting for free tid"); | |
415 | INCREMENT(TxStat.txBegin_freetid); | |
416 | TXN_SLEEP(&TxAnchor.freewait); | |
417 | goto retry; | |
418 | } | |
419 | ||
420 | TxAnchor.freetid = tblk->next; | |
421 | ||
422 | /* | |
423 | * initialize transaction | |
424 | */ | |
425 | ||
426 | /* | |
427 | * We can't zero the whole thing or we screw up another thread being | |
428 | * awakened after sleeping on tblk->waitor | |
429 | * | |
430 | * memset(tblk, 0, sizeof(struct tblock)); | |
431 | */ | |
432 | tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; | |
433 | ||
434 | tblk->sb = sb; | |
435 | ++log->logtid; | |
436 | tblk->logtid = log->logtid; | |
437 | ||
438 | ++log->active; | |
439 | ||
440 | HIGHWATERMARK(stattx.maxtid, t); /* statistics */ | |
441 | INCREMENT(stattx.ntid); /* statistics */ | |
442 | ||
443 | TXN_UNLOCK(); | |
444 | ||
445 | jfs_info("txBegin: returning tid = %d", t); | |
446 | ||
447 | return t; | |
448 | } | |
449 | ||
1da177e4 | 450 | /* |
f720e3ba | 451 | * NAME: txBeginAnon() |
1da177e4 | 452 | * |
f720e3ba | 453 | * FUNCTION: start an anonymous transaction. |
1da177e4 LT |
454 | * Blocks if logsync or available tlocks are low to prevent |
455 | * anonymous tlocks from depleting supply. | |
456 | * | |
f720e3ba | 457 | * PARAMETER: sb - superblock |
1da177e4 LT |
458 | * |
459 | * RETURN: none | |
460 | */ | |
461 | void txBeginAnon(struct super_block *sb) | |
462 | { | |
463 | struct jfs_log *log; | |
464 | ||
465 | log = JFS_SBI(sb)->log; | |
466 | ||
467 | TXN_LOCK(); | |
468 | INCREMENT(TxStat.txBeginAnon); | |
469 | ||
470 | retry: | |
471 | /* | |
472 | * synchronize with logsync barrier | |
473 | */ | |
474 | if (test_bit(log_SYNCBARRIER, &log->flag) || | |
475 | test_bit(log_QUIESCE, &log->flag)) { | |
476 | INCREMENT(TxStat.txBeginAnon_barrier); | |
477 | TXN_SLEEP(&log->syncwait); | |
478 | goto retry; | |
479 | } | |
480 | ||
481 | /* | |
482 | * Don't begin transaction if we're getting starved for tlocks | |
483 | */ | |
484 | if (TxAnchor.tlocksInUse > TxLockVHWM) { | |
485 | INCREMENT(TxStat.txBeginAnon_lockslow); | |
486 | TXN_SLEEP(&TxAnchor.lowlockwait); | |
487 | goto retry; | |
488 | } | |
489 | TXN_UNLOCK(); | |
490 | } | |
491 | ||
1da177e4 | 492 | /* |
f720e3ba | 493 | * txEnd() |
1da177e4 LT |
494 | * |
495 | * function: free specified transaction block. | |
496 | * | |
f720e3ba | 497 | * logsync barrier processing: |
1da177e4 LT |
498 | * |
499 | * serialization: | |
500 | */ | |
501 | void txEnd(tid_t tid) | |
502 | { | |
503 | struct tblock *tblk = tid_to_tblock(tid); | |
504 | struct jfs_log *log; | |
505 | ||
506 | jfs_info("txEnd: tid = %d", tid); | |
507 | TXN_LOCK(); | |
508 | ||
509 | /* | |
510 | * wakeup transactions waiting on the page locked | |
511 | * by the current transaction | |
512 | */ | |
513 | TXN_WAKEUP(&tblk->waitor); | |
514 | ||
515 | log = JFS_SBI(tblk->sb)->log; | |
516 | ||
517 | /* | |
518 | * Lazy commit thread can't free this guy until we mark it UNLOCKED, | |
519 | * otherwise, we would be left with a transaction that may have been | |
520 | * reused. | |
521 | * | |
522 | * Lazy commit thread will turn off tblkGC_LAZY before calling this | |
523 | * routine. | |
524 | */ | |
525 | if (tblk->flag & tblkGC_LAZY) { | |
526 | jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); | |
527 | TXN_UNLOCK(); | |
528 | ||
529 | spin_lock_irq(&log->gclock); // LOGGC_LOCK | |
530 | tblk->flag |= tblkGC_UNLOCKED; | |
531 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK | |
532 | return; | |
533 | } | |
534 | ||
535 | jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); | |
536 | ||
537 | assert(tblk->next == 0); | |
538 | ||
539 | /* | |
540 | * insert tblock back on freelist | |
541 | */ | |
542 | tblk->next = TxAnchor.freetid; | |
543 | TxAnchor.freetid = tid; | |
544 | ||
545 | /* | |
546 | * mark the tblock not active | |
547 | */ | |
548 | if (--log->active == 0) { | |
549 | clear_bit(log_FLUSH, &log->flag); | |
550 | ||
551 | /* | |
552 | * synchronize with logsync barrier | |
553 | */ | |
554 | if (test_bit(log_SYNCBARRIER, &log->flag)) { | |
cbc3d65e DK |
555 | TXN_UNLOCK(); |
556 | ||
557 | /* write dirty metadata & forward log syncpt */ | |
558 | jfs_syncpt(log, 1); | |
559 | ||
1da177e4 LT |
560 | jfs_info("log barrier off: 0x%x", log->lsn); |
561 | ||
562 | /* enable new transactions start */ | |
563 | clear_bit(log_SYNCBARRIER, &log->flag); | |
564 | ||
565 | /* wakeup all waitors for logsync barrier */ | |
566 | TXN_WAKEUP(&log->syncwait); | |
1c627829 | 567 | |
1c627829 | 568 | goto wakeup; |
1da177e4 LT |
569 | } |
570 | } | |
571 | ||
1c627829 DK |
572 | TXN_UNLOCK(); |
573 | wakeup: | |
1da177e4 LT |
574 | /* |
575 | * wakeup all waitors for a free tblock | |
576 | */ | |
577 | TXN_WAKEUP(&TxAnchor.freewait); | |
1da177e4 LT |
578 | } |
579 | ||
1da177e4 | 580 | /* |
f720e3ba | 581 | * txLock() |
1da177e4 LT |
582 | * |
583 | * function: acquire a transaction lock on the specified <mp> | |
584 | * | |
585 | * parameter: | |
586 | * | |
f720e3ba | 587 | * return: transaction lock id |
1da177e4 LT |
588 | * |
589 | * serialization: | |
590 | */ | |
591 | struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, | |
592 | int type) | |
593 | { | |
594 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | |
595 | int dir_xtree = 0; | |
596 | lid_t lid; | |
597 | tid_t xtid; | |
598 | struct tlock *tlck; | |
599 | struct xtlock *xtlck; | |
600 | struct linelock *linelock; | |
601 | xtpage_t *p; | |
602 | struct tblock *tblk; | |
603 | ||
604 | TXN_LOCK(); | |
605 | ||
606 | if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && | |
607 | !(mp->xflag & COMMIT_PAGE)) { | |
608 | /* | |
609 | * Directory inode is special. It can have both an xtree tlock | |
610 | * and a dtree tlock associated with it. | |
611 | */ | |
612 | dir_xtree = 1; | |
613 | lid = jfs_ip->xtlid; | |
614 | } else | |
615 | lid = mp->lid; | |
616 | ||
617 | /* is page not locked by a transaction ? */ | |
618 | if (lid == 0) | |
619 | goto allocateLock; | |
620 | ||
621 | jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); | |
622 | ||
623 | /* is page locked by the requester transaction ? */ | |
624 | tlck = lid_to_tlock(lid); | |
7fab479b DK |
625 | if ((xtid = tlck->tid) == tid) { |
626 | TXN_UNLOCK(); | |
1da177e4 | 627 | goto grantLock; |
7fab479b | 628 | } |
1da177e4 LT |
629 | |
630 | /* | |
631 | * is page locked by anonymous transaction/lock ? | |
632 | * | |
633 | * (page update without transaction (i.e., file write) is | |
634 | * locked under anonymous transaction tid = 0: | |
635 | * anonymous tlocks maintained on anonymous tlock list of | |
636 | * the inode of the page and available to all anonymous | |
637 | * transactions until txCommit() time at which point | |
638 | * they are transferred to the transaction tlock list of | |
25985edc | 639 | * the committing transaction of the inode) |
1da177e4 LT |
640 | */ |
641 | if (xtid == 0) { | |
642 | tlck->tid = tid; | |
7fab479b | 643 | TXN_UNLOCK(); |
1da177e4 LT |
644 | tblk = tid_to_tblock(tid); |
645 | /* | |
646 | * The order of the tlocks in the transaction is important | |
647 | * (during truncate, child xtree pages must be freed before | |
648 | * parent's tlocks change the working map). | |
649 | * Take tlock off anonymous list and add to tail of | |
650 | * transaction list | |
651 | * | |
652 | * Note: We really need to get rid of the tid & lid and | |
653 | * use list_head's. This code is getting UGLY! | |
654 | */ | |
655 | if (jfs_ip->atlhead == lid) { | |
656 | if (jfs_ip->atltail == lid) { | |
657 | /* only anonymous txn. | |
658 | * Remove from anon_list | |
659 | */ | |
8a9cd6d6 | 660 | TXN_LOCK(); |
1da177e4 | 661 | list_del_init(&jfs_ip->anon_inode_list); |
8a9cd6d6 | 662 | TXN_UNLOCK(); |
1da177e4 LT |
663 | } |
664 | jfs_ip->atlhead = tlck->next; | |
665 | } else { | |
666 | lid_t last; | |
667 | for (last = jfs_ip->atlhead; | |
668 | lid_to_tlock(last)->next != lid; | |
669 | last = lid_to_tlock(last)->next) { | |
670 | assert(last); | |
671 | } | |
672 | lid_to_tlock(last)->next = tlck->next; | |
673 | if (jfs_ip->atltail == lid) | |
674 | jfs_ip->atltail = last; | |
675 | } | |
676 | ||
677 | /* insert the tlock at tail of transaction tlock list */ | |
678 | ||
679 | if (tblk->next) | |
680 | lid_to_tlock(tblk->last)->next = lid; | |
681 | else | |
682 | tblk->next = lid; | |
683 | tlck->next = 0; | |
684 | tblk->last = lid; | |
685 | ||
686 | goto grantLock; | |
687 | } | |
688 | ||
689 | goto waitLock; | |
690 | ||
691 | /* | |
692 | * allocate a tlock | |
693 | */ | |
694 | allocateLock: | |
695 | lid = txLockAlloc(); | |
696 | tlck = lid_to_tlock(lid); | |
697 | ||
698 | /* | |
699 | * initialize tlock | |
700 | */ | |
701 | tlck->tid = tid; | |
702 | ||
7fab479b DK |
703 | TXN_UNLOCK(); |
704 | ||
1da177e4 LT |
705 | /* mark tlock for meta-data page */ |
706 | if (mp->xflag & COMMIT_PAGE) { | |
707 | ||
708 | tlck->flag = tlckPAGELOCK; | |
709 | ||
710 | /* mark the page dirty and nohomeok */ | |
7fab479b | 711 | metapage_nohomeok(mp); |
1da177e4 LT |
712 | |
713 | jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", | |
7fab479b | 714 | mp, mp->nohomeok, tid, tlck); |
1da177e4 LT |
715 | |
716 | /* if anonymous transaction, and buffer is on the group | |
717 | * commit synclist, mark inode to show this. This will | |
718 | * prevent the buffer from being marked nohomeok for too | |
719 | * long a time. | |
720 | */ | |
721 | if ((tid == 0) && mp->lsn) | |
722 | set_cflag(COMMIT_Synclist, ip); | |
723 | } | |
724 | /* mark tlock for in-memory inode */ | |
725 | else | |
726 | tlck->flag = tlckINODELOCK; | |
727 | ||
438282d8 DK |
728 | if (S_ISDIR(ip->i_mode)) |
729 | tlck->flag |= tlckDIRECTORY; | |
730 | ||
1da177e4 LT |
731 | tlck->type = 0; |
732 | ||
733 | /* bind the tlock and the page */ | |
734 | tlck->ip = ip; | |
735 | tlck->mp = mp; | |
736 | if (dir_xtree) | |
737 | jfs_ip->xtlid = lid; | |
738 | else | |
739 | mp->lid = lid; | |
740 | ||
741 | /* | |
742 | * enqueue transaction lock to transaction/inode | |
743 | */ | |
744 | /* insert the tlock at tail of transaction tlock list */ | |
745 | if (tid) { | |
746 | tblk = tid_to_tblock(tid); | |
747 | if (tblk->next) | |
748 | lid_to_tlock(tblk->last)->next = lid; | |
749 | else | |
750 | tblk->next = lid; | |
751 | tlck->next = 0; | |
752 | tblk->last = lid; | |
753 | } | |
754 | /* anonymous transaction: | |
755 | * insert the tlock at head of inode anonymous tlock list | |
756 | */ | |
757 | else { | |
758 | tlck->next = jfs_ip->atlhead; | |
759 | jfs_ip->atlhead = lid; | |
760 | if (tlck->next == 0) { | |
761 | /* This inode's first anonymous transaction */ | |
762 | jfs_ip->atltail = lid; | |
7fab479b | 763 | TXN_LOCK(); |
1da177e4 LT |
764 | list_add_tail(&jfs_ip->anon_inode_list, |
765 | &TxAnchor.anon_list); | |
7fab479b | 766 | TXN_UNLOCK(); |
1da177e4 LT |
767 | } |
768 | } | |
769 | ||
770 | /* initialize type dependent area for linelock */ | |
771 | linelock = (struct linelock *) & tlck->lock; | |
772 | linelock->next = 0; | |
773 | linelock->flag = tlckLINELOCK; | |
774 | linelock->maxcnt = TLOCKSHORT; | |
775 | linelock->index = 0; | |
776 | ||
777 | switch (type & tlckTYPE) { | |
778 | case tlckDTREE: | |
779 | linelock->l2linesize = L2DTSLOTSIZE; | |
780 | break; | |
781 | ||
782 | case tlckXTREE: | |
783 | linelock->l2linesize = L2XTSLOTSIZE; | |
784 | ||
785 | xtlck = (struct xtlock *) linelock; | |
786 | xtlck->header.offset = 0; | |
787 | xtlck->header.length = 2; | |
788 | ||
789 | if (type & tlckNEW) { | |
790 | xtlck->lwm.offset = XTENTRYSTART; | |
791 | } else { | |
792 | if (mp->xflag & COMMIT_PAGE) | |
793 | p = (xtpage_t *) mp->data; | |
794 | else | |
795 | p = &jfs_ip->i_xtroot; | |
796 | xtlck->lwm.offset = | |
797 | le16_to_cpu(p->header.nextindex); | |
798 | } | |
799 | xtlck->lwm.length = 0; /* ! */ | |
800 | xtlck->twm.offset = 0; | |
801 | xtlck->hwm.offset = 0; | |
802 | ||
803 | xtlck->index = 2; | |
804 | break; | |
805 | ||
806 | case tlckINODE: | |
807 | linelock->l2linesize = L2INODESLOTSIZE; | |
808 | break; | |
809 | ||
810 | case tlckDATA: | |
811 | linelock->l2linesize = L2DATASLOTSIZE; | |
812 | break; | |
813 | ||
814 | default: | |
815 | jfs_err("UFO tlock:0x%p", tlck); | |
816 | } | |
817 | ||
818 | /* | |
819 | * update tlock vector | |
820 | */ | |
821 | grantLock: | |
822 | tlck->type |= type; | |
823 | ||
1da177e4 LT |
824 | return tlck; |
825 | ||
826 | /* | |
827 | * page is being locked by another transaction: | |
828 | */ | |
829 | waitLock: | |
830 | /* Only locks on ipimap or ipaimap should reach here */ | |
831 | /* assert(jfs_ip->fileset == AGGREGATE_I); */ | |
832 | if (jfs_ip->fileset != AGGREGATE_I) { | |
209e101b | 833 | printk(KERN_ERR "txLock: trying to lock locked page!"); |
288e4d83 DK |
834 | print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4, |
835 | ip, sizeof(*ip), 0); | |
836 | print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4, | |
837 | mp, sizeof(*mp), 0); | |
838 | print_hex_dump(KERN_ERR, "Locker's tblock: ", | |
839 | DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), | |
840 | sizeof(struct tblock), 0); | |
841 | print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4, | |
842 | tlck, sizeof(*tlck), 0); | |
1da177e4 LT |
843 | BUG(); |
844 | } | |
845 | INCREMENT(stattx.waitlock); /* statistics */ | |
7fab479b | 846 | TXN_UNLOCK(); |
1da177e4 | 847 | release_metapage(mp); |
7fab479b | 848 | TXN_LOCK(); |
0418726b | 849 | xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ |
1da177e4 LT |
850 | |
851 | jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", | |
852 | tid, xtid, lid); | |
7fab479b DK |
853 | |
854 | /* Recheck everything since dropping TXN_LOCK */ | |
855 | if (xtid && (tlck->mp == mp) && (mp->lid == lid)) | |
856 | TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); | |
857 | else | |
858 | TXN_UNLOCK(); | |
1da177e4 LT |
859 | jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); |
860 | ||
861 | return NULL; | |
862 | } | |
863 | ||
1da177e4 | 864 | /* |
f720e3ba | 865 | * NAME: txRelease() |
1da177e4 | 866 | * |
f720e3ba | 867 | * FUNCTION: Release buffers associated with transaction locks, but don't |
1da177e4 LT |
868 | * mark homeok yet. The allows other transactions to modify |
869 | * buffers, but won't let them go to disk until commit record | |
870 | * actually gets written. | |
871 | * | |
872 | * PARAMETER: | |
f720e3ba | 873 | * tblk - |
1da177e4 | 874 | * |
f720e3ba | 875 | * RETURN: Errors from subroutines. |
1da177e4 LT |
876 | */ |
877 | static void txRelease(struct tblock * tblk) | |
878 | { | |
879 | struct metapage *mp; | |
880 | lid_t lid; | |
881 | struct tlock *tlck; | |
882 | ||
883 | TXN_LOCK(); | |
884 | ||
885 | for (lid = tblk->next; lid; lid = tlck->next) { | |
886 | tlck = lid_to_tlock(lid); | |
887 | if ((mp = tlck->mp) != NULL && | |
888 | (tlck->type & tlckBTROOT) == 0) { | |
889 | assert(mp->xflag & COMMIT_PAGE); | |
890 | mp->lid = 0; | |
891 | } | |
892 | } | |
893 | ||
894 | /* | |
895 | * wakeup transactions waiting on a page locked | |
896 | * by the current transaction | |
897 | */ | |
898 | TXN_WAKEUP(&tblk->waitor); | |
899 | ||
900 | TXN_UNLOCK(); | |
901 | } | |
902 | ||
1da177e4 | 903 | /* |
f720e3ba | 904 | * NAME: txUnlock() |
1da177e4 | 905 | * |
f720e3ba DK |
906 | * FUNCTION: Initiates pageout of pages modified by tid in journalled |
907 | * objects and frees their lockwords. | |
1da177e4 LT |
908 | */ |
909 | static void txUnlock(struct tblock * tblk) | |
910 | { | |
911 | struct tlock *tlck; | |
912 | struct linelock *linelock; | |
913 | lid_t lid, next, llid, k; | |
914 | struct metapage *mp; | |
915 | struct jfs_log *log; | |
916 | int difft, diffp; | |
7fab479b | 917 | unsigned long flags; |
1da177e4 LT |
918 | |
919 | jfs_info("txUnlock: tblk = 0x%p", tblk); | |
920 | log = JFS_SBI(tblk->sb)->log; | |
921 | ||
922 | /* | |
923 | * mark page under tlock homeok (its log has been written): | |
924 | */ | |
925 | for (lid = tblk->next; lid; lid = next) { | |
926 | tlck = lid_to_tlock(lid); | |
927 | next = tlck->next; | |
928 | ||
929 | jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); | |
930 | ||
931 | /* unbind page from tlock */ | |
932 | if ((mp = tlck->mp) != NULL && | |
933 | (tlck->type & tlckBTROOT) == 0) { | |
934 | assert(mp->xflag & COMMIT_PAGE); | |
935 | ||
936 | /* hold buffer | |
1da177e4 | 937 | */ |
7fab479b | 938 | hold_metapage(mp); |
1da177e4 | 939 | |
7fab479b DK |
940 | assert(mp->nohomeok > 0); |
941 | _metapage_homeok(mp); | |
1da177e4 LT |
942 | |
943 | /* inherit younger/larger clsn */ | |
7fab479b | 944 | LOGSYNC_LOCK(log, flags); |
1da177e4 LT |
945 | if (mp->clsn) { |
946 | logdiff(difft, tblk->clsn, log); | |
947 | logdiff(diffp, mp->clsn, log); | |
948 | if (difft > diffp) | |
949 | mp->clsn = tblk->clsn; | |
950 | } else | |
951 | mp->clsn = tblk->clsn; | |
7fab479b | 952 | LOGSYNC_UNLOCK(log, flags); |
1da177e4 LT |
953 | |
954 | assert(!(tlck->flag & tlckFREEPAGE)); | |
955 | ||
7fab479b | 956 | put_metapage(mp); |
1da177e4 LT |
957 | } |
958 | ||
959 | /* insert tlock, and linelock(s) of the tlock if any, | |
960 | * at head of freelist | |
961 | */ | |
962 | TXN_LOCK(); | |
963 | ||
964 | llid = ((struct linelock *) & tlck->lock)->next; | |
965 | while (llid) { | |
966 | linelock = (struct linelock *) lid_to_tlock(llid); | |
967 | k = linelock->next; | |
968 | txLockFree(llid); | |
969 | llid = k; | |
970 | } | |
971 | txLockFree(lid); | |
972 | ||
973 | TXN_UNLOCK(); | |
974 | } | |
975 | tblk->next = tblk->last = 0; | |
976 | ||
977 | /* | |
978 | * remove tblock from logsynclist | |
979 | * (allocation map pages inherited lsn of tblk and | |
980 | * has been inserted in logsync list at txUpdateMap()) | |
981 | */ | |
982 | if (tblk->lsn) { | |
7fab479b | 983 | LOGSYNC_LOCK(log, flags); |
1da177e4 LT |
984 | log->count--; |
985 | list_del(&tblk->synclist); | |
7fab479b | 986 | LOGSYNC_UNLOCK(log, flags); |
1da177e4 LT |
987 | } |
988 | } | |
989 | ||
1da177e4 | 990 | /* |
f720e3ba | 991 | * txMaplock() |
1da177e4 LT |
992 | * |
993 | * function: allocate a transaction lock for freed page/entry; | |
f720e3ba | 994 | * for freed page, maplock is used as xtlock/dtlock type; |
1da177e4 LT |
995 | */ |
996 | struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) | |
997 | { | |
998 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | |
999 | lid_t lid; | |
1000 | struct tblock *tblk; | |
1001 | struct tlock *tlck; | |
1002 | struct maplock *maplock; | |
1003 | ||
1004 | TXN_LOCK(); | |
1005 | ||
1006 | /* | |
1007 | * allocate a tlock | |
1008 | */ | |
1009 | lid = txLockAlloc(); | |
1010 | tlck = lid_to_tlock(lid); | |
1011 | ||
1012 | /* | |
1013 | * initialize tlock | |
1014 | */ | |
1015 | tlck->tid = tid; | |
1016 | ||
1017 | /* bind the tlock and the object */ | |
1018 | tlck->flag = tlckINODELOCK; | |
438282d8 DK |
1019 | if (S_ISDIR(ip->i_mode)) |
1020 | tlck->flag |= tlckDIRECTORY; | |
1da177e4 LT |
1021 | tlck->ip = ip; |
1022 | tlck->mp = NULL; | |
1023 | ||
1024 | tlck->type = type; | |
1025 | ||
1026 | /* | |
1027 | * enqueue transaction lock to transaction/inode | |
1028 | */ | |
1029 | /* insert the tlock at tail of transaction tlock list */ | |
1030 | if (tid) { | |
1031 | tblk = tid_to_tblock(tid); | |
1032 | if (tblk->next) | |
1033 | lid_to_tlock(tblk->last)->next = lid; | |
1034 | else | |
1035 | tblk->next = lid; | |
1036 | tlck->next = 0; | |
1037 | tblk->last = lid; | |
1038 | } | |
1039 | /* anonymous transaction: | |
1040 | * insert the tlock at head of inode anonymous tlock list | |
1041 | */ | |
1042 | else { | |
1043 | tlck->next = jfs_ip->atlhead; | |
1044 | jfs_ip->atlhead = lid; | |
1045 | if (tlck->next == 0) { | |
1046 | /* This inode's first anonymous transaction */ | |
1047 | jfs_ip->atltail = lid; | |
1048 | list_add_tail(&jfs_ip->anon_inode_list, | |
1049 | &TxAnchor.anon_list); | |
1050 | } | |
1051 | } | |
1052 | ||
1053 | TXN_UNLOCK(); | |
1054 | ||
1055 | /* initialize type dependent area for maplock */ | |
1056 | maplock = (struct maplock *) & tlck->lock; | |
1057 | maplock->next = 0; | |
1058 | maplock->maxcnt = 0; | |
1059 | maplock->index = 0; | |
1060 | ||
1061 | return tlck; | |
1062 | } | |
1063 | ||
1da177e4 | 1064 | /* |
f720e3ba | 1065 | * txLinelock() |
1da177e4 LT |
1066 | * |
1067 | * function: allocate a transaction lock for log vector list | |
1068 | */ | |
1069 | struct linelock *txLinelock(struct linelock * tlock) | |
1070 | { | |
1071 | lid_t lid; | |
1072 | struct tlock *tlck; | |
1073 | struct linelock *linelock; | |
1074 | ||
1075 | TXN_LOCK(); | |
1076 | ||
1077 | /* allocate a TxLock structure */ | |
1078 | lid = txLockAlloc(); | |
1079 | tlck = lid_to_tlock(lid); | |
1080 | ||
1081 | TXN_UNLOCK(); | |
1082 | ||
1083 | /* initialize linelock */ | |
1084 | linelock = (struct linelock *) tlck; | |
1085 | linelock->next = 0; | |
1086 | linelock->flag = tlckLINELOCK; | |
1087 | linelock->maxcnt = TLOCKLONG; | |
1088 | linelock->index = 0; | |
438282d8 DK |
1089 | if (tlck->flag & tlckDIRECTORY) |
1090 | linelock->flag |= tlckDIRECTORY; | |
1da177e4 LT |
1091 | |
1092 | /* append linelock after tlock */ | |
1093 | linelock->next = tlock->next; | |
1094 | tlock->next = lid; | |
1095 | ||
1096 | return linelock; | |
1097 | } | |
1098 | ||
1da177e4 | 1099 | /* |
f720e3ba DK |
1100 | * transaction commit management |
1101 | * ----------------------------- | |
1da177e4 LT |
1102 | */ |
1103 | ||
1104 | /* | |
f720e3ba DK |
1105 | * NAME: txCommit() |
1106 | * | |
1107 | * FUNCTION: commit the changes to the objects specified in | |
1108 | * clist. For journalled segments only the | |
1109 | * changes of the caller are committed, ie by tid. | |
1110 | * for non-journalled segments the data are flushed to | |
1111 | * disk and then the change to the disk inode and indirect | |
1112 | * blocks committed (so blocks newly allocated to the | |
1113 | * segment will be made a part of the segment atomically). | |
1114 | * | |
1115 | * all of the segments specified in clist must be in | |
1116 | * one file system. no more than 6 segments are needed | |
1117 | * to handle all unix svcs. | |
1118 | * | |
1119 | * if the i_nlink field (i.e. disk inode link count) | |
1120 | * is zero, and the type of inode is a regular file or | |
1121 | * directory, or symbolic link , the inode is truncated | |
1122 | * to zero length. the truncation is committed but the | |
1123 | * VM resources are unaffected until it is closed (see | |
1124 | * iput and iclose). | |
1da177e4 LT |
1125 | * |
1126 | * PARAMETER: | |
1127 | * | |
1128 | * RETURN: | |
1129 | * | |
1130 | * serialization: | |
f720e3ba DK |
1131 | * on entry the inode lock on each segment is assumed |
1132 | * to be held. | |
1da177e4 LT |
1133 | * |
1134 | * i/o error: | |
1135 | */ | |
1136 | int txCommit(tid_t tid, /* transaction identifier */ | |
1137 | int nip, /* number of inodes to commit */ | |
1138 | struct inode **iplist, /* list of inode to commit */ | |
1139 | int flag) | |
1140 | { | |
1141 | int rc = 0; | |
1142 | struct commit cd; | |
1143 | struct jfs_log *log; | |
1144 | struct tblock *tblk; | |
1145 | struct lrd *lrd; | |
1146 | int lsn; | |
1147 | struct inode *ip; | |
1148 | struct jfs_inode_info *jfs_ip; | |
1149 | int k, n; | |
1150 | ino_t top; | |
1151 | struct super_block *sb; | |
1152 | ||
1153 | jfs_info("txCommit, tid = %d, flag = %d", tid, flag); | |
1154 | /* is read-only file system ? */ | |
1155 | if (isReadOnly(iplist[0])) { | |
1156 | rc = -EROFS; | |
1157 | goto TheEnd; | |
1158 | } | |
1159 | ||
1160 | sb = cd.sb = iplist[0]->i_sb; | |
1161 | cd.tid = tid; | |
1162 | ||
1163 | if (tid == 0) | |
1164 | tid = txBegin(sb, 0); | |
1165 | tblk = tid_to_tblock(tid); | |
1166 | ||
1167 | /* | |
1168 | * initialize commit structure | |
1169 | */ | |
1170 | log = JFS_SBI(sb)->log; | |
1171 | cd.log = log; | |
1172 | ||
1173 | /* initialize log record descriptor in commit */ | |
1174 | lrd = &cd.lrd; | |
1175 | lrd->logtid = cpu_to_le32(tblk->logtid); | |
1176 | lrd->backchain = 0; | |
1177 | ||
1178 | tblk->xflag |= flag; | |
1179 | ||
1180 | if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) | |
1181 | tblk->xflag |= COMMIT_LAZY; | |
1182 | /* | |
f720e3ba | 1183 | * prepare non-journaled objects for commit |
1da177e4 LT |
1184 | * |
1185 | * flush data pages of non-journaled file | |
1186 | * to prevent the file getting non-initialized disk blocks | |
1187 | * in case of crash. | |
1188 | * (new blocks - ) | |
1189 | */ | |
1190 | cd.iplist = iplist; | |
1191 | cd.nip = nip; | |
1192 | ||
1193 | /* | |
f720e3ba | 1194 | * acquire transaction lock on (on-disk) inodes |
1da177e4 LT |
1195 | * |
1196 | * update on-disk inode from in-memory inode | |
1197 | * acquiring transaction locks for AFTER records | |
1198 | * on the on-disk inode of file object | |
1199 | * | |
1200 | * sort the inodes array by inode number in descending order | |
1201 | * to prevent deadlock when acquiring transaction lock | |
1202 | * of on-disk inodes on multiple on-disk inode pages by | |
1203 | * multiple concurrent transactions | |
1204 | */ | |
1205 | for (k = 0; k < cd.nip; k++) { | |
1206 | top = (cd.iplist[k])->i_ino; | |
1207 | for (n = k + 1; n < cd.nip; n++) { | |
1208 | ip = cd.iplist[n]; | |
1209 | if (ip->i_ino > top) { | |
1210 | top = ip->i_ino; | |
1211 | cd.iplist[n] = cd.iplist[k]; | |
1212 | cd.iplist[k] = ip; | |
1213 | } | |
1214 | } | |
1215 | ||
1216 | ip = cd.iplist[k]; | |
1217 | jfs_ip = JFS_IP(ip); | |
1218 | ||
1219 | /* | |
1220 | * BUGBUG - This code has temporarily been removed. The | |
1221 | * intent is to ensure that any file data is written before | |
1222 | * the metadata is committed to the journal. This prevents | |
1223 | * uninitialized data from appearing in a file after the | |
1224 | * journal has been replayed. (The uninitialized data | |
1225 | * could be sensitive data removed by another user.) | |
1226 | * | |
1227 | * The problem now is that we are holding the IWRITELOCK | |
1228 | * on the inode, and calling filemap_fdatawrite on an | |
1229 | * unmapped page will cause a deadlock in jfs_get_block. | |
1230 | * | |
1231 | * The long term solution is to pare down the use of | |
1232 | * IWRITELOCK. We are currently holding it too long. | |
1233 | * We could also be smarter about which data pages need | |
1234 | * to be written before the transaction is committed and | |
1235 | * when we don't need to worry about it at all. | |
1236 | * | |
1237 | * if ((!S_ISDIR(ip->i_mode)) | |
28fd1298 OH |
1238 | * && (tblk->flag & COMMIT_DELETE) == 0) |
1239 | * filemap_write_and_wait(ip->i_mapping); | |
1da177e4 LT |
1240 | */ |
1241 | ||
1242 | /* | |
1243 | * Mark inode as not dirty. It will still be on the dirty | |
1244 | * inode list, but we'll know not to commit it again unless | |
1245 | * it gets marked dirty again | |
1246 | */ | |
1247 | clear_cflag(COMMIT_Dirty, ip); | |
1248 | ||
1249 | /* inherit anonymous tlock(s) of inode */ | |
1250 | if (jfs_ip->atlhead) { | |
1251 | lid_to_tlock(jfs_ip->atltail)->next = tblk->next; | |
1252 | tblk->next = jfs_ip->atlhead; | |
1253 | if (!tblk->last) | |
1254 | tblk->last = jfs_ip->atltail; | |
1255 | jfs_ip->atlhead = jfs_ip->atltail = 0; | |
1256 | TXN_LOCK(); | |
1257 | list_del_init(&jfs_ip->anon_inode_list); | |
1258 | TXN_UNLOCK(); | |
1259 | } | |
1260 | ||
1261 | /* | |
1262 | * acquire transaction lock on on-disk inode page | |
1263 | * (become first tlock of the tblk's tlock list) | |
1264 | */ | |
1265 | if (((rc = diWrite(tid, ip)))) | |
1266 | goto out; | |
1267 | } | |
1268 | ||
1269 | /* | |
f720e3ba | 1270 | * write log records from transaction locks |
1da177e4 LT |
1271 | * |
1272 | * txUpdateMap() resets XAD_NEW in XAD. | |
1273 | */ | |
1274 | if ((rc = txLog(log, tblk, &cd))) | |
1275 | goto TheEnd; | |
1276 | ||
1277 | /* | |
1278 | * Ensure that inode isn't reused before | |
1279 | * lazy commit thread finishes processing | |
1280 | */ | |
1281 | if (tblk->xflag & COMMIT_DELETE) { | |
7de9c6ee | 1282 | ihold(tblk->u.ip); |
1da177e4 LT |
1283 | /* |
1284 | * Avoid a rare deadlock | |
1285 | * | |
1286 | * If the inode is locked, we may be blocked in | |
1287 | * jfs_commit_inode. If so, we don't want the | |
1288 | * lazy_commit thread doing the last iput() on the inode | |
1289 | * since that may block on the locked inode. Instead, | |
1290 | * commit the transaction synchronously, so the last iput | |
1291 | * will be done by the calling thread (or later) | |
1292 | */ | |
1c0eeaf5 JE |
1293 | /* |
1294 | * I believe this code is no longer needed. Splitting I_LOCK | |
eaff8079 | 1295 | * into two bits, I_NEW and I_SYNC should prevent this |
1c0eeaf5 JE |
1296 | * deadlock as well. But since I don't have a JFS testload |
1297 | * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. | |
1298 | * Joern | |
1299 | */ | |
1300 | if (tblk->u.ip->i_state & I_SYNC) | |
1da177e4 LT |
1301 | tblk->xflag &= ~COMMIT_LAZY; |
1302 | } | |
1303 | ||
1304 | ASSERT((!(tblk->xflag & COMMIT_DELETE)) || | |
1305 | ((tblk->u.ip->i_nlink == 0) && | |
1306 | !test_cflag(COMMIT_Nolink, tblk->u.ip))); | |
1307 | ||
1308 | /* | |
f720e3ba | 1309 | * write COMMIT log record |
1da177e4 LT |
1310 | */ |
1311 | lrd->type = cpu_to_le16(LOG_COMMIT); | |
1312 | lrd->length = 0; | |
1313 | lsn = lmLog(log, tblk, lrd, NULL); | |
1314 | ||
1315 | lmGroupCommit(log, tblk); | |
1316 | ||
1317 | /* | |
f720e3ba | 1318 | * - transaction is now committed - |
1da177e4 LT |
1319 | */ |
1320 | ||
1321 | /* | |
1322 | * force pages in careful update | |
1323 | * (imap addressing structure update) | |
1324 | */ | |
1325 | if (flag & COMMIT_FORCE) | |
1326 | txForce(tblk); | |
1327 | ||
1328 | /* | |
f720e3ba | 1329 | * update allocation map. |
1da177e4 LT |
1330 | * |
1331 | * update inode allocation map and inode: | |
1332 | * free pager lock on memory object of inode if any. | |
f720e3ba | 1333 | * update block allocation map. |
1da177e4 LT |
1334 | * |
1335 | * txUpdateMap() resets XAD_NEW in XAD. | |
1336 | */ | |
1337 | if (tblk->xflag & COMMIT_FORCE) | |
1338 | txUpdateMap(tblk); | |
1339 | ||
1340 | /* | |
f720e3ba | 1341 | * free transaction locks and pageout/free pages |
1da177e4 LT |
1342 | */ |
1343 | txRelease(tblk); | |
1344 | ||
1345 | if ((tblk->flag & tblkGC_LAZY) == 0) | |
1346 | txUnlock(tblk); | |
1347 | ||
1348 | ||
1349 | /* | |
f720e3ba | 1350 | * reset in-memory object state |
1da177e4 LT |
1351 | */ |
1352 | for (k = 0; k < cd.nip; k++) { | |
1353 | ip = cd.iplist[k]; | |
1354 | jfs_ip = JFS_IP(ip); | |
1355 | ||
1356 | /* | |
1357 | * reset in-memory inode state | |
1358 | */ | |
1359 | jfs_ip->bxflag = 0; | |
1360 | jfs_ip->blid = 0; | |
1361 | } | |
1362 | ||
1363 | out: | |
1364 | if (rc != 0) | |
1365 | txAbort(tid, 1); | |
1366 | ||
1367 | TheEnd: | |
1368 | jfs_info("txCommit: tid = %d, returning %d", tid, rc); | |
1369 | return rc; | |
1370 | } | |
1371 | ||
1da177e4 | 1372 | /* |
f720e3ba | 1373 | * NAME: txLog() |
1da177e4 | 1374 | * |
f720e3ba DK |
1375 | * FUNCTION: Writes AFTER log records for all lines modified |
1376 | * by tid for segments specified by inodes in comdata. | |
1377 | * Code assumes only WRITELOCKS are recorded in lockwords. | |
1da177e4 LT |
1378 | * |
1379 | * PARAMETERS: | |
1380 | * | |
1381 | * RETURN : | |
1382 | */ | |
1383 | static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) | |
1384 | { | |
1385 | int rc = 0; | |
1386 | struct inode *ip; | |
1387 | lid_t lid; | |
1388 | struct tlock *tlck; | |
1389 | struct lrd *lrd = &cd->lrd; | |
1390 | ||
1391 | /* | |
1392 | * write log record(s) for each tlock of transaction, | |
1393 | */ | |
1394 | for (lid = tblk->next; lid; lid = tlck->next) { | |
1395 | tlck = lid_to_tlock(lid); | |
1396 | ||
1397 | tlck->flag |= tlckLOG; | |
1398 | ||
1399 | /* initialize lrd common */ | |
1400 | ip = tlck->ip; | |
1401 | lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); | |
1402 | lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); | |
1403 | lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); | |
1404 | ||
1405 | /* write log record of page from the tlock */ | |
1406 | switch (tlck->type & tlckTYPE) { | |
1407 | case tlckXTREE: | |
1408 | xtLog(log, tblk, lrd, tlck); | |
1409 | break; | |
1410 | ||
1411 | case tlckDTREE: | |
1412 | dtLog(log, tblk, lrd, tlck); | |
1413 | break; | |
1414 | ||
1415 | case tlckINODE: | |
1416 | diLog(log, tblk, lrd, tlck, cd); | |
1417 | break; | |
1418 | ||
1419 | case tlckMAP: | |
1420 | mapLog(log, tblk, lrd, tlck); | |
1421 | break; | |
1422 | ||
1423 | case tlckDATA: | |
1424 | dataLog(log, tblk, lrd, tlck); | |
1425 | break; | |
1426 | ||
1427 | default: | |
1428 | jfs_err("UFO tlock:0x%p", tlck); | |
1429 | } | |
1430 | } | |
1431 | ||
1432 | return rc; | |
1433 | } | |
1434 | ||
1da177e4 | 1435 | /* |
f720e3ba | 1436 | * diLog() |
1da177e4 | 1437 | * |
f720e3ba | 1438 | * function: log inode tlock and format maplock to update bmap; |
1da177e4 LT |
1439 | */ |
1440 | static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |
f720e3ba | 1441 | struct tlock * tlck, struct commit * cd) |
1da177e4 LT |
1442 | { |
1443 | int rc = 0; | |
1444 | struct metapage *mp; | |
1445 | pxd_t *pxd; | |
1446 | struct pxd_lock *pxdlock; | |
1447 | ||
1448 | mp = tlck->mp; | |
1449 | ||
1450 | /* initialize as REDOPAGE record format */ | |
1451 | lrd->log.redopage.type = cpu_to_le16(LOG_INODE); | |
1452 | lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); | |
1453 | ||
1454 | pxd = &lrd->log.redopage.pxd; | |
1455 | ||
1456 | /* | |
f720e3ba | 1457 | * inode after image |
1da177e4 LT |
1458 | */ |
1459 | if (tlck->type & tlckENTRY) { | |
1460 | /* log after-image for logredo(): */ | |
1461 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | |
1da177e4 LT |
1462 | PXDaddress(pxd, mp->index); |
1463 | PXDlength(pxd, | |
1464 | mp->logical_size >> tblk->sb->s_blocksize_bits); | |
1465 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | |
1466 | ||
1467 | /* mark page as homeward bound */ | |
1468 | tlck->flag |= tlckWRITEPAGE; | |
1469 | } else if (tlck->type & tlckFREE) { | |
1470 | /* | |
f720e3ba | 1471 | * free inode extent |
1da177e4 LT |
1472 | * |
1473 | * (pages of the freed inode extent have been invalidated and | |
1474 | * a maplock for free of the extent has been formatted at | |
1475 | * txLock() time); | |
1476 | * | |
1477 | * the tlock had been acquired on the inode allocation map page | |
1478 | * (iag) that specifies the freed extent, even though the map | |
1479 | * page is not itself logged, to prevent pageout of the map | |
1480 | * page before the log; | |
1481 | */ | |
1482 | ||
1483 | /* log LOG_NOREDOINOEXT of the freed inode extent for | |
1484 | * logredo() to start NoRedoPage filters, and to update | |
1485 | * imap and bmap for free of the extent; | |
1486 | */ | |
1487 | lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); | |
1488 | /* | |
1489 | * For the LOG_NOREDOINOEXT record, we need | |
1490 | * to pass the IAG number and inode extent | |
1491 | * index (within that IAG) from which the | |
1492 | * the extent being released. These have been | |
1493 | * passed to us in the iplist[1] and iplist[2]. | |
1494 | */ | |
1495 | lrd->log.noredoinoext.iagnum = | |
1496 | cpu_to_le32((u32) (size_t) cd->iplist[1]); | |
1497 | lrd->log.noredoinoext.inoext_idx = | |
1498 | cpu_to_le32((u32) (size_t) cd->iplist[2]); | |
1499 | ||
1500 | pxdlock = (struct pxd_lock *) & tlck->lock; | |
1501 | *pxd = pxdlock->pxd; | |
1502 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | |
1503 | ||
1504 | /* update bmap */ | |
1505 | tlck->flag |= tlckUPDATEMAP; | |
1506 | ||
1507 | /* mark page as homeward bound */ | |
1508 | tlck->flag |= tlckWRITEPAGE; | |
1509 | } else | |
1510 | jfs_err("diLog: UFO type tlck:0x%p", tlck); | |
1511 | #ifdef _JFS_WIP | |
1512 | /* | |
f720e3ba | 1513 | * alloc/free external EA extent |
1da177e4 LT |
1514 | * |
1515 | * a maplock for txUpdateMap() to update bPWMAP for alloc/free | |
1516 | * of the extent has been formatted at txLock() time; | |
1517 | */ | |
1518 | else { | |
1519 | assert(tlck->type & tlckEA); | |
1520 | ||
1521 | /* log LOG_UPDATEMAP for logredo() to update bmap for | |
1522 | * alloc of new (and free of old) external EA extent; | |
1523 | */ | |
1524 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | |
1525 | pxdlock = (struct pxd_lock *) & tlck->lock; | |
1526 | nlock = pxdlock->index; | |
1527 | for (i = 0; i < nlock; i++, pxdlock++) { | |
1528 | if (pxdlock->flag & mlckALLOCPXD) | |
1529 | lrd->log.updatemap.type = | |
1530 | cpu_to_le16(LOG_ALLOCPXD); | |
1531 | else | |
1532 | lrd->log.updatemap.type = | |
1533 | cpu_to_le16(LOG_FREEPXD); | |
1534 | lrd->log.updatemap.nxd = cpu_to_le16(1); | |
1535 | lrd->log.updatemap.pxd = pxdlock->pxd; | |
1536 | lrd->backchain = | |
1537 | cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | |
1538 | } | |
1539 | ||
1540 | /* update bmap */ | |
1541 | tlck->flag |= tlckUPDATEMAP; | |
1542 | } | |
1543 | #endif /* _JFS_WIP */ | |
1544 | ||
1545 | return rc; | |
1546 | } | |
1547 | ||
1da177e4 | 1548 | /* |
f720e3ba | 1549 | * dataLog() |
1da177e4 | 1550 | * |
f720e3ba | 1551 | * function: log data tlock |
1da177e4 LT |
1552 | */ |
1553 | static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |
1554 | struct tlock * tlck) | |
1555 | { | |
1556 | struct metapage *mp; | |
1557 | pxd_t *pxd; | |
1558 | ||
1559 | mp = tlck->mp; | |
1560 | ||
1561 | /* initialize as REDOPAGE record format */ | |
1562 | lrd->log.redopage.type = cpu_to_le16(LOG_DATA); | |
1563 | lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE); | |
1564 | ||
1565 | pxd = &lrd->log.redopage.pxd; | |
1566 | ||
1567 | /* log after-image for logredo(): */ | |
1568 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | |
1569 | ||
1570 | if (jfs_dirtable_inline(tlck->ip)) { | |
1571 | /* | |
1572 | * The table has been truncated, we've must have deleted | |
1573 | * the last entry, so don't bother logging this | |
1574 | */ | |
1575 | mp->lid = 0; | |
7fab479b DK |
1576 | grab_metapage(mp); |
1577 | metapage_homeok(mp); | |
1da177e4 LT |
1578 | discard_metapage(mp); |
1579 | tlck->mp = NULL; | |
1580 | return 0; | |
1581 | } | |
1582 | ||
1583 | PXDaddress(pxd, mp->index); | |
1584 | PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); | |
1585 | ||
1586 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | |
1587 | ||
1588 | /* mark page as homeward bound */ | |
1589 | tlck->flag |= tlckWRITEPAGE; | |
1590 | ||
1591 | return 0; | |
1592 | } | |
1593 | ||
1da177e4 | 1594 | /* |
f720e3ba | 1595 | * dtLog() |
1da177e4 | 1596 | * |
f720e3ba | 1597 | * function: log dtree tlock and format maplock to update bmap; |
1da177e4 LT |
1598 | */ |
1599 | static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |
1600 | struct tlock * tlck) | |
1601 | { | |
1602 | struct metapage *mp; | |
1603 | struct pxd_lock *pxdlock; | |
1604 | pxd_t *pxd; | |
1605 | ||
1606 | mp = tlck->mp; | |
1607 | ||
1608 | /* initialize as REDOPAGE/NOREDOPAGE record format */ | |
1609 | lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); | |
1610 | lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); | |
1611 | ||
1612 | pxd = &lrd->log.redopage.pxd; | |
1613 | ||
1614 | if (tlck->type & tlckBTROOT) | |
1615 | lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); | |
1616 | ||
1617 | /* | |
f720e3ba DK |
1618 | * page extension via relocation: entry insertion; |
1619 | * page extension in-place: entry insertion; | |
1620 | * new right page from page split, reinitialized in-line | |
1621 | * root from root page split: entry insertion; | |
1da177e4 LT |
1622 | */ |
1623 | if (tlck->type & (tlckNEW | tlckEXTEND)) { | |
1624 | /* log after-image of the new page for logredo(): | |
1625 | * mark log (LOG_NEW) for logredo() to initialize | |
1626 | * freelist and update bmap for alloc of the new page; | |
1627 | */ | |
1628 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | |
1629 | if (tlck->type & tlckEXTEND) | |
1630 | lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); | |
1631 | else | |
1632 | lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); | |
1da177e4 LT |
1633 | PXDaddress(pxd, mp->index); |
1634 | PXDlength(pxd, | |
1635 | mp->logical_size >> tblk->sb->s_blocksize_bits); | |
1636 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | |
1637 | ||
1638 | /* format a maplock for txUpdateMap() to update bPMAP for | |
1639 | * alloc of the new page; | |
1640 | */ | |
1641 | if (tlck->type & tlckBTROOT) | |
1642 | return; | |
1643 | tlck->flag |= tlckUPDATEMAP; | |
1644 | pxdlock = (struct pxd_lock *) & tlck->lock; | |
1645 | pxdlock->flag = mlckALLOCPXD; | |
1646 | pxdlock->pxd = *pxd; | |
1647 | ||
1648 | pxdlock->index = 1; | |
1649 | ||
1650 | /* mark page as homeward bound */ | |
1651 | tlck->flag |= tlckWRITEPAGE; | |
1652 | return; | |
1653 | } | |
1654 | ||
1655 | /* | |
f720e3ba DK |
1656 | * entry insertion/deletion, |
1657 | * sibling page link update (old right page before split); | |
1da177e4 LT |
1658 | */ |
1659 | if (tlck->type & (tlckENTRY | tlckRELINK)) { | |
1660 | /* log after-image for logredo(): */ | |
1661 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | |
1662 | PXDaddress(pxd, mp->index); | |
1663 | PXDlength(pxd, | |
1664 | mp->logical_size >> tblk->sb->s_blocksize_bits); | |
1665 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | |
1666 | ||
1667 | /* mark page as homeward bound */ | |
1668 | tlck->flag |= tlckWRITEPAGE; | |
1669 | return; | |
1670 | } | |
1671 | ||
1672 | /* | |
f720e3ba DK |
1673 | * page deletion: page has been invalidated |
1674 | * page relocation: source extent | |
1da177e4 | 1675 | * |
f720e3ba DK |
1676 | * a maplock for free of the page has been formatted |
1677 | * at txLock() time); | |
1da177e4 LT |
1678 | */ |
1679 | if (tlck->type & (tlckFREE | tlckRELOCATE)) { | |
1680 | /* log LOG_NOREDOPAGE of the deleted page for logredo() | |
1681 | * to start NoRedoPage filter and to update bmap for free | |
1682 | * of the deletd page | |
1683 | */ | |
1684 | lrd->type = cpu_to_le16(LOG_NOREDOPAGE); | |
1685 | pxdlock = (struct pxd_lock *) & tlck->lock; | |
1686 | *pxd = pxdlock->pxd; | |
1687 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | |
1688 | ||
1689 | /* a maplock for txUpdateMap() for free of the page | |
1690 | * has been formatted at txLock() time; | |
1691 | */ | |
1692 | tlck->flag |= tlckUPDATEMAP; | |
1693 | } | |
1694 | return; | |
1695 | } | |
1696 | ||
1da177e4 | 1697 | /* |
f720e3ba | 1698 | * xtLog() |
1da177e4 | 1699 | * |
f720e3ba | 1700 | * function: log xtree tlock and format maplock to update bmap; |
1da177e4 LT |
1701 | */ |
1702 | static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | |
1703 | struct tlock * tlck) | |
1704 | { | |
1705 | struct inode *ip; | |
1706 | struct metapage *mp; | |
1707 | xtpage_t *p; | |
1708 | struct xtlock *xtlck; | |
1709 | struct maplock *maplock; | |
1710 | struct xdlistlock *xadlock; | |
1711 | struct pxd_lock *pxdlock; | |
66f3131f | 1712 | pxd_t *page_pxd; |
1da177e4 LT |
1713 | int next, lwm, hwm; |
1714 | ||
1715 | ip = tlck->ip; | |
1716 | mp = tlck->mp; | |
1717 | ||
1718 | /* initialize as REDOPAGE/NOREDOPAGE record format */ | |
1719 | lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); | |
1720 | lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); | |
1721 | ||
66f3131f | 1722 | page_pxd = &lrd->log.redopage.pxd; |
1da177e4 LT |
1723 | |
1724 | if (tlck->type & tlckBTROOT) { | |
1725 | lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); | |
1726 | p = &JFS_IP(ip)->i_xtroot; | |
1727 | if (S_ISDIR(ip->i_mode)) | |
1728 | lrd->log.redopage.type |= | |
1729 | cpu_to_le16(LOG_DIR_XTREE); | |
1730 | } else | |
1731 | p = (xtpage_t *) mp->data; | |
1732 | next = le16_to_cpu(p->header.nextindex); | |
1733 | ||
1734 | xtlck = (struct xtlock *) & tlck->lock; | |
1735 | ||
1736 | maplock = (struct maplock *) & tlck->lock; | |
1737 | xadlock = (struct xdlistlock *) maplock; | |
1738 | ||
1739 | /* | |
f720e3ba DK |
1740 | * entry insertion/extension; |
1741 | * sibling page link update (old right page before split); | |
1da177e4 LT |
1742 | */ |
1743 | if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { | |
1744 | /* log after-image for logredo(): | |
1745 | * logredo() will update bmap for alloc of new/extended | |
1746 | * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from | |
1747 | * after-image of XADlist; | |
1748 | * logredo() resets (XAD_NEW|XAD_EXTEND) flag when | |
1749 | * applying the after-image to the meta-data page. | |
1750 | */ | |
1751 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | |
66f3131f DK |
1752 | PXDaddress(page_pxd, mp->index); |
1753 | PXDlength(page_pxd, | |
1da177e4 LT |
1754 | mp->logical_size >> tblk->sb->s_blocksize_bits); |
1755 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | |
1756 | ||
1757 | /* format a maplock for txUpdateMap() to update bPMAP | |
1758 | * for alloc of new/extended extents of XAD[lwm:next) | |
1759 | * from the page itself; | |
1760 | * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. | |
1761 | */ | |
1762 | lwm = xtlck->lwm.offset; | |
1763 | if (lwm == 0) | |
1764 | lwm = XTPAGEMAXSLOT; | |
1765 | ||
1766 | if (lwm == next) | |
1767 | goto out; | |
1768 | if (lwm > next) { | |
1769 | jfs_err("xtLog: lwm > next\n"); | |
1770 | goto out; | |
1771 | } | |
1772 | tlck->flag |= tlckUPDATEMAP; | |
1773 | xadlock->flag = mlckALLOCXADLIST; | |
1774 | xadlock->count = next - lwm; | |
66f3131f | 1775 | if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { |
1da177e4 | 1776 | int i; |
66f3131f | 1777 | pxd_t *pxd; |
1da177e4 LT |
1778 | /* |
1779 | * Lazy commit may allow xtree to be modified before | |
1780 | * txUpdateMap runs. Copy xad into linelock to | |
1781 | * preserve correct data. | |
66f3131f DK |
1782 | * |
1783 | * We can fit twice as may pxd's as xads in the lock | |
1da177e4 | 1784 | */ |
66f3131f DK |
1785 | xadlock->flag = mlckALLOCPXDLIST; |
1786 | pxd = xadlock->xdlist = &xtlck->pxdlock; | |
1787 | for (i = 0; i < xadlock->count; i++) { | |
1788 | PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); | |
1789 | PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); | |
1da177e4 LT |
1790 | p->xad[lwm + i].flag &= |
1791 | ~(XAD_NEW | XAD_EXTENDED); | |
66f3131f DK |
1792 | pxd++; |
1793 | } | |
1da177e4 LT |
1794 | } else { |
1795 | /* | |
1796 | * xdlist will point to into inode's xtree, ensure | |
1797 | * that transaction is not committed lazily. | |
1798 | */ | |
66f3131f | 1799 | xadlock->flag = mlckALLOCXADLIST; |
1da177e4 LT |
1800 | xadlock->xdlist = &p->xad[lwm]; |
1801 | tblk->xflag &= ~COMMIT_LAZY; | |
1802 | } | |
1803 | jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d " | |
1804 | "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count); | |
1805 | ||
1806 | maplock->index = 1; | |
1807 | ||
1808 | out: | |
1809 | /* mark page as homeward bound */ | |
1810 | tlck->flag |= tlckWRITEPAGE; | |
1811 | ||
1812 | return; | |
1813 | } | |
1814 | ||
1815 | /* | |
f720e3ba | 1816 | * page deletion: file deletion/truncation (ref. xtTruncate()) |
1da177e4 LT |
1817 | * |
1818 | * (page will be invalidated after log is written and bmap | |
1819 | * is updated from the page); | |
1820 | */ | |
1821 | if (tlck->type & tlckFREE) { | |
1822 | /* LOG_NOREDOPAGE log for NoRedoPage filter: | |
1823 | * if page free from file delete, NoRedoFile filter from | |
1824 | * inode image of zero link count will subsume NoRedoPage | |
1825 | * filters for each page; | |
1826 | * if page free from file truncattion, write NoRedoPage | |
1827 | * filter; | |
1828 | * | |
1829 | * upadte of block allocation map for the page itself: | |
1830 | * if page free from deletion and truncation, LOG_UPDATEMAP | |
1831 | * log for the page itself is generated from processing | |
1832 | * its parent page xad entries; | |
1833 | */ | |
1834 | /* if page free from file truncation, log LOG_NOREDOPAGE | |
1835 | * of the deleted page for logredo() to start NoRedoPage | |
1836 | * filter for the page; | |
1837 | */ | |
1838 | if (tblk->xflag & COMMIT_TRUNCATE) { | |
1839 | /* write NOREDOPAGE for the page */ | |
1840 | lrd->type = cpu_to_le16(LOG_NOREDOPAGE); | |
66f3131f DK |
1841 | PXDaddress(page_pxd, mp->index); |
1842 | PXDlength(page_pxd, | |
1da177e4 LT |
1843 | mp->logical_size >> tblk->sb-> |
1844 | s_blocksize_bits); | |
1845 | lrd->backchain = | |
1846 | cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | |
1847 | ||
1848 | if (tlck->type & tlckBTROOT) { | |
1849 | /* Empty xtree must be logged */ | |
1850 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | |
1851 | lrd->backchain = | |
1852 | cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | |
1853 | } | |
1854 | } | |
1855 | ||
1856 | /* init LOG_UPDATEMAP of the freed extents | |
1857 | * XAD[XTENTRYSTART:hwm) from the deleted page itself | |
1858 | * for logredo() to update bmap; | |
1859 | */ | |
1860 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | |
1861 | lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); | |
1862 | xtlck = (struct xtlock *) & tlck->lock; | |
1863 | hwm = xtlck->hwm.offset; | |
1864 | lrd->log.updatemap.nxd = | |
1865 | cpu_to_le16(hwm - XTENTRYSTART + 1); | |
1866 | /* reformat linelock for lmLog() */ | |
1867 | xtlck->header.offset = XTENTRYSTART; | |
1868 | xtlck->header.length = hwm - XTENTRYSTART + 1; | |
1869 | xtlck->index = 1; | |
1870 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | |
1871 | ||
1872 | /* format a maplock for txUpdateMap() to update bmap | |
1873 | * to free extents of XAD[XTENTRYSTART:hwm) from the | |
1874 | * deleted page itself; | |
1875 | */ | |
1876 | tlck->flag |= tlckUPDATEMAP; | |
1da177e4 | 1877 | xadlock->count = hwm - XTENTRYSTART + 1; |
66f3131f DK |
1878 | if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { |
1879 | int i; | |
1880 | pxd_t *pxd; | |
1da177e4 LT |
1881 | /* |
1882 | * Lazy commit may allow xtree to be modified before | |
1883 | * txUpdateMap runs. Copy xad into linelock to | |
1884 | * preserve correct data. | |
66f3131f DK |
1885 | * |
1886 | * We can fit twice as many pxd's as xads in the lock |
1da177e4 | 1887 | */ |
66f3131f DK |
1888 | xadlock->flag = mlckFREEPXDLIST; |
1889 | pxd = xadlock->xdlist = &xtlck->pxdlock; | |
1890 | for (i = 0; i < xadlock->count; i++) { | |
1891 | PXDaddress(pxd, | |
1892 | addressXAD(&p->xad[XTENTRYSTART + i])); | |
1893 | PXDlength(pxd, | |
1894 | lengthXAD(&p->xad[XTENTRYSTART + i])); | |
1895 | pxd++; | |
1896 | } | |
1da177e4 LT |
1897 | } else { |
1898 | /* | |
1899 | * xdlist will point into inode's xtree, ensure |
1900 | * that transaction is not committed lazily. | |
1901 | */ | |
66f3131f | 1902 | xadlock->flag = mlckFREEXADLIST; |
1da177e4 LT |
1903 | xadlock->xdlist = &p->xad[XTENTRYSTART]; |
1904 | tblk->xflag &= ~COMMIT_LAZY; | |
1905 | } | |
1906 | jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2", | |
1907 | tlck->ip, mp, xadlock->count); | |
1908 | ||
1909 | maplock->index = 1; | |
1910 | ||
1911 | /* mark page as invalid */ | |
1912 | if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) | |
1913 | && !(tlck->type & tlckBTROOT)) | |
1914 | tlck->flag |= tlckFREEPAGE; | |
1915 | /* | |
1916 | else (tblk->xflag & COMMIT_PMAP) | |
1917 | ? release the page; | |
1918 | */ | |
1919 | return; | |
1920 | } | |
1921 | ||
1922 | /* | |
f720e3ba | 1923 | * page/entry truncation: file truncation (ref. xtTruncate()) |
1da177e4 | 1924 | * |
f720e3ba DK |
1925 | * |----------+------+------+---------------| |
1926 | * | | | | |
1927 | * | | hwm - hwm before truncation | |
1928 | * | next - truncation point | |
1929 | * lwm - lwm before truncation | |
1da177e4 LT |
1930 | * header ? |
1931 | */ | |
1932 | if (tlck->type & tlckTRUNCATE) { | |
c9e3ad60 DK |
1933 | /* This odd declaration suppresses a bogus gcc warning */ |
1934 | pxd_t pxd = pxd; /* truncated extent of xad */ | |
1da177e4 LT |
1935 | int twm; |
1936 | ||
1937 | /* | |
1938 | * For truncation the entire linelock may be used, so it would | |
1939 | * be difficult to store xad list in linelock itself. | |
1940 | * Therefore, we'll just force transaction to be committed | |
1941 | * synchronously, so that xtree pages won't be changed before | |
1942 | * txUpdateMap runs. | |
1943 | */ | |
1944 | tblk->xflag &= ~COMMIT_LAZY; | |
1945 | lwm = xtlck->lwm.offset; | |
1946 | if (lwm == 0) | |
1947 | lwm = XTPAGEMAXSLOT; | |
1948 | hwm = xtlck->hwm.offset; | |
1949 | twm = xtlck->twm.offset; | |
1950 | ||
1951 | /* | |
f720e3ba | 1952 | * write log records |
1da177e4 LT |
1953 | */ |
1954 | /* log after-image for logredo(): | |
1955 | * | |
1956 | * logredo() will update bmap for alloc of new/extended | |
1957 | * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from | |
1958 | * after-image of XADlist; | |
1959 | * logredo() resets (XAD_NEW|XAD_EXTEND) flag when | |
1960 | * applying the after-image to the meta-data page. | |
1961 | */ | |
1962 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | |
66f3131f DK |
1963 | PXDaddress(page_pxd, mp->index); |
1964 | PXDlength(page_pxd, | |
1965 | mp->logical_size >> tblk->sb->s_blocksize_bits); | |
1da177e4 LT |
1966 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); |
1967 | ||
1968 | /* | |
1969 | * truncate entry XAD[twm == next - 1]: | |
1970 | */ | |
1971 | if (twm == next - 1) { | |
1972 | /* init LOG_UPDATEMAP for logredo() to update bmap for | |
1973 | * free of truncated delta extent of the truncated | |
1974 | * entry XAD[next - 1]: | |
1975 | * (xtlck->pxdlock = truncated delta extent); | |
1976 | */ | |
1977 | pxdlock = (struct pxd_lock *) & xtlck->pxdlock; | |
1978 | /* assert(pxdlock->type & tlckTRUNCATE); */ | |
1979 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | |
1980 | lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); | |
1981 | lrd->log.updatemap.nxd = cpu_to_le16(1); | |
1982 | lrd->log.updatemap.pxd = pxdlock->pxd; | |
66f3131f | 1983 | pxd = pxdlock->pxd; /* save to format maplock */ |
1da177e4 LT |
1984 | lrd->backchain = |
1985 | cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | |
1986 | } | |
1987 | ||
1988 | /* | |
1989 | * free entries XAD[next:hwm]: | |
1990 | */ | |
1991 | if (hwm >= next) { | |
1992 | /* init LOG_UPDATEMAP of the freed extents | |
1993 | * XAD[next:hwm] from the deleted page itself | |
1994 | * for logredo() to update bmap; | |
1995 | */ | |
1996 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | |
1997 | lrd->log.updatemap.type = | |
1998 | cpu_to_le16(LOG_FREEXADLIST); | |
1999 | xtlck = (struct xtlock *) & tlck->lock; | |
2000 | hwm = xtlck->hwm.offset; | |
2001 | lrd->log.updatemap.nxd = | |
2002 | cpu_to_le16(hwm - next + 1); | |
2003 | /* reformat linelock for lmLog() */ | |
2004 | xtlck->header.offset = next; | |
2005 | xtlck->header.length = hwm - next + 1; | |
2006 | xtlck->index = 1; | |
2007 | lrd->backchain = | |
2008 | cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | |
2009 | } | |
2010 | ||
2011 | /* | |
f720e3ba | 2012 | * format maplock(s) for txUpdateMap() to update bmap |
1da177e4 LT |
2013 | */ |
2014 | maplock->index = 0; | |
2015 | ||
2016 | /* | |
2017 | * allocate entries XAD[lwm:next): | |
2018 | */ | |
2019 | if (lwm < next) { | |
2020 | /* format a maplock for txUpdateMap() to update bPMAP | |
2021 | * for alloc of new/extended extents of XAD[lwm:next) | |
2022 | * from the page itself; | |
2023 | * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. | |
2024 | */ | |
2025 | tlck->flag |= tlckUPDATEMAP; | |
2026 | xadlock->flag = mlckALLOCXADLIST; | |
2027 | xadlock->count = next - lwm; | |
2028 | xadlock->xdlist = &p->xad[lwm]; | |
2029 | ||
2030 | jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d " | |
2031 | "lwm:%d next:%d", | |
2032 | tlck->ip, mp, xadlock->count, lwm, next); | |
2033 | maplock->index++; | |
2034 | xadlock++; | |
2035 | } | |
2036 | ||
2037 | /* | |
2038 | * truncate entry XAD[twm == next - 1]: | |
2039 | */ | |
2040 | if (twm == next - 1) { | |
1da177e4 LT |
2041 | /* format a maplock for txUpdateMap() to update bmap |
2042 | * to free truncated delta extent of the truncated | |
2043 | * entry XAD[next - 1]; | |
2044 | * (xtlck->pxdlock = truncated delta extent); | |
2045 | */ | |
2046 | tlck->flag |= tlckUPDATEMAP; | |
2047 | pxdlock = (struct pxd_lock *) xadlock; | |
2048 | pxdlock->flag = mlckFREEPXD; | |
2049 | pxdlock->count = 1; | |
66f3131f | 2050 | pxdlock->pxd = pxd; |
1da177e4 LT |
2051 | |
2052 | jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d " | |
2053 | "hwm:%d", ip, mp, pxdlock->count, hwm); | |
2054 | maplock->index++; | |
2055 | xadlock++; | |
2056 | } | |
2057 | ||
2058 | /* | |
2059 | * free entries XAD[next:hwm]: | |
2060 | */ | |
2061 | if (hwm >= next) { | |
2062 | /* format a maplock for txUpdateMap() to update bmap | |
2063 | * to free extents of XAD[next:hwm] from the deleted |
2064 | * page itself; | |
2065 | */ | |
2066 | tlck->flag |= tlckUPDATEMAP; | |
2067 | xadlock->flag = mlckFREEXADLIST; | |
2068 | xadlock->count = hwm - next + 1; | |
2069 | xadlock->xdlist = &p->xad[next]; | |
2070 | ||
2071 | jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d " | |
2072 | "next:%d hwm:%d", | |
2073 | tlck->ip, mp, xadlock->count, next, hwm); | |
2074 | maplock->index++; | |
2075 | } | |
2076 | ||
2077 | /* mark page as homeward bound */ | |
2078 | tlck->flag |= tlckWRITEPAGE; | |
2079 | } | |
2080 | return; | |
2081 | } | |
2082 | ||
1da177e4 | 2083 | /* |
f720e3ba | 2084 | * mapLog() |
1da177e4 | 2085 | * |
f720e3ba | 2086 | * function: log from maplock of freed data extents; |
1da177e4 | 2087 | */ |
6cb1269b DK |
2088 | static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, |
2089 | struct tlock * tlck) | |
1da177e4 LT |
2090 | { |
2091 | struct pxd_lock *pxdlock; | |
2092 | int i, nlock; | |
2093 | pxd_t *pxd; | |
2094 | ||
2095 | /* | |
f720e3ba | 2096 | * page relocation: free the source page extent |
1da177e4 LT |
2097 | * |
2098 | * a maplock for txUpdateMap() for free of the page | |
2099 | * has been formatted at txLock() time saving the src | |
2100 | * relocated page address; | |
2101 | */ | |
2102 | if (tlck->type & tlckRELOCATE) { | |
2103 | /* log LOG_NOREDOPAGE of the old relocated page | |
2104 | * for logredo() to start NoRedoPage filter; | |
2105 | */ | |
2106 | lrd->type = cpu_to_le16(LOG_NOREDOPAGE); | |
2107 | pxdlock = (struct pxd_lock *) & tlck->lock; | |
2108 | pxd = &lrd->log.redopage.pxd; | |
2109 | *pxd = pxdlock->pxd; | |
2110 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | |
2111 | ||
2112 | /* (N.B. currently, logredo() does NOT update bmap | |
2113 | * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); | |
2114 | * if page free from relocation, LOG_UPDATEMAP log is | |
2115 | * specifically generated now for logredo() | |
2116 | * to update bmap for free of src relocated page; | |
2117 | * (new flag LOG_RELOCATE may be introduced which will | |
2118 | * inform logredo() to start NORedoPage filter and also | |
2119 | * update block allocation map at the same time, thus | |
2120 | * avoiding an extra log write); | |
2121 | */ | |
2122 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | |
2123 | lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); | |
2124 | lrd->log.updatemap.nxd = cpu_to_le16(1); | |
2125 | lrd->log.updatemap.pxd = pxdlock->pxd; | |
2126 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | |
2127 | ||
2128 | /* a maplock for txUpdateMap() for free of the page | |
2129 | * has been formatted at txLock() time; | |
2130 | */ | |
2131 | tlck->flag |= tlckUPDATEMAP; | |
2132 | return; | |
2133 | } | |
2134 | /* | |
2135 | ||
2136 | * Otherwise it's not a relocate request | |
2137 | * | |
2138 | */ | |
2139 | else { | |
2140 | /* log LOG_UPDATEMAP for logredo() to update bmap for | |
2141 | * free of truncated/relocated delta extent of the data; | |
2142 | * e.g.: external EA extent, relocated/truncated extent | |
2143 | * from xtTailgate(); | |
2144 | */ | |
2145 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | |
2146 | pxdlock = (struct pxd_lock *) & tlck->lock; | |
2147 | nlock = pxdlock->index; | |
2148 | for (i = 0; i < nlock; i++, pxdlock++) { | |
2149 | if (pxdlock->flag & mlckALLOCPXD) | |
2150 | lrd->log.updatemap.type = | |
2151 | cpu_to_le16(LOG_ALLOCPXD); | |
2152 | else | |
2153 | lrd->log.updatemap.type = | |
2154 | cpu_to_le16(LOG_FREEPXD); | |
2155 | lrd->log.updatemap.nxd = cpu_to_le16(1); | |
2156 | lrd->log.updatemap.pxd = pxdlock->pxd; | |
2157 | lrd->backchain = | |
2158 | cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | |
2159 | jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", | |
2160 | (ulong) addressPXD(&pxdlock->pxd), | |
2161 | lengthPXD(&pxdlock->pxd)); | |
2162 | } | |
2163 | ||
2164 | /* update bmap */ | |
2165 | tlck->flag |= tlckUPDATEMAP; | |
2166 | } | |
2167 | } | |
2168 | ||
1da177e4 | 2169 | /* |
f720e3ba | 2170 | * txEA() |
1da177e4 | 2171 | * |
f720e3ba DK |
2172 | * function: acquire maplock for EA/ACL extents or |
2173 | * set COMMIT_INLINE flag; | |
1da177e4 LT |
2174 | */ |
2175 | void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) | |
2176 | { | |
2177 | struct tlock *tlck = NULL; | |
2178 | struct pxd_lock *maplock = NULL, *pxdlock = NULL; | |
2179 | ||
2180 | /* | |
2181 | * format maplock for alloc of new EA extent | |
2182 | */ | |
2183 | if (newea) { | |
2184 | /* Since the newea could be a completely zeroed entry we need to | |
2185 | * check for the two flags which indicate we should actually | |
2186 | * commit new EA data | |
2187 | */ | |
2188 | if (newea->flag & DXD_EXTENT) { | |
2189 | tlck = txMaplock(tid, ip, tlckMAP); | |
2190 | maplock = (struct pxd_lock *) & tlck->lock; | |
2191 | pxdlock = (struct pxd_lock *) maplock; | |
2192 | pxdlock->flag = mlckALLOCPXD; | |
2193 | PXDaddress(&pxdlock->pxd, addressDXD(newea)); | |
2194 | PXDlength(&pxdlock->pxd, lengthDXD(newea)); | |
2195 | pxdlock++; | |
2196 | maplock->index = 1; | |
2197 | } else if (newea->flag & DXD_INLINE) { | |
2198 | tlck = NULL; | |
2199 | ||
2200 | set_cflag(COMMIT_Inlineea, ip); | |
2201 | } | |
2202 | } | |
2203 | ||
2204 | /* | |
2205 | * format maplock for free of old EA extent | |
2206 | */ | |
2207 | if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { | |
2208 | if (tlck == NULL) { | |
2209 | tlck = txMaplock(tid, ip, tlckMAP); | |
2210 | maplock = (struct pxd_lock *) & tlck->lock; | |
2211 | pxdlock = (struct pxd_lock *) maplock; | |
2212 | maplock->index = 0; | |
2213 | } | |
2214 | pxdlock->flag = mlckFREEPXD; | |
2215 | PXDaddress(&pxdlock->pxd, addressDXD(oldea)); | |
2216 | PXDlength(&pxdlock->pxd, lengthDXD(oldea)); | |
2217 | maplock->index++; | |
2218 | } | |
2219 | } | |
2220 | ||
1da177e4 | 2221 | /* |
f720e3ba | 2222 | * txForce() |
1da177e4 LT |
2223 | * |
2224 | * function: synchronously write pages locked by transaction | |
f720e3ba | 2225 | * after txLog() but before txUpdateMap(); |
1da177e4 | 2226 | */ |
6cb1269b | 2227 | static void txForce(struct tblock * tblk) |
1da177e4 LT |
2228 | { |
2229 | struct tlock *tlck; | |
2230 | lid_t lid, next; | |
2231 | struct metapage *mp; | |
2232 | ||
2233 | /* | |
2234 | * reverse the order of transaction tlocks in | |
2235 | * careful update order of address index pages | |
2236 | * (right to left, bottom up) | |
2237 | */ | |
2238 | tlck = lid_to_tlock(tblk->next); | |
2239 | lid = tlck->next; | |
2240 | tlck->next = 0; | |
2241 | while (lid) { | |
2242 | tlck = lid_to_tlock(lid); | |
2243 | next = tlck->next; | |
2244 | tlck->next = tblk->next; | |
2245 | tblk->next = lid; | |
2246 | lid = next; | |
2247 | } | |
2248 | ||
2249 | /* | |
2250 | * synchronously write the page, and | |
2251 | * hold the page for txUpdateMap(); | |
2252 | */ | |
2253 | for (lid = tblk->next; lid; lid = next) { | |
2254 | tlck = lid_to_tlock(lid); | |
2255 | next = tlck->next; | |
2256 | ||
2257 | if ((mp = tlck->mp) != NULL && | |
2258 | (tlck->type & tlckBTROOT) == 0) { | |
2259 | assert(mp->xflag & COMMIT_PAGE); | |
2260 | ||
2261 | if (tlck->flag & tlckWRITEPAGE) { | |
2262 | tlck->flag &= ~tlckWRITEPAGE; | |
2263 | ||
2264 | /* do not release page to freelist */ | |
7fab479b DK |
2265 | force_metapage(mp); |
2266 | #if 0 | |
1da177e4 LT |
2267 | /* |
2268 | * The "right" thing to do here is to | |
2269 | * synchronously write the metadata. | |
2270 | * With the current implementation this | |
2271 | * is hard since write_metapage requires | |
2272 | * us to kunmap & remap the page. If we | |
2273 | * have tlocks pointing into the metadata | |
2274 | * pages, we don't want to do this. I think | |
2275 | * we can get by with synchronously writing | |
2276 | * the pages when they are released. | |
2277 | */ | |
7fab479b | 2278 | assert(mp->nohomeok); |
1da177e4 LT |
2279 | set_bit(META_dirty, &mp->flag); |
2280 | set_bit(META_sync, &mp->flag); | |
7fab479b | 2281 | #endif |
1da177e4 LT |
2282 | } |
2283 | } | |
2284 | } | |
2285 | } | |
2286 | ||
1da177e4 | 2287 | /* |
f720e3ba | 2288 | * txUpdateMap() |
1da177e4 | 2289 | * |
f720e3ba DK |
2290 | * function: update persistent allocation map (and working map |
2291 | * if appropriate); | |
1da177e4 LT |
2292 | * |
2293 | * parameter: | |
2294 | */ | |
2295 | static void txUpdateMap(struct tblock * tblk) | |
2296 | { | |
2297 | struct inode *ip; | |
2298 | struct inode *ipimap; | |
2299 | lid_t lid; | |
2300 | struct tlock *tlck; | |
2301 | struct maplock *maplock; | |
2302 | struct pxd_lock pxdlock; | |
2303 | int maptype; | |
2304 | int k, nlock; | |
2305 | struct metapage *mp = NULL; | |
2306 | ||
2307 | ipimap = JFS_SBI(tblk->sb)->ipimap; | |
2308 | ||
2309 | maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; | |
2310 | ||
2311 | ||
2312 | /* | |
f720e3ba | 2313 | * update block allocation map |
1da177e4 LT |
2314 | * |
2315 | * update allocation state in pmap (and wmap) and | |
2316 | * update lsn of the pmap page; | |
2317 | */ | |
2318 | /* | |
2319 | * scan each tlock/page of transaction for block allocation/free: | |
2320 | * | |
2321 | * for each tlock/page of transaction, update map. | |
2322 | * ? are there tlock for pmap and pwmap at the same time ? | |
2323 | */ | |
2324 | for (lid = tblk->next; lid; lid = tlck->next) { | |
2325 | tlck = lid_to_tlock(lid); | |
2326 | ||
2327 | if ((tlck->flag & tlckUPDATEMAP) == 0) | |
2328 | continue; | |
2329 | ||
2330 | if (tlck->flag & tlckFREEPAGE) { | |
2331 | /* | |
2332 | * Another thread may attempt to reuse freed space | |
2333 | * immediately, so we want to get rid of the metapage | |
2334 | * before anyone else has a chance to get it. | |
2335 | * Lock metapage, update maps, then invalidate | |
2336 | * the metapage. | |
2337 | */ | |
2338 | mp = tlck->mp; | |
2339 | ASSERT(mp->xflag & COMMIT_PAGE); | |
7fab479b | 2340 | grab_metapage(mp); |
1da177e4 LT |
2341 | } |
2342 | ||
2343 | /* | |
2344 | * extent list: | |
2345 | * . in-line PXD list: | |
2346 | * . out-of-line XAD list: | |
2347 | */ | |
2348 | maplock = (struct maplock *) & tlck->lock; | |
2349 | nlock = maplock->index; | |
2350 | ||
2351 | for (k = 0; k < nlock; k++, maplock++) { | |
2352 | /* | |
2353 | * allocate blocks in persistent map: | |
2354 | * | |
2355 | * blocks have been allocated from wmap at alloc time; | |
2356 | */ | |
2357 | if (maplock->flag & mlckALLOC) { | |
2358 | txAllocPMap(ipimap, maplock, tblk); | |
2359 | } | |
2360 | /* | |
2361 | * free blocks in persistent and working map: | |
2362 | * blocks will be freed in pmap and then in wmap; | |
2363 | * | |
2364 | * ? tblock specifies the PMAP/PWMAP based upon | |
2365 | * transaction | |
2366 | * | |
2367 | * free blocks in persistent map: | |
2368 | * blocks will be freed from wmap at last reference | |
2369 | * release of the object for regular files; | |
2370 | * | |
2371 | * Alway free blocks from both persistent & working | |
2372 | * maps for directories | |
2373 | */ | |
2374 | else { /* (maplock->flag & mlckFREE) */ | |
2375 | ||
438282d8 | 2376 | if (tlck->flag & tlckDIRECTORY) |
1da177e4 LT |
2377 | txFreeMap(ipimap, maplock, |
2378 | tblk, COMMIT_PWMAP); | |
2379 | else | |
2380 | txFreeMap(ipimap, maplock, | |
2381 | tblk, maptype); | |
2382 | } | |
2383 | } | |
2384 | if (tlck->flag & tlckFREEPAGE) { | |
2385 | if (!(tblk->flag & tblkGC_LAZY)) { | |
2386 | /* This is equivalent to txRelease */ | |
2387 | ASSERT(mp->lid == lid); | |
2388 | tlck->mp->lid = 0; | |
2389 | } | |
7fab479b DK |
2390 | assert(mp->nohomeok == 1); |
2391 | metapage_homeok(mp); | |
1da177e4 LT |
2392 | discard_metapage(mp); |
2393 | tlck->mp = NULL; | |
2394 | } | |
2395 | } | |
2396 | /* | |
f720e3ba | 2397 | * update inode allocation map |
1da177e4 LT |
2398 | * |
2399 | * update allocation state in pmap and | |
2400 | * update lsn of the pmap page; | |
2401 | * update in-memory inode flag/state | |
2402 | * | |
2403 | * unlock mapper/write lock | |
2404 | */ | |
2405 | if (tblk->xflag & COMMIT_CREATE) { | |
4d81715f | 2406 | diUpdatePMap(ipimap, tblk->ino, false, tblk); |
1da177e4 LT |
2407 | /* update persistent block allocation map |
2408 | * for the allocation of inode extent; | |
2409 | */ | |
2410 | pxdlock.flag = mlckALLOCPXD; | |
2411 | pxdlock.pxd = tblk->u.ixpxd; | |
2412 | pxdlock.index = 1; | |
2413 | txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); | |
2414 | } else if (tblk->xflag & COMMIT_DELETE) { | |
2415 | ip = tblk->u.ip; | |
4d81715f | 2416 | diUpdatePMap(ipimap, ip->i_ino, true, tblk); |
1da177e4 LT |
2417 | iput(ip); |
2418 | } | |
2419 | } | |
2420 | ||
1da177e4 | 2421 | /* |
f720e3ba | 2422 | * txAllocPMap() |
1da177e4 LT |
2423 | * |
2424 | * function: allocate from persistent map; | |
2425 | * | |
2426 | * parameter: | |
f720e3ba DK |
2427 | * ipbmap - |
2428 | * malock - | |
2429 | * xad list: | |
2430 | * pxd: | |
2431 | * | |
2432 | * maptype - | |
2433 | * allocate from persistent map; | |
2434 | * free from persistent map; | |
2435 | * (e.g., tmp file - free from working map at releae | |
2436 | * of last reference); | |
2437 | * free from persistent and working map; | |
2438 | * | |
2439 | * lsn - log sequence number; | |
1da177e4 LT |
2440 | */ |
2441 | static void txAllocPMap(struct inode *ip, struct maplock * maplock, | |
2442 | struct tblock * tblk) | |
2443 | { | |
2444 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | |
2445 | struct xdlistlock *xadlistlock; | |
2446 | xad_t *xad; | |
2447 | s64 xaddr; | |
2448 | int xlen; | |
2449 | struct pxd_lock *pxdlock; | |
2450 | struct xdlistlock *pxdlistlock; | |
2451 | pxd_t *pxd; | |
2452 | int n; | |
2453 | ||
2454 | /* | |
2455 | * allocate from persistent map; | |
2456 | */ | |
2457 | if (maplock->flag & mlckALLOCXADLIST) { | |
2458 | xadlistlock = (struct xdlistlock *) maplock; | |
2459 | xad = xadlistlock->xdlist; | |
2460 | for (n = 0; n < xadlistlock->count; n++, xad++) { | |
2461 | if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { | |
2462 | xaddr = addressXAD(xad); | |
2463 | xlen = lengthXAD(xad); | |
4d81715f | 2464 | dbUpdatePMap(ipbmap, false, xaddr, |
1da177e4 LT |
2465 | (s64) xlen, tblk); |
2466 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); | |
2467 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", | |
2468 | (ulong) xaddr, xlen); | |
2469 | } | |
2470 | } | |
2471 | } else if (maplock->flag & mlckALLOCPXD) { | |
2472 | pxdlock = (struct pxd_lock *) maplock; | |
2473 | xaddr = addressPXD(&pxdlock->pxd); | |
2474 | xlen = lengthPXD(&pxdlock->pxd); | |
4d81715f | 2475 | dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); |
1da177e4 LT |
2476 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); |
2477 | } else { /* (maplock->flag & mlckALLOCPXDLIST) */ | |
2478 | ||
2479 | pxdlistlock = (struct xdlistlock *) maplock; | |
2480 | pxd = pxdlistlock->xdlist; | |
2481 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { | |
2482 | xaddr = addressPXD(pxd); | |
2483 | xlen = lengthPXD(pxd); | |
4d81715f | 2484 | dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, |
1da177e4 LT |
2485 | tblk); |
2486 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", | |
2487 | (ulong) xaddr, xlen); | |
2488 | } | |
2489 | } | |
2490 | } | |
2491 | ||
1da177e4 | 2492 | /* |
f720e3ba | 2493 | * txFreeMap() |
1da177e4 | 2494 | * |
f720e3ba | 2495 | * function: free from persistent and/or working map; |
1da177e4 LT |
2496 | * |
2497 | * todo: optimization | |
2498 | */ | |
2499 | void txFreeMap(struct inode *ip, | |
2500 | struct maplock * maplock, struct tblock * tblk, int maptype) | |
2501 | { | |
2502 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | |
2503 | struct xdlistlock *xadlistlock; | |
2504 | xad_t *xad; | |
2505 | s64 xaddr; | |
2506 | int xlen; | |
2507 | struct pxd_lock *pxdlock; | |
2508 | struct xdlistlock *pxdlistlock; | |
2509 | pxd_t *pxd; | |
2510 | int n; | |
2511 | ||
2512 | jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", | |
2513 | tblk, maplock, maptype); | |
2514 | ||
2515 | /* | |
2516 | * free from persistent map; | |
2517 | */ | |
2518 | if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { | |
2519 | if (maplock->flag & mlckFREEXADLIST) { | |
2520 | xadlistlock = (struct xdlistlock *) maplock; | |
2521 | xad = xadlistlock->xdlist; | |
2522 | for (n = 0; n < xadlistlock->count; n++, xad++) { | |
2523 | if (!(xad->flag & XAD_NEW)) { | |
2524 | xaddr = addressXAD(xad); | |
2525 | xlen = lengthXAD(xad); | |
4d81715f | 2526 | dbUpdatePMap(ipbmap, true, xaddr, |
1da177e4 LT |
2527 | (s64) xlen, tblk); |
2528 | jfs_info("freePMap: xaddr:0x%lx " | |
2529 | "xlen:%d", | |
2530 | (ulong) xaddr, xlen); | |
2531 | } | |
2532 | } | |
2533 | } else if (maplock->flag & mlckFREEPXD) { | |
2534 | pxdlock = (struct pxd_lock *) maplock; | |
2535 | xaddr = addressPXD(&pxdlock->pxd); | |
2536 | xlen = lengthPXD(&pxdlock->pxd); | |
4d81715f | 2537 | dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, |
1da177e4 LT |
2538 | tblk); |
2539 | jfs_info("freePMap: xaddr:0x%lx xlen:%d", | |
2540 | (ulong) xaddr, xlen); | |
2541 | } else { /* (maplock->flag & mlckALLOCPXDLIST) */ | |
2542 | ||
2543 | pxdlistlock = (struct xdlistlock *) maplock; | |
2544 | pxd = pxdlistlock->xdlist; | |
2545 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { | |
2546 | xaddr = addressPXD(pxd); | |
2547 | xlen = lengthPXD(pxd); | |
4d81715f | 2548 | dbUpdatePMap(ipbmap, true, xaddr, |
1da177e4 LT |
2549 | (s64) xlen, tblk); |
2550 | jfs_info("freePMap: xaddr:0x%lx xlen:%d", | |
2551 | (ulong) xaddr, xlen); | |
2552 | } | |
2553 | } | |
2554 | } | |
2555 | ||
2556 | /* | |
2557 | * free from working map; | |
2558 | */ | |
2559 | if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { | |
2560 | if (maplock->flag & mlckFREEXADLIST) { | |
2561 | xadlistlock = (struct xdlistlock *) maplock; | |
2562 | xad = xadlistlock->xdlist; | |
2563 | for (n = 0; n < xadlistlock->count; n++, xad++) { | |
2564 | xaddr = addressXAD(xad); | |
2565 | xlen = lengthXAD(xad); | |
2566 | dbFree(ip, xaddr, (s64) xlen); | |
2567 | xad->flag = 0; | |
2568 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", | |
2569 | (ulong) xaddr, xlen); | |
2570 | } | |
2571 | } else if (maplock->flag & mlckFREEPXD) { | |
2572 | pxdlock = (struct pxd_lock *) maplock; | |
2573 | xaddr = addressPXD(&pxdlock->pxd); | |
2574 | xlen = lengthPXD(&pxdlock->pxd); | |
2575 | dbFree(ip, xaddr, (s64) xlen); | |
2576 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", | |
2577 | (ulong) xaddr, xlen); | |
2578 | } else { /* (maplock->flag & mlckFREEPXDLIST) */ | |
2579 | ||
2580 | pxdlistlock = (struct xdlistlock *) maplock; | |
2581 | pxd = pxdlistlock->xdlist; | |
2582 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { | |
2583 | xaddr = addressPXD(pxd); | |
2584 | xlen = lengthPXD(pxd); | |
2585 | dbFree(ip, xaddr, (s64) xlen); | |
2586 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", | |
2587 | (ulong) xaddr, xlen); | |
2588 | } | |
2589 | } | |
2590 | } | |
2591 | } | |
2592 | ||
1da177e4 | 2593 | /* |
f720e3ba | 2594 | * txFreelock() |
1da177e4 | 2595 | * |
f720e3ba | 2596 | * function: remove tlock from inode anonymous locklist |
1da177e4 LT |
2597 | */ |
2598 | void txFreelock(struct inode *ip) | |
2599 | { | |
2600 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | |
2601 | struct tlock *xtlck, *tlck; | |
2602 | lid_t xlid = 0, lid; | |
2603 | ||
2604 | if (!jfs_ip->atlhead) | |
2605 | return; | |
2606 | ||
2607 | TXN_LOCK(); | |
2608 | xtlck = (struct tlock *) &jfs_ip->atlhead; | |
2609 | ||
2610 | while ((lid = xtlck->next) != 0) { | |
2611 | tlck = lid_to_tlock(lid); | |
2612 | if (tlck->flag & tlckFREELOCK) { | |
2613 | xtlck->next = tlck->next; | |
2614 | txLockFree(lid); | |
2615 | } else { | |
2616 | xtlck = tlck; | |
2617 | xlid = lid; | |
2618 | } | |
2619 | } | |
2620 | ||
2621 | if (jfs_ip->atlhead) | |
2622 | jfs_ip->atltail = xlid; | |
2623 | else { | |
2624 | jfs_ip->atltail = 0; | |
2625 | /* | |
2626 | * If inode was on anon_list, remove it | |
2627 | */ | |
2628 | list_del_init(&jfs_ip->anon_inode_list); | |
2629 | } | |
2630 | TXN_UNLOCK(); | |
2631 | } | |
2632 | ||
1da177e4 | 2633 | /* |
f720e3ba | 2634 | * txAbort() |
1da177e4 LT |
2635 | * |
2636 | * function: abort tx before commit; | |
2637 | * | |
2638 | * frees line-locks and segment locks for all | |
2639 | * segments in comdata structure. | |
2640 | * Optionally sets state of file-system to FM_DIRTY in super-block. | |
2641 | * log age of page-frames in memory for which caller has | |
2642 | * are reset to 0 (to avoid logwarap). | |
2643 | */ | |
2644 | void txAbort(tid_t tid, int dirty) | |
2645 | { | |
2646 | lid_t lid, next; | |
2647 | struct metapage *mp; | |
2648 | struct tblock *tblk = tid_to_tblock(tid); | |
2649 | struct tlock *tlck; | |
2650 | ||
2651 | /* | |
2652 | * free tlocks of the transaction | |
2653 | */ | |
2654 | for (lid = tblk->next; lid; lid = next) { | |
2655 | tlck = lid_to_tlock(lid); | |
2656 | next = tlck->next; | |
2657 | mp = tlck->mp; | |
2658 | JFS_IP(tlck->ip)->xtlid = 0; | |
2659 | ||
2660 | if (mp) { | |
2661 | mp->lid = 0; | |
2662 | ||
2663 | /* | |
2664 | * reset lsn of page to avoid logwarap: | |
2665 | * | |
2666 | * (page may have been previously committed by another | |
2667 | * transaction(s) but has not been paged, i.e., | |
2668 | * it may be on logsync list even though it has not | |
2669 | * been logged for the current tx.) | |
2670 | */ | |
2671 | if (mp->xflag & COMMIT_PAGE && mp->lsn) | |
2672 | LogSyncRelease(mp); | |
2673 | } | |
2674 | /* insert tlock at head of freelist */ | |
2675 | TXN_LOCK(); | |
2676 | txLockFree(lid); | |
2677 | TXN_UNLOCK(); | |
2678 | } | |
2679 | ||
2680 | /* caller will free the transaction block */ | |
2681 | ||
2682 | tblk->next = tblk->last = 0; | |
2683 | ||
2684 | /* | |
2685 | * mark filesystem dirty | |
2686 | */ | |
2687 | if (dirty) | |
2688 | jfs_error(tblk->sb, "txAbort"); | |
2689 | ||
2690 | return; | |
2691 | } | |
2692 | ||
2693 | /* | |
f720e3ba | 2694 | * txLazyCommit(void) |
1da177e4 LT |
2695 | * |
2696 | * All transactions except those changing ipimap (COMMIT_FORCE) are | |
2697 | * processed by this routine. This insures that the inode and block | |
2698 | * allocation maps are updated in order. For synchronous transactions, | |
2699 | * let the user thread finish processing after txUpdateMap() is called. | |
2700 | */ | |
2701 | static void txLazyCommit(struct tblock * tblk) | |
2702 | { | |
2703 | struct jfs_log *log; | |
2704 | ||
2705 | while (((tblk->flag & tblkGC_READY) == 0) && | |
2706 | ((tblk->flag & tblkGC_UNLOCKED) == 0)) { | |
2707 | /* We must have gotten ahead of the user thread | |
2708 | */ | |
2709 | jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); | |
2710 | yield(); | |
2711 | } | |
2712 | ||
2713 | jfs_info("txLazyCommit: processing tblk 0x%p", tblk); | |
2714 | ||
2715 | txUpdateMap(tblk); | |
2716 | ||
2717 | log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; | |
2718 | ||
2719 | spin_lock_irq(&log->gclock); // LOGGC_LOCK | |
2720 | ||
2721 | tblk->flag |= tblkGC_COMMITTED; | |
2722 | ||
2723 | if (tblk->flag & tblkGC_READY) | |
2724 | log->gcrtc--; | |
2725 | ||
2726 | wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP | |
2727 | ||
2728 | /* | |
2729 | * Can't release log->gclock until we've tested tblk->flag | |
2730 | */ | |
2731 | if (tblk->flag & tblkGC_LAZY) { | |
2732 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK | |
2733 | txUnlock(tblk); | |
2734 | tblk->flag &= ~tblkGC_LAZY; | |
2735 | txEnd(tblk - TxBlock); /* Convert back to tid */ | |
2736 | } else | |
2737 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK | |
2738 | ||
2739 | jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); | |
2740 | } | |
2741 | ||
2742 | /* | |
f720e3ba | 2743 | * jfs_lazycommit(void) |
1da177e4 LT |
2744 | * |
2745 | * To be run as a kernel daemon. If lbmIODone is called in an interrupt | |
2746 | * context, or where blocking is not wanted, this routine will process | |
2747 | * committed transactions from the unlock queue. | |
2748 | */ | |
2749 | int jfs_lazycommit(void *arg) | |
2750 | { | |
2751 | int WorkDone; | |
2752 | struct tblock *tblk; | |
2753 | unsigned long flags; | |
2754 | struct jfs_sb_info *sbi; | |
2755 | ||
1da177e4 LT |
2756 | do { |
2757 | LAZY_LOCK(flags); | |
2758 | jfs_commit_thread_waking = 0; /* OK to wake another thread */ | |
2759 | while (!list_empty(&TxAnchor.unlock_queue)) { | |
2760 | WorkDone = 0; | |
2761 | list_for_each_entry(tblk, &TxAnchor.unlock_queue, | |
2762 | cqueue) { | |
2763 | ||
2764 | sbi = JFS_SBI(tblk->sb); | |
2765 | /* | |
2766 | * For each volume, the transactions must be | |
2767 | * handled in order. If another commit thread | |
2768 | * is handling a tblk for this superblock, | |
2769 | * skip it | |
2770 | */ | |
2771 | if (sbi->commit_state & IN_LAZYCOMMIT) | |
2772 | continue; | |
2773 | ||
2774 | sbi->commit_state |= IN_LAZYCOMMIT; | |
2775 | WorkDone = 1; | |
2776 | ||
2777 | /* | |
2778 | * Remove transaction from queue | |
2779 | */ | |
2780 | list_del(&tblk->cqueue); | |
2781 | ||
2782 | LAZY_UNLOCK(flags); | |
2783 | txLazyCommit(tblk); | |
2784 | LAZY_LOCK(flags); | |
2785 | ||
2786 | sbi->commit_state &= ~IN_LAZYCOMMIT; | |
2787 | /* | |
2788 | * Don't continue in the for loop. (We can't | |
2789 | * anyway, it's unsafe!) We want to go back to | |
2790 | * the beginning of the list. | |
2791 | */ | |
2792 | break; | |
2793 | } | |
2794 | ||
2795 | /* If there was nothing to do, don't continue */ | |
2796 | if (!WorkDone) | |
2797 | break; | |
2798 | } | |
2799 | /* In case a wakeup came while all threads were active */ | |
2800 | jfs_commit_thread_waking = 0; | |
2801 | ||
3e1d1d28 | 2802 | if (freezing(current)) { |
1da177e4 | 2803 | LAZY_UNLOCK(flags); |
3e1d1d28 | 2804 | refrigerator(); |
1da177e4 LT |
2805 | } else { |
2806 | DECLARE_WAITQUEUE(wq, current); | |
2807 | ||
2808 | add_wait_queue(&jfs_commit_thread_wait, &wq); | |
2809 | set_current_state(TASK_INTERRUPTIBLE); | |
2810 | LAZY_UNLOCK(flags); | |
2811 | schedule(); | |
3cbb1c8e | 2812 | __set_current_state(TASK_RUNNING); |
1da177e4 LT |
2813 | remove_wait_queue(&jfs_commit_thread_wait, &wq); |
2814 | } | |
91dbb4de | 2815 | } while (!kthread_should_stop()); |
1da177e4 LT |
2816 | |
2817 | if (!list_empty(&TxAnchor.unlock_queue)) | |
2818 | jfs_err("jfs_lazycommit being killed w/pending transactions!"); | |
2819 | else | |
2820 | jfs_info("jfs_lazycommit being killed\n"); | |
91dbb4de | 2821 | return 0; |
1da177e4 LT |
2822 | } |
2823 | ||
2824 | void txLazyUnlock(struct tblock * tblk) | |
2825 | { | |
2826 | unsigned long flags; | |
2827 | ||
2828 | LAZY_LOCK(flags); | |
2829 | ||
2830 | list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); | |
2831 | /* | |
2832 | * Don't wake up a commit thread if there is already one servicing | |
2833 | * this superblock, or if the last one we woke up hasn't started yet. | |
2834 | */ | |
2835 | if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && | |
2836 | !jfs_commit_thread_waking) { | |
2837 | jfs_commit_thread_waking = 1; | |
2838 | wake_up(&jfs_commit_thread_wait); | |
2839 | } | |
2840 | LAZY_UNLOCK(flags); | |
2841 | } | |
2842 | ||
2843 | static void LogSyncRelease(struct metapage * mp) | |
2844 | { | |
2845 | struct jfs_log *log = mp->log; | |
2846 | ||
7fab479b | 2847 | assert(mp->nohomeok); |
1da177e4 | 2848 | assert(log); |
7fab479b | 2849 | metapage_homeok(mp); |
1da177e4 LT |
2850 | } |
2851 | ||
2852 | /* | |
2853 | * txQuiesce | |
2854 | * | |
2855 | * Block all new transactions and push anonymous transactions to | |
2856 | * completion | |
2857 | * | |
2858 | * This does almost the same thing as jfs_sync below. We don't | |
2859 | * worry about deadlocking when jfs_tlocks_low is set, since we would | |
2860 | * expect jfs_sync to get us out of that jam. | |
2861 | */ | |
2862 | void txQuiesce(struct super_block *sb) | |
2863 | { | |
2864 | struct inode *ip; | |
2865 | struct jfs_inode_info *jfs_ip; | |
2866 | struct jfs_log *log = JFS_SBI(sb)->log; | |
2867 | tid_t tid; | |
2868 | ||
2869 | set_bit(log_QUIESCE, &log->flag); | |
2870 | ||
2871 | TXN_LOCK(); | |
2872 | restart: | |
2873 | while (!list_empty(&TxAnchor.anon_list)) { | |
2874 | jfs_ip = list_entry(TxAnchor.anon_list.next, | |
2875 | struct jfs_inode_info, | |
2876 | anon_inode_list); | |
2877 | ip = &jfs_ip->vfs_inode; | |
2878 | ||
2879 | /* | |
2880 | * inode will be removed from anonymous list | |
2881 | * when it is committed | |
2882 | */ | |
2883 | TXN_UNLOCK(); | |
2884 | tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); | |
1de87444 | 2885 | mutex_lock(&jfs_ip->commit_mutex); |
1da177e4 LT |
2886 | txCommit(tid, 1, &ip, 0); |
2887 | txEnd(tid); | |
1de87444 | 2888 | mutex_unlock(&jfs_ip->commit_mutex); |
1da177e4 LT |
2889 | /* |
2890 | * Just to be safe. I don't know how | |
2891 | * long we can run without blocking | |
2892 | */ | |
2893 | cond_resched(); | |
2894 | TXN_LOCK(); | |
2895 | } | |
2896 | ||
2897 | /* | |
2898 | * If jfs_sync is running in parallel, there could be some inodes | |
2899 | * on anon_list2. Let's check. | |
2900 | */ | |
2901 | if (!list_empty(&TxAnchor.anon_list2)) { | |
2902 | list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list); | |
2903 | INIT_LIST_HEAD(&TxAnchor.anon_list2); | |
2904 | goto restart; | |
2905 | } | |
2906 | TXN_UNLOCK(); | |
2907 | ||
2908 | /* | |
2909 | * We may need to kick off the group commit | |
2910 | */ | |
2911 | jfs_flush_journal(log, 0); | |
2912 | } | |
2913 | ||
2914 | /* | |
2915 | * txResume() | |
2916 | * | |
2917 | * Allows transactions to start again following txQuiesce | |
2918 | */ | |
2919 | void txResume(struct super_block *sb) | |
2920 | { | |
2921 | struct jfs_log *log = JFS_SBI(sb)->log; | |
2922 | ||
2923 | clear_bit(log_QUIESCE, &log->flag); | |
2924 | TXN_WAKEUP(&log->syncwait); | |
2925 | } | |
2926 | ||
2927 | /* | |
f720e3ba | 2928 | * jfs_sync(void) |
1da177e4 LT |
2929 | * |
2930 | * To be run as a kernel daemon. This is awakened when tlocks run low. | |
2931 | * We write any inodes that have anonymous tlocks so they will become | |
2932 | * available. | |
2933 | */ | |
2934 | int jfs_sync(void *arg) | |
2935 | { | |
2936 | struct inode *ip; | |
2937 | struct jfs_inode_info *jfs_ip; | |
2938 | int rc; | |
2939 | tid_t tid; | |
2940 | ||
1da177e4 LT |
2941 | do { |
2942 | /* | |
2943 | * write each inode on the anonymous inode list | |
2944 | */ | |
2945 | TXN_LOCK(); | |
2946 | while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { | |
2947 | jfs_ip = list_entry(TxAnchor.anon_list.next, | |
2948 | struct jfs_inode_info, | |
2949 | anon_inode_list); | |
2950 | ip = &jfs_ip->vfs_inode; | |
2951 | ||
2952 | if (! igrab(ip)) { | |
2953 | /* | |
2954 | * Inode is being freed | |
2955 | */ | |
2956 | list_del_init(&jfs_ip->anon_inode_list); | |
48ce8b05 | 2957 | } else if (mutex_trylock(&jfs_ip->commit_mutex)) { |
1da177e4 LT |
2958 | /* |
2959 | * inode will be removed from anonymous list | |
2960 | * when it is committed | |
2961 | */ | |
2962 | TXN_UNLOCK(); | |
2963 | tid = txBegin(ip->i_sb, COMMIT_INODE); | |
2964 | rc = txCommit(tid, 1, &ip, 0); | |
2965 | txEnd(tid); | |
1de87444 | 2966 | mutex_unlock(&jfs_ip->commit_mutex); |
1da177e4 LT |
2967 | |
2968 | iput(ip); | |
2969 | /* | |
2970 | * Just to be safe. I don't know how | |
2971 | * long we can run without blocking | |
2972 | */ | |
2973 | cond_resched(); | |
2974 | TXN_LOCK(); | |
2975 | } else { | |
1de87444 | 2976 | /* We can't get the commit mutex. It may |
1da177e4 LT |
2977 | * be held by a thread waiting for tlock's |
2978 | * so let's not block here. Save it to | |
2979 | * put back on the anon_list. | |
2980 | */ | |
2981 | ||
2982 | /* Take off anon_list */ | |
2983 | list_del(&jfs_ip->anon_inode_list); | |
2984 | ||
2985 | /* Put on anon_list2 */ | |
2986 | list_add(&jfs_ip->anon_inode_list, | |
2987 | &TxAnchor.anon_list2); | |
2988 | ||
2989 | TXN_UNLOCK(); | |
2990 | iput(ip); | |
2991 | TXN_LOCK(); | |
2992 | } | |
2993 | } | |
2994 | /* Add anon_list2 back to anon_list */ | |
2995 | list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); | |
2996 | ||
3e1d1d28 | 2997 | if (freezing(current)) { |
1da177e4 | 2998 | TXN_UNLOCK(); |
3e1d1d28 | 2999 | refrigerator(); |
1da177e4 | 3000 | } else { |
1da177e4 LT |
3001 | set_current_state(TASK_INTERRUPTIBLE); |
3002 | TXN_UNLOCK(); | |
3003 | schedule(); | |
3cbb1c8e | 3004 | __set_current_state(TASK_RUNNING); |
1da177e4 | 3005 | } |
91dbb4de | 3006 | } while (!kthread_should_stop()); |
1da177e4 LT |
3007 | |
3008 | jfs_info("jfs_sync being killed"); | |
91dbb4de | 3009 | return 0; |
1da177e4 LT |
3010 | } |
3011 | ||
3012 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) | |
b2e03ca7 | 3013 | static int jfs_txanchor_proc_show(struct seq_file *m, void *v) |
1da177e4 | 3014 | { |
1da177e4 LT |
3015 | char *freewait; |
3016 | char *freelockwait; | |
3017 | char *lowlockwait; | |
3018 | ||
3019 | freewait = | |
3020 | waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; | |
3021 | freelockwait = | |
3022 | waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; | |
3023 | lowlockwait = | |
3024 | waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; | |
3025 | ||
b2e03ca7 | 3026 | seq_printf(m, |
1da177e4 LT |
3027 | "JFS TxAnchor\n" |
3028 | "============\n" | |
3029 | "freetid = %d\n" | |
3030 | "freewait = %s\n" | |
3031 | "freelock = %d\n" | |
3032 | "freelockwait = %s\n" | |
3033 | "lowlockwait = %s\n" | |
3034 | "tlocksInUse = %d\n" | |
3035 | "jfs_tlocks_low = %d\n" | |
3036 | "unlock_queue is %sempty\n", | |
3037 | TxAnchor.freetid, | |
3038 | freewait, | |
3039 | TxAnchor.freelock, | |
3040 | freelockwait, | |
3041 | lowlockwait, | |
3042 | TxAnchor.tlocksInUse, | |
3043 | jfs_tlocks_low, | |
3044 | list_empty(&TxAnchor.unlock_queue) ? "" : "not "); | |
b2e03ca7 AD |
3045 | return 0; |
3046 | } | |
1da177e4 | 3047 | |
b2e03ca7 AD |
3048 | static int jfs_txanchor_proc_open(struct inode *inode, struct file *file) |
3049 | { | |
3050 | return single_open(file, jfs_txanchor_proc_show, NULL); | |
1da177e4 | 3051 | } |
b2e03ca7 AD |
3052 | |
3053 | const struct file_operations jfs_txanchor_proc_fops = { | |
3054 | .owner = THIS_MODULE, | |
3055 | .open = jfs_txanchor_proc_open, | |
3056 | .read = seq_read, | |
3057 | .llseek = seq_lseek, | |
3058 | .release = single_release, | |
3059 | }; | |
1da177e4 LT |
3060 | #endif |
3061 | ||
3062 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) | |
b2e03ca7 | 3063 | static int jfs_txstats_proc_show(struct seq_file *m, void *v) |
1da177e4 | 3064 | { |
b2e03ca7 | 3065 | seq_printf(m, |
1da177e4 LT |
3066 | "JFS TxStats\n" |
3067 | "===========\n" | |
3068 | "calls to txBegin = %d\n" | |
3069 | "txBegin blocked by sync barrier = %d\n" | |
3070 | "txBegin blocked by tlocks low = %d\n" | |
3071 | "txBegin blocked by no free tid = %d\n" | |
3072 | "calls to txBeginAnon = %d\n" | |
3073 | "txBeginAnon blocked by sync barrier = %d\n" | |
3074 | "txBeginAnon blocked by tlocks low = %d\n" | |
3075 | "calls to txLockAlloc = %d\n" | |
3076 | "tLockAlloc blocked by no free lock = %d\n", | |
3077 | TxStat.txBegin, | |
3078 | TxStat.txBegin_barrier, | |
3079 | TxStat.txBegin_lockslow, | |
3080 | TxStat.txBegin_freetid, | |
3081 | TxStat.txBeginAnon, | |
3082 | TxStat.txBeginAnon_barrier, | |
3083 | TxStat.txBeginAnon_lockslow, | |
3084 | TxStat.txLockAlloc, | |
3085 | TxStat.txLockAlloc_freelock); | |
b2e03ca7 AD |
3086 | return 0; |
3087 | } | |
1da177e4 | 3088 | |
b2e03ca7 AD |
3089 | static int jfs_txstats_proc_open(struct inode *inode, struct file *file) |
3090 | { | |
3091 | return single_open(file, jfs_txstats_proc_show, NULL); | |
1da177e4 | 3092 | } |
b2e03ca7 AD |
3093 | |
3094 | const struct file_operations jfs_txstats_proc_fops = { | |
3095 | .owner = THIS_MODULE, | |
3096 | .open = jfs_txstats_proc_open, | |
3097 | .read = seq_read, | |
3098 | .llseek = seq_lseek, | |
3099 | .release = single_release, | |
3100 | }; | |
1da177e4 | 3101 | #endif |