]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (C) International Business Machines Corp., 2000-2004 | |
3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of the GNU General Public License as published by | |
63f83c9f | 7 | * the Free Software Foundation; either version 2 of the License, or |
1da177e4 | 8 | * (at your option) any later version. |
63f83c9f | 9 | * |
1da177e4 LT |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
13 | * the GNU General Public License for more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License | |
63f83c9f | 16 | * along with this program; if not, write to the Free Software |
1da177e4 LT |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | |
19 | #ifndef _H_JFS_LOGMGR | |
20 | #define _H_JFS_LOGMGR | |
21 | ||
22 | #include "jfs_filsys.h" | |
23 | #include "jfs_lock.h" | |
24 | ||
25 | /* | |
26 | * log manager configuration parameters | |
27 | */ | |
28 | ||
29 | /* log page size */ | |
30 | #define LOGPSIZE 4096 /* log page size in bytes */ | |
31 | #define L2LOGPSIZE 12 /* log2 of LOGPSIZE */ | |
32 | ||
33 | #define LOGPAGES 16 /* Log pages per mounted file system */ | |
34 | ||
35 | /* | |
36 | * log logical volume | |
37 | * | |
63f83c9f | 38 | * a log is used to make the commit operation on journalled |
1da177e4 LT |
39 | * files within the same logical volume group atomic. |
40 | * a log is implemented with a logical volume. | |
63f83c9f | 41 | * there is one log per logical volume group. |
1da177e4 LT |
42 | * |
43 | * block 0 of the log logical volume is not used (ipl etc). | |
44 | * block 1 contains a log "superblock" and is used by logFormat(), | |
63f83c9f DK |
45 | * lmLogInit(), lmLogShutdown(), and logRedo() to record status |
46 | * of the log but is not otherwise used during normal processing. | |
1da177e4 LT |
47 | * blocks 2 - (N-1) are used to contain log records. |
48 | * | |
63f83c9f DK |
49 | * when a volume group is varied-on-line, logRedo() must have |
50 | * been executed before the file systems (logical volumes) in | |
1da177e4 LT |
51 | * the volume group can be mounted. |
52 | */ | |
53 | /* | |
54 | * log superblock (block 1 of logical volume) | |
55 | */ | |
56 | #define LOGSUPER_B 1 /* block number of the log superblock */ | |
57 | #define LOGSTART_B 2 /* first block used for log records */ | |
58 | ||
59 | #define LOGMAGIC 0x87654321 /* presumably the expected logsuper.magic - confirm in users */ | |
60 | #define LOGVERSION 1 /* presumably the expected logsuper.version - confirm in users */ | |
61 | ||
62 | #define MAX_ACTIVE 128 /* Max active file systems sharing log */ | |
63 | ||
64 | struct logsuper { | |
65 | __le32 magic; /* 4: log lv identifier */ | |
66 | __le32 version; /* 4: version number */ | |
67 | __le32 serial; /* 4: log open/mount counter */ | |
68 | __le32 size; /* 4: size in number of LOGPSIZE blocks */ | |
69 | __le32 bsize; /* 4: logical block size in bytes */ | |
70 | __le32 l2bsize; /* 4: log2 of bsize */ | |
71 | ||
72 | __le32 flag; /* 4: commit option (see jfs_filsys.h) */ | |
73 | __le32 state; /* 4: state - LOGMOUNT, LOGREDONE, LOGWRAP or LOGREADERR */ | |
74 | ||
75 | __le32 end; /* 4: addr of last log record set by logredo */ | |
76 | char uuid[16]; /* 16: 128-bit journal uuid */ | |
77 | char label[16]; /* 16: journal label */ | |
78 | struct { | |
79 | char uuid[16]; /* 16: uuid of one active file system */ | |
80 | } active[MAX_ACTIVE]; /* 2048: active file systems list */ | |
81 | }; | |
82 | ||
83 | #define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" /* all-zero uuid */ | |
84 | ||
85 | /* log flag: commit option (see jfs_filsys.h) */ | |
86 | ||
87 | /* log state */ | |
88 | #define LOGMOUNT 0 /* log mounted by lmLogInit() */ | |
89 | #define LOGREDONE 1 /* log shutdown by lmLogShutdown(). | |
90 | * log redo completed by logredo(). | |
91 | */ | |
92 | #define LOGWRAP 2 /* log wrapped */ | |
93 | #define LOGREADERR 3 /* log read error detected in logredo() */ | |
94 | ||
95 | ||
96 | /* | |
97 | * log logical page | |
98 | * | |
99 | * (this comment should be rewritten !) | |
63f83c9f | 100 | * the header and trailer structures (h,t) will normally have |
1da177e4 | 101 | * the same page and eor value. |
63f83c9f | 102 | * An exception to this occurs when a complete page write is not |
1da177e4 | 103 | * accomplished on a power failure. Since the hardware may "split write" |
63f83c9f | 104 | * sectors in the page, any out of order sequence may occur during powerfail |
1da177e4 LT |
105 | * and needs to be recognized during log replay. The xor value is |
106 | * an "exclusive or" of all log words in the page up to eor. This | |
107 | * 32 bit xor is stored with the top 16 bits in the header and the | |
108 | * bottom 16 bits in the trailer. logredo can easily recognize pages | |
63f83c9f | 109 | * that were not completed by reconstructing this xor and checking |
1da177e4 LT |
110 | * the log page. |
111 | * | |
63f83c9f DK |
112 | * Previous versions of the operating system did not allow split |
113 | * writes and detected partially written records in logredo by | |
114 | * ordering the updates to the header, trailer, and the move of data | |
115 | * into the logdata area. The order: (1) data is moved (2) header | |
116 | * is updated (3) trailer is updated. In logredo, when the header | |
117 | * differed from the trailer, the header and trailer were reconciled | |
118 | * as follows: if h.page != t.page they were set to the smaller of | |
119 | * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) | |
1da177e4 LT |
120 | * h.eor != t.eor they were set to the smaller of their two values. |
121 | */ | |
122 | struct logpage { | |
123 | struct { /* header */ | |
124 | __le32 page; /* 4: log sequence page number */ | |
125 | __le16 rsrvd; /* 2: reserved */ | |
126 | __le16 eor; /* 2: end-of-log offset of last record write */ | |
127 | } h; | |
128 | ||
129 | __le32 data[LOGPSIZE / 4 - 4]; /* log record area: page minus 8-byte header and trailer, in 32-bit words */ | |
130 | ||
131 | struct { /* trailer */ | |
132 | __le32 page; /* 4: normally the same as h.page */ | |
133 | __le16 rsrvd; /* 2: reserved */ | |
134 | __le16 eor; /* 2: normally the same as h.eor */ | |
135 | } t; | |
136 | }; | |
137 | ||
138 | #define LOGPHDRSIZE 8 /* log page header size in bytes */ | |
139 | #define LOGPTLRSIZE 8 /* log page trailer size in bytes */ | |
140 | ||
141 | ||
142 | /* | |
143 | * log record | |
144 | * | |
145 | * (this comment should be rewritten !) | |
146 | * jfs uses only "after" log records (only a single writer is allowed | |
f720e3ba | 147 | * in a page, pages are written to temporary paging space if |
1da177e4 LT |
148 | * they must be written to disk before commit, and i/o is |
149 | * scheduled for modified pages to their home location after | |
63f83c9f | 150 | * the log records containing the after values and the commit |
1da177e4 LT |
151 | * record is written to the log on disk, undo discards the copy |
152 | * in main-memory.) | |
153 | * | |
63f83c9f | 154 | * a log record consists of a data area of variable length followed by |
1da177e4 | 155 | * a descriptor of fixed size LOGRDSIZE bytes. |
f720e3ba | 156 | * the data area is rounded up to an integral number of 4-bytes and |
1da177e4 | 157 | * must be no longer than LOGPSIZE. |
63f83c9f DK |
158 | * the descriptor size is a multiple of 4 bytes and is aligned on a |
159 | * 4-byte boundary. | |
1da177e4 | 160 | * records are packed one after the other in the data area of log pages. |
63f83c9f | 161 | * (sometimes a DUMMY record is inserted so that at least one record ends |
1da177e4 | 162 | * on every page or the longest record is placed on at most two pages). |
63f83c9f | 163 | * the field eor in page header/trailer points to the byte following |
1da177e4 LT |
164 | * the last record on a page. |
165 | */ | |
166 | ||
167 | /* log record types */ | |
168 | #define LOG_COMMIT 0x8000 | |
169 | #define LOG_SYNCPT 0x4000 | |
170 | #define LOG_MOUNT 0x2000 | |
171 | #define LOG_REDOPAGE 0x0800 | |
172 | #define LOG_NOREDOPAGE 0x0080 | |
173 | #define LOG_NOREDOINOEXT 0x0040 | |
174 | #define LOG_UPDATEMAP 0x0008 | |
175 | #define LOG_NOREDOFILE 0x0001 | |
176 | ||
177 | /* REDOPAGE/NOREDOPAGE log record data type */ | |
178 | #define LOG_INODE 0x0001 | |
179 | #define LOG_XTREE 0x0002 | |
180 | #define LOG_DTREE 0x0004 | |
181 | #define LOG_BTROOT 0x0010 | |
182 | #define LOG_EA 0x0020 | |
183 | #define LOG_ACL 0x0040 | |
184 | #define LOG_DATA 0x0080 | |
185 | #define LOG_NEW 0x0100 | |
186 | #define LOG_EXTEND 0x0200 | |
187 | #define LOG_RELOCATE 0x0400 | |
188 | #define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */ | |
189 | ||
190 | /* UPDATEMAP log record descriptor type */ | |
191 | #define LOG_ALLOCXADLIST 0x0080 | |
192 | #define LOG_ALLOCPXDLIST 0x0040 | |
193 | #define LOG_ALLOCXAD 0x0020 | |
194 | #define LOG_ALLOCPXD 0x0010 | |
195 | #define LOG_FREEXADLIST 0x0008 | |
196 | #define LOG_FREEPXDLIST 0x0004 | |
197 | #define LOG_FREEXAD 0x0002 | |
198 | #define LOG_FREEPXD 0x0001 | |
199 | ||
200 | ||
201 | struct lrd { | |
202 | /* | |
203 | * type independent area | |
204 | */ | |
205 | __le32 logtid; /* 4: log transaction identifier */ | |
206 | __le32 backchain; /* 4: ptr to prev record of same transaction */ | |
207 | __le16 type; /* 2: record type (LOG_COMMIT, LOG_SYNCPT, ...) */ | |
208 | __le16 length; /* 2: length of data in record (in bytes) */ | |
209 | __le32 aggregate; /* 4: file system lv/aggregate */ | |
210 | /* (16) */ | |
211 | ||
212 | /* | |
213 | * type dependent area (20) | |
214 | */ | |
215 | union { | |
216 | ||
217 | /* | |
f720e3ba | 218 | * COMMIT: commit |
1da177e4 LT |
219 | * |
220 | * transaction commit: no type-dependent information; | |
221 | */ | |
222 | ||
223 | /* | |
f720e3ba | 224 | * REDOPAGE: after-image |
1da177e4 LT |
225 | * |
226 | * apply after-image; | |
227 | * | |
228 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | |
229 | */ | |
230 | struct { | |
231 | __le32 fileset; /* 4: fileset number */ | |
232 | __le32 inode; /* 4: inode number */ | |
233 | __le16 type; /* 2: REDOPAGE record data type (LOG_INODE, LOG_XTREE, ...) */ | |
234 | __le16 l2linesize; /* 2: log2 of line size */ | |
235 | pxd_t pxd; /* 8: on-disk page pxd */ | |
236 | } redopage; /* (20) */ | |
237 | ||
238 | /* | |
f720e3ba | 239 | * NOREDOPAGE: the page is freed |
1da177e4 LT |
240 | * |
241 | * do not apply after-image records which precede this record | |
242 | * in the log with the same page block number to this page. | |
243 | * | |
244 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | |
245 | */ | |
246 | struct { | |
247 | __le32 fileset; /* 4: fileset number */ | |
248 | __le32 inode; /* 4: inode number */ | |
249 | __le16 type; /* 2: NOREDOPAGE record type */ | |
250 | __le16 rsrvd; /* 2: reserved */ | |
251 | pxd_t pxd; /* 8: on-disk page pxd */ | |
252 | } noredopage; /* (20) */ | |
253 | ||
254 | /* | |
f720e3ba | 255 | * UPDATEMAP: update block allocation map |
1da177e4 LT |
256 | * |
257 | * either in-line PXD, | |
258 | * or out-of-line XADLIST; | |
259 | * | |
260 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | |
261 | */ | |
262 | struct { | |
263 | __le32 fileset; /* 4: fileset number */ | |
264 | __le32 inode; /* 4: inode number */ | |
265 | __le16 type; /* 2: UPDATEMAP descriptor type (LOG_ALLOCPXD, LOG_FREEPXD, ...) */ | |
266 | __le16 nxd; /* 2: number of extents */ | |
267 | pxd_t pxd; /* 8: pxd */ | |
268 | } updatemap; /* (20) */ | |
269 | ||
270 | /* | |
f720e3ba | 271 | * NOREDOINOEXT: the inode extent is freed |
1da177e4 | 272 | * |
63f83c9f DK |
273 | * do not apply after-image records which precede this |
274 | * record in the log with any of the 4 page block | |
275 | * numbers in this inode extent. | |
276 | * | |
277 | * NOTE: The fileset and pxd fields MUST remain in | |
1da177e4 LT |
278 | * the same positions as in the REDOPAGE record format. |
279 | * | |
280 | */ | |
281 | struct { | |
282 | __le32 fileset; /* 4: fileset number */ | |
283 | __le32 iagnum; /* 4: IAG number */ | |
284 | __le32 inoext_idx; /* 4: inode extent index */ | |
285 | pxd_t pxd; /* 8: on-disk page pxd */ | |
286 | } noredoinoext; /* (20) */ | |
287 | ||
288 | /* | |
f720e3ba | 289 | * SYNCPT: log sync point |
1da177e4 | 290 | * |
25985edc | 291 | * replay log up to syncpt address specified; |
1da177e4 LT |
292 | */ |
293 | struct { | |
294 | __le32 sync; /* 4: syncpt address (0 = here) */ | |
295 | } syncpt; | |
296 | ||
297 | /* | |
f720e3ba | 298 | * MOUNT: file system mount |
1da177e4 LT |
299 | * |
300 | * file system mount: no type-dependent information; | |
301 | */ | |
302 | ||
303 | /* | |
f720e3ba | 304 | * ? FREEXTENT: free specified extent(s) |
1da177e4 LT |
305 | * |
306 | * free specified extent(s) from block allocation map | |
307 | * N.B.: nextent should be length of data/sizeof(xad_t) | |
308 | */ | |
309 | struct { | |
310 | __le32 type; /* 4: FREEXTENT record type */ | |
311 | __le32 nextent; /* 4: number of extents */ | |
312 | ||
313 | /* data: PXD or XAD list */ | |
314 | } freextent; | |
315 | ||
316 | /* | |
f720e3ba | 317 | * ? NOREDOFILE: this file is freed |
1da177e4 LT |
318 | * |
319 | * do not apply records which precede this record in the log | |
320 | * with the same inode number. | |
321 | * | |
63f83c9f | 322 | * NOREDOFILE must be the first to be written at commit |
1da177e4 LT |
323 | * (last to be read in logredo()) - it prevents |
324 | * replay of preceding updates of all preceding generations | |
63f83c9f | 325 | * of the inumber esp. the on-disk inode itself. |
1da177e4 LT |
326 | */ |
327 | struct { | |
328 | __le32 fileset; /* 4: fileset number */ | |
329 | __le32 inode; /* 4: inode number */ | |
330 | } noredofile; | |
331 | ||
332 | /* | |
f720e3ba | 333 | * ? NEWPAGE: |
1da177e4 LT |
334 | * |
335 | * metadata type dependent | |
336 | */ | |
337 | struct { | |
338 | __le32 fileset; /* 4: fileset number */ | |
339 | __le32 inode; /* 4: inode number */ | |
340 | __le32 type; /* 4: NEWPAGE record type */ | |
341 | pxd_t pxd; /* 8: on-disk page pxd */ | |
342 | } newpage; | |
343 | ||
344 | /* | |
f720e3ba | 345 | * ? DUMMY: filler |
1da177e4 LT |
346 | * |
347 | * no type-dependent information | |
348 | */ | |
349 | } log; | |
350 | }; /* (36) */ | |
351 | ||
352 | #define LOGRDSIZE (sizeof(struct lrd)) /* fixed size of a log record descriptor, in bytes */ | |
353 | ||
354 | /* | |
355 | * line vector descriptor | |
356 | */ | |
357 | struct lvd { | |
358 | __le16 offset; /* 2: NOTE(review): units not shown here, presumably lines (cf. l2linesize) - confirm */ | |
359 | __le16 length; /* 2: presumably same units as offset - confirm */ | |
360 | }; | |
361 | ||
362 | ||
363 | /* | |
364 | * log logical volume | |
365 | */ | |
366 | struct jfs_log { | |
367 | ||
368 | struct list_head sb_list;/* This is used to sync metadata | |
369 | * before writing syncpt. | |
370 | */ | |
371 | struct list_head journal_list; /* Global list */ | |
372 | struct block_device *bdev; /* 4: log lv pointer */ | |
373 | int serial; /* 4: log mount serial number */ | |
374 | ||
375 | s64 base; /* @8: log extent address (inline log) */ | |
376 | int size; /* 4: log size in log pages */ | |
377 | int l2bsize; /* 4: log2 of bsize */ | |
378 | ||
5ba25331 | 379 | unsigned long flag; /* 4: flag (presumably the log_* values defined after this struct - confirm) */ |
1da177e4 LT |
380 | |
381 | struct lbuf *lbuf_free; /* 4: free lbufs */ | |
382 | wait_queue_head_t free_wait; /* 4: wait queue, presumably for a free lbuf - confirm */ | |
383 | ||
384 | /* log write */ | |
385 | int logtid; /* 4: log tid */ | |
386 | int page; /* 4: page number of eol page */ | |
387 | int eor; /* 4: eor of last record in eol page */ | |
388 | struct lbuf *bp; /* 4: current log page buffer */ | |
389 | ||
1de87444 | 390 | struct mutex loglock; /* 4: log write serialization lock */ |
1da177e4 LT |
391 | |
392 | /* syncpt */ | |
393 | int nextsync; /* 4: bytes to write before next syncpt */ | |
394 | int active; /* 4: */ | |
395 | wait_queue_head_t syncwait; /* 4: */ | |
396 | ||
397 | /* commit */ | |
398 | uint cflag; /* 4: presumably the group commit flag (logGC_PAGEOUT) - confirm */ | |
399 | struct list_head cqueue; /* FIFO commit queue */ | |
400 | struct tblock *flush_tblk; /* tblk we're waiting on for flush */ | |
401 | int gcrtc; /* 4: GC_READY transaction count */ | |
402 | struct tblock *gclrt; /* 4: latest GC_READY transaction */ | |
403 | spinlock_t gclock; /* 4: group commit lock */ | |
404 | int logsize; /* 4: log data area size in bytes */ | |
405 | int lsn; /* 4: end-of-log */ | |
406 | int clsn; /* 4: clsn */ | |
407 | int syncpt; /* 4: addr of last syncpt record */ | |
408 | int sync; /* 4: addr from last logsync() */ | |
409 | struct list_head synclist; /* 8: logsynclist anchor */ | |
410 | spinlock_t synclock; /* 4: synclist lock (taken via LOGSYNC_LOCK) */ | |
411 | struct lbuf *wqueue; /* 4: log pageout queue */ | |
412 | int count; /* 4: count */ | |
413 | char uuid[16]; /* 16: 128-bit uuid of log device */ | |
414 | ||
415 | int no_integrity; /* 3: flag to disable journaling to disk */ | |
416 | }; | |
417 | ||
418 | /* | |
419 | * Log flag | |
420 | */ | |
421 | #define log_INLINELOG 1 /* NOTE(review): these values are sequential, not bit masks; presumably bit numbers - confirm against users */ | |
422 | #define log_SYNCBARRIER 2 | |
423 | #define log_QUIESCE 3 | |
424 | #define log_FLUSH 4 | |
425 | ||
426 | /* | |
427 | * group commit flag | |
428 | */ | |
429 | /* jfs_log */ | |
430 | #define logGC_PAGEOUT 0x00000001 | |
431 | ||
432 | /* tblock/lbuf */ | |
433 | #define tblkGC_QUEUE 0x0001 | |
434 | #define tblkGC_READY 0x0002 | |
435 | #define tblkGC_COMMIT 0x0004 | |
436 | #define tblkGC_COMMITTED 0x0008 | |
437 | #define tblkGC_EOP 0x0010 | |
438 | #define tblkGC_FREE 0x0020 | |
439 | #define tblkGC_LEADER 0x0040 | |
440 | #define tblkGC_ERROR 0x0080 | |
441 | #define tblkGC_LAZY 0x0100 // D230860 | |
442 | #define tblkGC_UNLOCKED 0x0200 // D230860 | |
443 | ||
444 | /* | |
445 | * log cache buffer header | |
446 | */ | |
447 | struct lbuf { | |
448 | struct jfs_log *l_log; /* 4: log associated with buffer */ | |
449 | ||
450 | /* | |
451 | * data buffer base area | |
452 | */ | |
453 | uint l_flag; /* 4: pageout control flags */ | |
454 | ||
455 | struct lbuf *l_wqnext; /* 4: write queue link */ | |
456 | struct lbuf *l_freelist; /* 4: free list link; reused as l_redrive_next */ | |
457 | ||
458 | int l_pn; /* 4: log page number */ | |
459 | int l_eor; /* 4: log record eor */ | |
460 | int l_ceor; /* 4: committed log record eor */ | |
461 | ||
462 | s64 l_blkno; /* 8: log page block number */ | |
463 | caddr_t l_ldata; /* 4: data page */ | |
dc5798d9 | 464 | struct page *l_page; /* The page itself */ |
63f83c9f | 465 | uint l_offset; /* Offset of l_ldata within the page */ |
1da177e4 LT |
466 | |
467 | wait_queue_head_t l_ioevent; /* 4: i/o done event */ | |
1da177e4 LT |
468 | }; | |
469 | ||
470 | /* Reuse l_freelist for redrive list */ | |
471 | #define l_redrive_next l_freelist | |
472 | ||
473 | /* | |
474 | * logsynclist block | |
475 | * | |
476 | * common logsyncblk prefix for jbuf_t and tblock | |
477 | */ | |
478 | struct logsyncblk { | |
479 | u16 xflag; /* flags */ | |
480 | u16 flag; /* only meaningful in tblock */ | |
481 | lid_t lid; /* lock id */ | |
482 | s32 lsn; /* log sequence number */ | |
483 | struct list_head synclist; /* log sync list link */ | |
484 | }; | |
485 | ||
486 | /* | |
487 | * logsynclist serialization (per log) | |
488 | */ | |
489 | ||
490 | #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) | |
7fab479b DK |
491 | #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) |
492 | #define LOGSYNC_UNLOCK(log, flags) \ | |
493 | spin_unlock_irqrestore(&(log)->synclock, flags) | |
1da177e4 LT |
494 | |
495 | /* compute the difference in bytes of lsn from sync point, adding (log)->logsize when the result is negative; NOTE(review): expands to a bare { } block (not do { } while (0)) and evaluates diff several times, so pass a plain signed lvalue and avoid unbraced if/else around it */ | |
496 | #define logdiff(diff, lsn, log)\ | |
497 | {\ | |
498 | diff = (lsn) - (log)->syncpt;\ | |
499 | if (diff < 0)\ | |
500 | diff += (log)->logsize;\ | |
501 | } | |
502 | ||
503 | extern int lmLogOpen(struct super_block *sb); | |
504 | extern int lmLogClose(struct super_block *sb); | |
505 | extern int lmLogShutdown(struct jfs_log * log); | |
506 | extern int lmLogInit(struct jfs_log * log); | |
507 | extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); | |
1868f4aa DK |
508 | extern int lmGroupCommit(struct jfs_log *, struct tblock *); |
509 | extern int jfsIOWait(void *); | |
1da177e4 | 510 | extern void jfs_flush_journal(struct jfs_log * log, int wait); |
cbc3d65e | 511 | extern void jfs_syncpt(struct jfs_log *log, int hard_sync); |
1da177e4 LT |
512 | |
513 | #endif /* _H_JFS_LOGMGR */ |