Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
1da177e4 LT |
2 | /* |
3 | * Copyright (C) International Business Machines Corp., 2000-2004 | |
4 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 | |
1da177e4 LT |
5 | */ |
6 | #ifndef _H_JFS_LOGMGR | |
7 | #define _H_JFS_LOGMGR | |
8 | ||
2e3bc612 AS |
9 | #include <linux/uuid.h> |
10 | ||
1da177e4 LT |
11 | #include "jfs_filsys.h" |
12 | #include "jfs_lock.h" | |
13 | ||
14 | /* | |
15 | * log manager configuration parameters | |
16 | */ | |
17 | ||
18 | /* log page size */ | |
19 | #define LOGPSIZE 4096 | |
20 | #define L2LOGPSIZE 12 | |
21 | ||
22 | #define LOGPAGES 16 /* Log pages per mounted file system */ | |
23 | ||
24 | /* | |
25 | * log logical volume | |
26 | * | |
63f83c9f | 27 | * a log is used to make the commit operation on journalled |
1da177e4 LT |
28 | * files within the same logical volume group atomic. |
29 | * a log is implemented with a logical volume. | |
63f83c9f | 30 | * there is one log per logical volume group. |
1da177e4 LT |
31 | * |
32 | * block 0 of the log logical volume is not used (ipl etc). | |
33 | * block 1 contains a log "superblock" and is used by logFormat(), | |
63f83c9f DK |
34 | * lmLogInit(), lmLogShutdown(), and logRedo() to record status |
35 | * of the log but is not otherwise used during normal processing. | |
1da177e4 LT |
36 | * blocks 2 - (N-1) are used to contain log records. |
37 | * | |
63f83c9f DK |
38 | * when a volume group is varied-on-line, logRedo() must have |
39 | * been executed before the file systems (logical volumes) in | |
1da177e4 LT |
40 | * the volume group can be mounted. |
41 | */ | |
42 | /* | |
43 | * log superblock (block 1 of logical volume) | |
44 | */ | |
45 | #define LOGSUPER_B 1 | |
46 | #define LOGSTART_B 2 | |
47 | ||
48 | #define LOGMAGIC 0x87654321 | |
49 | #define LOGVERSION 1 | |
50 | ||
51 | #define MAX_ACTIVE 128 /* Max active file systems sharing log */ | |
52 | ||
53 | struct logsuper { /* log superblock; all integer fields are little-endian (__le32) */ | |
54 | __le32 magic; /* 4: log lv identifier (cf. LOGMAGIC) */ | |
55 | __le32 version; /* 4: version number (cf. LOGVERSION) */ | |
56 | __le32 serial; /* 4: log open/mount counter */ | |
57 | __le32 size; /* 4: size in number of LOGPSIZE blocks */ | |
58 | __le32 bsize; /* 4: logical block size in bytes */ | |
59 | __le32 l2bsize; /* 4: log2 of bsize */ | |
60 | ||
61 | __le32 flag; /* 4: commit option (see jfs_filsys.h) */ | |
62 | __le32 state; /* 4: state - see the "log state" values below (LOGMOUNT, LOGREDONE, ...) */ | |
63 | ||
64 | __le32 end; /* 4: addr of last log record set by logredo */ | |
2e3bc612 | 65 | uuid_t uuid; /* 16: 128-bit journal uuid */ |
1da177e4 LT |
66 | char label[16]; /* 16: journal label */ |
67 | struct { | |
2e3bc612 | 68 | uuid_t uuid; /* 16: one entry of the active file systems list */ |
1da177e4 LT |
69 | } active[MAX_ACTIVE]; /* 2048: active file systems list (MAX_ACTIVE entries of 16 bytes) */ |
70 | }; | |
71 | ||
1da177e4 LT |
72 | /* log flag: commit option (see jfs_filsys.h) */ |
73 | ||
74 | /* log state */ | |
75 | #define LOGMOUNT 0 /* log mounted by lmLogInit() */ | |
76 | #define LOGREDONE 1 /* log shutdown by lmLogShutdown(). | |
77 | * log redo completed by logredo(). | |
78 | */ | |
79 | #define LOGWRAP 2 /* log wrapped */ | |
80 | #define LOGREADERR 3 /* log read error detected in logredo() */ | |
81 | ||
82 | ||
83 | /* | |
84 | * log logical page | |
85 | * | |
86 | * (this comment should be rewritten !) | |
63f83c9f | 87 | * the header and trailer structures (h,t) will normally have |
1da177e4 | 88 | * the same page and eor value. |
63f83c9f | 89 | * An exception to this occurs when a complete page write is not |
1da177e4 | 90 | * accomplished on a power failure. Since the hardware may "split write" |
63f83c9f | 91 | * sectors in the page, any out of order sequence may occur during powerfail |
1da177e4 LT |
92 | * and needs to be recognized during log replay. The xor value is |
93 | * an "exclusive or" of all log words in the page up to eor. This | |
94 | * 32 bit eor is stored with the top 16 bits in the header and the | |
95 | * bottom 16 bits in the trailer. logredo can easily recognize pages | |
63f83c9f | 96 | * that were not completed by reconstructing this eor and checking |
1da177e4 LT |
97 | * the log page. |
98 | * | |
63f83c9f DK |
99 | * Previous versions of the operating system did not allow split |
100 | * writes and detected partially written records in logredo by | |
101 | * ordering the updates to the header, trailer, and the move of data | |
102 | * into the logdata area. The order: (1) data is moved (2) header | |
103 | * is updated (3) trailer is updated. In logredo, when the header | |
104 | * differed from the trailer, the header and trailer were reconciled | |
105 | * as follows: if h.page != t.page they were set to the smaller of | |
106 | * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) | |
1da177e4 LT |
107 | * h.eor != t.eor they were set to the smaller of their two values. |
108 | */ | |
109 | struct logpage { /* one log page: header, log record area, trailer */ | |
110 | struct { /* header (LOGPHDRSIZE bytes) */ | |
111 | __le32 page; /* 4: log sequence page number */ | |
112 | __le16 rsrvd; /* 2: reserved */ | |
113 | __le16 eor; /* 2: end-of-log offset of last record write */ | |
114 | } h; | |
115 | ||
116 | __le32 data[LOGPSIZE / 4 - 4]; /* log record area: LOGPSIZE / 4 words less the 4 words (16 bytes) taken by header and trailer */ | |
117 | ||
118 | struct { /* trailer (LOGPTLRSIZE bytes) */ | |
119 | __le32 page; /* 4: normally the same as h.page */ | |
120 | __le16 rsrvd; /* 2: reserved */ | |
121 | __le16 eor; /* 2: normally the same as h.eor */ | |
122 | } t; | |
123 | }; | |
124 | ||
125 | #define LOGPHDRSIZE 8 /* log page header size */ | |
126 | #define LOGPTLRSIZE 8 /* log page trailer size */ | |
127 | ||
128 | ||
129 | /* | |
130 | * log record | |
131 | * | |
132 | * (this comment should be rewritten !) | |
133 | * jfs uses only "after" log records (only a single writer is allowed | |
f720e3ba | 134 | * in a page, pages are written to temporary paging space if |
ed1c9a7a | 135 | * they must be written to disk before commit, and i/o is |
1da177e4 | 136 | * scheduled for modified pages to their home location after |
63f83c9f | 137 | * the log records containing the after values and the commit |
1da177e4 LT |
138 | * record is written to the log on disk, undo discards the copy |
139 | * in main-memory.) | |
140 | * | |
63f83c9f | 141 | * a log record consists of a data area of variable length followed by |
1da177e4 | 142 | * a descriptor of fixed size LOGRDSIZE bytes. |
f720e3ba | 143 | * the data area is rounded up to an integral number of 4-bytes and |
1da177e4 | 144 | * must be no longer than LOGPSIZE. |
63f83c9f DK |
145 | * the size of the descriptor is a multiple of 4 bytes and it is aligned on a | |
146 | * 4-byte boundary. | |
1da177e4 | 147 | * records are packed one after the other in the data area of log pages. |
63f83c9f | 148 | * (sometimes a DUMMY record is inserted so that at least one record ends |
1da177e4 | 149 | * on every page or the longest record is placed on at most two pages). |
63f83c9f | 150 | * the field eor in page header/trailer points to the byte following |
1da177e4 LT |
151 | * the last record on a page. |
152 | */ | |
153 | ||
154 | /* log record types */ | |
155 | #define LOG_COMMIT 0x8000 | |
156 | #define LOG_SYNCPT 0x4000 | |
157 | #define LOG_MOUNT 0x2000 | |
158 | #define LOG_REDOPAGE 0x0800 | |
159 | #define LOG_NOREDOPAGE 0x0080 | |
160 | #define LOG_NOREDOINOEXT 0x0040 | |
161 | #define LOG_UPDATEMAP 0x0008 | |
162 | #define LOG_NOREDOFILE 0x0001 | |
163 | ||
164 | /* REDOPAGE/NOREDOPAGE log record data type */ | |
165 | #define LOG_INODE 0x0001 | |
166 | #define LOG_XTREE 0x0002 | |
167 | #define LOG_DTREE 0x0004 | |
168 | #define LOG_BTROOT 0x0010 | |
169 | #define LOG_EA 0x0020 | |
170 | #define LOG_ACL 0x0040 | |
171 | #define LOG_DATA 0x0080 | |
172 | #define LOG_NEW 0x0100 | |
173 | #define LOG_EXTEND 0x0200 | |
174 | #define LOG_RELOCATE 0x0400 | |
175 | #define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */ | |
176 | ||
177 | /* UPDATEMAP log record descriptor type */ | |
178 | #define LOG_ALLOCXADLIST 0x0080 | |
179 | #define LOG_ALLOCPXDLIST 0x0040 | |
180 | #define LOG_ALLOCXAD 0x0020 | |
181 | #define LOG_ALLOCPXD 0x0010 | |
182 | #define LOG_FREEXADLIST 0x0008 | |
183 | #define LOG_FREEPXDLIST 0x0004 | |
184 | #define LOG_FREEXAD 0x0002 | |
185 | #define LOG_FREEPXD 0x0001 | |
186 | ||
187 | ||
188 | struct lrd { /* log record descriptor (LOGRDSIZE bytes); follows the record's data area */ | |
189 | /* | |
190 | * type independent area | |
191 | */ | |
192 | __le32 logtid; /* 4: log transaction identifier */ | |
193 | __le32 backchain; /* 4: ptr to prev record of same transaction */ | |
194 | __le16 type; /* 2: record type (one of the "log record types": LOG_COMMIT, LOG_SYNCPT, ...) */ | |
195 | __le16 length; /* 2: length of data in record (in bytes) */ | |
196 | __le32 aggregate; /* 4: file system lv/aggregate */ | |
197 | /* (16) */ | |
198 | ||
199 | /* | |
200 | * type dependent area (20) | |
201 | */ | |
202 | union { | |
203 | ||
204 | /* | |
f720e3ba | 205 | * COMMIT: commit |
1da177e4 LT |
206 | * |
207 | * transaction commit: no type-dependent information; | |
208 | */ | |
209 | ||
210 | /* | |
f720e3ba | 211 | * REDOPAGE: after-image |
1da177e4 LT |
212 | * |
213 | * apply after-image; | |
214 | * | |
215 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | |
216 | */ | |
217 | struct { | |
218 | __le32 fileset; /* 4: fileset number */ | |
219 | __le32 inode; /* 4: inode number */ | |
220 | __le16 type; /* 2: REDOPAGE record type */ | |
221 | __le16 l2linesize; /* 2: log2 of line size */ | |
222 | pxd_t pxd; /* 8: on-disk page pxd */ | |
223 | } redopage; /* (20) */ | |
224 | ||
225 | /* | |
f720e3ba | 226 | * NOREDOPAGE: the page is freed |
1da177e4 LT |
227 | * |
228 | * do not apply after-image records which precede this record | |
229 | * in the log with the same page block number to this page. | |
230 | * | |
231 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | |
232 | */ | |
233 | struct { | |
234 | __le32 fileset; /* 4: fileset number */ | |
235 | __le32 inode; /* 4: inode number */ | |
236 | __le16 type; /* 2: NOREDOPAGE record type */ | |
237 | __le16 rsrvd; /* 2: reserved */ | |
238 | pxd_t pxd; /* 8: on-disk page pxd */ | |
239 | } noredopage; /* (20) */ | |
240 | ||
241 | /* | |
f720e3ba | 242 | * UPDATEMAP: update block allocation map |
1da177e4 LT |
243 | * |
244 | * either in-line PXD, | |
245 | * or out-of-line XADLIST; | |
246 | * | |
247 | * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; | |
248 | */ | |
249 | struct { | |
250 | __le32 fileset; /* 4: fileset number */ | |
251 | __le32 inode; /* 4: inode number */ | |
252 | __le16 type; /* 2: UPDATEMAP record type */ | |
253 | __le16 nxd; /* 2: number of extents */ | |
254 | pxd_t pxd; /* 8: pxd */ | |
255 | } updatemap; /* (20) */ | |
256 | ||
257 | /* | |
f720e3ba | 258 | * NOREDOINOEXT: the inode extent is freed |
1da177e4 | 259 | * |
63f83c9f DK |
260 | * do not apply after-image records which precede this |
261 | * record in the log with any of the 4 page block | |
262 | * numbers in this inode extent. | |
263 | * | |
264 | * NOTE: The fileset and pxd fields MUST remain in | |
1da177e4 LT |
265 | * the same positions as in the REDOPAGE record format. |
266 | * | |
267 | */ | |
268 | struct { | |
269 | __le32 fileset; /* 4: fileset number */ | |
270 | __le32 iagnum; /* 4: IAG number */ | |
271 | __le32 inoext_idx; /* 4: inode extent index */ | |
272 | pxd_t pxd; /* 8: on-disk page pxd */ | |
273 | } noredoinoext; /* (20) */ | |
274 | ||
275 | /* | |
f720e3ba | 276 | * SYNCPT: log sync point |
1da177e4 | 277 | * |
25985edc | 278 | * replay log up to syncpt address specified; |
1da177e4 LT |
279 | */ |
280 | struct { | |
281 | __le32 sync; /* 4: syncpt address (0 = here) */ | |
282 | } syncpt; | |
283 | ||
284 | /* | |
f720e3ba | 285 | * MOUNT: file system mount |
1da177e4 LT |
286 | * |
287 | * file system mount: no type-dependent information; | |
288 | */ | |
289 | ||
290 | /* | |
f720e3ba | 291 | * ? FREEXTENT: free specified extent(s) |
1da177e4 LT |
292 | * |
293 | * free specified extent(s) from block allocation map | |
294 | * N.B.: nextent should be length of data/sizeof(xad_t) | |
295 | */ | |
296 | struct { | |
297 | __le32 type; /* 4: FREEXTENT record type */ | |
298 | __le32 nextent; /* 4: number of extents */ | |
299 | ||
300 | /* data: PXD or XAD list */ | |
301 | } freextent; | |
302 | ||
303 | /* | |
f720e3ba | 304 | * ? NOREDOFILE: this file is freed |
1da177e4 LT |
305 | * |
306 | * do not apply records which precede this record in the log | |
307 | * with the same inode number. | |
308 | * | |
63f83c9f | 309 | * NOREDOFILE must be the first to be written at commit |
1da177e4 LT |
310 | * (last to be read in logredo()) - it prevents |
311 | * replay of preceding updates of all preceding generations | |
63f83c9f | 312 | * of the inumber esp. the on-disk inode itself. |
1da177e4 LT |
313 | */ |
314 | struct { | |
315 | __le32 fileset; /* 4: fileset number */ | |
316 | __le32 inode; /* 4: inode number */ | |
317 | } noredofile; | |
318 | ||
319 | /* | |
f720e3ba | 320 | * ? NEWPAGE: |
1da177e4 LT |
321 | * |
322 | * metadata type dependent | |
323 | */ | |
324 | struct { | |
325 | __le32 fileset; /* 4: fileset number */ | |
326 | __le32 inode; /* 4: inode number */ | |
327 | __le32 type; /* 4: NEWPAGE record type */ | |
328 | pxd_t pxd; /* 8: on-disk page pxd */ | |
329 | } newpage; | |
330 | ||
331 | /* | |
f720e3ba | 332 | * ? DUMMY: filler |
1da177e4 LT |
333 | * |
334 | * no type-dependent information | |
335 | */ | |
336 | } log; | |
337 | }; /* (36) */ | |
338 | ||
339 | #define LOGRDSIZE (sizeof(struct lrd)) | |
340 | ||
341 | /* | |
342 | * line vector descriptor | |
343 | */ | |
344 | struct lvd { /* one (offset, length) pair, both little-endian 16-bit */ | |
345 | __le16 offset; /* NOTE(review): units not stated here; presumably counted in lines (cf. l2linesize in struct lrd) -- confirm */ | |
346 | __le16 length; /* NOTE(review): presumably in the same units as offset -- confirm */ | |
347 | }; | |
348 | ||
349 | ||
350 | /* | |
351 | * log logical volume | |
352 | */ | |
353 | struct jfs_log { /* in-memory state of one log; byte counts are kept only on fixed-width fields */ | |
354 | ||
355 | struct list_head sb_list;/* This is used to sync metadata | |
356 | * before writing syncpt. | |
357 | */ | |
358 | struct list_head journal_list; /* Global list */ | |
ac4e78bd | 359 | struct file *bdev_file; /* log lv pointer */ |
1da177e4 LT |
360 | int serial; /* 4: log mount serial number */ |
361 | ||
362 | s64 base; /* @8: log extent address (inline log ) */ | |
363 | int size; /* 4: log size in log pages */ | |
364 | int l2bsize; /* 4: log2 of bsize */ | |
365 | ||
5ba25331 | 366 | unsigned long flag; /* log_* flags (see "Log flag" below) */ |
1da177e4 LT |
367 |
368 | struct lbuf *lbuf_free; /* free lbufs */ | |
369 | wait_queue_head_t free_wait; /* NOTE(review): presumably waited on until a free lbuf is available -- confirm */ | |
370 | ||
371 | /* log write */ | |
372 | int logtid; /* 4: log tid */ | |
373 | int page; /* 4: page number of eol page */ | |
374 | int eor; /* 4: eor of last record in eol page */ | |
375 | struct lbuf *bp; /* current log page buffer */ | |
376 | ||
1de87444 | 377 | struct mutex loglock; /* log write serialization lock */ |
1da177e4 LT |
378 |
379 | /* syncpt */ | |
380 | int nextsync; /* 4: bytes to write before next syncpt */ | |
381 | int active; /* 4: NOTE(review): meaning not documented here -- confirm against users */ | |
382 | wait_queue_head_t syncwait; /* NOTE(review): presumably waiters on syncpt processing -- confirm */ | |
383 | ||
384 | /* commit */ | |
385 | uint cflag; /* 4: group commit flag (logGC_*) */ | |
386 | struct list_head cqueue; /* FIFO commit queue */ | |
387 | struct tblock *flush_tblk; /* tblk we're waiting on for flush */ | |
388 | int gcrtc; /* 4: GC_READY transaction count */ | |
389 | struct tblock *gclrt; /* latest GC_READY transaction */ | |
390 | spinlock_t gclock; /* group commit lock */ | |
391 | int logsize; /* 4: log data area size in bytes */ | |
392 | int lsn; /* 4: end-of-log */ | |
393 | int clsn; /* 4: clsn */ | |
394 | int syncpt; /* 4: addr of last syncpt record */ | |
395 | int sync; /* 4: addr from last logsync() */ | |
396 | struct list_head synclist; /* logsynclist anchor */ | |
397 | spinlock_t synclock; /* synclist lock (taken via LOGSYNC_LOCK) */ | |
398 | struct lbuf *wqueue; /* log pageout queue */ | |
399 | int count; /* 4: count */ | |
2e3bc612 | 400 | uuid_t uuid; /* 16: 128-bit uuid of log device */ |
1da177e4 LT |
401 |
402 | int no_integrity; /* 4: flag to disable journaling to disk */ | |
403 | }; | |
404 | ||
405 | /* | |
406 | * Log flag (values for jfs_log.flag). NOTE(review): these are consecutive integers, not masks (3 == 1 | 2), so they are presumably bit numbers -- confirm against the users of jfs_log.flag. | |
407 | */ | |
408 | #define log_INLINELOG 1 | |
409 | #define log_SYNCBARRIER 2 | |
410 | #define log_QUIESCE 3 | |
411 | #define log_FLUSH 4 | |
412 | ||
413 | /* | |
414 | * group commit flag | |
415 | */ | |
416 | /* jfs_log */ | |
417 | #define logGC_PAGEOUT 0x00000001 | |
418 | ||
419 | /* tblock/lbuf */ | |
420 | #define tblkGC_QUEUE 0x0001 | |
421 | #define tblkGC_READY 0x0002 | |
422 | #define tblkGC_COMMIT 0x0004 | |
423 | #define tblkGC_COMMITTED 0x0008 | |
424 | #define tblkGC_EOP 0x0010 | |
425 | #define tblkGC_FREE 0x0020 | |
426 | #define tblkGC_LEADER 0x0040 | |
427 | #define tblkGC_ERROR 0x0080 | |
428 | #define tblkGC_LAZY 0x0100 // D230860 | |
429 | #define tblkGC_UNLOCKED 0x0200 // D230860 | |
430 | ||
431 | /* | |
432 | * log cache buffer header | |
433 | */ | |
434 | struct lbuf { /* header describing one cached log page buffer */ | |
435 | struct jfs_log *l_log; /* log associated with buffer */ | |
436 | ||
437 | /* | |
438 | * data buffer base area | |
439 | */ | |
440 | uint l_flag; /* 4: pageout control flags */ | |
441 | ||
442 | struct lbuf *l_wqnext; /* write queue link */ | |
443 | struct lbuf *l_freelist; /* free list link (also reused as l_redrive_next) */ | |
444 | ||
445 | int l_pn; /* 4: log page number */ | |
446 | int l_eor; /* 4: log record eor */ | |
447 | int l_ceor; /* 4: committed log record eor */ | |
448 | ||
449 | s64 l_blkno; /* 8: log page block number */ | |
450 | caddr_t l_ldata; /* data page */ | |
dc5798d9 | 451 | struct page *l_page; /* The page itself */ |
63f83c9f | 452 | uint l_offset; /* Offset of l_ldata within the page */ |
1da177e4 LT |
453 |
454 | wait_queue_head_t l_ioevent; /* i/o done event */ | |
1da177e4 LT |
455 | }; |
456 | ||
457 | /* Reuse l_freelist for redrive list */ | |
458 | #define l_redrive_next l_freelist | |
459 | ||
460 | /* | |
461 | * logsynclist block | |
462 | * | |
463 | * common logsyncblk prefix for jbuf_t and tblock | |
464 | */ | |
465 | struct logsyncblk { /* common prefix shared by jbuf_t and tblock */ | |
466 | u16 xflag; /* flags */ | |
467 | u16 flag; /* only meaningful in tblock */ | |
468 | lid_t lid; /* lock id */ | |
469 | s32 lsn; /* log sequence number */ | |
470 | struct list_head synclist; /* log sync list link */ | |
471 | }; | |
472 | ||
473 | /* | |
474 | * logsynclist serialization (per log): thin wrappers around the log's synclock spinlock. LOGSYNC_LOCK/LOGSYNC_UNLOCK use the irqsave/irqrestore variants, so the caller supplies a 'flags' variable that holds the saved interrupt state between the two calls. | |
475 | */ | |
476 | ||
477 | #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) | |
7fab479b DK |
478 | #define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) |
479 | #define LOGSYNC_UNLOCK(log, flags) \ | |
480 | spin_unlock_irqrestore(&(log)->synclock, flags) | |
1da177e4 LT |
481 | |
/*
 * logdiff - compute the difference in bytes of lsn from the sync point
 *
 * @diff: signed lvalue that receives the result (evaluated more than once)
 * @lsn:  log address to measure
 * @log:  pointer to a log; its ->syncpt and ->logsize members are read
 *        (evaluated more than once)
 *
 * When lsn lies below the sync point the raw difference is negative;
 * the log size is then added so the result is non-negative.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the body of an if/else.
 */
#define logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)
489 | ||
490 | extern int lmLogOpen(struct super_block *sb); | |
491 | extern int lmLogClose(struct super_block *sb); | |
492 | extern int lmLogShutdown(struct jfs_log * log); | |
493 | extern int lmLogInit(struct jfs_log * log); | |
494 | extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); | |
1868f4aa DK |
495 | extern int lmGroupCommit(struct jfs_log *, struct tblock *); |
496 | extern int jfsIOWait(void *); | |
1da177e4 | 497 | extern void jfs_flush_journal(struct jfs_log * log, int wait); |
cbc3d65e | 498 | extern void jfs_syncpt(struct jfs_log *log, int hard_sync); |
1da177e4 LT |
499 | |
500 | #endif /* _H_JFS_LOGMGR */ |