/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 */
#ifndef _H_JFS_LOGMGR
#define _H_JFS_LOGMGR
#include <linux/uuid.h>
#include "jfs_filsys.h"
#include "jfs_lock.h"
/*
 *	log manager configuration parameters
 */
/* log page size in bytes */
#define LOGPSIZE 4096
/* log2 of LOGPSIZE (1 << 12 == 4096) */
#define L2LOGPSIZE 12
/* NOTE(review): presumably the number of in-memory log page buffers — confirm against the lbuf setup code */
#define LOGPAGES 16
/*
 *	log logical volume
 *
 * a log is used to make the commit operation on journalled
 * files within the same logical volume group atomic.
 * a log is implemented with a logical volume.
 * there is one log per logical volume group.
 *
 * block 0 of the log logical volume is not used (ipl etc).
 * block 1 contains a log "superblock" and is used by logFormat(),
 * lmLogInit(), lmLogShutdown(), and logRedo() to record status
 * of the log but is not otherwise used during normal processing.
 * blocks 2 - (N-1) are used to contain log records.
 *
 * when a volume group is varied-on-line, logRedo() must have
 * been executed before the file systems (logical volumes) in
 * the volume group can be mounted.
 */
/*
 *	log superblock (block 1 of logical volume)
 */
/* block number of the log superblock within the log logical volume */
#define LOGSUPER_B 1
/* first block that holds log records (blocks 2 .. N-1) */
#define LOGSTART_B 2
/* NOTE(review): presumably the expected value of logsuper.magic — confirm */
#define LOGMAGIC 0x87654321
/* NOTE(review): presumably the expected value of logsuper.version — confirm */
#define LOGVERSION 1
/* number of entries in logsuper.active[] */
#define MAX_ACTIVE 128
/*
 * On-disk log superblock.  Integer fields use explicit little-endian
 * types; the field order and sizes define the on-disk layout and must
 * not be changed.
 *
 * NOTE(review): notes marked "presumably" are inferred from the field
 * names only — confirm against the code that reads/writes this block.
 */
struct logsuper {
/* presumably compared against LOGMAGIC */
__le32 magic;
/* presumably compared against LOGVERSION */
__le32 version;
__le32 serial;
__le32 size;
__le32 bsize;
/* presumably log2 of bsize */
__le32 l2bsize;
__le32 flag;
/* presumably one of LOGMOUNT / LOGREDONE / LOGWRAP / LOGREADERR */
__le32 state;
__le32 end;
/* uuid and 16-byte label of the log itself */
uuid_t uuid;
char label[16];
/* table of MAX_ACTIVE uuids (presumably the file systems using this log) */
struct {
uuid_t uuid;
} active[MAX_ACTIVE];
};
/*
 * Log state values.
 * NOTE(review): presumably the values stored in logsuper.state — confirm.
 */
#define LOGMOUNT	0
#define LOGREDONE	1	/* log redo completed by logredo(). */
#define LOGWRAP		2
#define LOGREADERR	3
/*
 *	log logical page
 *
 * (this comment should be rewritten !)
 * the header and trailer structures (h,t) will normally have
 * the same page and eor value.
 * An exception to this occurs when a complete page write is not
 * accomplished on a power failure. Since the hardware may "split write"
 * sectors in the page, any out of order sequence may occur during powerfail
 * and needs to be recognized during log replay. The xor value is
 * an "exclusive or" of all log words in the page up to eor. This
 * 32 bit eor is stored with the top 16 bits in the header and the
 * bottom 16 bits in the trailer. logredo can easily recognize pages
 * that were not completed by reconstructing this eor and checking
 * the log page.
 *
 * Previous versions of the operating system did not allow split
 * writes and detected partially written records in logredo by
 * ordering the updates to the header, trailer, and the move of data
 * into the logdata area. The order: (1) data is moved (2) header
 * is updated (3) trailer is updated. In logredo, when the header
 * differed from the trailer, the header and trailer were reconciled
 * as follows: if h.page != t.page they were set to the smaller of
 * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
 * h.eor != t.eor they were set to the smaller of their two values.
 */
/*
 * On-disk log page: an 8-byte header (h), a data area of
 * LOGPSIZE / 4 - 4 little-endian 32-bit words, and an 8-byte trailer (t),
 * i.e. LOGPSIZE bytes in total.  Header and trailer carry the same
 * fields so that a partially written page can be detected.
 */
struct logpage {
/* header */
struct {
__le32 page;
__le16 rsrvd;
/* offset of the byte following the last record on the page */
__le16 eor;
} h;
/* log record data area */
__le32 data[LOGPSIZE / 4 - 4];
/* trailer: normally holds the same page and eor values as the header */
struct {
__le32 page;
__le16 rsrvd;
__le16 eor;
} t;
};
/* size in bytes of the log page header, logpage.h (4 + 2 + 2) */
#define LOGPHDRSIZE 8
/* size in bytes of the log page trailer, logpage.t (4 + 2 + 2) */
#define LOGPTLRSIZE 8
/*
 *	log record
 *
 * (this comment should be rewritten !)
 * jfs uses only "after" log records (only a single writer is allowed
 * in a page, pages are written to temporary paging space if
 * they must be written to disk before commit, and i/o is
 * scheduled for modified pages to their home location after
 * the log records containing the after values and the commit
 * record is written to the log on disk, undo discards the copy
 * in main-memory.)
 *
 * a log record consists of a data area of variable length followed by
 * a descriptor of fixed size LOGRDSIZE bytes.
 * the data area is rounded up to an integral number of 4-bytes and
 * must be no longer than LOGPSIZE.
 * the descriptor is of size of multiple of 4-bytes and aligned on a
 * 4-byte boundary.
 * records are packed one after the other in the data area of log pages.
 * (sometimes a DUMMY record is inserted so that at least one record ends
 * on every page or the longest record is placed on at most two pages).
 * the field eor in page header/trailer points to the byte following
 * the last record on a page.
 */
/*
 * Log record types; the names match the record kinds described in the
 * type-dependent union of struct lrd.
 * NOTE(review): presumably the values stored in lrd.type — confirm.
 */
#define LOG_COMMIT 0x8000
#define LOG_SYNCPT 0x4000
#define LOG_MOUNT 0x2000
#define LOG_REDOPAGE 0x0800
#define LOG_NOREDOPAGE 0x0080
#define LOG_NOREDOINOEXT 0x0040
#define LOG_UPDATEMAP 0x0008
#define LOG_NOREDOFILE 0x0001
/*
 * Metadata/page type flags.  These values overlap the record types above,
 * so they belong to a separate field.
 * NOTE(review): presumably the redopage/noredopage "type" member — confirm.
 */
#define LOG_INODE 0x0001
#define LOG_XTREE 0x0002
#define LOG_DTREE 0x0004
#define LOG_BTROOT 0x0010
#define LOG_EA 0x0020
#define LOG_ACL 0x0040
#define LOG_DATA 0x0080
#define LOG_NEW 0x0100
#define LOG_EXTEND 0x0200
#define LOG_RELOCATE 0x0400
#define LOG_DIR_XTREE 0x0800
/*
 * Allocation-map update kinds (allocate/free x single extent/list).
 * NOTE(review): presumably the updatemap "type" member — confirm.
 */
#define LOG_ALLOCXADLIST 0x0080
#define LOG_ALLOCPXDLIST 0x0040
#define LOG_ALLOCXAD 0x0020
#define LOG_ALLOCPXD 0x0010
#define LOG_FREEXADLIST 0x0008
#define LOG_FREEPXDLIST 0x0004
#define LOG_FREEXAD 0x0002
#define LOG_FREEPXD 0x0001
struct lrd {
* type independent area
*/
__le32 logtid;
__le32 backchain;
__le16 type;
__le16 length;
__le32 aggregate;
* type dependent area (20)
*/
union {
* COMMIT: commit
*
* transaction commit: no type-dependent information;
*/
* REDOPAGE: after-image
*
* apply after-image;
*
* N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
*/
struct {
__le32 fileset;
__le32 inode;
__le16 type;
__le16 l2linesize;
pxd_t pxd;
} redopage;
* NOREDOPAGE: the page is freed
*
* do not apply after-image records which precede this record
* in the log with the same page block number to this page.
*
* N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
*/
struct {
__le32 fileset;
__le32 inode;
__le16 type;
__le16 rsrvd;
pxd_t pxd;
} noredopage;
* UPDATEMAP: update block allocation map
*
* either in-line PXD,
* or out-of-line XADLIST;
*
* N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
*/
struct {
__le32 fileset;
__le32 inode;
__le16 type;
__le16 nxd;
pxd_t pxd;
} updatemap;
* NOREDOINOEXT: the inode extent is freed
*
* do not apply after-image records which precede this
* record in the log with the any of the 4 page block
* numbers in this inode extent.
*
* NOTE: The fileset and pxd fields MUST remain in
* the same fields in the REDOPAGE record format.
*
*/
struct {
__le32 fileset;
__le32 iagnum;
__le32 inoext_idx;
pxd_t pxd;
} noredoinoext;
* SYNCPT: log sync point
*
* replay log up to syncpt address specified;
*/
struct {
__le32 sync;
} syncpt;
* MOUNT: file system mount
*
* file system mount: no type-dependent information;
*/
* ? FREEXTENT: free specified extent(s)
*
* free specified extent(s) from block allocation map
* N.B.: nextents should be length of data/sizeof(xad_t)
*/
struct {
__le32 type;
__le32 nextent;
} freextent;
* ? NOREDOFILE: this file is freed
*
* do not apply records which precede this record in the log
* with the same inode number.
*
* NOREDOFILE must be the first to be written at commit
* (last to be read in logredo()) - it prevents
* replay of preceding updates of all preceding generations
* of the inumber esp. the on-disk inode itself.
*/
struct {
__le32 fileset;
__le32 inode;
} noredofile;
* ? NEWPAGE:
*
* metadata type dependent
*/
struct {
__le32 fileset;
__le32 inode;
__le32 type;
pxd_t pxd;
} newpage;
* ? DUMMY: filler
*
* no type-dependent information
*/
} log;
};
/* size in bytes of the fixed-size log record descriptor (struct lrd) */
#define LOGRDSIZE (sizeof(struct lrd))
/*
 *	line vector descriptor
 */
/*
 * Line vector descriptor: a little-endian (offset, length) pair.
 * NOTE(review): the units of offset/length are not visible in this
 * header — confirm against the code that builds log records.
 */
struct lvd {
__le16 offset;
__le16 length;
};
/*
 *	log logical volume
 */
struct jfs_log {
struct list_head sb_list;
* before writing syncpt.
*/
struct list_head journal_list;
struct block_device *bdev;
int serial;
s64 base;
int size;
int l2bsize;
unsigned long flag;
struct lbuf *lbuf_free;
wait_queue_head_t free_wait;
int logtid;
int page;
int eor;
struct lbuf *bp;
struct mutex loglock;
int nextsync;
int active;
wait_queue_head_t syncwait;
uint cflag;
struct list_head cqueue;
struct tblock *flush_tblk;
int gcrtc;
struct tblock *gclrt;
spinlock_t gclock;
int logsize;
int lsn;
int clsn;
int syncpt;
int sync;
struct list_head synclist;
spinlock_t synclock;
struct lbuf *wqueue;
int count;
uuid_t uuid;
int no_integrity;
};
/*
 * Log flag
 */
/*
 * NOTE(review): these are consecutive integers rather than distinct
 * masks, so they look like bit numbers (e.g. for set_bit()/test_bit()
 * on jfs_log.flag, an unsigned long) — confirm against the callers.
 */
#define log_INLINELOG 1
#define log_SYNCBARRIER 2
#define log_QUIESCE 3
#define log_FLUSH 4
/*
 * group commit flag
 */
/* NOTE(review): presumably a bit in jfs_log.cflag — confirm */
#define logGC_PAGEOUT 0x00000001
/*
 * Single-bit masks for per-transaction group commit state.
 * NOTE(review): presumably kept in struct tblock (defined elsewhere) — confirm.
 */
#define tblkGC_QUEUE 0x0001
#define tblkGC_READY 0x0002
#define tblkGC_COMMIT 0x0004
#define tblkGC_COMMITTED 0x0008
#define tblkGC_EOP 0x0010
#define tblkGC_FREE 0x0020
#define tblkGC_LEADER 0x0040
#define tblkGC_ERROR 0x0080
#define tblkGC_LAZY 0x0100
#define tblkGC_UNLOCKED 0x0200
/*
 *		log cache buffer header
 */
struct lbuf {
struct jfs_log *l_log;
* data buffer base area
*/
uint l_flag;
struct lbuf *l_wqnext;
struct lbuf *l_freelist;
int l_pn;
int l_eor;
int l_ceor;
s64 l_blkno;
caddr_t l_ldata;
struct page *l_page;
uint l_offset;
wait_queue_head_t l_ioevent;
};
#define l_redrive_next l_freelist
/*
 *	logsynclist block
 *
 * common logsyncblk prefix for jbuf_t and tblock
 */
/*
 * Common leading fields for objects that can be linked on a log's
 * synclist.  Structures sharing this prefix must keep the same field
 * order and types.
 */
struct logsyncblk {
u16 xflag;
u16 flag;
lid_t lid;
/* log address; NOTE(review): presumably the value compared via logdiff() — confirm */
s32 lsn;
/* list linkage; NOTE(review): presumably onto jfs_log.synclist — confirm */
struct list_head synclist;
};
/*
 *	logsynclist serialization (per log)
 */
/* initialise the synclock spinlock of the given log */
#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
/* take log->synclock; the interrupt state is saved into flags */
#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
/* release log->synclock and restore the interrupt state from flags */
#define LOGSYNC_UNLOCK(log, flags) \
spin_unlock_irqrestore(&(log)->synclock, flags)
/*
 * logdiff - offset of a log address from the last sync point
 * @diff: lvalue that receives the result
 * @lsn:  log address to measure
 * @log:  pointer to an object with syncpt and logsize members
 *
 * Stores (lsn - log->syncpt) in diff; when that difference is negative,
 * log->logsize is added once to account for wrap-around.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct inside an unbraced if/else.  diff and log are expanded
 * more than once, so pass expressions without side effects.
 */
#define logdiff(diff, lsn, log)\
do {\
	(diff) = (lsn) - (log)->syncpt;\
	if ((diff) < 0)\
		(diff) += (log)->logsize;\
} while (0)
/*
 * Log manager entry points; the definitions live outside this header.
 */

/* per-super-block log open and close */
int lmLogOpen(struct super_block *sb);
int lmLogClose(struct super_block *sb);

/* operations on an individual log */
int lmLogShutdown(struct jfs_log *log);
int lmLogInit(struct jfs_log *log);
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
int lmGroupCommit(struct jfs_log *, struct tblock *);

int jfsIOWait(void *);

void jfs_flush_journal(struct jfs_log *log, int wait);
void jfs_syncpt(struct jfs_log *log, int hard_sync);
#endif