Linux Audio

Check our new training course

Loading...
v6.8
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *
   4 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
   5 *
   6 */
   7
   8#include <linux/blkdev.h>
   9#include <linux/fs.h>
  10#include <linux/random.h>
  11#include <linux/slab.h>
  12
  13#include "debug.h"
  14#include "ntfs.h"
  15#include "ntfs_fs.h"
  16
  17/*
  18 * LOG FILE structs
  19 */
  20
  21// clang-format off
  22
  23#define MaxLogFileSize     0x100000000ull
  24#define DefaultLogPageSize 4096
  25#define MinLogRecordPages  0x30
  26
  27struct RESTART_HDR {
  28	struct NTFS_RECORD_HEADER rhdr; // 'RSTR'
  29	__le32 sys_page_size; // 0x10: Page size of the system which initialized the log.
  30	__le32 page_size;     // 0x14: Log page size used for this log file.
  31	__le16 ra_off;        // 0x18:
  32	__le16 minor_ver;     // 0x1A:
  33	__le16 major_ver;     // 0x1C:
  34	__le16 fixups[];
  35};
  36
  37#define LFS_NO_CLIENT 0xffff
  38#define LFS_NO_CLIENT_LE cpu_to_le16(0xffff)
  39
  40struct CLIENT_REC {
  41	__le64 oldest_lsn;
  42	__le64 restart_lsn; // 0x08:
  43	__le16 prev_client; // 0x10:
  44	__le16 next_client; // 0x12:
  45	__le16 seq_num;     // 0x14:
  46	u8 align[6];        // 0x16:
  47	__le32 name_bytes;  // 0x1C: In bytes.
  48	__le16 name[32];    // 0x20: Name of client.
  49};
  50
  51static_assert(sizeof(struct CLIENT_REC) == 0x60);
  52
  53/* Two copies of these will exist at the beginning of the log file */
  54struct RESTART_AREA {
  55	__le64 current_lsn;    // 0x00: Current logical end of log file.
  56	__le16 log_clients;    // 0x08: Maximum number of clients.
  57	__le16 client_idx[2];  // 0x0A: Free/use index into the client record arrays.
  58	__le16 flags;          // 0x0E: See RESTART_SINGLE_PAGE_IO.
  59	__le32 seq_num_bits;   // 0x10: The number of bits in sequence number.
  60	__le16 ra_len;         // 0x14:
  61	__le16 client_off;     // 0x16:
  62	__le64 l_size;         // 0x18: Usable log file size.
  63	__le32 last_lsn_data_len; // 0x20:
  64	__le16 rec_hdr_len;    // 0x24: Log page data offset.
  65	__le16 data_off;       // 0x26: Log page data length.
  66	__le32 open_log_count; // 0x28:
  67	__le32 align[5];       // 0x2C:
  68	struct CLIENT_REC clients[]; // 0x40:
  69};
  70
  71struct LOG_REC_HDR {
  72	__le16 redo_op;      // 0x00:  NTFS_LOG_OPERATION
  73	__le16 undo_op;      // 0x02:  NTFS_LOG_OPERATION
  74	__le16 redo_off;     // 0x04:  Offset to Redo record.
  75	__le16 redo_len;     // 0x06:  Redo length.
  76	__le16 undo_off;     // 0x08:  Offset to Undo record.
  77	__le16 undo_len;     // 0x0A:  Undo length.
  78	__le16 target_attr;  // 0x0C:
  79	__le16 lcns_follow;  // 0x0E:
  80	__le16 record_off;   // 0x10:
  81	__le16 attr_off;     // 0x12:
  82	__le16 cluster_off;  // 0x14:
  83	__le16 reserved;     // 0x16:
  84	__le64 target_vcn;   // 0x18:
  85	__le64 page_lcns[];  // 0x20:
  86};
  87
  88static_assert(sizeof(struct LOG_REC_HDR) == 0x20);
  89
  90#define RESTART_ENTRY_ALLOCATED    0xFFFFFFFF
  91#define RESTART_ENTRY_ALLOCATED_LE cpu_to_le32(0xFFFFFFFF)
  92
  93struct RESTART_TABLE {
  94	__le16 size;       // 0x00: In bytes
  95	__le16 used;       // 0x02: Entries
  96	__le16 total;      // 0x04: Entries
  97	__le16 res[3];     // 0x06:
  98	__le32 free_goal;  // 0x0C:
  99	__le32 first_free; // 0x10:
 100	__le32 last_free;  // 0x14:
 101
 102};
 103
 104static_assert(sizeof(struct RESTART_TABLE) == 0x18);
 105
 106struct ATTR_NAME_ENTRY {
 107	__le16 off; // Offset in the Open attribute Table.
 108	__le16 name_bytes;
 109	__le16 name[];
 110};
 111
 112struct OPEN_ATTR_ENRTY {
 113	__le32 next;            // 0x00: RESTART_ENTRY_ALLOCATED if allocated
 114	__le32 bytes_per_index; // 0x04:
 115	enum ATTR_TYPE type;    // 0x08:
 116	u8 is_dirty_pages;      // 0x0C:
 117	u8 is_attr_name;        // 0x0B: Faked field to manage 'ptr'
 118	u8 name_len;            // 0x0C: Faked field to manage 'ptr'
 119	u8 res;
 120	struct MFT_REF ref;     // 0x10: File Reference of file containing attribute
 121	__le64 open_record_lsn; // 0x18:
 122	void *ptr;              // 0x20:
 123};
 124
 125/* 32 bit version of 'struct OPEN_ATTR_ENRTY' */
 126struct OPEN_ATTR_ENRTY_32 {
 127	__le32 next;            // 0x00: RESTART_ENTRY_ALLOCATED if allocated
 128	__le32 ptr;             // 0x04:
 129	struct MFT_REF ref;     // 0x08:
 130	__le64 open_record_lsn; // 0x10:
 131	u8 is_dirty_pages;      // 0x18:
 132	u8 is_attr_name;        // 0x19:
 133	u8 res1[2];
 134	enum ATTR_TYPE type;    // 0x1C:
 135	u8 name_len;            // 0x20: In wchar
 136	u8 res2[3];
 137	__le32 AttributeName;   // 0x24:
 138	__le32 bytes_per_index; // 0x28:
 139};
 140
 141#define SIZEOF_OPENATTRIBUTEENTRY0 0x2c
 142// static_assert( 0x2C == sizeof(struct OPEN_ATTR_ENRTY_32) );
 143static_assert(sizeof(struct OPEN_ATTR_ENRTY) < SIZEOF_OPENATTRIBUTEENTRY0);
 144
 145/*
 146 * One entry exists in the Dirty Pages Table for each page which is dirty at
 147 * the time the Restart Area is written.
 148 */
 149struct DIR_PAGE_ENTRY {
 150	__le32 next;         // 0x00: RESTART_ENTRY_ALLOCATED if allocated
 151	__le32 target_attr;  // 0x04: Index into the Open attribute Table
 152	__le32 transfer_len; // 0x08:
 153	__le32 lcns_follow;  // 0x0C:
 154	__le64 vcn;          // 0x10: Vcn of dirty page
 155	__le64 oldest_lsn;   // 0x18:
 156	__le64 page_lcns[];  // 0x20:
 157};
 158
 159static_assert(sizeof(struct DIR_PAGE_ENTRY) == 0x20);
 160
 161/* 32 bit version of 'struct DIR_PAGE_ENTRY' */
 162struct DIR_PAGE_ENTRY_32 {
 163	__le32 next;		// 0x00: RESTART_ENTRY_ALLOCATED if allocated
 164	__le32 target_attr;	// 0x04: Index into the Open attribute Table
 165	__le32 transfer_len;	// 0x08:
 166	__le32 lcns_follow;	// 0x0C:
 167	__le32 reserved;	// 0x10:
 168	__le32 vcn_low;		// 0x14: Vcn of dirty page
 169	__le32 vcn_hi;		// 0x18: Vcn of dirty page
 170	__le32 oldest_lsn_low;	// 0x1C:
 171	__le32 oldest_lsn_hi;	// 0x1C:
 172	__le32 page_lcns_low;	// 0x24:
 173	__le32 page_lcns_hi;	// 0x24:
 174};
 175
 176static_assert(offsetof(struct DIR_PAGE_ENTRY_32, vcn_low) == 0x14);
 177static_assert(sizeof(struct DIR_PAGE_ENTRY_32) == 0x2c);
 178
 179enum transact_state {
 180	TransactionUninitialized = 0,
 181	TransactionActive,
 182	TransactionPrepared,
 183	TransactionCommitted
 184};
 185
 186struct TRANSACTION_ENTRY {
 187	__le32 next;          // 0x00: RESTART_ENTRY_ALLOCATED if allocated
 188	u8 transact_state;    // 0x04:
 189	u8 reserved[3];       // 0x05:
 190	__le64 first_lsn;     // 0x08:
 191	__le64 prev_lsn;      // 0x10:
 192	__le64 undo_next_lsn; // 0x18:
 193	__le32 undo_records;  // 0x20: Number of undo log records pending abort
 194	__le32 undo_len;      // 0x24: Total undo size
 195};
 196
 197static_assert(sizeof(struct TRANSACTION_ENTRY) == 0x28);
 198
 199struct NTFS_RESTART {
 200	__le32 major_ver;             // 0x00:
 201	__le32 minor_ver;             // 0x04:
 202	__le64 check_point_start;     // 0x08:
 203	__le64 open_attr_table_lsn;   // 0x10:
 204	__le64 attr_names_lsn;        // 0x18:
 205	__le64 dirty_pages_table_lsn; // 0x20:
 206	__le64 transact_table_lsn;    // 0x28:
 207	__le32 open_attr_len;         // 0x30: In bytes
 208	__le32 attr_names_len;        // 0x34: In bytes
 209	__le32 dirty_pages_len;       // 0x38: In bytes
 210	__le32 transact_table_len;    // 0x3C: In bytes
 211};
 212
 213static_assert(sizeof(struct NTFS_RESTART) == 0x40);
 214
 215struct NEW_ATTRIBUTE_SIZES {
 216	__le64 alloc_size;
 217	__le64 valid_size;
 218	__le64 data_size;
 219	__le64 total_size;
 220};
 221
 222struct BITMAP_RANGE {
 223	__le32 bitmap_off;
 224	__le32 bits;
 225};
 226
 227struct LCN_RANGE {
 228	__le64 lcn;
 229	__le64 len;
 230};
 231
 232/* The following type defines the different log record types. */
 233#define LfsClientRecord  cpu_to_le32(1)
 234#define LfsClientRestart cpu_to_le32(2)
 235
 236/* This is used to uniquely identify a client for a particular log file. */
 237struct CLIENT_ID {
 238	__le16 seq_num;
 239	__le16 client_idx;
 240};
 241
 242/* This is the header that begins every Log Record in the log file. */
 243struct LFS_RECORD_HDR {
 244	__le64 this_lsn;		// 0x00:
 245	__le64 client_prev_lsn;		// 0x08:
 246	__le64 client_undo_next_lsn;	// 0x10:
 247	__le32 client_data_len;		// 0x18:
 248	struct CLIENT_ID client;	// 0x1C: Owner of this log record.
 249	__le32 record_type;		// 0x20: LfsClientRecord or LfsClientRestart.
 250	__le32 transact_id;		// 0x24:
 251	__le16 flags;			// 0x28: LOG_RECORD_MULTI_PAGE
 252	u8 align[6];			// 0x2A:
 253};
 254
 255#define LOG_RECORD_MULTI_PAGE cpu_to_le16(1)
 256
 257static_assert(sizeof(struct LFS_RECORD_HDR) == 0x30);
 258
 259struct LFS_RECORD {
 260	__le16 next_record_off;	// 0x00: Offset of the free space in the page,
 261	u8 align[6];		// 0x02:
 262	__le64 last_end_lsn;	// 0x08: lsn for the last log record which ends on the page,
 263};
 264
 265static_assert(sizeof(struct LFS_RECORD) == 0x10);
 266
 267struct RECORD_PAGE_HDR {
 268	struct NTFS_RECORD_HEADER rhdr;	// 'RCRD'
 269	__le32 rflags;			// 0x10: See LOG_PAGE_LOG_RECORD_END
 270	__le16 page_count;		// 0x14:
 271	__le16 page_pos;		// 0x16:
 272	struct LFS_RECORD record_hdr;	// 0x18:
 273	__le16 fixups[10];		// 0x28:
 274	__le32 file_off;		// 0x3c: Used when major version >= 2
 275};
 276
 277// clang-format on
 278
 279// Page contains the end of a log record.
 280#define LOG_PAGE_LOG_RECORD_END cpu_to_le32(0x00000001)
 281
 282static inline bool is_log_record_end(const struct RECORD_PAGE_HDR *hdr)
 283{
 284	return hdr->rflags & LOG_PAGE_LOG_RECORD_END;
 285}
 286
 287static_assert(offsetof(struct RECORD_PAGE_HDR, file_off) == 0x3c);
 288
 289/*
 290 * END of NTFS LOG structures
 291 */
 292
 293/* Define some tuning parameters to keep the restart tables a reasonable size. */
 294#define INITIAL_NUMBER_TRANSACTIONS 5
 295
 296enum NTFS_LOG_OPERATION {
 297
 298	Noop = 0x00,
 299	CompensationLogRecord = 0x01,
 300	InitializeFileRecordSegment = 0x02,
 301	DeallocateFileRecordSegment = 0x03,
 302	WriteEndOfFileRecordSegment = 0x04,
 303	CreateAttribute = 0x05,
 304	DeleteAttribute = 0x06,
 305	UpdateResidentValue = 0x07,
 306	UpdateNonresidentValue = 0x08,
 307	UpdateMappingPairs = 0x09,
 308	DeleteDirtyClusters = 0x0A,
 309	SetNewAttributeSizes = 0x0B,
 310	AddIndexEntryRoot = 0x0C,
 311	DeleteIndexEntryRoot = 0x0D,
 312	AddIndexEntryAllocation = 0x0E,
 313	DeleteIndexEntryAllocation = 0x0F,
 314	WriteEndOfIndexBuffer = 0x10,
 315	SetIndexEntryVcnRoot = 0x11,
 316	SetIndexEntryVcnAllocation = 0x12,
 317	UpdateFileNameRoot = 0x13,
 318	UpdateFileNameAllocation = 0x14,
 319	SetBitsInNonresidentBitMap = 0x15,
 320	ClearBitsInNonresidentBitMap = 0x16,
 321	HotFix = 0x17,
 322	EndTopLevelAction = 0x18,
 323	PrepareTransaction = 0x19,
 324	CommitTransaction = 0x1A,
 325	ForgetTransaction = 0x1B,
 326	OpenNonresidentAttribute = 0x1C,
 327	OpenAttributeTableDump = 0x1D,
 328	AttributeNamesDump = 0x1E,
 329	DirtyPageTableDump = 0x1F,
 330	TransactionTableDump = 0x20,
 331	UpdateRecordDataRoot = 0x21,
 332	UpdateRecordDataAllocation = 0x22,
 333
 334	UpdateRelativeDataInIndex =
 335		0x23, // NtOfsRestartUpdateRelativeDataInIndex
 336	UpdateRelativeDataInIndex2 = 0x24,
 337	ZeroEndOfFileRecord = 0x25,
 338};
 339
 340/*
 341 * Array for log records which require a target attribute.
 342 * A true indicates that the corresponding restart operation
 343 * requires a target attribute.
 344 */
 345static const u8 AttributeRequired[] = {
 346	0xFC, 0xFB, 0xFF, 0x10, 0x06,
 347};
 348
 349static inline bool is_target_required(u16 op)
 350{
 351	bool ret = op <= UpdateRecordDataAllocation &&
 352		   (AttributeRequired[op >> 3] >> (op & 7) & 1);
 353	return ret;
 354}
 355
 356static inline bool can_skip_action(enum NTFS_LOG_OPERATION op)
 357{
 358	switch (op) {
 359	case Noop:
 360	case DeleteDirtyClusters:
 361	case HotFix:
 362	case EndTopLevelAction:
 363	case PrepareTransaction:
 364	case CommitTransaction:
 365	case ForgetTransaction:
 366	case CompensationLogRecord:
 367	case OpenNonresidentAttribute:
 368	case OpenAttributeTableDump:
 369	case AttributeNamesDump:
 370	case DirtyPageTableDump:
 371	case TransactionTableDump:
 372		return true;
 373	default:
 374		return false;
 375	}
 376}
 377
 378enum { lcb_ctx_undo_next, lcb_ctx_prev, lcb_ctx_next };
 379
 380/* Bytes per restart table. */
 381static inline u32 bytes_per_rt(const struct RESTART_TABLE *rt)
 382{
 383	return le16_to_cpu(rt->used) * le16_to_cpu(rt->size) +
 384	       sizeof(struct RESTART_TABLE);
 385}
 386
 387/* Log record length. */
 388static inline u32 lrh_length(const struct LOG_REC_HDR *lr)
 389{
 390	u16 t16 = le16_to_cpu(lr->lcns_follow);
 391
 392	return struct_size(lr, page_lcns, max_t(u16, 1, t16));
 393}
 394
 395struct lcb {
 396	struct LFS_RECORD_HDR *lrh; // Log record header of the current lsn.
 397	struct LOG_REC_HDR *log_rec;
 398	u32 ctx_mode; // lcb_ctx_undo_next/lcb_ctx_prev/lcb_ctx_next
 399	struct CLIENT_ID client;
 400	bool alloc; // If true the we should deallocate 'log_rec'.
 401};
 402
 403static void lcb_put(struct lcb *lcb)
 404{
 405	if (lcb->alloc)
 406		kfree(lcb->log_rec);
 407	kfree(lcb->lrh);
 408	kfree(lcb);
 409}
 410
 411/* Find the oldest lsn from active clients. */
 412static inline void oldest_client_lsn(const struct CLIENT_REC *ca,
 413				     __le16 next_client, u64 *oldest_lsn)
 414{
 415	while (next_client != LFS_NO_CLIENT_LE) {
 416		const struct CLIENT_REC *cr = ca + le16_to_cpu(next_client);
 417		u64 lsn = le64_to_cpu(cr->oldest_lsn);
 418
 419		/* Ignore this block if it's oldest lsn is 0. */
 420		if (lsn && lsn < *oldest_lsn)
 421			*oldest_lsn = lsn;
 422
 423		next_client = cr->next_client;
 424	}
 425}
 426
 427static inline bool is_rst_page_hdr_valid(u32 file_off,
 428					 const struct RESTART_HDR *rhdr)
 429{
 430	u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
 431	u32 page_size = le32_to_cpu(rhdr->page_size);
 432	u32 end_usa;
 433	u16 ro;
 434
 435	if (sys_page < SECTOR_SIZE || page_size < SECTOR_SIZE ||
 436	    sys_page & (sys_page - 1) || page_size & (page_size - 1)) {
 437		return false;
 438	}
 439
 440	/* Check that if the file offset isn't 0, it is the system page size. */
 441	if (file_off && file_off != sys_page)
 442		return false;
 443
 444	/* Check support version 1.1+. */
 445	if (le16_to_cpu(rhdr->major_ver) <= 1 && !rhdr->minor_ver)
 446		return false;
 447
 448	if (le16_to_cpu(rhdr->major_ver) > 2)
 449		return false;
 450
 451	ro = le16_to_cpu(rhdr->ra_off);
 452	if (!IS_ALIGNED(ro, 8) || ro > sys_page)
 453		return false;
 454
 455	end_usa = ((sys_page >> SECTOR_SHIFT) + 1) * sizeof(short);
 456	end_usa += le16_to_cpu(rhdr->rhdr.fix_off);
 457
 458	if (ro < end_usa)
 459		return false;
 460
 461	return true;
 462}
 463
 464static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
 465{
 466	const struct RESTART_AREA *ra;
 467	u16 cl, fl, ul;
 468	u32 off, l_size, seq_bits;
 469	u16 ro = le16_to_cpu(rhdr->ra_off);
 470	u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
 471
 472	if (ro + offsetof(struct RESTART_AREA, l_size) >
 473	    SECTOR_SIZE - sizeof(short))
 474		return false;
 475
 476	ra = Add2Ptr(rhdr, ro);
 477	cl = le16_to_cpu(ra->log_clients);
 478
 479	if (cl > 1)
 480		return false;
 481
 482	off = le16_to_cpu(ra->client_off);
 483
 484	if (!IS_ALIGNED(off, 8) || ro + off > SECTOR_SIZE - sizeof(short))
 485		return false;
 486
 487	off += cl * sizeof(struct CLIENT_REC);
 488
 489	if (off > sys_page)
 490		return false;
 491
 492	/*
 493	 * Check the restart length field and whether the entire
 494	 * restart area is contained that length.
 495	 */
 496	if (le16_to_cpu(rhdr->ra_off) + le16_to_cpu(ra->ra_len) > sys_page ||
 497	    off > le16_to_cpu(ra->ra_len)) {
 498		return false;
 499	}
 500
 501	/*
 502	 * As a final check make sure that the use list and the free list
 503	 * are either empty or point to a valid client.
 504	 */
 505	fl = le16_to_cpu(ra->client_idx[0]);
 506	ul = le16_to_cpu(ra->client_idx[1]);
 507	if ((fl != LFS_NO_CLIENT && fl >= cl) ||
 508	    (ul != LFS_NO_CLIENT && ul >= cl))
 509		return false;
 510
 511	/* Make sure the sequence number bits match the log file size. */
 512	l_size = le64_to_cpu(ra->l_size);
 513
 514	seq_bits = sizeof(u64) * 8 + 3;
 515	while (l_size) {
 516		l_size >>= 1;
 517		seq_bits -= 1;
 518	}
 519
 520	if (seq_bits != ra->seq_num_bits)
 521		return false;
 522
 523	/* The log page data offset and record header length must be quad-aligned. */
 524	if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) ||
 525	    !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8))
 526		return false;
 527
 528	return true;
 529}
 530
 531static inline bool is_client_area_valid(const struct RESTART_HDR *rhdr,
 532					bool usa_error)
 533{
 534	u16 ro = le16_to_cpu(rhdr->ra_off);
 535	const struct RESTART_AREA *ra = Add2Ptr(rhdr, ro);
 536	u16 ra_len = le16_to_cpu(ra->ra_len);
 537	const struct CLIENT_REC *ca;
 538	u32 i;
 539
 540	if (usa_error && ra_len + ro > SECTOR_SIZE - sizeof(short))
 541		return false;
 542
 543	/* Find the start of the client array. */
 544	ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
 545
 546	/*
 547	 * Start with the free list.
 548	 * Check that all the clients are valid and that there isn't a cycle.
 549	 * Do the in-use list on the second pass.
 550	 */
 551	for (i = 0; i < 2; i++) {
 552		u16 client_idx = le16_to_cpu(ra->client_idx[i]);
 553		bool first_client = true;
 554		u16 clients = le16_to_cpu(ra->log_clients);
 555
 556		while (client_idx != LFS_NO_CLIENT) {
 557			const struct CLIENT_REC *cr;
 558
 559			if (!clients ||
 560			    client_idx >= le16_to_cpu(ra->log_clients))
 561				return false;
 562
 563			clients -= 1;
 564			cr = ca + client_idx;
 565
 566			client_idx = le16_to_cpu(cr->next_client);
 567
 568			if (first_client) {
 569				first_client = false;
 570				if (cr->prev_client != LFS_NO_CLIENT_LE)
 571					return false;
 572			}
 573		}
 574	}
 575
 576	return true;
 577}
 578
 579/*
 580 * remove_client
 581 *
 582 * Remove a client record from a client record list an restart area.
 583 */
 584static inline void remove_client(struct CLIENT_REC *ca,
 585				 const struct CLIENT_REC *cr, __le16 *head)
 586{
 587	if (cr->prev_client == LFS_NO_CLIENT_LE)
 588		*head = cr->next_client;
 589	else
 590		ca[le16_to_cpu(cr->prev_client)].next_client = cr->next_client;
 591
 592	if (cr->next_client != LFS_NO_CLIENT_LE)
 593		ca[le16_to_cpu(cr->next_client)].prev_client = cr->prev_client;
 594}
 595
 596/*
 597 * add_client - Add a client record to the start of a list.
 598 */
 599static inline void add_client(struct CLIENT_REC *ca, u16 index, __le16 *head)
 600{
 601	struct CLIENT_REC *cr = ca + index;
 602
 603	cr->prev_client = LFS_NO_CLIENT_LE;
 604	cr->next_client = *head;
 605
 606	if (*head != LFS_NO_CLIENT_LE)
 607		ca[le16_to_cpu(*head)].prev_client = cpu_to_le16(index);
 608
 609	*head = cpu_to_le16(index);
 610}
 611
 612static inline void *enum_rstbl(struct RESTART_TABLE *t, void *c)
 613{
 614	__le32 *e;
 615	u32 bprt;
 616	u16 rsize = t ? le16_to_cpu(t->size) : 0;
 617
 618	if (!c) {
 619		if (!t || !t->total)
 620			return NULL;
 621		e = Add2Ptr(t, sizeof(struct RESTART_TABLE));
 622	} else {
 623		e = Add2Ptr(c, rsize);
 624	}
 625
 626	/* Loop until we hit the first one allocated, or the end of the list. */
 627	for (bprt = bytes_per_rt(t); PtrOffset(t, e) < bprt;
 628	     e = Add2Ptr(e, rsize)) {
 629		if (*e == RESTART_ENTRY_ALLOCATED_LE)
 630			return e;
 631	}
 632	return NULL;
 633}
 634
 635/*
 636 * find_dp - Search for a @vcn in Dirty Page Table.
 637 */
 638static inline struct DIR_PAGE_ENTRY *find_dp(struct RESTART_TABLE *dptbl,
 639					     u32 target_attr, u64 vcn)
 640{
 641	__le32 ta = cpu_to_le32(target_attr);
 642	struct DIR_PAGE_ENTRY *dp = NULL;
 643
 644	while ((dp = enum_rstbl(dptbl, dp))) {
 645		u64 dp_vcn = le64_to_cpu(dp->vcn);
 646
 647		if (dp->target_attr == ta && vcn >= dp_vcn &&
 648		    vcn < dp_vcn + le32_to_cpu(dp->lcns_follow)) {
 649			return dp;
 650		}
 651	}
 652	return NULL;
 653}
 654
 655static inline u32 norm_file_page(u32 page_size, u32 *l_size, bool use_default)
 656{
 657	if (use_default)
 658		page_size = DefaultLogPageSize;
 659
 660	/* Round the file size down to a system page boundary. */
 661	*l_size &= ~(page_size - 1);
 662
 663	/* File should contain at least 2 restart pages and MinLogRecordPages pages. */
 664	if (*l_size < (MinLogRecordPages + 2) * page_size)
 665		return 0;
 666
 667	return page_size;
 668}
 669
 670static bool check_log_rec(const struct LOG_REC_HDR *lr, u32 bytes, u32 tr,
 671			  u32 bytes_per_attr_entry)
 672{
 673	u16 t16;
 674
 675	if (bytes < sizeof(struct LOG_REC_HDR))
 676		return false;
 677	if (!tr)
 678		return false;
 679
 680	if ((tr - sizeof(struct RESTART_TABLE)) %
 681	    sizeof(struct TRANSACTION_ENTRY))
 682		return false;
 683
 684	if (le16_to_cpu(lr->redo_off) & 7)
 685		return false;
 686
 687	if (le16_to_cpu(lr->undo_off) & 7)
 688		return false;
 689
 690	if (lr->target_attr)
 691		goto check_lcns;
 692
 693	if (is_target_required(le16_to_cpu(lr->redo_op)))
 694		return false;
 695
 696	if (is_target_required(le16_to_cpu(lr->undo_op)))
 697		return false;
 698
 699check_lcns:
 700	if (!lr->lcns_follow)
 701		goto check_length;
 702
 703	t16 = le16_to_cpu(lr->target_attr);
 704	if ((t16 - sizeof(struct RESTART_TABLE)) % bytes_per_attr_entry)
 705		return false;
 706
 707check_length:
 708	if (bytes < lrh_length(lr))
 709		return false;
 710
 711	return true;
 712}
 713
 714static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes)
 715{
 716	u32 ts;
 717	u32 i, off;
 718	u16 rsize = le16_to_cpu(rt->size);
 719	u16 ne = le16_to_cpu(rt->used);
 720	u32 ff = le32_to_cpu(rt->first_free);
 721	u32 lf = le32_to_cpu(rt->last_free);
 722
 723	ts = rsize * ne + sizeof(struct RESTART_TABLE);
 724
 725	if (!rsize || rsize > bytes ||
 726	    rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts ||
 727	    le16_to_cpu(rt->total) > ne || ff > ts || lf > ts ||
 728	    (ff && ff < sizeof(struct RESTART_TABLE)) ||
 729	    (lf && lf < sizeof(struct RESTART_TABLE))) {
 730		return false;
 731	}
 732
 733	/*
 734	 * Verify each entry is either allocated or points
 735	 * to a valid offset the table.
 736	 */
 737	for (i = 0; i < ne; i++) {
 738		off = le32_to_cpu(*(__le32 *)Add2Ptr(
 739			rt, i * rsize + sizeof(struct RESTART_TABLE)));
 740
 741		if (off != RESTART_ENTRY_ALLOCATED && off &&
 742		    (off < sizeof(struct RESTART_TABLE) ||
 743		     ((off - sizeof(struct RESTART_TABLE)) % rsize))) {
 744			return false;
 745		}
 746	}
 747
 748	/*
 749	 * Walk through the list headed by the first entry to make
 750	 * sure none of the entries are currently being used.
 751	 */
 752	for (off = ff; off;) {
 753		if (off == RESTART_ENTRY_ALLOCATED)
 754			return false;
 755
 756		off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off));
 757	}
 758
 759	return true;
 760}
 761
 762/*
 763 * free_rsttbl_idx - Free a previously allocated index a Restart Table.
 764 */
 765static inline void free_rsttbl_idx(struct RESTART_TABLE *rt, u32 off)
 766{
 767	__le32 *e;
 768	u32 lf = le32_to_cpu(rt->last_free);
 769	__le32 off_le = cpu_to_le32(off);
 770
 771	e = Add2Ptr(rt, off);
 772
 773	if (off < le32_to_cpu(rt->free_goal)) {
 774		*e = rt->first_free;
 775		rt->first_free = off_le;
 776		if (!lf)
 777			rt->last_free = off_le;
 778	} else {
 779		if (lf)
 780			*(__le32 *)Add2Ptr(rt, lf) = off_le;
 781		else
 782			rt->first_free = off_le;
 783
 784		rt->last_free = off_le;
 785		*e = 0;
 786	}
 787
 788	le16_sub_cpu(&rt->total, 1);
 789}
 790
 791static inline struct RESTART_TABLE *init_rsttbl(u16 esize, u16 used)
 792{
 793	__le32 *e, *last_free;
 794	u32 off;
 795	u32 bytes = esize * used + sizeof(struct RESTART_TABLE);
 796	u32 lf = sizeof(struct RESTART_TABLE) + (used - 1) * esize;
 797	struct RESTART_TABLE *t = kzalloc(bytes, GFP_NOFS);
 798
 799	if (!t)
 800		return NULL;
 801
 802	t->size = cpu_to_le16(esize);
 803	t->used = cpu_to_le16(used);
 804	t->free_goal = cpu_to_le32(~0u);
 805	t->first_free = cpu_to_le32(sizeof(struct RESTART_TABLE));
 806	t->last_free = cpu_to_le32(lf);
 807
 808	e = (__le32 *)(t + 1);
 809	last_free = Add2Ptr(t, lf);
 810
 811	for (off = sizeof(struct RESTART_TABLE) + esize; e < last_free;
 812	     e = Add2Ptr(e, esize), off += esize) {
 813		*e = cpu_to_le32(off);
 814	}
 815	return t;
 816}
 817
 818static inline struct RESTART_TABLE *extend_rsttbl(struct RESTART_TABLE *tbl,
 819						  u32 add, u32 free_goal)
 820{
 821	u16 esize = le16_to_cpu(tbl->size);
 822	__le32 osize = cpu_to_le32(bytes_per_rt(tbl));
 823	u32 used = le16_to_cpu(tbl->used);
 824	struct RESTART_TABLE *rt;
 825
 826	rt = init_rsttbl(esize, used + add);
 827	if (!rt)
 828		return NULL;
 829
 830	memcpy(rt + 1, tbl + 1, esize * used);
 831
 832	rt->free_goal = free_goal == ~0u ?
 833				cpu_to_le32(~0u) :
 834				cpu_to_le32(sizeof(struct RESTART_TABLE) +
 835					    free_goal * esize);
 836
 837	if (tbl->first_free) {
 838		rt->first_free = tbl->first_free;
 839		*(__le32 *)Add2Ptr(rt, le32_to_cpu(tbl->last_free)) = osize;
 840	} else {
 841		rt->first_free = osize;
 842	}
 843
 844	rt->total = tbl->total;
 845
 846	kfree(tbl);
 847	return rt;
 848}
 849
 850/*
 851 * alloc_rsttbl_idx
 852 *
 853 * Allocate an index from within a previously initialized Restart Table.
 854 */
 855static inline void *alloc_rsttbl_idx(struct RESTART_TABLE **tbl)
 856{
 857	u32 off;
 858	__le32 *e;
 859	struct RESTART_TABLE *t = *tbl;
 860
 861	if (!t->first_free) {
 862		*tbl = t = extend_rsttbl(t, 16, ~0u);
 863		if (!t)
 864			return NULL;
 865	}
 866
 867	off = le32_to_cpu(t->first_free);
 868
 869	/* Dequeue this entry and zero it. */
 870	e = Add2Ptr(t, off);
 871
 872	t->first_free = *e;
 873
 874	memset(e, 0, le16_to_cpu(t->size));
 875
 876	*e = RESTART_ENTRY_ALLOCATED_LE;
 877
 878	/* If list is going empty, then we fix the last_free as well. */
 879	if (!t->first_free)
 880		t->last_free = 0;
 881
 882	le16_add_cpu(&t->total, 1);
 883
 884	return Add2Ptr(t, off);
 885}
 886
 887/*
 888 * alloc_rsttbl_from_idx
 889 *
 890 * Allocate a specific index from within a previously initialized Restart Table.
 891 */
 892static inline void *alloc_rsttbl_from_idx(struct RESTART_TABLE **tbl, u32 vbo)
 893{
 894	u32 off;
 895	__le32 *e;
 896	struct RESTART_TABLE *rt = *tbl;
 897	u32 bytes = bytes_per_rt(rt);
 898	u16 esize = le16_to_cpu(rt->size);
 899
 900	/* If the entry is not the table, we will have to extend the table. */
 901	if (vbo >= bytes) {
 902		/*
 903		 * Extend the size by computing the number of entries between
 904		 * the existing size and the desired index and adding 1 to that.
 905		 */
 906		u32 bytes2idx = vbo - bytes;
 907
 908		/*
 909		 * There should always be an integral number of entries
 910		 * being added. Now extend the table.
 911		 */
 912		*tbl = rt = extend_rsttbl(rt, bytes2idx / esize + 1, bytes);
 913		if (!rt)
 914			return NULL;
 915	}
 916
 917	/* See if the entry is already allocated, and just return if it is. */
 918	e = Add2Ptr(rt, vbo);
 919
 920	if (*e == RESTART_ENTRY_ALLOCATED_LE)
 921		return e;
 922
 923	/*
 924	 * Walk through the table, looking for the entry we're
 925	 * interested and the previous entry.
 926	 */
 927	off = le32_to_cpu(rt->first_free);
 928	e = Add2Ptr(rt, off);
 929
 930	if (off == vbo) {
 931		/* this is a match */
 932		rt->first_free = *e;
 933		goto skip_looking;
 934	}
 935
 936	/*
 937	 * Need to walk through the list looking for the predecessor
 938	 * of our entry.
 939	 */
 940	for (;;) {
 941		/* Remember the entry just found */
 942		u32 last_off = off;
 943		__le32 *last_e = e;
 944
 945		/* Should never run of entries. */
 946
 947		/* Lookup up the next entry the list. */
 948		off = le32_to_cpu(*last_e);
 949		e = Add2Ptr(rt, off);
 950
 951		/* If this is our match we are done. */
 952		if (off == vbo) {
 953			*last_e = *e;
 954
 955			/*
 956			 * If this was the last entry, we update that
 957			 * table as well.
 958			 */
 959			if (le32_to_cpu(rt->last_free) == off)
 960				rt->last_free = cpu_to_le32(last_off);
 961			break;
 962		}
 963	}
 964
 965skip_looking:
 966	/* If the list is now empty, we fix the last_free as well. */
 967	if (!rt->first_free)
 968		rt->last_free = 0;
 969
 970	/* Zero this entry. */
 971	memset(e, 0, esize);
 972	*e = RESTART_ENTRY_ALLOCATED_LE;
 973
 974	le16_add_cpu(&rt->total, 1);
 975
 976	return e;
 977}
 978
 979struct restart_info {
 980	u64 last_lsn;
 981	struct RESTART_HDR *r_page;
 982	u32 vbo;
 983	bool chkdsk_was_run;
 984	bool valid_page;
 985	bool initialized;
 986	bool restart;
 987};
 988
 989#define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001)
 990
 991#define NTFSLOG_WRAPPED 0x00000001
 992#define NTFSLOG_MULTIPLE_PAGE_IO 0x00000002
 993#define NTFSLOG_NO_LAST_LSN 0x00000004
 994#define NTFSLOG_REUSE_TAIL 0x00000010
 995#define NTFSLOG_NO_OLDEST_LSN 0x00000020
 996
 997/* Helper struct to work with NTFS $LogFile. */
 998struct ntfs_log {
 999	struct ntfs_inode *ni;
1000
1001	u32 l_size;
1002	u32 orig_file_size;
1003	u32 sys_page_size;
1004	u32 sys_page_mask;
1005	u32 page_size;
1006	u32 page_mask; // page_size - 1
1007	u8 page_bits;
1008	struct RECORD_PAGE_HDR *one_page_buf;
1009
1010	struct RESTART_TABLE *open_attr_tbl;
1011	u32 transaction_id;
1012	u32 clst_per_page;
1013
1014	u32 first_page;
1015	u32 next_page;
1016	u32 ra_off;
1017	u32 data_off;
1018	u32 restart_size;
1019	u32 data_size;
1020	u16 record_header_len;
1021	u64 seq_num;
1022	u32 seq_num_bits;
1023	u32 file_data_bits;
1024	u32 seq_num_mask; /* (1 << file_data_bits) - 1 */
1025
1026	struct RESTART_AREA *ra; /* In-memory image of the next restart area. */
1027	u32 ra_size; /* The usable size of the restart area. */
1028
1029	/*
1030	 * If true, then the in-memory restart area is to be written
1031	 * to the first position on the disk.
1032	 */
1033	bool init_ra;
1034	bool set_dirty; /* True if we need to set dirty flag. */
1035
1036	u64 oldest_lsn;
1037
1038	u32 oldest_lsn_off;
1039	u64 last_lsn;
1040
1041	u32 total_avail;
1042	u32 total_avail_pages;
1043	u32 total_undo_commit;
1044	u32 max_current_avail;
1045	u32 current_avail;
1046	u32 reserved;
1047
1048	short major_ver;
1049	short minor_ver;
1050
1051	u32 l_flags; /* See NTFSLOG_XXX */
1052	u32 current_openlog_count; /* On-disk value for open_log_count. */
1053
1054	struct CLIENT_ID client_id;
1055	u32 client_undo_commit;
1056
1057	struct restart_info rst_info, rst_info2;
1058};
1059
1060static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn)
1061{
1062	u32 vbo = (lsn << log->seq_num_bits) >> (log->seq_num_bits - 3);
1063
1064	return vbo;
1065}
1066
1067/* Compute the offset in the log file of the next log page. */
1068static inline u32 next_page_off(struct ntfs_log *log, u32 off)
1069{
1070	off = (off & ~log->sys_page_mask) + log->page_size;
1071	return off >= log->l_size ? log->first_page : off;
1072}
1073
1074static inline u32 lsn_to_page_off(struct ntfs_log *log, u64 lsn)
1075{
1076	return (((u32)lsn) << 3) & log->page_mask;
1077}
1078
1079static inline u64 vbo_to_lsn(struct ntfs_log *log, u32 off, u64 Seq)
1080{
1081	return (off >> 3) + (Seq << log->file_data_bits);
1082}
1083
1084static inline bool is_lsn_in_file(struct ntfs_log *log, u64 lsn)
1085{
1086	return lsn >= log->oldest_lsn &&
1087	       lsn <= le64_to_cpu(log->ra->current_lsn);
1088}
1089
1090static inline u32 hdr_file_off(struct ntfs_log *log,
1091			       struct RECORD_PAGE_HDR *hdr)
1092{
1093	if (log->major_ver < 2)
1094		return le64_to_cpu(hdr->rhdr.lsn);
1095
1096	return le32_to_cpu(hdr->file_off);
1097}
1098
1099static inline u64 base_lsn(struct ntfs_log *log,
1100			   const struct RECORD_PAGE_HDR *hdr, u64 lsn)
1101{
1102	u64 h_lsn = le64_to_cpu(hdr->rhdr.lsn);
1103	u64 ret = (((h_lsn >> log->file_data_bits) +
1104		    (lsn < (lsn_to_vbo(log, h_lsn) & ~log->page_mask) ? 1 : 0))
1105		   << log->file_data_bits) +
1106		  ((((is_log_record_end(hdr) &&
1107		      h_lsn <= le64_to_cpu(hdr->record_hdr.last_end_lsn)) ?
1108			     le16_to_cpu(hdr->record_hdr.next_record_off) :
1109			     log->page_size) +
1110		    lsn) >>
1111		   3);
1112
1113	return ret;
1114}
1115
1116static inline bool verify_client_lsn(struct ntfs_log *log,
1117				     const struct CLIENT_REC *client, u64 lsn)
1118{
1119	return lsn >= le64_to_cpu(client->oldest_lsn) &&
1120	       lsn <= le64_to_cpu(log->ra->current_lsn) && lsn;
1121}
1122
1123static int read_log_page(struct ntfs_log *log, u32 vbo,
1124			 struct RECORD_PAGE_HDR **buffer, bool *usa_error)
1125{
1126	int err = 0;
1127	u32 page_idx = vbo >> log->page_bits;
1128	u32 page_off = vbo & log->page_mask;
1129	u32 bytes = log->page_size - page_off;
1130	void *to_free = NULL;
1131	u32 page_vbo = page_idx << log->page_bits;
1132	struct RECORD_PAGE_HDR *page_buf;
1133	struct ntfs_inode *ni = log->ni;
1134	bool bBAAD;
1135
1136	if (vbo >= log->l_size)
1137		return -EINVAL;
1138
1139	if (!*buffer) {
1140		to_free = kmalloc(log->page_size, GFP_NOFS);
1141		if (!to_free)
1142			return -ENOMEM;
1143		*buffer = to_free;
1144	}
1145
1146	page_buf = page_off ? log->one_page_buf : *buffer;
1147
1148	err = ntfs_read_run_nb(ni->mi.sbi, &ni->file.run, page_vbo, page_buf,
1149			       log->page_size, NULL);
1150	if (err)
1151		goto out;
1152
1153	if (page_buf->rhdr.sign != NTFS_FFFF_SIGNATURE)
1154		ntfs_fix_post_read(&page_buf->rhdr, PAGE_SIZE, false);
1155
1156	if (page_buf != *buffer)
1157		memcpy(*buffer, Add2Ptr(page_buf, page_off), bytes);
1158
1159	bBAAD = page_buf->rhdr.sign == NTFS_BAAD_SIGNATURE;
1160
1161	if (usa_error)
1162		*usa_error = bBAAD;
1163	/* Check that the update sequence array for this page is valid */
1164	/* If we don't allow errors, raise an error status */
1165	else if (bBAAD)
1166		err = -EINVAL;
1167
1168out:
1169	if (err && to_free) {
1170		kfree(to_free);
1171		*buffer = NULL;
1172	}
1173
1174	return err;
1175}
1176
1177/*
1178 * log_read_rst
1179 *
1180 * It walks through 512 blocks of the file looking for a valid
1181 * restart page header. It will stop the first time we find a
1182 * valid page header.
1183 */
1184static int log_read_rst(struct ntfs_log *log, bool first,
1185			struct restart_info *info)
1186{
1187	u32 skip, vbo;
 
1188	struct RESTART_HDR *r_page = NULL;
1189
1190	/* Determine which restart area we are looking for. */
1191	if (first) {
1192		vbo = 0;
1193		skip = 512;
1194	} else {
1195		vbo = 512;
1196		skip = 0;
1197	}
1198
1199	/* Loop continuously until we succeed. */
1200	for (; vbo < log->l_size; vbo = 2 * vbo + skip, skip = 0) {
1201		bool usa_error;
1202		bool brst, bchk;
1203		struct RESTART_AREA *ra;
1204
1205		/* Read a page header at the current offset. */
1206		if (read_log_page(log, vbo, (struct RECORD_PAGE_HDR **)&r_page,
1207				  &usa_error)) {
1208			/* Ignore any errors. */
1209			continue;
1210		}
1211
1212		/* Exit if the signature is a log record page. */
1213		if (r_page->rhdr.sign == NTFS_RCRD_SIGNATURE) {
1214			info->initialized = true;
1215			break;
1216		}
1217
1218		brst = r_page->rhdr.sign == NTFS_RSTR_SIGNATURE;
1219		bchk = r_page->rhdr.sign == NTFS_CHKD_SIGNATURE;
1220
1221		if (!bchk && !brst) {
1222			if (r_page->rhdr.sign != NTFS_FFFF_SIGNATURE) {
1223				/*
1224				 * Remember if the signature does not
1225				 * indicate uninitialized file.
1226				 */
1227				info->initialized = true;
1228			}
1229			continue;
1230		}
1231
1232		ra = NULL;
1233		info->valid_page = false;
1234		info->initialized = true;
1235		info->vbo = vbo;
1236
1237		/* Let's check the restart area if this is a valid page. */
1238		if (!is_rst_page_hdr_valid(vbo, r_page))
1239			goto check_result;
1240		ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
1241
1242		if (!is_rst_area_valid(r_page))
1243			goto check_result;
1244
1245		/*
1246		 * We have a valid restart page header and restart area.
1247		 * If chkdsk was run or we have no clients then we have
1248		 * no more checking to do.
1249		 */
1250		if (bchk || ra->client_idx[1] == LFS_NO_CLIENT_LE) {
1251			info->valid_page = true;
1252			goto check_result;
1253		}
1254
1255		if (is_client_area_valid(r_page, usa_error)) {
1256			info->valid_page = true;
1257			ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
1258		}
1259
1260check_result:
1261		/*
1262		 * If chkdsk was run then update the caller's
1263		 * values and return.
1264		 */
1265		if (r_page->rhdr.sign == NTFS_CHKD_SIGNATURE) {
1266			info->chkdsk_was_run = true;
1267			info->last_lsn = le64_to_cpu(r_page->rhdr.lsn);
1268			info->restart = true;
1269			info->r_page = r_page;
1270			return 0;
1271		}
1272
1273		/*
1274		 * If we have a valid page then copy the values
1275		 * we need from it.
1276		 */
1277		if (info->valid_page) {
1278			info->last_lsn = le64_to_cpu(ra->current_lsn);
1279			info->restart = true;
1280			info->r_page = r_page;
1281			return 0;
1282		}
1283	}
1284
1285	kfree(r_page);
1286
1287	return 0;
1288}
1289
1290/*
1291 * Ilog_init_pg_hdr - Init @log from restart page header.
1292 */
1293static void log_init_pg_hdr(struct ntfs_log *log, u16 major_ver, u16 minor_ver)
1294{
1295	log->sys_page_size = log->page_size;
1296	log->sys_page_mask = log->page_mask;
1297
1298	log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits;
1299	if (!log->clst_per_page)
1300		log->clst_per_page = 1;
1301
1302	log->first_page = major_ver >= 2 ? 0x22 * log->page_size :
1303					   4 * log->page_size;
1304	log->major_ver = major_ver;
1305	log->minor_ver = minor_ver;
1306}
1307
1308/*
1309 * log_create - Init @log in cases when we don't have a restart area to use.
1310 */
1311static void log_create(struct ntfs_log *log, const u64 last_lsn,
1312		       u32 open_log_count, bool wrapped, bool use_multi_page)
1313{
1314	/* All file offsets must be quadword aligned. */
1315	log->file_data_bits = blksize_bits(log->l_size) - 3;
1316	log->seq_num_mask = (8 << log->file_data_bits) - 1;
1317	log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits;
1318	log->seq_num = (last_lsn >> log->file_data_bits) + 2;
1319	log->next_page = log->first_page;
1320	log->oldest_lsn = log->seq_num << log->file_data_bits;
1321	log->oldest_lsn_off = 0;
1322	log->last_lsn = log->oldest_lsn;
1323
1324	log->l_flags |= NTFSLOG_NO_LAST_LSN | NTFSLOG_NO_OLDEST_LSN;
1325
1326	/* Set the correct flags for the I/O and indicate if we have wrapped. */
1327	if (wrapped)
1328		log->l_flags |= NTFSLOG_WRAPPED;
1329
1330	if (use_multi_page)
1331		log->l_flags |= NTFSLOG_MULTIPLE_PAGE_IO;
1332
1333	/* Compute the log page values. */
1334	log->data_off = ALIGN(
1335		offsetof(struct RECORD_PAGE_HDR, fixups) +
1336			sizeof(short) * ((log->page_size >> SECTOR_SHIFT) + 1),
1337		8);
1338	log->data_size = log->page_size - log->data_off;
1339	log->record_header_len = sizeof(struct LFS_RECORD_HDR);
1340
1341	/* Remember the different page sizes for reservation. */
1342	log->reserved = log->data_size - log->record_header_len;
1343
1344	/* Compute the restart page values. */
1345	log->ra_off = ALIGN(
1346		offsetof(struct RESTART_HDR, fixups) +
1347			sizeof(short) *
1348				((log->sys_page_size >> SECTOR_SHIFT) + 1),
1349		8);
1350	log->restart_size = log->sys_page_size - log->ra_off;
1351	log->ra_size = struct_size(log->ra, clients, 1);
1352	log->current_openlog_count = open_log_count;
1353
1354	/*
1355	 * The total available log file space is the number of
1356	 * log file pages times the space available on each page.
1357	 */
1358	log->total_avail_pages = log->l_size - log->first_page;
1359	log->total_avail = log->total_avail_pages >> log->page_bits;
1360
1361	/*
1362	 * We assume that we can't use the end of the page less than
1363	 * the file record size.
1364	 * Then we won't need to reserve more than the caller asks for.
1365	 */
1366	log->max_current_avail = log->total_avail * log->reserved;
1367	log->total_avail = log->total_avail * log->data_size;
1368	log->current_avail = log->max_current_avail;
1369}
1370
1371/*
1372 * log_create_ra - Fill a restart area from the values stored in @log.
1373 */
1374static struct RESTART_AREA *log_create_ra(struct ntfs_log *log)
1375{
1376	struct CLIENT_REC *cr;
1377	struct RESTART_AREA *ra = kzalloc(log->restart_size, GFP_NOFS);
1378
1379	if (!ra)
1380		return NULL;
1381
1382	ra->current_lsn = cpu_to_le64(log->last_lsn);
1383	ra->log_clients = cpu_to_le16(1);
1384	ra->client_idx[1] = LFS_NO_CLIENT_LE;
1385	if (log->l_flags & NTFSLOG_MULTIPLE_PAGE_IO)
1386		ra->flags = RESTART_SINGLE_PAGE_IO;
1387	ra->seq_num_bits = cpu_to_le32(log->seq_num_bits);
1388	ra->ra_len = cpu_to_le16(log->ra_size);
1389	ra->client_off = cpu_to_le16(offsetof(struct RESTART_AREA, clients));
1390	ra->l_size = cpu_to_le64(log->l_size);
1391	ra->rec_hdr_len = cpu_to_le16(log->record_header_len);
1392	ra->data_off = cpu_to_le16(log->data_off);
1393	ra->open_log_count = cpu_to_le32(log->current_openlog_count + 1);
1394
1395	cr = ra->clients;
1396
1397	cr->prev_client = LFS_NO_CLIENT_LE;
1398	cr->next_client = LFS_NO_CLIENT_LE;
1399
1400	return ra;
1401}
1402
1403static u32 final_log_off(struct ntfs_log *log, u64 lsn, u32 data_len)
1404{
1405	u32 base_vbo = lsn << 3;
1406	u32 final_log_off = (base_vbo & log->seq_num_mask) & ~log->page_mask;
1407	u32 page_off = base_vbo & log->page_mask;
1408	u32 tail = log->page_size - page_off;
1409
1410	page_off -= 1;
1411
1412	/* Add the length of the header. */
1413	data_len += log->record_header_len;
1414
1415	/*
1416	 * If this lsn is contained this log page we are done.
1417	 * Otherwise we need to walk through several log pages.
1418	 */
1419	if (data_len > tail) {
1420		data_len -= tail;
1421		tail = log->data_size;
1422		page_off = log->data_off - 1;
1423
1424		for (;;) {
1425			final_log_off = next_page_off(log, final_log_off);
1426
1427			/*
1428			 * We are done if the remaining bytes
1429			 * fit on this page.
1430			 */
1431			if (data_len <= tail)
1432				break;
1433			data_len -= tail;
1434		}
1435	}
1436
1437	/*
1438	 * We add the remaining bytes to our starting position on this page
1439	 * and then add that value to the file offset of this log page.
1440	 */
1441	return final_log_off + data_len + page_off;
1442}
1443
1444static int next_log_lsn(struct ntfs_log *log, const struct LFS_RECORD_HDR *rh,
1445			u64 *lsn)
1446{
1447	int err;
1448	u64 this_lsn = le64_to_cpu(rh->this_lsn);
1449	u32 vbo = lsn_to_vbo(log, this_lsn);
1450	u32 end =
1451		final_log_off(log, this_lsn, le32_to_cpu(rh->client_data_len));
1452	u32 hdr_off = end & ~log->sys_page_mask;
1453	u64 seq = this_lsn >> log->file_data_bits;
1454	struct RECORD_PAGE_HDR *page = NULL;
1455
1456	/* Remember if we wrapped. */
1457	if (end <= vbo)
1458		seq += 1;
1459
1460	/* Log page header for this page. */
1461	err = read_log_page(log, hdr_off, &page, NULL);
1462	if (err)
1463		return err;
1464
1465	/*
1466	 * If the lsn we were given was not the last lsn on this page,
1467	 * then the starting offset for the next lsn is on a quad word
1468	 * boundary following the last file offset for the current lsn.
1469	 * Otherwise the file offset is the start of the data on the next page.
1470	 */
1471	if (this_lsn == le64_to_cpu(page->rhdr.lsn)) {
1472		/* If we wrapped, we need to increment the sequence number. */
1473		hdr_off = next_page_off(log, hdr_off);
1474		if (hdr_off == log->first_page)
1475			seq += 1;
1476
1477		vbo = hdr_off + log->data_off;
1478	} else {
1479		vbo = ALIGN(end, 8);
1480	}
1481
1482	/* Compute the lsn based on the file offset and the sequence count. */
1483	*lsn = vbo_to_lsn(log, vbo, seq);
1484
1485	/*
1486	 * If this lsn is within the legal range for the file, we return true.
1487	 * Otherwise false indicates that there are no more lsn's.
1488	 */
1489	if (!is_lsn_in_file(log, *lsn))
1490		*lsn = 0;
1491
1492	kfree(page);
1493
1494	return 0;
1495}
1496
1497/*
1498 * current_log_avail - Calculate the number of bytes available for log records.
1499 */
1500static u32 current_log_avail(struct ntfs_log *log)
1501{
1502	u32 oldest_off, next_free_off, free_bytes;
1503
1504	if (log->l_flags & NTFSLOG_NO_LAST_LSN) {
1505		/* The entire file is available. */
1506		return log->max_current_avail;
1507	}
1508
1509	/*
1510	 * If there is a last lsn the restart area then we know that we will
1511	 * have to compute the free range.
1512	 * If there is no oldest lsn then start at the first page of the file.
1513	 */
1514	oldest_off = (log->l_flags & NTFSLOG_NO_OLDEST_LSN) ?
1515			     log->first_page :
1516			     (log->oldest_lsn_off & ~log->sys_page_mask);
1517
1518	/*
1519	 * We will use the next log page offset to compute the next free page.
1520	 * If we are going to reuse this page go to the next page.
1521	 * If we are at the first page then use the end of the file.
1522	 */
1523	next_free_off = (log->l_flags & NTFSLOG_REUSE_TAIL) ?
1524				log->next_page + log->page_size :
1525			log->next_page == log->first_page ? log->l_size :
1526							    log->next_page;
1527
1528	/* If the two offsets are the same then there is no available space. */
1529	if (oldest_off == next_free_off)
1530		return 0;
1531	/*
1532	 * If the free offset follows the oldest offset then subtract
1533	 * this range from the total available pages.
1534	 */
1535	free_bytes =
1536		oldest_off < next_free_off ?
1537			log->total_avail_pages - (next_free_off - oldest_off) :
1538			oldest_off - next_free_off;
1539
1540	free_bytes >>= log->page_bits;
1541	return free_bytes * log->reserved;
1542}
1543
1544static bool check_subseq_log_page(struct ntfs_log *log,
1545				  const struct RECORD_PAGE_HDR *rp, u32 vbo,
1546				  u64 seq)
1547{
1548	u64 lsn_seq;
1549	const struct NTFS_RECORD_HEADER *rhdr = &rp->rhdr;
1550	u64 lsn = le64_to_cpu(rhdr->lsn);
1551
1552	if (rhdr->sign == NTFS_FFFF_SIGNATURE || !rhdr->sign)
1553		return false;
1554
1555	/*
1556	 * If the last lsn on the page occurs was written after the page
1557	 * that caused the original error then we have a fatal error.
1558	 */
1559	lsn_seq = lsn >> log->file_data_bits;
1560
1561	/*
1562	 * If the sequence number for the lsn the page is equal or greater
1563	 * than lsn we expect, then this is a subsequent write.
1564	 */
1565	return lsn_seq >= seq ||
1566	       (lsn_seq == seq - 1 && log->first_page == vbo &&
1567		vbo != (lsn_to_vbo(log, lsn) & ~log->page_mask));
1568}
1569
/*
 * last_log_lsn
 *
 * Walks through the log pages for a file, searching for the
 * last log page written to the file.
 *
 * The function works in three stages:
 *  1. next_tail/tail_read: read the "tail copy" pages that precede the
 *     first log page and pick the usable ones.
 *  2. next_page .. check_tail: walk the log pages starting at the expected
 *     position, accepting pages while their lsn, sequence number and
 *     page_pos/page_count are consistent. Every accepted page that ends a
 *     record updates log->seq_num, log->last_lsn, log->ra->current_lsn,
 *     log->next_page and the NTFSLOG_NO_LAST_LSN / NTFSLOG_REUSE_TAIL /
 *     NTFSLOG_WRAPPED bits of log->l_flags.
 *  3. next_test_page .. file_is_valid: look at the page(s) after the
 *     detected end to make sure nothing newer follows, then write newer
 *     tail copies back to their place in the file.
 *
 * Return: -ENOMEM on allocation failure; -EINVAL if a page after the
 * detected end looks like a subsequent write; -EROFS if pages have to be
 * rewritten but the superblock is read-only; the error of
 * ntfs_sb_write_run(); otherwise whatever the last read_log_page() call
 * left in 'err' (0 on a clean run).
 */
static int last_log_lsn(struct ntfs_log *log)
{
	int err;
	bool usa_error = false;
	bool replace_page = false;
	bool reuse_page = log->l_flags & NTFSLOG_REUSE_TAIL;
	bool wrapped_file, wrapped;

	u32 page_cnt = 1, page_pos = 1;
	u32 page_off = 0, page_off1 = 0, saved_off = 0;
	u32 final_off, second_off, final_off_prev = 0, second_off_prev = 0;
	u32 first_file_off = 0, second_file_off = 0;
	u32 part_io_count = 0;
	u32 tails = 0;
	u32 this_off, curpage_off, nextpage_off, remain_pages;

	u64 expected_seq, seq_base = 0, lsn_base = 0;
	u64 best_lsn, best_lsn1, best_lsn2;
	u64 lsn_cur, lsn1, lsn2;
	u64 last_ok_lsn = reuse_page ? log->last_lsn : 0;

	u16 cur_pos, best_page_pos;

	struct RECORD_PAGE_HDR *page = NULL;
	struct RECORD_PAGE_HDR *tst_page = NULL;
	struct RECORD_PAGE_HDR *first_tail = NULL;
	struct RECORD_PAGE_HDR *second_tail = NULL;
	struct RECORD_PAGE_HDR *tail_page = NULL;
	struct RECORD_PAGE_HDR *second_tail_prev = NULL;
	struct RECORD_PAGE_HDR *first_tail_prev = NULL;
	struct RECORD_PAGE_HDR *page_bufs = NULL;
	struct RECORD_PAGE_HDR *best_page;

	/*
	 * Locate the two tail copy areas. For version 2 and above they start
	 * at page 0x02 and page 0x12, and up to 0x10 pages are collected into
	 * page_bufs. For older versions they are the two pages immediately
	 * before the first log page, and page_bufs stays NULL.
	 */
	if (log->major_ver >= 2) {
		final_off = 0x02 * log->page_size;
		second_off = 0x12 * log->page_size;

		// 0x10 == 0x12 - 0x2
		page_bufs = kmalloc(log->page_size * 0x10, GFP_NOFS);
		if (!page_bufs)
			return -ENOMEM;
	} else {
		second_off = log->first_page - log->page_size;
		final_off = second_off - log->page_size;
	}

next_tail:
	/* Read second tail page (at pos 3/0x12000). */
	if (read_log_page(log, second_off, &second_tail, &usa_error) ||
	    usa_error || second_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
		kfree(second_tail);
		second_tail = NULL;
		second_file_off = 0;
		lsn2 = 0;
	} else {
		second_file_off = hdr_file_off(log, second_tail);
		lsn2 = le64_to_cpu(second_tail->record_hdr.last_end_lsn);
	}

	/* Read first tail page (at pos 2/0x2000). */
	if (read_log_page(log, final_off, &first_tail, &usa_error) ||
	    usa_error || first_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
		kfree(first_tail);
		first_tail = NULL;
		first_file_off = 0;
		lsn1 = 0;
	} else {
		first_file_off = hdr_file_off(log, first_tail);
		lsn1 = le64_to_cpu(first_tail->record_hdr.last_end_lsn);
	}

	/*
	 * Old log versions: a single pair of tail pages. Prefer the copy with
	 * the larger last_end_lsn (the first copy wins a tie).
	 */
	if (log->major_ver < 2) {
		/* NOTE(review): shadows the outer 'best_page' pointer. */
		int best_page;

		first_tail_prev = first_tail;
		final_off_prev = first_file_off;
		second_tail_prev = second_tail;
		second_off_prev = second_file_off;
		tails = 1;

		if (!first_tail && !second_tail)
			goto tail_read;

		if (first_tail && second_tail)
			best_page = lsn1 < lsn2 ? 1 : 0;
		else if (first_tail)
			best_page = 0;
		else
			best_page = 1;

		page_off = best_page ? second_file_off : first_file_off;
		seq_base = (best_page ? lsn2 : lsn1) >> log->file_data_bits;
		goto tail_read;
	}

	/*
	 * Version 2 and above: rank the two copies by base_lsn() and keep the
	 * larger one (the second copy wins a tie).
	 */
	best_lsn1 = first_tail ? base_lsn(log, first_tail, first_file_off) : 0;
	best_lsn2 = second_tail ? base_lsn(log, second_tail, second_file_off) :
				  0;

	if (first_tail && second_tail) {
		if (best_lsn1 > best_lsn2) {
			best_lsn = best_lsn1;
			best_page = first_tail;
			this_off = first_file_off;
		} else {
			best_lsn = best_lsn2;
			best_page = second_tail;
			this_off = second_file_off;
		}
	} else if (first_tail) {
		best_lsn = best_lsn1;
		best_page = first_tail;
		this_off = first_file_off;
	} else if (second_tail) {
		best_lsn = best_lsn2;
		best_page = second_tail;
		this_off = second_file_off;
	} else {
		goto tail_read;
	}

	best_page_pos = le16_to_cpu(best_page->page_pos);

	if (!tails) {
		/*
		 * Nothing buffered yet: the page is taken as the start of the
		 * buffered run only if its position matches the expected one.
		 * page_off is the file offset of the first buffered page,
		 * saved_off that of the last one.
		 */
		if (best_page_pos == page_pos) {
			seq_base = best_lsn >> log->file_data_bits;
			saved_off = page_off = le32_to_cpu(best_page->file_off);
			lsn_base = best_lsn;

			memmove(page_bufs, best_page, log->page_size);

			page_cnt = le16_to_cpu(best_page->page_count);
			if (page_cnt > 1)
				page_pos += 1;

			tails = 1;
		}
	} else if (seq_base == (best_lsn >> log->file_data_bits) &&
		   saved_off + log->page_size == this_off &&
		   lsn_base < best_lsn &&
		   (page_pos != page_cnt || best_page_pos == page_pos ||
		    best_page_pos == 1) &&
		   (page_pos >= page_cnt || best_page_pos == page_pos)) {
		/*
		 * The page continues the buffered run: same sequence number,
		 * file offset directly after the previous page, larger lsn
		 * and a plausible position. Append it to page_bufs.
		 */
		u16 bppc = le16_to_cpu(best_page->page_count);

		saved_off += log->page_size;
		lsn_base = best_lsn;

		memmove(Add2Ptr(page_bufs, tails * log->page_size), best_page,
			log->page_size);

		tails += 1;

		if (best_page_pos != bppc) {
			page_cnt = bppc;
			page_pos = best_page_pos;

			if (page_cnt > 1)
				page_pos += 1;
		} else {
			page_pos = page_cnt = 1;
		}
	} else {
		/* The run is broken: drop this pair and keep the previous one. */
		kfree(first_tail);
		kfree(second_tail);
		goto tail_read;
	}

	/* Remember this pair as the previous one and move to the next pair. */
	kfree(first_tail_prev);
	first_tail_prev = first_tail;
	final_off_prev = first_file_off;
	first_tail = NULL;

	kfree(second_tail_prev);
	second_tail_prev = second_tail;
	second_off_prev = second_file_off;
	second_tail = NULL;

	final_off += log->page_size;
	second_off += log->page_size;

	if (tails < 0x10)
		goto next_tail;
tail_read:
	/*
	 * From here on first_tail/second_tail are the last accepted pair and
	 * final_off/second_off are the file offsets recorded in them.
	 */
	first_tail = first_tail_prev;
	final_off = final_off_prev;

	second_tail = second_tail_prev;
	second_off = second_off_prev;

	page_cnt = page_pos = 1;

	/* Choose the page to start the walk from. */
	curpage_off = seq_base == log->seq_num ? min(log->next_page, page_off) :
						 log->next_page;

	wrapped_file =
		curpage_off == log->first_page &&
		!(log->l_flags & (NTFSLOG_NO_LAST_LSN | NTFSLOG_REUSE_TAIL));

	expected_seq = wrapped_file ? (log->seq_num + 1) : log->seq_num;

	nextpage_off = curpage_off;

next_page:
	tail_page = NULL;
	/* Read the next log page. */
	err = read_log_page(log, curpage_off, &page, &usa_error);

	/* Compute the next log page offset the file. */
	nextpage_off = next_page_off(log, curpage_off);
	wrapped = nextpage_off == log->first_page;

	if (tails > 1) {
		/*
		 * Several tail pages were buffered. The buffered copy of the
		 * page at curpage_off, if it is inside the buffered range,
		 * sits at page_bufs + (curpage_off - page_off).
		 */
		struct RECORD_PAGE_HDR *cur_page =
			Add2Ptr(page_bufs, curpage_off - page_off);

		/* The last buffered page is treated as an ordinary tail copy. */
		if (curpage_off == saved_off) {
			tail_page = cur_page;
			goto use_tail_page;
		}

		/* Outside the buffered range: no tail copy for this page. */
		if (page_off > curpage_off || curpage_off >= saved_off)
			goto use_tail_page;

		/* A mismatch was already seen: keep using the buffered pages. */
		if (page_off1)
			goto use_cur_page;

		/*
		 * The page in the file agrees with the buffered copy and has
		 * the expected position: use the page from the file.
		 */
		if (!err && !usa_error &&
		    page->rhdr.sign == NTFS_RCRD_SIGNATURE &&
		    cur_page->rhdr.lsn == page->rhdr.lsn &&
		    cur_page->record_hdr.next_record_off ==
			    page->record_hdr.next_record_off &&
		    ((page_pos == page_cnt &&
		      le16_to_cpu(page->page_pos) == 1) ||
		     (page_pos != page_cnt &&
		      le16_to_cpu(page->page_pos) == page_pos + 1 &&
		      le16_to_cpu(page->page_count) == page_cnt))) {
			cur_page = NULL;
			goto use_tail_page;
		}

		/*
		 * First mismatch between the file and the buffered pages.
		 * NOTE(review): this stores page_off, not curpage_off, so
		 * page_off1 - page_off below is always 0 and page_off1 stays
		 * "unset" when page_off is 0 - confirm this is intended.
		 */
		page_off1 = page_off;

use_cur_page:

		lsn_cur = le64_to_cpu(cur_page->rhdr.lsn);

		/* Stop if the buffered page is neither the expected lsn nor sequence. */
		if (last_ok_lsn !=
			    le64_to_cpu(cur_page->record_hdr.last_end_lsn) &&
		    ((lsn_cur >> log->file_data_bits) +
		     ((curpage_off <
		       (lsn_to_vbo(log, lsn_cur) & ~log->page_mask)) ?
			      1 :
			      0)) != expected_seq) {
			goto check_tail;
		}

		if (!is_log_record_end(cur_page)) {
			tail_page = NULL;
			last_ok_lsn = lsn_cur;
			goto next_page_1;
		}

		/* A record ends on the buffered page: accept it as the new end. */
		log->seq_num = expected_seq;
		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
		log->last_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
		log->ra->current_lsn = cur_page->record_hdr.last_end_lsn;

		if (log->record_header_len <=
		    log->page_size -
			    le16_to_cpu(cur_page->record_hdr.next_record_off)) {
			log->l_flags |= NTFSLOG_REUSE_TAIL;
			log->next_page = curpage_off;
		} else {
			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
			log->next_page = nextpage_off;
		}

		if (wrapped_file)
			log->l_flags |= NTFSLOG_WRAPPED;

		last_ok_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
		goto next_page_1;
	}

	/*
	 * If we are at the expected first page of a transfer check to see
	 * if either tail copy is at this offset.
	 * If this page is the last page of a transfer, check if we wrote
	 * a subsequent tail copy.
	 */
	if (page_cnt == page_pos || page_cnt == page_pos + 1) {
		/*
		 * Check if the offset matches either the first or second
		 * tail copy. It is possible it will match both.
		 */
		if (curpage_off == final_off)
			tail_page = first_tail;

		/*
		 * If we already matched on the first page then
		 * check the ending lsn's.
		 */
		if (curpage_off == second_off) {
			if (!tail_page ||
			    (second_tail &&
			     le64_to_cpu(second_tail->record_hdr.last_end_lsn) >
				     le64_to_cpu(first_tail->record_hdr
							 .last_end_lsn))) {
				tail_page = second_tail;
			}
		}
	}

use_tail_page:
	if (tail_page) {
		/* We have a candidate for a tail copy. */
		lsn_cur = le64_to_cpu(tail_page->record_hdr.last_end_lsn);

		if (last_ok_lsn < lsn_cur) {
			/*
			 * If the sequence number is not expected,
			 * then don't use the tail copy.
			 */
			if (expected_seq != (lsn_cur >> log->file_data_bits))
				tail_page = NULL;
		} else if (last_ok_lsn > lsn_cur) {
			/*
			 * If the last lsn is greater than the one on
			 * this page then forget this tail.
			 */
			tail_page = NULL;
		}
	}

	/*
	 * If we have an error on the current page,
	 * we will break out of this loop.
	 */
	if (err || usa_error)
		goto check_tail;

	/*
	 * Done if the last lsn on this page doesn't match the previous known
	 * last lsn or the sequence number is not expected.
	 */
	lsn_cur = le64_to_cpu(page->rhdr.lsn);
	if (last_ok_lsn != lsn_cur &&
	    expected_seq != (lsn_cur >> log->file_data_bits)) {
		goto check_tail;
	}

	/*
	 * Check that the page position and page count values are correct.
	 * If this is the first page of a transfer the position must be 1
	 * and the count will be unknown.
	 */
	if (page_cnt == page_pos) {
		if (page->page_pos != cpu_to_le16(1) &&
		    (!reuse_page || page->page_pos != page->page_count)) {
			/*
			 * If the current page is the first page we are
			 * looking at and we are reusing this page then
			 * it can be either the first or last page of a
			 * transfer. Otherwise it can only be the first.
			 */
			goto check_tail;
		}
	} else if (le16_to_cpu(page->page_count) != page_cnt ||
		   le16_to_cpu(page->page_pos) != page_pos + 1) {
		/*
		 * The page position better be 1 more than the last page
		 * position and the page count better match.
		 */
		goto check_tail;
	}

	/*
	 * We have a valid page the file and may have a valid page
	 * the tail copy area.
	 * If the tail page was written after the page the file then
	 * break of the loop.
	 */
	if (tail_page &&
	    le64_to_cpu(tail_page->record_hdr.last_end_lsn) > lsn_cur) {
		/* Remember if we will replace the page. */
		replace_page = true;
		goto check_tail;
	}

	tail_page = NULL;

	if (is_log_record_end(page)) {
		/*
		 * Since we have read this page we know the sequence number
		 * is the same as our expected value.
		 */
		log->seq_num = expected_seq;
		log->last_lsn = le64_to_cpu(page->record_hdr.last_end_lsn);
		log->ra->current_lsn = page->record_hdr.last_end_lsn;
		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;

		/*
		 * If there is room on this page for another header then
		 * remember we want to reuse the page.
		 */
		if (log->record_header_len <=
		    log->page_size -
			    le16_to_cpu(page->record_hdr.next_record_off)) {
			log->l_flags |= NTFSLOG_REUSE_TAIL;
			log->next_page = curpage_off;
		} else {
			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
			log->next_page = nextpage_off;
		}

		/* Remember if we wrapped the log file. */
		if (wrapped_file)
			log->l_flags |= NTFSLOG_WRAPPED;
	}

	/*
	 * Remember the last page count and position.
	 * Also remember the last known lsn.
	 */
	page_cnt = le16_to_cpu(page->page_count);
	page_pos = le16_to_cpu(page->page_pos);
	last_ok_lsn = le64_to_cpu(page->rhdr.lsn);

next_page_1:

	/* Advance to the next page; crossing the end bumps the sequence. */
	if (wrapped) {
		expected_seq += 1;
		wrapped_file = 1;
	}

	curpage_off = nextpage_off;
	kfree(page);
	page = NULL;
	reuse_page = 0;
	goto next_page;

check_tail:
	/*
	 * The walk stopped at curpage_off. If a tail copy is still selected
	 * for this page, it becomes the end of the log.
	 */
	if (tail_page) {
		log->seq_num = expected_seq;
		log->last_lsn = le64_to_cpu(tail_page->record_hdr.last_end_lsn);
		log->ra->current_lsn = tail_page->record_hdr.last_end_lsn;
		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;

		if (log->page_size -
			    le16_to_cpu(
				    tail_page->record_hdr.next_record_off) >=
		    log->record_header_len) {
			log->l_flags |= NTFSLOG_REUSE_TAIL;
			log->next_page = curpage_off;
		} else {
			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
			log->next_page = nextpage_off;
		}

		if (wrapped)
			log->l_flags |= NTFSLOG_WRAPPED;
	}

	/* Remember that the partial IO will start at the next page. */
	second_off = nextpage_off;

	/*
	 * If the next page is the first page of the file then update
	 * the sequence number for log records which begin on the next page.
	 */
	if (wrapped)
		expected_seq += 1;

	/*
	 * If we have a tail copy or are performing single page I/O we can
	 * immediately look at the next page.
	 */
	if (replace_page || (log->ra->flags & RESTART_SINGLE_PAGE_IO)) {
		page_cnt = 2;
		page_pos = 1;
		goto check_valid;
	}

	if (page_pos != page_cnt)
		goto check_valid;
	/*
	 * If the next page causes us to wrap to the beginning of the log
	 * file then we know which page to check next.
	 */
	if (wrapped) {
		page_cnt = 2;
		page_pos = 1;
		goto check_valid;
	}

	cur_pos = 2;

next_test_page:
	kfree(tst_page);
	tst_page = NULL;

	/* Walk through the file, reading log pages. */
	err = read_log_page(log, nextpage_off, &tst_page, &usa_error);

	/*
	 * If we get a USA error then assume that we correctly found
	 * the end of the original transfer.
	 * (usa_error keeps its previous value when the read itself failed.)
	 */
	if (usa_error)
		goto file_is_valid;

	/*
	 * If we were able to read the page, we examine it to see if it
	 * is the same or different Io block.
	 */
	if (err)
		goto next_test_page_1;

	if (le16_to_cpu(tst_page->page_pos) == cur_pos &&
	    check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
		page_cnt = le16_to_cpu(tst_page->page_count) + 1;
		page_pos = le16_to_cpu(tst_page->page_pos);
		goto check_valid;
	} else {
		goto file_is_valid;
	}

next_test_page_1:

	/*
	 * NOTE(review): curpage_off is not modified anywhere in this loop, so
	 * this recomputes the same nextpage_off on every pass and the same
	 * page is read again. Presumably next_page_off(log, nextpage_off)
	 * was meant - confirm before changing.
	 */
	nextpage_off = next_page_off(log, curpage_off);
	wrapped = nextpage_off == log->first_page;

	if (wrapped) {
		expected_seq += 1;
		page_cnt = 2;
		page_pos = 1;
	}

	cur_pos += 1;
	part_io_count += 1;
	if (!wrapped)
		goto next_test_page;

check_valid:
	/*
	 * Skip over the remaining pages this transfer.
	 * NOTE(review): remain_pages is unsigned; it wraps to a huge value if
	 * page_cnt <= page_pos, which on-disk page_pos/page_count values can
	 * produce. The loop body also derives nextpage_off from the unchanged
	 * curpage_off (see the note at next_test_page_1).
	 */
	remain_pages = page_cnt - page_pos - 1;
	part_io_count += remain_pages;

	while (remain_pages--) {
		nextpage_off = next_page_off(log, curpage_off);
		wrapped = nextpage_off == log->first_page;

		if (wrapped)
			expected_seq += 1;
	}

	/* Call our routine to check this log page. */
	kfree(tst_page);
	tst_page = NULL;

	/* A readable page that looks like a later write means the log is inconsistent. */
	err = read_log_page(log, nextpage_off, &tst_page, &usa_error);
	if (!err && !usa_error &&
	    check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
		err = -EINVAL;
		goto out;
	}

file_is_valid:

	/*
	 * We have a valid file. If tail copies turned out to be newer than
	 * the pages in the file, write them back to their place.
	 */
	if (page_off1 || tail_page) {
		struct RECORD_PAGE_HDR *tmp_page;

		if (sb_rdonly(log->ni->mi.sbi->sb)) {
			err = -EROFS;
			goto out;
		}

		/* Select the first page to write and how many pages follow. */
		if (page_off1) {
			tmp_page = Add2Ptr(page_bufs, page_off1 - page_off);
			tails -= (page_off1 - page_off) / log->page_size;
			if (!tail_page)
				tails -= 1;
		} else {
			tmp_page = tail_page;
			tails = 1;
		}

		while (tails--) {
			/* Destination: the file offset recorded in the page itself. */
			u64 off = hdr_file_off(log, tmp_page);

			/* Reuse the page buffer left over from the walk, if any. */
			if (!page) {
				page = kmalloc(log->page_size, GFP_NOFS);
				if (!page) {
					err = -ENOMEM;
					goto out;
				}
			}

			/*
			 * Correct page and copy the data from this page
			 * into it and flush it to disk.
			 */
			memcpy(page, tmp_page, log->page_size);

			/* Fill last flushed lsn value flush the page. */
			if (log->major_ver < 2)
				page->rhdr.lsn = page->record_hdr.last_end_lsn;
			else
				page->file_off = 0;

			/* The page is written as a complete single-page transfer. */
			page->page_pos = page->page_count = cpu_to_le16(1);

			ntfs_fix_pre_write(&page->rhdr, log->page_size);

			err = ntfs_sb_write_run(log->ni->mi.sbi,
						&log->ni->file.run, off, page,
						log->page_size, 0);

			if (err)
				goto out;

			/* This page no longer counts as part of the partial I/O. */
			if (part_io_count && second_off == off) {
				second_off += log->page_size;
				part_io_count -= 1;
			}

			tmp_page = Add2Ptr(tmp_page, log->page_size);
		}
	}

	/* Pages of a partial transfer remain: that needs a writable volume. */
	if (part_io_count) {
		if (sb_rdonly(log->ni->mi.sbi->sb)) {
			err = -EROFS;
			goto out;
		}
	}

out:
	kfree(second_tail);
	kfree(first_tail);
	kfree(page);
	kfree(tst_page);
	kfree(page_bufs);

	return err;
}
2224
2225/*
2226 * read_log_rec_buf - Copy a log record from the file to a buffer.
2227 *
2228 * The log record may span several log pages and may even wrap the file.
2229 */
2230static int read_log_rec_buf(struct ntfs_log *log,
2231			    const struct LFS_RECORD_HDR *rh, void *buffer)
2232{
2233	int err;
2234	struct RECORD_PAGE_HDR *ph = NULL;
2235	u64 lsn = le64_to_cpu(rh->this_lsn);
2236	u32 vbo = lsn_to_vbo(log, lsn) & ~log->page_mask;
2237	u32 off = lsn_to_page_off(log, lsn) + log->record_header_len;
2238	u32 data_len = le32_to_cpu(rh->client_data_len);
2239
2240	/*
2241	 * While there are more bytes to transfer,
2242	 * we continue to attempt to perform the read.
2243	 */
2244	for (;;) {
2245		bool usa_error;
2246		u32 tail = log->page_size - off;
2247
2248		if (tail >= data_len)
2249			tail = data_len;
2250
2251		data_len -= tail;
2252
2253		err = read_log_page(log, vbo, &ph, &usa_error);
2254		if (err)
2255			goto out;
2256
2257		/*
2258		 * The last lsn on this page better be greater or equal
2259		 * to the lsn we are copying.
2260		 */
2261		if (lsn > le64_to_cpu(ph->rhdr.lsn)) {
2262			err = -EINVAL;
2263			goto out;
2264		}
2265
2266		memcpy(buffer, Add2Ptr(ph, off), tail);
2267
2268		/* If there are no more bytes to transfer, we exit the loop. */
2269		if (!data_len) {
2270			if (!is_log_record_end(ph) ||
2271			    lsn > le64_to_cpu(ph->record_hdr.last_end_lsn)) {
2272				err = -EINVAL;
2273				goto out;
2274			}
2275			break;
2276		}
2277
2278		if (ph->rhdr.lsn == ph->record_hdr.last_end_lsn ||
2279		    lsn > le64_to_cpu(ph->rhdr.lsn)) {
2280			err = -EINVAL;
2281			goto out;
2282		}
2283
2284		vbo = next_page_off(log, vbo);
2285		off = log->data_off;
2286
2287		/*
2288		 * Adjust our pointer the user's buffer to transfer
2289		 * the next block to.
2290		 */
2291		buffer = Add2Ptr(buffer, tail);
2292	}
2293
2294out:
2295	kfree(ph);
2296	return err;
2297}
2298
2299static int read_rst_area(struct ntfs_log *log, struct NTFS_RESTART **rst_,
2300			 u64 *lsn)
2301{
2302	int err;
2303	struct LFS_RECORD_HDR *rh = NULL;
2304	const struct CLIENT_REC *cr =
2305		Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
2306	u64 lsnr, lsnc = le64_to_cpu(cr->restart_lsn);
2307	u32 len;
2308	struct NTFS_RESTART *rst;
2309
2310	*lsn = 0;
2311	*rst_ = NULL;
2312
2313	/* If the client doesn't have a restart area, go ahead and exit now. */
2314	if (!lsnc)
2315		return 0;
2316
2317	err = read_log_page(log, lsn_to_vbo(log, lsnc),
2318			    (struct RECORD_PAGE_HDR **)&rh, NULL);
2319	if (err)
2320		return err;
2321
2322	rst = NULL;
2323	lsnr = le64_to_cpu(rh->this_lsn);
2324
2325	if (lsnc != lsnr) {
2326		/* If the lsn values don't match, then the disk is corrupt. */
2327		err = -EINVAL;
2328		goto out;
2329	}
2330
2331	*lsn = lsnr;
2332	len = le32_to_cpu(rh->client_data_len);
2333
2334	if (!len) {
2335		err = 0;
2336		goto out;
2337	}
2338
2339	if (len < sizeof(struct NTFS_RESTART)) {
2340		err = -EINVAL;
2341		goto out;
2342	}
2343
2344	rst = kmalloc(len, GFP_NOFS);
2345	if (!rst) {
2346		err = -ENOMEM;
2347		goto out;
2348	}
2349
2350	/* Copy the data into the 'rst' buffer. */
2351	err = read_log_rec_buf(log, rh, rst);
2352	if (err)
2353		goto out;
2354
2355	*rst_ = rst;
2356	rst = NULL;
2357
2358out:
2359	kfree(rh);
2360	kfree(rst);
2361
2362	return err;
2363}
2364
2365static int find_log_rec(struct ntfs_log *log, u64 lsn, struct lcb *lcb)
2366{
2367	int err;
2368	struct LFS_RECORD_HDR *rh = lcb->lrh;
2369	u32 rec_len, len;
2370
2371	/* Read the record header for this lsn. */
2372	if (!rh) {
2373		err = read_log_page(log, lsn_to_vbo(log, lsn),
2374				    (struct RECORD_PAGE_HDR **)&rh, NULL);
2375
2376		lcb->lrh = rh;
2377		if (err)
2378			return err;
2379	}
2380
2381	/*
2382	 * If the lsn the log record doesn't match the desired
2383	 * lsn then the disk is corrupt.
2384	 */
2385	if (lsn != le64_to_cpu(rh->this_lsn))
2386		return -EINVAL;
2387
2388	len = le32_to_cpu(rh->client_data_len);
2389
2390	/*
2391	 * Check that the length field isn't greater than the total
2392	 * available space the log file.
2393	 */
2394	rec_len = len + log->record_header_len;
2395	if (rec_len >= log->total_avail)
2396		return -EINVAL;
2397
2398	/*
2399	 * If the entire log record is on this log page,
2400	 * put a pointer to the log record the context block.
2401	 */
2402	if (rh->flags & LOG_RECORD_MULTI_PAGE) {
2403		void *lr = kmalloc(len, GFP_NOFS);
2404
2405		if (!lr)
2406			return -ENOMEM;
2407
2408		lcb->log_rec = lr;
2409		lcb->alloc = true;
2410
2411		/* Copy the data into the buffer returned. */
2412		err = read_log_rec_buf(log, rh, lr);
2413		if (err)
2414			return err;
2415	} else {
2416		/* If beyond the end of the current page -> an error. */
2417		u32 page_off = lsn_to_page_off(log, lsn);
2418
2419		if (page_off + len + log->record_header_len > log->page_size)
2420			return -EINVAL;
2421
2422		lcb->log_rec = Add2Ptr(rh, sizeof(struct LFS_RECORD_HDR));
2423		lcb->alloc = false;
2424	}
2425
2426	return 0;
2427}
2428
2429/*
2430 * read_log_rec_lcb - Init the query operation.
2431 */
2432static int read_log_rec_lcb(struct ntfs_log *log, u64 lsn, u32 ctx_mode,
2433			    struct lcb **lcb_)
2434{
2435	int err;
2436	const struct CLIENT_REC *cr;
2437	struct lcb *lcb;
2438
2439	switch (ctx_mode) {
2440	case lcb_ctx_undo_next:
2441	case lcb_ctx_prev:
2442	case lcb_ctx_next:
2443		break;
2444	default:
2445		return -EINVAL;
2446	}
2447
2448	/* Check that the given lsn is the legal range for this client. */
2449	cr = Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
2450
2451	if (!verify_client_lsn(log, cr, lsn))
2452		return -EINVAL;
2453
2454	lcb = kzalloc(sizeof(struct lcb), GFP_NOFS);
2455	if (!lcb)
2456		return -ENOMEM;
2457	lcb->client = log->client_id;
2458	lcb->ctx_mode = ctx_mode;
2459
2460	/* Find the log record indicated by the given lsn. */
2461	err = find_log_rec(log, lsn, lcb);
2462	if (err)
2463		goto out;
2464
2465	*lcb_ = lcb;
2466	return 0;
2467
2468out:
2469	lcb_put(lcb);
2470	*lcb_ = NULL;
2471	return err;
2472}
2473
2474/*
2475 * find_client_next_lsn
2476 *
2477 * Attempt to find the next lsn to return to a client based on the context mode.
2478 */
2479static int find_client_next_lsn(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
2480{
2481	int err;
2482	u64 next_lsn;
2483	struct LFS_RECORD_HDR *hdr;
2484
2485	hdr = lcb->lrh;
2486	*lsn = 0;
2487
2488	if (lcb_ctx_next != lcb->ctx_mode)
2489		goto check_undo_next;
2490
2491	/* Loop as long as another lsn can be found. */
2492	for (;;) {
2493		u64 current_lsn;
2494
2495		err = next_log_lsn(log, hdr, &current_lsn);
2496		if (err)
2497			goto out;
2498
2499		if (!current_lsn)
2500			break;
2501
2502		if (hdr != lcb->lrh)
2503			kfree(hdr);
2504
2505		hdr = NULL;
2506		err = read_log_page(log, lsn_to_vbo(log, current_lsn),
2507				    (struct RECORD_PAGE_HDR **)&hdr, NULL);
2508		if (err)
2509			goto out;
2510
2511		if (memcmp(&hdr->client, &lcb->client,
2512			   sizeof(struct CLIENT_ID))) {
2513			/*err = -EINVAL; */
2514		} else if (LfsClientRecord == hdr->record_type) {
2515			kfree(lcb->lrh);
2516			lcb->lrh = hdr;
2517			*lsn = current_lsn;
2518			return 0;
2519		}
2520	}
2521
2522out:
2523	if (hdr != lcb->lrh)
2524		kfree(hdr);
2525	return err;
2526
2527check_undo_next:
2528	if (lcb_ctx_undo_next == lcb->ctx_mode)
2529		next_lsn = le64_to_cpu(hdr->client_undo_next_lsn);
2530	else if (lcb_ctx_prev == lcb->ctx_mode)
2531		next_lsn = le64_to_cpu(hdr->client_prev_lsn);
2532	else
2533		return 0;
2534
2535	if (!next_lsn)
2536		return 0;
2537
2538	if (!verify_client_lsn(
2539		    log, Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off)),
2540		    next_lsn))
2541		return 0;
2542
2543	hdr = NULL;
2544	err = read_log_page(log, lsn_to_vbo(log, next_lsn),
2545			    (struct RECORD_PAGE_HDR **)&hdr, NULL);
2546	if (err)
2547		return err;
2548	kfree(lcb->lrh);
2549	lcb->lrh = hdr;
2550
2551	*lsn = next_lsn;
2552
2553	return 0;
2554}
2555
2556static int read_next_log_rec(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
2557{
2558	int err;
2559
2560	err = find_client_next_lsn(log, lcb, lsn);
2561	if (err)
2562		return err;
2563
2564	if (!*lsn)
2565		return 0;
2566
2567	if (lcb->alloc)
2568		kfree(lcb->log_rec);
2569
2570	lcb->log_rec = NULL;
2571	lcb->alloc = false;
2572	kfree(lcb->lrh);
2573	lcb->lrh = NULL;
2574
2575	return find_log_rec(log, *lsn, lcb);
2576}
2577
2578bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes)
2579{
2580	__le16 mask;
2581	u32 min_de, de_off, used, total;
2582	const struct NTFS_DE *e;
2583
2584	if (hdr_has_subnode(hdr)) {
2585		min_de = sizeof(struct NTFS_DE) + sizeof(u64);
2586		mask = NTFS_IE_HAS_SUBNODES;
2587	} else {
2588		min_de = sizeof(struct NTFS_DE);
2589		mask = 0;
2590	}
2591
2592	de_off = le32_to_cpu(hdr->de_off);
2593	used = le32_to_cpu(hdr->used);
2594	total = le32_to_cpu(hdr->total);
2595
2596	if (de_off > bytes - min_de || used > bytes || total > bytes ||
2597	    de_off + min_de > used || used > total) {
2598		return false;
2599	}
2600
2601	e = Add2Ptr(hdr, de_off);
2602	for (;;) {
2603		u16 esize = le16_to_cpu(e->size);
2604		struct NTFS_DE *next = Add2Ptr(e, esize);
2605
2606		if (esize < min_de || PtrOffset(hdr, next) > used ||
2607		    (e->flags & NTFS_IE_HAS_SUBNODES) != mask) {
2608			return false;
2609		}
2610
2611		if (de_is_last(e))
2612			break;
2613
2614		e = next;
2615	}
2616
2617	return true;
2618}
2619
2620static inline bool check_index_buffer(const struct INDEX_BUFFER *ib, u32 bytes)
2621{
2622	u16 fo;
2623	const struct NTFS_RECORD_HEADER *r = &ib->rhdr;
2624
2625	if (r->sign != NTFS_INDX_SIGNATURE)
2626		return false;
2627
2628	fo = (SECTOR_SIZE - ((bytes >> SECTOR_SHIFT) + 1) * sizeof(short));
2629
2630	if (le16_to_cpu(r->fix_off) > fo)
2631		return false;
2632
2633	if ((le16_to_cpu(r->fix_num) - 1) * SECTOR_SIZE != bytes)
2634		return false;
2635
2636	return check_index_header(&ib->ihdr,
2637				  bytes - offsetof(struct INDEX_BUFFER, ihdr));
2638}
2639
2640static inline bool check_index_root(const struct ATTRIB *attr,
2641				    struct ntfs_sb_info *sbi)
2642{
2643	bool ret;
2644	const struct INDEX_ROOT *root = resident_data(attr);
2645	u8 index_bits = le32_to_cpu(root->index_block_size) >=
2646					sbi->cluster_size ?
2647				sbi->cluster_bits :
2648				SECTOR_SHIFT;
2649	u8 block_clst = root->index_block_clst;
2650
2651	if (le32_to_cpu(attr->res.data_size) < sizeof(struct INDEX_ROOT) ||
2652	    (root->type != ATTR_NAME && root->type != ATTR_ZERO) ||
2653	    (root->type == ATTR_NAME &&
2654	     root->rule != NTFS_COLLATION_TYPE_FILENAME) ||
2655	    (le32_to_cpu(root->index_block_size) !=
2656	     (block_clst << index_bits)) ||
2657	    (block_clst != 1 && block_clst != 2 && block_clst != 4 &&
2658	     block_clst != 8 && block_clst != 0x10 && block_clst != 0x20 &&
2659	     block_clst != 0x40 && block_clst != 0x80)) {
2660		return false;
2661	}
2662
2663	ret = check_index_header(&root->ihdr,
2664				 le32_to_cpu(attr->res.data_size) -
2665					 offsetof(struct INDEX_ROOT, ihdr));
2666	return ret;
2667}
2668
2669static inline bool check_attr(const struct MFT_REC *rec,
2670			      const struct ATTRIB *attr,
2671			      struct ntfs_sb_info *sbi)
2672{
2673	u32 asize = le32_to_cpu(attr->size);
2674	u32 rsize = 0;
2675	u64 dsize, svcn, evcn;
2676	u16 run_off;
2677
2678	/* Check the fixed part of the attribute record header. */
2679	if (asize >= sbi->record_size ||
2680	    asize + PtrOffset(rec, attr) >= sbi->record_size ||
2681	    (attr->name_len &&
2682	     le16_to_cpu(attr->name_off) + attr->name_len * sizeof(short) >
2683		     asize)) {
2684		return false;
2685	}
2686
2687	/* Check the attribute fields. */
2688	switch (attr->non_res) {
2689	case 0:
2690		rsize = le32_to_cpu(attr->res.data_size);
2691		if (rsize >= asize ||
2692		    le16_to_cpu(attr->res.data_off) + rsize > asize) {
2693			return false;
2694		}
2695		break;
2696
2697	case 1:
2698		dsize = le64_to_cpu(attr->nres.data_size);
2699		svcn = le64_to_cpu(attr->nres.svcn);
2700		evcn = le64_to_cpu(attr->nres.evcn);
2701		run_off = le16_to_cpu(attr->nres.run_off);
2702
2703		if (svcn > evcn + 1 || run_off >= asize ||
2704		    le64_to_cpu(attr->nres.valid_size) > dsize ||
2705		    dsize > le64_to_cpu(attr->nres.alloc_size)) {
2706			return false;
2707		}
2708
2709		if (run_off > asize)
2710			return false;
2711
2712		if (run_unpack(NULL, sbi, 0, svcn, evcn, svcn,
2713			       Add2Ptr(attr, run_off), asize - run_off) < 0) {
2714			return false;
2715		}
2716
2717		return true;
2718
2719	default:
2720		return false;
2721	}
2722
2723	switch (attr->type) {
2724	case ATTR_NAME:
2725		if (fname_full_size(Add2Ptr(
2726			    attr, le16_to_cpu(attr->res.data_off))) > asize) {
2727			return false;
2728		}
2729		break;
2730
2731	case ATTR_ROOT:
2732		return check_index_root(attr, sbi);
2733
2734	case ATTR_STD:
2735		if (rsize < sizeof(struct ATTR_STD_INFO5) &&
2736		    rsize != sizeof(struct ATTR_STD_INFO)) {
2737			return false;
2738		}
2739		break;
2740
2741	case ATTR_LIST:
2742	case ATTR_ID:
2743	case ATTR_SECURE:
2744	case ATTR_LABEL:
2745	case ATTR_VOL_INFO:
2746	case ATTR_DATA:
2747	case ATTR_ALLOC:
2748	case ATTR_BITMAP:
2749	case ATTR_REPARSE:
2750	case ATTR_EA_INFO:
2751	case ATTR_EA:
2752	case ATTR_PROPERTYSET:
2753	case ATTR_LOGGED_UTILITY_STREAM:
2754		break;
2755
2756	default:
2757		return false;
2758	}
2759
2760	return true;
2761}
2762
2763static inline bool check_file_record(const struct MFT_REC *rec,
2764				     const struct MFT_REC *rec2,
2765				     struct ntfs_sb_info *sbi)
2766{
2767	const struct ATTRIB *attr;
2768	u16 fo = le16_to_cpu(rec->rhdr.fix_off);
2769	u16 fn = le16_to_cpu(rec->rhdr.fix_num);
2770	u16 ao = le16_to_cpu(rec->attr_off);
2771	u32 rs = sbi->record_size;
2772
2773	/* Check the file record header for consistency. */
2774	if (rec->rhdr.sign != NTFS_FILE_SIGNATURE ||
2775	    fo > (SECTOR_SIZE - ((rs >> SECTOR_SHIFT) + 1) * sizeof(short)) ||
2776	    (fn - 1) * SECTOR_SIZE != rs || ao < MFTRECORD_FIXUP_OFFSET_1 ||
2777	    ao > sbi->record_size - SIZEOF_RESIDENT || !is_rec_inuse(rec) ||
2778	    le32_to_cpu(rec->total) != rs) {
2779		return false;
2780	}
2781
2782	/* Loop to check all of the attributes. */
2783	for (attr = Add2Ptr(rec, ao); attr->type != ATTR_END;
2784	     attr = Add2Ptr(attr, le32_to_cpu(attr->size))) {
2785		if (check_attr(rec, attr, sbi))
2786			continue;
2787		return false;
2788	}
2789
2790	return true;
2791}
2792
2793static inline int check_lsn(const struct NTFS_RECORD_HEADER *hdr,
2794			    const u64 *rlsn)
2795{
2796	u64 lsn;
2797
2798	if (!rlsn)
2799		return true;
2800
2801	lsn = le64_to_cpu(hdr->lsn);
2802
2803	if (hdr->sign == NTFS_HOLE_SIGNATURE)
2804		return false;
2805
2806	if (*rlsn > lsn)
2807		return true;
2808
2809	return false;
2810}
2811
2812static inline bool check_if_attr(const struct MFT_REC *rec,
2813				 const struct LOG_REC_HDR *lrh)
2814{
2815	u16 ro = le16_to_cpu(lrh->record_off);
2816	u16 o = le16_to_cpu(rec->attr_off);
2817	const struct ATTRIB *attr = Add2Ptr(rec, o);
2818
2819	while (o < ro) {
2820		u32 asize;
2821
2822		if (attr->type == ATTR_END)
2823			break;
2824
2825		asize = le32_to_cpu(attr->size);
2826		if (!asize)
2827			break;
2828
2829		o += asize;
2830		attr = Add2Ptr(attr, asize);
2831	}
2832
2833	return o == ro;
2834}
2835
2836static inline bool check_if_index_root(const struct MFT_REC *rec,
2837				       const struct LOG_REC_HDR *lrh)
2838{
2839	u16 ro = le16_to_cpu(lrh->record_off);
2840	u16 o = le16_to_cpu(rec->attr_off);
2841	const struct ATTRIB *attr = Add2Ptr(rec, o);
2842
2843	while (o < ro) {
2844		u32 asize;
2845
2846		if (attr->type == ATTR_END)
2847			break;
2848
2849		asize = le32_to_cpu(attr->size);
2850		if (!asize)
2851			break;
2852
2853		o += asize;
2854		attr = Add2Ptr(attr, asize);
2855	}
2856
2857	return o == ro && attr->type == ATTR_ROOT;
2858}
2859
2860static inline bool check_if_root_index(const struct ATTRIB *attr,
2861				       const struct INDEX_HDR *hdr,
2862				       const struct LOG_REC_HDR *lrh)
2863{
2864	u16 ao = le16_to_cpu(lrh->attr_off);
2865	u32 de_off = le32_to_cpu(hdr->de_off);
2866	u32 o = PtrOffset(attr, hdr) + de_off;
2867	const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
2868	u32 asize = le32_to_cpu(attr->size);
2869
2870	while (o < ao) {
2871		u16 esize;
2872
2873		if (o >= asize)
2874			break;
2875
2876		esize = le16_to_cpu(e->size);
2877		if (!esize)
2878			break;
2879
2880		o += esize;
2881		e = Add2Ptr(e, esize);
2882	}
2883
2884	return o == ao;
2885}
2886
2887static inline bool check_if_alloc_index(const struct INDEX_HDR *hdr,
2888					u32 attr_off)
2889{
2890	u32 de_off = le32_to_cpu(hdr->de_off);
2891	u32 o = offsetof(struct INDEX_BUFFER, ihdr) + de_off;
2892	const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
2893	u32 used = le32_to_cpu(hdr->used);
2894
2895	while (o < attr_off) {
2896		u16 esize;
2897
2898		if (de_off >= used)
2899			break;
2900
2901		esize = le16_to_cpu(e->size);
2902		if (!esize)
2903			break;
2904
2905		o += esize;
2906		de_off += esize;
2907		e = Add2Ptr(e, esize);
2908	}
2909
2910	return o == attr_off;
2911}
2912
2913static inline void change_attr_size(struct MFT_REC *rec, struct ATTRIB *attr,
2914				    u32 nsize)
2915{
2916	u32 asize = le32_to_cpu(attr->size);
2917	int dsize = nsize - asize;
2918	u8 *next = Add2Ptr(attr, asize);
2919	u32 used = le32_to_cpu(rec->used);
2920
2921	memmove(Add2Ptr(attr, nsize), next, used - PtrOffset(rec, next));
2922
2923	rec->used = cpu_to_le32(used + dsize);
2924	attr->size = cpu_to_le32(nsize);
2925}
2926
/*
 * In-memory state attached to an open attribute table entry
 * (reached through the entry's 'ptr' field).
 */
struct OpenAttr {
	/* Private copy of the attribute record; replaced via kfree()/kmemdup(). */
	struct ATTRIB *attr;
	/* Run tree used when reading the attribute's data; may point at run0. */
	struct runs_tree *run1;
	/* Run tree embedded in this structure. */
	struct runs_tree run0;
	/* NOTE(review): presumably the inode that owns the attribute - confirm. */
	struct ntfs_inode *ni;
	// CLST rno;
};
2934
2935/*
2936 * cmp_type_and_name
2937 *
2938 * Return: 0 if 'attr' has the same type and name.
2939 */
2940static inline int cmp_type_and_name(const struct ATTRIB *a1,
2941				    const struct ATTRIB *a2)
2942{
2943	return a1->type != a2->type || a1->name_len != a2->name_len ||
2944	       (a1->name_len && memcmp(attr_name(a1), attr_name(a2),
2945				       a1->name_len * sizeof(short)));
2946}
2947
2948static struct OpenAttr *find_loaded_attr(struct ntfs_log *log,
2949					 const struct ATTRIB *attr, CLST rno)
2950{
2951	struct OPEN_ATTR_ENRTY *oe = NULL;
2952
2953	while ((oe = enum_rstbl(log->open_attr_tbl, oe))) {
2954		struct OpenAttr *op_attr;
2955
2956		if (ino_get(&oe->ref) != rno)
2957			continue;
2958
2959		op_attr = (struct OpenAttr *)oe->ptr;
2960		if (!cmp_type_and_name(op_attr->attr, attr))
2961			return op_attr;
2962	}
2963	return NULL;
2964}
2965
2966static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi,
2967					     enum ATTR_TYPE type, u64 size,
2968					     const u16 *name, size_t name_len,
2969					     __le16 flags)
2970{
2971	struct ATTRIB *attr;
2972	u32 name_size = ALIGN(name_len * sizeof(short), 8);
2973	bool is_ext = flags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED);
2974	u32 asize = name_size +
2975		    (is_ext ? SIZEOF_NONRESIDENT_EX : SIZEOF_NONRESIDENT);
2976
2977	attr = kzalloc(asize, GFP_NOFS);
2978	if (!attr)
2979		return NULL;
2980
2981	attr->type = type;
2982	attr->size = cpu_to_le32(asize);
2983	attr->flags = flags;
2984	attr->non_res = 1;
2985	attr->name_len = name_len;
2986
2987	attr->nres.evcn = cpu_to_le64((u64)bytes_to_cluster(sbi, size) - 1);
2988	attr->nres.alloc_size = cpu_to_le64(ntfs_up_cluster(sbi, size));
2989	attr->nres.data_size = cpu_to_le64(size);
2990	attr->nres.valid_size = attr->nres.data_size;
2991	if (is_ext) {
2992		attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
2993		if (is_attr_compressed(attr))
2994			attr->nres.c_unit = COMPRESSION_UNIT;
2995
2996		attr->nres.run_off =
2997			cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size);
2998		memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT_EX), name,
2999		       name_len * sizeof(short));
3000	} else {
3001		attr->name_off = SIZEOF_NONRESIDENT_LE;
3002		attr->nres.run_off =
3003			cpu_to_le16(SIZEOF_NONRESIDENT + name_size);
3004		memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT), name,
3005		       name_len * sizeof(short));
3006	}
3007
3008	return attr;
3009}
3010
3011/*
3012 * do_action - Common routine for the Redo and Undo Passes.
3013 * @rlsn: If it is NULL then undo.
3014 */
3015static int do_action(struct ntfs_log *log, struct OPEN_ATTR_ENRTY *oe,
3016		     const struct LOG_REC_HDR *lrh, u32 op, void *data,
3017		     u32 dlen, u32 rec_len, const u64 *rlsn)
3018{
3019	int err = 0;
3020	struct ntfs_sb_info *sbi = log->ni->mi.sbi;
3021	struct inode *inode = NULL, *inode_parent;
3022	struct mft_inode *mi = NULL, *mi2_child = NULL;
3023	CLST rno = 0, rno_base = 0;
3024	struct INDEX_BUFFER *ib = NULL;
3025	struct MFT_REC *rec = NULL;
3026	struct ATTRIB *attr = NULL, *attr2;
3027	struct INDEX_HDR *hdr;
3028	struct INDEX_ROOT *root;
3029	struct NTFS_DE *e, *e1, *e2;
3030	struct NEW_ATTRIBUTE_SIZES *new_sz;
3031	struct ATTR_FILE_NAME *fname;
3032	struct OpenAttr *oa, *oa2;
3033	u32 nsize, t32, asize, used, esize, off, bits;
3034	u16 id, id2;
3035	u32 record_size = sbi->record_size;
3036	u64 t64;
3037	u16 roff = le16_to_cpu(lrh->record_off);
3038	u16 aoff = le16_to_cpu(lrh->attr_off);
3039	u64 lco = 0;
3040	u64 cbo = (u64)le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
3041	u64 tvo = le64_to_cpu(lrh->target_vcn) << sbi->cluster_bits;
3042	u64 vbo = cbo + tvo;
3043	void *buffer_le = NULL;
3044	u32 bytes = 0;
3045	bool a_dirty = false;
3046	u16 data_off;
3047
3048	oa = oe->ptr;
3049
3050	/* Big switch to prepare. */
3051	switch (op) {
3052	/* ============================================================
3053	 * Process MFT records, as described by the current log record.
3054	 * ============================================================
3055	 */
3056	case InitializeFileRecordSegment:
3057	case DeallocateFileRecordSegment:
3058	case WriteEndOfFileRecordSegment:
3059	case CreateAttribute:
3060	case DeleteAttribute:
3061	case UpdateResidentValue:
3062	case UpdateMappingPairs:
3063	case SetNewAttributeSizes:
3064	case AddIndexEntryRoot:
3065	case DeleteIndexEntryRoot:
3066	case SetIndexEntryVcnRoot:
3067	case UpdateFileNameRoot:
3068	case UpdateRecordDataRoot:
3069	case ZeroEndOfFileRecord:
3070		rno = vbo >> sbi->record_bits;
3071		inode = ilookup(sbi->sb, rno);
3072		if (inode) {
3073			mi = &ntfs_i(inode)->mi;
3074		} else if (op == InitializeFileRecordSegment) {
3075			mi = kzalloc(sizeof(struct mft_inode), GFP_NOFS);
3076			if (!mi)
3077				return -ENOMEM;
3078			err = mi_format_new(mi, sbi, rno, 0, false);
3079			if (err)
3080				goto out;
3081		} else {
3082			/* Read from disk. */
3083			err = mi_get(sbi, rno, &mi);
3084			if (err)
3085				return err;
3086		}
3087		rec = mi->mrec;
3088
3089		if (op == DeallocateFileRecordSegment)
3090			goto skip_load_parent;
3091
3092		if (InitializeFileRecordSegment != op) {
3093			if (rec->rhdr.sign == NTFS_BAAD_SIGNATURE)
3094				goto dirty_vol;
3095			if (!check_lsn(&rec->rhdr, rlsn))
3096				goto out;
3097			if (!check_file_record(rec, NULL, sbi))
3098				goto dirty_vol;
3099			attr = Add2Ptr(rec, roff);
3100		}
3101
3102		if (is_rec_base(rec) || InitializeFileRecordSegment == op) {
3103			rno_base = rno;
3104			goto skip_load_parent;
3105		}
3106
3107		rno_base = ino_get(&rec->parent_ref);
3108		inode_parent = ntfs_iget5(sbi->sb, &rec->parent_ref, NULL);
3109		if (IS_ERR(inode_parent))
3110			goto skip_load_parent;
3111
3112		if (is_bad_inode(inode_parent)) {
3113			iput(inode_parent);
3114			goto skip_load_parent;
3115		}
3116
3117		if (ni_load_mi_ex(ntfs_i(inode_parent), rno, &mi2_child)) {
3118			iput(inode_parent);
3119		} else {
3120			if (mi2_child->mrec != mi->mrec)
3121				memcpy(mi2_child->mrec, mi->mrec,
3122				       sbi->record_size);
3123
3124			if (inode)
3125				iput(inode);
3126			else if (mi)
3127				mi_put(mi);
3128
3129			inode = inode_parent;
3130			mi = mi2_child;
3131			rec = mi2_child->mrec;
3132			attr = Add2Ptr(rec, roff);
3133		}
3134
3135skip_load_parent:
3136		inode_parent = NULL;
3137		break;
3138
3139	/*
3140	 * Process attributes, as described by the current log record.
3141	 */
3142	case UpdateNonresidentValue:
3143	case AddIndexEntryAllocation:
3144	case DeleteIndexEntryAllocation:
3145	case WriteEndOfIndexBuffer:
3146	case SetIndexEntryVcnAllocation:
3147	case UpdateFileNameAllocation:
3148	case SetBitsInNonresidentBitMap:
3149	case ClearBitsInNonresidentBitMap:
3150	case UpdateRecordDataAllocation:
3151		attr = oa->attr;
3152		bytes = UpdateNonresidentValue == op ? dlen : 0;
3153		lco = (u64)le16_to_cpu(lrh->lcns_follow) << sbi->cluster_bits;
3154
3155		if (attr->type == ATTR_ALLOC) {
3156			t32 = le32_to_cpu(oe->bytes_per_index);
3157			if (bytes < t32)
3158				bytes = t32;
3159		}
3160
3161		if (!bytes)
3162			bytes = lco - cbo;
3163
3164		bytes += roff;
3165		if (attr->type == ATTR_ALLOC)
3166			bytes = (bytes + 511) & ~511; // align
3167
3168		buffer_le = kmalloc(bytes, GFP_NOFS);
3169		if (!buffer_le)
3170			return -ENOMEM;
3171
3172		err = ntfs_read_run_nb(sbi, oa->run1, vbo, buffer_le, bytes,
3173				       NULL);
3174		if (err)
3175			goto out;
3176
3177		if (attr->type == ATTR_ALLOC && *(int *)buffer_le)
3178			ntfs_fix_post_read(buffer_le, bytes, false);
3179		break;
3180
3181	default:
3182		WARN_ON(1);
3183	}
3184
3185	/* Big switch to do operation. */
3186	switch (op) {
3187	case InitializeFileRecordSegment:
3188		if (roff + dlen > record_size)
3189			goto dirty_vol;
3190
3191		memcpy(Add2Ptr(rec, roff), data, dlen);
3192		mi->dirty = true;
3193		break;
3194
3195	case DeallocateFileRecordSegment:
3196		clear_rec_inuse(rec);
3197		le16_add_cpu(&rec->seq, 1);
3198		mi->dirty = true;
3199		break;
3200
3201	case WriteEndOfFileRecordSegment:
3202		attr2 = (struct ATTRIB *)data;
3203		if (!check_if_attr(rec, lrh) || roff + dlen > record_size)
3204			goto dirty_vol;
3205
3206		memmove(attr, attr2, dlen);
3207		rec->used = cpu_to_le32(ALIGN(roff + dlen, 8));
3208
3209		mi->dirty = true;
3210		break;
3211
3212	case CreateAttribute:
3213		attr2 = (struct ATTRIB *)data;
3214		asize = le32_to_cpu(attr2->size);
3215		used = le32_to_cpu(rec->used);
3216
3217		if (!check_if_attr(rec, lrh) || dlen < SIZEOF_RESIDENT ||
3218		    !IS_ALIGNED(asize, 8) ||
3219		    Add2Ptr(attr2, asize) > Add2Ptr(lrh, rec_len) ||
3220		    dlen > record_size - used) {
3221			goto dirty_vol;
3222		}
3223
3224		memmove(Add2Ptr(attr, asize), attr, used - roff);
3225		memcpy(attr, attr2, asize);
3226
3227		rec->used = cpu_to_le32(used + asize);
3228		id = le16_to_cpu(rec->next_attr_id);
3229		id2 = le16_to_cpu(attr2->id);
3230		if (id <= id2)
3231			rec->next_attr_id = cpu_to_le16(id2 + 1);
3232		if (is_attr_indexed(attr))
3233			le16_add_cpu(&rec->hard_links, 1);
3234
3235		oa2 = find_loaded_attr(log, attr, rno_base);
3236		if (oa2) {
3237			void *p2 = kmemdup(attr, le32_to_cpu(attr->size),
3238					   GFP_NOFS);
3239			if (p2) {
3240				// run_close(oa2->run1);
3241				kfree(oa2->attr);
3242				oa2->attr = p2;
3243			}
3244		}
3245
3246		mi->dirty = true;
3247		break;
3248
3249	case DeleteAttribute:
3250		asize = le32_to_cpu(attr->size);
3251		used = le32_to_cpu(rec->used);
3252
3253		if (!check_if_attr(rec, lrh))
3254			goto dirty_vol;
3255
3256		rec->used = cpu_to_le32(used - asize);
3257		if (is_attr_indexed(attr))
3258			le16_add_cpu(&rec->hard_links, -1);
3259
3260		memmove(attr, Add2Ptr(attr, asize), used - asize - roff);
3261
3262		mi->dirty = true;
3263		break;
3264
3265	case UpdateResidentValue:
3266		nsize = aoff + dlen;
3267
3268		if (!check_if_attr(rec, lrh))
3269			goto dirty_vol;
3270
3271		asize = le32_to_cpu(attr->size);
3272		used = le32_to_cpu(rec->used);
3273
3274		if (lrh->redo_len == lrh->undo_len) {
3275			if (nsize > asize)
3276				goto dirty_vol;
3277			goto move_data;
3278		}
3279
3280		if (nsize > asize && nsize - asize > record_size - used)
3281			goto dirty_vol;
3282
3283		nsize = ALIGN(nsize, 8);
3284		data_off = le16_to_cpu(attr->res.data_off);
3285
3286		if (nsize < asize) {
3287			memmove(Add2Ptr(attr, aoff), data, dlen);
3288			data = NULL; // To skip below memmove().
3289		}
3290
3291		memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
3292			used - le16_to_cpu(lrh->record_off) - asize);
3293
3294		rec->used = cpu_to_le32(used + nsize - asize);
3295		attr->size = cpu_to_le32(nsize);
3296		attr->res.data_size = cpu_to_le32(aoff + dlen - data_off);
3297
3298move_data:
3299		if (data)
3300			memmove(Add2Ptr(attr, aoff), data, dlen);
3301
3302		oa2 = find_loaded_attr(log, attr, rno_base);
3303		if (oa2) {
3304			void *p2 = kmemdup(attr, le32_to_cpu(attr->size),
3305					   GFP_NOFS);
3306			if (p2) {
3307				// run_close(&oa2->run0);
3308				oa2->run1 = &oa2->run0;
3309				kfree(oa2->attr);
3310				oa2->attr = p2;
3311			}
3312		}
3313
3314		mi->dirty = true;
3315		break;
3316
3317	case UpdateMappingPairs:
3318		nsize = aoff + dlen;
3319		asize = le32_to_cpu(attr->size);
3320		used = le32_to_cpu(rec->used);
3321
3322		if (!check_if_attr(rec, lrh) || !attr->non_res ||
3323		    aoff < le16_to_cpu(attr->nres.run_off) || aoff > asize ||
3324		    (nsize > asize && nsize - asize > record_size - used)) {
3325			goto dirty_vol;
3326		}
3327
3328		nsize = ALIGN(nsize, 8);
3329
3330		memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
3331			used - le16_to_cpu(lrh->record_off) - asize);
3332		rec->used = cpu_to_le32(used + nsize - asize);
3333		attr->size = cpu_to_le32(nsize);
3334		memmove(Add2Ptr(attr, aoff), data, dlen);
3335
3336		if (run_get_highest_vcn(le64_to_cpu(attr->nres.svcn),
3337					attr_run(attr), &t64)) {
3338			goto dirty_vol;
3339		}
3340
3341		attr->nres.evcn = cpu_to_le64(t64);
3342		oa2 = find_loaded_attr(log, attr, rno_base);
3343		if (oa2 && oa2->attr->non_res)
3344			oa2->attr->nres.evcn = attr->nres.evcn;
3345
3346		mi->dirty = true;
3347		break;
3348
3349	case SetNewAttributeSizes:
3350		new_sz = data;
3351		if (!check_if_attr(rec, lrh) || !attr->non_res)
3352			goto dirty_vol;
3353
3354		attr->nres.alloc_size = new_sz->alloc_size;
3355		attr->nres.data_size = new_sz->data_size;
3356		attr->nres.valid_size = new_sz->valid_size;
3357
3358		if (dlen >= sizeof(struct NEW_ATTRIBUTE_SIZES))
3359			attr->nres.total_size = new_sz->total_size;
3360
3361		oa2 = find_loaded_attr(log, attr, rno_base);
3362		if (oa2) {
3363			void *p2 = kmemdup(attr, le32_to_cpu(attr->size),
3364					   GFP_NOFS);
3365			if (p2) {
3366				kfree(oa2->attr);
3367				oa2->attr = p2;
3368			}
3369		}
3370		mi->dirty = true;
3371		break;
3372
3373	case AddIndexEntryRoot:
3374		e = (struct NTFS_DE *)data;
3375		esize = le16_to_cpu(e->size);
3376		root = resident_data(attr);
3377		hdr = &root->ihdr;
3378		used = le32_to_cpu(hdr->used);
3379
3380		if (!check_if_index_root(rec, lrh) ||
3381		    !check_if_root_index(attr, hdr, lrh) ||
3382		    Add2Ptr(data, esize) > Add2Ptr(lrh, rec_len) ||
3383		    esize > le32_to_cpu(rec->total) - le32_to_cpu(rec->used)) {
3384			goto dirty_vol;
3385		}
3386
3387		e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3388
3389		change_attr_size(rec, attr, le32_to_cpu(attr->size) + esize);
3390
3391		memmove(Add2Ptr(e1, esize), e1,
3392			PtrOffset(e1, Add2Ptr(hdr, used)));
3393		memmove(e1, e, esize);
3394
3395		le32_add_cpu(&attr->res.data_size, esize);
3396		hdr->used = cpu_to_le32(used + esize);
3397		le32_add_cpu(&hdr->total, esize);
3398
3399		mi->dirty = true;
3400		break;
3401
3402	case DeleteIndexEntryRoot:
3403		root = resident_data(attr);
3404		hdr = &root->ihdr;
3405		used = le32_to_cpu(hdr->used);
3406
3407		if (!check_if_index_root(rec, lrh) ||
3408		    !check_if_root_index(attr, hdr, lrh)) {
3409			goto dirty_vol;
3410		}
3411
3412		e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3413		esize = le16_to_cpu(e1->size);
3414		e2 = Add2Ptr(e1, esize);
3415
3416		memmove(e1, e2, PtrOffset(e2, Add2Ptr(hdr, used)));
3417
3418		le32_sub_cpu(&attr->res.data_size, esize);
3419		hdr->used = cpu_to_le32(used - esize);
3420		le32_sub_cpu(&hdr->total, esize);
3421
3422		change_attr_size(rec, attr, le32_to_cpu(attr->size) - esize);
3423
3424		mi->dirty = true;
3425		break;
3426
3427	case SetIndexEntryVcnRoot:
3428		root = resident_data(attr);
3429		hdr = &root->ihdr;
3430
3431		if (!check_if_index_root(rec, lrh) ||
3432		    !check_if_root_index(attr, hdr, lrh)) {
3433			goto dirty_vol;
3434		}
3435
3436		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3437
3438		de_set_vbn_le(e, *(__le64 *)data);
3439		mi->dirty = true;
3440		break;
3441
3442	case UpdateFileNameRoot:
3443		root = resident_data(attr);
3444		hdr = &root->ihdr;
3445
3446		if (!check_if_index_root(rec, lrh) ||
3447		    !check_if_root_index(attr, hdr, lrh)) {
3448			goto dirty_vol;
3449		}
3450
3451		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3452		fname = (struct ATTR_FILE_NAME *)(e + 1);
3453		memmove(&fname->dup, data, sizeof(fname->dup)); //
3454		mi->dirty = true;
3455		break;
3456
3457	case UpdateRecordDataRoot:
3458		root = resident_data(attr);
3459		hdr = &root->ihdr;
3460
3461		if (!check_if_index_root(rec, lrh) ||
3462		    !check_if_root_index(attr, hdr, lrh)) {
3463			goto dirty_vol;
3464		}
3465
3466		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3467
3468		memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
3469
3470		mi->dirty = true;
3471		break;
3472
3473	case ZeroEndOfFileRecord:
3474		if (roff + dlen > record_size)
3475			goto dirty_vol;
3476
3477		memset(attr, 0, dlen);
3478		mi->dirty = true;
3479		break;
3480
3481	case UpdateNonresidentValue:
3482		if (lco < cbo + roff + dlen)
3483			goto dirty_vol;
3484
3485		memcpy(Add2Ptr(buffer_le, roff), data, dlen);
3486
3487		a_dirty = true;
3488		if (attr->type == ATTR_ALLOC)
3489			ntfs_fix_pre_write(buffer_le, bytes);
3490		break;
3491
3492	case AddIndexEntryAllocation:
3493		ib = Add2Ptr(buffer_le, roff);
3494		hdr = &ib->ihdr;
3495		e = data;
3496		esize = le16_to_cpu(e->size);
3497		e1 = Add2Ptr(ib, aoff);
3498
3499		if (is_baad(&ib->rhdr))
3500			goto dirty_vol;
3501		if (!check_lsn(&ib->rhdr, rlsn))
3502			goto out;
3503
3504		used = le32_to_cpu(hdr->used);
3505
3506		if (!check_index_buffer(ib, bytes) ||
3507		    !check_if_alloc_index(hdr, aoff) ||
3508		    Add2Ptr(e, esize) > Add2Ptr(lrh, rec_len) ||
3509		    used + esize > le32_to_cpu(hdr->total)) {
3510			goto dirty_vol;
3511		}
3512
3513		memmove(Add2Ptr(e1, esize), e1,
3514			PtrOffset(e1, Add2Ptr(hdr, used)));
3515		memcpy(e1, e, esize);
3516
3517		hdr->used = cpu_to_le32(used + esize);
3518
3519		a_dirty = true;
3520
3521		ntfs_fix_pre_write(&ib->rhdr, bytes);
3522		break;
3523
3524	case DeleteIndexEntryAllocation:
3525		ib = Add2Ptr(buffer_le, roff);
3526		hdr = &ib->ihdr;
3527		e = Add2Ptr(ib, aoff);
3528		esize = le16_to_cpu(e->size);
3529
3530		if (is_baad(&ib->rhdr))
3531			goto dirty_vol;
3532		if (!check_lsn(&ib->rhdr, rlsn))
3533			goto out;
3534
3535		if (!check_index_buffer(ib, bytes) ||
3536		    !check_if_alloc_index(hdr, aoff)) {
3537			goto dirty_vol;
3538		}
3539
3540		e1 = Add2Ptr(e, esize);
3541		nsize = esize;
3542		used = le32_to_cpu(hdr->used);
3543
3544		memmove(e, e1, PtrOffset(e1, Add2Ptr(hdr, used)));
3545
3546		hdr->used = cpu_to_le32(used - nsize);
3547
3548		a_dirty = true;
3549
3550		ntfs_fix_pre_write(&ib->rhdr, bytes);
3551		break;
3552
3553	case WriteEndOfIndexBuffer:
3554		ib = Add2Ptr(buffer_le, roff);
3555		hdr = &ib->ihdr;
3556		e = Add2Ptr(ib, aoff);
3557
3558		if (is_baad(&ib->rhdr))
3559			goto dirty_vol;
3560		if (!check_lsn(&ib->rhdr, rlsn))
3561			goto out;
3562		if (!check_index_buffer(ib, bytes) ||
3563		    !check_if_alloc_index(hdr, aoff) ||
3564		    aoff + dlen > offsetof(struct INDEX_BUFFER, ihdr) +
3565					  le32_to_cpu(hdr->total)) {
3566			goto dirty_vol;
3567		}
3568
3569		hdr->used = cpu_to_le32(dlen + PtrOffset(hdr, e));
3570		memmove(e, data, dlen);
3571
3572		a_dirty = true;
3573		ntfs_fix_pre_write(&ib->rhdr, bytes);
3574		break;
3575
3576	case SetIndexEntryVcnAllocation:
3577		ib = Add2Ptr(buffer_le, roff);
3578		hdr = &ib->ihdr;
3579		e = Add2Ptr(ib, aoff);
3580
3581		if (is_baad(&ib->rhdr))
3582			goto dirty_vol;
3583
3584		if (!check_lsn(&ib->rhdr, rlsn))
3585			goto out;
3586		if (!check_index_buffer(ib, bytes) ||
3587		    !check_if_alloc_index(hdr, aoff)) {
3588			goto dirty_vol;
3589		}
3590
3591		de_set_vbn_le(e, *(__le64 *)data);
3592
3593		a_dirty = true;
3594		ntfs_fix_pre_write(&ib->rhdr, bytes);
3595		break;
3596
3597	case UpdateFileNameAllocation:
3598		ib = Add2Ptr(buffer_le, roff);
3599		hdr = &ib->ihdr;
3600		e = Add2Ptr(ib, aoff);
3601
3602		if (is_baad(&ib->rhdr))
3603			goto dirty_vol;
3604
3605		if (!check_lsn(&ib->rhdr, rlsn))
3606			goto out;
3607		if (!check_index_buffer(ib, bytes) ||
3608		    !check_if_alloc_index(hdr, aoff)) {
3609			goto dirty_vol;
3610		}
3611
3612		fname = (struct ATTR_FILE_NAME *)(e + 1);
3613		memmove(&fname->dup, data, sizeof(fname->dup));
3614
3615		a_dirty = true;
3616		ntfs_fix_pre_write(&ib->rhdr, bytes);
3617		break;
3618
3619	case SetBitsInNonresidentBitMap:
3620		off = le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
3621		bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
3622
3623		if (cbo + (off + 7) / 8 > lco ||
3624		    cbo + ((off + bits + 7) / 8) > lco) {
3625			goto dirty_vol;
3626		}
3627
3628		ntfs_bitmap_set_le(Add2Ptr(buffer_le, roff), off, bits);
3629		a_dirty = true;
3630		break;
3631
3632	case ClearBitsInNonresidentBitMap:
3633		off = le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
3634		bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
3635
3636		if (cbo + (off + 7) / 8 > lco ||
3637		    cbo + ((off + bits + 7) / 8) > lco) {
3638			goto dirty_vol;
3639		}
3640
3641		ntfs_bitmap_clear_le(Add2Ptr(buffer_le, roff), off, bits);
3642		a_dirty = true;
3643		break;
3644
3645	case UpdateRecordDataAllocation:
3646		ib = Add2Ptr(buffer_le, roff);
3647		hdr = &ib->ihdr;
3648		e = Add2Ptr(ib, aoff);
3649
3650		if (is_baad(&ib->rhdr))
3651			goto dirty_vol;
3652
3653		if (!check_lsn(&ib->rhdr, rlsn))
3654			goto out;
3655		if (!check_index_buffer(ib, bytes) ||
3656		    !check_if_alloc_index(hdr, aoff)) {
3657			goto dirty_vol;
3658		}
3659
3660		memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
3661
3662		a_dirty = true;
3663		ntfs_fix_pre_write(&ib->rhdr, bytes);
3664		break;
3665
3666	default:
3667		WARN_ON(1);
3668	}
3669
3670	if (rlsn) {
3671		__le64 t64 = cpu_to_le64(*rlsn);
3672
3673		if (rec)
3674			rec->rhdr.lsn = t64;
3675		if (ib)
3676			ib->rhdr.lsn = t64;
3677	}
3678
3679	if (mi && mi->dirty) {
3680		err = mi_write(mi, 0);
3681		if (err)
3682			goto out;
3683	}
3684
3685	if (a_dirty) {
3686		attr = oa->attr;
3687		err = ntfs_sb_write_run(sbi, oa->run1, vbo, buffer_le, bytes,
3688					0);
3689		if (err)
3690			goto out;
3691	}
3692
3693out:
3694
3695	if (inode)
3696		iput(inode);
3697	else if (mi != mi2_child)
3698		mi_put(mi);
3699
3700	kfree(buffer_le);
3701
3702	return err;
3703
3704dirty_vol:
3705	log->set_dirty = true;
3706	goto out;
3707}
3708
3709/*
3710 * log_replay - Replay the log and empty it.
3711 *
3712 * This function is called during the mount operation.
3713 * It replays the log and then empties it.
3714 * '*initialized' is set to false if the logfile contains '-1'.
3715 */
3716int log_replay(struct ntfs_inode *ni, bool *initialized)
3717{
3718	int err;
3719	struct ntfs_sb_info *sbi = ni->mi.sbi;
3720	struct ntfs_log *log;
3721
3722	u64 rec_lsn, checkpt_lsn = 0, rlsn = 0;
3723	struct ATTR_NAME_ENTRY *attr_names = NULL;
3724	struct RESTART_TABLE *dptbl = NULL;
3725	struct RESTART_TABLE *trtbl = NULL;
3726	const struct RESTART_TABLE *rt;
3727	struct RESTART_TABLE *oatbl = NULL;
3728	struct inode *inode;
3729	struct OpenAttr *oa;
3730	struct ntfs_inode *ni_oe;
3731	struct ATTRIB *attr = NULL;
3732	u64 size, vcn, undo_next_lsn;
3733	CLST rno, lcn, lcn0, len0, clen;
3734	void *data;
3735	struct NTFS_RESTART *rst = NULL;
3736	struct lcb *lcb = NULL;
3737	struct OPEN_ATTR_ENRTY *oe;
3738	struct TRANSACTION_ENTRY *tr;
3739	struct DIR_PAGE_ENTRY *dp;
3740	u32 i, bytes_per_attr_entry;
3741	u32 vbo, tail, off, dlen;
3742	u32 saved_len, rec_len, transact_id;
3743	bool use_second_page;
3744	struct RESTART_AREA *ra2, *ra = NULL;
3745	struct CLIENT_REC *ca, *cr;
3746	__le16 client;
3747	struct RESTART_HDR *rh;
3748	const struct LFS_RECORD_HDR *frh;
3749	const struct LOG_REC_HDR *lrh;
3750	bool is_mapped;
3751	bool is_ro = sb_rdonly(sbi->sb);
3752	u64 t64;
3753	u16 t16;
3754	u32 t32;
3755
3756	log = kzalloc(sizeof(struct ntfs_log), GFP_NOFS);
3757	if (!log)
3758		return -ENOMEM;
3759
3760	log->ni = ni;
3761	log->l_size = log->orig_file_size = ni->vfs_inode.i_size;
3762
3763	/* Get the size of page. NOTE: To replay we can use default page. */
3764#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
3765	log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, true);
3766#else
3767	log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, false);
3768#endif
3769	if (!log->page_size) {
3770		err = -EINVAL;
3771		goto out;
3772	}
3773
3774	log->one_page_buf = kmalloc(log->page_size, GFP_NOFS);
3775	if (!log->one_page_buf) {
3776		err = -ENOMEM;
3777		goto out;
3778	}
3779
3780	log->page_mask = log->page_size - 1;
3781	log->page_bits = blksize_bits(log->page_size);
3782
3783	/* Look for a restart area on the disk. */
3784	err = log_read_rst(log, true, &log->rst_info);
3785	if (err)
3786		goto out;
3787
3788	/* remember 'initialized' */
3789	*initialized = log->rst_info.initialized;
3790
3791	if (!log->rst_info.restart) {
3792		if (log->rst_info.initialized) {
3793			/* No restart area, yet the file is not uninitialized. */
3794			err = -EINVAL;
3795			goto out;
3796		}
3797
3798		log_init_pg_hdr(log, 1, 1);
3799		log_create(log, 0, get_random_u32(), false, false);
3800
3801		ra = log_create_ra(log);
3802		if (!ra) {
3803			err = -ENOMEM;
3804			goto out;
3805		}
3806		log->ra = ra;
3807		log->init_ra = true;
3808
3809		goto process_log;
3810	}
3811
3812	/*
3813	 * If the restart offset above wasn't zero then we won't
3814	 * look for a second restart.
3815	 */
3816	if (log->rst_info.vbo)
3817		goto check_restart_area;
3818
3819	err = log_read_rst(log, false, &log->rst_info2);
3820	if (err)
3821		goto out;
3822
3823	/* Determine which restart area to use. */
3824	if (!log->rst_info2.restart ||
3825	    log->rst_info2.last_lsn <= log->rst_info.last_lsn)
3826		goto use_first_page;
3827
3828	use_second_page = true;
3829
3830	if (log->rst_info.chkdsk_was_run &&
3831	    log->page_size != log->rst_info.vbo) {
3832		struct RECORD_PAGE_HDR *sp = NULL;
3833		bool usa_error;
3834
3835		if (!read_log_page(log, log->page_size, &sp, &usa_error) &&
3836		    sp->rhdr.sign == NTFS_CHKD_SIGNATURE) {
3837			use_second_page = false;
3838		}
3839		kfree(sp);
3840	}
3841
3842	if (use_second_page) {
3843		kfree(log->rst_info.r_page);
3844		memcpy(&log->rst_info, &log->rst_info2,
3845		       sizeof(struct restart_info));
3846		log->rst_info2.r_page = NULL;
3847	}
3848
3849use_first_page:
3850	kfree(log->rst_info2.r_page);
3851
3852check_restart_area:
3853	/*
3854	 * If the restart area is at offset 0, we want
3855	 * to write the second restart area first.
3856	 */
3857	log->init_ra = !!log->rst_info.vbo;
3858
3859	/* If we have a valid page then grab a pointer to the restart area. */
3860	ra2 = log->rst_info.valid_page ?
3861		      Add2Ptr(log->rst_info.r_page,
3862			      le16_to_cpu(log->rst_info.r_page->ra_off)) :
3863		      NULL;
3864
3865	if (log->rst_info.chkdsk_was_run ||
3866	    (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) {
3867		bool wrapped = false;
3868		bool use_multi_page = false;
3869		u32 open_log_count;
3870
3871		/* Do some checks based on whether we have a valid log page. */
3872		open_log_count = log->rst_info.valid_page ?
3873					 le32_to_cpu(ra2->open_log_count) :
3874					 get_random_u32();
3875
3876		log_init_pg_hdr(log, 1, 1);
3877
3878		log_create(log, log->rst_info.last_lsn, open_log_count, wrapped,
3879			   use_multi_page);
3880
3881		ra = log_create_ra(log);
3882		if (!ra) {
3883			err = -ENOMEM;
3884			goto out;
3885		}
3886		log->ra = ra;
3887
3888		/* Put the restart areas and initialize
3889		 * the log file as required.
3890		 */
3891		goto process_log;
3892	}
3893
3894	if (!ra2) {
3895		err = -EINVAL;
3896		goto out;
3897	}
3898
3899	/*
3900	 * If the log page or the system page sizes have changed, we can't
3901	 * use the log file. We must use the system page size instead of the
3902	 * default size if there is not a clean shutdown.
3903	 */
3904	t32 = le32_to_cpu(log->rst_info.r_page->sys_page_size);
3905	if (log->page_size != t32) {
3906		log->l_size = log->orig_file_size;
3907		log->page_size = norm_file_page(t32, &log->l_size,
3908						t32 == DefaultLogPageSize);
3909	}
3910
3911	if (log->page_size != t32 ||
3912	    log->page_size != le32_to_cpu(log->rst_info.r_page->page_size)) {
3913		err = -EINVAL;
3914		goto out;
3915	}
3916
3917	/* If the file size has shrunk then we won't mount it. */
3918	if (log->l_size < le64_to_cpu(ra2->l_size)) {
3919		err = -EINVAL;
3920		goto out;
3921	}
3922
3923	log_init_pg_hdr(log, le16_to_cpu(log->rst_info.r_page->major_ver),
3924			le16_to_cpu(log->rst_info.r_page->minor_ver));
3925
3926	log->l_size = le64_to_cpu(ra2->l_size);
3927	log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits);
3928	log->file_data_bits = sizeof(u64) * 8 - log->seq_num_bits;
3929	log->seq_num_mask = (8 << log->file_data_bits) - 1;
3930	log->last_lsn = le64_to_cpu(ra2->current_lsn);
3931	log->seq_num = log->last_lsn >> log->file_data_bits;
3932	log->ra_off = le16_to_cpu(log->rst_info.r_page->ra_off);
3933	log->restart_size = log->sys_page_size - log->ra_off;
3934	log->record_header_len = le16_to_cpu(ra2->rec_hdr_len);
3935	log->ra_size = le16_to_cpu(ra2->ra_len);
3936	log->data_off = le16_to_cpu(ra2->data_off);
3937	log->data_size = log->page_size - log->data_off;
3938	log->reserved = log->data_size - log->record_header_len;
3939
3940	vbo = lsn_to_vbo(log, log->last_lsn);
3941
3942	if (vbo < log->first_page) {
3943		/* This is a pseudo lsn. */
3944		log->l_flags |= NTFSLOG_NO_LAST_LSN;
3945		log->next_page = log->first_page;
3946		goto find_oldest;
3947	}
3948
3949	/* Find the end of this log record. */
3950	off = final_log_off(log, log->last_lsn,
3951			    le32_to_cpu(ra2->last_lsn_data_len));
3952
3953	/* If we wrapped the file then increment the sequence number. */
3954	if (off <= vbo) {
3955		log->seq_num += 1;
3956		log->l_flags |= NTFSLOG_WRAPPED;
3957	}
3958
3959	/* Now compute the next log page to use. */
3960	vbo &= ~log->sys_page_mask;
3961	tail = log->page_size - (off & log->page_mask) - 1;
3962
3963	/*
3964	 * If we can fit another log record on the page,
3965	 * move back a page in the log file.
3966	 */
3967	if (tail >= log->record_header_len) {
3968		log->l_flags |= NTFSLOG_REUSE_TAIL;
3969		log->next_page = vbo;
3970	} else {
3971		log->next_page = next_page_off(log, vbo);
3972	}
3973
3974find_oldest:
3975	/*
3976	 * Find the oldest client lsn. Use the last
3977	 * flushed lsn as a starting point.
3978	 */
3979	log->oldest_lsn = log->last_lsn;
3980	oldest_client_lsn(Add2Ptr(ra2, le16_to_cpu(ra2->client_off)),
3981			  ra2->client_idx[1], &log->oldest_lsn);
3982	log->oldest_lsn_off = lsn_to_vbo(log, log->oldest_lsn);
3983
3984	if (log->oldest_lsn_off < log->first_page)
3985		log->l_flags |= NTFSLOG_NO_OLDEST_LSN;
3986
3987	if (!(ra2->flags & RESTART_SINGLE_PAGE_IO))
3988		log->l_flags |= NTFSLOG_WRAPPED | NTFSLOG_MULTIPLE_PAGE_IO;
3989
3990	log->current_openlog_count = le32_to_cpu(ra2->open_log_count);
3991	log->total_avail_pages = log->l_size - log->first_page;
3992	log->total_avail = log->total_avail_pages >> log->page_bits;
3993	log->max_current_avail = log->total_avail * log->reserved;
3994	log->total_avail = log->total_avail * log->data_size;
3995
3996	log->current_avail = current_log_avail(log);
3997
3998	ra = kzalloc(log->restart_size, GFP_NOFS);
3999	if (!ra) {
4000		err = -ENOMEM;
4001		goto out;
4002	}
4003	log->ra = ra;
4004
4005	t16 = le16_to_cpu(ra2->client_off);
4006	if (t16 == offsetof(struct RESTART_AREA, clients)) {
4007		memcpy(ra, ra2, log->ra_size);
4008	} else {
4009		memcpy(ra, ra2, offsetof(struct RESTART_AREA, clients));
4010		memcpy(ra->clients, Add2Ptr(ra2, t16),
4011		       le16_to_cpu(ra2->ra_len) - t16);
4012
4013		log->current_openlog_count = get_random_u32();
4014		ra->open_log_count = cpu_to_le32(log->current_openlog_count);
4015		log->ra_size = offsetof(struct RESTART_AREA, clients) +
4016			       sizeof(struct CLIENT_REC);
4017		ra->client_off =
4018			cpu_to_le16(offsetof(struct RESTART_AREA, clients));
4019		ra->ra_len = cpu_to_le16(log->ra_size);
4020	}
4021
4022	le32_add_cpu(&ra->open_log_count, 1);
4023
4024	/* Now we need to walk through looking for the last lsn. */
4025	err = last_log_lsn(log);
4026	if (err)
4027		goto out;
4028
4029	log->current_avail = current_log_avail(log);
4030
4031	/* Remember which restart area to write first. */
4032	log->init_ra = log->rst_info.vbo;
4033
4034process_log:
4035	/* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values. */
4036	switch ((log->major_ver << 16) + log->minor_ver) {
4037	case 0x10000:
4038	case 0x10001:
4039	case 0x20000:
4040		break;
4041	default:
4042		ntfs_warn(sbi->sb, "\x24LogFile version %d.%d is not supported",
4043			  log->major_ver, log->minor_ver);
4044		err = -EOPNOTSUPP;
4045		log->set_dirty = true;
4046		goto out;
4047	}
4048
4049	/* One client "NTFS" per logfile. */
4050	ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
4051
4052	for (client = ra->client_idx[1];; client = cr->next_client) {
4053		if (client == LFS_NO_CLIENT_LE) {
4054			/* Insert "NTFS" client LogFile. */
4055			client = ra->client_idx[0];
4056			if (client == LFS_NO_CLIENT_LE) {
4057				err = -EINVAL;
4058				goto out;
4059			}
4060
4061			t16 = le16_to_cpu(client);
4062			cr = ca + t16;
4063
4064			remove_client(ca, cr, &ra->client_idx[0]);
4065
4066			cr->restart_lsn = 0;
4067			cr->oldest_lsn = cpu_to_le64(log->oldest_lsn);
4068			cr->name_bytes = cpu_to_le32(8);
4069			cr->name[0] = cpu_to_le16('N');
4070			cr->name[1] = cpu_to_le16('T');
4071			cr->name[2] = cpu_to_le16('F');
4072			cr->name[3] = cpu_to_le16('S');
4073
4074			add_client(ca, t16, &ra->client_idx[1]);
4075			break;
4076		}
4077
4078		cr = ca + le16_to_cpu(client);
4079
4080		if (cpu_to_le32(8) == cr->name_bytes &&
4081		    cpu_to_le16('N') == cr->name[0] &&
4082		    cpu_to_le16('T') == cr->name[1] &&
4083		    cpu_to_le16('F') == cr->name[2] &&
4084		    cpu_to_le16('S') == cr->name[3])
4085			break;
4086	}
4087
4088	/* Update the client handle with the client block information. */
4089	log->client_id.seq_num = cr->seq_num;
4090	log->client_id.client_idx = client;
4091
4092	err = read_rst_area(log, &rst, &checkpt_lsn);
4093	if (err)
4094		goto out;
4095
4096	if (!rst)
4097		goto out;
4098
4099	bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28;
4100
4101	if (rst->check_point_start)
4102		checkpt_lsn = le64_to_cpu(rst->check_point_start);
4103
4104	/* Allocate and Read the Transaction Table. */
4105	if (!rst->transact_table_len)
4106		goto check_dirty_page_table;
4107
4108	t64 = le64_to_cpu(rst->transact_table_lsn);
4109	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4110	if (err)
4111		goto out;
4112
4113	lrh = lcb->log_rec;
4114	frh = lcb->lrh;
4115	rec_len = le32_to_cpu(frh->client_data_len);
4116
4117	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4118			   bytes_per_attr_entry)) {
4119		err = -EINVAL;
4120		goto out;
4121	}
4122
4123	t16 = le16_to_cpu(lrh->redo_off);
4124
4125	rt = Add2Ptr(lrh, t16);
4126	t32 = rec_len - t16;
4127
4128	/* Now check that this is a valid restart table. */
4129	if (!check_rstbl(rt, t32)) {
4130		err = -EINVAL;
4131		goto out;
4132	}
4133
4134	trtbl = kmemdup(rt, t32, GFP_NOFS);
4135	if (!trtbl) {
4136		err = -ENOMEM;
4137		goto out;
4138	}
4139
4140	lcb_put(lcb);
4141	lcb = NULL;
4142
4143check_dirty_page_table:
4144	/* The next record back should be the Dirty Pages Table. */
4145	if (!rst->dirty_pages_len)
4146		goto check_attribute_names;
4147
4148	t64 = le64_to_cpu(rst->dirty_pages_table_lsn);
4149	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4150	if (err)
4151		goto out;
4152
4153	lrh = lcb->log_rec;
4154	frh = lcb->lrh;
4155	rec_len = le32_to_cpu(frh->client_data_len);
4156
4157	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4158			   bytes_per_attr_entry)) {
4159		err = -EINVAL;
4160		goto out;
4161	}
4162
4163	t16 = le16_to_cpu(lrh->redo_off);
4164
4165	rt = Add2Ptr(lrh, t16);
4166	t32 = rec_len - t16;
4167
4168	/* Now check that this is a valid restart table. */
4169	if (!check_rstbl(rt, t32)) {
4170		err = -EINVAL;
4171		goto out;
4172	}
4173
4174	dptbl = kmemdup(rt, t32, GFP_NOFS);
4175	if (!dptbl) {
4176		err = -ENOMEM;
4177		goto out;
4178	}
4179
4180	/* Convert Ra version '0' into version '1'. */
4181	if (rst->major_ver)
4182		goto end_conv_1;
4183
4184	dp = NULL;
4185	while ((dp = enum_rstbl(dptbl, dp))) {
4186		struct DIR_PAGE_ENTRY_32 *dp0 = (struct DIR_PAGE_ENTRY_32 *)dp;
4187		// NOTE: Danger. Check for out of boundary.
4188		memmove(&dp->vcn, &dp0->vcn_low,
4189			2 * sizeof(u64) +
4190				le32_to_cpu(dp->lcns_follow) * sizeof(u64));
4191	}
4192
4193end_conv_1:
4194	lcb_put(lcb);
4195	lcb = NULL;
4196
4197	/*
4198	 * Go through the table and remove the duplicates,
4199	 * remembering the oldest lsn values.
4200	 */
4201	if (sbi->cluster_size <= log->page_size)
4202		goto trace_dp_table;
4203
4204	dp = NULL;
4205	while ((dp = enum_rstbl(dptbl, dp))) {
4206		struct DIR_PAGE_ENTRY *next = dp;
4207
4208		while ((next = enum_rstbl(dptbl, next))) {
4209			if (next->target_attr == dp->target_attr &&
4210			    next->vcn == dp->vcn) {
4211				if (le64_to_cpu(next->oldest_lsn) <
4212				    le64_to_cpu(dp->oldest_lsn)) {
4213					dp->oldest_lsn = next->oldest_lsn;
4214				}
4215
4216				free_rsttbl_idx(dptbl, PtrOffset(dptbl, next));
4217			}
4218		}
4219	}
4220trace_dp_table:
4221check_attribute_names:
4222	/* The next record should be the Attribute Names. */
4223	if (!rst->attr_names_len)
4224		goto check_attr_table;
4225
4226	t64 = le64_to_cpu(rst->attr_names_lsn);
4227	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4228	if (err)
4229		goto out;
4230
4231	lrh = lcb->log_rec;
4232	frh = lcb->lrh;
4233	rec_len = le32_to_cpu(frh->client_data_len);
4234
4235	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4236			   bytes_per_attr_entry)) {
4237		err = -EINVAL;
4238		goto out;
4239	}
4240
4241	t32 = lrh_length(lrh);
4242	rec_len -= t32;
4243
4244	attr_names = kmemdup(Add2Ptr(lrh, t32), rec_len, GFP_NOFS);
4245	if (!attr_names) {
4246		err = -ENOMEM;
4247		goto out;
4248	}
4249
4250	lcb_put(lcb);
4251	lcb = NULL;
4252
4253check_attr_table:
4254	/* The next record should be the attribute Table. */
4255	if (!rst->open_attr_len)
4256		goto check_attribute_names2;
4257
4258	t64 = le64_to_cpu(rst->open_attr_table_lsn);
4259	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4260	if (err)
4261		goto out;
4262
4263	lrh = lcb->log_rec;
4264	frh = lcb->lrh;
4265	rec_len = le32_to_cpu(frh->client_data_len);
4266
4267	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4268			   bytes_per_attr_entry)) {
4269		err = -EINVAL;
4270		goto out;
4271	}
4272
4273	t16 = le16_to_cpu(lrh->redo_off);
4274
4275	rt = Add2Ptr(lrh, t16);
4276	t32 = rec_len - t16;
4277
4278	if (!check_rstbl(rt, t32)) {
4279		err = -EINVAL;
4280		goto out;
4281	}
4282
4283	oatbl = kmemdup(rt, t32, GFP_NOFS);
4284	if (!oatbl) {
4285		err = -ENOMEM;
4286		goto out;
4287	}
4288
4289	log->open_attr_tbl = oatbl;
4290
4291	/* Clear all of the Attr pointers. */
4292	oe = NULL;
4293	while ((oe = enum_rstbl(oatbl, oe))) {
4294		if (!rst->major_ver) {
4295			struct OPEN_ATTR_ENRTY_32 oe0;
4296
4297			/* Really 'oe' points to OPEN_ATTR_ENRTY_32. */
4298			memcpy(&oe0, oe, SIZEOF_OPENATTRIBUTEENTRY0);
4299
4300			oe->bytes_per_index = oe0.bytes_per_index;
4301			oe->type = oe0.type;
4302			oe->is_dirty_pages = oe0.is_dirty_pages;
4303			oe->name_len = 0;
4304			oe->ref = oe0.ref;
4305			oe->open_record_lsn = oe0.open_record_lsn;
4306		}
4307
4308		oe->is_attr_name = 0;
4309		oe->ptr = NULL;
4310	}
4311
4312	lcb_put(lcb);
4313	lcb = NULL;
4314
4315check_attribute_names2:
4316	if (rst->attr_names_len && oatbl) {
4317		struct ATTR_NAME_ENTRY *ane = attr_names;
4318		while (ane->off) {
4319			/* TODO: Clear table on exit! */
4320			oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
4321			t16 = le16_to_cpu(ane->name_bytes);
4322			oe->name_len = t16 / sizeof(short);
4323			oe->ptr = ane->name;
4324			oe->is_attr_name = 2;
4325			ane = Add2Ptr(ane,
4326				      sizeof(struct ATTR_NAME_ENTRY) + t16);
4327		}
4328	}
4329
4330	/*
4331	 * If the checkpt_lsn is zero, then this is a freshly
4332	 * formatted disk and we have no work to do.
4333	 */
4334	if (!checkpt_lsn) {
4335		err = 0;
4336		goto out;
4337	}
4338
4339	if (!oatbl) {
4340		oatbl = init_rsttbl(bytes_per_attr_entry, 8);
4341		if (!oatbl) {
4342			err = -ENOMEM;
4343			goto out;
4344		}
4345	}
4346
4347	log->open_attr_tbl = oatbl;
4348
4349	/* Start the analysis pass from the Checkpoint lsn. */
4350	rec_lsn = checkpt_lsn;
4351
4352	/* Read the first lsn. */
4353	err = read_log_rec_lcb(log, checkpt_lsn, lcb_ctx_next, &lcb);
4354	if (err)
4355		goto out;
4356
4357	/* Loop to read all subsequent records to the end of the log file. */
4358next_log_record_analyze:
4359	err = read_next_log_rec(log, lcb, &rec_lsn);
4360	if (err)
4361		goto out;
4362
4363	if (!rec_lsn)
4364		goto end_log_records_enumerate;
4365
4366	frh = lcb->lrh;
4367	transact_id = le32_to_cpu(frh->transact_id);
4368	rec_len = le32_to_cpu(frh->client_data_len);
4369	lrh = lcb->log_rec;
4370
4371	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
4372		err = -EINVAL;
4373		goto out;
4374	}
4375
4376	/*
4377	 * The first lsn after the previous lsn remembered
4378	 * the checkpoint is the first candidate for the rlsn.
4379	 */
4380	if (!rlsn)
4381		rlsn = rec_lsn;
4382
4383	if (LfsClientRecord != frh->record_type)
4384		goto next_log_record_analyze;
4385
4386	/*
4387	 * Now update the Transaction Table for this transaction. If there
4388	 * is no entry present or it is unallocated we allocate the entry.
4389	 */
4390	if (!trtbl) {
4391		trtbl = init_rsttbl(sizeof(struct TRANSACTION_ENTRY),
4392				    INITIAL_NUMBER_TRANSACTIONS);
4393		if (!trtbl) {
4394			err = -ENOMEM;
4395			goto out;
4396		}
4397	}
4398
4399	tr = Add2Ptr(trtbl, transact_id);
4400
4401	if (transact_id >= bytes_per_rt(trtbl) ||
4402	    tr->next != RESTART_ENTRY_ALLOCATED_LE) {
4403		tr = alloc_rsttbl_from_idx(&trtbl, transact_id);
4404		if (!tr) {
4405			err = -ENOMEM;
4406			goto out;
4407		}
4408		tr->transact_state = TransactionActive;
4409		tr->first_lsn = cpu_to_le64(rec_lsn);
4410	}
4411
4412	tr->prev_lsn = tr->undo_next_lsn = cpu_to_le64(rec_lsn);
4413
4414	/*
4415	 * If this is a compensation log record, then change
4416	 * the undo_next_lsn to be the undo_next_lsn of this record.
4417	 */
4418	if (lrh->undo_op == cpu_to_le16(CompensationLogRecord))
4419		tr->undo_next_lsn = frh->client_undo_next_lsn;
4420
4421	/* Dispatch to handle log record depending on type. */
4422	switch (le16_to_cpu(lrh->redo_op)) {
4423	case InitializeFileRecordSegment:
4424	case DeallocateFileRecordSegment:
4425	case WriteEndOfFileRecordSegment:
4426	case CreateAttribute:
4427	case DeleteAttribute:
4428	case UpdateResidentValue:
4429	case UpdateNonresidentValue:
4430	case UpdateMappingPairs:
4431	case SetNewAttributeSizes:
4432	case AddIndexEntryRoot:
4433	case DeleteIndexEntryRoot:
4434	case AddIndexEntryAllocation:
4435	case DeleteIndexEntryAllocation:
4436	case WriteEndOfIndexBuffer:
4437	case SetIndexEntryVcnRoot:
4438	case SetIndexEntryVcnAllocation:
4439	case UpdateFileNameRoot:
4440	case UpdateFileNameAllocation:
4441	case SetBitsInNonresidentBitMap:
4442	case ClearBitsInNonresidentBitMap:
4443	case UpdateRecordDataRoot:
4444	case UpdateRecordDataAllocation:
4445	case ZeroEndOfFileRecord:
4446		t16 = le16_to_cpu(lrh->target_attr);
4447		t64 = le64_to_cpu(lrh->target_vcn);
4448		dp = find_dp(dptbl, t16, t64);
4449
4450		if (dp)
4451			goto copy_lcns;
4452
4453		/*
4454		 * Calculate the number of clusters per page of the system
4455		 * which wrote the checkpoint, possibly creating the table.
4456		 */
4457		if (dptbl) {
4458			t32 = (le16_to_cpu(dptbl->size) -
4459			       sizeof(struct DIR_PAGE_ENTRY)) /
4460			      sizeof(u64);
4461		} else {
4462			t32 = log->clst_per_page;
4463			kfree(dptbl);
4464			dptbl = init_rsttbl(struct_size(dp, page_lcns, t32),
4465					    32);
4466			if (!dptbl) {
4467				err = -ENOMEM;
4468				goto out;
4469			}
4470		}
4471
4472		dp = alloc_rsttbl_idx(&dptbl);
4473		if (!dp) {
4474			err = -ENOMEM;
4475			goto out;
4476		}
4477		dp->target_attr = cpu_to_le32(t16);
4478		dp->transfer_len = cpu_to_le32(t32 << sbi->cluster_bits);
4479		dp->lcns_follow = cpu_to_le32(t32);
4480		dp->vcn = cpu_to_le64(t64 & ~((u64)t32 - 1));
4481		dp->oldest_lsn = cpu_to_le64(rec_lsn);
4482
4483copy_lcns:
4484		/*
4485		 * Copy the Lcns from the log record into the Dirty Page Entry.
4486		 * TODO: For different page size support, must somehow make
4487		 * whole routine a loop, case Lcns do not fit below.
4488		 */
4489		t16 = le16_to_cpu(lrh->lcns_follow);
4490		for (i = 0; i < t16; i++) {
4491			size_t j = (size_t)(le64_to_cpu(lrh->target_vcn) -
4492					    le64_to_cpu(dp->vcn));
4493			dp->page_lcns[j + i] = lrh->page_lcns[i];
4494		}
4495
4496		goto next_log_record_analyze;
4497
4498	case DeleteDirtyClusters: {
4499		u32 range_count =
4500			le16_to_cpu(lrh->redo_len) / sizeof(struct LCN_RANGE);
4501		const struct LCN_RANGE *r =
4502			Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
4503
4504		/* Loop through all of the Lcn ranges in this log record. */
4505		for (i = 0; i < range_count; i++, r++) {
4506			u64 lcn0 = le64_to_cpu(r->lcn);
4507			u64 lcn_e = lcn0 + le64_to_cpu(r->len) - 1;
4508
4509			dp = NULL;
4510			while ((dp = enum_rstbl(dptbl, dp))) {
4511				u32 j;
4512
4513				t32 = le32_to_cpu(dp->lcns_follow);
4514				for (j = 0; j < t32; j++) {
4515					t64 = le64_to_cpu(dp->page_lcns[j]);
4516					if (t64 >= lcn0 && t64 <= lcn_e)
4517						dp->page_lcns[j] = 0;
4518				}
4519			}
4520		}
4521		goto next_log_record_analyze;
4522		;
4523	}
4524
4525	case OpenNonresidentAttribute:
4526		t16 = le16_to_cpu(lrh->target_attr);
4527		if (t16 >= bytes_per_rt(oatbl)) {
4528			/*
4529			 * Compute how big the table needs to be.
4530			 * Add 10 extra entries for some cushion.
4531			 */
4532			u32 new_e = t16 / le16_to_cpu(oatbl->size);
4533
4534			new_e += 10 - le16_to_cpu(oatbl->used);
4535
4536			oatbl = extend_rsttbl(oatbl, new_e, ~0u);
4537			log->open_attr_tbl = oatbl;
4538			if (!oatbl) {
4539				err = -ENOMEM;
4540				goto out;
4541			}
4542		}
4543
4544		/* Point to the entry being opened. */
4545		oe = alloc_rsttbl_from_idx(&oatbl, t16);
4546		log->open_attr_tbl = oatbl;
4547		if (!oe) {
4548			err = -ENOMEM;
4549			goto out;
4550		}
4551
4552		/* Initialize this entry from the log record. */
4553		t16 = le16_to_cpu(lrh->redo_off);
4554		if (!rst->major_ver) {
4555			/* Convert version '0' into version '1'. */
4556			struct OPEN_ATTR_ENRTY_32 *oe0 = Add2Ptr(lrh, t16);
4557
4558			oe->bytes_per_index = oe0->bytes_per_index;
4559			oe->type = oe0->type;
4560			oe->is_dirty_pages = oe0->is_dirty_pages;
4561			oe->name_len = 0; //oe0.name_len;
4562			oe->ref = oe0->ref;
4563			oe->open_record_lsn = oe0->open_record_lsn;
4564		} else {
4565			memcpy(oe, Add2Ptr(lrh, t16), bytes_per_attr_entry);
4566		}
4567
4568		t16 = le16_to_cpu(lrh->undo_len);
4569		if (t16) {
4570			oe->ptr = kmalloc(t16, GFP_NOFS);
4571			if (!oe->ptr) {
4572				err = -ENOMEM;
4573				goto out;
4574			}
4575			oe->name_len = t16 / sizeof(short);
4576			memcpy(oe->ptr,
4577			       Add2Ptr(lrh, le16_to_cpu(lrh->undo_off)), t16);
4578			oe->is_attr_name = 1;
4579		} else {
4580			oe->ptr = NULL;
4581			oe->is_attr_name = 0;
4582		}
4583
4584		goto next_log_record_analyze;
4585
4586	case HotFix:
4587		t16 = le16_to_cpu(lrh->target_attr);
4588		t64 = le64_to_cpu(lrh->target_vcn);
4589		dp = find_dp(dptbl, t16, t64);
4590		if (dp) {
4591			size_t j = le64_to_cpu(lrh->target_vcn) -
4592				   le64_to_cpu(dp->vcn);
4593			if (dp->page_lcns[j])
4594				dp->page_lcns[j] = lrh->page_lcns[0];
4595		}
4596		goto next_log_record_analyze;
4597
4598	case EndTopLevelAction:
4599		tr = Add2Ptr(trtbl, transact_id);
4600		tr->prev_lsn = cpu_to_le64(rec_lsn);
4601		tr->undo_next_lsn = frh->client_undo_next_lsn;
4602		goto next_log_record_analyze;
4603
4604	case PrepareTransaction:
4605		tr = Add2Ptr(trtbl, transact_id);
4606		tr->transact_state = TransactionPrepared;
4607		goto next_log_record_analyze;
4608
4609	case CommitTransaction:
4610		tr = Add2Ptr(trtbl, transact_id);
4611		tr->transact_state = TransactionCommitted;
4612		goto next_log_record_analyze;
4613
4614	case ForgetTransaction:
4615		free_rsttbl_idx(trtbl, transact_id);
4616		goto next_log_record_analyze;
4617
4618	case Noop:
4619	case OpenAttributeTableDump:
4620	case AttributeNamesDump:
4621	case DirtyPageTableDump:
4622	case TransactionTableDump:
4623		/* The following cases require no action the Analysis Pass. */
4624		goto next_log_record_analyze;
4625
4626	default:
4627		/*
4628		 * All codes will be explicitly handled.
4629		 * If we see a code we do not expect, then we are trouble.
4630		 */
4631		goto next_log_record_analyze;
4632	}
4633
4634end_log_records_enumerate:
4635	lcb_put(lcb);
4636	lcb = NULL;
4637
4638	/*
4639	 * Scan the Dirty Page Table and Transaction Table for
4640	 * the lowest lsn, and return it as the Redo lsn.
4641	 */
4642	dp = NULL;
4643	while ((dp = enum_rstbl(dptbl, dp))) {
4644		t64 = le64_to_cpu(dp->oldest_lsn);
4645		if (t64 && t64 < rlsn)
4646			rlsn = t64;
4647	}
4648
4649	tr = NULL;
4650	while ((tr = enum_rstbl(trtbl, tr))) {
4651		t64 = le64_to_cpu(tr->first_lsn);
4652		if (t64 && t64 < rlsn)
4653			rlsn = t64;
4654	}
4655
4656	/*
4657	 * Only proceed if the Dirty Page Table or Transaction
4658	 * table are not empty.
4659	 */
4660	if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total))
4661		goto end_reply;
4662
4663	sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
4664	if (is_ro)
4665		goto out;
4666
4667	/* Reopen all of the attributes with dirty pages. */
4668	oe = NULL;
4669next_open_attribute:
4670
4671	oe = enum_rstbl(oatbl, oe);
4672	if (!oe) {
4673		err = 0;
4674		dp = NULL;
4675		goto next_dirty_page;
4676	}
4677
4678	oa = kzalloc(sizeof(struct OpenAttr), GFP_NOFS);
4679	if (!oa) {
4680		err = -ENOMEM;
4681		goto out;
4682	}
4683
4684	inode = ntfs_iget5(sbi->sb, &oe->ref, NULL);
4685	if (IS_ERR(inode))
4686		goto fake_attr;
4687
4688	if (is_bad_inode(inode)) {
4689		iput(inode);
4690fake_attr:
4691		if (oa->ni) {
4692			iput(&oa->ni->vfs_inode);
4693			oa->ni = NULL;
4694		}
4695
4696		attr = attr_create_nonres_log(sbi, oe->type, 0, oe->ptr,
4697					      oe->name_len, 0);
4698		if (!attr) {
4699			kfree(oa);
4700			err = -ENOMEM;
4701			goto out;
4702		}
4703		oa->attr = attr;
4704		oa->run1 = &oa->run0;
4705		goto final_oe;
4706	}
4707
4708	ni_oe = ntfs_i(inode);
4709	oa->ni = ni_oe;
4710
4711	attr = ni_find_attr(ni_oe, NULL, NULL, oe->type, oe->ptr, oe->name_len,
4712			    NULL, NULL);
4713
4714	if (!attr)
4715		goto fake_attr;
4716
4717	t32 = le32_to_cpu(attr->size);
4718	oa->attr = kmemdup(attr, t32, GFP_NOFS);
4719	if (!oa->attr)
4720		goto fake_attr;
4721
4722	if (!S_ISDIR(inode->i_mode)) {
4723		if (attr->type == ATTR_DATA && !attr->name_len) {
4724			oa->run1 = &ni_oe->file.run;
4725			goto final_oe;
4726		}
4727	} else {
4728		if (attr->type == ATTR_ALLOC &&
4729		    attr->name_len == ARRAY_SIZE(I30_NAME) &&
4730		    !memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME))) {
4731			oa->run1 = &ni_oe->dir.alloc_run;
4732			goto final_oe;
4733		}
4734	}
4735
4736	if (attr->non_res) {
4737		u16 roff = le16_to_cpu(attr->nres.run_off);
4738		CLST svcn = le64_to_cpu(attr->nres.svcn);
4739
4740		if (roff > t32) {
4741			kfree(oa->attr);
4742			oa->attr = NULL;
4743			goto fake_attr;
4744		}
4745
4746		err = run_unpack(&oa->run0, sbi, inode->i_ino, svcn,
4747				 le64_to_cpu(attr->nres.evcn), svcn,
4748				 Add2Ptr(attr, roff), t32 - roff);
4749		if (err < 0) {
4750			kfree(oa->attr);
4751			oa->attr = NULL;
4752			goto fake_attr;
4753		}
4754		err = 0;
4755	}
4756	oa->run1 = &oa->run0;
4757	attr = oa->attr;
4758
4759final_oe:
4760	if (oe->is_attr_name == 1)
4761		kfree(oe->ptr);
4762	oe->is_attr_name = 0;
4763	oe->ptr = oa;
4764	oe->name_len = attr->name_len;
4765
4766	goto next_open_attribute;
4767
4768	/*
4769	 * Now loop through the dirty page table to extract all of the Vcn/Lcn.
4770	 * Mapping that we have, and insert it into the appropriate run.
4771	 */
4772next_dirty_page:
4773	dp = enum_rstbl(dptbl, dp);
4774	if (!dp)
4775		goto do_redo_1;
4776
4777	oe = Add2Ptr(oatbl, le32_to_cpu(dp->target_attr));
4778
4779	if (oe->next != RESTART_ENTRY_ALLOCATED_LE)
4780		goto next_dirty_page;
4781
4782	oa = oe->ptr;
4783	if (!oa)
4784		goto next_dirty_page;
4785
4786	i = -1;
4787next_dirty_page_vcn:
4788	i += 1;
4789	if (i >= le32_to_cpu(dp->lcns_follow))
4790		goto next_dirty_page;
4791
4792	vcn = le64_to_cpu(dp->vcn) + i;
4793	size = (vcn + 1) << sbi->cluster_bits;
4794
4795	if (!dp->page_lcns[i])
4796		goto next_dirty_page_vcn;
4797
4798	rno = ino_get(&oe->ref);
4799	if (rno <= MFT_REC_MIRR &&
4800	    size < (MFT_REC_VOL + 1) * sbi->record_size &&
4801	    oe->type == ATTR_DATA) {
4802		goto next_dirty_page_vcn;
4803	}
4804
4805	lcn = le64_to_cpu(dp->page_lcns[i]);
4806
4807	if ((!run_lookup_entry(oa->run1, vcn, &lcn0, &len0, NULL) ||
4808	     lcn0 != lcn) &&
4809	    !run_add_entry(oa->run1, vcn, lcn, 1, false)) {
4810		err = -ENOMEM;
4811		goto out;
4812	}
4813	attr = oa->attr;
4814	if (size > le64_to_cpu(attr->nres.alloc_size)) {
4815		attr->nres.valid_size = attr->nres.data_size =
4816			attr->nres.alloc_size = cpu_to_le64(size);
4817	}
4818	goto next_dirty_page_vcn;
4819
4820do_redo_1:
4821	/*
4822	 * Perform the Redo Pass, to restore all of the dirty pages to the same
4823	 * contents that they had immediately before the crash. If the dirty
4824	 * page table is empty, then we can skip the entire Redo Pass.
4825	 */
4826	if (!dptbl || !dptbl->total)
4827		goto do_undo_action;
4828
4829	rec_lsn = rlsn;
4830
4831	/*
4832	 * Read the record at the Redo lsn, before falling
4833	 * into common code to handle each record.
4834	 */
4835	err = read_log_rec_lcb(log, rlsn, lcb_ctx_next, &lcb);
4836	if (err)
4837		goto out;
4838
4839	/*
4840	 * Now loop to read all of our log records forwards, until
4841	 * we hit the end of the file, cleaning up at the end.
4842	 */
4843do_action_next:
4844	frh = lcb->lrh;
4845
4846	if (LfsClientRecord != frh->record_type)
4847		goto read_next_log_do_action;
4848
4849	transact_id = le32_to_cpu(frh->transact_id);
4850	rec_len = le32_to_cpu(frh->client_data_len);
4851	lrh = lcb->log_rec;
4852
4853	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
4854		err = -EINVAL;
4855		goto out;
4856	}
4857
4858	/* Ignore log records that do not update pages. */
4859	if (lrh->lcns_follow)
4860		goto find_dirty_page;
4861
4862	goto read_next_log_do_action;
4863
4864find_dirty_page:
4865	t16 = le16_to_cpu(lrh->target_attr);
4866	t64 = le64_to_cpu(lrh->target_vcn);
4867	dp = find_dp(dptbl, t16, t64);
4868
4869	if (!dp)
4870		goto read_next_log_do_action;
4871
4872	if (rec_lsn < le64_to_cpu(dp->oldest_lsn))
4873		goto read_next_log_do_action;
4874
4875	t16 = le16_to_cpu(lrh->target_attr);
4876	if (t16 >= bytes_per_rt(oatbl)) {
4877		err = -EINVAL;
4878		goto out;
4879	}
4880
4881	oe = Add2Ptr(oatbl, t16);
4882
4883	if (oe->next != RESTART_ENTRY_ALLOCATED_LE) {
4884		err = -EINVAL;
4885		goto out;
4886	}
4887
4888	oa = oe->ptr;
4889
4890	if (!oa) {
4891		err = -EINVAL;
4892		goto out;
4893	}
4894	attr = oa->attr;
4895
4896	vcn = le64_to_cpu(lrh->target_vcn);
4897
4898	if (!run_lookup_entry(oa->run1, vcn, &lcn, NULL, NULL) ||
4899	    lcn == SPARSE_LCN) {
4900		goto read_next_log_do_action;
4901	}
4902
4903	/* Point to the Redo data and get its length. */
4904	data = Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
4905	dlen = le16_to_cpu(lrh->redo_len);
4906
4907	/* Shorten length by any Lcns which were deleted. */
4908	saved_len = dlen;
4909
4910	for (i = le16_to_cpu(lrh->lcns_follow); i; i--) {
4911		size_t j;
4912		u32 alen, voff;
4913
4914		voff = le16_to_cpu(lrh->record_off) +
4915		       le16_to_cpu(lrh->attr_off);
4916		voff += le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
4917
4918		/* If the Vcn question is allocated, we can just get out. */
4919		j = le64_to_cpu(lrh->target_vcn) - le64_to_cpu(dp->vcn);
4920		if (dp->page_lcns[j + i - 1])
4921			break;
4922
4923		if (!saved_len)
4924			saved_len = 1;
4925
4926		/*
4927		 * Calculate the allocated space left relative to the
4928		 * log record Vcn, after removing this unallocated Vcn.
4929		 */
4930		alen = (i - 1) << sbi->cluster_bits;
4931
4932		/*
4933		 * If the update described this log record goes beyond
4934		 * the allocated space, then we will have to reduce the length.
4935		 */
4936		if (voff >= alen)
4937			dlen = 0;
4938		else if (voff + dlen > alen)
4939			dlen = alen - voff;
4940	}
4941
4942	/*
4943	 * If the resulting dlen from above is now zero,
4944	 * we can skip this log record.
4945	 */
4946	if (!dlen && saved_len)
4947		goto read_next_log_do_action;
4948
4949	t16 = le16_to_cpu(lrh->redo_op);
4950	if (can_skip_action(t16))
4951		goto read_next_log_do_action;
4952
4953	/* Apply the Redo operation a common routine. */
4954	err = do_action(log, oe, lrh, t16, data, dlen, rec_len, &rec_lsn);
4955	if (err)
4956		goto out;
4957
4958	/* Keep reading and looping back until end of file. */
4959read_next_log_do_action:
4960	err = read_next_log_rec(log, lcb, &rec_lsn);
4961	if (!err && rec_lsn)
4962		goto do_action_next;
4963
4964	lcb_put(lcb);
4965	lcb = NULL;
4966
4967do_undo_action:
4968	/* Scan Transaction Table. */
4969	tr = NULL;
4970transaction_table_next:
4971	tr = enum_rstbl(trtbl, tr);
4972	if (!tr)
4973		goto undo_action_done;
4974
4975	if (TransactionActive != tr->transact_state || !tr->undo_next_lsn) {
4976		free_rsttbl_idx(trtbl, PtrOffset(trtbl, tr));
4977		goto transaction_table_next;
4978	}
4979
4980	log->transaction_id = PtrOffset(trtbl, tr);
4981	undo_next_lsn = le64_to_cpu(tr->undo_next_lsn);
4982
4983	/*
4984	 * We only have to do anything if the transaction has
4985	 * something its undo_next_lsn field.
4986	 */
4987	if (!undo_next_lsn)
4988		goto commit_undo;
4989
4990	/* Read the first record to be undone by this transaction. */
4991	err = read_log_rec_lcb(log, undo_next_lsn, lcb_ctx_undo_next, &lcb);
4992	if (err)
4993		goto out;
4994
4995	/*
4996	 * Now loop to read all of our log records forwards,
4997	 * until we hit the end of the file, cleaning up at the end.
4998	 */
4999undo_action_next:
5000
5001	lrh = lcb->log_rec;
5002	frh = lcb->lrh;
5003	transact_id = le32_to_cpu(frh->transact_id);
5004	rec_len = le32_to_cpu(frh->client_data_len);
5005
5006	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
5007		err = -EINVAL;
5008		goto out;
5009	}
5010
5011	if (lrh->undo_op == cpu_to_le16(Noop))
5012		goto read_next_log_undo_action;
5013
5014	oe = Add2Ptr(oatbl, le16_to_cpu(lrh->target_attr));
5015	oa = oe->ptr;
5016
5017	t16 = le16_to_cpu(lrh->lcns_follow);
5018	if (!t16)
5019		goto add_allocated_vcns;
5020
5021	is_mapped = run_lookup_entry(oa->run1, le64_to_cpu(lrh->target_vcn),
5022				     &lcn, &clen, NULL);
5023
5024	/*
5025	 * If the mapping isn't already the table or the  mapping
5026	 * corresponds to a hole the mapping, we need to make sure
5027	 * there is no partial page already memory.
5028	 */
5029	if (is_mapped && lcn != SPARSE_LCN && clen >= t16)
5030		goto add_allocated_vcns;
5031
5032	vcn = le64_to_cpu(lrh->target_vcn);
5033	vcn &= ~(u64)(log->clst_per_page - 1);
5034
5035add_allocated_vcns:
5036	for (i = 0, vcn = le64_to_cpu(lrh->target_vcn),
5037	    size = (vcn + 1) << sbi->cluster_bits;
5038	     i < t16; i++, vcn += 1, size += sbi->cluster_size) {
5039		attr = oa->attr;
5040		if (!attr->non_res) {
5041			if (size > le32_to_cpu(attr->res.data_size))
5042				attr->res.data_size = cpu_to_le32(size);
5043		} else {
5044			if (size > le64_to_cpu(attr->nres.data_size))
5045				attr->nres.valid_size = attr->nres.data_size =
5046					attr->nres.alloc_size =
5047						cpu_to_le64(size);
5048		}
5049	}
5050
5051	t16 = le16_to_cpu(lrh->undo_op);
5052	if (can_skip_action(t16))
5053		goto read_next_log_undo_action;
5054
5055	/* Point to the Redo data and get its length. */
5056	data = Add2Ptr(lrh, le16_to_cpu(lrh->undo_off));
5057	dlen = le16_to_cpu(lrh->undo_len);
5058
5059	/* It is time to apply the undo action. */
5060	err = do_action(log, oe, lrh, t16, data, dlen, rec_len, NULL);
5061
5062read_next_log_undo_action:
5063	/*
5064	 * Keep reading and looping back until we have read the
5065	 * last record for this transaction.
5066	 */
5067	err = read_next_log_rec(log, lcb, &rec_lsn);
5068	if (err)
5069		goto out;
5070
5071	if (rec_lsn)
5072		goto undo_action_next;
5073
5074	lcb_put(lcb);
5075	lcb = NULL;
5076
5077commit_undo:
5078	free_rsttbl_idx(trtbl, log->transaction_id);
5079
5080	log->transaction_id = 0;
5081
5082	goto transaction_table_next;
5083
5084undo_action_done:
5085
5086	ntfs_update_mftmirr(sbi, 0);
5087
5088	sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY;
5089
5090end_reply:
5091
5092	err = 0;
5093	if (is_ro)
5094		goto out;
5095
5096	rh = kzalloc(log->page_size, GFP_NOFS);
5097	if (!rh) {
5098		err = -ENOMEM;
5099		goto out;
5100	}
5101
5102	rh->rhdr.sign = NTFS_RSTR_SIGNATURE;
5103	rh->rhdr.fix_off = cpu_to_le16(offsetof(struct RESTART_HDR, fixups));
5104	t16 = (log->page_size >> SECTOR_SHIFT) + 1;
5105	rh->rhdr.fix_num = cpu_to_le16(t16);
5106	rh->sys_page_size = cpu_to_le32(log->page_size);
5107	rh->page_size = cpu_to_le32(log->page_size);
5108
5109	t16 = ALIGN(offsetof(struct RESTART_HDR, fixups) + sizeof(short) * t16,
5110		    8);
5111	rh->ra_off = cpu_to_le16(t16);
5112	rh->minor_ver = cpu_to_le16(1); // 0x1A:
5113	rh->major_ver = cpu_to_le16(1); // 0x1C:
5114
5115	ra2 = Add2Ptr(rh, t16);
5116	memcpy(ra2, ra, sizeof(struct RESTART_AREA));
5117
5118	ra2->client_idx[0] = 0;
5119	ra2->client_idx[1] = LFS_NO_CLIENT_LE;
5120	ra2->flags = cpu_to_le16(2);
5121
5122	le32_add_cpu(&ra2->open_log_count, 1);
5123
5124	ntfs_fix_pre_write(&rh->rhdr, log->page_size);
5125
5126	err = ntfs_sb_write_run(sbi, &ni->file.run, 0, rh, log->page_size, 0);
5127	if (!err)
5128		err = ntfs_sb_write_run(sbi, &log->ni->file.run, log->page_size,
5129					rh, log->page_size, 0);
5130
5131	kfree(rh);
5132	if (err)
5133		goto out;
5134
5135out:
5136	kfree(rst);
5137	if (lcb)
5138		lcb_put(lcb);
5139
5140	/*
5141	 * Scan the Open Attribute Table to close all of
5142	 * the open attributes.
5143	 */
5144	oe = NULL;
5145	while ((oe = enum_rstbl(oatbl, oe))) {
5146		rno = ino_get(&oe->ref);
5147
5148		if (oe->is_attr_name == 1) {
5149			kfree(oe->ptr);
5150			oe->ptr = NULL;
5151			continue;
5152		}
5153
5154		if (oe->is_attr_name)
5155			continue;
5156
5157		oa = oe->ptr;
5158		if (!oa)
5159			continue;
5160
5161		run_close(&oa->run0);
5162		kfree(oa->attr);
5163		if (oa->ni)
5164			iput(&oa->ni->vfs_inode);
5165		kfree(oa);
5166	}
5167
5168	kfree(trtbl);
5169	kfree(oatbl);
5170	kfree(dptbl);
5171	kfree(attr_names);
5172	kfree(log->rst_info.r_page);
5173
5174	kfree(ra);
5175	kfree(log->one_page_buf);
5176
5177	if (err)
5178		sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
5179
5180	if (err == -EROFS)
5181		err = 0;
5182	else if (log->set_dirty)
5183		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
5184
5185	kfree(log);
5186
5187	return err;
5188}
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *
   4 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
   5 *
   6 */
   7
   8#include <linux/blkdev.h>
   9#include <linux/fs.h>
  10#include <linux/random.h>
  11#include <linux/slab.h>
  12
  13#include "debug.h"
  14#include "ntfs.h"
  15#include "ntfs_fs.h"
  16
  17/*
  18 * LOG FILE structs
  19 */
  20
  21// clang-format off
  22
  23#define MaxLogFileSize     0x100000000ull
  24#define DefaultLogPageSize 4096
  25#define MinLogRecordPages  0x30
  26
/*
 * Header at the start of a log restart page.
 * The RESTART_AREA is found 'ra_off' bytes from the start of this header.
 */
  27struct RESTART_HDR {
  28	struct NTFS_RECORD_HEADER rhdr; // 0x00: Record header, signature 'RSTR'.
  29	__le32 sys_page_size; // 0x10: Page size of the system which initialized the log.
  30	__le32 page_size;     // 0x14: Log page size used for this log file.
  31	__le16 ra_off;        // 0x18: Byte offset from this header to the RESTART_AREA (8-byte aligned).
  32	__le16 minor_ver;     // 0x1A: Minor version.
  33	__le16 major_ver;     // 0x1C: Major version.
  34	__le16 fixups[];      // 0x1E: Fixup words (see rhdr.fix_off / rhdr.fix_num).
  35};
  36
/* Client index meaning "no client"; it terminates a client list. */
  37#define LFS_NO_CLIENT 0xffff
  38#define LFS_NO_CLIENT_LE cpu_to_le16(0xffff)
  39
/*
 * Per-client record kept in RESTART_AREA.clients[].
 * Records are chained by index through 'next_client'; the chain ends
 * with LFS_NO_CLIENT_LE.
 */
  40struct CLIENT_REC {
  41	__le64 oldest_lsn;  // 0x00: Oldest lsn of this client (0 is ignored when searching for the oldest).
  42	__le64 restart_lsn; // 0x08:
  43	__le16 prev_client; // 0x10:
  44	__le16 next_client; // 0x12: Index of the next client record or LFS_NO_CLIENT_LE.
  45	__le16 seq_num;     // 0x14:
  46	u8 align[6];        // 0x16:
  47	__le32 name_bytes;  // 0x1C: In bytes.
  48	__le16 name[32];    // 0x20: Name of client.
  49};
  50
  51static_assert(sizeof(struct CLIENT_REC) == 0x60);
  52
  53/* Two copies of these will exist at the beginning of the log file */
  54struct RESTART_AREA {
  55	__le64 current_lsn;    // 0x00: Current logical end of log file.
  56	__le16 log_clients;    // 0x08: Maximum number of clients.
  57	__le16 client_idx[2];  // 0x0A: Free/use index into the client record arrays ([0] free list, [1] in-use list).
  58	__le16 flags;          // 0x0E: See RESTART_SINGLE_PAGE_IO.
  59	__le32 seq_num_bits;   // 0x10: The number of bits in sequence number.
  60	__le16 ra_len;         // 0x14: Length of the restart area, in bytes.
  61	__le16 client_off;     // 0x16: Byte offset from this structure to the client array.
  62	__le64 l_size;         // 0x18: Usable log file size.
  63	__le32 last_lsn_data_len; // 0x20:
  64	__le16 rec_hdr_len;    // 0x24: Log record header length (must be 8-byte aligned).
  65	__le16 data_off;       // 0x26: Log page data offset (must be 8-byte aligned).
  66	__le32 open_log_count; // 0x28: Incremented by log_replay when it rewrites the restart pages.
  67	__le32 align[5];       // 0x2C:
  68	struct CLIENT_REC clients[]; // 0x40:
  69};
  70
/*
 * Header of a client log record: describes one redo/undo operation pair
 * and the location it applies to. Redo/undo payloads are addressed by
 * byte offsets relative to the start of this header.
 */
  71struct LOG_REC_HDR {
  72	__le16 redo_op;      // 0x00:  NTFS_LOG_OPERATION
  73	__le16 undo_op;      // 0x02:  NTFS_LOG_OPERATION
  74	__le16 redo_off;     // 0x04:  Offset to Redo record.
  75	__le16 redo_len;     // 0x06:  Redo length.
  76	__le16 undo_off;     // 0x08:  Offset to Undo record.
  77	__le16 undo_len;     // 0x0A:  Undo length.
  78	__le16 target_attr;  // 0x0C:  Byte offset of the target entry in the Open Attribute Table.
  79	__le16 lcns_follow;  // 0x0E:  Number of entries in page_lcns[].
  80	__le16 record_off;   // 0x10:
  81	__le16 attr_off;     // 0x12:
  82	__le16 cluster_off;  // 0x14:  In sectors (shifted by SECTOR_SHIFT when used).
  83	__le16 reserved;     // 0x16:
  84	__le64 target_vcn;   // 0x18:  Vcn that page_lcns[0] maps.
  85	__le64 page_lcns[];  // 0x20:  Lcns for target_vcn, target_vcn + 1, ...
  86};
  87
  88static_assert(sizeof(struct LOG_REC_HDR) == 0x20);
  89
/* Value of an entry's 'next' field that marks the entry as allocated. */
  90#define RESTART_ENTRY_ALLOCATED    0xFFFFFFFF
  91#define RESTART_ENTRY_ALLOCATED_LE cpu_to_le32(0xFFFFFFFF)
  92
/*
 * Header of a restart table. The entries follow this header directly;
 * bytes_per_rt() computes the table length as header + used * size.
 */
  93struct RESTART_TABLE {
  94	__le16 size;       // 0x00: In bytes
  95	__le16 used;       // 0x02: Entries
  96	__le16 total;      // 0x04: Entries
  97	__le16 res[3];     // 0x06:
  98	__le32 free_goal;  // 0x0C:
  99	__le32 first_free; // 0x10:
 100	__le32 last_free;  // 0x14:
 101
 102};
 103
 104static_assert(sizeof(struct RESTART_TABLE) == 0x18);
 105
/* Variable-length record that ties an attribute name to an Open Attribute Table entry. */
 106struct ATTR_NAME_ENTRY {
 107	__le16 off; // Offset in the Open attribute Table.
 108	__le16 name_bytes; // Presumably the length of 'name' in bytes - TODO confirm against users.
 109	__le16 name[];
 110};
 111
/*
 * Entry of the Open Attribute Table.
 * 'is_attr_name', 'name_len' and 'ptr' are in-memory bookkeeping: during
 * replay 'ptr' first holds a kmalloc'ed attribute name (is_attr_name == 1)
 * and is later replaced by a pointer to the struct OpenAttr.
 */
 112struct OPEN_ATTR_ENRTY {
 113	__le32 next;            // 0x00: RESTART_ENTRY_ALLOCATED if allocated
 114	__le32 bytes_per_index; // 0x04:
 115	enum ATTR_TYPE type;    // 0x08:
 116	u8 is_dirty_pages;      // 0x0C:
 117	u8 is_attr_name;        // 0x0D: Faked field to manage 'ptr'
 118	u8 name_len;            // 0x0E: Faked field to manage 'ptr'
 119	u8 res;                 // 0x0F:
 120	struct MFT_REF ref;     // 0x10: File Reference of file containing attribute
 121	__le64 open_record_lsn; // 0x18:
 122	void *ptr;              // 0x20:
 123};
 124
 125/* 32 bit version of 'struct OPEN_ATTR_ENRTY' */
/*
 * When the restart version is 0, log_replay converts such an entry into a
 * struct OPEN_ATTR_ENRTY by copying bytes_per_index, type, is_dirty_pages,
 * ref and open_record_lsn field by field.
 */
 126struct OPEN_ATTR_ENRTY_32 {
 127	__le32 next;            // 0x00: RESTART_ENTRY_ALLOCATED if allocated
 128	__le32 ptr;             // 0x04:
 129	struct MFT_REF ref;     // 0x08:
 130	__le64 open_record_lsn; // 0x10:
 131	u8 is_dirty_pages;      // 0x18:
 132	u8 is_attr_name;        // 0x19:
 133	u8 res1[2];             // 0x1A:
 134	enum ATTR_TYPE type;    // 0x1C:
 135	u8 name_len;            // 0x20: In wchar
 136	u8 res2[3];             // 0x21:
 137	__le32 AttributeName;   // 0x24:
 138	__le32 bytes_per_index; // 0x28:
 139};
 140
/* Size in bytes of the version 0 (32 bit) open attribute entry. */
 141#define SIZEOF_OPENATTRIBUTEENTRY0 0x2c
 142// static_assert( 0x2C == sizeof(struct OPEN_ATTR_ENRTY_32) );
/* The in-memory entry must be smaller than the version 0 entry size. */
 143static_assert(sizeof(struct OPEN_ATTR_ENRTY) < SIZEOF_OPENATTRIBUTEENTRY0);
 144
 145/*
 146 * One entry exists in the Dirty Pages Table for each page which is dirty at
 147 * the time the Restart Area is written.
 148 */
 149struct DIR_PAGE_ENTRY {
 150	__le32 next;         // 0x00: RESTART_ENTRY_ALLOCATED if allocated
 151	__le32 target_attr;  // 0x04: Index into the Open attribute Table (used as a byte offset into the table)
 152	__le32 transfer_len; // 0x08:
 153	__le32 lcns_follow;  // 0x0C: Number of entries in page_lcns[]
 154	__le64 vcn;          // 0x10: Vcn of dirty page
 155	__le64 oldest_lsn;   // 0x18: Considered when computing the Redo lsn (0 is ignored)
 156	__le64 page_lcns[];  // 0x20: Lcn for vcn + i; replay skips entries that are 0
 157};
 158
 159static_assert(sizeof(struct DIR_PAGE_ENTRY) == 0x20);
 160
 161/* 32 bit version of 'struct DIR_PAGE_ENTRY' */
/* 64-bit quantities are split into low/high 32-bit halves in this layout. */
 162struct DIR_PAGE_ENTRY_32 {
 163	__le32 next;		// 0x00: RESTART_ENTRY_ALLOCATED if allocated
 164	__le32 target_attr;	// 0x04: Index into the Open attribute Table
 165	__le32 transfer_len;	// 0x08:
 166	__le32 lcns_follow;	// 0x0C:
 167	__le32 reserved;	// 0x10:
 168	__le32 vcn_low;		// 0x14: Vcn of dirty page
 169	__le32 vcn_hi;		// 0x18: Vcn of dirty page
 170	__le32 oldest_lsn_low;	// 0x1C:
 171	__le32 oldest_lsn_hi;	// 0x20:
 172	__le32 page_lcns_low;	// 0x24:
 173	__le32 page_lcns_hi;	// 0x28:
 174};
 175
 176static_assert(offsetof(struct DIR_PAGE_ENTRY_32, vcn_low) == 0x14);
 177static_assert(sizeof(struct DIR_PAGE_ENTRY_32) == 0x2c);
 178
/* Values stored in TRANSACTION_ENTRY.transact_state. */
 179enum transact_state {
 180	TransactionUninitialized = 0,
 181	TransactionActive,    // Only transactions in this state are undone by log_replay.
 182	TransactionPrepared,  // Set when a PrepareTransaction record is analyzed.
 183	TransactionCommitted  // Set when a CommitTransaction record is analyzed.
 184};
 185
/* Entry of the Transaction Table; addressed by the log record's transact_id as a byte offset. */
 186struct TRANSACTION_ENTRY {
 187	__le32 next;          // 0x00: RESTART_ENTRY_ALLOCATED if allocated
 188	u8 transact_state;    // 0x04: enum transact_state
 189	u8 reserved[3];       // 0x05:
 190	__le64 first_lsn;     // 0x08: Considered when computing the Redo lsn (0 is ignored)
 191	__le64 prev_lsn;      // 0x10:
 192	__le64 undo_next_lsn; // 0x18: Lsn of the first record to undo; 0 means nothing to undo
 193	__le32 undo_records;  // 0x20: Number of undo log records pending abort
 194	__le32 undo_len;      // 0x24: Total undo size
 195};
 196
 197static_assert(sizeof(struct TRANSACTION_ENTRY) == 0x28);
 198
/*
 * Restart record: gives the lsn and byte length of each dumped table.
 * NOTE(review): presumably the client data of the restart log record read
 * at the start of replay - confirm against log_replay.
 */
 199struct NTFS_RESTART {
 200	__le32 major_ver;             // 0x00:
 201	__le32 minor_ver;             // 0x04:
 202	__le64 check_point_start;     // 0x08:
 203	__le64 open_attr_table_lsn;   // 0x10:
 204	__le64 attr_names_lsn;        // 0x18:
 205	__le64 dirty_pages_table_lsn; // 0x20:
 206	__le64 transact_table_lsn;    // 0x28:
 207	__le32 open_attr_len;         // 0x30: In bytes
 208	__le32 attr_names_len;        // 0x34: In bytes
 209	__le32 dirty_pages_len;       // 0x38: In bytes
 210	__le32 transact_table_len;    // 0x3C: In bytes
 211};
 212
 213static_assert(sizeof(struct NTFS_RESTART) == 0x40);
 214
/*
 * Four little-endian 64-bit attribute sizes.
 * NOTE(review): presumably the payload of a SetNewAttributeSizes log
 * operation - confirm against the handler of that operation.
 */
 215struct NEW_ATTRIBUTE_SIZES {
 216	__le64 alloc_size; // 0x00:
 217	__le64 valid_size; // 0x08:
 218	__le64 data_size;  // 0x10:
 219	__le64 total_size; // 0x18:
 220};
 221
/*
 * A run of bits in a bitmap: start offset and bit count.
 * NOTE(review): presumably used by Set/ClearBitsInNonresidentBitMap - confirm.
 */
 222struct BITMAP_RANGE {
 223	__le32 bitmap_off; // 0x00:
 224	__le32 bits;       // 0x04:
 225};
 226
/*
 * A run of clusters [lcn, lcn + len - 1].
 * The redo data of a DeleteDirtyClusters record is an array of these.
 */
 227struct LCN_RANGE {
 228	__le64 lcn; // 0x00: First Lcn of the range.
 229	__le64 len; // 0x08: Number of clusters in the range.
 230};
 231
 232/* The following type defines the different log record types. */
 233#define LfsClientRecord  cpu_to_le32(1)
 234#define LfsClientRestart cpu_to_le32(2)
 235
 236/* This is used to uniquely identify a client for a particular log file. */
 237struct CLIENT_ID {
 238	__le16 seq_num;    // 0x00:
 239	__le16 client_idx; // 0x02:
 240};
 241
 242/* This is the header that begins every Log Record in the log file. */
 243struct LFS_RECORD_HDR {
 244	__le64 this_lsn;		// 0x00:
 245	__le64 client_prev_lsn;		// 0x08:
 246	__le64 client_undo_next_lsn;	// 0x10:
 247	__le32 client_data_len;		// 0x18: Length of the client data (the LOG_REC_HDR record).
 248	struct CLIENT_ID client;	// 0x1C: Owner of this log record.
 249	__le32 record_type;		// 0x20: LfsClientRecord or LfsClientRestart.
 250	__le32 transact_id;		// 0x24: Byte offset of the transaction in the Transaction Table.
 251	__le16 flags;			// 0x28: LOG_RECORD_MULTI_PAGE
 252	u8 align[6];			// 0x2A:
 253};
 254
/* Bit in LFS_RECORD_HDR.flags. */
 255#define LOG_RECORD_MULTI_PAGE cpu_to_le16(1)
 256
 257static_assert(sizeof(struct LFS_RECORD_HDR) == 0x30);
 258
/* Per-page record bookkeeping embedded in RECORD_PAGE_HDR (record_hdr). */
 259struct LFS_RECORD {
 260	__le16 next_record_off;	// 0x00: Offset of the free space in the page,
 261	u8 align[6];		// 0x02:
 262	__le64 last_end_lsn;	// 0x08: lsn for the last log record which ends on the page,
 263};
 264
 265static_assert(sizeof(struct LFS_RECORD) == 0x10);
 266
/* Header at the start of every log record page. */
 267struct RECORD_PAGE_HDR {
 268	struct NTFS_RECORD_HEADER rhdr;	// 0x00: 'RCRD'
 269	__le32 rflags;			// 0x10: See LOG_PAGE_LOG_RECORD_END
 270	__le16 page_count;		// 0x14:
 271	__le16 page_pos;		// 0x16:
 272	struct LFS_RECORD record_hdr;	// 0x18:
 273	__le16 fixups[10];		// 0x28:
 274	__le32 file_off;		// 0x3c: Used when major version >= 2
 275};
 276
 277// clang-format on
 278
 279// Page contains the end of a log record.
 280#define LOG_PAGE_LOG_RECORD_END cpu_to_le32(0x00000001)
 281
 282static inline bool is_log_record_end(const struct RECORD_PAGE_HDR *hdr)
 283{
 284	return hdr->rflags & LOG_PAGE_LOG_RECORD_END;
 285}
 286
 287static_assert(offsetof(struct RECORD_PAGE_HDR, file_off) == 0x3c);
 288
 289/*
 290 * END of NTFS LOG structures
 291 */
 292
 293/* Define some tuning parameters to keep the restart tables a reasonable size. */
 294#define INITIAL_NUMBER_TRANSACTIONS 5
 295
/*
 * Operation codes stored in LOG_REC_HDR.redo_op / undo_op.
 * The numeric values are part of the log format; is_target_required()
 * indexes its bitmap with them.
 */
 296enum NTFS_LOG_OPERATION {
 297
 298	Noop = 0x00,
 299	CompensationLogRecord = 0x01,
 300	InitializeFileRecordSegment = 0x02,
 301	DeallocateFileRecordSegment = 0x03,
 302	WriteEndOfFileRecordSegment = 0x04,
 303	CreateAttribute = 0x05,
 304	DeleteAttribute = 0x06,
 305	UpdateResidentValue = 0x07,
 306	UpdateNonresidentValue = 0x08,
 307	UpdateMappingPairs = 0x09,
 308	DeleteDirtyClusters = 0x0A,
 309	SetNewAttributeSizes = 0x0B,
 310	AddIndexEntryRoot = 0x0C,
 311	DeleteIndexEntryRoot = 0x0D,
 312	AddIndexEntryAllocation = 0x0E,
 313	DeleteIndexEntryAllocation = 0x0F,
 314	WriteEndOfIndexBuffer = 0x10,
 315	SetIndexEntryVcnRoot = 0x11,
 316	SetIndexEntryVcnAllocation = 0x12,
 317	UpdateFileNameRoot = 0x13,
 318	UpdateFileNameAllocation = 0x14,
 319	SetBitsInNonresidentBitMap = 0x15,
 320	ClearBitsInNonresidentBitMap = 0x16,
 321	HotFix = 0x17,
 322	EndTopLevelAction = 0x18,
 323	PrepareTransaction = 0x19,
 324	CommitTransaction = 0x1A,
 325	ForgetTransaction = 0x1B,
 326	OpenNonresidentAttribute = 0x1C,
 327	OpenAttributeTableDump = 0x1D,
 328	AttributeNamesDump = 0x1E,
 329	DirtyPageTableDump = 0x1F,
 330	TransactionTableDump = 0x20,
 331	UpdateRecordDataRoot = 0x21,
 332	UpdateRecordDataAllocation = 0x22, // Last code covered by the AttributeRequired bitmap.
 333
 334	UpdateRelativeDataInIndex =
 335		0x23, // NtOfsRestartUpdateRelativeDataInIndex
 336	UpdateRelativeDataInIndex2 = 0x24,
 337	ZeroEndOfFileRecord = 0x25,
 338};
 339
 340/*
 341 * Array for log records which require a target attribute.
 342 * A true indicates that the corresponding restart operation
 343 * requires a target attribute.
 344 */
 345static const u8 AttributeRequired[] = {
 346	0xFC, 0xFB, 0xFF, 0x10, 0x06,
 347};
 348
 349static inline bool is_target_required(u16 op)
 350{
 351	bool ret = op <= UpdateRecordDataAllocation &&
 352		   (AttributeRequired[op >> 3] >> (op & 7) & 1);
 353	return ret;
 354}
 355
 356static inline bool can_skip_action(enum NTFS_LOG_OPERATION op)
 357{
 358	switch (op) {
 359	case Noop:
 360	case DeleteDirtyClusters:
 361	case HotFix:
 362	case EndTopLevelAction:
 363	case PrepareTransaction:
 364	case CommitTransaction:
 365	case ForgetTransaction:
 366	case CompensationLogRecord:
 367	case OpenNonresidentAttribute:
 368	case OpenAttributeTableDump:
 369	case AttributeNamesDump:
 370	case DirtyPageTableDump:
 371	case TransactionTableDump:
 372		return true;
 373	default:
 374		return false;
 375	}
 376}
 377
 378enum { lcb_ctx_undo_next, lcb_ctx_prev, lcb_ctx_next };
 379
 380/* Bytes per restart table. */
 381static inline u32 bytes_per_rt(const struct RESTART_TABLE *rt)
 382{
 383	return le16_to_cpu(rt->used) * le16_to_cpu(rt->size) +
 384	       sizeof(struct RESTART_TABLE);
 385}
 386
 387/* Log record length. */
 388static inline u32 lrh_length(const struct LOG_REC_HDR *lr)
 389{
 390	u16 t16 = le16_to_cpu(lr->lcns_follow);
 391
 392	return struct_size(lr, page_lcns, max_t(u16, 1, t16));
 393}
 394
/*
 * Log context block: state kept while enumerating log records.
 * Released with lcb_put(), which frees 'lrh' and, when 'alloc' is set,
 * 'log_rec' as well.
 */
 395struct lcb {
 396	struct LFS_RECORD_HDR *lrh; // Log record header of the current lsn.
 397	struct LOG_REC_HDR *log_rec; // Client data of the current record.
 398	u32 ctx_mode; // lcb_ctx_undo_next/lcb_ctx_prev/lcb_ctx_next
 399	struct CLIENT_ID client;
 400	bool alloc; // If true the we should deallocate 'log_rec'.
 401};
 402
 403static void lcb_put(struct lcb *lcb)
 404{
 405	if (lcb->alloc)
 406		kfree(lcb->log_rec);
 407	kfree(lcb->lrh);
 408	kfree(lcb);
 409}
 410
 411/* Find the oldest lsn from active clients. */
 412static inline void oldest_client_lsn(const struct CLIENT_REC *ca,
 413				     __le16 next_client, u64 *oldest_lsn)
 414{
 415	while (next_client != LFS_NO_CLIENT_LE) {
 416		const struct CLIENT_REC *cr = ca + le16_to_cpu(next_client);
 417		u64 lsn = le64_to_cpu(cr->oldest_lsn);
 418
 419		/* Ignore this block if it's oldest lsn is 0. */
 420		if (lsn && lsn < *oldest_lsn)
 421			*oldest_lsn = lsn;
 422
 423		next_client = cr->next_client;
 424	}
 425}
 426
 427static inline bool is_rst_page_hdr_valid(u32 file_off,
 428					 const struct RESTART_HDR *rhdr)
 429{
 430	u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
 431	u32 page_size = le32_to_cpu(rhdr->page_size);
 432	u32 end_usa;
 433	u16 ro;
 434
 435	if (sys_page < SECTOR_SIZE || page_size < SECTOR_SIZE ||
 436	    sys_page & (sys_page - 1) || page_size & (page_size - 1)) {
 437		return false;
 438	}
 439
 440	/* Check that if the file offset isn't 0, it is the system page size. */
 441	if (file_off && file_off != sys_page)
 442		return false;
 443
 444	/* Check support version 1.1+. */
 445	if (le16_to_cpu(rhdr->major_ver) <= 1 && !rhdr->minor_ver)
 446		return false;
 447
 448	if (le16_to_cpu(rhdr->major_ver) > 2)
 449		return false;
 450
 451	ro = le16_to_cpu(rhdr->ra_off);
 452	if (!IS_ALIGNED(ro, 8) || ro > sys_page)
 453		return false;
 454
 455	end_usa = ((sys_page >> SECTOR_SHIFT) + 1) * sizeof(short);
 456	end_usa += le16_to_cpu(rhdr->rhdr.fix_off);
 457
 458	if (ro < end_usa)
 459		return false;
 460
 461	return true;
 462}
 463
 464static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
 465{
 466	const struct RESTART_AREA *ra;
 467	u16 cl, fl, ul;
 468	u32 off, l_size, seq_bits;
 469	u16 ro = le16_to_cpu(rhdr->ra_off);
 470	u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
 471
 472	if (ro + offsetof(struct RESTART_AREA, l_size) >
 473	    SECTOR_SIZE - sizeof(short))
 474		return false;
 475
 476	ra = Add2Ptr(rhdr, ro);
 477	cl = le16_to_cpu(ra->log_clients);
 478
 479	if (cl > 1)
 480		return false;
 481
 482	off = le16_to_cpu(ra->client_off);
 483
 484	if (!IS_ALIGNED(off, 8) || ro + off > SECTOR_SIZE - sizeof(short))
 485		return false;
 486
 487	off += cl * sizeof(struct CLIENT_REC);
 488
 489	if (off > sys_page)
 490		return false;
 491
 492	/*
 493	 * Check the restart length field and whether the entire
 494	 * restart area is contained that length.
 495	 */
 496	if (le16_to_cpu(rhdr->ra_off) + le16_to_cpu(ra->ra_len) > sys_page ||
 497	    off > le16_to_cpu(ra->ra_len)) {
 498		return false;
 499	}
 500
 501	/*
 502	 * As a final check make sure that the use list and the free list
 503	 * are either empty or point to a valid client.
 504	 */
 505	fl = le16_to_cpu(ra->client_idx[0]);
 506	ul = le16_to_cpu(ra->client_idx[1]);
 507	if ((fl != LFS_NO_CLIENT && fl >= cl) ||
 508	    (ul != LFS_NO_CLIENT && ul >= cl))
 509		return false;
 510
 511	/* Make sure the sequence number bits match the log file size. */
 512	l_size = le64_to_cpu(ra->l_size);
 513
 514	seq_bits = sizeof(u64) * 8 + 3;
 515	while (l_size) {
 516		l_size >>= 1;
 517		seq_bits -= 1;
 518	}
 519
 520	if (seq_bits != ra->seq_num_bits)
 521		return false;
 522
 523	/* The log page data offset and record header length must be quad-aligned. */
 524	if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) ||
 525	    !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8))
 526		return false;
 527
 528	return true;
 529}
 530
 531static inline bool is_client_area_valid(const struct RESTART_HDR *rhdr,
 532					bool usa_error)
 533{
 534	u16 ro = le16_to_cpu(rhdr->ra_off);
 535	const struct RESTART_AREA *ra = Add2Ptr(rhdr, ro);
 536	u16 ra_len = le16_to_cpu(ra->ra_len);
 537	const struct CLIENT_REC *ca;
 538	u32 i;
 539
 540	if (usa_error && ra_len + ro > SECTOR_SIZE - sizeof(short))
 541		return false;
 542
 543	/* Find the start of the client array. */
 544	ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
 545
 546	/*
 547	 * Start with the free list.
 548	 * Check that all the clients are valid and that there isn't a cycle.
 549	 * Do the in-use list on the second pass.
 550	 */
 551	for (i = 0; i < 2; i++) {
 552		u16 client_idx = le16_to_cpu(ra->client_idx[i]);
 553		bool first_client = true;
 554		u16 clients = le16_to_cpu(ra->log_clients);
 555
 556		while (client_idx != LFS_NO_CLIENT) {
 557			const struct CLIENT_REC *cr;
 558
 559			if (!clients ||
 560			    client_idx >= le16_to_cpu(ra->log_clients))
 561				return false;
 562
 563			clients -= 1;
 564			cr = ca + client_idx;
 565
 566			client_idx = le16_to_cpu(cr->next_client);
 567
 568			if (first_client) {
 569				first_client = false;
 570				if (cr->prev_client != LFS_NO_CLIENT_LE)
 571					return false;
 572			}
 573		}
 574	}
 575
 576	return true;
 577}
 578
 579/*
 580 * remove_client
 581 *
 582 * Remove a client record from a client record list an restart area.
 583 */
 584static inline void remove_client(struct CLIENT_REC *ca,
 585				 const struct CLIENT_REC *cr, __le16 *head)
 586{
 587	if (cr->prev_client == LFS_NO_CLIENT_LE)
 588		*head = cr->next_client;
 589	else
 590		ca[le16_to_cpu(cr->prev_client)].next_client = cr->next_client;
 591
 592	if (cr->next_client != LFS_NO_CLIENT_LE)
 593		ca[le16_to_cpu(cr->next_client)].prev_client = cr->prev_client;
 594}
 595
 596/*
 597 * add_client - Add a client record to the start of a list.
 598 */
 599static inline void add_client(struct CLIENT_REC *ca, u16 index, __le16 *head)
 600{
 601	struct CLIENT_REC *cr = ca + index;
 602
 603	cr->prev_client = LFS_NO_CLIENT_LE;
 604	cr->next_client = *head;
 605
 606	if (*head != LFS_NO_CLIENT_LE)
 607		ca[le16_to_cpu(*head)].prev_client = cpu_to_le16(index);
 608
 609	*head = cpu_to_le16(index);
 610}
 611
 612static inline void *enum_rstbl(struct RESTART_TABLE *t, void *c)
 613{
 614	__le32 *e;
 615	u32 bprt;
 616	u16 rsize = t ? le16_to_cpu(t->size) : 0;
 617
 618	if (!c) {
 619		if (!t || !t->total)
 620			return NULL;
 621		e = Add2Ptr(t, sizeof(struct RESTART_TABLE));
 622	} else {
 623		e = Add2Ptr(c, rsize);
 624	}
 625
 626	/* Loop until we hit the first one allocated, or the end of the list. */
 627	for (bprt = bytes_per_rt(t); PtrOffset(t, e) < bprt;
 628	     e = Add2Ptr(e, rsize)) {
 629		if (*e == RESTART_ENTRY_ALLOCATED_LE)
 630			return e;
 631	}
 632	return NULL;
 633}
 634
 635/*
 636 * find_dp - Search for a @vcn in Dirty Page Table.
 637 */
 638static inline struct DIR_PAGE_ENTRY *find_dp(struct RESTART_TABLE *dptbl,
 639					     u32 target_attr, u64 vcn)
 640{
 641	__le32 ta = cpu_to_le32(target_attr);
 642	struct DIR_PAGE_ENTRY *dp = NULL;
 643
 644	while ((dp = enum_rstbl(dptbl, dp))) {
 645		u64 dp_vcn = le64_to_cpu(dp->vcn);
 646
 647		if (dp->target_attr == ta && vcn >= dp_vcn &&
 648		    vcn < dp_vcn + le32_to_cpu(dp->lcns_follow)) {
 649			return dp;
 650		}
 651	}
 652	return NULL;
 653}
 654
 655static inline u32 norm_file_page(u32 page_size, u32 *l_size, bool use_default)
 656{
 657	if (use_default)
 658		page_size = DefaultLogPageSize;
 659
 660	/* Round the file size down to a system page boundary. */
 661	*l_size &= ~(page_size - 1);
 662
 663	/* File should contain at least 2 restart pages and MinLogRecordPages pages. */
 664	if (*l_size < (MinLogRecordPages + 2) * page_size)
 665		return 0;
 666
 667	return page_size;
 668}
 669
 670static bool check_log_rec(const struct LOG_REC_HDR *lr, u32 bytes, u32 tr,
 671			  u32 bytes_per_attr_entry)
 672{
 673	u16 t16;
 674
 675	if (bytes < sizeof(struct LOG_REC_HDR))
 676		return false;
 677	if (!tr)
 678		return false;
 679
 680	if ((tr - sizeof(struct RESTART_TABLE)) %
 681	    sizeof(struct TRANSACTION_ENTRY))
 682		return false;
 683
 684	if (le16_to_cpu(lr->redo_off) & 7)
 685		return false;
 686
 687	if (le16_to_cpu(lr->undo_off) & 7)
 688		return false;
 689
 690	if (lr->target_attr)
 691		goto check_lcns;
 692
 693	if (is_target_required(le16_to_cpu(lr->redo_op)))
 694		return false;
 695
 696	if (is_target_required(le16_to_cpu(lr->undo_op)))
 697		return false;
 698
 699check_lcns:
 700	if (!lr->lcns_follow)
 701		goto check_length;
 702
 703	t16 = le16_to_cpu(lr->target_attr);
 704	if ((t16 - sizeof(struct RESTART_TABLE)) % bytes_per_attr_entry)
 705		return false;
 706
 707check_length:
 708	if (bytes < lrh_length(lr))
 709		return false;
 710
 711	return true;
 712}
 713
 714static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes)
 715{
 716	u32 ts;
 717	u32 i, off;
 718	u16 rsize = le16_to_cpu(rt->size);
 719	u16 ne = le16_to_cpu(rt->used);
 720	u32 ff = le32_to_cpu(rt->first_free);
 721	u32 lf = le32_to_cpu(rt->last_free);
 722
 723	ts = rsize * ne + sizeof(struct RESTART_TABLE);
 724
 725	if (!rsize || rsize > bytes ||
 726	    rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts ||
 727	    le16_to_cpu(rt->total) > ne || ff > ts || lf > ts ||
 728	    (ff && ff < sizeof(struct RESTART_TABLE)) ||
 729	    (lf && lf < sizeof(struct RESTART_TABLE))) {
 730		return false;
 731	}
 732
 733	/*
 734	 * Verify each entry is either allocated or points
 735	 * to a valid offset the table.
 736	 */
 737	for (i = 0; i < ne; i++) {
 738		off = le32_to_cpu(*(__le32 *)Add2Ptr(
 739			rt, i * rsize + sizeof(struct RESTART_TABLE)));
 740
 741		if (off != RESTART_ENTRY_ALLOCATED && off &&
 742		    (off < sizeof(struct RESTART_TABLE) ||
 743		     ((off - sizeof(struct RESTART_TABLE)) % rsize))) {
 744			return false;
 745		}
 746	}
 747
 748	/*
 749	 * Walk through the list headed by the first entry to make
 750	 * sure none of the entries are currently being used.
 751	 */
 752	for (off = ff; off;) {
 753		if (off == RESTART_ENTRY_ALLOCATED)
 754			return false;
 755
 756		off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off));
 757	}
 758
 759	return true;
 760}
 761
 762/*
 763 * free_rsttbl_idx - Free a previously allocated index a Restart Table.
 764 */
 765static inline void free_rsttbl_idx(struct RESTART_TABLE *rt, u32 off)
 766{
 767	__le32 *e;
 768	u32 lf = le32_to_cpu(rt->last_free);
 769	__le32 off_le = cpu_to_le32(off);
 770
 771	e = Add2Ptr(rt, off);
 772
 773	if (off < le32_to_cpu(rt->free_goal)) {
 774		*e = rt->first_free;
 775		rt->first_free = off_le;
 776		if (!lf)
 777			rt->last_free = off_le;
 778	} else {
 779		if (lf)
 780			*(__le32 *)Add2Ptr(rt, lf) = off_le;
 781		else
 782			rt->first_free = off_le;
 783
 784		rt->last_free = off_le;
 785		*e = 0;
 786	}
 787
 788	le16_sub_cpu(&rt->total, 1);
 789}
 790
 791static inline struct RESTART_TABLE *init_rsttbl(u16 esize, u16 used)
 792{
 793	__le32 *e, *last_free;
 794	u32 off;
 795	u32 bytes = esize * used + sizeof(struct RESTART_TABLE);
 796	u32 lf = sizeof(struct RESTART_TABLE) + (used - 1) * esize;
 797	struct RESTART_TABLE *t = kzalloc(bytes, GFP_NOFS);
 798
 799	if (!t)
 800		return NULL;
 801
 802	t->size = cpu_to_le16(esize);
 803	t->used = cpu_to_le16(used);
 804	t->free_goal = cpu_to_le32(~0u);
 805	t->first_free = cpu_to_le32(sizeof(struct RESTART_TABLE));
 806	t->last_free = cpu_to_le32(lf);
 807
 808	e = (__le32 *)(t + 1);
 809	last_free = Add2Ptr(t, lf);
 810
 811	for (off = sizeof(struct RESTART_TABLE) + esize; e < last_free;
 812	     e = Add2Ptr(e, esize), off += esize) {
 813		*e = cpu_to_le32(off);
 814	}
 815	return t;
 816}
 817
 818static inline struct RESTART_TABLE *extend_rsttbl(struct RESTART_TABLE *tbl,
 819						  u32 add, u32 free_goal)
 820{
 821	u16 esize = le16_to_cpu(tbl->size);
 822	__le32 osize = cpu_to_le32(bytes_per_rt(tbl));
 823	u32 used = le16_to_cpu(tbl->used);
 824	struct RESTART_TABLE *rt;
 825
 826	rt = init_rsttbl(esize, used + add);
 827	if (!rt)
 828		return NULL;
 829
 830	memcpy(rt + 1, tbl + 1, esize * used);
 831
 832	rt->free_goal = free_goal == ~0u ?
 833				cpu_to_le32(~0u) :
 834				cpu_to_le32(sizeof(struct RESTART_TABLE) +
 835					    free_goal * esize);
 836
 837	if (tbl->first_free) {
 838		rt->first_free = tbl->first_free;
 839		*(__le32 *)Add2Ptr(rt, le32_to_cpu(tbl->last_free)) = osize;
 840	} else {
 841		rt->first_free = osize;
 842	}
 843
 844	rt->total = tbl->total;
 845
 846	kfree(tbl);
 847	return rt;
 848}
 849
 850/*
 851 * alloc_rsttbl_idx
 852 *
 853 * Allocate an index from within a previously initialized Restart Table.
 854 */
 855static inline void *alloc_rsttbl_idx(struct RESTART_TABLE **tbl)
 856{
 857	u32 off;
 858	__le32 *e;
 859	struct RESTART_TABLE *t = *tbl;
 860
 861	if (!t->first_free) {
 862		*tbl = t = extend_rsttbl(t, 16, ~0u);
 863		if (!t)
 864			return NULL;
 865	}
 866
 867	off = le32_to_cpu(t->first_free);
 868
 869	/* Dequeue this entry and zero it. */
 870	e = Add2Ptr(t, off);
 871
 872	t->first_free = *e;
 873
 874	memset(e, 0, le16_to_cpu(t->size));
 875
 876	*e = RESTART_ENTRY_ALLOCATED_LE;
 877
 878	/* If list is going empty, then we fix the last_free as well. */
 879	if (!t->first_free)
 880		t->last_free = 0;
 881
 882	le16_add_cpu(&t->total, 1);
 883
 884	return Add2Ptr(t, off);
 885}
 886
 887/*
 888 * alloc_rsttbl_from_idx
 889 *
 890 * Allocate a specific index from within a previously initialized Restart Table.
 891 */
 892static inline void *alloc_rsttbl_from_idx(struct RESTART_TABLE **tbl, u32 vbo)
 893{
 894	u32 off;
 895	__le32 *e;
 896	struct RESTART_TABLE *rt = *tbl;
 897	u32 bytes = bytes_per_rt(rt);
 898	u16 esize = le16_to_cpu(rt->size);
 899
 900	/* If the entry is not the table, we will have to extend the table. */
 901	if (vbo >= bytes) {
 902		/*
 903		 * Extend the size by computing the number of entries between
 904		 * the existing size and the desired index and adding 1 to that.
 905		 */
 906		u32 bytes2idx = vbo - bytes;
 907
 908		/*
 909		 * There should always be an integral number of entries
 910		 * being added. Now extend the table.
 911		 */
 912		*tbl = rt = extend_rsttbl(rt, bytes2idx / esize + 1, bytes);
 913		if (!rt)
 914			return NULL;
 915	}
 916
 917	/* See if the entry is already allocated, and just return if it is. */
 918	e = Add2Ptr(rt, vbo);
 919
 920	if (*e == RESTART_ENTRY_ALLOCATED_LE)
 921		return e;
 922
 923	/*
 924	 * Walk through the table, looking for the entry we're
 925	 * interested and the previous entry.
 926	 */
 927	off = le32_to_cpu(rt->first_free);
 928	e = Add2Ptr(rt, off);
 929
 930	if (off == vbo) {
 931		/* this is a match */
 932		rt->first_free = *e;
 933		goto skip_looking;
 934	}
 935
 936	/*
 937	 * Need to walk through the list looking for the predecessor
 938	 * of our entry.
 939	 */
 940	for (;;) {
 941		/* Remember the entry just found */
 942		u32 last_off = off;
 943		__le32 *last_e = e;
 944
 945		/* Should never run of entries. */
 946
 947		/* Lookup up the next entry the list. */
 948		off = le32_to_cpu(*last_e);
 949		e = Add2Ptr(rt, off);
 950
 951		/* If this is our match we are done. */
 952		if (off == vbo) {
 953			*last_e = *e;
 954
 955			/*
 956			 * If this was the last entry, we update that
 957			 * table as well.
 958			 */
 959			if (le32_to_cpu(rt->last_free) == off)
 960				rt->last_free = cpu_to_le32(last_off);
 961			break;
 962		}
 963	}
 964
 965skip_looking:
 966	/* If the list is now empty, we fix the last_free as well. */
 967	if (!rt->first_free)
 968		rt->last_free = 0;
 969
 970	/* Zero this entry. */
 971	memset(e, 0, esize);
 972	*e = RESTART_ENTRY_ALLOCATED_LE;
 973
 974	le16_add_cpu(&rt->total, 1);
 975
 976	return e;
 977}
 978
/* Result of probing the log file for a restart page, see log_read_rst(). */
struct restart_info {
	u64 last_lsn; /* current_lsn of the restart area, or the page lsn after chkdsk. */
	struct RESTART_HDR *r_page; /* The accepted restart page buffer. */
	u32 vbo; /* File offset of the last candidate restart page. */
	bool chkdsk_was_run; /* The page carried the chkdsk signature. */
	bool valid_page; /* Header, restart area and client area passed the checks. */
	bool initialized; /* Some page had a signature other than the uninitialized one. */
	bool restart; /* A restart page was accepted and r_page is set. */
};
 988
/* Bit of RESTART_AREA::flags; already in on-disk (little-endian) byte order. */
#define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001)

/* Bits of ntfs_log::l_flags. */
#define NTFSLOG_WRAPPED 0x00000001 /* Set by log_create() when the log has wrapped. */
#define NTFSLOG_MULTIPLE_PAGE_IO 0x00000002 /* Set by log_create() for multi page I/O. */
#define NTFSLOG_NO_LAST_LSN 0x00000004 /* The whole file counts as available. */
#define NTFSLOG_REUSE_TAIL 0x00000010 /* The current tail page is going to be reused. */
#define NTFSLOG_NO_OLDEST_LSN 0x00000020 /* No oldest lsn: free space starts at first_page. */
 996
/* Helper struct to work with NTFS $LogFile. */
struct ntfs_log {
	struct ntfs_inode *ni;

	u32 l_size; /* Log file size; offsets at or beyond it are rejected. */
	u32 orig_file_size;
	u32 sys_page_size; /* Copied from page_size by log_init_pg_hdr(). */
	u32 sys_page_mask; /* Copied from page_mask by log_init_pg_hdr(). */
	u32 page_size;
	u32 page_mask; // page_size - 1
	u8 page_bits;
	struct RECORD_PAGE_HDR *one_page_buf; /* Bounce buffer for reads that start inside a page. */

	struct RESTART_TABLE *open_attr_tbl;
	u32 transaction_id;
	u32 clst_per_page; /* Clusters per log page, at least 1. */

	u32 first_page; /* File offset of the first log record page. */
	u32 next_page;
	u32 ra_off; /* Offset of the restart area inside a restart page. */
	u32 data_off; /* Offset of the record data inside a log page. */
	u32 restart_size; /* sys_page_size - ra_off */
	u32 data_size; /* page_size - data_off */
	u16 record_header_len;
	u64 seq_num;
	u32 seq_num_bits;
	u32 file_data_bits;
	u32 seq_num_mask; /* (8 << file_data_bits) - 1, see log_create() */

	struct RESTART_AREA *ra; /* In-memory image of the next restart area. */
	u32 ra_size; /* The usable size of the restart area. */

	/*
	 * If true, then the in-memory restart area is to be written
	 * to the first position on the disk.
	 */
	bool init_ra;
	bool set_dirty; /* True if we need to set dirty flag. */

	u64 oldest_lsn;

	u32 oldest_lsn_off;
	u64 last_lsn;

	u32 total_avail; /* Record data bytes in all log pages. */
	u32 total_avail_pages; /* l_size - first_page, a byte count despite the name. */
	u32 total_undo_commit;
	u32 max_current_avail; /* Number of log pages times reserved. */
	u32 current_avail;
	u32 reserved; /* data_size - record_header_len */

	short major_ver;
	short minor_ver;

	u32 l_flags; /* See NTFSLOG_XXX */
	u32 current_openlog_count; /* On-disk value for open_log_count. */

	struct CLIENT_ID client_id;
	u32 client_undo_commit;

	struct restart_info rst_info, rst_info2;
};
1059
1060static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn)
1061{
1062	u32 vbo = (lsn << log->seq_num_bits) >> (log->seq_num_bits - 3);
1063
1064	return vbo;
1065}
1066
1067/* Compute the offset in the log file of the next log page. */
1068static inline u32 next_page_off(struct ntfs_log *log, u32 off)
1069{
1070	off = (off & ~log->sys_page_mask) + log->page_size;
1071	return off >= log->l_size ? log->first_page : off;
1072}
1073
1074static inline u32 lsn_to_page_off(struct ntfs_log *log, u64 lsn)
1075{
1076	return (((u32)lsn) << 3) & log->page_mask;
1077}
1078
1079static inline u64 vbo_to_lsn(struct ntfs_log *log, u32 off, u64 Seq)
1080{
1081	return (off >> 3) + (Seq << log->file_data_bits);
1082}
1083
1084static inline bool is_lsn_in_file(struct ntfs_log *log, u64 lsn)
1085{
1086	return lsn >= log->oldest_lsn &&
1087	       lsn <= le64_to_cpu(log->ra->current_lsn);
1088}
1089
1090static inline u32 hdr_file_off(struct ntfs_log *log,
1091			       struct RECORD_PAGE_HDR *hdr)
1092{
1093	if (log->major_ver < 2)
1094		return le64_to_cpu(hdr->rhdr.lsn);
1095
1096	return le32_to_cpu(hdr->file_off);
1097}
1098
1099static inline u64 base_lsn(struct ntfs_log *log,
1100			   const struct RECORD_PAGE_HDR *hdr, u64 lsn)
1101{
1102	u64 h_lsn = le64_to_cpu(hdr->rhdr.lsn);
1103	u64 ret = (((h_lsn >> log->file_data_bits) +
1104		    (lsn < (lsn_to_vbo(log, h_lsn) & ~log->page_mask) ? 1 : 0))
1105		   << log->file_data_bits) +
1106		  ((((is_log_record_end(hdr) &&
1107		      h_lsn <= le64_to_cpu(hdr->record_hdr.last_end_lsn)) ?
1108			     le16_to_cpu(hdr->record_hdr.next_record_off) :
1109			     log->page_size) +
1110		    lsn) >>
1111		   3);
1112
1113	return ret;
1114}
1115
1116static inline bool verify_client_lsn(struct ntfs_log *log,
1117				     const struct CLIENT_REC *client, u64 lsn)
1118{
1119	return lsn >= le64_to_cpu(client->oldest_lsn) &&
1120	       lsn <= le64_to_cpu(log->ra->current_lsn) && lsn;
1121}
1122
1123static int read_log_page(struct ntfs_log *log, u32 vbo,
1124			 struct RECORD_PAGE_HDR **buffer, bool *usa_error)
1125{
1126	int err = 0;
1127	u32 page_idx = vbo >> log->page_bits;
1128	u32 page_off = vbo & log->page_mask;
1129	u32 bytes = log->page_size - page_off;
1130	void *to_free = NULL;
1131	u32 page_vbo = page_idx << log->page_bits;
1132	struct RECORD_PAGE_HDR *page_buf;
1133	struct ntfs_inode *ni = log->ni;
1134	bool bBAAD;
1135
1136	if (vbo >= log->l_size)
1137		return -EINVAL;
1138
1139	if (!*buffer) {
1140		to_free = kmalloc(log->page_size, GFP_NOFS);
1141		if (!to_free)
1142			return -ENOMEM;
1143		*buffer = to_free;
1144	}
1145
1146	page_buf = page_off ? log->one_page_buf : *buffer;
1147
1148	err = ntfs_read_run_nb(ni->mi.sbi, &ni->file.run, page_vbo, page_buf,
1149			       log->page_size, NULL);
1150	if (err)
1151		goto out;
1152
1153	if (page_buf->rhdr.sign != NTFS_FFFF_SIGNATURE)
1154		ntfs_fix_post_read(&page_buf->rhdr, PAGE_SIZE, false);
1155
1156	if (page_buf != *buffer)
1157		memcpy(*buffer, Add2Ptr(page_buf, page_off), bytes);
1158
1159	bBAAD = page_buf->rhdr.sign == NTFS_BAAD_SIGNATURE;
1160
1161	if (usa_error)
1162		*usa_error = bBAAD;
1163	/* Check that the update sequence array for this page is valid */
1164	/* If we don't allow errors, raise an error status */
1165	else if (bBAAD)
1166		err = -EINVAL;
1167
1168out:
1169	if (err && to_free) {
1170		kfree(to_free);
1171		*buffer = NULL;
1172	}
1173
1174	return err;
1175}
1176
/*
 * log_read_rst
 *
 * Probe the log file for a restart page. With @first set the offsets
 * tried are 0, 512, 1024, 2048, ...; otherwise probing starts at 512
 * and keeps doubling. The search stops at the first log record page,
 * at a page carrying the chkdsk signature, or at the first restart
 * page that passes validation.
 *
 * When a page is accepted, @info->r_page takes over the page buffer;
 * otherwise the buffer is freed here.
 *
 * Return: always 0; the outcome is reported through @info.
 */
static int log_read_rst(struct ntfs_log *log, bool first,
			struct restart_info *info)
{
	u32 skip;
	u64 vbo;
	struct RESTART_HDR *r_page = NULL;

	/* Determine which restart area we are looking for. */
	if (first) {
		vbo = 0;
		skip = 512;
	} else {
		vbo = 512;
		skip = 0;
	}

	/* Loop until we succeed or run past the end of the log. */
	for (; vbo < log->l_size; vbo = 2 * vbo + skip, skip = 0) {
		bool usa_error;
		bool brst, bchk;
		struct RESTART_AREA *ra;

		/* Read a page header at the current offset. */
		if (read_log_page(log, vbo, (struct RECORD_PAGE_HDR **)&r_page,
				  &usa_error)) {
			/* Ignore any errors. */
			continue;
		}

		/* Exit if the signature is a log record page. */
		if (r_page->rhdr.sign == NTFS_RCRD_SIGNATURE) {
			info->initialized = true;
			break;
		}

		brst = r_page->rhdr.sign == NTFS_RSTR_SIGNATURE;
		bchk = r_page->rhdr.sign == NTFS_CHKD_SIGNATURE;

		if (!bchk && !brst) {
			if (r_page->rhdr.sign != NTFS_FFFF_SIGNATURE) {
				/*
				 * Remember if the signature does not
				 * indicate uninitialized file.
				 */
				info->initialized = true;
			}
			continue;
		}

		ra = NULL;
		info->valid_page = false;
		info->initialized = true;
		info->vbo = vbo;

		/* Let's check the restart area if this is a valid page. */
		if (!is_rst_page_hdr_valid(vbo, r_page))
			goto check_result;
		ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));

		if (!is_rst_area_valid(r_page))
			goto check_result;

		/*
		 * We have a valid restart page header and restart area.
		 * If chkdsk was run or we have no clients then we have
		 * no more checking to do.
		 */
		if (bchk || ra->client_idx[1] == LFS_NO_CLIENT_LE) {
			info->valid_page = true;
			goto check_result;
		}

		if (is_client_area_valid(r_page, usa_error)) {
			info->valid_page = true;
			ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
		}

check_result:
		/*
		 * If chkdsk was run then update the caller's
		 * values and return.
		 */
		if (r_page->rhdr.sign == NTFS_CHKD_SIGNATURE) {
			info->chkdsk_was_run = true;
			info->last_lsn = le64_to_cpu(r_page->rhdr.lsn);
			info->restart = true;
			/* Ownership of the page buffer moves to @info. */
			info->r_page = r_page;
			return 0;
		}

		/*
		 * If we have a valid page then copy the values
		 * we need from it.
		 */
		if (info->valid_page) {
			info->last_lsn = le64_to_cpu(ra->current_lsn);
			info->restart = true;
			/* Ownership of the page buffer moves to @info. */
			info->r_page = r_page;
			return 0;
		}
	}

	/* No page was accepted: the buffer is not needed anymore. */
	kfree(r_page);

	return 0;
}
1290
1291/*
1292 * Ilog_init_pg_hdr - Init @log from restart page header.
1293 */
1294static void log_init_pg_hdr(struct ntfs_log *log, u16 major_ver, u16 minor_ver)
1295{
1296	log->sys_page_size = log->page_size;
1297	log->sys_page_mask = log->page_mask;
1298
1299	log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits;
1300	if (!log->clst_per_page)
1301		log->clst_per_page = 1;
1302
1303	log->first_page = major_ver >= 2 ? 0x22 * log->page_size :
1304					   4 * log->page_size;
1305	log->major_ver = major_ver;
1306	log->minor_ver = minor_ver;
1307}
1308
1309/*
1310 * log_create - Init @log in cases when we don't have a restart area to use.
1311 */
1312static void log_create(struct ntfs_log *log, const u64 last_lsn,
1313		       u32 open_log_count, bool wrapped, bool use_multi_page)
1314{
1315	/* All file offsets must be quadword aligned. */
1316	log->file_data_bits = blksize_bits(log->l_size) - 3;
1317	log->seq_num_mask = (8 << log->file_data_bits) - 1;
1318	log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits;
1319	log->seq_num = (last_lsn >> log->file_data_bits) + 2;
1320	log->next_page = log->first_page;
1321	log->oldest_lsn = log->seq_num << log->file_data_bits;
1322	log->oldest_lsn_off = 0;
1323	log->last_lsn = log->oldest_lsn;
1324
1325	log->l_flags |= NTFSLOG_NO_LAST_LSN | NTFSLOG_NO_OLDEST_LSN;
1326
1327	/* Set the correct flags for the I/O and indicate if we have wrapped. */
1328	if (wrapped)
1329		log->l_flags |= NTFSLOG_WRAPPED;
1330
1331	if (use_multi_page)
1332		log->l_flags |= NTFSLOG_MULTIPLE_PAGE_IO;
1333
1334	/* Compute the log page values. */
1335	log->data_off = ALIGN(
1336		offsetof(struct RECORD_PAGE_HDR, fixups) +
1337			sizeof(short) * ((log->page_size >> SECTOR_SHIFT) + 1),
1338		8);
1339	log->data_size = log->page_size - log->data_off;
1340	log->record_header_len = sizeof(struct LFS_RECORD_HDR);
1341
1342	/* Remember the different page sizes for reservation. */
1343	log->reserved = log->data_size - log->record_header_len;
1344
1345	/* Compute the restart page values. */
1346	log->ra_off = ALIGN(
1347		offsetof(struct RESTART_HDR, fixups) +
1348			sizeof(short) *
1349				((log->sys_page_size >> SECTOR_SHIFT) + 1),
1350		8);
1351	log->restart_size = log->sys_page_size - log->ra_off;
1352	log->ra_size = struct_size(log->ra, clients, 1);
1353	log->current_openlog_count = open_log_count;
1354
1355	/*
1356	 * The total available log file space is the number of
1357	 * log file pages times the space available on each page.
1358	 */
1359	log->total_avail_pages = log->l_size - log->first_page;
1360	log->total_avail = log->total_avail_pages >> log->page_bits;
1361
1362	/*
1363	 * We assume that we can't use the end of the page less than
1364	 * the file record size.
1365	 * Then we won't need to reserve more than the caller asks for.
1366	 */
1367	log->max_current_avail = log->total_avail * log->reserved;
1368	log->total_avail = log->total_avail * log->data_size;
1369	log->current_avail = log->max_current_avail;
1370}
1371
1372/*
1373 * log_create_ra - Fill a restart area from the values stored in @log.
1374 */
1375static struct RESTART_AREA *log_create_ra(struct ntfs_log *log)
1376{
1377	struct CLIENT_REC *cr;
1378	struct RESTART_AREA *ra = kzalloc(log->restart_size, GFP_NOFS);
1379
1380	if (!ra)
1381		return NULL;
1382
1383	ra->current_lsn = cpu_to_le64(log->last_lsn);
1384	ra->log_clients = cpu_to_le16(1);
1385	ra->client_idx[1] = LFS_NO_CLIENT_LE;
1386	if (log->l_flags & NTFSLOG_MULTIPLE_PAGE_IO)
1387		ra->flags = RESTART_SINGLE_PAGE_IO;
1388	ra->seq_num_bits = cpu_to_le32(log->seq_num_bits);
1389	ra->ra_len = cpu_to_le16(log->ra_size);
1390	ra->client_off = cpu_to_le16(offsetof(struct RESTART_AREA, clients));
1391	ra->l_size = cpu_to_le64(log->l_size);
1392	ra->rec_hdr_len = cpu_to_le16(log->record_header_len);
1393	ra->data_off = cpu_to_le16(log->data_off);
1394	ra->open_log_count = cpu_to_le32(log->current_openlog_count + 1);
1395
1396	cr = ra->clients;
1397
1398	cr->prev_client = LFS_NO_CLIENT_LE;
1399	cr->next_client = LFS_NO_CLIENT_LE;
1400
1401	return ra;
1402}
1403
1404static u32 final_log_off(struct ntfs_log *log, u64 lsn, u32 data_len)
1405{
1406	u32 base_vbo = lsn << 3;
1407	u32 final_log_off = (base_vbo & log->seq_num_mask) & ~log->page_mask;
1408	u32 page_off = base_vbo & log->page_mask;
1409	u32 tail = log->page_size - page_off;
1410
1411	page_off -= 1;
1412
1413	/* Add the length of the header. */
1414	data_len += log->record_header_len;
1415
1416	/*
1417	 * If this lsn is contained this log page we are done.
1418	 * Otherwise we need to walk through several log pages.
1419	 */
1420	if (data_len > tail) {
1421		data_len -= tail;
1422		tail = log->data_size;
1423		page_off = log->data_off - 1;
1424
1425		for (;;) {
1426			final_log_off = next_page_off(log, final_log_off);
1427
1428			/*
1429			 * We are done if the remaining bytes
1430			 * fit on this page.
1431			 */
1432			if (data_len <= tail)
1433				break;
1434			data_len -= tail;
1435		}
1436	}
1437
1438	/*
1439	 * We add the remaining bytes to our starting position on this page
1440	 * and then add that value to the file offset of this log page.
1441	 */
1442	return final_log_off + data_len + page_off;
1443}
1444
1445static int next_log_lsn(struct ntfs_log *log, const struct LFS_RECORD_HDR *rh,
1446			u64 *lsn)
1447{
1448	int err;
1449	u64 this_lsn = le64_to_cpu(rh->this_lsn);
1450	u32 vbo = lsn_to_vbo(log, this_lsn);
1451	u32 end =
1452		final_log_off(log, this_lsn, le32_to_cpu(rh->client_data_len));
1453	u32 hdr_off = end & ~log->sys_page_mask;
1454	u64 seq = this_lsn >> log->file_data_bits;
1455	struct RECORD_PAGE_HDR *page = NULL;
1456
1457	/* Remember if we wrapped. */
1458	if (end <= vbo)
1459		seq += 1;
1460
1461	/* Log page header for this page. */
1462	err = read_log_page(log, hdr_off, &page, NULL);
1463	if (err)
1464		return err;
1465
1466	/*
1467	 * If the lsn we were given was not the last lsn on this page,
1468	 * then the starting offset for the next lsn is on a quad word
1469	 * boundary following the last file offset for the current lsn.
1470	 * Otherwise the file offset is the start of the data on the next page.
1471	 */
1472	if (this_lsn == le64_to_cpu(page->rhdr.lsn)) {
1473		/* If we wrapped, we need to increment the sequence number. */
1474		hdr_off = next_page_off(log, hdr_off);
1475		if (hdr_off == log->first_page)
1476			seq += 1;
1477
1478		vbo = hdr_off + log->data_off;
1479	} else {
1480		vbo = ALIGN(end, 8);
1481	}
1482
1483	/* Compute the lsn based on the file offset and the sequence count. */
1484	*lsn = vbo_to_lsn(log, vbo, seq);
1485
1486	/*
1487	 * If this lsn is within the legal range for the file, we return true.
1488	 * Otherwise false indicates that there are no more lsn's.
1489	 */
1490	if (!is_lsn_in_file(log, *lsn))
1491		*lsn = 0;
1492
1493	kfree(page);
1494
1495	return 0;
1496}
1497
1498/*
1499 * current_log_avail - Calculate the number of bytes available for log records.
1500 */
1501static u32 current_log_avail(struct ntfs_log *log)
1502{
1503	u32 oldest_off, next_free_off, free_bytes;
1504
1505	if (log->l_flags & NTFSLOG_NO_LAST_LSN) {
1506		/* The entire file is available. */
1507		return log->max_current_avail;
1508	}
1509
1510	/*
1511	 * If there is a last lsn the restart area then we know that we will
1512	 * have to compute the free range.
1513	 * If there is no oldest lsn then start at the first page of the file.
1514	 */
1515	oldest_off = (log->l_flags & NTFSLOG_NO_OLDEST_LSN) ?
1516			     log->first_page :
1517			     (log->oldest_lsn_off & ~log->sys_page_mask);
1518
1519	/*
1520	 * We will use the next log page offset to compute the next free page.
1521	 * If we are going to reuse this page go to the next page.
1522	 * If we are at the first page then use the end of the file.
1523	 */
1524	next_free_off = (log->l_flags & NTFSLOG_REUSE_TAIL) ?
1525				log->next_page + log->page_size :
1526			log->next_page == log->first_page ? log->l_size :
1527							    log->next_page;
1528
1529	/* If the two offsets are the same then there is no available space. */
1530	if (oldest_off == next_free_off)
1531		return 0;
1532	/*
1533	 * If the free offset follows the oldest offset then subtract
1534	 * this range from the total available pages.
1535	 */
1536	free_bytes =
1537		oldest_off < next_free_off ?
1538			log->total_avail_pages - (next_free_off - oldest_off) :
1539			oldest_off - next_free_off;
1540
1541	free_bytes >>= log->page_bits;
1542	return free_bytes * log->reserved;
1543}
1544
1545static bool check_subseq_log_page(struct ntfs_log *log,
1546				  const struct RECORD_PAGE_HDR *rp, u32 vbo,
1547				  u64 seq)
1548{
1549	u64 lsn_seq;
1550	const struct NTFS_RECORD_HEADER *rhdr = &rp->rhdr;
1551	u64 lsn = le64_to_cpu(rhdr->lsn);
1552
1553	if (rhdr->sign == NTFS_FFFF_SIGNATURE || !rhdr->sign)
1554		return false;
1555
1556	/*
1557	 * If the last lsn on the page occurs was written after the page
1558	 * that caused the original error then we have a fatal error.
1559	 */
1560	lsn_seq = lsn >> log->file_data_bits;
1561
1562	/*
1563	 * If the sequence number for the lsn the page is equal or greater
1564	 * than lsn we expect, then this is a subsequent write.
1565	 */
1566	return lsn_seq >= seq ||
1567	       (lsn_seq == seq - 1 && log->first_page == vbo &&
1568		vbo != (lsn_to_vbo(log, lsn) & ~log->page_mask));
1569}
1570
1571/*
1572 * last_log_lsn
1573 *
1574 * Walks through the log pages for a file, searching for the
1575 * last log page written to the file.
 *
 * The tail-copy pages are examined first (one pair for log versions below 2,
 * up to 0x10 pairs otherwise). Then the regular log pages are walked, starting
 * at log->next_page, or earlier at the tail's file offset when the tail
 * carries the current sequence number. While walking, log->seq_num,
 * log->last_lsn, log->next_page, log->ra->current_lsn and log->l_flags are
 * updated. Tail copies found to be newer than the page in the file are
 * written back into the file.
 *
 * Return: -ENOMEM on allocation failure, -EINVAL when a page past the
 * detected end passes check_subseq_log_page(), -EROFS when pages would have
 * to be rewritten on a read-only superblock, an error from
 * ntfs_sb_write_run(), otherwise the status of the last read_log_page() call.
 * NOTE(review): on the fall-through path the status of the last page read is
 * returned unchanged - confirm this is intended.
1576 */
1577static int last_log_lsn(struct ntfs_log *log)
1578{
1579	int err;
1580	bool usa_error = false;
1581	bool replace_page = false;
1582	bool reuse_page = log->l_flags & NTFSLOG_REUSE_TAIL;
1583	bool wrapped_file, wrapped;
1584
1585	u32 page_cnt = 1, page_pos = 1;
1586	u32 page_off = 0, page_off1 = 0, saved_off = 0;
1587	u32 final_off, second_off, final_off_prev = 0, second_off_prev = 0;
1588	u32 first_file_off = 0, second_file_off = 0;
1589	u32 part_io_count = 0;
1590	u32 tails = 0;
1591	u32 this_off, curpage_off, nextpage_off, remain_pages;
1592
1593	u64 expected_seq, seq_base = 0, lsn_base = 0;
1594	u64 best_lsn, best_lsn1, best_lsn2;
1595	u64 lsn_cur, lsn1, lsn2;
1596	u64 last_ok_lsn = reuse_page ? log->last_lsn : 0;
1597
1598	u16 cur_pos, best_page_pos;
1599
1600	struct RECORD_PAGE_HDR *page = NULL;
1601	struct RECORD_PAGE_HDR *tst_page = NULL;
1602	struct RECORD_PAGE_HDR *first_tail = NULL;
1603	struct RECORD_PAGE_HDR *second_tail = NULL;
1604	struct RECORD_PAGE_HDR *tail_page = NULL;
1605	struct RECORD_PAGE_HDR *second_tail_prev = NULL;
1606	struct RECORD_PAGE_HDR *first_tail_prev = NULL;
1607	struct RECORD_PAGE_HDR *page_bufs = NULL;
1608	struct RECORD_PAGE_HDR *best_page;
1609
	/*
	 * Pick the offsets of the first pair of tail pages. For version >= 2
	 * a buffer of 0x10 pages is also allocated; accepted tail pages are
	 * collected there.
	 */
1610	if (log->major_ver >= 2) {
1611		final_off = 0x02 * log->page_size;
1612		second_off = 0x12 * log->page_size;
1613
1614		// 0x10 == 0x12 - 0x2
1615		page_bufs = kmalloc(log->page_size * 0x10, GFP_NOFS);
1616		if (!page_bufs)
1617			return -ENOMEM;
1618	} else {
1619		second_off = log->first_page - log->page_size;
1620		final_off = second_off - log->page_size;
1621	}
1622
1623next_tail:
	/*
	 * One pass reads one pair of tail pages. A page that cannot be read,
	 * has a USA error or lacks the RCRD signature is dropped and its lsn
	 * is treated as 0.
	 */
1624	/* Read second tail page (at pos 3/0x12000). */
1625	if (read_log_page(log, second_off, &second_tail, &usa_error) ||
1626	    usa_error || second_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
1627		kfree(second_tail);
1628		second_tail = NULL;
1629		second_file_off = 0;
1630		lsn2 = 0;
1631	} else {
1632		second_file_off = hdr_file_off(log, second_tail);
1633		lsn2 = le64_to_cpu(second_tail->record_hdr.last_end_lsn);
1634	}
1635
1636	/* Read first tail page (at pos 2/0x2000). */
1637	if (read_log_page(log, final_off, &first_tail, &usa_error) ||
1638	    usa_error || first_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
1639		kfree(first_tail);
1640		first_tail = NULL;
1641		first_file_off = 0;
1642		lsn1 = 0;
1643	} else {
1644		first_file_off = hdr_file_off(log, first_tail);
1645		lsn1 = le64_to_cpu(first_tail->record_hdr.last_end_lsn);
1646	}
1647
	/*
	 * Version < 2: there is a single pair of tail pages. Remember both,
	 * choose the one with the larger last_end_lsn (the first page wins a
	 * tie), take page_off and seq_base from it and go read the log pages.
	 * Note that the local 'best_page' below shadows the outer pointer.
	 */
1648	if (log->major_ver < 2) {
1649		int best_page;
1650
1651		first_tail_prev = first_tail;
1652		final_off_prev = first_file_off;
1653		second_tail_prev = second_tail;
1654		second_off_prev = second_file_off;
1655		tails = 1;
1656
1657		if (!first_tail && !second_tail)
1658			goto tail_read;
1659
1660		if (first_tail && second_tail)
1661			best_page = lsn1 < lsn2 ? 1 : 0;
1662		else if (first_tail)
1663			best_page = 0;
1664		else
1665			best_page = 1;
1666
1667		page_off = best_page ? second_file_off : first_file_off;
1668		seq_base = (best_page ? lsn2 : lsn1) >> log->file_data_bits;
1669		goto tail_read;
1670	}
1671
	/* Version >= 2: compare the tails by base_lsn(); the second wins a tie. */
1672	best_lsn1 = first_tail ? base_lsn(log, first_tail, first_file_off) : 0;
1673	best_lsn2 = second_tail ? base_lsn(log, second_tail, second_file_off) :
1674				  0;
1675
1676	if (first_tail && second_tail) {
1677		if (best_lsn1 > best_lsn2) {
1678			best_lsn = best_lsn1;
1679			best_page = first_tail;
1680			this_off = first_file_off;
1681		} else {
1682			best_lsn = best_lsn2;
1683			best_page = second_tail;
1684			this_off = second_file_off;
1685		}
1686	} else if (first_tail) {
1687		best_lsn = best_lsn1;
1688		best_page = first_tail;
1689		this_off = first_file_off;
1690	} else if (second_tail) {
1691		best_lsn = best_lsn2;
1692		best_page = second_tail;
1693		this_off = second_file_off;
1694	} else {
1695		goto tail_read;
1696	}
1697
1698	best_page_pos = le16_to_cpu(best_page->page_pos);
1699
	/*
	 * The first tail is taken when its page_pos equals the expected one.
	 * A later tail is taken only when it has the same sequence number,
	 * directly follows the previously taken tail in the file, has a
	 * larger lsn and a consistent page_pos/page_count. A later tail that
	 * fails these checks ends the scan of the tail area.
	 */
1700	if (!tails) {
1701		if (best_page_pos == page_pos) {
1702			seq_base = best_lsn >> log->file_data_bits;
1703			saved_off = page_off = le32_to_cpu(best_page->file_off);
1704			lsn_base = best_lsn;
1705
1706			memmove(page_bufs, best_page, log->page_size);
1707
1708			page_cnt = le16_to_cpu(best_page->page_count);
1709			if (page_cnt > 1)
1710				page_pos += 1;
1711
1712			tails = 1;
1713		}
1714	} else if (seq_base == (best_lsn >> log->file_data_bits) &&
1715		   saved_off + log->page_size == this_off &&
1716		   lsn_base < best_lsn &&
1717		   (page_pos != page_cnt || best_page_pos == page_pos ||
1718		    best_page_pos == 1) &&
1719		   (page_pos >= page_cnt || best_page_pos == page_pos)) {
1720		u16 bppc = le16_to_cpu(best_page->page_count);
1721
1722		saved_off += log->page_size;
1723		lsn_base = best_lsn;
1724
		/* Store the page in the next free slot of page_bufs. */
1725		memmove(Add2Ptr(page_bufs, tails * log->page_size), best_page,
1726			log->page_size);
1727
1728		tails += 1;
1729
1730		if (best_page_pos != bppc) {
1731			page_cnt = bppc;
1732			page_pos = best_page_pos;
1733
1734			if (page_cnt > 1)
1735				page_pos += 1;
1736		} else {
1737			page_pos = page_cnt = 1;
1738		}
1739	} else {
1740		kfree(first_tail);
1741		kfree(second_tail);
1742		goto tail_read;
1743	}
1744
	/* Keep the pair just read as "previous" and advance to the next pair. */
1745	kfree(first_tail_prev);
1746	first_tail_prev = first_tail;
1747	final_off_prev = first_file_off;
1748	first_tail = NULL;
1749
1750	kfree(second_tail_prev);
1751	second_tail_prev = second_tail;
1752	second_off_prev = second_file_off;
1753	second_tail = NULL;
1754
1755	final_off += log->page_size;
1756	second_off += log->page_size;
1757
1758	if (tails < 0x10)
1759		goto next_tail;
1760tail_read:
	/*
	 * Continue with the last pair of tail pages that was kept; from here
	 * on final_off/second_off hold the file offsets recorded for them.
	 */
1761	first_tail = first_tail_prev;
1762	final_off = final_off_prev;
1763
1764	second_tail = second_tail_prev;
1765	second_off = second_off_prev;
1766
1767	page_cnt = page_pos = 1;
1768
1769	curpage_off = seq_base == log->seq_num ? min(log->next_page, page_off) :
1770						 log->next_page;
1771
	/*
	 * Starting on the first log page with neither NTFSLOG_NO_LAST_LSN nor
	 * NTFSLOG_REUSE_TAIL set is treated as a wrapped file, in which case
	 * the next sequence number is expected.
	 */
1772	wrapped_file =
1773		curpage_off == log->first_page &&
1774		!(log->l_flags & (NTFSLOG_NO_LAST_LSN | NTFSLOG_REUSE_TAIL));
1775
1776	expected_seq = wrapped_file ? (log->seq_num + 1) : log->seq_num;
1777
1778	nextpage_off = curpage_off;
1779
1780next_page:
1781	tail_page = NULL;
1782	/* Read the next log page. */
1783	err = read_log_page(log, curpage_off, &page, &usa_error);
1784
1785	/* Compute the next log page offset the file. */
1786	nextpage_off = next_page_off(log, curpage_off);
1787	wrapped = nextpage_off == log->first_page;
1788
	/*
	 * More than one tail page was collected: prefer the copies kept in
	 * page_bufs. cur_page is only dereferenced after curpage_off has been
	 * checked against the [page_off, saved_off] range below.
	 */
1789	if (tails > 1) {
1790		struct RECORD_PAGE_HDR *cur_page =
1791			Add2Ptr(page_bufs, curpage_off - page_off);
1792
1793		if (curpage_off == saved_off) {
1794			tail_page = cur_page;
1795			goto use_tail_page;
1796		}
1797
1798		if (page_off > curpage_off || curpage_off >= saved_off)
1799			goto use_tail_page;
1800
1801		if (page_off1)
1802			goto use_cur_page;
1803
		/*
		 * The page in the file agrees with the buffered copy and has
		 * the expected position: handle it as an ordinary page.
		 */
1804		if (!err && !usa_error &&
1805		    page->rhdr.sign == NTFS_RCRD_SIGNATURE &&
1806		    cur_page->rhdr.lsn == page->rhdr.lsn &&
1807		    cur_page->record_hdr.next_record_off ==
1808			    page->record_hdr.next_record_off &&
1809		    ((page_pos == page_cnt &&
1810		      le16_to_cpu(page->page_pos) == 1) ||
1811		     (page_pos != page_cnt &&
1812		      le16_to_cpu(page->page_pos) == page_pos + 1 &&
1813		      le16_to_cpu(page->page_count) == page_cnt))) {
1814			cur_page = NULL;
1815			goto use_tail_page;
1816		}
1817
		/* Remember the first offset at which the buffered copy is used. */
1818		page_off1 = page_off;
1819
1820use_cur_page:
1821
1822		lsn_cur = le64_to_cpu(cur_page->rhdr.lsn);
1823
1824		if (last_ok_lsn !=
1825			    le64_to_cpu(cur_page->record_hdr.last_end_lsn) &&
1826		    ((lsn_cur >> log->file_data_bits) +
1827		     ((curpage_off <
1828		       (lsn_to_vbo(log, lsn_cur) & ~log->page_mask)) ?
1829			      1 :
1830			      0)) != expected_seq) {
1831			goto check_tail;
1832		}
1833
1834		if (!is_log_record_end(cur_page)) {
1835			tail_page = NULL;
1836			last_ok_lsn = lsn_cur;
1837			goto next_page_1;
1838		}
1839
		/* A record ends on this buffered page: take the log end from it. */
1840		log->seq_num = expected_seq;
1841		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
1842		log->last_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
1843		log->ra->current_lsn = cur_page->record_hdr.last_end_lsn;
1844
1845		if (log->record_header_len <=
1846		    log->page_size -
1847			    le16_to_cpu(cur_page->record_hdr.next_record_off)) {
1848			log->l_flags |= NTFSLOG_REUSE_TAIL;
1849			log->next_page = curpage_off;
1850		} else {
1851			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
1852			log->next_page = nextpage_off;
1853		}
1854
1855		if (wrapped_file)
1856			log->l_flags |= NTFSLOG_WRAPPED;
1857
1858		last_ok_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
1859		goto next_page_1;
1860	}
1861
1862	/*
1863	 * If we are at the expected first page of a transfer check to see
1864	 * if either tail copy is at this offset.
1865	 * If this page is the last page of a transfer, check if we wrote
1866	 * a subsequent tail copy.
1867	 */
1868	if (page_cnt == page_pos || page_cnt == page_pos + 1) {
1869		/*
1870		 * Check if the offset matches either the first or second
1871		 * tail copy. It is possible it will match both.
1872		 */
1873		if (curpage_off == final_off)
1874			tail_page = first_tail;
1875
1876		/*
1877		 * If we already matched on the first page then
1878		 * check the ending lsn's.
1879		 */
1880		if (curpage_off == second_off) {
1881			if (!tail_page ||
1882			    (second_tail &&
1883			     le64_to_cpu(second_tail->record_hdr.last_end_lsn) >
1884				     le64_to_cpu(first_tail->record_hdr
1885							 .last_end_lsn))) {
1886				tail_page = second_tail;
1887			}
1888		}
1889	}
1890
1891use_tail_page:
1892	if (tail_page) {
1893		/* We have a candidate for a tail copy. */
1894		lsn_cur = le64_to_cpu(tail_page->record_hdr.last_end_lsn);
1895
1896		if (last_ok_lsn < lsn_cur) {
1897			/*
1898			 * If the sequence number is not expected,
1899			 * then don't use the tail copy.
1900			 */
1901			if (expected_seq != (lsn_cur >> log->file_data_bits))
1902				tail_page = NULL;
1903		} else if (last_ok_lsn > lsn_cur) {
1904			/*
1905			 * If the last lsn is greater than the one on
1906			 * this page then forget this tail.
1907			 */
1908			tail_page = NULL;
1909		}
1910	}
1911
1912	/*
1913	 *If we have an error on the current page,
1914	 * we will break of this loop.
1915	 */
1916	if (err || usa_error)
1917		goto check_tail;
1918
1919	/*
1920	 * Done if the last lsn on this page doesn't match the previous known
1921	 * last lsn or the sequence number is not expected.
1922	 */
1923	lsn_cur = le64_to_cpu(page->rhdr.lsn);
1924	if (last_ok_lsn != lsn_cur &&
1925	    expected_seq != (lsn_cur >> log->file_data_bits)) {
1926		goto check_tail;
1927	}
1928
1929	/*
1930	 * Check that the page position and page count values are correct.
1931	 * If this is the first page of a transfer the position must be 1
1932	 * and the count will be unknown.
1933	 */
1934	if (page_cnt == page_pos) {
1935		if (page->page_pos != cpu_to_le16(1) &&
1936		    (!reuse_page || page->page_pos != page->page_count)) {
1937			/*
1938			 * If the current page is the first page we are
1939			 * looking at and we are reusing this page then
1940			 * it can be either the first or last page of a
1941			 * transfer. Otherwise it can only be the first.
1942			 */
1943			goto check_tail;
1944		}
1945	} else if (le16_to_cpu(page->page_count) != page_cnt ||
1946		   le16_to_cpu(page->page_pos) != page_pos + 1) {
1947		/*
1948		 * The page position better be 1 more than the last page
1949		 * position and the page count better match.
1950		 */
1951		goto check_tail;
1952	}
1953
1954	/*
1955	 * We have a valid page the file and may have a valid page
1956	 * the tail copy area.
1957	 * If the tail page was written after the page the file then
1958	 * break of the loop.
1959	 */
1960	if (tail_page &&
1961	    le64_to_cpu(tail_page->record_hdr.last_end_lsn) > lsn_cur) {
1962		/* Remember if we will replace the page. */
1963		replace_page = true;
1964		goto check_tail;
1965	}
1966
1967	tail_page = NULL;
1968
1969	if (is_log_record_end(page)) {
1970		/*
1971		 * Since we have read this page we know the sequence number
1972		 * is the same as our expected value.
1973		 */
1974		log->seq_num = expected_seq;
1975		log->last_lsn = le64_to_cpu(page->record_hdr.last_end_lsn);
1976		log->ra->current_lsn = page->record_hdr.last_end_lsn;
1977		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
1978
1979		/*
1980		 * If there is room on this page for another header then
1981		 * remember we want to reuse the page.
1982		 */
1983		if (log->record_header_len <=
1984		    log->page_size -
1985			    le16_to_cpu(page->record_hdr.next_record_off)) {
1986			log->l_flags |= NTFSLOG_REUSE_TAIL;
1987			log->next_page = curpage_off;
1988		} else {
1989			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
1990			log->next_page = nextpage_off;
1991		}
1992
1993		/* Remember if we wrapped the log file. */
1994		if (wrapped_file)
1995			log->l_flags |= NTFSLOG_WRAPPED;
1996	}
1997
1998	/*
1999	 * Remember the last page count and position.
2000	 * Also remember the last known lsn.
2001	 */
2002	page_cnt = le16_to_cpu(page->page_count);
2003	page_pos = le16_to_cpu(page->page_pos);
2004	last_ok_lsn = le64_to_cpu(page->rhdr.lsn);
2005
2006next_page_1:
2007
	/* Crossing the end of the file bumps the expected sequence number. */
2008	if (wrapped) {
2009		expected_seq += 1;
2010		wrapped_file = 1;
2011	}
2012
	/* Advance to the next page; the page buffer is released every time. */
2013	curpage_off = nextpage_off;
2014	kfree(page);
2015	page = NULL;
2016	reuse_page = 0;
2017	goto next_page;
2018
2019check_tail:
	/*
	 * The walk over the log pages has stopped. If a tail copy is still
	 * selected, the end-of-log state is taken from it.
	 */
2020	if (tail_page) {
2021		log->seq_num = expected_seq;
2022		log->last_lsn = le64_to_cpu(tail_page->record_hdr.last_end_lsn);
2023		log->ra->current_lsn = tail_page->record_hdr.last_end_lsn;
2024		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
2025
2026		if (log->page_size -
2027			    le16_to_cpu(
2028				    tail_page->record_hdr.next_record_off) >=
2029		    log->record_header_len) {
2030			log->l_flags |= NTFSLOG_REUSE_TAIL;
2031			log->next_page = curpage_off;
2032		} else {
2033			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
2034			log->next_page = nextpage_off;
2035		}
2036
2037		if (wrapped)
2038			log->l_flags |= NTFSLOG_WRAPPED;
2039	}
2040
2041	/* Remember that the partial IO will start at the next page. */
2042	second_off = nextpage_off;
2043
2044	/*
2045	 * If the next page is the first page of the file then update
2046	 * the sequence number for log records which begon the next page.
2047	 */
2048	if (wrapped)
2049		expected_seq += 1;
2050
2051	/*
2052	 * If we have a tail copy or are performing single page I/O we can
2053	 * immediately look at the next page.
2054	 */
2055	if (replace_page || (log->ra->flags & RESTART_SINGLE_PAGE_IO)) {
2056		page_cnt = 2;
2057		page_pos = 1;
2058		goto check_valid;
2059	}
2060
2061	if (page_pos != page_cnt)
2062		goto check_valid;
2063	/*
2064	 * If the next page causes us to wrap to the beginning of the log
2065	 * file then we know which page to check next.
2066	 */
2067	if (wrapped) {
2068		page_cnt = 2;
2069		page_pos = 1;
2070		goto check_valid;
2071	}
2072
	/* Probe the following pages, expecting page_pos 2, 3, ... in turn. */
2073	cur_pos = 2;
2074
2075next_test_page:
2076	kfree(tst_page);
2077	tst_page = NULL;
2078
2079	/* Walk through the file, reading log pages. */
2080	err = read_log_page(log, nextpage_off, &tst_page, &usa_error);
2081
2082	/*
2083	 * If we get a USA error then assume that we correctly found
2084	 * the end of the original transfer.
2085	 */
2086	if (usa_error)
2087		goto file_is_valid;
2088
2089	/*
2090	 * If we were able to read the page, we examine it to see if it
2091	 * is the same or different Io block.
2092	 */
2093	if (err)
2094		goto next_test_page_1;
2095
2096	if (le16_to_cpu(tst_page->page_pos) == cur_pos &&
2097	    check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
2098		page_cnt = le16_to_cpu(tst_page->page_count) + 1;
2099		page_pos = le16_to_cpu(tst_page->page_pos);
2100		goto check_valid;
2101	} else {
2102		goto file_is_valid;
2103	}
2104
2105next_test_page_1:
2106
	/*
	 * NOTE(review): the next offset is derived from curpage_off, which
	 * does not change inside this probe loop - confirm this is intended.
	 */
2107	nextpage_off = next_page_off(log, curpage_off);
2108	wrapped = nextpage_off == log->first_page;
2109
2110	if (wrapped) {
2111		expected_seq += 1;
2112		page_cnt = 2;
2113		page_pos = 1;
2114	}
2115
2116	cur_pos += 1;
2117	part_io_count += 1;
2118	if (!wrapped)
2119		goto next_test_page;
2120
2121check_valid:
2122	/* Skip over the remaining pages this transfer. */
2123	remain_pages = page_cnt - page_pos - 1;
2124	part_io_count += remain_pages;
2125
2126	while (remain_pages--) {
2127		nextpage_off = next_page_off(log, curpage_off);
2128		wrapped = nextpage_off == log->first_page;
2129
2130		if (wrapped)
2131			expected_seq += 1;
2132	}
2133
2134	/* Call our routine to check this log page. */
2135	kfree(tst_page);
2136	tst_page = NULL;
2137
	/* A readable page that looks like a subsequent write is fatal. */
2138	err = read_log_page(log, nextpage_off, &tst_page, &usa_error);
2139	if (!err && !usa_error &&
2140	    check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
2141		err = -EINVAL;
2142		goto out;
2143	}
2144
2145file_is_valid:
2146
2147	/* We have a valid file. */
2148	if (page_off1 || tail_page) {
2149		struct RECORD_PAGE_HDR *tmp_page;
2150
		/* Tail copies have to be written back: impossible if read-only. */
2151		if (sb_rdonly(log->ni->mi.sbi->sb)) {
2152			err = -EROFS;
2153			goto out;
2154		}
2155
		/*
		 * Either rewrite the buffered tails starting with the one at
		 * page_off1, or only the single selected tail_page.
		 */
2156		if (page_off1) {
2157			tmp_page = Add2Ptr(page_bufs, page_off1 - page_off);
2158			tails -= (page_off1 - page_off) / log->page_size;
2159			if (!tail_page)
2160				tails -= 1;
2161		} else {
2162			tmp_page = tail_page;
2163			tails = 1;
2164		}
2165
2166		while (tails--) {
2167			u64 off = hdr_file_off(log, tmp_page);
2168
			/* Reuse the page buffer left by the walk, if any. */
2169			if (!page) {
2170				page = kmalloc(log->page_size, GFP_NOFS);
2171				if (!page) {
2172					err = -ENOMEM;
2173					goto out;
2174				}
2175			}
2176
2177			/*
2178			 * Correct page and copy the data from this page
2179			 * into it and flush it to disk.
2180			 */
2181			memcpy(page, tmp_page, log->page_size);
2182
2183			/* Fill last flushed lsn value flush the page. */
2184			if (log->major_ver < 2)
2185				page->rhdr.lsn = page->record_hdr.last_end_lsn;
2186			else
2187				page->file_off = 0;
2188
2189			page->page_pos = page->page_count = cpu_to_le16(1);
2190
2191			ntfs_fix_pre_write(&page->rhdr, log->page_size);
2192
2193			err = ntfs_sb_write_run(log->ni->mi.sbi,
2194						&log->ni->file.run, off, page,
2195						log->page_size, 0);
2196
2197			if (err)
2198				goto out;
2199
			/* This write covered the first page of the partial IO. */
2200			if (part_io_count && second_off == off) {
2201				second_off += log->page_size;
2202				part_io_count -= 1;
2203			}
2204
2205			tmp_page = Add2Ptr(tmp_page, log->page_size);
2206		}
2207	}
2208
	/* Pages of a partial IO remain: only an error on a read-only mount. */
2209	if (part_io_count) {
2210		if (sb_rdonly(log->ni->mi.sbi->sb)) {
2211			err = -EROFS;
2212			goto out;
2213		}
2214	}
2215
2216out:
	/* Any of these may still be NULL; kfree() accepts that. */
2217	kfree(second_tail);
2218	kfree(first_tail);
2219	kfree(page);
2220	kfree(tst_page);
2221	kfree(page_bufs);
2222
2223	return err;
2224}
2225
2226/*
2227 * read_log_rec_buf - Copy a log record from the file to a buffer.
2228 *
2229 * The log record may span several log pages and may even wrap the file.
2230 */
2231static int read_log_rec_buf(struct ntfs_log *log,
2232			    const struct LFS_RECORD_HDR *rh, void *buffer)
2233{
2234	int err;
2235	struct RECORD_PAGE_HDR *ph = NULL;
2236	u64 lsn = le64_to_cpu(rh->this_lsn);
2237	u32 vbo = lsn_to_vbo(log, lsn) & ~log->page_mask;
2238	u32 off = lsn_to_page_off(log, lsn) + log->record_header_len;
2239	u32 data_len = le32_to_cpu(rh->client_data_len);
2240
2241	/*
2242	 * While there are more bytes to transfer,
2243	 * we continue to attempt to perform the read.
2244	 */
2245	for (;;) {
2246		bool usa_error;
2247		u32 tail = log->page_size - off;
2248
2249		if (tail >= data_len)
2250			tail = data_len;
2251
2252		data_len -= tail;
2253
2254		err = read_log_page(log, vbo, &ph, &usa_error);
2255		if (err)
2256			goto out;
2257
2258		/*
2259		 * The last lsn on this page better be greater or equal
2260		 * to the lsn we are copying.
2261		 */
2262		if (lsn > le64_to_cpu(ph->rhdr.lsn)) {
2263			err = -EINVAL;
2264			goto out;
2265		}
2266
2267		memcpy(buffer, Add2Ptr(ph, off), tail);
2268
2269		/* If there are no more bytes to transfer, we exit the loop. */
2270		if (!data_len) {
2271			if (!is_log_record_end(ph) ||
2272			    lsn > le64_to_cpu(ph->record_hdr.last_end_lsn)) {
2273				err = -EINVAL;
2274				goto out;
2275			}
2276			break;
2277		}
2278
2279		if (ph->rhdr.lsn == ph->record_hdr.last_end_lsn ||
2280		    lsn > le64_to_cpu(ph->rhdr.lsn)) {
2281			err = -EINVAL;
2282			goto out;
2283		}
2284
2285		vbo = next_page_off(log, vbo);
2286		off = log->data_off;
2287
2288		/*
2289		 * Adjust our pointer the user's buffer to transfer
2290		 * the next block to.
2291		 */
2292		buffer = Add2Ptr(buffer, tail);
2293	}
2294
2295out:
2296	kfree(ph);
2297	return err;
2298}
2299
2300static int read_rst_area(struct ntfs_log *log, struct NTFS_RESTART **rst_,
2301			 u64 *lsn)
2302{
2303	int err;
2304	struct LFS_RECORD_HDR *rh = NULL;
2305	const struct CLIENT_REC *cr =
2306		Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
2307	u64 lsnr, lsnc = le64_to_cpu(cr->restart_lsn);
2308	u32 len;
2309	struct NTFS_RESTART *rst;
2310
2311	*lsn = 0;
2312	*rst_ = NULL;
2313
2314	/* If the client doesn't have a restart area, go ahead and exit now. */
2315	if (!lsnc)
2316		return 0;
2317
2318	err = read_log_page(log, lsn_to_vbo(log, lsnc),
2319			    (struct RECORD_PAGE_HDR **)&rh, NULL);
2320	if (err)
2321		return err;
2322
2323	rst = NULL;
2324	lsnr = le64_to_cpu(rh->this_lsn);
2325
2326	if (lsnc != lsnr) {
2327		/* If the lsn values don't match, then the disk is corrupt. */
2328		err = -EINVAL;
2329		goto out;
2330	}
2331
2332	*lsn = lsnr;
2333	len = le32_to_cpu(rh->client_data_len);
2334
2335	if (!len) {
2336		err = 0;
2337		goto out;
2338	}
2339
2340	if (len < sizeof(struct NTFS_RESTART)) {
2341		err = -EINVAL;
2342		goto out;
2343	}
2344
2345	rst = kmalloc(len, GFP_NOFS);
2346	if (!rst) {
2347		err = -ENOMEM;
2348		goto out;
2349	}
2350
2351	/* Copy the data into the 'rst' buffer. */
2352	err = read_log_rec_buf(log, rh, rst);
2353	if (err)
2354		goto out;
2355
2356	*rst_ = rst;
2357	rst = NULL;
2358
2359out:
2360	kfree(rh);
2361	kfree(rst);
2362
2363	return err;
2364}
2365
2366static int find_log_rec(struct ntfs_log *log, u64 lsn, struct lcb *lcb)
2367{
2368	int err;
2369	struct LFS_RECORD_HDR *rh = lcb->lrh;
2370	u32 rec_len, len;
2371
2372	/* Read the record header for this lsn. */
2373	if (!rh) {
2374		err = read_log_page(log, lsn_to_vbo(log, lsn),
2375				    (struct RECORD_PAGE_HDR **)&rh, NULL);
2376
2377		lcb->lrh = rh;
2378		if (err)
2379			return err;
2380	}
2381
2382	/*
2383	 * If the lsn the log record doesn't match the desired
2384	 * lsn then the disk is corrupt.
2385	 */
2386	if (lsn != le64_to_cpu(rh->this_lsn))
2387		return -EINVAL;
2388
2389	len = le32_to_cpu(rh->client_data_len);
2390
2391	/*
2392	 * Check that the length field isn't greater than the total
2393	 * available space the log file.
2394	 */
2395	rec_len = len + log->record_header_len;
2396	if (rec_len >= log->total_avail)
2397		return -EINVAL;
2398
2399	/*
2400	 * If the entire log record is on this log page,
2401	 * put a pointer to the log record the context block.
2402	 */
2403	if (rh->flags & LOG_RECORD_MULTI_PAGE) {
2404		void *lr = kmalloc(len, GFP_NOFS);
2405
2406		if (!lr)
2407			return -ENOMEM;
2408
2409		lcb->log_rec = lr;
2410		lcb->alloc = true;
2411
2412		/* Copy the data into the buffer returned. */
2413		err = read_log_rec_buf(log, rh, lr);
2414		if (err)
2415			return err;
2416	} else {
2417		/* If beyond the end of the current page -> an error. */
2418		u32 page_off = lsn_to_page_off(log, lsn);
2419
2420		if (page_off + len + log->record_header_len > log->page_size)
2421			return -EINVAL;
2422
2423		lcb->log_rec = Add2Ptr(rh, sizeof(struct LFS_RECORD_HDR));
2424		lcb->alloc = false;
2425	}
2426
2427	return 0;
2428}
2429
2430/*
2431 * read_log_rec_lcb - Init the query operation.
2432 */
2433static int read_log_rec_lcb(struct ntfs_log *log, u64 lsn, u32 ctx_mode,
2434			    struct lcb **lcb_)
2435{
2436	int err;
2437	const struct CLIENT_REC *cr;
2438	struct lcb *lcb;
2439
2440	switch (ctx_mode) {
2441	case lcb_ctx_undo_next:
2442	case lcb_ctx_prev:
2443	case lcb_ctx_next:
2444		break;
2445	default:
2446		return -EINVAL;
2447	}
2448
2449	/* Check that the given lsn is the legal range for this client. */
2450	cr = Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
2451
2452	if (!verify_client_lsn(log, cr, lsn))
2453		return -EINVAL;
2454
2455	lcb = kzalloc(sizeof(struct lcb), GFP_NOFS);
2456	if (!lcb)
2457		return -ENOMEM;
2458	lcb->client = log->client_id;
2459	lcb->ctx_mode = ctx_mode;
2460
2461	/* Find the log record indicated by the given lsn. */
2462	err = find_log_rec(log, lsn, lcb);
2463	if (err)
2464		goto out;
2465
2466	*lcb_ = lcb;
2467	return 0;
2468
2469out:
2470	lcb_put(lcb);
2471	*lcb_ = NULL;
2472	return err;
2473}
2474
2475/*
2476 * find_client_next_lsn
2477 *
2478 * Attempt to find the next lsn to return to a client based on the context mode.
2479 */
2480static int find_client_next_lsn(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
2481{
2482	int err;
2483	u64 next_lsn;
2484	struct LFS_RECORD_HDR *hdr;
2485
2486	hdr = lcb->lrh;
2487	*lsn = 0;
2488
2489	if (lcb_ctx_next != lcb->ctx_mode)
2490		goto check_undo_next;
2491
2492	/* Loop as long as another lsn can be found. */
2493	for (;;) {
2494		u64 current_lsn;
2495
2496		err = next_log_lsn(log, hdr, &current_lsn);
2497		if (err)
2498			goto out;
2499
2500		if (!current_lsn)
2501			break;
2502
2503		if (hdr != lcb->lrh)
2504			kfree(hdr);
2505
2506		hdr = NULL;
2507		err = read_log_page(log, lsn_to_vbo(log, current_lsn),
2508				    (struct RECORD_PAGE_HDR **)&hdr, NULL);
2509		if (err)
2510			goto out;
2511
2512		if (memcmp(&hdr->client, &lcb->client,
2513			   sizeof(struct CLIENT_ID))) {
2514			/*err = -EINVAL; */
2515		} else if (LfsClientRecord == hdr->record_type) {
2516			kfree(lcb->lrh);
2517			lcb->lrh = hdr;
2518			*lsn = current_lsn;
2519			return 0;
2520		}
2521	}
2522
2523out:
2524	if (hdr != lcb->lrh)
2525		kfree(hdr);
2526	return err;
2527
2528check_undo_next:
2529	if (lcb_ctx_undo_next == lcb->ctx_mode)
2530		next_lsn = le64_to_cpu(hdr->client_undo_next_lsn);
2531	else if (lcb_ctx_prev == lcb->ctx_mode)
2532		next_lsn = le64_to_cpu(hdr->client_prev_lsn);
2533	else
2534		return 0;
2535
2536	if (!next_lsn)
2537		return 0;
2538
2539	if (!verify_client_lsn(
2540		    log, Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off)),
2541		    next_lsn))
2542		return 0;
2543
2544	hdr = NULL;
2545	err = read_log_page(log, lsn_to_vbo(log, next_lsn),
2546			    (struct RECORD_PAGE_HDR **)&hdr, NULL);
2547	if (err)
2548		return err;
2549	kfree(lcb->lrh);
2550	lcb->lrh = hdr;
2551
2552	*lsn = next_lsn;
2553
2554	return 0;
2555}
2556
2557static int read_next_log_rec(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
2558{
2559	int err;
2560
2561	err = find_client_next_lsn(log, lcb, lsn);
2562	if (err)
2563		return err;
2564
2565	if (!*lsn)
2566		return 0;
2567
2568	if (lcb->alloc)
2569		kfree(lcb->log_rec);
2570
2571	lcb->log_rec = NULL;
2572	lcb->alloc = false;
2573	kfree(lcb->lrh);
2574	lcb->lrh = NULL;
2575
2576	return find_log_rec(log, *lsn, lcb);
2577}
2578
2579bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes)
2580{
2581	__le16 mask;
2582	u32 min_de, de_off, used, total;
2583	const struct NTFS_DE *e;
2584
2585	if (hdr_has_subnode(hdr)) {
2586		min_de = sizeof(struct NTFS_DE) + sizeof(u64);
2587		mask = NTFS_IE_HAS_SUBNODES;
2588	} else {
2589		min_de = sizeof(struct NTFS_DE);
2590		mask = 0;
2591	}
2592
2593	de_off = le32_to_cpu(hdr->de_off);
2594	used = le32_to_cpu(hdr->used);
2595	total = le32_to_cpu(hdr->total);
2596
2597	if (de_off > bytes - min_de || used > bytes || total > bytes ||
2598	    de_off + min_de > used || used > total) {
2599		return false;
2600	}
2601
2602	e = Add2Ptr(hdr, de_off);
2603	for (;;) {
2604		u16 esize = le16_to_cpu(e->size);
2605		struct NTFS_DE *next = Add2Ptr(e, esize);
2606
2607		if (esize < min_de || PtrOffset(hdr, next) > used ||
2608		    (e->flags & NTFS_IE_HAS_SUBNODES) != mask) {
2609			return false;
2610		}
2611
2612		if (de_is_last(e))
2613			break;
2614
2615		e = next;
2616	}
2617
2618	return true;
2619}
2620
2621static inline bool check_index_buffer(const struct INDEX_BUFFER *ib, u32 bytes)
2622{
2623	u16 fo;
2624	const struct NTFS_RECORD_HEADER *r = &ib->rhdr;
2625
2626	if (r->sign != NTFS_INDX_SIGNATURE)
2627		return false;
2628
2629	fo = (SECTOR_SIZE - ((bytes >> SECTOR_SHIFT) + 1) * sizeof(short));
2630
2631	if (le16_to_cpu(r->fix_off) > fo)
2632		return false;
2633
2634	if ((le16_to_cpu(r->fix_num) - 1) * SECTOR_SIZE != bytes)
2635		return false;
2636
2637	return check_index_header(&ib->ihdr,
2638				  bytes - offsetof(struct INDEX_BUFFER, ihdr));
2639}
2640
2641static inline bool check_index_root(const struct ATTRIB *attr,
2642				    struct ntfs_sb_info *sbi)
2643{
2644	bool ret;
2645	const struct INDEX_ROOT *root = resident_data(attr);
2646	u8 index_bits = le32_to_cpu(root->index_block_size) >=
2647					sbi->cluster_size ?
2648				sbi->cluster_bits :
2649				SECTOR_SHIFT;
2650	u8 block_clst = root->index_block_clst;
2651
2652	if (le32_to_cpu(attr->res.data_size) < sizeof(struct INDEX_ROOT) ||
2653	    (root->type != ATTR_NAME && root->type != ATTR_ZERO) ||
2654	    (root->type == ATTR_NAME &&
2655	     root->rule != NTFS_COLLATION_TYPE_FILENAME) ||
2656	    (le32_to_cpu(root->index_block_size) !=
2657	     (block_clst << index_bits)) ||
2658	    (block_clst != 1 && block_clst != 2 && block_clst != 4 &&
2659	     block_clst != 8 && block_clst != 0x10 && block_clst != 0x20 &&
2660	     block_clst != 0x40 && block_clst != 0x80)) {
2661		return false;
2662	}
2663
2664	ret = check_index_header(&root->ihdr,
2665				 le32_to_cpu(attr->res.data_size) -
2666					 offsetof(struct INDEX_ROOT, ihdr));
2667	return ret;
2668}
2669
2670static inline bool check_attr(const struct MFT_REC *rec,
2671			      const struct ATTRIB *attr,
2672			      struct ntfs_sb_info *sbi)
2673{
2674	u32 asize = le32_to_cpu(attr->size);
2675	u32 rsize = 0;
2676	u64 dsize, svcn, evcn;
2677	u16 run_off;
2678
2679	/* Check the fixed part of the attribute record header. */
2680	if (asize >= sbi->record_size ||
2681	    asize + PtrOffset(rec, attr) >= sbi->record_size ||
2682	    (attr->name_len &&
2683	     le16_to_cpu(attr->name_off) + attr->name_len * sizeof(short) >
2684		     asize)) {
2685		return false;
2686	}
2687
2688	/* Check the attribute fields. */
2689	switch (attr->non_res) {
2690	case 0:
2691		rsize = le32_to_cpu(attr->res.data_size);
2692		if (rsize >= asize ||
2693		    le16_to_cpu(attr->res.data_off) + rsize > asize) {
2694			return false;
2695		}
2696		break;
2697
2698	case 1:
2699		dsize = le64_to_cpu(attr->nres.data_size);
2700		svcn = le64_to_cpu(attr->nres.svcn);
2701		evcn = le64_to_cpu(attr->nres.evcn);
2702		run_off = le16_to_cpu(attr->nres.run_off);
2703
2704		if (svcn > evcn + 1 || run_off >= asize ||
2705		    le64_to_cpu(attr->nres.valid_size) > dsize ||
2706		    dsize > le64_to_cpu(attr->nres.alloc_size)) {
2707			return false;
2708		}
2709
2710		if (run_off > asize)
2711			return false;
2712
2713		if (run_unpack(NULL, sbi, 0, svcn, evcn, svcn,
2714			       Add2Ptr(attr, run_off), asize - run_off) < 0) {
2715			return false;
2716		}
2717
2718		return true;
2719
2720	default:
2721		return false;
2722	}
2723
2724	switch (attr->type) {
2725	case ATTR_NAME:
2726		if (fname_full_size(Add2Ptr(
2727			    attr, le16_to_cpu(attr->res.data_off))) > asize) {
2728			return false;
2729		}
2730		break;
2731
2732	case ATTR_ROOT:
2733		return check_index_root(attr, sbi);
2734
2735	case ATTR_STD:
2736		if (rsize < sizeof(struct ATTR_STD_INFO5) &&
2737		    rsize != sizeof(struct ATTR_STD_INFO)) {
2738			return false;
2739		}
2740		break;
2741
2742	case ATTR_LIST:
2743	case ATTR_ID:
2744	case ATTR_SECURE:
2745	case ATTR_LABEL:
2746	case ATTR_VOL_INFO:
2747	case ATTR_DATA:
2748	case ATTR_ALLOC:
2749	case ATTR_BITMAP:
2750	case ATTR_REPARSE:
2751	case ATTR_EA_INFO:
2752	case ATTR_EA:
2753	case ATTR_PROPERTYSET:
2754	case ATTR_LOGGED_UTILITY_STREAM:
2755		break;
2756
2757	default:
2758		return false;
2759	}
2760
2761	return true;
2762}
2763
2764static inline bool check_file_record(const struct MFT_REC *rec,
2765				     const struct MFT_REC *rec2,
2766				     struct ntfs_sb_info *sbi)
2767{
2768	const struct ATTRIB *attr;
2769	u16 fo = le16_to_cpu(rec->rhdr.fix_off);
2770	u16 fn = le16_to_cpu(rec->rhdr.fix_num);
2771	u16 ao = le16_to_cpu(rec->attr_off);
2772	u32 rs = sbi->record_size;
2773
2774	/* Check the file record header for consistency. */
2775	if (rec->rhdr.sign != NTFS_FILE_SIGNATURE ||
2776	    fo > (SECTOR_SIZE - ((rs >> SECTOR_SHIFT) + 1) * sizeof(short)) ||
2777	    (fn - 1) * SECTOR_SIZE != rs || ao < MFTRECORD_FIXUP_OFFSET_1 ||
2778	    ao > sbi->record_size - SIZEOF_RESIDENT || !is_rec_inuse(rec) ||
2779	    le32_to_cpu(rec->total) != rs) {
2780		return false;
2781	}
2782
2783	/* Loop to check all of the attributes. */
2784	for (attr = Add2Ptr(rec, ao); attr->type != ATTR_END;
2785	     attr = Add2Ptr(attr, le32_to_cpu(attr->size))) {
2786		if (check_attr(rec, attr, sbi))
2787			continue;
2788		return false;
2789	}
2790
2791	return true;
2792}
2793
2794static inline int check_lsn(const struct NTFS_RECORD_HEADER *hdr,
2795			    const u64 *rlsn)
2796{
2797	u64 lsn;
2798
2799	if (!rlsn)
2800		return true;
2801
2802	lsn = le64_to_cpu(hdr->lsn);
2803
2804	if (hdr->sign == NTFS_HOLE_SIGNATURE)
2805		return false;
2806
2807	if (*rlsn > lsn)
2808		return true;
2809
2810	return false;
2811}
2812
2813static inline bool check_if_attr(const struct MFT_REC *rec,
2814				 const struct LOG_REC_HDR *lrh)
2815{
2816	u16 ro = le16_to_cpu(lrh->record_off);
2817	u16 o = le16_to_cpu(rec->attr_off);
2818	const struct ATTRIB *attr = Add2Ptr(rec, o);
2819
2820	while (o < ro) {
2821		u32 asize;
2822
2823		if (attr->type == ATTR_END)
2824			break;
2825
2826		asize = le32_to_cpu(attr->size);
2827		if (!asize)
2828			break;
2829
2830		o += asize;
2831		attr = Add2Ptr(attr, asize);
2832	}
2833
2834	return o == ro;
2835}
2836
2837static inline bool check_if_index_root(const struct MFT_REC *rec,
2838				       const struct LOG_REC_HDR *lrh)
2839{
2840	u16 ro = le16_to_cpu(lrh->record_off);
2841	u16 o = le16_to_cpu(rec->attr_off);
2842	const struct ATTRIB *attr = Add2Ptr(rec, o);
2843
2844	while (o < ro) {
2845		u32 asize;
2846
2847		if (attr->type == ATTR_END)
2848			break;
2849
2850		asize = le32_to_cpu(attr->size);
2851		if (!asize)
2852			break;
2853
2854		o += asize;
2855		attr = Add2Ptr(attr, asize);
2856	}
2857
2858	return o == ro && attr->type == ATTR_ROOT;
2859}
2860
2861static inline bool check_if_root_index(const struct ATTRIB *attr,
2862				       const struct INDEX_HDR *hdr,
2863				       const struct LOG_REC_HDR *lrh)
2864{
2865	u16 ao = le16_to_cpu(lrh->attr_off);
2866	u32 de_off = le32_to_cpu(hdr->de_off);
2867	u32 o = PtrOffset(attr, hdr) + de_off;
2868	const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
2869	u32 asize = le32_to_cpu(attr->size);
2870
2871	while (o < ao) {
2872		u16 esize;
2873
2874		if (o >= asize)
2875			break;
2876
2877		esize = le16_to_cpu(e->size);
2878		if (!esize)
2879			break;
2880
2881		o += esize;
2882		e = Add2Ptr(e, esize);
2883	}
2884
2885	return o == ao;
2886}
2887
2888static inline bool check_if_alloc_index(const struct INDEX_HDR *hdr,
2889					u32 attr_off)
2890{
2891	u32 de_off = le32_to_cpu(hdr->de_off);
2892	u32 o = offsetof(struct INDEX_BUFFER, ihdr) + de_off;
2893	const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
2894	u32 used = le32_to_cpu(hdr->used);
2895
2896	while (o < attr_off) {
2897		u16 esize;
2898
2899		if (de_off >= used)
2900			break;
2901
2902		esize = le16_to_cpu(e->size);
2903		if (!esize)
2904			break;
2905
2906		o += esize;
2907		de_off += esize;
2908		e = Add2Ptr(e, esize);
2909	}
2910
2911	return o == attr_off;
2912}
2913
2914static inline void change_attr_size(struct MFT_REC *rec, struct ATTRIB *attr,
2915				    u32 nsize)
2916{
2917	u32 asize = le32_to_cpu(attr->size);
2918	int dsize = nsize - asize;
2919	u8 *next = Add2Ptr(attr, asize);
2920	u32 used = le32_to_cpu(rec->used);
2921
2922	memmove(Add2Ptr(attr, nsize), next, used - PtrOffset(rec, next));
2923
2924	rec->used = cpu_to_le32(used + dsize);
2925	attr->size = cpu_to_le32(nsize);
2926}
2927
/*
 * struct OpenAttr - In-memory state attached to an open attribute table
 * entry (reached through the entry's 'ptr' field).
 */
struct OpenAttr {
	/* Heap copy of the attribute; replaced with kmemdup()/kfree() on updates. */
	struct ATTRIB *attr;
	/* Run list handed to the read/write helpers; may point at 'run0'. */
	struct runs_tree *run1;
	/* Embedded run list storage. */
	struct runs_tree run0;
	/* NOTE(review): presumably the inode owning the attribute - not used in the code visible here. */
	struct ntfs_inode *ni;
	// CLST rno;
};
2935
2936/*
2937 * cmp_type_and_name
2938 *
2939 * Return: 0 if 'attr' has the same type and name.
2940 */
2941static inline int cmp_type_and_name(const struct ATTRIB *a1,
2942				    const struct ATTRIB *a2)
2943{
2944	return a1->type != a2->type || a1->name_len != a2->name_len ||
2945	       (a1->name_len && memcmp(attr_name(a1), attr_name(a2),
2946				       a1->name_len * sizeof(short)));
2947}
2948
2949static struct OpenAttr *find_loaded_attr(struct ntfs_log *log,
2950					 const struct ATTRIB *attr, CLST rno)
2951{
2952	struct OPEN_ATTR_ENRTY *oe = NULL;
2953
2954	while ((oe = enum_rstbl(log->open_attr_tbl, oe))) {
2955		struct OpenAttr *op_attr;
2956
2957		if (ino_get(&oe->ref) != rno)
2958			continue;
2959
2960		op_attr = (struct OpenAttr *)oe->ptr;
2961		if (!cmp_type_and_name(op_attr->attr, attr))
2962			return op_attr;
2963	}
2964	return NULL;
2965}
2966
2967static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi,
2968					     enum ATTR_TYPE type, u64 size,
2969					     const u16 *name, size_t name_len,
2970					     __le16 flags)
2971{
2972	struct ATTRIB *attr;
2973	u32 name_size = ALIGN(name_len * sizeof(short), 8);
2974	bool is_ext = flags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED);
2975	u32 asize = name_size +
2976		    (is_ext ? SIZEOF_NONRESIDENT_EX : SIZEOF_NONRESIDENT);
2977
2978	attr = kzalloc(asize, GFP_NOFS);
2979	if (!attr)
2980		return NULL;
2981
2982	attr->type = type;
2983	attr->size = cpu_to_le32(asize);
2984	attr->flags = flags;
2985	attr->non_res = 1;
2986	attr->name_len = name_len;
2987
2988	attr->nres.evcn = cpu_to_le64((u64)bytes_to_cluster(sbi, size) - 1);
2989	attr->nres.alloc_size = cpu_to_le64(ntfs_up_cluster(sbi, size));
2990	attr->nres.data_size = cpu_to_le64(size);
2991	attr->nres.valid_size = attr->nres.data_size;
2992	if (is_ext) {
2993		attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
2994		if (is_attr_compressed(attr))
2995			attr->nres.c_unit = COMPRESSION_UNIT;
2996
2997		attr->nres.run_off =
2998			cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size);
2999		memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT_EX), name,
3000		       name_len * sizeof(short));
3001	} else {
3002		attr->name_off = SIZEOF_NONRESIDENT_LE;
3003		attr->nres.run_off =
3004			cpu_to_le16(SIZEOF_NONRESIDENT + name_size);
3005		memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT), name,
3006		       name_len * sizeof(short));
3007	}
3008
3009	return attr;
3010}
3011
3012/*
3013 * do_action - Common routine for the Redo and Undo Passes.
3014 * @rlsn: If it is NULL then undo.
3015 */
3016static int do_action(struct ntfs_log *log, struct OPEN_ATTR_ENRTY *oe,
3017		     const struct LOG_REC_HDR *lrh, u32 op, void *data,
3018		     u32 dlen, u32 rec_len, const u64 *rlsn)
3019{
3020	int err = 0;
3021	struct ntfs_sb_info *sbi = log->ni->mi.sbi;
3022	struct inode *inode = NULL, *inode_parent;
3023	struct mft_inode *mi = NULL, *mi2_child = NULL;
3024	CLST rno = 0, rno_base = 0;
3025	struct INDEX_BUFFER *ib = NULL;
3026	struct MFT_REC *rec = NULL;
3027	struct ATTRIB *attr = NULL, *attr2;
3028	struct INDEX_HDR *hdr;
3029	struct INDEX_ROOT *root;
3030	struct NTFS_DE *e, *e1, *e2;
3031	struct NEW_ATTRIBUTE_SIZES *new_sz;
3032	struct ATTR_FILE_NAME *fname;
3033	struct OpenAttr *oa, *oa2;
3034	u32 nsize, t32, asize, used, esize, off, bits;
3035	u16 id, id2;
3036	u32 record_size = sbi->record_size;
3037	u64 t64;
3038	u16 roff = le16_to_cpu(lrh->record_off);
3039	u16 aoff = le16_to_cpu(lrh->attr_off);
3040	u64 lco = 0;
3041	u64 cbo = (u64)le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
3042	u64 tvo = le64_to_cpu(lrh->target_vcn) << sbi->cluster_bits;
3043	u64 vbo = cbo + tvo;
3044	void *buffer_le = NULL;
3045	u32 bytes = 0;
3046	bool a_dirty = false;
3047	u16 data_off;
3048
3049	oa = oe->ptr;
3050
3051	/* Big switch to prepare. */
3052	switch (op) {
3053	/* ============================================================
3054	 * Process MFT records, as described by the current log record.
3055	 * ============================================================
3056	 */
3057	case InitializeFileRecordSegment:
3058	case DeallocateFileRecordSegment:
3059	case WriteEndOfFileRecordSegment:
3060	case CreateAttribute:
3061	case DeleteAttribute:
3062	case UpdateResidentValue:
3063	case UpdateMappingPairs:
3064	case SetNewAttributeSizes:
3065	case AddIndexEntryRoot:
3066	case DeleteIndexEntryRoot:
3067	case SetIndexEntryVcnRoot:
3068	case UpdateFileNameRoot:
3069	case UpdateRecordDataRoot:
3070	case ZeroEndOfFileRecord:
3071		rno = vbo >> sbi->record_bits;
3072		inode = ilookup(sbi->sb, rno);
3073		if (inode) {
3074			mi = &ntfs_i(inode)->mi;
3075		} else if (op == InitializeFileRecordSegment) {
3076			mi = kzalloc(sizeof(struct mft_inode), GFP_NOFS);
3077			if (!mi)
3078				return -ENOMEM;
3079			err = mi_format_new(mi, sbi, rno, 0, false);
3080			if (err)
3081				goto out;
3082		} else {
3083			/* Read from disk. */
3084			err = mi_get(sbi, rno, &mi);
3085			if (err)
3086				return err;
3087		}
3088		rec = mi->mrec;
3089
3090		if (op == DeallocateFileRecordSegment)
3091			goto skip_load_parent;
3092
3093		if (InitializeFileRecordSegment != op) {
3094			if (rec->rhdr.sign == NTFS_BAAD_SIGNATURE)
3095				goto dirty_vol;
3096			if (!check_lsn(&rec->rhdr, rlsn))
3097				goto out;
3098			if (!check_file_record(rec, NULL, sbi))
3099				goto dirty_vol;
3100			attr = Add2Ptr(rec, roff);
3101		}
3102
3103		if (is_rec_base(rec) || InitializeFileRecordSegment == op) {
3104			rno_base = rno;
3105			goto skip_load_parent;
3106		}
3107
3108		rno_base = ino_get(&rec->parent_ref);
3109		inode_parent = ntfs_iget5(sbi->sb, &rec->parent_ref, NULL);
3110		if (IS_ERR(inode_parent))
3111			goto skip_load_parent;
3112
3113		if (is_bad_inode(inode_parent)) {
3114			iput(inode_parent);
3115			goto skip_load_parent;
3116		}
3117
3118		if (ni_load_mi_ex(ntfs_i(inode_parent), rno, &mi2_child)) {
3119			iput(inode_parent);
3120		} else {
3121			if (mi2_child->mrec != mi->mrec)
3122				memcpy(mi2_child->mrec, mi->mrec,
3123				       sbi->record_size);
3124
3125			if (inode)
3126				iput(inode);
3127			else if (mi)
3128				mi_put(mi);
3129
3130			inode = inode_parent;
3131			mi = mi2_child;
3132			rec = mi2_child->mrec;
3133			attr = Add2Ptr(rec, roff);
3134		}
3135
3136skip_load_parent:
3137		inode_parent = NULL;
3138		break;
3139
3140	/*
3141	 * Process attributes, as described by the current log record.
3142	 */
3143	case UpdateNonresidentValue:
3144	case AddIndexEntryAllocation:
3145	case DeleteIndexEntryAllocation:
3146	case WriteEndOfIndexBuffer:
3147	case SetIndexEntryVcnAllocation:
3148	case UpdateFileNameAllocation:
3149	case SetBitsInNonresidentBitMap:
3150	case ClearBitsInNonresidentBitMap:
3151	case UpdateRecordDataAllocation:
3152		attr = oa->attr;
3153		bytes = UpdateNonresidentValue == op ? dlen : 0;
3154		lco = (u64)le16_to_cpu(lrh->lcns_follow) << sbi->cluster_bits;
3155
3156		if (attr->type == ATTR_ALLOC) {
3157			t32 = le32_to_cpu(oe->bytes_per_index);
3158			if (bytes < t32)
3159				bytes = t32;
3160		}
3161
3162		if (!bytes)
3163			bytes = lco - cbo;
3164
3165		bytes += roff;
3166		if (attr->type == ATTR_ALLOC)
3167			bytes = (bytes + 511) & ~511; // align
3168
3169		buffer_le = kmalloc(bytes, GFP_NOFS);
3170		if (!buffer_le)
3171			return -ENOMEM;
3172
3173		err = ntfs_read_run_nb(sbi, oa->run1, vbo, buffer_le, bytes,
3174				       NULL);
3175		if (err)
3176			goto out;
3177
3178		if (attr->type == ATTR_ALLOC && *(int *)buffer_le)
3179			ntfs_fix_post_read(buffer_le, bytes, false);
3180		break;
3181
3182	default:
3183		WARN_ON(1);
3184	}
3185
3186	/* Big switch to do operation. */
3187	switch (op) {
3188	case InitializeFileRecordSegment:
3189		if (roff + dlen > record_size)
3190			goto dirty_vol;
3191
3192		memcpy(Add2Ptr(rec, roff), data, dlen);
3193		mi->dirty = true;
3194		break;
3195
3196	case DeallocateFileRecordSegment:
3197		clear_rec_inuse(rec);
3198		le16_add_cpu(&rec->seq, 1);
3199		mi->dirty = true;
3200		break;
3201
3202	case WriteEndOfFileRecordSegment:
3203		attr2 = (struct ATTRIB *)data;
3204		if (!check_if_attr(rec, lrh) || roff + dlen > record_size)
3205			goto dirty_vol;
3206
3207		memmove(attr, attr2, dlen);
3208		rec->used = cpu_to_le32(ALIGN(roff + dlen, 8));
3209
3210		mi->dirty = true;
3211		break;
3212
3213	case CreateAttribute:
3214		attr2 = (struct ATTRIB *)data;
3215		asize = le32_to_cpu(attr2->size);
3216		used = le32_to_cpu(rec->used);
3217
3218		if (!check_if_attr(rec, lrh) || dlen < SIZEOF_RESIDENT ||
3219		    !IS_ALIGNED(asize, 8) ||
3220		    Add2Ptr(attr2, asize) > Add2Ptr(lrh, rec_len) ||
3221		    dlen > record_size - used) {
3222			goto dirty_vol;
3223		}
3224
3225		memmove(Add2Ptr(attr, asize), attr, used - roff);
3226		memcpy(attr, attr2, asize);
3227
3228		rec->used = cpu_to_le32(used + asize);
3229		id = le16_to_cpu(rec->next_attr_id);
3230		id2 = le16_to_cpu(attr2->id);
3231		if (id <= id2)
3232			rec->next_attr_id = cpu_to_le16(id2 + 1);
3233		if (is_attr_indexed(attr))
3234			le16_add_cpu(&rec->hard_links, 1);
3235
3236		oa2 = find_loaded_attr(log, attr, rno_base);
3237		if (oa2) {
3238			void *p2 = kmemdup(attr, le32_to_cpu(attr->size),
3239					   GFP_NOFS);
3240			if (p2) {
3241				// run_close(oa2->run1);
3242				kfree(oa2->attr);
3243				oa2->attr = p2;
3244			}
3245		}
3246
3247		mi->dirty = true;
3248		break;
3249
3250	case DeleteAttribute:
3251		asize = le32_to_cpu(attr->size);
3252		used = le32_to_cpu(rec->used);
3253
3254		if (!check_if_attr(rec, lrh))
3255			goto dirty_vol;
3256
3257		rec->used = cpu_to_le32(used - asize);
3258		if (is_attr_indexed(attr))
3259			le16_add_cpu(&rec->hard_links, -1);
3260
3261		memmove(attr, Add2Ptr(attr, asize), used - asize - roff);
3262
3263		mi->dirty = true;
3264		break;
3265
3266	case UpdateResidentValue:
3267		nsize = aoff + dlen;
3268
3269		if (!check_if_attr(rec, lrh))
3270			goto dirty_vol;
3271
3272		asize = le32_to_cpu(attr->size);
3273		used = le32_to_cpu(rec->used);
3274
3275		if (lrh->redo_len == lrh->undo_len) {
3276			if (nsize > asize)
3277				goto dirty_vol;
3278			goto move_data;
3279		}
3280
3281		if (nsize > asize && nsize - asize > record_size - used)
3282			goto dirty_vol;
3283
3284		nsize = ALIGN(nsize, 8);
3285		data_off = le16_to_cpu(attr->res.data_off);
3286
3287		if (nsize < asize) {
3288			memmove(Add2Ptr(attr, aoff), data, dlen);
3289			data = NULL; // To skip below memmove().
3290		}
3291
3292		memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
3293			used - le16_to_cpu(lrh->record_off) - asize);
3294
3295		rec->used = cpu_to_le32(used + nsize - asize);
3296		attr->size = cpu_to_le32(nsize);
3297		attr->res.data_size = cpu_to_le32(aoff + dlen - data_off);
3298
3299move_data:
3300		if (data)
3301			memmove(Add2Ptr(attr, aoff), data, dlen);
3302
3303		oa2 = find_loaded_attr(log, attr, rno_base);
3304		if (oa2) {
3305			void *p2 = kmemdup(attr, le32_to_cpu(attr->size),
3306					   GFP_NOFS);
3307			if (p2) {
3308				// run_close(&oa2->run0);
3309				oa2->run1 = &oa2->run0;
3310				kfree(oa2->attr);
3311				oa2->attr = p2;
3312			}
3313		}
3314
3315		mi->dirty = true;
3316		break;
3317
3318	case UpdateMappingPairs:
3319		nsize = aoff + dlen;
3320		asize = le32_to_cpu(attr->size);
3321		used = le32_to_cpu(rec->used);
3322
3323		if (!check_if_attr(rec, lrh) || !attr->non_res ||
3324		    aoff < le16_to_cpu(attr->nres.run_off) || aoff > asize ||
3325		    (nsize > asize && nsize - asize > record_size - used)) {
3326			goto dirty_vol;
3327		}
3328
3329		nsize = ALIGN(nsize, 8);
3330
3331		memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
3332			used - le16_to_cpu(lrh->record_off) - asize);
3333		rec->used = cpu_to_le32(used + nsize - asize);
3334		attr->size = cpu_to_le32(nsize);
3335		memmove(Add2Ptr(attr, aoff), data, dlen);
3336
3337		if (run_get_highest_vcn(le64_to_cpu(attr->nres.svcn),
3338					attr_run(attr), &t64)) {
3339			goto dirty_vol;
3340		}
3341
3342		attr->nres.evcn = cpu_to_le64(t64);
3343		oa2 = find_loaded_attr(log, attr, rno_base);
3344		if (oa2 && oa2->attr->non_res)
3345			oa2->attr->nres.evcn = attr->nres.evcn;
3346
3347		mi->dirty = true;
3348		break;
3349
3350	case SetNewAttributeSizes:
3351		new_sz = data;
3352		if (!check_if_attr(rec, lrh) || !attr->non_res)
3353			goto dirty_vol;
3354
3355		attr->nres.alloc_size = new_sz->alloc_size;
3356		attr->nres.data_size = new_sz->data_size;
3357		attr->nres.valid_size = new_sz->valid_size;
3358
3359		if (dlen >= sizeof(struct NEW_ATTRIBUTE_SIZES))
3360			attr->nres.total_size = new_sz->total_size;
3361
3362		oa2 = find_loaded_attr(log, attr, rno_base);
3363		if (oa2) {
3364			void *p2 = kmemdup(attr, le32_to_cpu(attr->size),
3365					   GFP_NOFS);
3366			if (p2) {
3367				kfree(oa2->attr);
3368				oa2->attr = p2;
3369			}
3370		}
3371		mi->dirty = true;
3372		break;
3373
3374	case AddIndexEntryRoot:
3375		e = (struct NTFS_DE *)data;
3376		esize = le16_to_cpu(e->size);
3377		root = resident_data(attr);
3378		hdr = &root->ihdr;
3379		used = le32_to_cpu(hdr->used);
3380
3381		if (!check_if_index_root(rec, lrh) ||
3382		    !check_if_root_index(attr, hdr, lrh) ||
3383		    Add2Ptr(data, esize) > Add2Ptr(lrh, rec_len) ||
3384		    esize > le32_to_cpu(rec->total) - le32_to_cpu(rec->used)) {
3385			goto dirty_vol;
3386		}
3387
3388		e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3389
3390		change_attr_size(rec, attr, le32_to_cpu(attr->size) + esize);
3391
3392		memmove(Add2Ptr(e1, esize), e1,
3393			PtrOffset(e1, Add2Ptr(hdr, used)));
3394		memmove(e1, e, esize);
3395
3396		le32_add_cpu(&attr->res.data_size, esize);
3397		hdr->used = cpu_to_le32(used + esize);
3398		le32_add_cpu(&hdr->total, esize);
3399
3400		mi->dirty = true;
3401		break;
3402
3403	case DeleteIndexEntryRoot:
3404		root = resident_data(attr);
3405		hdr = &root->ihdr;
3406		used = le32_to_cpu(hdr->used);
3407
3408		if (!check_if_index_root(rec, lrh) ||
3409		    !check_if_root_index(attr, hdr, lrh)) {
3410			goto dirty_vol;
3411		}
3412
3413		e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3414		esize = le16_to_cpu(e1->size);
3415		e2 = Add2Ptr(e1, esize);
3416
3417		memmove(e1, e2, PtrOffset(e2, Add2Ptr(hdr, used)));
3418
3419		le32_sub_cpu(&attr->res.data_size, esize);
3420		hdr->used = cpu_to_le32(used - esize);
3421		le32_sub_cpu(&hdr->total, esize);
3422
3423		change_attr_size(rec, attr, le32_to_cpu(attr->size) - esize);
3424
3425		mi->dirty = true;
3426		break;
3427
3428	case SetIndexEntryVcnRoot:
3429		root = resident_data(attr);
3430		hdr = &root->ihdr;
3431
3432		if (!check_if_index_root(rec, lrh) ||
3433		    !check_if_root_index(attr, hdr, lrh)) {
3434			goto dirty_vol;
3435		}
3436
3437		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3438
3439		de_set_vbn_le(e, *(__le64 *)data);
3440		mi->dirty = true;
3441		break;
3442
3443	case UpdateFileNameRoot:
3444		root = resident_data(attr);
3445		hdr = &root->ihdr;
3446
3447		if (!check_if_index_root(rec, lrh) ||
3448		    !check_if_root_index(attr, hdr, lrh)) {
3449			goto dirty_vol;
3450		}
3451
3452		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3453		fname = (struct ATTR_FILE_NAME *)(e + 1);
3454		memmove(&fname->dup, data, sizeof(fname->dup)); //
3455		mi->dirty = true;
3456		break;
3457
3458	case UpdateRecordDataRoot:
3459		root = resident_data(attr);
3460		hdr = &root->ihdr;
3461
3462		if (!check_if_index_root(rec, lrh) ||
3463		    !check_if_root_index(attr, hdr, lrh)) {
3464			goto dirty_vol;
3465		}
3466
3467		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3468
3469		memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
3470
3471		mi->dirty = true;
3472		break;
3473
3474	case ZeroEndOfFileRecord:
3475		if (roff + dlen > record_size)
3476			goto dirty_vol;
3477
3478		memset(attr, 0, dlen);
3479		mi->dirty = true;
3480		break;
3481
3482	case UpdateNonresidentValue:
3483		if (lco < cbo + roff + dlen)
3484			goto dirty_vol;
3485
3486		memcpy(Add2Ptr(buffer_le, roff), data, dlen);
3487
3488		a_dirty = true;
3489		if (attr->type == ATTR_ALLOC)
3490			ntfs_fix_pre_write(buffer_le, bytes);
3491		break;
3492
3493	case AddIndexEntryAllocation:
3494		ib = Add2Ptr(buffer_le, roff);
3495		hdr = &ib->ihdr;
3496		e = data;
3497		esize = le16_to_cpu(e->size);
3498		e1 = Add2Ptr(ib, aoff);
3499
3500		if (is_baad(&ib->rhdr))
3501			goto dirty_vol;
3502		if (!check_lsn(&ib->rhdr, rlsn))
3503			goto out;
3504
3505		used = le32_to_cpu(hdr->used);
3506
3507		if (!check_index_buffer(ib, bytes) ||
3508		    !check_if_alloc_index(hdr, aoff) ||
3509		    Add2Ptr(e, esize) > Add2Ptr(lrh, rec_len) ||
3510		    used + esize > le32_to_cpu(hdr->total)) {
3511			goto dirty_vol;
3512		}
3513
3514		memmove(Add2Ptr(e1, esize), e1,
3515			PtrOffset(e1, Add2Ptr(hdr, used)));
3516		memcpy(e1, e, esize);
3517
3518		hdr->used = cpu_to_le32(used + esize);
3519
3520		a_dirty = true;
3521
3522		ntfs_fix_pre_write(&ib->rhdr, bytes);
3523		break;
3524
3525	case DeleteIndexEntryAllocation:
3526		ib = Add2Ptr(buffer_le, roff);
3527		hdr = &ib->ihdr;
3528		e = Add2Ptr(ib, aoff);
3529		esize = le16_to_cpu(e->size);
3530
3531		if (is_baad(&ib->rhdr))
3532			goto dirty_vol;
3533		if (!check_lsn(&ib->rhdr, rlsn))
3534			goto out;
3535
3536		if (!check_index_buffer(ib, bytes) ||
3537		    !check_if_alloc_index(hdr, aoff)) {
3538			goto dirty_vol;
3539		}
3540
3541		e1 = Add2Ptr(e, esize);
3542		nsize = esize;
3543		used = le32_to_cpu(hdr->used);
3544
3545		memmove(e, e1, PtrOffset(e1, Add2Ptr(hdr, used)));
3546
3547		hdr->used = cpu_to_le32(used - nsize);
3548
3549		a_dirty = true;
3550
3551		ntfs_fix_pre_write(&ib->rhdr, bytes);
3552		break;
3553
3554	case WriteEndOfIndexBuffer:
3555		ib = Add2Ptr(buffer_le, roff);
3556		hdr = &ib->ihdr;
3557		e = Add2Ptr(ib, aoff);
3558
3559		if (is_baad(&ib->rhdr))
3560			goto dirty_vol;
3561		if (!check_lsn(&ib->rhdr, rlsn))
3562			goto out;
3563		if (!check_index_buffer(ib, bytes) ||
3564		    !check_if_alloc_index(hdr, aoff) ||
3565		    aoff + dlen > offsetof(struct INDEX_BUFFER, ihdr) +
3566					  le32_to_cpu(hdr->total)) {
3567			goto dirty_vol;
3568		}
3569
3570		hdr->used = cpu_to_le32(dlen + PtrOffset(hdr, e));
3571		memmove(e, data, dlen);
3572
3573		a_dirty = true;
3574		ntfs_fix_pre_write(&ib->rhdr, bytes);
3575		break;
3576
3577	case SetIndexEntryVcnAllocation:
3578		ib = Add2Ptr(buffer_le, roff);
3579		hdr = &ib->ihdr;
3580		e = Add2Ptr(ib, aoff);
3581
3582		if (is_baad(&ib->rhdr))
3583			goto dirty_vol;
3584
3585		if (!check_lsn(&ib->rhdr, rlsn))
3586			goto out;
3587		if (!check_index_buffer(ib, bytes) ||
3588		    !check_if_alloc_index(hdr, aoff)) {
3589			goto dirty_vol;
3590		}
3591
3592		de_set_vbn_le(e, *(__le64 *)data);
3593
3594		a_dirty = true;
3595		ntfs_fix_pre_write(&ib->rhdr, bytes);
3596		break;
3597
3598	case UpdateFileNameAllocation:
3599		ib = Add2Ptr(buffer_le, roff);
3600		hdr = &ib->ihdr;
3601		e = Add2Ptr(ib, aoff);
3602
3603		if (is_baad(&ib->rhdr))
3604			goto dirty_vol;
3605
3606		if (!check_lsn(&ib->rhdr, rlsn))
3607			goto out;
3608		if (!check_index_buffer(ib, bytes) ||
3609		    !check_if_alloc_index(hdr, aoff)) {
3610			goto dirty_vol;
3611		}
3612
3613		fname = (struct ATTR_FILE_NAME *)(e + 1);
3614		memmove(&fname->dup, data, sizeof(fname->dup));
3615
3616		a_dirty = true;
3617		ntfs_fix_pre_write(&ib->rhdr, bytes);
3618		break;
3619
3620	case SetBitsInNonresidentBitMap:
3621		off = le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
3622		bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
3623
3624		if (cbo + (off + 7) / 8 > lco ||
3625		    cbo + ((off + bits + 7) / 8) > lco) {
3626			goto dirty_vol;
3627		}
3628
3629		ntfs_bitmap_set_le(Add2Ptr(buffer_le, roff), off, bits);
3630		a_dirty = true;
3631		break;
3632
3633	case ClearBitsInNonresidentBitMap:
3634		off = le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
3635		bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
3636
3637		if (cbo + (off + 7) / 8 > lco ||
3638		    cbo + ((off + bits + 7) / 8) > lco) {
3639			goto dirty_vol;
3640		}
3641
3642		ntfs_bitmap_clear_le(Add2Ptr(buffer_le, roff), off, bits);
3643		a_dirty = true;
3644		break;
3645
3646	case UpdateRecordDataAllocation:
3647		ib = Add2Ptr(buffer_le, roff);
3648		hdr = &ib->ihdr;
3649		e = Add2Ptr(ib, aoff);
3650
3651		if (is_baad(&ib->rhdr))
3652			goto dirty_vol;
3653
3654		if (!check_lsn(&ib->rhdr, rlsn))
3655			goto out;
3656		if (!check_index_buffer(ib, bytes) ||
3657		    !check_if_alloc_index(hdr, aoff)) {
3658			goto dirty_vol;
3659		}
3660
3661		memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
3662
3663		a_dirty = true;
3664		ntfs_fix_pre_write(&ib->rhdr, bytes);
3665		break;
3666
3667	default:
3668		WARN_ON(1);
3669	}
3670
3671	if (rlsn) {
3672		__le64 t64 = cpu_to_le64(*rlsn);
3673
3674		if (rec)
3675			rec->rhdr.lsn = t64;
3676		if (ib)
3677			ib->rhdr.lsn = t64;
3678	}
3679
3680	if (mi && mi->dirty) {
3681		err = mi_write(mi, 0);
3682		if (err)
3683			goto out;
3684	}
3685
3686	if (a_dirty) {
3687		attr = oa->attr;
3688		err = ntfs_sb_write_run(sbi, oa->run1, vbo, buffer_le, bytes,
3689					0);
3690		if (err)
3691			goto out;
3692	}
3693
3694out:
3695
3696	if (inode)
3697		iput(inode);
3698	else if (mi != mi2_child)
3699		mi_put(mi);
3700
3701	kfree(buffer_le);
3702
3703	return err;
3704
3705dirty_vol:
3706	log->set_dirty = true;
3707	goto out;
3708}
3709
3710/*
3711 * log_replay - Replays log and empties it.
3712 *
3713 * This function is called during mount operation.
3714 * It replays log and empties it.
3715 * Initialized is set false if logfile contains '-1'.
3716 */
3717int log_replay(struct ntfs_inode *ni, bool *initialized)
3718{
3719	int err;
3720	struct ntfs_sb_info *sbi = ni->mi.sbi;
3721	struct ntfs_log *log;
3722
3723	u64 rec_lsn, checkpt_lsn = 0, rlsn = 0;
3724	struct ATTR_NAME_ENTRY *attr_names = NULL;
3725	struct RESTART_TABLE *dptbl = NULL;
3726	struct RESTART_TABLE *trtbl = NULL;
3727	const struct RESTART_TABLE *rt;
3728	struct RESTART_TABLE *oatbl = NULL;
3729	struct inode *inode;
3730	struct OpenAttr *oa;
3731	struct ntfs_inode *ni_oe;
3732	struct ATTRIB *attr = NULL;
3733	u64 size, vcn, undo_next_lsn;
3734	CLST rno, lcn, lcn0, len0, clen;
3735	void *data;
3736	struct NTFS_RESTART *rst = NULL;
3737	struct lcb *lcb = NULL;
3738	struct OPEN_ATTR_ENRTY *oe;
3739	struct TRANSACTION_ENTRY *tr;
3740	struct DIR_PAGE_ENTRY *dp;
3741	u32 i, bytes_per_attr_entry;
3742	u32 vbo, tail, off, dlen;
3743	u32 saved_len, rec_len, transact_id;
3744	bool use_second_page;
3745	struct RESTART_AREA *ra2, *ra = NULL;
3746	struct CLIENT_REC *ca, *cr;
3747	__le16 client;
3748	struct RESTART_HDR *rh;
3749	const struct LFS_RECORD_HDR *frh;
3750	const struct LOG_REC_HDR *lrh;
3751	bool is_mapped;
3752	bool is_ro = sb_rdonly(sbi->sb);
3753	u64 t64;
3754	u16 t16;
3755	u32 t32;
3756
3757	log = kzalloc(sizeof(struct ntfs_log), GFP_NOFS);
3758	if (!log)
3759		return -ENOMEM;
3760
3761	log->ni = ni;
3762	log->l_size = log->orig_file_size = ni->vfs_inode.i_size;
3763
3764	/* Get the size of page. NOTE: To replay we can use default page. */
3765#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
3766	log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, true);
3767#else
3768	log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, false);
3769#endif
3770	if (!log->page_size) {
3771		err = -EINVAL;
3772		goto out;
3773	}
3774
3775	log->one_page_buf = kmalloc(log->page_size, GFP_NOFS);
3776	if (!log->one_page_buf) {
3777		err = -ENOMEM;
3778		goto out;
3779	}
3780
3781	log->page_mask = log->page_size - 1;
3782	log->page_bits = blksize_bits(log->page_size);
3783
3784	/* Look for a restart area on the disk. */
3785	err = log_read_rst(log, true, &log->rst_info);
3786	if (err)
3787		goto out;
3788
3789	/* remember 'initialized' */
3790	*initialized = log->rst_info.initialized;
3791
3792	if (!log->rst_info.restart) {
3793		if (log->rst_info.initialized) {
3794			/* No restart area but the file is not initialized. */
3795			err = -EINVAL;
3796			goto out;
3797		}
3798
3799		log_init_pg_hdr(log, 1, 1);
3800		log_create(log, 0, get_random_u32(), false, false);
3801
3802		ra = log_create_ra(log);
3803		if (!ra) {
3804			err = -ENOMEM;
3805			goto out;
3806		}
3807		log->ra = ra;
3808		log->init_ra = true;
3809
3810		goto process_log;
3811	}
3812
3813	/*
3814	 * If the restart offset above wasn't zero then we won't
3815	 * look for a second restart.
3816	 */
3817	if (log->rst_info.vbo)
3818		goto check_restart_area;
3819
3820	err = log_read_rst(log, false, &log->rst_info2);
3821	if (err)
3822		goto out;
3823
3824	/* Determine which restart area to use. */
3825	if (!log->rst_info2.restart ||
3826	    log->rst_info2.last_lsn <= log->rst_info.last_lsn)
3827		goto use_first_page;
3828
3829	use_second_page = true;
3830
3831	if (log->rst_info.chkdsk_was_run &&
3832	    log->page_size != log->rst_info.vbo) {
3833		struct RECORD_PAGE_HDR *sp = NULL;
3834		bool usa_error;
3835
3836		if (!read_log_page(log, log->page_size, &sp, &usa_error) &&
3837		    sp->rhdr.sign == NTFS_CHKD_SIGNATURE) {
3838			use_second_page = false;
3839		}
3840		kfree(sp);
3841	}
3842
3843	if (use_second_page) {
3844		kfree(log->rst_info.r_page);
3845		memcpy(&log->rst_info, &log->rst_info2,
3846		       sizeof(struct restart_info));
3847		log->rst_info2.r_page = NULL;
3848	}
3849
3850use_first_page:
3851	kfree(log->rst_info2.r_page);
3852
3853check_restart_area:
3854	/*
3855	 * If the restart area is at offset 0, we want
3856	 * to write the second restart area first.
3857	 */
3858	log->init_ra = !!log->rst_info.vbo;
3859
3860	/* If we have a valid page then grab a pointer to the restart area. */
3861	ra2 = log->rst_info.valid_page ?
3862		      Add2Ptr(log->rst_info.r_page,
3863			      le16_to_cpu(log->rst_info.r_page->ra_off)) :
3864		      NULL;
3865
3866	if (log->rst_info.chkdsk_was_run ||
3867	    (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) {
3868		bool wrapped = false;
3869		bool use_multi_page = false;
3870		u32 open_log_count;
3871
3872		/* Do some checks based on whether we have a valid log page. */
3873		open_log_count = log->rst_info.valid_page ?
3874					 le32_to_cpu(ra2->open_log_count) :
3875					 get_random_u32();
3876
3877		log_init_pg_hdr(log, 1, 1);
3878
3879		log_create(log, log->rst_info.last_lsn, open_log_count, wrapped,
3880			   use_multi_page);
3881
3882		ra = log_create_ra(log);
3883		if (!ra) {
3884			err = -ENOMEM;
3885			goto out;
3886		}
3887		log->ra = ra;
3888
3889		/* Put the restart areas and initialize
3890		 * the log file as required.
3891		 */
3892		goto process_log;
3893	}
3894
3895	if (!ra2) {
3896		err = -EINVAL;
3897		goto out;
3898	}
3899
3900	/*
3901	 * If the log page or the system page sizes have changed, we can't
3902	 * use the log file. We must use the system page size instead of the
3903	 * default size if there is not a clean shutdown.
3904	 */
3905	t32 = le32_to_cpu(log->rst_info.r_page->sys_page_size);
3906	if (log->page_size != t32) {
3907		log->l_size = log->orig_file_size;
3908		log->page_size = norm_file_page(t32, &log->l_size,
3909						t32 == DefaultLogPageSize);
3910	}
3911
3912	if (log->page_size != t32 ||
3913	    log->page_size != le32_to_cpu(log->rst_info.r_page->page_size)) {
3914		err = -EINVAL;
3915		goto out;
3916	}
3917
3918	/* If the file size has shrunk then we won't mount it. */
3919	if (log->l_size < le64_to_cpu(ra2->l_size)) {
3920		err = -EINVAL;
3921		goto out;
3922	}
3923
3924	log_init_pg_hdr(log, le16_to_cpu(log->rst_info.r_page->major_ver),
3925			le16_to_cpu(log->rst_info.r_page->minor_ver));
3926
3927	log->l_size = le64_to_cpu(ra2->l_size);
3928	log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits);
3929	log->file_data_bits = sizeof(u64) * 8 - log->seq_num_bits;
3930	log->seq_num_mask = (8 << log->file_data_bits) - 1;
3931	log->last_lsn = le64_to_cpu(ra2->current_lsn);
3932	log->seq_num = log->last_lsn >> log->file_data_bits;
3933	log->ra_off = le16_to_cpu(log->rst_info.r_page->ra_off);
3934	log->restart_size = log->sys_page_size - log->ra_off;
3935	log->record_header_len = le16_to_cpu(ra2->rec_hdr_len);
3936	log->ra_size = le16_to_cpu(ra2->ra_len);
3937	log->data_off = le16_to_cpu(ra2->data_off);
3938	log->data_size = log->page_size - log->data_off;
3939	log->reserved = log->data_size - log->record_header_len;
3940
3941	vbo = lsn_to_vbo(log, log->last_lsn);
3942
3943	if (vbo < log->first_page) {
3944		/* This is a pseudo lsn. */
3945		log->l_flags |= NTFSLOG_NO_LAST_LSN;
3946		log->next_page = log->first_page;
3947		goto find_oldest;
3948	}
3949
3950	/* Find the end of this log record. */
3951	off = final_log_off(log, log->last_lsn,
3952			    le32_to_cpu(ra2->last_lsn_data_len));
3953
3954	/* If we wrapped the file then increment the sequence number. */
3955	if (off <= vbo) {
3956		log->seq_num += 1;
3957		log->l_flags |= NTFSLOG_WRAPPED;
3958	}
3959
3960	/* Now compute the next log page to use. */
3961	vbo &= ~log->sys_page_mask;
3962	tail = log->page_size - (off & log->page_mask) - 1;
3963
3964	/*
3965	 * If we can fit another log record on the page,
3966	 * move back a page in the log file.
3967	 */
3968	if (tail >= log->record_header_len) {
3969		log->l_flags |= NTFSLOG_REUSE_TAIL;
3970		log->next_page = vbo;
3971	} else {
3972		log->next_page = next_page_off(log, vbo);
3973	}
3974
3975find_oldest:
3976	/*
3977	 * Find the oldest client lsn. Use the last
3978	 * flushed lsn as a starting point.
3979	 */
3980	log->oldest_lsn = log->last_lsn;
3981	oldest_client_lsn(Add2Ptr(ra2, le16_to_cpu(ra2->client_off)),
3982			  ra2->client_idx[1], &log->oldest_lsn);
3983	log->oldest_lsn_off = lsn_to_vbo(log, log->oldest_lsn);
3984
3985	if (log->oldest_lsn_off < log->first_page)
3986		log->l_flags |= NTFSLOG_NO_OLDEST_LSN;
3987
3988	if (!(ra2->flags & RESTART_SINGLE_PAGE_IO))
3989		log->l_flags |= NTFSLOG_WRAPPED | NTFSLOG_MULTIPLE_PAGE_IO;
3990
3991	log->current_openlog_count = le32_to_cpu(ra2->open_log_count);
3992	log->total_avail_pages = log->l_size - log->first_page;
3993	log->total_avail = log->total_avail_pages >> log->page_bits;
3994	log->max_current_avail = log->total_avail * log->reserved;
3995	log->total_avail = log->total_avail * log->data_size;
3996
3997	log->current_avail = current_log_avail(log);
3998
3999	ra = kzalloc(log->restart_size, GFP_NOFS);
4000	if (!ra) {
4001		err = -ENOMEM;
4002		goto out;
4003	}
4004	log->ra = ra;
4005
4006	t16 = le16_to_cpu(ra2->client_off);
4007	if (t16 == offsetof(struct RESTART_AREA, clients)) {
4008		memcpy(ra, ra2, log->ra_size);
4009	} else {
4010		memcpy(ra, ra2, offsetof(struct RESTART_AREA, clients));
4011		memcpy(ra->clients, Add2Ptr(ra2, t16),
4012		       le16_to_cpu(ra2->ra_len) - t16);
4013
4014		log->current_openlog_count = get_random_u32();
4015		ra->open_log_count = cpu_to_le32(log->current_openlog_count);
4016		log->ra_size = offsetof(struct RESTART_AREA, clients) +
4017			       sizeof(struct CLIENT_REC);
4018		ra->client_off =
4019			cpu_to_le16(offsetof(struct RESTART_AREA, clients));
4020		ra->ra_len = cpu_to_le16(log->ra_size);
4021	}
4022
4023	le32_add_cpu(&ra->open_log_count, 1);
4024
4025	/* Now we need to walk through looking for the last lsn. */
4026	err = last_log_lsn(log);
4027	if (err)
4028		goto out;
4029
4030	log->current_avail = current_log_avail(log);
4031
4032	/* Remember which restart area to write first. */
4033	log->init_ra = log->rst_info.vbo;
4034
4035process_log:
4036	/* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values. */
4037	switch ((log->major_ver << 16) + log->minor_ver) {
4038	case 0x10000:
4039	case 0x10001:
4040	case 0x20000:
4041		break;
4042	default:
4043		ntfs_warn(sbi->sb, "\x24LogFile version %d.%d is not supported",
4044			  log->major_ver, log->minor_ver);
4045		err = -EOPNOTSUPP;
4046		log->set_dirty = true;
4047		goto out;
4048	}
4049
4050	/* One client "NTFS" per logfile. */
4051	ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
4052
4053	for (client = ra->client_idx[1];; client = cr->next_client) {
4054		if (client == LFS_NO_CLIENT_LE) {
4055			/* Insert "NTFS" client LogFile. */
4056			client = ra->client_idx[0];
4057			if (client == LFS_NO_CLIENT_LE) {
4058				err = -EINVAL;
4059				goto out;
4060			}
4061
4062			t16 = le16_to_cpu(client);
4063			cr = ca + t16;
4064
4065			remove_client(ca, cr, &ra->client_idx[0]);
4066
4067			cr->restart_lsn = 0;
4068			cr->oldest_lsn = cpu_to_le64(log->oldest_lsn);
4069			cr->name_bytes = cpu_to_le32(8);
4070			cr->name[0] = cpu_to_le16('N');
4071			cr->name[1] = cpu_to_le16('T');
4072			cr->name[2] = cpu_to_le16('F');
4073			cr->name[3] = cpu_to_le16('S');
4074
4075			add_client(ca, t16, &ra->client_idx[1]);
4076			break;
4077		}
4078
4079		cr = ca + le16_to_cpu(client);
4080
4081		if (cpu_to_le32(8) == cr->name_bytes &&
4082		    cpu_to_le16('N') == cr->name[0] &&
4083		    cpu_to_le16('T') == cr->name[1] &&
4084		    cpu_to_le16('F') == cr->name[2] &&
4085		    cpu_to_le16('S') == cr->name[3])
4086			break;
4087	}
4088
4089	/* Update the client handle with the client block information. */
4090	log->client_id.seq_num = cr->seq_num;
4091	log->client_id.client_idx = client;
4092
4093	err = read_rst_area(log, &rst, &checkpt_lsn);
4094	if (err)
4095		goto out;
4096
4097	if (!rst)
4098		goto out;
4099
4100	bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28;
4101
4102	if (rst->check_point_start)
4103		checkpt_lsn = le64_to_cpu(rst->check_point_start);
4104
4105	/* Allocate and Read the Transaction Table. */
4106	if (!rst->transact_table_len)
4107		goto check_dirty_page_table;
4108
4109	t64 = le64_to_cpu(rst->transact_table_lsn);
4110	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4111	if (err)
4112		goto out;
4113
4114	lrh = lcb->log_rec;
4115	frh = lcb->lrh;
4116	rec_len = le32_to_cpu(frh->client_data_len);
4117
4118	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4119			   bytes_per_attr_entry)) {
4120		err = -EINVAL;
4121		goto out;
4122	}
4123
4124	t16 = le16_to_cpu(lrh->redo_off);
4125
4126	rt = Add2Ptr(lrh, t16);
4127	t32 = rec_len - t16;
4128
4129	/* Now check that this is a valid restart table. */
4130	if (!check_rstbl(rt, t32)) {
4131		err = -EINVAL;
4132		goto out;
4133	}
4134
4135	trtbl = kmemdup(rt, t32, GFP_NOFS);
4136	if (!trtbl) {
4137		err = -ENOMEM;
4138		goto out;
4139	}
4140
4141	lcb_put(lcb);
4142	lcb = NULL;
4143
4144check_dirty_page_table:
4145	/* The next record back should be the Dirty Pages Table. */
4146	if (!rst->dirty_pages_len)
4147		goto check_attribute_names;
4148
4149	t64 = le64_to_cpu(rst->dirty_pages_table_lsn);
4150	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4151	if (err)
4152		goto out;
4153
4154	lrh = lcb->log_rec;
4155	frh = lcb->lrh;
4156	rec_len = le32_to_cpu(frh->client_data_len);
4157
4158	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4159			   bytes_per_attr_entry)) {
4160		err = -EINVAL;
4161		goto out;
4162	}
4163
4164	t16 = le16_to_cpu(lrh->redo_off);
4165
4166	rt = Add2Ptr(lrh, t16);
4167	t32 = rec_len - t16;
4168
4169	/* Now check that this is a valid restart table. */
4170	if (!check_rstbl(rt, t32)) {
4171		err = -EINVAL;
4172		goto out;
4173	}
4174
4175	dptbl = kmemdup(rt, t32, GFP_NOFS);
4176	if (!dptbl) {
4177		err = -ENOMEM;
4178		goto out;
4179	}
4180
4181	/* Convert Ra version '0' into version '1'. */
4182	if (rst->major_ver)
4183		goto end_conv_1;
4184
4185	dp = NULL;
4186	while ((dp = enum_rstbl(dptbl, dp))) {
4187		struct DIR_PAGE_ENTRY_32 *dp0 = (struct DIR_PAGE_ENTRY_32 *)dp;
4188		// NOTE: Danger. Check for out-of-bounds access.
4189		memmove(&dp->vcn, &dp0->vcn_low,
4190			2 * sizeof(u64) +
4191				le32_to_cpu(dp->lcns_follow) * sizeof(u64));
4192	}
4193
4194end_conv_1:
4195	lcb_put(lcb);
4196	lcb = NULL;
4197
4198	/*
4199	 * Go through the table and remove the duplicates,
4200	 * remembering the oldest lsn values.
4201	 */
4202	if (sbi->cluster_size <= log->page_size)
4203		goto trace_dp_table;
4204
4205	dp = NULL;
4206	while ((dp = enum_rstbl(dptbl, dp))) {
4207		struct DIR_PAGE_ENTRY *next = dp;
4208
4209		while ((next = enum_rstbl(dptbl, next))) {
4210			if (next->target_attr == dp->target_attr &&
4211			    next->vcn == dp->vcn) {
4212				if (le64_to_cpu(next->oldest_lsn) <
4213				    le64_to_cpu(dp->oldest_lsn)) {
4214					dp->oldest_lsn = next->oldest_lsn;
4215				}
4216
4217				free_rsttbl_idx(dptbl, PtrOffset(dptbl, next));
4218			}
4219		}
4220	}
4221trace_dp_table:
4222check_attribute_names:
4223	/* The next record should be the Attribute Names. */
4224	if (!rst->attr_names_len)
4225		goto check_attr_table;
4226
4227	t64 = le64_to_cpu(rst->attr_names_lsn);
4228	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4229	if (err)
4230		goto out;
4231
4232	lrh = lcb->log_rec;
4233	frh = lcb->lrh;
4234	rec_len = le32_to_cpu(frh->client_data_len);
4235
4236	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4237			   bytes_per_attr_entry)) {
4238		err = -EINVAL;
4239		goto out;
4240	}
4241
4242	t32 = lrh_length(lrh);
4243	rec_len -= t32;
4244
4245	attr_names = kmemdup(Add2Ptr(lrh, t32), rec_len, GFP_NOFS);
4246	if (!attr_names) {
4247		err = -ENOMEM;
4248		goto out;
4249	}
4250
4251	lcb_put(lcb);
4252	lcb = NULL;
4253
4254check_attr_table:
4255	/* The next record should be the attribute Table. */
4256	if (!rst->open_attr_len)
4257		goto check_attribute_names2;
4258
4259	t64 = le64_to_cpu(rst->open_attr_table_lsn);
4260	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4261	if (err)
4262		goto out;
4263
4264	lrh = lcb->log_rec;
4265	frh = lcb->lrh;
4266	rec_len = le32_to_cpu(frh->client_data_len);
4267
4268	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4269			   bytes_per_attr_entry)) {
4270		err = -EINVAL;
4271		goto out;
4272	}
4273
4274	t16 = le16_to_cpu(lrh->redo_off);
4275
4276	rt = Add2Ptr(lrh, t16);
4277	t32 = rec_len - t16;
4278
4279	if (!check_rstbl(rt, t32)) {
4280		err = -EINVAL;
4281		goto out;
4282	}
4283
4284	oatbl = kmemdup(rt, t32, GFP_NOFS);
4285	if (!oatbl) {
4286		err = -ENOMEM;
4287		goto out;
4288	}
4289
4290	log->open_attr_tbl = oatbl;
4291
4292	/* Clear all of the Attr pointers. */
4293	oe = NULL;
4294	while ((oe = enum_rstbl(oatbl, oe))) {
4295		if (!rst->major_ver) {
4296			struct OPEN_ATTR_ENRTY_32 oe0;
4297
4298			/* Really 'oe' points to OPEN_ATTR_ENRTY_32. */
4299			memcpy(&oe0, oe, SIZEOF_OPENATTRIBUTEENTRY0);
4300
4301			oe->bytes_per_index = oe0.bytes_per_index;
4302			oe->type = oe0.type;
4303			oe->is_dirty_pages = oe0.is_dirty_pages;
4304			oe->name_len = 0;
4305			oe->ref = oe0.ref;
4306			oe->open_record_lsn = oe0.open_record_lsn;
4307		}
4308
4309		oe->is_attr_name = 0;
4310		oe->ptr = NULL;
4311	}
4312
4313	lcb_put(lcb);
4314	lcb = NULL;
4315
4316check_attribute_names2:
4317	if (rst->attr_names_len && oatbl) {
4318		struct ATTR_NAME_ENTRY *ane = attr_names;
4319		while (ane->off) {
4320			/* TODO: Clear table on exit! */
4321			oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
4322			t16 = le16_to_cpu(ane->name_bytes);
4323			oe->name_len = t16 / sizeof(short);
4324			oe->ptr = ane->name;
4325			oe->is_attr_name = 2;
4326			ane = Add2Ptr(ane,
4327				      sizeof(struct ATTR_NAME_ENTRY) + t16);
4328		}
4329	}
4330
4331	/*
4332	 * If the checkpt_lsn is zero, then this is a freshly
4333	 * formatted disk and we have no work to do.
4334	 */
4335	if (!checkpt_lsn) {
4336		err = 0;
4337		goto out;
4338	}
4339
4340	if (!oatbl) {
4341		oatbl = init_rsttbl(bytes_per_attr_entry, 8);
4342		if (!oatbl) {
4343			err = -ENOMEM;
4344			goto out;
4345		}
4346	}
4347
4348	log->open_attr_tbl = oatbl;
4349
4350	/* Start the analysis pass from the Checkpoint lsn. */
4351	rec_lsn = checkpt_lsn;
4352
4353	/* Read the first lsn. */
4354	err = read_log_rec_lcb(log, checkpt_lsn, lcb_ctx_next, &lcb);
4355	if (err)
4356		goto out;
4357
4358	/* Loop to read all subsequent records to the end of the log file. */
4359next_log_record_analyze:
4360	err = read_next_log_rec(log, lcb, &rec_lsn);
4361	if (err)
4362		goto out;
4363
4364	if (!rec_lsn)
4365		goto end_log_records_enumerate;
4366
4367	frh = lcb->lrh;
4368	transact_id = le32_to_cpu(frh->transact_id);
4369	rec_len = le32_to_cpu(frh->client_data_len);
4370	lrh = lcb->log_rec;
4371
4372	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
4373		err = -EINVAL;
4374		goto out;
4375	}
4376
4377	/*
4378	 * The first lsn after the previous lsn remembered in
4379	 * the checkpoint is the first candidate for the rlsn.
4380	 */
4381	if (!rlsn)
4382		rlsn = rec_lsn;
4383
4384	if (LfsClientRecord != frh->record_type)
4385		goto next_log_record_analyze;
4386
4387	/*
4388	 * Now update the Transaction Table for this transaction. If there
4389	 * is no entry present or it is unallocated we allocate the entry.
4390	 */
4391	if (!trtbl) {
4392		trtbl = init_rsttbl(sizeof(struct TRANSACTION_ENTRY),
4393				    INITIAL_NUMBER_TRANSACTIONS);
4394		if (!trtbl) {
4395			err = -ENOMEM;
4396			goto out;
4397		}
4398	}
4399
4400	tr = Add2Ptr(trtbl, transact_id);
4401
4402	if (transact_id >= bytes_per_rt(trtbl) ||
4403	    tr->next != RESTART_ENTRY_ALLOCATED_LE) {
4404		tr = alloc_rsttbl_from_idx(&trtbl, transact_id);
4405		if (!tr) {
4406			err = -ENOMEM;
4407			goto out;
4408		}
4409		tr->transact_state = TransactionActive;
4410		tr->first_lsn = cpu_to_le64(rec_lsn);
4411	}
4412
4413	tr->prev_lsn = tr->undo_next_lsn = cpu_to_le64(rec_lsn);
4414
4415	/*
4416	 * If this is a compensation log record, then change
4417	 * the undo_next_lsn to be the undo_next_lsn of this record.
4418	 */
4419	if (lrh->undo_op == cpu_to_le16(CompensationLogRecord))
4420		tr->undo_next_lsn = frh->client_undo_next_lsn;
4421
4422	/* Dispatch to handle log record depending on type. */
4423	switch (le16_to_cpu(lrh->redo_op)) {
4424	case InitializeFileRecordSegment:
4425	case DeallocateFileRecordSegment:
4426	case WriteEndOfFileRecordSegment:
4427	case CreateAttribute:
4428	case DeleteAttribute:
4429	case UpdateResidentValue:
4430	case UpdateNonresidentValue:
4431	case UpdateMappingPairs:
4432	case SetNewAttributeSizes:
4433	case AddIndexEntryRoot:
4434	case DeleteIndexEntryRoot:
4435	case AddIndexEntryAllocation:
4436	case DeleteIndexEntryAllocation:
4437	case WriteEndOfIndexBuffer:
4438	case SetIndexEntryVcnRoot:
4439	case SetIndexEntryVcnAllocation:
4440	case UpdateFileNameRoot:
4441	case UpdateFileNameAllocation:
4442	case SetBitsInNonresidentBitMap:
4443	case ClearBitsInNonresidentBitMap:
4444	case UpdateRecordDataRoot:
4445	case UpdateRecordDataAllocation:
4446	case ZeroEndOfFileRecord:
4447		t16 = le16_to_cpu(lrh->target_attr);
4448		t64 = le64_to_cpu(lrh->target_vcn);
4449		dp = find_dp(dptbl, t16, t64);
4450
4451		if (dp)
4452			goto copy_lcns;
4453
4454		/*
4455		 * Calculate the number of clusters per page for the system
4456		 * which wrote the checkpoint, possibly creating the table.
4457		 */
4458		if (dptbl) {
4459			t32 = (le16_to_cpu(dptbl->size) -
4460			       sizeof(struct DIR_PAGE_ENTRY)) /
4461			      sizeof(u64);
4462		} else {
4463			t32 = log->clst_per_page;
4464			kfree(dptbl);
4465			dptbl = init_rsttbl(struct_size(dp, page_lcns, t32),
4466					    32);
4467			if (!dptbl) {
4468				err = -ENOMEM;
4469				goto out;
4470			}
4471		}
4472
4473		dp = alloc_rsttbl_idx(&dptbl);
4474		if (!dp) {
4475			err = -ENOMEM;
4476			goto out;
4477		}
4478		dp->target_attr = cpu_to_le32(t16);
4479		dp->transfer_len = cpu_to_le32(t32 << sbi->cluster_bits);
4480		dp->lcns_follow = cpu_to_le32(t32);
4481		dp->vcn = cpu_to_le64(t64 & ~((u64)t32 - 1));
4482		dp->oldest_lsn = cpu_to_le64(rec_lsn);
4483
4484copy_lcns:
4485		/*
4486		 * Copy the Lcns from the log record into the Dirty Page Entry.
4487		 * TODO: For different page size support, must somehow make
4488		 * whole routine a loop, in case Lcns do not fit below.
4489		 */
4490		t16 = le16_to_cpu(lrh->lcns_follow);
4491		for (i = 0; i < t16; i++) {
4492			size_t j = (size_t)(le64_to_cpu(lrh->target_vcn) -
4493					    le64_to_cpu(dp->vcn));
4494			dp->page_lcns[j + i] = lrh->page_lcns[i];
4495		}
4496
4497		goto next_log_record_analyze;
4498
4499	case DeleteDirtyClusters: {
4500		u32 range_count =
4501			le16_to_cpu(lrh->redo_len) / sizeof(struct LCN_RANGE);
4502		const struct LCN_RANGE *r =
4503			Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
4504
4505		/* Loop through all of the Lcn ranges in this log record. */
4506		for (i = 0; i < range_count; i++, r++) {
4507			u64 lcn0 = le64_to_cpu(r->lcn);
4508			u64 lcn_e = lcn0 + le64_to_cpu(r->len) - 1;
4509
4510			dp = NULL;
4511			while ((dp = enum_rstbl(dptbl, dp))) {
4512				u32 j;
4513
4514				t32 = le32_to_cpu(dp->lcns_follow);
4515				for (j = 0; j < t32; j++) {
4516					t64 = le64_to_cpu(dp->page_lcns[j]);
4517					if (t64 >= lcn0 && t64 <= lcn_e)
4518						dp->page_lcns[j] = 0;
4519				}
4520			}
4521		}
4522		goto next_log_record_analyze;
4523		;
4524	}
4525
4526	case OpenNonresidentAttribute:
4527		t16 = le16_to_cpu(lrh->target_attr);
4528		if (t16 >= bytes_per_rt(oatbl)) {
4529			/*
4530			 * Compute how big the table needs to be.
4531			 * Add 10 extra entries for some cushion.
4532			 */
4533			u32 new_e = t16 / le16_to_cpu(oatbl->size);
4534
4535			new_e += 10 - le16_to_cpu(oatbl->used);
4536
4537			oatbl = extend_rsttbl(oatbl, new_e, ~0u);
4538			log->open_attr_tbl = oatbl;
4539			if (!oatbl) {
4540				err = -ENOMEM;
4541				goto out;
4542			}
4543		}
4544
4545		/* Point to the entry being opened. */
4546		oe = alloc_rsttbl_from_idx(&oatbl, t16);
4547		log->open_attr_tbl = oatbl;
4548		if (!oe) {
4549			err = -ENOMEM;
4550			goto out;
4551		}
4552
4553		/* Initialize this entry from the log record. */
4554		t16 = le16_to_cpu(lrh->redo_off);
4555		if (!rst->major_ver) {
4556			/* Convert version '0' into version '1'. */
4557			struct OPEN_ATTR_ENRTY_32 *oe0 = Add2Ptr(lrh, t16);
4558
4559			oe->bytes_per_index = oe0->bytes_per_index;
4560			oe->type = oe0->type;
4561			oe->is_dirty_pages = oe0->is_dirty_pages;
4562			oe->name_len = 0; //oe0.name_len;
4563			oe->ref = oe0->ref;
4564			oe->open_record_lsn = oe0->open_record_lsn;
4565		} else {
4566			memcpy(oe, Add2Ptr(lrh, t16), bytes_per_attr_entry);
4567		}
4568
4569		t16 = le16_to_cpu(lrh->undo_len);
4570		if (t16) {
4571			oe->ptr = kmalloc(t16, GFP_NOFS);
4572			if (!oe->ptr) {
4573				err = -ENOMEM;
4574				goto out;
4575			}
4576			oe->name_len = t16 / sizeof(short);
4577			memcpy(oe->ptr,
4578			       Add2Ptr(lrh, le16_to_cpu(lrh->undo_off)), t16);
4579			oe->is_attr_name = 1;
4580		} else {
4581			oe->ptr = NULL;
4582			oe->is_attr_name = 0;
4583		}
4584
4585		goto next_log_record_analyze;
4586
4587	case HotFix:
4588		t16 = le16_to_cpu(lrh->target_attr);
4589		t64 = le64_to_cpu(lrh->target_vcn);
4590		dp = find_dp(dptbl, t16, t64);
4591		if (dp) {
4592			size_t j = le64_to_cpu(lrh->target_vcn) -
4593				   le64_to_cpu(dp->vcn);
4594			if (dp->page_lcns[j])
4595				dp->page_lcns[j] = lrh->page_lcns[0];
4596		}
4597		goto next_log_record_analyze;
4598
4599	case EndTopLevelAction:
4600		tr = Add2Ptr(trtbl, transact_id);
4601		tr->prev_lsn = cpu_to_le64(rec_lsn);
4602		tr->undo_next_lsn = frh->client_undo_next_lsn;
4603		goto next_log_record_analyze;
4604
4605	case PrepareTransaction:
4606		tr = Add2Ptr(trtbl, transact_id);
4607		tr->transact_state = TransactionPrepared;
4608		goto next_log_record_analyze;
4609
4610	case CommitTransaction:
4611		tr = Add2Ptr(trtbl, transact_id);
4612		tr->transact_state = TransactionCommitted;
4613		goto next_log_record_analyze;
4614
4615	case ForgetTransaction:
4616		free_rsttbl_idx(trtbl, transact_id);
4617		goto next_log_record_analyze;
4618
4619	case Noop:
4620	case OpenAttributeTableDump:
4621	case AttributeNamesDump:
4622	case DirtyPageTableDump:
4623	case TransactionTableDump:
4624		/* The following cases require no action in the Analysis Pass. */
4625		goto next_log_record_analyze;
4626
4627	default:
4628		/*
4629		 * All codes will be explicitly handled.
4630		 * If we see a code we do not expect, then we are in trouble.
4631		 */
4632		goto next_log_record_analyze;
4633	}
4634
4635end_log_records_enumerate:
4636	lcb_put(lcb);
4637	lcb = NULL;
4638
4639	/*
4640	 * Scan the Dirty Page Table and Transaction Table for
4641	 * the lowest lsn, and return it as the Redo lsn.
4642	 */
4643	dp = NULL;
4644	while ((dp = enum_rstbl(dptbl, dp))) {
4645		t64 = le64_to_cpu(dp->oldest_lsn);
4646		if (t64 && t64 < rlsn)
4647			rlsn = t64;
4648	}
4649
4650	tr = NULL;
4651	while ((tr = enum_rstbl(trtbl, tr))) {
4652		t64 = le64_to_cpu(tr->first_lsn);
4653		if (t64 && t64 < rlsn)
4654			rlsn = t64;
4655	}
4656
4657	/*
4658	 * Only proceed if the Dirty Page Table or Transaction
4659	 * table are not empty.
4660	 */
4661	if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total))
4662		goto end_reply;
4663
4664	sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
4665	if (is_ro)
4666		goto out;
4667
4668	/* Reopen all of the attributes with dirty pages. */
4669	oe = NULL;
4670next_open_attribute:
4671
4672	oe = enum_rstbl(oatbl, oe);
4673	if (!oe) {
4674		err = 0;
4675		dp = NULL;
4676		goto next_dirty_page;
4677	}
4678
4679	oa = kzalloc(sizeof(struct OpenAttr), GFP_NOFS);
4680	if (!oa) {
4681		err = -ENOMEM;
4682		goto out;
4683	}
4684
4685	inode = ntfs_iget5(sbi->sb, &oe->ref, NULL);
4686	if (IS_ERR(inode))
4687		goto fake_attr;
4688
4689	if (is_bad_inode(inode)) {
4690		iput(inode);
4691fake_attr:
4692		if (oa->ni) {
4693			iput(&oa->ni->vfs_inode);
4694			oa->ni = NULL;
4695		}
4696
4697		attr = attr_create_nonres_log(sbi, oe->type, 0, oe->ptr,
4698					      oe->name_len, 0);
4699		if (!attr) {
4700			kfree(oa);
4701			err = -ENOMEM;
4702			goto out;
4703		}
4704		oa->attr = attr;
4705		oa->run1 = &oa->run0;
4706		goto final_oe;
4707	}
4708
4709	ni_oe = ntfs_i(inode);
4710	oa->ni = ni_oe;
4711
4712	attr = ni_find_attr(ni_oe, NULL, NULL, oe->type, oe->ptr, oe->name_len,
4713			    NULL, NULL);
4714
4715	if (!attr)
4716		goto fake_attr;
4717
4718	t32 = le32_to_cpu(attr->size);
4719	oa->attr = kmemdup(attr, t32, GFP_NOFS);
4720	if (!oa->attr)
4721		goto fake_attr;
4722
4723	if (!S_ISDIR(inode->i_mode)) {
4724		if (attr->type == ATTR_DATA && !attr->name_len) {
4725			oa->run1 = &ni_oe->file.run;
4726			goto final_oe;
4727		}
4728	} else {
4729		if (attr->type == ATTR_ALLOC &&
4730		    attr->name_len == ARRAY_SIZE(I30_NAME) &&
4731		    !memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME))) {
4732			oa->run1 = &ni_oe->dir.alloc_run;
4733			goto final_oe;
4734		}
4735	}
4736
4737	if (attr->non_res) {
4738		u16 roff = le16_to_cpu(attr->nres.run_off);
4739		CLST svcn = le64_to_cpu(attr->nres.svcn);
4740
4741		if (roff > t32) {
4742			kfree(oa->attr);
4743			oa->attr = NULL;
4744			goto fake_attr;
4745		}
4746
4747		err = run_unpack(&oa->run0, sbi, inode->i_ino, svcn,
4748				 le64_to_cpu(attr->nres.evcn), svcn,
4749				 Add2Ptr(attr, roff), t32 - roff);
4750		if (err < 0) {
4751			kfree(oa->attr);
4752			oa->attr = NULL;
4753			goto fake_attr;
4754		}
4755		err = 0;
4756	}
4757	oa->run1 = &oa->run0;
4758	attr = oa->attr;
4759
4760final_oe:
4761	if (oe->is_attr_name == 1)
4762		kfree(oe->ptr);
4763	oe->is_attr_name = 0;
4764	oe->ptr = oa;
4765	oe->name_len = attr->name_len;
4766
4767	goto next_open_attribute;
4768
4769	/*
4770	 * Now loop through the dirty page table to extract all of the Vcn/Lcn
4771	 * mapping that we have, and insert it into the appropriate run.
4772	 */
4773next_dirty_page:
4774	dp = enum_rstbl(dptbl, dp);
4775	if (!dp)
4776		goto do_redo_1;
4777
4778	oe = Add2Ptr(oatbl, le32_to_cpu(dp->target_attr));
4779
4780	if (oe->next != RESTART_ENTRY_ALLOCATED_LE)
4781		goto next_dirty_page;
4782
4783	oa = oe->ptr;
4784	if (!oa)
4785		goto next_dirty_page;
4786
4787	i = -1;
4788next_dirty_page_vcn:
4789	i += 1;
4790	if (i >= le32_to_cpu(dp->lcns_follow))
4791		goto next_dirty_page;
4792
4793	vcn = le64_to_cpu(dp->vcn) + i;
4794	size = (vcn + 1) << sbi->cluster_bits;
4795
4796	if (!dp->page_lcns[i])
4797		goto next_dirty_page_vcn;
4798
4799	rno = ino_get(&oe->ref);
4800	if (rno <= MFT_REC_MIRR &&
4801	    size < (MFT_REC_VOL + 1) * sbi->record_size &&
4802	    oe->type == ATTR_DATA) {
4803		goto next_dirty_page_vcn;
4804	}
4805
4806	lcn = le64_to_cpu(dp->page_lcns[i]);
4807
4808	if ((!run_lookup_entry(oa->run1, vcn, &lcn0, &len0, NULL) ||
4809	     lcn0 != lcn) &&
4810	    !run_add_entry(oa->run1, vcn, lcn, 1, false)) {
4811		err = -ENOMEM;
4812		goto out;
4813	}
4814	attr = oa->attr;
4815	if (size > le64_to_cpu(attr->nres.alloc_size)) {
4816		attr->nres.valid_size = attr->nres.data_size =
4817			attr->nres.alloc_size = cpu_to_le64(size);
4818	}
4819	goto next_dirty_page_vcn;
4820
4821do_redo_1:
4822	/*
4823	 * Perform the Redo Pass, to restore all of the dirty pages to the same
4824	 * contents that they had immediately before the crash. If the dirty
4825	 * page table is empty, then we can skip the entire Redo Pass.
4826	 */
4827	if (!dptbl || !dptbl->total)
4828		goto do_undo_action;
4829
4830	rec_lsn = rlsn;
4831
4832	/*
4833	 * Read the record at the Redo lsn, before falling
4834	 * into common code to handle each record.
4835	 */
4836	err = read_log_rec_lcb(log, rlsn, lcb_ctx_next, &lcb);
4837	if (err)
4838		goto out;
4839
4840	/*
4841	 * Now loop to read all of our log records forwards, until
4842	 * we hit the end of the file, cleaning up at the end.
4843	 */
4844do_action_next:
4845	frh = lcb->lrh;
4846
4847	if (LfsClientRecord != frh->record_type)
4848		goto read_next_log_do_action;
4849
4850	transact_id = le32_to_cpu(frh->transact_id);
4851	rec_len = le32_to_cpu(frh->client_data_len);
4852	lrh = lcb->log_rec;
4853
4854	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
4855		err = -EINVAL;
4856		goto out;
4857	}
4858
4859	/* Ignore log records that do not update pages. */
4860	if (lrh->lcns_follow)
4861		goto find_dirty_page;
4862
4863	goto read_next_log_do_action;
4864
4865find_dirty_page:
4866	t16 = le16_to_cpu(lrh->target_attr);
4867	t64 = le64_to_cpu(lrh->target_vcn);
4868	dp = find_dp(dptbl, t16, t64);
4869
4870	if (!dp)
4871		goto read_next_log_do_action;
4872
4873	if (rec_lsn < le64_to_cpu(dp->oldest_lsn))
4874		goto read_next_log_do_action;
4875
4876	t16 = le16_to_cpu(lrh->target_attr);
4877	if (t16 >= bytes_per_rt(oatbl)) {
4878		err = -EINVAL;
4879		goto out;
4880	}
4881
4882	oe = Add2Ptr(oatbl, t16);
4883
4884	if (oe->next != RESTART_ENTRY_ALLOCATED_LE) {
4885		err = -EINVAL;
4886		goto out;
4887	}
4888
4889	oa = oe->ptr;
4890
4891	if (!oa) {
4892		err = -EINVAL;
4893		goto out;
4894	}
4895	attr = oa->attr;
4896
4897	vcn = le64_to_cpu(lrh->target_vcn);
4898
4899	if (!run_lookup_entry(oa->run1, vcn, &lcn, NULL, NULL) ||
4900	    lcn == SPARSE_LCN) {
4901		goto read_next_log_do_action;
4902	}
4903
4904	/* Point to the Redo data and get its length. */
4905	data = Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
4906	dlen = le16_to_cpu(lrh->redo_len);
4907
4908	/* Shorten length by any Lcns which were deleted. */
4909	saved_len = dlen;
4910
4911	for (i = le16_to_cpu(lrh->lcns_follow); i; i--) {
4912		size_t j;
4913		u32 alen, voff;
4914
4915		voff = le16_to_cpu(lrh->record_off) +
4916		       le16_to_cpu(lrh->attr_off);
4917		voff += le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
4918
4919		/* If the Vcn in question is allocated, we can just get out. */
4920		j = le64_to_cpu(lrh->target_vcn) - le64_to_cpu(dp->vcn);
4921		if (dp->page_lcns[j + i - 1])
4922			break;
4923
4924		if (!saved_len)
4925			saved_len = 1;
4926
4927		/*
4928		 * Calculate the allocated space left relative to the
4929		 * log record Vcn, after removing this unallocated Vcn.
4930		 */
4931		alen = (i - 1) << sbi->cluster_bits;
4932
4933		/*
4934		 * If the update described in this log record goes beyond
4935		 * the allocated space, then we will have to reduce the length.
4936		 */
4937		if (voff >= alen)
4938			dlen = 0;
4939		else if (voff + dlen > alen)
4940			dlen = alen - voff;
4941	}
4942
4943	/*
4944	 * If the resulting dlen from above is now zero,
4945	 * we can skip this log record.
4946	 */
4947	if (!dlen && saved_len)
4948		goto read_next_log_do_action;
4949
4950	t16 = le16_to_cpu(lrh->redo_op);
4951	if (can_skip_action(t16))
4952		goto read_next_log_do_action;
4953
4954	/* Apply the Redo operation in a common routine. */
4955	err = do_action(log, oe, lrh, t16, data, dlen, rec_len, &rec_lsn);
4956	if (err)
4957		goto out;
4958
4959	/* Keep reading and looping back until end of file. */
4960read_next_log_do_action:
4961	err = read_next_log_rec(log, lcb, &rec_lsn);
4962	if (!err && rec_lsn)
4963		goto do_action_next;
4964
4965	lcb_put(lcb);
4966	lcb = NULL;
4967
4968do_undo_action:
4969	/* Scan Transaction Table. */
4970	tr = NULL;
4971transaction_table_next:
4972	tr = enum_rstbl(trtbl, tr);
4973	if (!tr)
4974		goto undo_action_done;
4975
4976	if (TransactionActive != tr->transact_state || !tr->undo_next_lsn) {
4977		free_rsttbl_idx(trtbl, PtrOffset(trtbl, tr));
4978		goto transaction_table_next;
4979	}
4980
4981	log->transaction_id = PtrOffset(trtbl, tr);
4982	undo_next_lsn = le64_to_cpu(tr->undo_next_lsn);
4983
4984	/*
4985	 * We only have to do anything if the transaction has
4986	 * something in its undo_next_lsn field.
4987	 */
4988	if (!undo_next_lsn)
4989		goto commit_undo;
4990
4991	/* Read the first record to be undone by this transaction. */
4992	err = read_log_rec_lcb(log, undo_next_lsn, lcb_ctx_undo_next, &lcb);
4993	if (err)
4994		goto out;
4995
4996	/*
4997	 * Now loop to read all of our log records forwards,
4998	 * until we hit the end of the file, cleaning up at the end.
4999	 */
5000undo_action_next:
5001
5002	lrh = lcb->log_rec;
5003	frh = lcb->lrh;
5004	transact_id = le32_to_cpu(frh->transact_id);
5005	rec_len = le32_to_cpu(frh->client_data_len);
5006
5007	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
5008		err = -EINVAL;
5009		goto out;
5010	}
5011
5012	if (lrh->undo_op == cpu_to_le16(Noop))
5013		goto read_next_log_undo_action;
5014
5015	oe = Add2Ptr(oatbl, le16_to_cpu(lrh->target_attr));
5016	oa = oe->ptr;
5017
5018	t16 = le16_to_cpu(lrh->lcns_follow);
5019	if (!t16)
5020		goto add_allocated_vcns;
5021
5022	is_mapped = run_lookup_entry(oa->run1, le64_to_cpu(lrh->target_vcn),
5023				     &lcn, &clen, NULL);
5024
5025	/*
5026	 * If the mapping isn't already in the table or the mapping
5027	 * corresponds to a hole in the mapping, we need to make sure
5028	 * there is no partial page already in memory.
5029	 */
5030	if (is_mapped && lcn != SPARSE_LCN && clen >= t16)
5031		goto add_allocated_vcns;
5032
5033	vcn = le64_to_cpu(lrh->target_vcn);
5034	vcn &= ~(u64)(log->clst_per_page - 1);
5035
5036add_allocated_vcns:
5037	for (i = 0, vcn = le64_to_cpu(lrh->target_vcn),
5038	    size = (vcn + 1) << sbi->cluster_bits;
5039	     i < t16; i++, vcn += 1, size += sbi->cluster_size) {
5040		attr = oa->attr;
5041		if (!attr->non_res) {
5042			if (size > le32_to_cpu(attr->res.data_size))
5043				attr->res.data_size = cpu_to_le32(size);
5044		} else {
5045			if (size > le64_to_cpu(attr->nres.data_size))
5046				attr->nres.valid_size = attr->nres.data_size =
5047					attr->nres.alloc_size =
5048						cpu_to_le64(size);
5049		}
5050	}
5051
5052	t16 = le16_to_cpu(lrh->undo_op);
5053	if (can_skip_action(t16))
5054		goto read_next_log_undo_action;
5055
5056	/* Point to the Undo data and get its length. */
5057	data = Add2Ptr(lrh, le16_to_cpu(lrh->undo_off));
5058	dlen = le16_to_cpu(lrh->undo_len);
5059
5060	/* It is time to apply the undo action. */
5061	err = do_action(log, oe, lrh, t16, data, dlen, rec_len, NULL);
5062
5063read_next_log_undo_action:
5064	/*
5065	 * Keep reading and looping back until we have read the
5066	 * last record for this transaction.
5067	 */
5068	err = read_next_log_rec(log, lcb, &rec_lsn);
5069	if (err)
5070		goto out;
5071
5072	if (rec_lsn)
5073		goto undo_action_next;
5074
5075	lcb_put(lcb);
5076	lcb = NULL;
5077
5078commit_undo:
5079	free_rsttbl_idx(trtbl, log->transaction_id);
5080
5081	log->transaction_id = 0;
5082
5083	goto transaction_table_next;
5084
5085undo_action_done:
5086
5087	ntfs_update_mftmirr(sbi, 0);
5088
5089	sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY;
5090
5091end_reply:
5092
5093	err = 0;
5094	if (is_ro)
5095		goto out;
5096
5097	rh = kzalloc(log->page_size, GFP_NOFS);
5098	if (!rh) {
5099		err = -ENOMEM;
5100		goto out;
5101	}
5102
5103	rh->rhdr.sign = NTFS_RSTR_SIGNATURE;
5104	rh->rhdr.fix_off = cpu_to_le16(offsetof(struct RESTART_HDR, fixups));
5105	t16 = (log->page_size >> SECTOR_SHIFT) + 1;
5106	rh->rhdr.fix_num = cpu_to_le16(t16);
5107	rh->sys_page_size = cpu_to_le32(log->page_size);
5108	rh->page_size = cpu_to_le32(log->page_size);
5109
5110	t16 = ALIGN(offsetof(struct RESTART_HDR, fixups) + sizeof(short) * t16,
5111		    8);
5112	rh->ra_off = cpu_to_le16(t16);
5113	rh->minor_ver = cpu_to_le16(1); // 0x1A:
5114	rh->major_ver = cpu_to_le16(1); // 0x1C:
5115
5116	ra2 = Add2Ptr(rh, t16);
5117	memcpy(ra2, ra, sizeof(struct RESTART_AREA));
5118
5119	ra2->client_idx[0] = 0;
5120	ra2->client_idx[1] = LFS_NO_CLIENT_LE;
5121	ra2->flags = cpu_to_le16(2);
5122
5123	le32_add_cpu(&ra2->open_log_count, 1);
5124
5125	ntfs_fix_pre_write(&rh->rhdr, log->page_size);
5126
5127	err = ntfs_sb_write_run(sbi, &ni->file.run, 0, rh, log->page_size, 0);
5128	if (!err)
5129		err = ntfs_sb_write_run(sbi, &log->ni->file.run, log->page_size,
5130					rh, log->page_size, 0);
5131
5132	kfree(rh);
5133	if (err)
5134		goto out;
5135
5136out:
5137	kfree(rst);
5138	if (lcb)
5139		lcb_put(lcb);
5140
5141	/*
5142	 * Scan the Open Attribute Table to close all of
5143	 * the open attributes.
5144	 */
5145	oe = NULL;
5146	while ((oe = enum_rstbl(oatbl, oe))) {
5147		rno = ino_get(&oe->ref);
5148
5149		if (oe->is_attr_name == 1) {
5150			kfree(oe->ptr);
5151			oe->ptr = NULL;
5152			continue;
5153		}
5154
5155		if (oe->is_attr_name)
5156			continue;
5157
5158		oa = oe->ptr;
5159		if (!oa)
5160			continue;
5161
5162		run_close(&oa->run0);
5163		kfree(oa->attr);
5164		if (oa->ni)
5165			iput(&oa->ni->vfs_inode);
5166		kfree(oa);
5167	}
5168
5169	kfree(trtbl);
5170	kfree(oatbl);
5171	kfree(dptbl);
5172	kfree(attr_names);
5173	kfree(log->rst_info.r_page);
5174
5175	kfree(ra);
5176	kfree(log->one_page_buf);
5177
5178	if (err)
5179		sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
5180
5181	if (err == -EROFS)
5182		err = 0;
5183	else if (log->set_dirty)
5184		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
5185
5186	kfree(log);
5187
5188	return err;
5189}