// SPDX-License-Identifier: GPL-2.0
/*
 *  Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved.
 *  Provide the pin memory method for check point and restore task.
 */
#ifdef CONFIG_PIN_MEMORY
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/sched/cputime.h>
#include <linux/tick.h>
#include <linux/mm.h>
#include <linux/pin_mem.h>
#include <linux/idr.h>
#include <linux/page-isolation.h>
#include <linux/sched/mm.h>
#include <linux/ctype.h>
#include <linux/highmem.h>
#include <crypto/sha2.h>
#include <linux/memblock.h>

#define MAX_PIN_PID_NUM  128
#define DEFAULT_REDIRECT_SPACE_SIZE  0x100000

static DEFINE_SPINLOCK(page_map_entry_lock);
static DEFINE_MUTEX(pin_mem_mutex);
static struct pin_mem_dump_info *pin_mem_dump_start;
static unsigned int pin_pid_num;
static unsigned int *pin_pid_num_addr;
static struct page_map_entry *__page_map_entry_start;
static unsigned long page_map_entry_end;
static struct page_map_info *user_space_reserve_start;
static struct page_map_entry *page_map_entry_start;

unsigned int max_pin_pid_num __read_mostly;
unsigned long redirect_space_size __read_mostly;
static unsigned long redirect_space_start;
static void *pin_mem_pagewalk;
static unsigned long *pagemap_buffer;
static int reserve_user_map_pages_fail;

/*
 * Early-param handler for "max_pin_pid_num=".
 *
 * Parses @str as a base-10 unsigned integer straight into max_pin_pid_num.
 * A value above MAX_PIN_PID_NUM is rejected by resetting max_pin_pid_num
 * to 0. Returns 0 when @str is NULL or parsed successfully, otherwise the
 * error reported by kstrtouint().
 */
static int __init setup_max_pin_pid_num(char *str)
{
	int err;

	if (str == NULL)
		return 0;

	err = kstrtouint(str, 10, &max_pin_pid_num);
	if (err) {
		pr_warn("Unable to parse max pin pid num.\n");
		return err;
	}

	if (max_pin_pid_num > MAX_PIN_PID_NUM) {
		max_pin_pid_num = 0;
		pr_warn("Input max_pin_pid_num is too large.\n");
	}
	return 0;
}
early_param("max_pin_pid_num", setup_max_pin_pid_num);

/*
 * Early-param handler for "redirect_space_size=".
 *
 * Stores the memparse() result in redirect_space_size; a result of zero
 * is replaced by DEFAULT_REDIRECT_SPACE_SIZE. Always returns 0.
 */
static int __init setup_redirect_space_size(char *str)
{
	unsigned long parsed;

	if (str == NULL)
		return 0;

	parsed = memparse(str, NULL);
	if (parsed == 0) {
		pr_warn("Unable to parse redirect space size, use the default value.\n");
		parsed = DEFAULT_REDIRECT_SPACE_SIZE;
	}
	redirect_space_size = parsed;
	return 0;
}
early_param("redirect_space_size", setup_redirect_space_size);

/*
 * Claim the next unused page_map_info slot for @pid.
 *
 * The slot at index pin_pid_num of the user_space_reserve_start array is
 * initialised to an empty record, then both the in-memory counter and the
 * counter behind pin_pid_num_addr are incremented.
 *
 * Returns the new record, or NULL when the array is not set up or
 * max_pin_pid_num records are already in use. No locking is done here.
 */
static struct page_map_info *create_page_map_info(int pid)
{
	struct page_map_info *slot;

	if (user_space_reserve_start == NULL)
		return NULL;

	if (pin_pid_num >= max_pin_pid_num) {
		pr_warn("Pin pid num too large than max_pin_pid_num, fail create: %d!", pid);
		return NULL;
	}

	slot = &user_space_reserve_start[pin_pid_num];
	slot->pid = pid;
	slot->pme = NULL;
	slot->entry_num = 0;
	slot->pid_reserved = false;
	slot->disable_free_page = false;

	*pin_pid_num_addr += 1;
	pin_pid_num += 1;
	return slot;
}

/*
 * Locked wrapper around create_page_map_info(): runs it with
 * page_map_entry_lock held and local interrupts disabled.
 */
struct page_map_info *create_page_map_info_by_pid(int pid)
{
	struct page_map_info *pmi;
	unsigned long irq_flags;

	spin_lock_irqsave(&page_map_entry_lock, irq_flags);
	pmi = create_page_map_info(pid);
	spin_unlock_irqrestore(&page_map_entry_lock, irq_flags);

	return pmi;
}
EXPORT_SYMBOL_GPL(create_page_map_info_by_pid);

static struct page_map_info *get_page_map_info(int pid)
{
	int i;

	if (!user_space_reserve_start)
		return NULL;

	for (i = 0; i < pin_pid_num; i++) {
		if (user_space_reserve_start[i].pid == pid)
			return &(user_space_reserve_start[i]);
	}
	return NULL;
}

/*
 * Locked wrapper around get_page_map_info(): runs the lookup with
 * page_map_entry_lock held and local interrupts disabled.
 */
struct page_map_info *get_page_map_info_by_pid(int pid)
{
	struct page_map_info *pmi;
	unsigned long irq_flags;

	spin_lock_irqsave(&page_map_entry_lock, irq_flags);
	pmi = get_page_map_info(pid);
	spin_unlock_irqrestore(&page_map_entry_lock, irq_flags);

	return pmi;
}
EXPORT_SYMBOL_GPL(get_page_map_info_by_pid);

/*
 * Scan backwards from @page, one struct page at a time, until a page
 * flagged PageBuddy is reached, and return it. The scan gives up with
 * NULL as soon as it meets a page flagged PageLRU.
 *
 * NOTE(review): the backward walk has no lower bound of its own.
 */
static struct page *find_head_page(struct page *page)
{
	struct page *cur;

	for (cur = page; !PageBuddy(cur); cur--) {
		if (PageLRU(cur))
			return NULL;
	}
	return cur;
}

/*
 * Hand the leading @size pages of a block back to the buddy free lists.
 *
 * @zone:  zone of the block (not referenced in the body).
 * @area:  free_area entry matching @order; stepped down by one entry each
 *         time the chunk size is halved.
 * @page:  first page of the original block.
 * @size:  number of pages, counted from @page, to give back.
 * @order: order of the original block.
 *
 * The chunk size starts at 1 << @order and is halved repeatedly. Whenever
 * the halved chunk fits into what is left of @size, a chunk beginning at
 * @page + total_size is linked onto the MIGRATE_MOVABLE free list of the
 * matching order, marked as a buddy page and accounted in nr_free.
 */
static void spilt_page_area_left(struct zone *zone, struct free_area *area, struct page *page,
	unsigned long size, int order)
{
	unsigned long cur_size = 1 << order;
	/* Pages already handed back, i.e. offset of the next chunk from @page. */
	unsigned long total_size = 0;

	while (size && cur_size > size) {
		/* Drop to the next smaller order. */
		cur_size >>= 1;
		order--;
		area--;
		if (cur_size <= size) {
			list_add(&page[total_size].lru, &area->free_list[MIGRATE_MOVABLE]);
			/* Mark the chunk head as a free buddy page of this order. */
			atomic_set(&(page[total_size]._mapcount), PAGE_BUDDY_MAPCOUNT_VALUE);
			set_page_private(&page[total_size], order);
			set_pageblock_migratetype(&page[total_size], MIGRATE_MOVABLE);
			area->nr_free++;
			total_size += cur_size;
			size -= cur_size;
		}
	}
}

/*
 * Hand the @size pages starting at @page back to the buddy free lists,
 * carving chunks from the high end of that range downwards.
 *
 * @zone:  zone of the block (not referenced in the body).
 * @area:  free_area entry matching @order; stepped down by one entry each
 *         time the chunk size is halved.
 * @page:  first page of the range to give back.
 * @size:  number of pages in the range.
 * @order: order of the original block the range was part of.
 *
 * The chunk size starts at 1 << @order and is halved repeatedly. Whenever
 * the halved chunk fits into what is left of @size, the chunk ending at
 * right_page is linked onto the MIGRATE_MOVABLE free list of the matching
 * order, marked as a buddy page and accounted in nr_free.
 */
static void spilt_page_area_right(struct zone *zone, struct free_area *area, struct page *page,
		unsigned long size, int order)
{
	unsigned long cur_size = 1 << order;
	struct page *right_page, *head_page;

	/* One past the last page of the range still to be handed back. */
	right_page = page + size;
	while (size && cur_size > size) {
		/* Drop to the next smaller order. */
		cur_size >>= 1;
		order--;
		area--;
		if (cur_size <= size) {
			head_page = right_page - cur_size;
			list_add(&head_page->lru, &area->free_list[MIGRATE_MOVABLE]);
			/* Mark the chunk head as a free buddy page of this order. */
			atomic_set(&(head_page->_mapcount), PAGE_BUDDY_MAPCOUNT_VALUE);
			set_page_private(head_page, order);
			set_pageblock_migratetype(head_page, MIGRATE_MOVABLE);
			area->nr_free++;
			size -= cur_size;
			right_page = head_page;
		}
	}
}

/*
 * Take @nr_pages pages starting at @page out of the buddy free lists.
 *
 * The free block containing @page is located with find_head_page(), removed
 * from its free list, and the parts of it before @page and after
 * @page + @nr_pages are handed back via spilt_page_area_left() and
 * spilt_page_area_right().
 *
 * NOTE(review): no zone lock is taken in this function; confirm that the
 * callers guarantee exclusive access to the free lists.
 */
void reserve_page_from_buddy(unsigned long nr_pages, struct page *page)
{
	unsigned int current_order;
	struct page *page_end;
	struct free_area *area;
	struct zone *zone;
	struct page *head_page;

	head_page = find_head_page(page);
	if (!head_page) {
		pr_warn("Find page head fail.");
		return;
	}

	/* The order of the free block is read from the head page's private field. */
	current_order = head_page->private;
	page_end = head_page + (1 << current_order);
	zone = page_zone(head_page);
	area = &(zone->free_area[current_order]);
	/* Unlink the whole block and clear its buddy markings. */
	list_del(&head_page->lru);
	atomic_set(&head_page->_mapcount, -1);
	set_page_private(head_page, 0);
	area->nr_free--;

	/* Give back the pages in front of the requested range. */
	if (head_page != page)
		spilt_page_area_left(zone, area, head_page,
			(unsigned long)(page - head_page), current_order);
	/* From here on @page is one past the end of the requested range. */
	page = page + nr_pages;
	if (page < page_end) {
		/* Give back the pages behind the requested range. */
		spilt_page_area_right(zone, area, page,
			(unsigned long)(page_end - page), current_order);
	} else if (page > page_end) {
		/* The requested range runs past the end of the free block. */
		pr_warn("Find page end smaller than page.");
	}
}

/*
 * Reserve a single base page: bump its reference count, then take that
 * one page out of the buddy free lists.
 */
static inline void reserve_user_normal_pages(struct page *page)
{
	atomic_inc(&page->_refcount);
	reserve_page_from_buddy(1, page);
}

/*
 * Set up the compound-page metadata for the HPAGE_PMD_NR pages starting
 * at @head_page.
 *
 * The head page gets PG_head and PG_active and a reference count of 1.
 * Every tail page stores the head page's address with bit 0 set in
 * compound_head. In addition the first tail page records the destructor
 * id and HPAGE_PMD_ORDER, and the second tail page gets an initialised
 * deferred_list.
 */
static void init_huge_pmd_pages(struct page *head_page)
{
	/* Value every tail page stores in compound_head. */
	const unsigned long tail_marker = (unsigned long)head_page + 1;
	struct page *page;
	int i;

	__set_bit(PG_head, &head_page->flags);
	__set_bit(PG_active, &head_page->flags);
	atomic_set(&head_page->_refcount, 1);

	/* First tail page: destructor id and order of the compound page. */
	page = head_page + 1;
	page->compound_head = tail_marker;
	page->compound_dtor = HUGETLB_PAGE_DTOR + 1;
	page->compound_order = HPAGE_PMD_ORDER;

	/* Second tail page: holds the deferred list head. */
	page = head_page + 2;
	page->compound_head = tail_marker;
	INIT_LIST_HEAD(&(page->deferred_list));

	/* Remaining tail pages only need to point back at the head. */
	for (i = 3; i < HPAGE_PMD_NR; i++)
		head_page[i].compound_head = tail_marker;
}

/*
 * Reserve one PMD-sized huge page: bump the reference count of @page, take
 * 1 << HPAGE_PMD_ORDER pages starting at @page out of the buddy free lists,
 * then rebuild the compound-page metadata with init_huge_pmd_pages().
 */
static inline void reserve_user_huge_pmd_pages(struct page *page)
{
	atomic_inc(&page->_refcount);
	reserve_page_from_buddy((1 << HPAGE_PMD_ORDER), page);
	init_huge_pmd_pages(page);
}

/*
 * Free the pages recorded in the first @nr_pages slots of @pme.
 *
 * Empty slots and pages that have any PAGE_FLAGS_CHECK_RESERVED bit set
 * are skipped; every other page is released with __free_pages() (order
 * HPAGE_PMD_ORDER for huge-page entries, 0 otherwise) and its slot cleared.
 */
static void free_pme_recorded_pages(struct page_map_entry *pme, unsigned long nr_pages)
{
	unsigned int order = pme->is_huge_page ? HPAGE_PMD_ORDER : 0;
	unsigned long phy_addr;
	struct page *page;
	unsigned int j;

	for (j = 0; j < nr_pages; j++) {
		phy_addr = pme->phy_addr_array[j];
		if (!phy_addr)
			continue;

		page = phys_to_page(phy_addr);
		if (page->flags & PAGE_FLAGS_CHECK_RESERVED)
			continue;

		__free_pages(page, order);
		pme->phy_addr_array[j] = 0;
	}
}

/*
 * Free every recorded page that precedes the position
 * (@pid_index, @entry_index, @page_index) in the page map.
 *
 * @pid_index:   index of the page_map_info record being processed; all
 *               entries of the records before it are freed completely.
 * @entry_index: index of the entry being processed inside that record;
 *               the entries before it are freed completely.
 * @page_index:  index of the page being processed inside that entry; only
 *               the slots before it are freed.
 */
void free_user_map_pages(unsigned int pid_index, unsigned int entry_index, unsigned int page_index)
{
	unsigned int i, index;
	struct page_map_info *pmi;
	struct page_map_entry *pme;

	/* Records that were fully processed. */
	for (index = 0; index < pid_index; index++) {
		pmi = &(user_space_reserve_start[index]);
		pme = pmi->pme;
		for (i = 0; i < pmi->entry_num; i++) {
			free_pme_recorded_pages(pme, pme->nr_pages);
			pme = (struct page_map_entry *)next_pme(pme);
		}
	}

	/* Fully processed entries of the record that was in progress. */
	pmi = &(user_space_reserve_start[pid_index]);
	pme = pmi->pme;
	for (i = 0; i < entry_index; i++) {
		free_pme_recorded_pages(pme, pme->nr_pages);
		pme = (struct page_map_entry *)next_pme(pme);
	}

	/* Already processed pages of the entry that was in progress. */
	free_pme_recorded_pages(pme, page_index);
}

/*
 * Check that a redirect_info header at @redirect_start followed by
 * @max_redirect_page_num unsigned int indexes still ends inside the
 * redirect space [redirect_space_start,
 * redirect_space_start + redirect_space_size].
 */
bool check_redirect_end_valid(struct redirect_info *redirect_start,
			unsigned long max_redirect_page_num)
{
	unsigned long needed_end = (unsigned long)(redirect_start + 1) +
		max_redirect_page_num * sizeof(unsigned int);
	unsigned long space_end = redirect_space_start + redirect_space_size;

	return needed_end <= space_end;
}

/*
 * Walk every recorded page of every page_map_info record and reserve it.
 *
 * Pages whose reference count is zero are taken out of the buddy free
 * lists (as a base page or as a PMD huge page, depending on the entry).
 * Pages that already have a non-zero reference count are only tolerated
 * when they carry a PAGE_FLAGS_CHECK_RESERVED bit and redirect space is
 * still available: their index inside the entry is appended to the
 * entry's redirect_info. Any other in-use page sets
 * reserve_user_map_pages_fail and frees what was processed so far.
 *
 * Runs with page_map_entry_lock held and interrupts disabled.
 */
static void reserve_user_space_map_pages(void)
{
	struct page_map_info *pmi;
	struct page_map_entry *pme;
	unsigned int i, j, index;
	struct page *page;
	unsigned long flags;
	unsigned long phy_addr;
	unsigned long redirect_pages = 0;
	struct redirect_info *redirect_start = (struct redirect_info *)redirect_space_start;

	if (!user_space_reserve_start || !redirect_start)
		return;
	spin_lock_irqsave(&page_map_entry_lock, flags);
	for (index = 0; index < pin_pid_num; index++) {
		pmi = &(user_space_reserve_start[index]);
		pme = pmi->pme;
		for (i = 0; i < pmi->entry_num; i++) {
			redirect_pages = 0;
			/* Stop recording redirects once the worst case no longer fits. */
			if (!check_redirect_end_valid(redirect_start, pme->nr_pages))
				redirect_start = NULL;

			for (j = 0; j < pme->nr_pages; j++) {
				phy_addr = pme->phy_addr_array[j];
				if (!phy_addr)
					continue;
				page = phys_to_page(phy_addr);
				if (atomic_read(&page->_refcount)) {
					/* Remember where this entry's redirect record lives. */
					if ((page->flags & PAGE_FLAGS_CHECK_RESERVED)
						&& !pme->redirect_start)
						pme->redirect_start =
							(unsigned long)redirect_start;

					if (redirect_start &&
						(page->flags & PAGE_FLAGS_CHECK_RESERVED)) {
						redirect_start->redirect_index[redirect_pages] = j;
						redirect_pages++;
						continue;
					} else {
						reserve_user_map_pages_fail = 1;
						pr_warn("Page %pK refcount %d large than zero, no need reserve.\n",
						page, atomic_read(&page->_refcount));
						goto free_pages;
					}
				}

				if (!pme->is_huge_page)
					reserve_user_normal_pages(page);
				else
					reserve_user_huge_pmd_pages(page);
			}
			pme = (struct page_map_entry *)next_pme(pme);
			/* Close this entry's redirect record and move past its index array. */
			if (redirect_pages && redirect_start) {
				redirect_start->redirect_pages = redirect_pages;
				redirect_start = (struct redirect_info *)(
					(unsigned long)(redirect_start + 1) +
					redirect_start->redirect_pages * sizeof(unsigned int));
			}
		}
	}
	spin_unlock_irqrestore(&page_map_entry_lock, flags);
	return;

free_pages:
	/* Undo everything before position (index, i, j). */
	free_user_map_pages(index, i, j);
	spin_unlock_irqrestore(&page_map_entry_lock, flags);
}


int calculate_pin_mem_digest(struct pin_mem_dump_info *pmdi, char *digest)
{
	int i;
	struct sha256_state sctx;

	if (!digest)
		digest = pmdi->sha_digest;
	sha256_init(&sctx);
	sha256_update(&sctx, (unsigned char *)(&(pmdi->magic)),
		sizeof(struct pin_mem_dump_info) - SHA256_DIGEST_SIZE);
	for (i = 0; i < pmdi->pin_pid_num; i++) {
		sha256_update(&sctx, (unsigned char *)(&(pmdi->pmi_array[i])),
			sizeof(struct page_map_info));
	}
	sha256_final(&sctx, digest);
	return 0;
}

/*
 * Recompute the digest of @pmdi and compare it with the stored
 * @pmdi->sha_digest.
 *
 * Returns 0 on match, -EFAULT on mismatch, or the error reported by
 * calculate_pin_mem_digest().
 */
static int check_sha_digest(struct pin_mem_dump_info *pmdi)
{
	char computed[SHA256_DIGEST_SIZE] = {0};
	int err;

	err = calculate_pin_mem_digest(pmdi, computed);
	if (err) {
		pr_warn("calculate pin mem digest fail:%d\n", err);
		return err;
	}

	if (memcmp(pmdi->sha_digest, computed, SHA256_DIGEST_SIZE) != 0) {
		pr_warn("pin mem dump info sha256 digest match error!\n");
		return -EFAULT;
	}
	return 0;
}

/*
 * The whole page map entry collect process must be Sequentially.
 * The user_space_reserve_start points to the first page map info for
 * the first dump task. And the page_map_entry_start points to
 * the first page map entry of the first dump vma.
 */
static void init_page_map_info(struct pin_mem_dump_info *pmdi, unsigned long map_len)
{
	if (pin_mem_dump_start || !max_pin_pid_num) {
		pr_warn("pin page map already init or max_pin_pid_num not set.\n");
		return;
	}
	if (map_len < sizeof(struct pin_mem_dump_info) +
		max_pin_pid_num * sizeof(struct page_map_info) + redirect_space_size) {
		pr_warn("pin memory reserved memblock too small.\n");
		return;
	}
	if ((pmdi->magic != PIN_MEM_DUMP_MAGIC) || (pmdi->pin_pid_num > max_pin_pid_num) ||
		check_sha_digest(pmdi))
		memset(pmdi, 0, sizeof(struct pin_mem_dump_info));

	pin_mem_dump_start = pmdi;
	pin_pid_num = pmdi->pin_pid_num;
	pr_info("pin_pid_num: %d\n", pin_pid_num);
	pin_pid_num_addr = &(pmdi->pin_pid_num);
	user_space_reserve_start =
		(struct page_map_info *)pmdi->pmi_array;
	page_map_entry_start =
		(struct page_map_entry *)(user_space_reserve_start + max_pin_pid_num);
	__page_map_entry_start = page_map_entry_start;
	page_map_entry_end = (unsigned long)pmdi + map_len - redirect_space_size;
	redirect_space_start = page_map_entry_end;

	if (pin_pid_num > 0)
		reserve_user_space_map_pages();
}

/*
 * Seal the dump header: store the magic value, clear the stored digest and
 * recompute it in place.
 *
 * Returns -EFAULT when the dump area was never initialised, otherwise the
 * result of calculate_pin_mem_digest().
 */
int finish_pin_mem_dump(void)
{
	int err;

	if (pin_mem_dump_start == NULL)
		return -EFAULT;

	pin_mem_dump_start->magic = PIN_MEM_DUMP_MAGIC;
	memset(pin_mem_dump_start->sha_digest, 0, SHA256_DIGEST_SIZE);

	err = calculate_pin_mem_digest(pin_mem_dump_start, NULL);
	if (err)
		pr_warn("calculate pin mem digest fail:%d\n", err);
	return err;
}
EXPORT_SYMBOL_GPL(finish_pin_mem_dump);

/*
 * Record the PMD-sized huge pages backing [@start_addr, @end_addr) of
 * @task in @pme->phy_addr_array, one slot per HPAGE_PMD_SIZE step.
 *
 * For each step the pagemap entries are fetched into pagemap_buffer:
 *  - a single swapped-out entry is faulted in with get_user_pages_remote()
 *    and its physical address recorded;
 *  - a present compound head page is flagged Hotreplace, gets an extra
 *    reference and is recorded;
 *  - a range with no frame number in any later entry is recorded as 0.
 *
 * Returns COLLECT_PAGES_FINISH when the whole range was walked,
 * COLLECT_PAGES_NEED_CONTINUE when a step turned out not to be a huge page
 * (@pme->nr_pages then holds the number of slots filled so far), or
 * COLLECT_PAGES_FAIL on error.
 */
int collect_pmd_huge_pages(struct task_struct *task,
	unsigned long start_addr, unsigned long end_addr, struct page_map_entry *pme)
{
	int ret, i, res;
	int index = 0;
	unsigned long start = start_addr;
	struct page *temp_page;
	unsigned long *pte_entry = pagemap_buffer;
	unsigned int count;
	struct mm_struct *mm = task->mm;

	while (start < end_addr) {
		temp_page = NULL;
		count = 0;
		ret = pagemap_get(mm, pin_mem_pagewalk,
			start, start + HPAGE_PMD_SIZE, pte_entry, &count);
		if (ret || !count) {
			pr_warn("Get huge page fail: %d.", ret);
			return COLLECT_PAGES_FAIL;
		}

		/* For huge page, get one map entry per time. */
		if ((pte_entry[0] & PM_SWAP) && (count == 1)) {
			res = get_user_pages_remote(task->mm, start, 1,
				FOLL_TOUCH | FOLL_GET, &temp_page, NULL, NULL);
			/*
			 * Failure is reported as a negative errno, not only as 0;
			 * temp_page is not valid in either case.
			 */
			if (res <= 0) {
				pr_warn("Swap in huge page fail.\n");
				return COLLECT_PAGES_FAIL;
			}
			pme->phy_addr_array[index] = page_to_phys(temp_page);
			start += HPAGE_PMD_SIZE;
			index++;
			continue;
		}

		if (IS_PTE_PRESENT(pte_entry[0])) {
			temp_page = pfn_to_page(pte_entry[0] & PM_PFRAME_MASK);
			if (PageHead(temp_page)) {
				SetPageHotreplace(temp_page);
				atomic_inc(&((temp_page)->_refcount));
				start += HPAGE_PMD_SIZE;
				pme->phy_addr_array[index] = page_to_phys(temp_page);
				index++;
			} else {
				/* If the page is not compound head, goto collect normal pages. */
				pme->nr_pages = index;
				return COLLECT_PAGES_NEED_CONTINUE;
			}
		} else {
			/* First entry absent: any later frame means base pages here. */
			for (i = 1; i < count; i++) {
				if (pte_entry[i] & PM_PFRAME_MASK) {
					pme->nr_pages = index;
					return COLLECT_PAGES_NEED_CONTINUE;
				}
			}
			start += HPAGE_PMD_SIZE;
			pme->phy_addr_array[index] = 0;
			index++;
		}
	}
	pme->nr_pages = index;
	return COLLECT_PAGES_FINISH;
}

int collect_normal_pages(struct task_struct *task,
	unsigned long start_addr, unsigned long end_addr, struct page_map_entry *pme)
{
	int ret, res;
	unsigned long next;
	unsigned long i, nr_pages;
	struct page *tmp_page;
	unsigned long *phy_addr_array = pme->phy_addr_array;
	unsigned int count;
	unsigned long *pte_entry = pagemap_buffer;
	struct mm_struct *mm = task->mm;

	next = (start_addr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE;
	next = (next > end_addr) ? end_addr : next;
	pme->nr_pages = 0;
	while (start_addr < next) {
		count = 0;
		nr_pages = (PAGE_ALIGN(next) - start_addr) / PAGE_SIZE;
		ret = pagemap_get(mm, pin_mem_pagewalk,
			start_addr, next, pte_entry, &count);
		if (ret || !count) {
			pr_warn("Get user page fail: %d, count: %u.\n",
				ret, count);
			return COLLECT_PAGES_FAIL;
		}

		if (IS_PTE_PRESENT(pte_entry[0])) {
			tmp_page = pfn_to_page(pte_entry[0] & PM_PFRAME_MASK);
			/* If the page is compound head, goto collect huge pages. */
			if (PageHead(tmp_page))
				return COLLECT_PAGES_NEED_CONTINUE;
			if (PageTail(tmp_page)) {
				start_addr = next;
				pme->virt_addr = start_addr;
				next = NEXT_PIN_ADDR(next, end_addr);
				continue;
			}
		}

		for (i = 0; i < count; i++) {
			if (pte_entry[i] & PM_SWAP) {
				res = get_user_pages_remote(task->mm, start_addr + i * PAGE_SIZE,
					1, FOLL_TOUCH | FOLL_GET, &tmp_page, NULL, NULL);
				if (!res) {
					pr_warn("Swap in page fail.\n");
					return COLLECT_PAGES_FAIL;
				}
				phy_addr_array[i] = page_to_phys(tmp_page);
				continue;
			}
			if (!IS_PTE_PRESENT(pte_entry[i])) {
				phy_addr_array[i] = 0;
				continue;
			}
			tmp_page = pfn_to_page(pte_entry[i] & PM_PFRAME_MASK);
			SetPageHotreplace(tmp_page);
			atomic_inc(&(tmp_page->_refcount));
			phy_addr_array[i] = ((pte_entry[i] & PM_PFRAME_MASK) << PAGE_SHIFT);
		}
		pme->nr_pages += count;
		phy_addr_array += count;
		start_addr = next;
		next = NEXT_PIN_ADDR(next, end_addr);
	}
	return COLLECT_PAGES_FINISH;
}

void free_pin_pages(struct page_map_entry *pme)
{
	unsigned long i;
	struct page *tmp_page;

	if (!pme)
		return;
	for (i = 0; i < pme->nr_pages; i++) {
		if (pme->phy_addr_array[i]) {
			tmp_page = phys_to_page(pme->phy_addr_array[i]);
			atomic_dec(&(tmp_page->_refcount));
			pme->phy_addr_array[i] = 0;
		}
	}
}

/*
 * Create the pagemap walk context and the pagemap buffer used by the
 * collect_* helpers. Safe to call repeatedly; later calls are no-ops.
 *
 * The "already initialised" check is done under pin_mem_mutex so that two
 * concurrent callers cannot both create a walk context, and the globals
 * are only assigned once both allocations have succeeded.
 *
 * Returns 0 on success (or if already initialised), -ENOMEM otherwise.
 */
int init_pagemap_read(void)
{
	int ret = 0;
	void *walk;
	unsigned long *buffer;

	mutex_lock(&pin_mem_mutex);
	if (pin_mem_pagewalk)
		goto out;

	ret = -ENOMEM;
	walk = create_pagemap_walk();
	if (!walk)
		goto out;

	/* One slot per base page of a PMD range, plus one spare. */
	buffer = kmalloc(((PMD_SIZE >> PAGE_SHIFT) + 1) *
		sizeof(unsigned long), GFP_KERNEL);
	if (!buffer) {
		free_pagemap_walk(walk);
		goto out;
	}

	pagemap_buffer = buffer;
	pin_mem_pagewalk = walk;
	ret = 0;
out:
	mutex_unlock(&pin_mem_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(init_pagemap_read);

/*
 * Record and pin the pages backing [@start_addr, @end_addr) of @task.
 *
 * A new page_map_entry is filled at page_map_entry_start, either by
 * collect_normal_pages() or collect_pmd_huge_pages(), and attached to the
 * page_map_info record of the task's pid (created on demand). When a
 * collector stops early with COLLECT_PAGES_NEED_CONTINUE, the function
 * calls itself for the remainder of the range.
 *
 * Users make sure that the pin memory belongs to anonymous vma.
 *
 * Returns 0 or COLLECT_PAGES_FINISH-style success values from the
 * collectors on success, -ENOMEM when the entry area is exhausted, and
 * -EFAULT on bad arguments or collection failure.
 *
 * NOTE(review): page_map_entry_lock is a spinlock taken with interrupts
 * disabled, and it is held across down_read() and the collect_* helpers
 * (which call get_user_pages_remote()); these can sleep — confirm this is
 * safe in the intended configuration.
 */
int pin_mem_area(struct task_struct *task, struct mm_struct *mm,
		unsigned long start_addr, unsigned long end_addr)
{
	int pid, ret;
	int is_huge_page = false;
	unsigned int page_size;
	unsigned long nr_pages, flags;
	struct page_map_entry *pme = NULL;
	struct page_map_info *pmi;
	struct vm_area_struct *vma;
	unsigned long i;
	struct page *tmp_page;

	if (!page_map_entry_start
		|| !task || !mm
		|| start_addr >= end_addr || !pin_mem_pagewalk)
		return -EFAULT;

	pid = task->pid;
	spin_lock_irqsave(&page_map_entry_lock, flags);
	/* Worst case: one array slot per base page of the range. */
	nr_pages = ((end_addr - start_addr) / PAGE_SIZE);
	if ((unsigned long)page_map_entry_start +
		nr_pages * sizeof(unsigned long) +
		sizeof(struct page_map_entry) >= page_map_entry_end) {
		pr_warn("Page map entry use up!\n");
		ret = -ENOMEM;
		goto finish;
	}

	vma = find_extend_vma(mm, start_addr);
	if (!vma) {
		pr_warn("Find no match vma!\n");
		ret = -EFAULT;
		goto finish;
	}
	/* Start with the huge-page collector only for PMD-aligned THP ranges. */
	if (start_addr == (start_addr & HPAGE_PMD_MASK) &&
		transparent_hugepage_active(vma)) {
		page_size = HPAGE_PMD_SIZE;
		is_huge_page = true;
	} else {
		page_size = PAGE_SIZE;
	}

	pme = page_map_entry_start;
	pme->virt_addr = start_addr;
	pme->redirect_start = 0;
	pme->is_huge_page = is_huge_page;
	memset(pme->phy_addr_array, 0, nr_pages * sizeof(unsigned long));

	down_read(&mm->mmap_lock);
	if (!is_huge_page) {
		ret = collect_normal_pages(task, start_addr, end_addr, pme);
		if (ret != COLLECT_PAGES_FAIL && !pme->nr_pages) {
			/* Nothing collected and nothing left to do. */
			if (ret == COLLECT_PAGES_FINISH) {
				ret = 0;
				up_read(&mm->mmap_lock);
				goto finish;
			}
			/* Nothing collected as base pages: retry as huge pages. */
			pme->is_huge_page = true;
			page_size = HPAGE_PMD_SIZE;
			ret = collect_pmd_huge_pages(task, pme->virt_addr, end_addr, pme);
		}
	} else {
		ret = collect_pmd_huge_pages(task, start_addr, end_addr, pme);
		if (ret != COLLECT_PAGES_FAIL && !pme->nr_pages) {
			/* Nothing collected and nothing left to do. */
			if (ret == COLLECT_PAGES_FINISH) {
				ret = 0;
				up_read(&mm->mmap_lock);
				goto finish;
			}
			/* Nothing collected as huge pages: retry as base pages. */
			pme->is_huge_page = false;
			page_size = PAGE_SIZE;
			ret = collect_normal_pages(task, pme->virt_addr, end_addr, pme);
		}
	}
	up_read(&mm->mmap_lock);
	if (ret == COLLECT_PAGES_FAIL) {
		ret = -EFAULT;
		goto finish;
	}

	/* check for zero pages */
	for (i = 0; i < pme->nr_pages; i++) {
		tmp_page = phys_to_page(pme->phy_addr_array[i]);
		if (!pme->is_huge_page) {
			if (page_to_pfn(tmp_page) == my_zero_pfn(pme->virt_addr + i * PAGE_SIZE))
				pme->phy_addr_array[i] = 0;
		} else if (is_huge_zero_page(tmp_page))
			pme->phy_addr_array[i] = 0;
	}

	/* Commit the entry: the next one starts right behind it. */
	page_map_entry_start = (struct page_map_entry *)(next_pme(pme));
	pmi = get_page_map_info(pid);
	if (!pmi)
		pmi = create_page_map_info(pid);
	if (!pmi) {
		pr_warn("Create page map info fail for pid: %d!\n", pid);
		ret = -EFAULT;
		goto finish;
	}
	if (!pmi->pme)
		pmi->pme = pme;
	pmi->entry_num++;
	spin_unlock_irqrestore(&page_map_entry_lock, flags);

	/* Continue with the rest of the range using the other collector. */
	if (ret == COLLECT_PAGES_NEED_CONTINUE)
		ret = pin_mem_area(task, mm, pme->virt_addr + pme->nr_pages * page_size, end_addr);
	return ret;

finish:
	/* On error, drop the references taken while collecting. */
	if (ret)
		free_pin_pages(pme);
	spin_unlock_irqrestore(&page_map_entry_lock, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(pin_mem_area);

/*
 * Map the base pages recorded in @pme back into @vma of @mm.
 *
 * Each non-empty slot is cleared and its page installed at
 * @pme->virt_addr + slot * PAGE_SIZE via do_anon_page_remap(). Slots
 * holding the zero page are just cleared. Pages listed in the entry's
 * redirect_info are not mapped directly: a fresh page is allocated, the
 * contents copied, and the copy is mapped instead.
 *
 * Returns 0 on success. On failure the remaining recorded pages of the
 * entry are released with put_page() and the failing code is returned.
 *
 * NOTE(review): the slot of the page being processed is cleared before the
 * mapping attempt, so the cleanup loop does not release that page itself —
 * confirm who owns it on the error path.
 */
vm_fault_t remap_normal_pages(struct mm_struct *mm, struct vm_area_struct *vma,
		struct page_map_entry *pme)
{
	int ret;
	unsigned int j, i;
	pgd_t *pgd;
	p4d_t *p4d;
	pmd_t *pmd;
	pud_t *pud;
	struct page *page, *new;
	unsigned long address;
	unsigned long phy_addr;
	/* Number of redirect_index slots consumed so far. */
	unsigned int redirect_pages = 0;
	struct redirect_info *redirect_start;

	redirect_start = (struct redirect_info *)pme->redirect_start;
	for (j = 0; j < pme->nr_pages; j++) {
		address = pme->virt_addr + j * PAGE_SIZE;
		phy_addr = pme->phy_addr_array[j];
		if (!phy_addr)
			continue;

		page = phys_to_page(phy_addr);
		if (page_to_pfn(page) == my_zero_pfn(address)) {
			pme->phy_addr_array[j] = 0;
			continue;
		}
		pme->phy_addr_array[j] = 0;

		/* Redirected page: map a private copy instead of the original. */
		if (redirect_start && (redirect_pages < redirect_start->redirect_pages) &&
			(j == redirect_start->redirect_index[redirect_pages])) {
			new = alloc_zeroed_user_highpage_movable(vma, address);
			if (!new) {
				pr_warn("Redirect alloc page fail\n");
				continue;
			}
			copy_page(page_to_virt(new), phys_to_virt(phy_addr));
			page = new;
			redirect_pages++;
		}

		page->mapping = NULL;
		/* Make sure the page table levels down to the PMD exist. */
		pgd = pgd_offset(mm, address);
		ret = VM_FAULT_OOM;
		p4d = p4d_alloc(mm, pgd, address);
		if (!p4d)
			goto free;
		pud = pud_alloc(mm, p4d, address);
		if (!pud)
			goto free;
		pmd = pmd_alloc(mm, pud, address);
		if (!pmd)
			goto free;
		ret = do_anon_page_remap(vma, address, pmd, page);
		if (ret)
			goto free;
		ClearPageHotreplace(page);
	}
	return 0;

free:
	ClearPageHotreplace(page);
	/* Release whatever is still recorded from slot j onwards. */
	for (i = j; i < pme->nr_pages; i++) {
		phy_addr = pme->phy_addr_array[i];
		if (phy_addr) {
			put_page(phys_to_page(phy_addr));
			pme->phy_addr_array[i] = 0;
		}
	}
	return ret;
}

/*
 * Pick the GFP mask for allocating a transparent huge page in @vma.
 *
 * The defrag bits in transparent_hugepage_flags are tested in a fixed
 * order (DIRECT, KSWAPD, KSWAPD_OR_MADV, REQ_MADV); the first one that is
 * set decides the mask, with VM_HUGEPAGE on the vma selecting the more
 * aggressive variant where one exists. With none set the result is
 * GFP_TRANSHUGE_LIGHT.
 */
static inline gfp_t get_hugepage_gfpmask(struct vm_area_struct *vma)
{
	const bool madvised = (vma->vm_flags & VM_HUGEPAGE) != 0;
	gfp_t mask;

	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
		mask = GFP_TRANSHUGE | (madvised ? 0 : __GFP_NORETRY);
	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
		mask = GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
			  &transparent_hugepage_flags))
		mask = GFP_TRANSHUGE_LIGHT | (madvised ? __GFP_DIRECT_RECLAIM :
							 __GFP_KSWAPD_RECLAIM);
	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
			  &transparent_hugepage_flags))
		mask = GFP_TRANSHUGE_LIGHT | (madvised ? __GFP_DIRECT_RECLAIM :
							 0);
	else
		mask = GFP_TRANSHUGE_LIGHT;

	return mask;
}

/*
 * Map the PMD-sized huge pages recorded in @pme back into @vma of @mm.
 *
 * Each non-empty slot is cleared and its page installed at
 * @pme->virt_addr + slot * HPAGE_PMD_SIZE via do_anon_huge_page_remap().
 * Slots holding the huge zero page are just cleared. Pages listed in the
 * entry's redirect_info are not mapped directly: a fresh huge page is
 * allocated, the contents copied, and the copy is mapped instead.
 *
 * Returns 0 on success. On failure the remaining recorded pages of the
 * entry that carry no PAGE_FLAGS_CHECK_RESERVED bit are released with
 * put_page() and the failing code is returned.
 *
 * NOTE(review): the slot of the page being processed is cleared before the
 * mapping attempt, so the cleanup loop does not release that page itself —
 * confirm who owns it on the error path.
 */
vm_fault_t remap_huge_pmd_pages(struct mm_struct *mm, struct vm_area_struct *vma,
		struct page_map_entry *pme)
{
	int ret;
	unsigned int j, i;
	pgd_t *pgd;
	p4d_t *p4d;
	pmd_t *pmd;
	pud_t *pud;
	gfp_t gfp;
	struct page *page, *new;
	unsigned long address;
	unsigned long phy_addr;
	/* Number of redirect_index slots consumed so far. */
	unsigned int redirect_pages = 0;
	struct redirect_info *redirect_start;

	redirect_start = (struct redirect_info *)pme->redirect_start;
	for (j = 0; j < pme->nr_pages; j++) {
		address = pme->virt_addr + j * HPAGE_PMD_SIZE;
		phy_addr = pme->phy_addr_array[j];
		if (!phy_addr)
			continue;

		page = phys_to_page(phy_addr);
		if (is_huge_zero_page(page)) {
			pme->phy_addr_array[j] = 0;
			continue;
		}
		pme->phy_addr_array[j] = 0;

		/* Redirected page: map a private copy instead of the original. */
		if (redirect_start && (redirect_pages < redirect_start->redirect_pages) &&
			(j == redirect_start->redirect_index[redirect_pages])) {
			gfp = get_hugepage_gfpmask(vma);
			new = alloc_hugepage_vma(gfp, vma, address, HPAGE_PMD_ORDER);
			if (!new) {
				pr_warn("Redirect alloc huge page fail\n");
				continue;
			}
			memcpy(page_to_virt(new), phys_to_virt(phy_addr), HPAGE_PMD_SIZE);
			page = new;
			redirect_pages++;
		}

		/* Make sure the page table levels down to the PMD exist. */
		pgd = pgd_offset(mm, address);
		ret = VM_FAULT_OOM;
		p4d = p4d_alloc(mm, pgd, address);
		if (!p4d)
			goto free;
		pud = pud_alloc(mm, p4d, address);
		if (!pud)
			goto free;
		pmd = pmd_alloc(mm, pud, address);
		if (!pmd)
			goto free;
		ret = do_anon_huge_page_remap(vma, address, pmd, page);
		if (ret)
			goto free;
		ClearPageHotreplace(page);
	}
	return 0;

free:
	ClearPageHotreplace(page);
	/* Release whatever is still recorded from slot j onwards. */
	for (i = j; i < pme->nr_pages; i++) {
		phy_addr = pme->phy_addr_array[i];
		if (phy_addr) {
			page = phys_to_page(phy_addr);
			if (!(page->flags & PAGE_FLAGS_CHECK_RESERVED)) {
				put_page(page);
				pme->phy_addr_array[i] = 0;
			}
		}
	}
	return ret;
}

/*
 * Release the pages of all entries of @pmi that follow @pme.
 *
 * @pmi:   record whose remaining entries are to be released.
 * @pme:   entry on which remapping failed; cleanup starts at the entry
 *         after it.
 * @index: index, within @pmi, of that following entry.
 *
 * Every recorded page without a PAGE_FLAGS_CHECK_RESERVED bit is released
 * with put_page() and its slot cleared.
 */
static void free_unmap_pages(struct page_map_info *pmi,
			struct page_map_entry *pme,
			unsigned int index)
{
	unsigned int i, j;
	unsigned long phy_addr;
	struct page *page;

	pme = (struct page_map_entry *)(next_pme(pme));
	for (i = index; i < pmi->entry_num; i++) {
		for (j = 0; j < pme->nr_pages; j++) {
			/* Slots are indexed by the page counter j, not the entry counter i. */
			phy_addr = pme->phy_addr_array[j];
			if (!phy_addr)
				continue;

			page = phys_to_page(phy_addr);
			if (!(page->flags & PAGE_FLAGS_CHECK_RESERVED)) {
				put_page(page);
				pme->phy_addr_array[j] = 0;
			}
		}
		pme = (struct page_map_entry *)(next_pme(pme));
	}
}

/*
 * Remap all pinned pages recorded for @pid into @mm.
 *
 * The record for @pid is looked up under page_map_entry_lock and marked
 * disable_free_page. Then, with mmap_lock held for writing, the entries
 * of the record are matched against the vma list in order; each entry
 * that starts inside an anonymous vma is remapped with
 * remap_normal_pages() or remap_huge_pmd_pages(). Entries that fall into
 * a non-anonymous vma are skipped.
 *
 * Returns 0 on success, -EFAULT when reservation failed earlier, @mm is
 * NULL or no record exists for @pid, or the non-zero code of the failing
 * remap helper. On a remap failure the pages of the entries not yet
 * processed are released.
 */
vm_fault_t do_mem_remap(int pid, struct mm_struct *mm)
{
	unsigned int i = 0;
	vm_fault_t ret = 0;
	struct vm_area_struct *vma;
	struct page_map_info *pmi;
	struct page_map_entry *pme;
	unsigned long flags;

	if (reserve_user_map_pages_fail || !mm)
		return -EFAULT;

	spin_lock_irqsave(&page_map_entry_lock, flags);
	pmi = get_page_map_info(pid);
	if (pmi)
		pmi->disable_free_page = true;
	spin_unlock_irqrestore(&page_map_entry_lock, flags);
	if (!pmi)
		return -EFAULT;

	down_write(&mm->mmap_lock);
	pme = pmi->pme;
	vma = mm->mmap;
	while ((i < pmi->entry_num) && (vma != NULL)) {
		if (pme->virt_addr >= vma->vm_start && pme->virt_addr < vma->vm_end) {
			i++;
			if (!vma_is_anonymous(vma)) {
				pme = (struct page_map_entry *)(next_pme(pme));
				continue;
			}
			if (!pme->is_huge_page)
				ret = remap_normal_pages(mm, vma, pme);
			else
				ret = remap_huge_pmd_pages(mm, vma, pme);
			/*
			 * vm_fault_t is unsigned, so failure must be detected as
			 * "non-zero"; a "< 0" test can never be true.
			 */
			if (ret)
				goto free;
			pme = (struct page_map_entry *)(next_pme(pme));
		} else {
			vma = vma->vm_next;
		}
	}
	up_write(&mm->mmap_lock);
	return 0;

free:
	/* i already points at the entry after the one that failed. */
	free_unmap_pages(pmi, pme, i);
	up_write(&mm->mmap_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(do_mem_remap);

/*
 * Release every page still recorded in the page map.
 *
 * Records flagged disable_free_page are skipped. For each remaining
 * recorded page the Hotreplace flag is cleared; pages without a
 * PAGE_FLAGS_CHECK_RESERVED bit are then released with put_page() and
 * their slot cleared. Does nothing when the record array is not set up or
 * reservation failed earlier.
 */
static void free_all_reserved_pages(void)
{
	struct page_map_info *pmi;
	struct page_map_entry *pme;
	struct page *page;
	unsigned long phy_addr;
	unsigned int pid_idx, entry_idx, page_idx;

	if (reserve_user_map_pages_fail || !user_space_reserve_start)
		return;

	for (pid_idx = 0; pid_idx < pin_pid_num; pid_idx++) {
		pmi = &user_space_reserve_start[pid_idx];
		if (pmi->disable_free_page)
			continue;

		pme = pmi->pme;
		for (entry_idx = 0; entry_idx < pmi->entry_num; entry_idx++) {
			for (page_idx = 0; page_idx < pme->nr_pages; page_idx++) {
				phy_addr = pme->phy_addr_array[page_idx];
				if (!phy_addr)
					continue;

				page = phys_to_page(phy_addr);
				ClearPageHotreplace(page);
				if (page->flags & PAGE_FLAGS_CHECK_RESERVED)
					continue;

				put_page(page);
				pme->phy_addr_array[page_idx] = 0;
			}
			pme = (struct page_map_entry *)next_pme(pme);
		}
	}
}

/* Clear all pin memory record. */
void clear_pin_memory_record(void)
{
	unsigned long flags;

	spin_lock_irqsave(&page_map_entry_lock, flags);
	free_all_reserved_pages();
	if (pin_pid_num_addr) {
		*pin_pid_num_addr = 0;
		pin_pid_num = 0;
		page_map_entry_start = __page_map_entry_start;
	}
	spin_unlock_irqrestore(&page_map_entry_lock, flags);
}
EXPORT_SYMBOL_GPL(clear_pin_memory_record);

/*
 * Resource describing the reserved pin memory region. start/end stay 0
 * until reserve_pin_memory_res() fills them in.
 */
static struct resource pin_memory_resource = {
	.name = "Pin memory",
	.start = 0,
	.end = 0,
	.flags = IORESOURCE_MEM,
	.desc = IORES_DESC_RESERVED
};

/* Start address and length of the region, as parsed from "pinmemory=". */
static unsigned long long pin_mem_start;
static unsigned long long pin_mem_len;

/*
 * Early-param handler for "pinmemory=<size>[@<start>]".
 *
 * The size is stored in pin_mem_len; an optional "@<start>" suffix is
 * stored in pin_mem_start. Returns 0 on success or when @cmdline is NULL,
 * -EINVAL when no size could be parsed or the size is followed by an
 * unexpected character.
 */
static int __init parse_pin_memory(char *cmdline)
{
	char *rest = cmdline;

	if (cmdline == NULL)
		return 0;

	pin_mem_len = memparse(cmdline, &rest);
	if (rest == cmdline) {
		pr_warn("pinmem: memory value expected\n");
		return -EINVAL;
	}

	switch (*rest) {
	case '@':
		pin_mem_start = memparse(rest + 1, &rest);
		break;
	case ' ':
	case '\0':
		break;
	default:
		pr_warn("pinmem: unrecognized char: %c\n", *rest);
		return -EINVAL;
	}

	return 0;
}
early_param("pinmemory", parse_pin_memory);

/*
 * Reserve the memblock range requested through "pinmemory=" and record it
 * in pin_memory_resource.
 *
 * The length is rounded up to a page multiple. Nothing is reserved when no
 * length was given, when the range is not entirely memory, or when it
 * overlaps an existing reservation.
 */
void __init reserve_pin_memory_res(void)
{
	unsigned long long base = pin_mem_start;
	unsigned long long len;

	if (pin_mem_len == 0)
		return;

	len = PAGE_ALIGN(pin_mem_len);

	if (!memblock_is_region_memory(base, len)) {
		pr_warn("cannot reserve for pin memory: region is not memory!\n");
		return;
	}
	if (memblock_is_region_reserved(base, len)) {
		pr_warn("cannot reserve for pin memory: region overlaps reserved memory!\n");
		return;
	}

	memblock_reserve(base, len);
	pr_debug("pin memory resource reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
		base, base + len, len >> 20);

	pin_memory_resource.start = base;
	pin_memory_resource.end = base + len - 1;
}

/*
 * Insert pin_memory_resource as a child of @res, but only when the pin
 * memory region has been reserved and lies entirely inside @res.
 */
void request_pin_mem_res(struct resource *res)
{
	if (!pin_memory_resource.end)
		return;
	if (pin_memory_resource.start < res->start)
		return;
	if (pin_memory_resource.end > res->end)
		return;

	request_resource(res, &pin_memory_resource);
}

/*
 * Initialise the page map on top of the reserved pin memory region: the
 * region's physical start is converted to a kernel virtual address and
 * handed, together with its size, to init_page_map_info(). Does nothing
 * when the region's start address or size is zero.
 */
void init_reserve_page_map(void)
{
	unsigned long region_start = (unsigned long)pin_memory_resource.start;
	unsigned long region_size =
		(unsigned long)(pin_memory_resource.end - pin_memory_resource.start + 1);
	struct pin_mem_dump_info *dump_info;

	if (region_start == 0 || region_size == 0)
		return;

	dump_info = (struct pin_mem_dump_info *)phys_to_virt(region_start);
	init_page_map_info(dump_info, region_size);
}

#endif /* CONFIG_PIN_MEMORY */

#ifdef CONFIG_PID_RESERVE
struct idr *reserve_idr;

void free_reserved_pid(struct idr *idr, int pid)
{
	unsigned int index;
	struct page_map_info *pmi;

	if (!max_pin_pid_num || idr != reserve_idr)
		return;

	for (index = 0; index < pin_pid_num; index++) {
		pmi = &(user_space_reserve_start[index]);
		if (pmi->pid == pid && pmi->pid_reserved) {
			idr_remove(idr, pid);
			return;
		}
	}
}

/*
 * Reserve, in @idr, the pid of every checkpointed task that has pinned
 * memory.
 *
 * @idr is remembered in reserve_idr. For each record an id is requested
 * with the record's pid as lower bound and @pid_max as upper bound. If a
 * different id comes back it is removed again (when positive) and the
 * record's pid_reserved flag is cleared; otherwise the flag stays set.
 */
void reserve_pids(struct idr *idr, int pid_max)
{
	struct page_map_info *pmi;
	unsigned int idx;
	int got;

	if (!max_pin_pid_num || !pin_pid_num)
		return;

	reserve_idr = idr;
	for (idx = 0; idx < pin_pid_num; idx++) {
		pmi = &user_space_reserve_start[idx];
		pmi->pid_reserved = true;

		got = idr_alloc(idr, NULL, pmi->pid, pid_max, GFP_ATOMIC);
		if (got == pmi->pid)
			continue;

		/* Did not get the wanted pid: undo and mark as not reserved. */
		if (got > 0)
			idr_remove(idr, got);
		pr_warn("Reserve pid (%d) fail, real pid is %d.\n", got, pmi->pid);
		pmi->pid_reserved = false;
	}
}

#endif /* CONFIG_PID_RESERVE */