/* * linux/mm/mlock.c * * (C) Copyright 1995 Linus Torvalds * (C) Copyright 2002 Christoph Hellwig */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "internal.h" int can_do_mlock(void) { if (capable(CAP_IPC_LOCK)) return 1; if (rlimit(RLIMIT_MEMLOCK) != 0) return 1; return 0; } EXPORT_SYMBOL(can_do_mlock); void __clear_page_mlock(struct page *page) { VM_BUG_ON(!PageLocked(page)); if (!page->mapping) { return; } dec_zone_page_state(page, NR_MLOCK); count_vm_event(UNEVICTABLE_PGCLEARED); if (!isolate_lru_page(page)) { putback_lru_page(page); } else { if (PageUnevictable(page)) count_vm_event(UNEVICTABLE_PGSTRANDED); } } void mlock_vma_page(struct page *page) { /* Serialize with page migration */ BUG_ON(!PageLocked(page)); if (!TestSetPageMlocked(page)) { inc_zone_page_state(page, NR_MLOCK); count_vm_event(UNEVICTABLE_PGMLOCKED); if (!isolate_lru_page(page)) putback_lru_page(page); } } void munlock_vma_page(struct page *page) { /* For try_to_munlock() and to serialize with page migration */ BUG_ON(!PageLocked(page)); if (TestClearPageMlocked(page)) { dec_zone_page_state(page, NR_MLOCK); if (!isolate_lru_page(page)) { int ret = SWAP_AGAIN; if (page_mapcount(page) > 1) ret = try_to_munlock(page); if (ret != SWAP_MLOCK) count_vm_event(UNEVICTABLE_PGMUNLOCKED); putback_lru_page(page); } else { if (PageUnevictable(page)) count_vm_event(UNEVICTABLE_PGSTRANDED); else count_vm_event(UNEVICTABLE_PGMUNLOCKED); } } } static long __mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *nonblocking) { struct mm_struct *mm = vma->vm_mm; unsigned long addr = start; int nr_pages = (end - start) / PAGE_SIZE; int gup_flags; VM_BUG_ON(start & ~PAGE_MASK); VM_BUG_ON(end & ~PAGE_MASK); VM_BUG_ON(start < vma->vm_start); VM_BUG_ON(end > vma->vm_end); VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); gup_flags = FOLL_TOUCH | FOLL_MLOCK; if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) gup_flags |= FOLL_WRITE; if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) gup_flags |= FOLL_FORCE; return __get_user_pages(current, mm, addr, nr_pages, gup_flags, NULL, NULL, nonblocking); } static int __mlock_posix_error_return(long retval) { if (retval == -EFAULT) retval = -ENOMEM; else if (retval == -ENOMEM) retval = -EAGAIN; return retval; } long mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { int nr_pages = (end - start) / PAGE_SIZE; BUG_ON(!(vma->vm_flags & VM_LOCKED)); if (vma->vm_flags & (VM_IO | VM_PFNMAP)) goto no_mlock; if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))) { __mlock_vma_pages_range(vma, start, end, NULL); return 0; } make_pages_present(start, end); no_mlock: vma->vm_flags &= ~VM_LOCKED; return nr_pages; } void munlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { unsigned long addr; lru_add_drain(); vma->vm_flags &= ~VM_LOCKED; for (addr = start; addr < end; addr += PAGE_SIZE) { struct page *page; page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); if (page && !IS_ERR(page)) { lock_page(page); if (page->mapping) munlock_vma_page(page); unlock_page(page); put_page(page); } cond_resched(); } } static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, vm_flags_t newflags) { struct mm_struct *mm = vma->vm_mm; pgoff_t pgoff; int nr_pages; int ret = 0; int lock = !!(newflags & VM_LOCKED); if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) goto out; pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma)); if (*prev) { vma = *prev; goto success; } if (start != vma->vm_start) { ret = split_vma(mm, vma, start, 1); if (ret) goto out; } if (end != vma->vm_end) { ret = split_vma(mm, vma, end, 0); if (ret) goto out; } success: nr_pages = (end - start) >> PAGE_SHIFT; if (!lock) nr_pages = -nr_pages; mm->locked_vm += nr_pages; if (lock) vma->vm_flags = newflags; else munlock_vma_pages_range(vma, start, end); out: *prev = vma; return ret; } static int do_mlock(unsigned long start, size_t len, int on) { unsigned long nstart, end, tmp; struct vm_area_struct * vma, * prev; int error; VM_BUG_ON(start & ~PAGE_MASK); VM_BUG_ON(len != PAGE_ALIGN(len)); end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; vma = find_vma(current->mm, start); if (!vma || vma->vm_start > start) return -ENOMEM; prev = vma->vm_prev; if (start > vma->vm_start) prev = vma; for (nstart = start ; ; ) { vm_flags_t newflags; newflags = vma->vm_flags | VM_LOCKED; if (!on) newflags &= ~VM_LOCKED; tmp = vma->vm_end; if (tmp > end) tmp = end; error = mlock_fixup(vma, &prev, nstart, tmp, newflags); if (error) break; nstart = tmp; if (nstart < prev->vm_end) nstart = prev->vm_end; if (nstart >= end) break; vma = prev->vm_next; if (!vma || vma->vm_start != nstart) { error = -ENOMEM; break; } } return error; } static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors) { struct mm_struct *mm = current->mm; unsigned long end, nstart, nend; struct vm_area_struct *vma = NULL; int locked = 0; int ret = 0; VM_BUG_ON(start & ~PAGE_MASK); VM_BUG_ON(len != PAGE_ALIGN(len)); end = start + len; for (nstart = start; nstart < end; nstart = nend) { if (!locked) { locked = 1; down_read(&mm->mmap_sem); vma = find_vma(mm, nstart); } else if (nstart >= vma->vm_end) vma = vma->vm_next; if (!vma || vma->vm_start >= end) break; nend = min(end, vma->vm_end); if (vma->vm_flags & (VM_IO | VM_PFNMAP)) continue; if (nstart < vma->vm_start) nstart = vma->vm_start; ret = __mlock_vma_pages_range(vma, nstart, nend, &locked); if (ret < 0) { if (ignore_errors) { ret = 0; continue; } ret = __mlock_posix_error_return(ret); break; } nend = nstart + ret * PAGE_SIZE; ret = 0; } if (locked) up_read(&mm->mmap_sem); return ret; } SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) { unsigned long locked; unsigned long lock_limit; int error = -ENOMEM; if (!can_do_mlock()) return -EPERM; lru_add_drain_all(); down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; locked = len >> PAGE_SHIFT; locked += current->mm->locked_vm; lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = do_mlock(start, len, 1); up_write(¤t->mm->mmap_sem); if (!error) error = do_mlock_pages(start, len, 0); return error; } SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; ret = do_mlock(start, len, 0); up_write(¤t->mm->mmap_sem); return ret; } static int do_mlockall(int flags) { struct vm_area_struct * vma, * prev = NULL; unsigned int def_flags = 0; if (flags & MCL_FUTURE) def_flags = VM_LOCKED; current->mm->def_flags = def_flags; if (flags == MCL_FUTURE) goto out; for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { vm_flags_t newflags; newflags = vma->vm_flags | VM_LOCKED; if (!(flags & MCL_CURRENT)) newflags &= ~VM_LOCKED; mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); } out: return 0; } SYSCALL_DEFINE1(mlockall, int, flags) { unsigned long lock_limit; int ret = -EINVAL; if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE))) goto out; ret = -EPERM; if (!can_do_mlock()) goto out; if (flags & MCL_CURRENT) lru_add_drain_all(); down_write(¤t->mm->mmap_sem); lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); up_write(¤t->mm->mmap_sem); if (!ret && (flags & MCL_CURRENT)) { do_mlock_pages(0, TASK_SIZE, 1); } out: return ret; } SYSCALL_DEFINE0(munlockall) { int ret; down_write(¤t->mm->mmap_sem); ret = do_mlockall(0); up_write(¤t->mm->mmap_sem); return ret; } static DEFINE_SPINLOCK(shmlock_user_lock); int user_shm_lock(size_t size, struct user_struct *user) { unsigned long lock_limit, locked; int allowed = 0; locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; lock_limit = rlimit(RLIMIT_MEMLOCK); if (lock_limit == RLIM_INFINITY) allowed = 1; lock_limit >>= PAGE_SHIFT; spin_lock(&shmlock_user_lock); if (!allowed && locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK)) goto out; get_uid(user); user->locked_shm += locked; allowed = 1; out: spin_unlock(&shmlock_user_lock); return allowed; } void user_shm_unlock(size_t size, struct user_struct *user) { spin_lock(&shmlock_user_lock); user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT; spin_unlock(&shmlock_user_lock); free_uid(user); }