Commit 4180be4a authored by Avi Kivity

Merge branch 'master' of github.com:cloudius-systems/osv

fpu preemption
parents 0b2c57d1 88075417
@@ -105,7 +105,7 @@ pt_element make_pte(phys addr, unsigned perm)
if (perm & perm_write) {
pte |= 0x2;
}
-if (!(perm * perm_exec)) {
+if (!(perm & perm_exec)) {
pte |= pt_element(0x8000000000000000);
}
return pte;
@@ -217,7 +217,7 @@ void populate_huge_page(void* addr, fill_page& fill, uint64_t offset, unsigned p
*/
void populate(vma& vma, fill_page& fill, unsigned perm)
{
// Find the largest 2MB-aligned range inside the given byte (or actually,
// 4K-aligned) range:
uintptr_t hp_start = ((vma.start()-1) & ~(huge_page_size-1)) + huge_page_size;
uintptr_t hp_end = (vma.end()) & ~(huge_page_size-1);
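// For example (illustrative numbers only): a vma spanning
// [0x1ff000, 0x601000) yields hp_start == 0x200000 and hp_end == 0x600000,
// i.e., one small page below the huge range, two huge pages in the middle,
// and one small page above.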
@@ -244,6 +244,86 @@ void populate(vma& vma, fill_page& fill, unsigned perm)
}
void unpopulate_page(void* addr)
{
pt_element pte = processor::read_cr3();
auto pt = phys_cast<pt_element>(pte_phys(pte));
auto ptep = &pt[pt_index(addr, nlevels - 1)];
unsigned level = nlevels - 1;
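// Walk down from the top-level page table. On x86-64 with nlevels == 4,
// address bits [47:39], [38:30], [29:21] and [20:12] index the PML4,
// PDPT, PD and PT levels, respectively.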
while (level > 0) {
if (!pte_present(*ptep))
return;
else if (pte_large(*ptep)) {
// This case means that part of a larger mmap was mmapped over:
// previously a huge page was mapped here, and now we need to free some
// of the small pages composing it. Luckily, in our implementation
// it is ok to free pieces of an alloc_huge_page() with free_page().
split_large_page(ptep, level);
}
pte = *ptep;
--level;
pt = phys_cast<pt_element>(pte_phys(pte));
ptep = &pt[pt_index(addr, level)];
}
if (!pte_present(*ptep))
return;
*ptep &= ~1; // make not present
memory::free_page(phys_to_virt(pte_phys(*ptep)));
}
void unpopulate_huge_page(void* addr)
{
pt_element pte = processor::read_cr3();
auto pt = phys_cast<pt_element>(pte_phys(pte));
auto ptep = &pt[pt_index(addr, nlevels - 1)];
unsigned level = nlevels - 1;
while (level > 1) {
if (!pte_present(*ptep))
return;
else if (pte_large(*ptep))
split_large_page(ptep, level);
pte = *ptep;
--level;
pt = phys_cast<pt_element>(pte_phys(pte));
ptep = &pt[pt_index(addr, level)];
}
if (!pte_present(*ptep))
return;
if (pte_large(*ptep)){
memory::free_huge_page(phys_to_virt(pte_phys(*ptep)), huge_page_size);
} else {
// We've previously allocated small pages here, not a huge page.
// We need to free them one by one - as they are not necessarily part
// of one huge page.
pt_element* pt = phys_cast<pt_element>(pte_phys(*ptep));
for(int i=0; i<pte_per_page; ++i)
if (pte_present(pt[i]))
memory::free_page(phys_to_virt(pte_phys(pt[i])));
}
*ptep &= ~1; // make not present
}
/*
* Undo the operation of populate(), freeing memory allocated by populate()
* and marking the pages non-present.
*/
void unpopulate(vma& vma)
{
uintptr_t hp_start = ((vma.start()-1) & ~(huge_page_size-1)) + huge_page_size;
uintptr_t hp_end = (vma.end()) & ~(huge_page_size-1);
if (hp_start > vma.end())
hp_start = vma.end();
if (hp_end < vma.start())
hp_end = vma.start();
for (auto addr = vma.start(); addr < hp_start; addr += page_size)
unpopulate_page(reinterpret_cast<void*>(addr));
for (auto addr = hp_start; addr < hp_end; addr += huge_page_size)
unpopulate_huge_page(reinterpret_cast<void*>(addr));
for (auto addr = hp_end; addr < vma.end(); addr += page_size)
unpopulate_page(reinterpret_cast<void*>(addr));
}
uintptr_t find_hole(uintptr_t start, uintptr_t size)
{
// FIXME: use lower_bound or something
@@ -277,6 +357,7 @@ void evacuate(vma* v)
i->split(v->start());
if (contains(*v, *i)) {
auto& dead = *i--;
unpopulate(dead);
vma_list.erase(dead);
}
}
@@ -297,8 +378,8 @@ vma* reserve(void* hint, size_t size)
void unmap(void* addr, size_t size)
{
-vma tmp { reinterpret_cast<uintptr_t>(addr), size };
-// FIXME: also need to free the pages (small or huge) allocated to back this range.
+auto start = reinterpret_cast<uintptr_t>(addr);
+vma tmp { start, start+size };
evacuate(&tmp);
}
@@ -329,10 +410,6 @@ vma* allocate(uintptr_t start, uintptr_t end, fill_page& fill,
unsigned perm)
{
vma* ret = new vma(start, end);
-// FIXME: also need to free the pages (small or huge) potentially allocated for this range.
-// we'll have a complication if the original mapping included a huge page, but this mapping
-// is smaller and only covers part of it - we'll need to break up the huge page and copy
-// the small pages not covered by the new mapping to a new location.
evacuate(ret);
vma_list.insert(*ret);
@@ -357,6 +434,110 @@ vma* map_file(void* addr, size_t size, unsigned perm,
return ret;
}
void change_perm(pt_element *ptep, unsigned int perm)
{
if (perm & perm_write)
*ptep |= 0x2; // set the R/W (writable) bit
else
*ptep &= ~0x2;
if (!(perm & perm_exec))
*ptep |= pt_element(0x8000000000000000); // set the NX (no-execute) bit
else
*ptep &= ~pt_element(0x8000000000000000);
// TODO: we ignore perm & perm_read here, breaking mmap()'s
// ability to set PROT_NONE, i.e., inaccessible memory.
// We could have zeroed the present bit in this case, but
// the problem is that if the present bit is unset, it also
// tells us (e.g., in unpopulate()) that the memory is
// unmapped. So to support !perm_read, we'll need to change
// the code.
}
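// One possible direction for PROT_NONE (a sketch only, not what the code
// above does): x86-64 PTEs ignore bits 9-11, so a software bit could mark
// a page as "mapped but inaccessible", letting unpopulate() distinguish it
// from truly unmapped memory. For example:
//
//     // hypothetical helper; assumes bit 9 is free for software use
//     void make_inaccessible(pt_element *ptep)
//     {
//         *ptep |= pt_element(1) << 9;  // remember the mapping in software
//         *ptep &= ~pt_element(1);      // clear the hardware present bit
//     }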
int protect_page(void *addr, unsigned int perm)
{
pt_element pte = processor::read_cr3();
auto pt = phys_cast<pt_element>(pte_phys(pte));
auto ptep = &pt[pt_index(addr, nlevels - 1)];
unsigned level = nlevels - 1;
while (level > 0) {
if (!pte_present(*ptep))
return 0;
else if (pte_large(*ptep)) {
// We're trying to change the protection of part of a huge page, so
// we need to split the huge page into small pages. This is fine
// because in our implementation it is ok to free pieces of an
// alloc_huge_page() with free_page().
split_large_page(ptep, level);
}
pte = *ptep;
--level;
pt = phys_cast<pt_element>(pte_phys(pte));
ptep = &pt[pt_index(addr, level)];
}
if (!pte_present(*ptep))
return 0;
change_perm(ptep, perm);
return 1;
}
int protect_huge_page(void *addr, unsigned int perm)
{
pt_element pte = processor::read_cr3();
auto pt = phys_cast<pt_element>(pte_phys(pte));
auto ptep = &pt[pt_index(addr, nlevels - 1)];
unsigned level = nlevels - 1;
while (level > 1) {
if (!pte_present(*ptep))
return 0;
else if (pte_large(*ptep))
split_large_page(ptep, level);
pte = *ptep;
--level;
pt = phys_cast<pt_element>(pte_phys(pte));
ptep = &pt[pt_index(addr, level)];
}
if (!pte_present(*ptep))
return 0;
if (pte_large(*ptep)){
change_perm(ptep, perm);
return 1;
} else {
int ret = 1;
pt_element* pt = phys_cast<pt_element>(pte_phys(*ptep));
for(int i=0; i<pte_per_page; ++i)
if(pte_present(pt[i]))
change_perm(&pt[i], perm);
else
ret = 0;
return ret;
}
}
int protect(void *start, size_t size, unsigned int perm)
{
void *end = start+size; // one byte after the end
void *hp_start = (void*) ((((uintptr_t)start-1) & ~(huge_page_size-1)) +
huge_page_size);
void *hp_end = (void*) ((uintptr_t)end & ~(huge_page_size-1));
if (hp_start > end)
hp_start = end;
if (hp_end < start)
hp_end = start;
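// The same small/huge/small range decomposition used by populate()
// and unpopulate() above.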
int ret=1;
for (auto addr = start; addr < hp_start; addr += page_size)
ret &= protect_page(addr, perm);
for (auto addr = hp_start; addr < hp_end; addr += huge_page_size)
ret &= protect_huge_page(addr, perm);
for (auto addr = hp_end; addr < end; addr += page_size)
ret &= protect_page(addr, perm);
return ret;
}
namespace {
uintptr_t align_down(uintptr_t ptr)
......
@@ -40,6 +40,7 @@ vma* map_file(void* addr, size_t size, unsigned perm,
file& file, f_offset offset);
vma* map_anon(void* addr, size_t size, unsigned perm);
void unmap(void* addr, size_t size);
int protect(void *addr, size_t size, unsigned int perm);
typedef uint64_t phys;
phys virt_to_phys(void *virt);
......
@@ -337,11 +337,8 @@ libc += misc/get_current_dir_name.o
libc += misc/gethostid.o
libc += misc/getopt.o
libc += misc/getopt_long.o
libc += misc/getresuid.o
libc += misc/getresgid.o
libc += misc/getsubopt.o
libc += misc/realpath.o
libc += misc/setdomainname.o
libc += multibyte/btowc.o
libc += multibyte/internal.o
@@ -599,6 +596,7 @@ libc += time/localtime_r.o
libc += time/mktime.o
libc += time/strftime.o
libc += time/strptime.o
libc += time/time.o
libc += time/timegm.o
libc += time/tzset.o
libc += time/wcsftime.o
......
#include <dlfcn.h>
#include "elf.hh"
#include <link.h>
#include <osv/debug.h>
void* dlopen(const char* filename, int flags)
{
@@ -57,3 +58,10 @@ int dl_iterate_phdr(int (*callback)(struct dl_phdr_info *info,
});
return ret;
}
extern "C" int dladdr(__const void *addr, Dl_info *info)
{
kprintf("stub dladdr()\n");
errno = EINVAL;
return -1;
}
@@ -9,6 +9,8 @@
#include <sys/resource.h>
#include <pwd.h>
#include <sys/utsname.h>
#include <sys/sysinfo.h>
#include <osv/debug.h>
#include <sched.h>
int libc_error(int err)
@@ -36,7 +38,17 @@ int getrlimit(int resource, struct rlimit *rlim)
case RLIMIT_NOFILE:
set(1024*10); // FIXME: larger?
break;
case RLIMIT_CORE:
set(RLIM_INFINITY);
break;
case RLIMIT_NPROC:
set(RLIM_INFINITY);
break;
case RLIMIT_AS:
set(RLIM_INFINITY);
break;
default:
kprintf("getrlimit: resource %d not supported\n", resource);
abort();
}
return 0;
@@ -101,3 +113,20 @@ int sched_yield()
sched::thread::yield();
return 0;
}
int getloadavg(double loadavg[], int nelem)
{
int i;
// We don't track load; report a fixed placeholder value.
for (i = 0; i < nelem; i++)
loadavg[i] = 0.5;
return 0;
}
extern "C" int sysinfo(struct sysinfo *info)
{
// Stub: report all-zero statistics.
memset(info, 0, sizeof(struct sysinfo));
return 0;
}
#define _GNU_SOURCE
#include <unistd.h>
#include "syscall.h"
int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
{
return syscall(SYS_getresgid, rgid, egid, sgid);
}
#define _GNU_SOURCE
#include <unistd.h>
#include "syscall.h"
int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
{
return syscall(SYS_getresuid, ruid, euid, suid);
}
#define _GNU_SOURCE
#include <unistd.h>
#include "syscall.h"
int setdomainname(const char *name, size_t len)
{
return syscall(SYS_setdomainname, name, len);
}
@@ -20,7 +20,21 @@ unsigned libc_prot_to_perm(int prot)
int mprotect(void *addr, size_t len, int prot)
{
debug("stub mprotect()");
if(!(prot & PROT_READ)){
// FIXME: currently we do not implement PROT_NONE :( see change_perm()...
debug(fmt("mprotect(%x,%d,0x%x) - PROT_NONE unimplemented, using PROT_READ\n")%addr%len%prot,false);
}
if ((reinterpret_cast<intptr_t>(addr) & 4095) || (len & 4095)) {
// address or length not page aligned
errno = EINVAL;
return -1;
}
if (!mmu::protect(addr, len, libc_prot_to_perm(prot))) {
// NOTE: we return ENOMEM when part of the range was not mapped,
// but we nevertheless set the protection on the rest!
errno = ENOMEM;
return -1;
}
return 0;
}
......
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include "syscall.h"
int remove(const char *path)
{
......
@@ -4,7 +4,6 @@
#include <unistd.h>
#include <time.h>
#include "libc.h"
#include "syscall.h"
#include "atomic.h"
#define MAXTRIES 100
......
#include <string.h>
#include <sys/types.h>
#include <errno.h>
/*
* Glibc provides two incompatible versions of strerror_r and uses
* redirection magic for the XPG-compliant one in <string.h>,
* so we must avoid including that header as long as we use the glibc
* headers instead of the musl ones.
*/
extern char *strerror (int);
extern size_t strlen (const char *);
extern void *memcpy (void *__restrict, const void *__restrict, size_t);
int strerror_r(int err, char *buf, size_t buflen)
{
char *msg = strerror(err);
......
#include <time.h>
#include <sys/time.h>
time_t time(time_t *t)
{
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
if (t) *t = ts.tv_sec;
return ts.tv_sec;
}
@@ -77,6 +77,7 @@ void *__dso_handle;
void abort()
{
kprintf("Aborted\n");
while (true)
processor::halt_no_interrupts();
}
......
@@ -7,6 +7,7 @@
#include "tst-rwlock.hh"
#include "tst-bsd-synch.hh"
#include "tst-queue-mpsc.hh"
#include "tst-mmap.hh"
using namespace unit_tests;
@@ -19,6 +20,7 @@ void tests::execute_tests() {
test_rwlock rwlock;
test_synch synch;
test_queue_mpsc q1;
test_mmap mmap;
instance().register_test(&threads);
instance().register_test(&malloc);
@@ -28,6 +30,7 @@
instance().register_test(&rwlock);
instance().register_test(&synch);
instance().register_test(&q1);
instance().register_test(&mmap);
instance().run();
}
#include "tst-hub.hh"
#include "sched.hh"
#include "debug.hh"
#include <sys/mman.h>
class test_mmap: public unit_tests::vtest {
public:
void run()
{
debug("Running mmap tests\n", false);
// Test that munmap actually recycles the physical memory allocated by mmap
for (int i=0; i<1000; i++) {
constexpr size_t size = 1<<20;
void *buf = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS, -1, 0);
if (buf == MAP_FAILED)
debug("mmap failed!\n", false);
munmap(buf, size);
}
// Do the same for allocations large enough to use huge-pages
for (int i=0; i<100; i++) {
constexpr size_t size = 30 * (1<<20);
void *buf = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS, -1, 0);
if (buf == MAP_FAILED)
debug("mmap failed!\n", false);
munmap(buf, size);
}
// Test that we can override mmaps, without munmap, without leaking
// physical memory. Mix in small page and huge page allocations for
// more fun.
int hugepagesize = 1<<21;
void *buf = mmap(NULL, hugepagesize*10, PROT_READ|PROT_WRITE, MAP_ANONYMOUS, -1, 0);
assert(buf != MAP_FAILED);
for (int i=0; i<100; i++) {
mmap(buf, hugepagesize-4096, PROT_READ, MAP_ANONYMOUS|MAP_FIXED, -1, 0);
mmap(buf, hugepagesize*9+4096, PROT_READ, MAP_ANONYMOUS|MAP_FIXED, -1, 0);
}
munmap(buf, hugepagesize*9+4096);
// Test mprotect. Fault-causing tests are commented out until we have a
// framework for verifying these faults (see the sketch after this class).
buf = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_ANONYMOUS, -1, 0);
// debug("testing write ok");
// *(char*)buf = 0;
// debug("testing write failure");
mprotect(buf, 4096, PROT_READ);
// *(char*)buf = 0;
munmap(buf, 4096);
// test mprotect with part of huge page
buf = mmap(NULL, 3*hugepagesize, PROT_READ|PROT_WRITE, MAP_ANONYMOUS, -1, 0);
void *hp = (void*) (((uintptr_t)buf&~(hugepagesize-1))+hugepagesize);
mprotect(hp+4096, 4096, PROT_READ);
// debug("should be fine");
// *(char*)hp = 0; // should be fine
// debug("should be fine");
// *(char*)(hp+8192) = 0; // should be fine
// debug("should croak");
// *(char*)(hp+4096) = 0; // should croak
munmap(buf, 3*hugepagesize);
// TODO: verify that mmapping more than the available physical memory
// doesn't panic, but just returns -1 with errno ENOMEM.
// TODO: verify that huge-page-sized allocations get a huge-page aligned address
// (if addr=0). Not critical, though, just makes sense.
debug("mmap tests succeeded\n", false);
}
};
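// A conventional way to exercise the commented-out faulting accesses above,
// on a system with POSIX signal delivery (a sketch under that assumption;
// OSv may not provide it):
//
//     #include <csetjmp>
//     #include <csignal>
//
//     static sigjmp_buf fault_jmp;
//
//     static void on_segv(int)
//     {
//         siglongjmp(fault_jmp, 1);    // unwind out of the fault handler
//     }
//
//     // Returns true if writing one byte to p faults.
//     static bool write_faults(char *p)
//     {
//         struct sigaction sa {};
//         sa.sa_handler = on_segv;
//         sigaction(SIGSEGV, &sa, nullptr);
//         if (sigsetjmp(fault_jmp, 1))
//             return true;             // arrived here via siglongjmp
//         *p = 0;
//         return false;                // the write completed normally
//     }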