Skip to content
Snippets Groups Projects
loader.cc 14.4 KiB
Newer Older
/*
 * Copyright (C) 2013 Cloudius Systems, Ltd.
 *
 * This work is open source software, licensed under the terms of the
 * BSD license as described in the LICENSE file in the top-level directory.
 */
#include "fs/fs.hh"
#include <bsd/net.hh>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/algorithm/string.hpp>
#include <cctype>
#include <osv/elf.hh>
#include <osv/tls.hh>
#include <osv/debug.hh>
#include "smp.hh"

#ifndef AARCH64_PORT_STUB
#include "exceptions.hh"
#include "drivers/pci.hh"
#include "ioapic.hh"
#include "drivers/acpi.hh"
#include "drivers/driver.hh"
#include "drivers/virtio-net.hh"
#include "drivers/virtio-blk.hh"
Asias He's avatar
Asias He committed
#include "drivers/virtio-scsi.hh"
#include "drivers/virtio-rng.hh"
#include "drivers/xenfront-xenbus.hh"
Asias He's avatar
Asias He committed
#include "drivers/ahci.hh"
Takuya ASADA's avatar
Takuya ASADA committed
#include "drivers/ide.hh"
Asias He's avatar
Asias He committed
#include "drivers/vmw-pvscsi.hh"
Takuya ASADA's avatar
Takuya ASADA committed
#include "drivers/vmxnet3.hh"
#include "drivers/zfs.hh"
#include "drivers/pvpanic.hh"
#include "drivers/console.hh"
#include "drivers/isa-serial.hh"
#include "drivers/vga.hh"
#endif /* !AARCH64_PORT_STUB */
#include <osv/sched.hh>
#include <osv/barrier.hh>
#include "arch.hh"
#include "arch-setup.hh"
#include "osv/trace.hh"
#include <osv/power.hh>
#include <osv/rcu.hh>
#include <osv/mempool.hh>
Takuya ASADA's avatar
Takuya ASADA committed
#include <bsd/porting/networking.hh>
#include <bsd/porting/shrinker.h>
#include <osv/dhcp.hh>
Pekka Enberg's avatar
Pekka Enberg committed
#include <osv/version.h>
#include <osv/shutdown.hh>
#include <osv/commands.hh>
#include <osv/boot.hh>
#include <osv/sampler.hh>

using namespace osv;

asm(".pushsection \".debug_gdb_scripts\", \"MS\",@progbits,1 \n"
    ".byte 1 \n"
    ".asciz \"scripts/loader.py\" \n"
    ".popsection \n");

#ifdef AARCH64_PORT_STUB
#define ALIGN_ELF_HEADER_ADDR 4096
#else
#define ALIGN_ELF_HEADER_ADDR 8
#endif
elf::Elf64_Ehdr* elf_header __attribute__ ((aligned(ALIGN_ELF_HEADER_ADDR)));

size_t elf_size;
void* elf_start;
elf::tls_data tls_data;

boot_time_chart boot_time;

/*
 * Install the initial TLS image described by the ELF init table.
 *
 * The TLS descriptor is saved in the global tls_data and handed to the
 * scheduler. The bytes of the template between filesize and size are
 * cleared, the template is copied into tcb0, and a thread control block
 * whose self pointer refers to itself is set up right after the copy.
 */
void setup_tls(elf::init_table inittab)
{
    extern char tcb0[]; // defined by linker script

    tls_data = inittab.tls;
    sched::init_tls(tls_data);

    // Clear the tail of the template: bytes [filesize, size).
    const auto zero_len = tls_data.size - tls_data.filesize;
    memset(tls_data.start + tls_data.filesize, 0, zero_len);

    // Copy the whole template into tcb0; the control block follows it.
    const auto image_len = inittab.tls.size;
    memcpy(tcb0, inittab.tls.start, image_len);

    auto* tcb = reinterpret_cast<thread_control_block*>(tcb0 + image_len);
    tcb->self = tcb;
}

extern "C" {
    void premain();
    void vfs_init(void);
Pekka Enberg's avatar
Pekka Enberg committed
    void mount_zfs_rootfs(void);
    void ramdisk_init(void);
}

/*
 * Early boot hook: prints the version banner, sets up TLS from the ELF
 * init table, and then calls every function listed in that table,
 * recording a boot_time event after each of the two phases.
 */
void premain()
{
    /* The banner reports the OSv version and doubles as a check that the
       early console already works at this point, with no prior
       initialization required. */
    debug_early("OSv " OSV_VERSION "\n");

    auto inittab = elf::get_init(elf_header);
    setup_tls(inittab);
    boot_time.event("TLS initialization");

    // Call the init table entries in order, first to last.
    auto next = inittab.start;
    while (next < inittab.start + inittab.count) {
        (*next)();
        ++next;
    }
    boot_time.event(".init functions");
}

/*
 * Loader entry point.
 *
 * With AARCH64_PORT_STUB defined, it prints argc/argv and then loops on
 * "wfi" forever. Otherwise it calls init_on_cpu() on the CPU object returned
 * by smp_initial_find_current_cpu() and passes sched::init() a lambda that
 * calls main_cont(ac, av).
 *
 * NOTE(review): the closing brace of this function is not visible in this
 * copy of the file.
 */
int main(int ac, char **av)
{
#ifdef AARCH64_PORT_STUB
    printf("argc=%d\n", ac);

    for (int i = 0; i < ac; i++) {
        printf("argv[%d] = %s\n", i, av[i]);
    }

    printf("OSv AArch64: main reached, halting.\n");
    // Stub port: nothing further to run, so spin here.
    while (1) {
        asm ("wfi;");
    }
#endif /* AARCH64_PORT_STUB */

#ifndef AARCH64_PORT_STUB
    smp_initial_find_current_cpu()->init_on_cpu();
    // Local forward declaration; main_cont is defined later in this file.
    void main_cont(int ac, char** av);
    // ac and av are captured by copy for the call to main_cont.
    sched::init([=] { main_cont(ac, av); });
#endif /* !AARCH64_PORT_STUB */
#ifndef AARCH64_PORT_STUB
static bool opt_leak = false;
static bool opt_noshutdown = false;
static bool opt_log_backtrace = false;
Avi Kivity's avatar
Avi Kivity committed
static bool opt_mount = true;
static std::string opt_console = "both";
static bool opt_verbose = false;
static std::string opt_chdir;
static bool opt_bootchart = false;
static int sampler_frequency;
static bool opt_enable_sampler = false;

/*
 * Parse the OSv options at the front of the command line.
 *
 * Only the arguments that precede the first argument not starting with '-'
 * are treated as options. They are parsed with boost::program_options and
 * the results are stored in the file-level opt_* variables; some options
 * are acted on immediately (enable_verbose(), enable_tracepoint(),
 * putenv()).
 *
 * Returns (ac, av) advanced past the consumed options, so av[0] is the
 * first non-option argument.
 */
std::tuple<int, char**> parse_options(int ac, char** av)
{
    namespace bpo = boost::program_options;
    namespace bpos = boost::program_options::command_line_style;

    // "osv" fills slot 0 of the vector handed to boost -- presumably the
    // argv[0] placeholder that the parser skips; TODO confirm.
    std::vector<const char*> args = { "osv" };

    // due to https://svn.boost.org/trac/boost/ticket/6991, we can't terminate
    // command line parsing on the executable name, so we need to look for it
    // ourselves

    // nr_options = number of leading arguments that start with '-'.
    auto nr_options = std::find_if(av, av + ac,
                                   [](const char* arg) { return arg[0] != '-'; }) - av;
    std::copy(av, av + nr_options, std::back_inserter(args));

    bpo::options_description desc("OSv options");
    desc.add_options()
        ("help", "show help text")
        ("sampler", bpo::value<int>(), "start stack sampling profiler")
        ("trace", bpo::value<std::vector<std::string>>(), "tracepoints to enable")
        ("trace-backtrace", "log backtraces in the tracepoint log")
        ("leak", "start leak detector after boot")
        ("nomount", "don't mount the file system")
        ("noshutdown", "continue running after main() returns")
        ("verbose", "be verbose, print debug messages")
        ("console", bpo::value<std::vector<std::string>>(), "select console driver")
        ("env", bpo::value<std::vector<std::string>>(), "set Unix-like environment variable (putenv())")
        ("cwd", bpo::value<std::vector<std::string>>(), "set current working directory")
        ("bootchart", "perform a test boot measuring a time distribution of the various operations\n")
    ;
    bpo::variables_map vars;
    // don't allow --foo bar (require --foo=bar) so we can find the first non-option
    // argument
    int style = bpos::unix_style & ~(bpos::long_allow_next | bpos::short_allow_next);
    try {
        bpo::store(bpo::parse_command_line(args.size(), args.data(), desc, style), vars);
    } catch(std::exception &e) {
        // On a parse error, print the error followed by the option summary.
        // NOTE(review): the rest of this handler, including its closing
        // brace, is not visible in this copy of the file.
        std::cout << e.what() << '\n';
        std::cout << desc << '\n';
    bpo::notify(vars);

    // --help prints the option summary; the visible code then carries on.
    if (vars.count("help")) {
        std::cout << desc << "\n";
    }

    if (vars.count("leak")) {
        opt_leak = true;
    }

    if (vars.count("noshutdown")) {
        opt_noshutdown = true;
    }

    if (vars.count("trace-backtrace")) {
        opt_log_backtrace = true;
    }

    if (vars.count("verbose")) {
        opt_verbose = true;
        enable_verbose();
    }

    if (vars.count("sampler")) {
        sampler_frequency = vars["sampler"].as<int>();
        opt_enable_sampler = true;
    }

    if (vars.count("bootchart")) {
        opt_bootchart = true;
    }

    // Each --trace value may list several tracepoints separated by spaces
    // and/or commas; every name is enabled individually.
    if (vars.count("trace")) {
        auto tv = vars["trace"].as<std::vector<std::string>>();
        for (auto t : tv) {
            std::vector<std::string> tmp;
            boost::split(tmp, t, boost::is_any_of(" ,"), boost::token_compress_on);
            // The inner 't' shadows the outer one and is a single name.
            for (auto t : tmp) {
                enable_tracepoint(t);
            }
        }
    }
Avi Kivity's avatar
Avi Kivity committed
    // Mounting is on unless --nomount was given.
    opt_mount = !vars.count("nomount");

    // Only the first --console value is used.
    if (vars.count("console")) {
        auto v = vars["console"].as<std::vector<std::string>>();
        if (v.size() > 1) {
            printf("Ignoring '--console' options after the first.");
        }
        opt_console = v.front();
        debug("console=%s\n", opt_console);
    }
    if (vars.count("env")) {
        for (auto t : vars["env"].as<std::vector<std::string>>()) {
            debug("Setting in environment: %s\n", t);
            // putenv() is given a strdup()'d copy because 't' is a
            // loop-local string that is destroyed after each iteration.
            putenv(strdup(t.c_str()));
        }
    }

    // Only the first --cwd value is used.
    if (vars.count("cwd")) {
        auto v = vars["cwd"].as<std::vector<std::string>>();
        if (v.size() > 1) {
            printf("Ignoring '--cwd' options after the first.");
        }
        opt_chdir = v.front();
    }

    // Skip the consumed options so the caller sees only what follows them.
    av += nr_options;
    ac -= nr_options;
    return std::make_tuple(ac, av);
}

// Join the ac arguments in av into one space-separated line and split it
// into commands with osv::parse_command_line(); the result is stored in
// 'commands', a vector of string vectors (one inner vector per command).
// Prints a message when ac is 0; when parsing fails it prints a message and
// calls osv::poweroff().
// NOTE(review): several closing braces and the return statement are not
// visible in this copy of the file.
std::vector<std::vector<std::string> > prepare_commands(int ac, char** av)
{
    if (ac == 0) {
        puts("This image has an empty command line. Nothing to run.");
#ifdef AARCH64_PORT_STUB
        abort(); // a good test for the backtrace code
#endif
    std::vector<std::vector<std::string> > commands;
    std::string line = std::string("");
    bool ok;
    // concatenate everything
    // (each argument is followed by one space, including the last)
    for (auto i = 0; i < ac; i++) {
        line += std::string(av[i]) + " ";
    }
    // 'ok' is passed to the parser and checked right after the call.
    commands = osv::parse_command_line(line, ok);

    if (!ok) {
        puts("Failed to parse command line.");
        osv::poweroff();
// Java uses this global variable (supplied by Glibc) to figure out
// approximately where the initial thread's stack ends.
// The approximation allows us to fill the variable here instead of doing it
// in osv::run.
void *__libc_stack_end;

// Run one command. vec[0] is the program to execute and vec is used to build
// the argument list passed to osv::run(). Enables the memory tracker first
// when opt_leak is set, and records the current frame address in
// __libc_stack_end before starting the program.
// Assumes vec is non-empty (vec[0] is read unconditionally) -- TODO confirm
// that callers guarantee this.
// NOTE(review): some lines (the condition guarding the "success" path and
// the closing braces) are not visible in this copy of the file.
void run_main(const std::vector<std::string> &vec)
{
    // NOTE(review): postfix ++ on the temporary yields the un-incremented
    // iterator, so 'b' equals begin() and 'args' below is a full copy of vec
    // including vec[0]. Confirm that osv::run() expects this.
    auto b = std::begin(vec)++;
    auto e = std::end(vec);
    std::string command = vec[0];
    std::vector<std::string> args(b, e);
    int ret;

    if (opt_leak) {
        debug("Enabling leak detector.\n");
        memory::tracker_enabled = true;
    }

    __libc_stack_end = __builtin_frame_address(0);
    // The program's return value is received through 'ret'.
    auto lib = osv::run(command, args, &ret);
        // success
        // Only a non-zero return value is reported.
        if (ret) {
            debug("program %s returned %d\n", command.c_str(), ret);
        }

    if (opt_bootchart) {
        boot_time.print_chart();
    }

    printf("run_main(): cannot execute %s. Powering off.\n", command.c_str());
// Thread-entry wrapper around run_main(), passed to pthread_create() by
// do_main_thread(). 'data' is a heap-allocated std::vector<std::string>;
// this function deletes it after run_main() returns.
// NOTE(review): the braces and the return statement are not visible in this
// copy of the file.
void *_run_main(void *data)
    auto vecp = (std::vector<std::string> *)data;
    run_main(*vecp);
    delete vecp;
// Body of the pthread created by main_cont(). '_commands' points to the
// parsed command list (a vector of string vectors). In order, it: sets up
// pvpanic, enumerates PCI devices, registers and loads the device drivers,
// initializes randomdev, mounts the ZFS root when opt_mount is set, brings
// up the network interfaces other than "lo0", changes directory when
// opt_chdir is non-empty, and runs the commands. Returns nullptr.
// NOTE(review): this copy of the file is missing several lines of this
// function (opening brace, the header of the loop over the commands, and a
// number of closing braces).
void* do_main_thread(void *_commands)
    auto commands =
         static_cast<std::vector<std::vector<std::string> > *>(_commands);
    // initialize panic drivers
    panic::pvpanic::probe_and_setup();
    boot_time.event("pvpanic done");
    // Enumerate PCI devices
    pci::pci_device_enumeration();
    boot_time.event("pci enumerated");

    // Initialize all drivers
    // Probe functions are registered first; load_all() is called afterwards.
    hw::driver_manager* drvman = hw::driver_manager::instance();
    drvman->register_driver(virtio::blk::probe);
Asias He's avatar
Asias He committed
    drvman->register_driver(virtio::scsi::probe);
    drvman->register_driver(virtio::net::probe);
    drvman->register_driver(virtio::rng::probe);
    drvman->register_driver(xenfront::xenbus::probe);
Asias He's avatar
Asias He committed
    drvman->register_driver(ahci::hba::probe);
Asias He's avatar
Asias He committed
    drvman->register_driver(vmw::pvscsi::probe);
Takuya ASADA's avatar
Takuya ASADA committed
    drvman->register_driver(vmw::vmxnet3::probe);
Takuya ASADA's avatar
Takuya ASADA committed
    drvman->register_driver(ide::ide_drive::probe);
    boot_time.event("drivers probe");
    drvman->load_all();
    drvman->list_drivers();

    randomdev::randomdev_init();
    boot_time.event("drivers loaded");
Avi Kivity's avatar
Avi Kivity committed
    // The root file system is mounted only when --nomount was not given.
    if (opt_mount) {
Pekka Enberg's avatar
Pekka Enberg committed
        mount_zfs_rootfs();
        bsd_shrinker_init();
    boot_time.event("ZFS mounted");
    // NOTE(review): nothing visible here ever sets has_if to true, so as
    // written dhcp_start() below would never run -- an assignment inside the
    // lambda is presumably missing from this copy; confirm.
    bool has_if = false;
    osv::for_each_if([&has_if] (std::string if_name) {
        // The "lo0" interface is skipped.
        if (if_name == "lo0")
            return;

Takuya ASADA's avatar
Takuya ASADA committed
        // Start DHCP by default and wait for an IP
        if (osv::start_if(if_name, "0.0.0.0", "255.255.255.0") != 0 ||
            osv::ifup(if_name) != 0)
            debug("Could not initialize network interface.\n");
    });
    if (has_if) {
        dhcp_start(true);
    }
    // A chdir() failure is reported with perror() and is otherwise ignored.
    if (!opt_chdir.empty()) {
        debug("Chdir to: '%s'\n", opt_chdir.c_str());

        if (chdir(opt_chdir.c_str()) != 0) {
            perror("chdir");
        }
        debug("chdir done\n");
    }

    boot_time.event("Total time");

    // Our parse_command_line() leaves at the end of each command a delimiter,
    // can be '&' if we need to run this command in a new thread, or ';' or
    // empty otherwise, to run in this thread.
    std::vector<pthread_t> bg;
        // 'newvec' is the command without its trailing delimiter element.
        // NOTE(review): 'it' is not declared in the visible code; the loop
        // header that introduces it is missing from this copy.
        std::vector<std::string> newvec(it.begin(), std::prev(it.end()));
        if (it.back() != "&") {
            run_main(newvec);
        } else {
            // Background command: run it in its own thread. The heap copy of
            // the arguments is deleted by _run_main().
            pthread_t t;
            pthread_create(&t, nullptr, _run_main,
                    new std::vector<std::string> (newvec));
    // Join every thread recorded in 'bg' before returning.
    void* retval;
    for (auto t : bg) {
        pthread_join(t, &retval);

    return nullptr;
}

// Continuation of main(), invoked from the lambda that main() passes to
// sched::init(). It parses the OSv options, prepares the command list,
// initializes the IOAPIC, SMP, console, tracing, VFS, ramdisk and
// networking, then runs do_main_thread() in a pthread and joins it.
// Afterwards it waits forever if opt_noshutdown is set; otherwise it halts
// (when the memory tracker is enabled) or shuts down.
// NOTE(review): the closing braces at the end of this function are not
// visible in this copy of the file.
void main_cont(int ac, char** av)
{
    // The returned pointer is not kept; the object is reached through
    // elf::get_program() on the next line -- presumably the constructor
    // registers the instance; TODO confirm.
    new elf::program();
    elf::get_program()->set_search_path({"/", "/usr/lib"});
    std::vector<std::vector<std::string> > cmds;
    // After this call ac/av refer to what follows the OSv options.
    std::tie(ac, av) = parse_options(ac, av);
    // multiple programs can be run -> separate their arguments
    cmds = prepare_commands(ac, av);
    ioapic::init();
    smp_launch();
    boot_time.event("SMP launched");
    memory::enable_debug_allocator();
#ifdef __x86_64__
    // Pick console drivers from the --console value: "serial", "vga" or
    // "both" (the default value of opt_console); anything else aborts.
    if (opt_console.compare("serial") == 0) {
        console::console_driver_add(new console::IsaSerialConsole());
    } else if (opt_console.compare("vga") == 0) {
        console::console_driver_add(new console::VGAConsole());
    } else if (opt_console.compare("both") == 0) {
        console::console_driver_add(new console::IsaSerialConsole());
        console::console_driver_add(new console::VGAConsole());
    } else {
        abort("Unknown console:%s", opt_console.c_str());
    }
#endif /* __x86_64__ */
    console::console_init();

    if (sched::cpus.size() > sched::max_cpus) {
        printf("Too many cpus, can't boot with greater than %u cpus.\n", sched::max_cpus);
        poweroff();
    }

    enable_trace();
    if (opt_log_backtrace) {
        // can only do this after smp_launch, otherwise the IDT is not initialized,
        // and backtrace_safe() fails as soon as we get an exception
        tracepoint_base::log_backtraces();
    }
    sched::init_detached_threads_reaper();

    vfs_init();
    boot_time.event("VFS initialized");
    ramdisk_init();

    net_init();
    boot_time.event("Network initialized");

    processor::sti();

    if (opt_enable_sampler) {
        // Sampling period in nanoseconds = 1e9 / frequency.
        // NOTE(review): sampler_frequency comes straight from --sampler; a
        // value of 0 would divide by zero here.
        prof::config config{std::chrono::nanoseconds(1000000000 / sampler_frequency)};
        prof::start_sampler(config);
    }

    pthread_t pthread;
    // run the payload in a pthread, so pthread_self() etc. work
    // &cmds points at a local; the thread is joined below before this
    // function leaves the scope of cmds.
    pthread_create(&pthread, nullptr, do_main_thread, (void *) &cmds);
    void* retval;
    pthread_join(pthread, &retval);

    if (opt_noshutdown) {
        // If the --noshutdown option is given, continue running the system,
        // and whatever threads might be running, even after main returns
        debug("main() returned.\n");
        // The predicate always returns false.
        sched::thread::wait_until([] { return false; });
    }

    if (memory::tracker_enabled) {
        debug("Leak testing done. Please use 'osv leak show' in gdb to analyze results.\n");
        osv::halt();
    } else {
        osv::shutdown();
#endif /* !AARCH64_PORT_STUB */

int __argc;
char** __argv;