diff --git a/photon/libc/sys/horizon/__elf.h b/photon/libc/sys/horizon/__elf.h new file mode 100644 index 0000000..9c1495f --- /dev/null +++ b/photon/libc/sys/horizon/__elf.h @@ -0,0 +1,323 @@ +#ifndef SYS_HORIZON_SYS___ELF_H_ +#define SYS_HORIZON_SYS___ELF_H_ + +#include +#include + +#define ELF_LOAD_ERR -1 +#define ELF_LOADED_EXEC 0 +#define ELF_LOADED_INTERP 1 + +#define ELF_MAG0 0x7f +#define ELF_MAG1 'E' +#define ELF_MAG2 'L' +#define ELF_MAG3 'F' +#define ELF_NIDENT 16 + +#define SHT_NONE 0 +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define SHT_RELA 4 +#define SHT_DYNAMIC 6 +#define SHT_NOBITS 8 +#define SHT_REL 9 +#define SHT_DYNSYM 11 + +/** Little endian. */ +#define ELFDATA2LSB (1) + +/** 64-bit. */ +#define ELFCLASS64 (2) + +/** x86_64 machine type. */ +#define EM_X86_64 (62) + +/** ELF current version. */ +#define EV_CURRENT (1) + +/** Dynamic section tags. */ +#define DT_NULL 0 +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_RELA 7 +#define DT_RELASZ 8 +#define DT_RELAENT 9 +#define DT_STRSZ 10 +#define DT_SYMENT 11 +#define DT_INIT 12 +#define DT_FINI 13 +#define DT_REL 17 +#define DT_RELSZ 18 +#define DT_RELENT 19 +#define DT_PLTREL 20 +#define DT_JMPREL 23 +#define DT_GNU_HASH 0x6ffffef5 +#define DT_AUXILIARY 0x7ffffffd + +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_GOT32X 43 + +#define R_X86_64_64 1 +#define R_X86_64_PC32 2 +#define R_X86_64_GOT32 3 +#define R_X86_64_PLT32 4 +#define R_X86_64_COPY 5 +#define R_X86_64_GLOB_DAT 6 +#define R_X86_64_JUMP_SLOT 7 +#define R_X86_64_RELATIVE 8 +#define R_X86_64_GOTPCREL 9 +#define R_X86_64_32 10 + +#define STT_NOTYPE 0 +#define STT_OBJECT 1 +#define STT_FUNC 2 +#define STT_SECTION 3 +#define STT_FILE 4 +#define STT_LOPROC 13 +#define STT_HIPROC 15 + +/* Section flags */ +#define SHF_WRITE 
0x1 +#define SHF_ALLOC 0x2 +#define SHF_EXECINSTR 0x4 + +#define SHN_UNDEF 0 + +#define ELF64_R_SYM(i) ((i) >> 32) +#define ELF64_R_TYPE(i) ((elf_word_t)(i)) +#define ELF64_ST_BIND(i) ((i) >> 4) +#define ELF64_ST_TYPE(i) ((i)&0xf) + +#define STB_LOCAL 0 +#define STB_GLOBAL 1 +#define STB_WEAK 2 +#define STB_NUM 3 + +typedef uint64_t elf_addr_t; +typedef uint64_t elf_off_t; +typedef uint16_t elf_half_t; +typedef uint32_t elf_word_t; +typedef int32_t elf_sword_t; +typedef uint64_t elf_xword_t; +typedef int64_t elf_sxword_t; + +/** + * ELF file header. + */ +typedef struct { + uint8_t e_ident[ELF_NIDENT]; + elf_half_t e_type; + elf_half_t e_machine; + elf_word_t e_version; + elf_addr_t e_entry; + elf_off_t e_phoff; + elf_off_t e_shoff; + elf_word_t e_flags; + elf_half_t e_ehsize; + elf_half_t e_phentsize; + elf_half_t e_phnum; + elf_half_t e_shentsize; + elf_half_t e_shnum; + elf_half_t e_shstrndx; +} elf_ehdr_t; + +/** + * ELF section header. + */ +typedef struct { + elf_word_t sh_name; + elf_word_t sh_type; + elf_xword_t sh_flags; + elf_addr_t sh_addr; + elf_off_t sh_offset; + elf_xword_t sh_size; + elf_word_t sh_link; + elf_word_t sh_info; + elf_xword_t sh_addralign; + elf_xword_t sh_entsize; +} elf_shdr_t; + +/** + * ELF symbol. + */ +typedef struct { + elf_word_t st_name; + unsigned char st_info; + unsigned char st_other; + elf_half_t st_shndx; + elf_addr_t st_value; + elf_xword_t st_size; +} elf_sym_t; + +/** + * ELF program header. + */ +typedef struct { + elf_word_t p_type; + elf_word_t p_flags; + elf_off_t p_offset; + elf_addr_t p_vaddr; + elf_addr_t p_paddr; + elf_xword_t p_filesz; + elf_xword_t p_memsz; + elf_xword_t p_align; +} elf_phdr_t; + +/** + * Extended ELF relocation information. 
+ */ +typedef struct { + elf_addr_t r_offset; + elf_xword_t r_info; + elf_sxword_t r_addend; +} elf_rela_t; + +/** + * Dynamic section entries + */ +typedef struct { + elf_sxword_t d_tag; + union { + elf_xword_t d_val; + elf_addr_t d_ptr; + } d_un; +} elf_dyn_t; + +/** + * Section header types. + */ +enum elf_stype { + ST_NONE = 0, + ST_PROGBITS = 1, + ST_SYMTAB = 2, + ST_STRTAB = 3, + ST_NOBITS = 8, + ST_REL = 9 +}; + +/** + * Program header types. + */ +enum elf_ptype { + PT_NULL = 0, + PT_LOAD = 1, + PT_DYNAMIC = 2, + PT_INTERP = 3, + PT_NOTE = 4, + PT_SHLIB = 5, + PT_PHDR = 6 +}; + +#define PF_X 0x1 +#define PF_W 0x2 +#define PF_R 0x4 + +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7FFFFFFF + +/** + * ELF identification byte locations. + */ +enum elf_ident { + EI_MAG0 = 0, + EI_MAG1 = 1, + EI_MAG2 = 2, + EI_MAG3 = 3, + EI_CLASS = 4, + EI_DATA = 5, + EI_VERSION = 6, + EI_OSABI = 7, + EI_ABIVERSION = 8, + EI_PAD = 9 +}; + +enum elf_type { + ET_NONE = 0, + ET_REL = 1, + ET_EXEC = 2, + ET_DYN = 3, +}; + +#define AT_NULL 0 +#define AT_IGNORE 1 +#define AT_EXECFD 2 +#define AT_PHDR 3 +#define AT_PHENT 4 +#define AT_PHNUM 5 +#define AT_PAGESZ 6 +#define AT_BASE 7 +#define AT_FLAGS 8 +#define AT_ENTRY 9 +#define AT_NOTELF 10 +#define AT_UID 11 +#define AT_EUID 12 +#define AT_GID 13 +#define AT_EGID 14 +#define AT_CLKTCK 17 +#define AT_PLATFORM 15 +#define AT_HWCAP 16 +#define AT_FPUCW 18 +#define AT_DCACHEBSIZE 19 +#define AT_ICACHEBSIZE 20 +#define AT_UCACHEBSIZE 21 +#define AT_IGNOREPPC 22 +#define AT_SECURE 23 +#define AT_BASE_PLATFORM 24 +#define AT_RANDOM 25 +#define AT_HWCAP2 26 +#define AT_EXECFN 31 +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 +#define AT_L1I_CACHESHAPE 34 +#define AT_L1D_CACHESHAPE 35 +#define AT_L2_CACHESHAPE 36 +#define AT_L3_CACHESHAPE 37 +#define AT_ENTRY_COUNT 38 + +struct bootdata; +struct bootfs_file; + +struct elf_image { + mx_handle_t image_vmo, rw_vmo; + mx_vaddr_t local_base_addr, remote_base_addr; + + mx_handle_t 
local_root_vmar, local_exec_vmar; + mx_handle_t remote_root_vmar, remote_exec_vmar; + size_t exec_vmar_size; + + elf_ehdr_t hdr; + uintptr_t dynstr_offset; + size_t dynstr_len; + + uintptr_t dynsym_offset; + size_t dynsym_len, dynsym_entsz; + + elf_phdr_t dynamic; + + enum { NO_HASH, REG_HASH, GNU_HASH } hash_type; + uintptr_t hash_offset; +}; + +extern void __elf_image_init(mx_handle_t local_vmar, mx_handle_t remote_vmar, struct elf_image *out); +extern int __elf_image_load_image(struct elf_image *image, mx_handle_t image_vmo); +extern int __elf_get_interp_path(mx_handle_t exec_vmo, char *out, size_t max); +extern int __elf_check_dependencies(struct elf_image *image); + +extern mx_vaddr_t __elf_image_get_symbol(struct elf_image *image, const char *name); + +extern int __elf_image_link(struct elf_image *exec, struct elf_image *vdso); +extern mx_vaddr_t __elf_image_entry_point(struct elf_image *image); + +extern void __elf_image_cleanup(struct elf_image *image); + +#endif diff --git a/photon/libc/sys/horizon/elf.c b/photon/libc/sys/horizon/elf.c new file mode 100644 index 0000000..01dc8cc --- /dev/null +++ b/photon/libc/sys/horizon/elf.c @@ -0,0 +1,673 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "__elf.h" + +#define HIDDEN __attribute__((visibility("hidden"))) + +#define VDSO_SONAME "mx.dylib" + +#define NEEDS_NOTHING 0 +#define NEEDS_VDSO 1 +#define NEEDS_MORE 2 + +#define ACL (PF_R | PF_W | PF_X) +#define ACCESS(x) ((x) & ACL) + +/* TODO in case we ever support ELF32 images */ +#define elf_class_bits(x) (64) + +#define PAGE_OFFSET(v) ((v) & (MX_PAGE_SIZE - 1)) +#define PAGE_ALIGN_DOWN(v) (v) &= ~(MX_PAGE_SIZE - 1) +#define PAGE_ALIGN_UP(v) \ + do { \ + if ((v) & (MX_PAGE_SIZE - 1)) { \ + v &= ~(MX_PAGE_SIZE - 1); \ + v += MX_PAGE_SIZE; \ + } \ + } while (0) + +#undef DEBUG_LOG + +HIDDEN void __elf_image_init(mx_handle_t local_vmar, mx_handle_t remote_vmar, struct elf_image *out) +{ + memset(out, 0x0, sizeof(*out)); 
+ out->local_root_vmar = local_vmar; + out->remote_root_vmar = remote_vmar; +} + +static int elf_validate_ehdr(elf_ehdr_t *hdr) +{ + if (hdr->e_ident[EI_MAG0] != ELF_MAG0) { + return -1; + } + + if (hdr->e_ident[EI_MAG1] != ELF_MAG1) { + return -1; + } + + if (hdr->e_ident[EI_MAG2] != ELF_MAG2) { + return -1; + } + + if (hdr->e_ident[EI_MAG3] != ELF_MAG3) { + return -1; + } + + if (hdr->e_ident[EI_CLASS] != ELFCLASS64) { + return -1; + } + + if (hdr->e_machine != EM_X86_64) { + return -1; + } + + if (hdr->e_ident[EI_DATA] != ELFDATA2LSB) { + return -1; + } + + if (hdr->e_ident[EI_VERSION] != EV_CURRENT) { + return -1; + } + + return 0; +} + +static int read_header(struct elf_image *image) +{ + mx_status_t err = mx_vmo_read(image->image_vmo, (uint8_t *)&image->hdr, sizeof(image->hdr), 0); + if (err != MX_OK) { + printf("vmo_read %x error %s\n", image->image_vmo, mx_status_to_string(err)); + return -1; + } + + return elf_validate_ehdr(&image->hdr); +} + +static mx_status_t allocate_vmar(struct elf_image *image, size_t size) +{ + mx_status_t err = mx_vmar_allocate(image->local_root_vmar, + MX_VM_CAN_MAP_READ | MX_VM_CAN_MAP_WRITE | + MX_VM_CAN_MAP_EXEC | MX_VM_CAN_MAP_SPECIFIC, + 0, size, &image->local_exec_vmar, &image->local_base_addr); + + if (err != MX_OK) { + return err; + } + + err = mx_vmar_allocate(image->remote_root_vmar, + MX_VM_CAN_MAP_READ | MX_VM_CAN_MAP_WRITE | + MX_VM_CAN_MAP_EXEC | MX_VM_CAN_MAP_SPECIFIC, + 0, size, &image->remote_exec_vmar, &image->remote_base_addr); + image->exec_vmar_size = size; + + return err; +} + +static void deduce_dynsym_count(struct elf_image *image) +{ + if (image->hash_type == REG_HASH) { + image->dynsym_len = *(((uint32_t *)image->hash_offset) + 1); + } else { + /* We don't need to know the symbol count to use DT_GNU_HASH + * (which is good because calculating it is a pain) */ + image->dynsym_len = 0; + } + } + +static void read_dynamic_segment(struct elf_image *image) +{ + elf_dyn_t *dyn_array = (elf_dyn_t 
*)(image->local_base_addr + image->dynamic.p_vaddr); + + for (int i = 0;; i++) { + elf_dyn_t *dyn = &dyn_array[i]; + + if (dyn->d_tag == DT_NULL) { + break; + } + + switch (dyn->d_tag) { + case DT_STRTAB: + image->dynstr_offset = dyn->d_un.d_ptr; + dyn->d_un.d_ptr += image->remote_base_addr; + break; + case DT_STRSZ: + image->dynstr_len = dyn->d_un.d_val; + break; + case DT_SYMTAB: + image->dynsym_offset = dyn->d_un.d_ptr; + dyn->d_un.d_ptr += image->remote_base_addr; + break; + case DT_SYMENT: + image->dynsym_entsz = dyn->d_un.d_val; + break; + case DT_HASH: + /* prefer the GNU hash table */ + if (image->hash_type == GNU_HASH) { + continue; + } + image->hash_type = REG_HASH; + image->hash_offset = dyn->d_un.d_ptr; + dyn->d_un.d_ptr += image->remote_base_addr; + break; + case DT_GNU_HASH: + image->hash_type = GNU_HASH; + image->hash_offset = dyn->d_un.d_ptr; + dyn->d_un.d_ptr += image->remote_base_addr; + break; + default: + break; + } + } +} + +static void get_memory_requirements(void *phdr_buf, int entsize, int nr_phdr, + size_t *out_vmem, size_t *out_rw) +{ + size_t vmem_max = 0; + size_t rw_max = 0; + + for (int i = 0; i < nr_phdr; i++) { + elf_phdr_t *phdr = (elf_phdr_t *)((char *)phdr_buf + (i * entsize)); + if (phdr->p_type != PT_LOAD) { + continue; + } + + size_t foffset = phdr->p_offset; + size_t voffset = phdr->p_vaddr; + PAGE_ALIGN_DOWN(foffset); + PAGE_ALIGN_DOWN(voffset); + + size_t vend = phdr->p_vaddr + phdr->p_memsz; + PAGE_ALIGN_UP(vend); + + size_t fsize = phdr->p_filesz; + size_t vsize = vend - voffset; + PAGE_ALIGN_UP(fsize); + + if (phdr->p_flags & PF_W) { + rw_max += vsize; + } + + if (vend > vmem_max) { + vmem_max = vend; + } + } + + *out_rw = rw_max; + *out_vmem = vmem_max; +} + +static mx_status_t map_memory(struct elf_image *image, + mx_off_t voffset, mx_off_t foffset, size_t sz, int flags, void **out_buf) +{ + mx_status_t err = MX_OK; + mx_flags_t options = MX_VM_SPECIFIC; + mx_handle_t vmo = image->image_vmo; + + if (flags & PF_R) { + 
options |= MX_VM_PERM_READ; + } + + if (flags & PF_W) { + options |= MX_VM_PERM_WRITE; + vmo = image->rw_vmo; + } + + if (flags & PF_X) { + options |= MX_VM_PERM_EXEC; + } + + mx_vaddr_t local_addr = 0, remote_addr = 0; + *out_buf = NULL; + + err = mx_vmar_map(image->local_exec_vmar, options, voffset, vmo, foffset, sz, &local_addr); + if (err != MX_OK) { + return err; + } + + err = mx_vmar_map(image->remote_exec_vmar, options, voffset, vmo, foffset, sz, &remote_addr); + if (err != MX_OK) { + return err; + } + + *out_buf = (void *)local_addr; + return MX_OK; +} + +static mx_status_t map_image(struct elf_image *image, + void *phdr_buf, int phdr_entsize, int phdr_num) +{ + size_t rw_offset = 0; + mx_status_t err = MX_OK; + + for (int i = 0; i < phdr_num; i++) { + elf_phdr_t *phdr = (elf_phdr_t *)((char *)phdr_buf + (i * phdr_entsize)); + if (phdr->p_type == PT_DYNAMIC) { + image->dynamic = *phdr; + } + + if (phdr->p_type != PT_LOAD) { + continue; + } + + size_t foffset = phdr->p_offset; + size_t voffset = phdr->p_vaddr; + PAGE_ALIGN_DOWN(foffset); + PAGE_ALIGN_DOWN(voffset); + + size_t vend = phdr->p_vaddr + phdr->p_memsz; + PAGE_ALIGN_UP(vend); + + size_t fsize = phdr->p_filesz; + size_t vsize = vend - voffset; + PAGE_ALIGN_UP(fsize); + + if (phdr->p_flags & PF_W) { + size_t rw_vmo_offset = rw_offset; + size_t rw_vmo_size = vsize; + + size_t bss_offset = phdr->p_vaddr + phdr->p_filesz; + size_t bss_size = phdr->p_memsz - phdr->p_filesz; + + void *segment_buf = NULL; + err = map_memory(image, voffset, rw_vmo_offset, vsize, phdr->p_flags, &segment_buf); + + if (err != MX_OK) { + return err; + } + + void *file_dest = (char *)image->local_base_addr + phdr->p_vaddr; + void *bss_dest = (char *)image->local_base_addr + bss_offset; + + mx_vmo_read(image->image_vmo, file_dest, phdr->p_filesz, phdr->p_offset); + + if (bss_size) { + memset(bss_dest, 0x0, bss_size); + } + + rw_offset += rw_vmo_size; + } else { + void *segment_buf = NULL; + err = map_memory(image, voffset, 
foffset, vsize, phdr->p_flags, &segment_buf); + + if (err != MX_OK) { + return err; + } + } + } + + return MX_OK; +} + +HIDDEN int __elf_image_load_image(struct elf_image *image, mx_handle_t image_vmo) +{ + image->image_vmo = image_vmo; + + int status = read_header(image); + + if (status != 0) { + printf("read_header error\n"); + return status; + } + + size_t phdr_bufsz = image->hdr.e_phentsize * image->hdr.e_phnum; + unsigned char phdr_buf[phdr_bufsz]; + + mx_vmo_read(image_vmo, phdr_buf, phdr_bufsz, image->hdr.e_phoff); + + size_t rw_size, vmem_size; + get_memory_requirements(phdr_buf, image->hdr.e_phentsize, image->hdr.e_phnum, &vmem_size, &rw_size); + + if (allocate_vmar(image, vmem_size) != MX_OK) { + printf("allocate_vmar error\n"); + return -1; + } + + if (rw_size) { + mx_vmo_create(rw_size, MX_VM_PERM_READ | MX_VM_PERM_WRITE, &image->rw_vmo); + } + + mx_status_t err = map_image(image, phdr_buf, image->hdr.e_phentsize, image->hdr.e_phnum); + if (err != MX_OK) { + printf("map_image error\n"); + return -1; + } + + read_dynamic_segment(image); + + return 0; +} + +HIDDEN int __elf_get_interp_path(mx_handle_t image_vmo, char *out, size_t max) +{ + elf_ehdr_t hdr; + mx_vmo_read(image_vmo, (uint8_t *)&hdr, sizeof(hdr), 0); + + if (elf_validate_ehdr(&hdr) != 0) { + return -1; + } + + unsigned char phdr_buf[hdr.e_phentsize * hdr.e_phnum]; + mx_vmo_read(image_vmo, phdr_buf, hdr.e_phentsize * hdr.e_phnum, hdr.e_phoff); + + uintptr_t dynamic_offset = 0, interp_offset = 0; + size_t dynamic_sz = 0, interp_sz = 0; + + for (size_t i = 0; i < hdr.e_phnum; i++) { + elf_phdr_t *phdr = (elf_phdr_t *)(phdr_buf + i * hdr.e_phentsize); + + switch (phdr->p_type) { + case PT_DYNAMIC: + dynamic_offset = phdr->p_offset; + dynamic_sz = phdr->p_filesz; + break; + case PT_INTERP: + interp_offset = phdr->p_offset; + interp_sz = phdr->p_filesz; + break; + default: + break; + } + } + + if (!dynamic_sz || !interp_offset) { + return 0; + } + + if (max > interp_sz) { + max = interp_sz; + } + + 
mx_vmo_read(image_vmo, (uint8_t *)out, max, interp_offset); + return 1; +} + +HIDDEN int __elf_check_dependencies(struct elf_image *image) +{ + elf_dyn_t *dyn_array = (elf_dyn_t *)(image->local_base_addr + image->dynamic.p_vaddr); + + for (int i = 0;; i++) { + elf_dyn_t *dyn = &dyn_array[i]; + + if (dyn->d_tag == DT_NULL) { + break; + } + + if (dyn->d_tag != DT_NEEDED) { + continue; + } + + const char *lib_name = (const char *)(image->local_base_addr + image->dynstr_offset + dyn->d_un.d_ptr); + if (strcmp(lib_name, VDSO_SONAME) != 0) { + /* We can't load executables that link to libraries other than + * libmx */ + return -1; + } + } + + return 0; +} + +static elf_sym_t *get_dynsym_entry(struct elf_image *image, unsigned int idx) +{ + return (elf_sym_t *)(image->local_base_addr + image->dynsym_offset + idx * image->dynsym_entsz); +} + +static const char *get_dynstr(struct elf_image *image, unsigned int idx) +{ + return (const char *)(image->local_base_addr + image->dynstr_offset + idx); +} + +static int do_rela(struct elf_image *image, struct elf_image *lib, uintptr_t ptr, size_t sz, size_t entsz) +{ + size_t entries = sz / entsz; + + for (size_t i = 0; i < entries; i++) { + elf_rela_t *rela = (elf_rela_t *)(image->local_base_addr + ptr + i * entsz); + int sym_idx = ELF64_R_SYM(rela->r_info); + int type = ELF64_R_TYPE(rela->r_info); + + mx_vaddr_t sym_val = 0; + if (type != R_X86_64_RELATIVE) { + elf_sym_t *dynsym = get_dynsym_entry(image, sym_idx); + const char *name = get_dynstr(image, dynsym->st_name); + sym_val = __elf_image_get_symbol(image, name); + if (!sym_val && lib) { + sym_val = __elf_image_get_symbol(lib, name); + } + + if (!sym_val) { + return -1; + } + } + + int ok = 1; + mx_status_t status = MX_OK; + + switch (type) { + case R_X86_64_GLOB_DAT: { + elf_xword_t val = sym_val; + elf_xword_t *dest = (elf_xword_t *)((char *)image->local_base_addr + rela->r_offset); + *dest = val; + break; + } case R_X86_64_64: { + elf_xword_t val = sym_val + 
rela->r_addend; + elf_xword_t *dest = (elf_xword_t *)((char *)image->local_base_addr + rela->r_offset); + *dest = val; + break; + } case R_X86_64_JUMP_SLOT: { + elf_xword_t val = sym_val; + elf_xword_t *dest = (elf_xword_t *)((char *)image->local_base_addr + rela->r_offset); + *dest = val; + break; + } case R_X86_64_RELATIVE: { + elf_xword_t val = image->remote_base_addr + rela->r_addend; + elf_xword_t *dest = (elf_xword_t *)((char *)image->local_base_addr + rela->r_offset); + *dest = val; + break; + } default: + ok = 0; + break; + } + + if (!ok || status != MX_OK) { + return -1; + } + } + + return 0; +} + +static int relocate(struct elf_image *image, struct elf_image *lib) +{ + elf_dyn_t *dyn_array = (elf_dyn_t *)(image->local_base_addr + image->dynamic.p_vaddr); + + int result = 0; + + uintptr_t rel_addr = 0, rela_addr = 0, plt_addr = 0; + size_t rel_sz = 0, rel_entsz = 0; + size_t rela_sz = 0, rela_entsz = 0; + size_t plt_sz = 0, plt_entsz = 0; + int plt_enttype; + for (int i = 0;; i++) { + elf_dyn_t *dyn = &dyn_array[i]; + + if (dyn->d_tag == DT_NULL) { + break; + } + + switch (dyn->d_tag) { + case DT_REL: + rel_addr = dyn->d_un.d_ptr; + break; + case DT_RELSZ: + rel_sz = dyn->d_un.d_val; + break; + case DT_RELENT: + rel_entsz = dyn->d_un.d_val; + break; + case DT_RELA: + rela_addr = dyn->d_un.d_ptr; + break; + case DT_RELASZ: + rela_sz = dyn->d_un.d_val; + break; + case DT_RELAENT: + rela_entsz = dyn->d_un.d_val; + break; + case DT_PLTRELSZ: + plt_sz = dyn->d_un.d_val; + break; + case DT_JMPREL: + plt_addr = dyn->d_un.d_ptr; + break; + case DT_PLTREL: + plt_enttype = dyn->d_un.d_ptr; + break; + default: + break; + } + + if (dyn->d_tag != DT_NEEDED) { + continue; + } + } + + if (rel_sz) { + /* DT_REL is not supported */ + return -1; + } + + if (plt_enttype == DT_RELA) { + plt_entsz = rela_entsz ? 
/**
 * Compute the hash used by DT_GNU_HASH tables for a symbol name.
 *
 * Starts from 5381 and folds each byte in as h = h * 33 + byte, with 32-bit
 * wrap-around. Bytes are read as unsigned char: with a plain (possibly
 * signed) char, bytes >= 0x80 would be sign-extended and names containing
 * them would hash to a different value than the one stored in the table.
 *
 * @param name  NUL-terminated symbol name.
 * @return the 32-bit hash of @p name.
 */
static uint32_t gnu_hash(const char *name)
{
    const unsigned char *p = (const unsigned char *)name;
    uint32_t h = 5381;

    while (*p != '\0') {
        h = (h << 5) + h + *p;
        p++;
    }

    return h;
}
| 1) && !strcmp(symname, name)) { + return image->remote_base_addr + symtab[symix].st_value; + } + + if (symhash & 1) { + break; + } + + symix++; + } + + return 0; +} + +HIDDEN mx_vaddr_t __elf_image_get_symbol(struct elf_image *image, const char *name) +{ + switch (image->hash_type) { + case GNU_HASH: + return sym_gnu_hash_search(image, name); + default: + return 0; + } +} + +HIDDEN void __elf_image_cleanup(struct elf_image *image) +{ + if (image->local_exec_vmar) { + mx_vmar_unmap(image->local_exec_vmar, 0, image->exec_vmar_size); + mx_vmar_destroy(image->local_exec_vmar); + mx_handle_close(image->local_exec_vmar); + image->local_exec_vmar = MX_NULL_HANDLE; + } + + if (image->remote_exec_vmar) { + mx_handle_close(image->remote_exec_vmar); + image->remote_exec_vmar = MX_NULL_HANDLE; + } + + if (image->rw_vmo) { + mx_handle_close(image->rw_vmo); + image->rw_vmo = MX_NULL_HANDLE; + } +} diff --git a/photon/libc/sys/horizon/init.c b/photon/libc/sys/horizon/init.c index 9f2d03e..a93e05e 100644 --- a/photon/libc/sys/horizon/init.c +++ b/photon/libc/sys/horizon/init.c @@ -17,8 +17,6 @@ #include "__heap.h" #include "__fio.h" -#define DYLD_PARENT_IMAGE - /* maximum of 32 handles can be received sent as arguments */ #define MAX_HANDLE_ARGS 32 @@ -258,6 +256,7 @@ static int do_init(mx_handle_t bootstrap, bool enable_fs, bool enable_stdio, int #endif dbg_log("extracted %zu handles\n", nr_handles); + mx_bootstrap_handle_init(arg_handles, nr_handles); const char **argv = NULL, **envp = NULL, **namep = NULL; @@ -287,16 +286,19 @@ static int do_init(mx_handle_t bootstrap, bool enable_fs, bool enable_stdio, int mio_namespace *ns = mio_namespace_create(); + dbg_log("received %u names/%zu handles\n", msg->names_num, nr_handles); if (msg->names_num > 0) { for (size_t i = 0; i < nr_handles; i++) { int type = MX_B_HND_TYPE(arg_handles[i].info); int arg = MX_B_HND_ARG(arg_handles[i].info); if (type != MX_B_NS_DIR && type != MX_B_TUNNEL_CWD) { + dbg_log(" * wrong type %x\n", type); 
continue; } const char *path = namep[arg]; + dbg_log(" * %s = %x\n", path, arg_handles[i].handle); if (type == MX_B_TUNNEL_CWD) { mio_set_cwd(arg_handles[i].handle, path); @@ -329,7 +331,7 @@ static int do_load_and_run(mx_handle_t bootstrap) int argc; const char **argv; - int err = do_init(bootstrap, false, false, &argc, &argv); + int err = do_init(bootstrap, false, true, &argc, &argv); if (err != 0) { return err; } diff --git a/photon/libc/sys/horizon/launch.c b/photon/libc/sys/horizon/launch.c new file mode 100644 index 0000000..ae24786 --- /dev/null +++ b/photon/libc/sys/horizon/launch.c @@ -0,0 +1,1224 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "__elf.h" + +#define LDV1_VERSION 0 +#define LDV2_VERSION 1 + +/* 1 MiB default stack */ +#define DEFAULT_STACK_SZ 256 * 0x1000 + +#define NR_DEFAULT_HANDLES 6 +#define NR_STDIO_HANDLES 3 +#define MAX_ARG_HANDLES 32 + +#define ARG_HANDLE(h, t) (mx_bootstrap_handle_t){ .handle = h, .info = t } + +#define RET_ERR(code) \ + __set_errno(code); \ + return -1 + +static char g_launch_error[1024] = {}; + +struct cleanup { + /* list of handles that were opened by launch() that should be closed if an error occurs */ + mx_handle_t *auto_handles; + size_t auto_handle_count; + + const char **names; + size_t name_count; + + /* bootstrap handle array */ + mx_bootstrap_handle_t *b_handles; + size_t b_handle_count; + + mx_handle_t exec_vmo, stack_vmo; + mx_vaddr_t stack_ptr; + + int ldsvc_version; + union { + mx_handle_t handle; + xpc_socket sock; + } ldsvc; + + mx_handle_t bootstrap_local, bootstrap_remote; + + /* ELF-loader specific items */ + struct elf_image exec, vdso; + mx_handle_t interp_vmo; + char *interp_path; + mx_handle_t new_task, new_vmar; + mx_vaddr_t entry_point; + + struct namespace_entry *inherited_ns; 
/**
 * Pick a task name out of an executable path.
 *
 * Returns a pointer (into @p path) to the component that follows the last
 * '/' in the path. If there is no '/', or nothing follows the last one
 * (e.g. "dir/" or an empty string), the whole path is returned instead.
 */
static const char *task_name(const char *path)
{
    const char *base = path;

    for (const char *p = path; *p != '\0'; p++) {
        if (*p == '/') {
            /* candidate component starts right after this separator */
            base = p + 1;
        }
    }

    if (*base == '\0') {
        return path;
    }

    return base;
}
is invalid", fd, i); + return EBADF; + } + } + + return 0; +} + +static int validate_ns(int flags, const struct namespace_entry *ns, size_t count) +{ + if (count && !ns) { + set_error_msg("launch_info.ns is NULL"); + return EFAULT; + } + + if ((flags & LAUNCH_INHERIT_NS) && count) { + /* TODO better error messages */ + set_error_msg("cannot create a new namespace and inherit an existing one"); + return EINVAL; + } + + for (size_t i = 0; i < count; i++) { + const struct namespace_entry *ent = &ns[i]; + if (!ent->path) { + set_error_msg("filepath for namespace entry #%zu is invalid", i); + return EFAULT; + } + + if (ent->fd != -1 && ent->handle != MX_NULL_HANDLE) { + set_error_msg("namespace entry %s has both a file descriptor and a handle", ent->path); + return EINVAL; + } + + if (ent->fd) { + struct mio_object *obj = mio_fd_list_object_from_fd(mio_global_fd_list(), ent->fd); + if (!obj) { + set_error_msg("file descriptor %d for namespace entry %s is invalid", ent->fd, ent->path); + return EBADF; + } + } + } + + return 0; +} + +static int collect_names(const struct launch_info *info, struct cleanup *cleanup) +{ + size_t ns_count = info->ns_count; + const struct namespace_entry *ns = info->ns; + + if (info->flags & LAUNCH_INHERIT_NS) { + ns_count = cleanup->inherited_ns_len; + ns = cleanup->inherited_ns; + } + + size_t name_count = ns_count; + if (info->cwd.path) { + name_count++; + } + + const char **names = calloc(name_count, sizeof *names); + + for (size_t i = 0; i < ns_count; i++) { + const struct namespace_entry *ent = &ns[i]; + names[i] = ent->path; + } + + if (info->cwd.path) { + names[name_count - 1] = info->cwd.path; + } + + cleanup->names = names; + cleanup->name_count = name_count; + return 0; +} + +static int count_handles(const struct launch_info *info, struct cleanup *cleanup) +{ + size_t all_handles = 0, auto_handles = 0; + for (size_t i = 0; i < LAUNCH_MAX_FDS; i++) { + if (cleanup->fd[i] != FD_NONE) { + all_handles++; + auto_handles++; + } + } + + 
size_t ns_len = info->ns_count; + const struct namespace_entry *ns = info->ns; + bool auto_ns = false; + + if (info->flags & LAUNCH_INHERIT_NS) { + ns_len = cleanup->inherited_ns_len; + ns = cleanup->inherited_ns; + auto_ns = true; + } + + for (size_t i = 0; i < ns_len; i++) { + const struct namespace_entry *ent = &ns[i]; + + all_handles++; + + if (ent->handle == MX_NULL_HANDLE || auto_ns) { + auto_handles++; + } + } + + for (size_t i = 0; i < info->handle_count; i++) { + int type = MX_B_HND_TYPE(info->handles[i].info); + + switch (type) { + /* the user can only send handles of the following types. + * all other handle types are used internally by launch() */ + case MX_B_TUNNEL_LDSVC: + case MX_B_TUNNEL_EXPORT: + case MX_B_USER0: + case MX_B_USER1: + case MX_B_USER2: + case MX_B_USER3: + case MX_B_USER4: + break; + default: + set_error_msg("handles of type %x cannot be sent to new process", type); + return EINVAL; + } + } + + /* these handles are sent by launch() in all cases + * MX_B_TASK_SELF + * MX_B_VMO_VDSO + * MX_B_VMO_EXEC + * MX_B_VMAR_ROOT + * MX_B_VMAR_EXEC + * MX_B_TUNNEL_BTSTP + */ + all_handles += NR_DEFAULT_HANDLES; + + all_handles += info->handle_count; + + cleanup->auto_handle_count = auto_handles; + cleanup->b_handle_count = all_handles; + + cleanup->auto_handles = calloc(cleanup->auto_handle_count, sizeof *cleanup->auto_handles); + cleanup->b_handles = calloc(cleanup->b_handle_count, sizeof *cleanup->b_handles); + + return 0; +} + /* collect_handles: populate cleanup->b_handles for the new process. Copies the caller-supplied info->handles first, then appends one MX_B_FD entry per usable slot of cleanup->fd and one MX_B_NS_DIR entry per namespace entry (the inherited namespace when LAUNCH_INHERIT_NS is set, otherwise info->ns). Handles obtained here by duplication or by open() are also stored in cleanup->auto_handles, which the cleanup routines close. Returns 0 on success, EBADF for an unusable descriptor or handle, or the errno saved after a failed open(). Assumes both arrays were sized beforehand by count_handles - TODO confirm that sizing accounts for the stdio entries. */ +static int collect_handles(const struct launch_info *info, struct cleanup *cleanup) +{ + memcpy(cleanup->b_handles, info->handles, info->handle_count * sizeof *info->handles); + size_t b_idx = info->handle_count; + size_t auto_idx = 0; + + for (int i = 0; i < LAUNCH_MAX_FDS; i++) { + int fd = cleanup->fd[i]; /* FD_NONE slots are skipped; FD_INHERIT duplicates descriptor i itself */ + if (fd == FD_NONE) { + continue; + } + + mx_handle_t fd_handle = MX_NULL_HANDLE; + + if (fd == FD_INHERIT) { + mio_dup_handle(i, &fd_handle); + } else { + mio_dup_handle(fd, &fd_handle); + } + + if (fd_handle == 
MX_NULL_HANDLE) { + if (fd == FD_INHERIT) { /* an inherited descriptor that cannot be duplicated is skipped without error */ + continue; + } + /* NOTE(review): fd is never FD_INHERIT at this point, so the conditional argument below always evaluates to fd */ + set_error_msg("cannot duplicate file descriptor %d", + fd == FD_INHERIT ? i : fd); + return EBADF; + } + + cleanup->b_handles[b_idx].info = MX_B_HND(MX_B_FD, i); + cleanup->b_handles[b_idx].handle = fd_handle; + b_idx++; + + cleanup->auto_handles[auto_idx++] = fd_handle; + } + + if (info->flags & LAUNCH_INHERIT_NS) { + for (size_t i = 0; i < cleanup->inherited_ns_len; i++) { + const struct namespace_entry *ent = &cleanup->inherited_ns[i]; + + cleanup->b_handles[b_idx].info = MX_B_HND(MX_B_NS_DIR, i); + cleanup->b_handles[b_idx].handle = ent->handle; + b_idx++; + /* NOTE(review): the same handle is now reachable through auto_handles and through inherited_ns; fail_cleanup and success_cleanup close both lists, which looks like a double close - confirm */ + cleanup->auto_handles[auto_idx++] = ent->handle; + } + + return 0; + } + + for (size_t i = 0; i < info->ns_count; i++) { + const struct namespace_entry *ent = &info->ns[i]; + + mx_handle_t ns_handle = MX_NULL_HANDLE; + bool is_auto = false; + /* handle source, in order: explicit handle (not recorded in auto_handles), descriptor to duplicate, otherwise a path to open */ + if (ent->handle) { + ns_handle = ent->handle; + } else if (ent->fd != -1) { + mio_dup_handle(ent->fd, &ns_handle); + is_auto = true; + } else { + const char *src = ent->src; + if (!src) { + src = ent->path; + } + + /* TODO allow caller to specify permissions */ + int ns_fd = open(src, O_RDWR); + if (ns_fd == -1) { + int err = errno; /* NOTE(review): err is an int but is passed for the second %s conversion below; strerror(err) was probably intended - confirm set_error_msg is printf-style */ + set_error_msg("cannot open namespace entry '%s': %s", src, err); + return err; + } + + mio_release_handle(ns_fd, &ns_handle); /* presumably detaches the handle from ns_fd so the descriptor need not be closed - TODO confirm */ + is_auto = true; + } + + if (ns_handle == MX_NULL_HANDLE) { + set_error_msg("file descriptor or handle for namespace entry '%s' is invalid", ent->path); + return EBADF; + } + + cleanup->b_handles[b_idx].info = MX_B_HND(MX_B_NS_DIR, i); + cleanup->b_handles[b_idx].handle = ns_handle; + b_idx++; + + if (is_auto) { + cleanup->auto_handles[auto_idx++] = ns_handle; + } + } + + return 0; +} + /* fail_cleanup: release every resource recorded in *cleanup after a failed launch. Closes the executable VMO, the auto handles, the inherited namespace handles, the interpreter VMO, the new VMAR, the stack VMO, the new task, both bootstrap tunnel ends and the loader service connection, and frees the associated heap buffers. Most steps are guarded by a non-zero check; launch calls this on a zero-initialised structure when its very first step fails. */ +static void fail_cleanup(struct cleanup *cleanup) +{ + if (cleanup->exec_vmo) { + mx_handle_close(cleanup->exec_vmo); + } + + if (cleanup->auto_handles) { + for (size_t i = 0; i < cleanup->auto_handle_count; i++) { + if (cleanup->auto_handles[i]) { + 
mx_handle_close(cleanup->auto_handles[i]); + } + } + + free(cleanup->auto_handles); /* NOTE(review): this pointer, b_handles and exec_vmo are released without being reset, unlike the fields below */ + } + + if (cleanup->inherited_ns) { + for (size_t i = 0; i < cleanup->inherited_ns_len; i++) { + struct namespace_entry *ent = &cleanup->inherited_ns[i]; + free((char *)ent->path); + mx_handle_close(ent->handle); + } + + free(cleanup->inherited_ns); + cleanup->inherited_ns = NULL; + cleanup->inherited_ns_len = 0; + } + + if (cleanup->names) { + free(cleanup->names); + cleanup->names = NULL; + cleanup->name_count = 0; + } + + if (cleanup->b_handles) { + free(cleanup->b_handles); + } + + __elf_image_cleanup(&cleanup->exec); + __elf_image_cleanup(&cleanup->vdso); + + if (cleanup->interp_vmo) { + mx_handle_close(cleanup->interp_vmo); + cleanup->interp_vmo = MX_NULL_HANDLE; + } + + if (cleanup->interp_path) { + free(cleanup->interp_path); + cleanup->interp_path = NULL; + } + + if (cleanup->new_vmar) { + mx_handle_close(cleanup->new_vmar); + cleanup->new_vmar = MX_NULL_HANDLE; + } + + if (cleanup->stack_vmo) { + mx_handle_close(cleanup->stack_vmo); + cleanup->stack_vmo = MX_NULL_HANDLE; + } + + if (cleanup->new_task) { + mx_handle_close(cleanup->new_task); + cleanup->new_task = MX_NULL_HANDLE; + } + + if (cleanup->bootstrap_local) { + mx_handle_close(cleanup->bootstrap_local); + cleanup->bootstrap_local = MX_NULL_HANDLE; + } + + if (cleanup->bootstrap_remote) { + mx_handle_close(cleanup->bootstrap_remote); + cleanup->bootstrap_remote = MX_NULL_HANDLE; + } + + if (cleanup->fd) { + free(cleanup->fd); + cleanup->fd = NULL; + } + /* the ldsvc union member in use is selected by ldsvc_version */ + if (cleanup->ldsvc_version == LDV1_VERSION && cleanup->ldsvc.handle) { + mx_handle_close(cleanup->ldsvc.handle); + cleanup->ldsvc.handle = MX_NULL_HANDLE; + } else if (cleanup->ldsvc_version == LDV2_VERSION && cleanup->ldsvc.sock) { + xpc_socket_destroy(cleanup->ldsvc.sock); + cleanup->ldsvc.sock = NULL; + } +} + /* success_cleanup: release the parent-side resources once the new task has been prepared. Performs the same steps as fail_cleanup except that new_task and bootstrap_remote are left open; launch still uses both after this call. */ +static void success_cleanup(struct cleanup *cleanup) +{ + if (cleanup->exec_vmo) { + mx_handle_close(cleanup->exec_vmo); + } + + if 
(cleanup->auto_handles) { + for (size_t i = 0; i < cleanup->auto_handle_count; i++) { + if (cleanup->auto_handles[i]) { + mx_handle_close(cleanup->auto_handles[i]); + } + } + + free(cleanup->auto_handles); + } + + if (cleanup->inherited_ns) { + for (size_t i = 0; i < cleanup->inherited_ns_len; i++) { + struct namespace_entry *ent = &cleanup->inherited_ns[i]; + free((char *)ent->path); + mx_handle_close(ent->handle); + } + + free(cleanup->inherited_ns); + cleanup->inherited_ns = NULL; + cleanup->inherited_ns_len = 0; + } + + if (cleanup->names) { + free(cleanup->names); + cleanup->names = NULL; + cleanup->name_count = 0; + } + + if (cleanup->b_handles) { + free(cleanup->b_handles); + } + + __elf_image_cleanup(&cleanup->exec); + __elf_image_cleanup(&cleanup->vdso); + + if (cleanup->interp_vmo) { + mx_handle_close(cleanup->interp_vmo); + cleanup->interp_vmo = MX_NULL_HANDLE; + } + + if (cleanup->interp_path) { + free(cleanup->interp_path); + cleanup->interp_path = NULL; + } + + if (cleanup->stack_vmo) { + mx_handle_close(cleanup->stack_vmo); + cleanup->stack_vmo = MX_NULL_HANDLE; + } + + if (cleanup->new_vmar) { + mx_handle_close(cleanup->new_vmar); + cleanup->new_vmar = MX_NULL_HANDLE; + } + + if (cleanup->bootstrap_local) { + mx_handle_close(cleanup->bootstrap_local); + cleanup->bootstrap_local = MX_NULL_HANDLE; + } + + if (cleanup->fd) { + free(cleanup->fd); + cleanup->fd = NULL; + } + + if (cleanup->ldsvc_version == LDV1_VERSION && cleanup->ldsvc.handle) { + mx_handle_close(cleanup->ldsvc.handle); + cleanup->ldsvc.handle = MX_NULL_HANDLE; + } else if (cleanup->ldsvc_version == LDV2_VERSION && cleanup->ldsvc.sock) { + xpc_socket_destroy(cleanup->ldsvc.sock); + cleanup->ldsvc.sock = NULL; + } +} + /* inherit_namespace: when LAUNCH_INHERIT_NS is set, export the global namespace into cleanup->inherited_ns and cleanup->inherited_ns_len. Returns 0 when the flag is clear or the export produced a non-NULL array, and -1 otherwise. */ +static int inherit_namespace(const struct launch_info *info, struct cleanup *cleanup) +{ + if (!(info->flags & LAUNCH_INHERIT_NS)) { + return 0; + } + + cleanup->inherited_ns = mio_namespace_export(mio_global_namespace(), &cleanup->inherited_ns_len); + return 
cleanup->inherited_ns ? 0 : -1; /* NOTE(review): launch forwards this -1 to RET_ERR in the same way as the errno values returned by the other helpers - confirm that is intended */ +} + /* request_v1: ask an LDv1 loader service, over the tunnel ldsvc, to load the object called name. The request is an mx_ldsvc_msg_t header followed by the name bytes, built in a 512-byte stack buffer that is then reused for the reply. Returns the handle received with the reply when its code is MX_LDSVC_CODE_OK, otherwise MX_NULL_HANDLE. The status of the tunnel write and read is not checked. */ +static mx_handle_t request_v1(mx_handle_t ldsvc, const char *name) +{ + size_t name_len = strlen(name); + size_t msg_len = sizeof(mx_ldsvc_msg_t) + name_len; /* NOTE(review): computed before name_len is clamped below, so for an over-long name msg_len exceeds sizeof msg_buf and the write reads past the buffer - confirm */ + unsigned char msg_buf[512]; + + if (name_len > sizeof msg_buf - sizeof (mx_ldsvc_msg_t)) { + name_len = sizeof msg_buf - sizeof (mx_ldsvc_msg_t); + } + + mx_ldsvc_msg_t *msg = (mx_ldsvc_msg_t *)msg_buf; + char *name_dest = (char *)msg + sizeof(mx_ldsvc_msg_t); + + msg->name_off = sizeof(mx_ldsvc_msg_t); + msg->name_len = name_len; + msg->op = MX_LDSVC_OP_LOAD_OBJECT; + memcpy(name_dest, name, name_len); + + mx_tunnel_write(ldsvc, msg_buf, msg_len, NULL, 0); + +#if 0 + mx_signals_t sig = 0; + mx_status_t err = mx_object_wait(ldsvc, + MX_TUNNEL_READABLE | MX_TUNNEL_REMOTE_CLOSED, + mx_deadline_after(MX_SEC(3)), &sig); + + if (sig & MX_TUNNEL_REMOTE_CLOSED) { + return MX_NULL_HANDLE; + } + + if (err == MX_ERR_TIMED_OUT) { + return MX_NULL_HANDLE; + } + + size_t nr_handles = 0; + mx_tunnel_read(ldsvc, NULL, 0, NULL, 0, &msg_len, &nr_handles); + + unsigned char resp_buf[msg_len]; + mx_handle_t handles[nr_handles]; +#endif + + mx_handle_t handle; /* NOTE(review): left uninitialised if the read below fails or delivers no handle */ + + mx_tunnel_read(ldsvc, + msg_buf, sizeof msg_buf, + &handle, 1, + &msg_len, NULL); + + if (msg->code != MX_LDSVC_CODE_OK) { + return MX_NULL_HANDLE; + } + + return handle; +} + /* request_v2: ask an LDv2 loader service for the image VMO of name through the generated GetImageVMO call. Returns the VMO handle when the call returns XPC_OK and the reported result is 0, otherwise MX_NULL_HANDLE. */ +static mx_handle_t request_v2(xpc_socket ldsvc, const char *name) +{ + int32_t result; + mx_handle_t vmo; + xpc_error err = horizon_sys_LoaderService_GetImageVMO(ldsvc, name, &result, &vmo); + if (err != XPC_OK) { + return MX_NULL_HANDLE; + } + + if (result != 0) { + return MX_NULL_HANDLE; + } + + return vmo; +} + /* request_image: fetch the image called name through whichever loader service connection cleanup holds, selected by cleanup->ldsvc_version. Returns MX_NULL_HANDLE for an unknown version. NOTE(review): the case labels 0 and 1 presumably equal LDV1_VERSION and LDV2_VERSION, which the rest of the file uses - confirm. */ +static mx_handle_t request_image(struct cleanup *cleanup, const char *name) +{ + switch (cleanup->ldsvc_version) { + case 0: + return request_v1(cleanup->ldsvc.handle, name); + case 1: + return request_v2(cleanup->ldsvc.sock, name); + default: + return MX_NULL_HANDLE; + } +} + /* get_name_from_path: return the part of info->path that follows its last slash, or the whole path when it contains no slash or ends with one. The result points into info->path. */ +static const char *get_name_from_path(const struct launch_info *info) 
+{ + const char *p = info->path; + const char *e = NULL; /* e ends up just past the last slash seen */ + + for (int i = 0; p[i]; i++) { + if (p[i] == '/') { + e = p + i + 1; + } + } + + if (!e || *e == '\0') { + return p; + } + + return e; +} + /* load_exec: create the new task and load its initial image. If the executable in cleanup->exec_vmo names a program interpreter, the interpreter image is requested from the loader service and loaded in place of the executable. The vDSO image is loaded as well and the main image is linked against it; the resulting entry point is stored in cleanup->entry_point. Returns 0 on success, EPERM when a required bootstrap handle is missing, ENOENT when the interpreter cannot be obtained, ENOEXEC on load or link failure, or the errno mapped from a failed mx_task_create. */ +static int load_exec(const struct launch_info *info, struct cleanup *cleanup) +{ + mx_handle_t local_vmar = mx_bootstrap_handle_get(MX_B_VMAR_ROOT); + mx_handle_t self_task = mx_bootstrap_handle_get(MX_B_TASK_SELF); + mx_handle_t vdso_vmo = mx_bootstrap_handle_get(MX_B_VMO_VDSO); + + if (!local_vmar || !self_task || !vdso_vmo) { + return EPERM; + } + + char interp_path[1024]; + interp_path[0] = '\0'; + /* the buffer starts empty; presumably the helper leaves it untouched when the executable names no interpreter - TODO confirm */ + __elf_get_interp_path(cleanup->exec_vmo, interp_path, sizeof interp_path); + + mx_handle_t real_exec = cleanup->exec_vmo; + + if (*interp_path != '\0') { + real_exec = request_image(cleanup, interp_path); + + if (real_exec == MX_NULL_HANDLE) { + set_error_msg("cannot open program interpreter '%s'", interp_path); + return ENOENT; + } + + cleanup->interp_vmo = real_exec; + cleanup->interp_path = strdup(interp_path); /* NOTE(review): strdup result is not checked */ + } + + const char *new_task_name = get_name_from_path(info); + mx_status_t status = mx_task_create(self_task, + new_task_name, strlen(new_task_name), + 0, &cleanup->new_task, &cleanup->new_vmar); + + if (status != MX_OK) { + return mio_errno_from_mx_status(status); + } + + __elf_image_init(local_vmar, cleanup->new_vmar, &cleanup->exec); + if (__elf_image_load_image(&cleanup->exec, real_exec)) { + set_error_msg("cannot load %s image", cleanup->interp_vmo ? "interpreter" : "executable"); + return ENOEXEC; + } + + __elf_image_init(local_vmar, cleanup->new_vmar, &cleanup->vdso); + if (__elf_image_load_image(&cleanup->vdso, vdso_vmo)) { + set_error_msg("cannot load vDSO image"); + return ENOEXEC; + } + + if (__elf_image_link(&cleanup->exec, &cleanup->vdso)) { + set_error_msg("cannot link %s image", cleanup->interp_vmo ? 
"interpreter" : "executable"); + return ENOEXEC; + } + + cleanup->entry_point = __elf_image_entry_point(&cleanup->exec); + return 0; +} + /* allocate_stack: create a VMO of DEFAULT_STACK_SZ bytes, map it read/write into cleanup->new_vmar and store the address one past the end of the mapping in cleanup->stack_ptr. Returns 0 on success or the errno mapped from the failing status. */ +static int allocate_stack(struct cleanup *cleanup) +{ + mx_status_t status = mx_vmo_create(DEFAULT_STACK_SZ, 0, &cleanup->stack_vmo); + if (status != MX_OK) { + set_error_msg("cannot allocate stack for new process: %s", mx_status_to_string(status)); + return mio_errno_from_mx_status(status); + } + + mx_vaddr_t stack_buf; + status = mx_vmar_map(cleanup->new_vmar, MX_VM_PERM_READ | MX_VM_PERM_WRITE, + 0, cleanup->stack_vmo, 0, DEFAULT_STACK_SZ, &stack_buf); + + if (status != MX_OK) { + set_error_msg("cannot map stack for new process: %s", mx_status_to_string(status)); + return mio_errno_from_mx_status(status); + } + + cleanup->stack_ptr = stack_buf + DEFAULT_STACK_SZ; + return 0; +} + /* get_ldv1_handle: obtain a private LDv1 loader service connection. Creates a tunnel pair, sends one end to the service in an MX_LDSVC_OP_ADD_PEER request and, once the reply code is MX_LDSVC_CODE_OK, records the other end in cleanup->ldsvc.handle with version LDV1_VERSION. Returns 0 on success, ENOLINK when the service refuses, or the errno mapped from a failed tunnel operation. NOTE(review): the handle parameter is unused - the bootstrap MX_B_TUNNEL_LDSVC handle is looked up again instead; only msg.op is set before msg is sent; the result of mx_tunnel_create is not checked. */ +static int get_ldv1_handle(mx_handle_t handle, struct cleanup *cleanup) +{ + mx_handle_t t0, t1; + mx_tunnel_create(&t0, &t1); + + mx_ldsvc_msg_t msg; + msg.op = MX_LDSVC_OP_ADD_PEER; + + mx_handle_t original = mx_bootstrap_handle_get(MX_B_TUNNEL_LDSVC); + mx_status_t status = mx_tunnel_write(original, &msg, sizeof msg, &t0, 1); + if (status != MX_OK) { /* NOTE(review): neither t0 nor t1 is closed on this path, unlike the later failure paths which close t1 */ + set_error_msg("cannot contact LDv1 loader service: %s", mx_status_to_string(status)); + return mio_errno_from_mx_status(status); + } + + status = mx_tunnel_read(original, &msg, sizeof msg, NULL, 0, NULL, NULL); + + if (status != MX_OK) { + set_error_msg("cannot contact LDv1 loader service: %s", mx_status_to_string(status)); + mx_handle_close(t1); + return mio_errno_from_mx_status(status); + } + + if (msg.code != MX_LDSVC_CODE_OK) { + set_error_msg("cannot connect to LDv1 loader service: code %d", msg.code); + mx_handle_close(t1); + return ENOLINK; + } + + cleanup->ldsvc_version = LDV1_VERSION; + cleanup->ldsvc.handle = t1; + return 0; +} + /* get_ldv2_handle: obtain a private LDv2 loader service connection. Connects a requester socket to handle, creates a tunnel pair, passes t1 to the service through AddPeer and, on success, rebinds the socket to t0 and records it in cleanup->ldsvc.sock with version LDV2_VERSION. Returns 0 on success or ENOLINK on any failure. */ +static int get_ldv2_handle(mx_handle_t handle, struct cleanup *cleanup) +{ + xpc_socket sock = xpc_socket_create(XPC_SOCKET_REQUESTER); + 
xpc_socket_connect_tunnel(sock, handle); + + mx_handle_t t0, t1; + mx_tunnel_create(&t0, &t1); + + int32_t result; + xpc_error err = horizon_sys_LoaderService_AddPeer(sock, t1, &result); + mx_handle_close(t1); /* t1 is closed locally whatever the outcome; presumably the call duplicated or transferred it - TODO confirm */ + + if (err != XPC_OK) { + set_error_msg("cannot contact LDv2 loader service: %s", xpc_error_to_string(err)); + mx_handle_close(t0); + xpc_socket_destroy(sock); + return ENOLINK; + } + + if (result != 0) { + set_error_msg("cannot connect to LDv2 loader service: code %d", result); + mx_handle_close(t0); + xpc_socket_destroy(sock); + return ENOLINK; + } + /* detach the socket from the original tunnel and attach it to the new private one */ + xpc_socket_release_tunnel(sock, NULL); + xpc_socket_connect_tunnel(sock, t0); + + cleanup->ldsvc_version = LDV2_VERSION; + cleanup->ldsvc.sock = sock; + return 0; +} + /* open_ldv2_handle: connect to the loader service by opening its path under /svc, for use when no loader service handle was supplied at bootstrap. Records the resulting socket in cleanup->ldsvc.sock with version LDV2_VERSION. Returns 0 on success or errno when the open fails. */ +static int open_ldv2_handle(struct cleanup *cleanup) +{ + int ldsvc_fd = open("/svc/horizon.sys.LoaderService/0", O_RDWR); + if (ldsvc_fd == -1) { + set_error_msg("cannot connect to loader service: %s", strerror(errno)); + return errno; /* NOTE(review): errno is read again after set_error_msg and strerror, which may have changed it; the other failure paths in this file save it first */ + } + + mx_handle_t handle; + mio_release_handle(ldsvc_fd, &handle); + + xpc_socket sock = xpc_socket_create(XPC_SOCKET_REQUESTER); + xpc_socket_connect_tunnel(sock, handle); + + cleanup->ldsvc_version = LDV2_VERSION; + cleanup->ldsvc.sock = sock; + + return 0; +} + /* get_ldsvc_handle: establish the loader service connection used by launch. Takes the first bootstrap handle of type MX_B_TUNNEL_LDSVC, whose argument field carries the protocol version, and derives a private connection from it; when no such handle exists, the service is opened by path instead. Returns 0 on success, ENOLINK for an unknown version, or the error of the helper that was called. */ +static int get_ldsvc_handle(struct cleanup *cleanup) +{ + size_t nr_handles; + const mx_bootstrap_handle_t *handles = mx_bootstrap_handle_get_all(&nr_handles); + + mx_handle_t handle = MX_NULL_HANDLE; + int version; /* assigned only when a handle is found; it is read only in that case */ + + for (size_t i = 0; i < nr_handles; i++) { + int type = MX_B_HND_TYPE(handles[i].info); + int ver = MX_B_HND_ARG(handles[i].info); + + if (type != MX_B_TUNNEL_LDSVC) { + continue; + } + + handle = handles[i].handle; + version = ver; + break; + } + + int err = 0; + if (handle == MX_NULL_HANDLE) { + err = open_ldv2_handle(cleanup); + } else if (version == LDV1_VERSION) { + err = get_ldv1_handle(handle, cleanup); + } else if (version == LDV2_VERSION) { + err = get_ldv2_handle(handle, cleanup); + } else { + set_error_msg("no connection 
to loader service"); + return ENOLINK; + } + + return err; +} + /* build_bootstrap_message: serialise a bootstrap message into one heap buffer. Layout: the mx_bootstrap_msg_t header, then the NUL-terminated argument strings, environment strings and name strings back to back, then one 32-bit info word per handle; the header records the counts and the offset of each section. The handle values themselves are returned separately in a calloc-ed array through out_handles, and the total size through out_msg_len. The caller frees both the returned message and *out_handles. NOTE(review): neither allocation is checked before use. */ +static mx_bootstrap_msg_t *build_bootstrap_message( + int argc, const char **argv, + int envc, const char **envp, + int n_names, const char **names, + size_t n_handles, mx_bootstrap_handle_t *handles, + size_t *out_msg_len, mx_handle_t **out_handles) +{ + size_t arg_len = 0; + for (int i = 0; i < argc; i++) { + arg_len += strlen(argv[i]) + 1; + } + + size_t env_len = 0; + for (int i = 0; i < envc; i++) { + env_len += strlen(envp[i]) + 1; + } + + size_t names_len = 0; + for (int i = 0; i < n_names; i++) { + names_len += strlen(names[i]) + 1; + } + /* NOTE(review): space is reserved per handle using the size of a whole mx_bootstrap_handle_t, while only one uint32_t per handle is written further down - confirm which size the receiver expects */ + size_t msg_len = + sizeof(mx_bootstrap_msg_t) + + (n_handles * sizeof *handles) + + arg_len + + env_len + + names_len; + + mx_bootstrap_msg_t *msg = malloc(msg_len); + + msg->args_num = argc; + msg->environ_num = envc; + msg->names_num = n_names; + + msg->args_off = sizeof *msg; + msg->environ_off = msg->args_off + arg_len; + msg->names_off = msg->environ_off + env_len; + msg->handle_info_off = msg->names_off + names_len; + + char *arg_ptr = (char *)msg + msg->args_off; + for (int i = 0; i < argc; i++) { + const char *arg = argv[i]; + for (int ii = 0; arg[ii]; ii++) { + *arg_ptr++ = arg[ii]; + } + + *arg_ptr++ = '\0'; + } + + char *env_ptr = (char *)msg + msg->environ_off; + for (int i = 0; i < envc; i++) { + const char *env = envp[i]; + for (int ii = 0; env[ii]; ii++) { + *env_ptr++ = env[ii]; + } + + *env_ptr++ = '\0'; + } + + char *name_ptr = (char *)msg + msg->names_off; + for (int i = 0; i < n_names; i++) { + const char *name = names[i]; + for (int ii = 0; name[ii]; ii++) { + *name_ptr++ = name[ii]; + } + + *name_ptr++ = '\0'; + } + + mx_handle_t *handle_buf = calloc(n_handles, sizeof *handle_buf); + uint32_t *uptr = (uint32_t *)((char *)msg + msg->handle_info_off); /* NOTE(review): this offset follows unpadded string data, so the uint32_t stores may be misaligned - confirm */ + for (size_t i = 0; i < n_handles; i++) { + *uptr++ = handles[i].info; + handle_buf[i] = handles[i].handle; + } + + *out_msg_len = msg_len; + *out_handles = handle_buf; + return msg; +} + 
/* raw_handles: return a newly allocated array holding the handle member of each of the count entries in handles. The caller frees the result. NOTE(review): the calloc result is not checked, and both send functions below declare a local variable with this same name. */ +static mx_handle_t *raw_handles(mx_bootstrap_handle_t *handles, size_t count) +{ + mx_handle_t *out = calloc(count, sizeof *out); + for (size_t i = 0; i < count; i++) { + out[i] = handles[i].handle; + } + + return out; +} + /* send_interp_bootstrap_message: write the bootstrap message meant for the program interpreter to cleanup->bootstrap_local. The loader service tunnel is first taken out of cleanup (for LDv2 it is released from the socket, which is then destroyed) so that it can be sent along with the task, VMO and VMAR handles listed below. The argument vector is the interpreter path followed by args->path; no environment or names are sent. Always returns 0; the status of the tunnel write is not checked. */ +static int send_interp_bootstrap_message(const struct launch_info *args, struct cleanup *cleanup) +{ + mx_handle_t vdso_vmo = mx_bootstrap_handle_get(MX_B_VMO_VDSO); + mx_handle_t ldsvc = MX_NULL_HANDLE; + + if (cleanup->ldsvc_version == 0) { + ldsvc = cleanup->ldsvc.handle; + cleanup->ldsvc.handle = MX_NULL_HANDLE; + } else { + xpc_socket_release_tunnel(cleanup->ldsvc.sock, &ldsvc); + xpc_socket_destroy(cleanup->ldsvc.sock); + cleanup->ldsvc.sock = NULL; + } + + /* required handles for interpreter: + * ldsvc + * exec vmo + * vdso vmo + * self task + * self vmar + * exec vmar + */ + mx_bootstrap_handle_t handles[] = { + ARG_HANDLE(cleanup->new_task, MX_B_HND(MX_B_TASK_SELF, 0)), + ARG_HANDLE(cleanup->exec_vmo, MX_B_HND(MX_B_VMO_EXEC, 0)), + ARG_HANDLE(cleanup->new_vmar, MX_B_HND(MX_B_VMAR_ROOT, 0)), + ARG_HANDLE(cleanup->exec.remote_exec_vmar, MX_B_HND(MX_B_VMAR_EXEC, 0)), + ARG_HANDLE(vdso_vmo, MX_B_HND(MX_B_VMO_VDSO, 0)), + ARG_HANDLE(ldsvc, MX_B_HND(MX_B_TUNNEL_LDSVC, cleanup->ldsvc_version)), + }; + + static size_t handle_count = sizeof handles / sizeof handles[0]; + + const char *argv[] = { + cleanup->interp_path, + args->path, + }; + const int argc = sizeof argv / sizeof *argv; + + mx_handle_t *raw_handles; + + size_t msg_len = 0; + mx_bootstrap_msg_t *msg = build_bootstrap_message( + argc, argv, + 0, NULL, + 0, NULL, + handle_count, handles, + &msg_len, &raw_handles); + /* NOTE(review): the local ldsvc handle is not closed after the write; if MX_TUNNEL_DUPLICATE_HANDLES sends copies, as the name suggests, it is leaked - confirm */ + mx_tunnel_write_etc(cleanup->bootstrap_local, + MX_TUNNEL_DUPLICATE_HANDLES, + msg, msg_len, + raw_handles, handle_count); + + free(raw_handles); + free(msg); + return 0; +} + /* send_exec_bootstrap_message: write the bootstrap message meant for the executable to cleanup->bootstrap_local. The last NR_DEFAULT_HANDLES slots of cleanup->b_handles are filled with the handles every new task receives, and the whole array is then sent together with args->argv and the collected names; no environment is sent. Always returns 0; the status of the tunnel write is not checked. */ +static int send_exec_bootstrap_message(const struct launch_info *args, struct cleanup *cleanup) +{ + mx_bootstrap_handle_t *default_handles = + cleanup->b_handles + + cleanup->b_handle_count + - 
NR_DEFAULT_HANDLES; + + mx_handle_t vdso_vmo = mx_bootstrap_handle_get(MX_B_VMO_VDSO); + + /* these handles are sent by launch() in all cases + * MX_B_TASK_SELF + * MX_B_VMO_VDSO + * MX_B_VMO_EXEC + * MX_B_VMAR_ROOT + * MX_B_VMAR_EXEC + * MX_B_TUNNEL_BTSTP + */ + int tmp = 0; /* six assignments follow; presumably NR_DEFAULT_HANDLES is 6 - TODO confirm */ + default_handles[tmp++] = ARG_HANDLE(cleanup->new_task, MX_B_HND(MX_B_TASK_SELF, 0)); + default_handles[tmp++] = ARG_HANDLE(vdso_vmo, MX_B_HND(MX_B_VMO_VDSO, 0)); + default_handles[tmp++] = ARG_HANDLE(cleanup->exec_vmo, MX_B_HND(MX_B_VMO_EXEC, 0)); + default_handles[tmp++] = ARG_HANDLE(cleanup->new_vmar, MX_B_HND(MX_B_VMAR_ROOT, 0)); + default_handles[tmp++] = ARG_HANDLE(cleanup->exec.remote_exec_vmar, MX_B_HND(MX_B_VMAR_EXEC, 0)); + default_handles[tmp++] = ARG_HANDLE(cleanup->bootstrap_remote, MX_B_HND(MX_B_TUNNEL_BTSTP, 0)); + + mx_handle_t *raw_handles; + + size_t msg_len; + mx_bootstrap_msg_t *msg = build_bootstrap_message( + args->argc, args->argv, + 0, NULL, + cleanup->name_count, cleanup->names, + cleanup->b_handle_count, cleanup->b_handles, + &msg_len, &raw_handles); + + mx_tunnel_write_etc(cleanup->bootstrap_local, + MX_TUNNEL_DUPLICATE_HANDLES, + msg, msg_len, + raw_handles, cleanup->b_handle_count); + + free(raw_handles); + free(msg); + return 0; +} + /* send_bootstrap_messages: queue the bootstrap messages on cleanup->bootstrap_local - the interpreter message first when an interpreter was loaded, then the executable message. The return values of both helpers are ignored and 0 is always returned. */ +static int send_bootstrap_messages(const struct launch_info *args, struct cleanup *cleanup) +{ + if (cleanup->interp_vmo) { + send_interp_bootstrap_message(args, cleanup); + } + + send_exec_bootstrap_message(args, cleanup); + + return 0; +} + /* launch: create and start a new task running the program at args->path. Steps, in order: connect to the loader service, validate the arguments, descriptors and namespace, gather handles and names, map the executable, load the images, allocate a stack, send the bootstrap messages, release parent-side resources and start the task. On success returns 0 and stores the task handle in *out_task. On failure every resource gathered so far is released through fail_cleanup and the error is reported through RET_ERR. */ +int launch(const struct launch_info *args, mx_handle_t *out_task) +{ + struct cleanup cleanup = {}; + + int err = get_ldsvc_handle(&cleanup); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + if (!args->path) { + fail_cleanup(&cleanup); + RET_ERR(EFAULT); + } + + err = validate_argv(args->argc, args->argv); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + err = validate_fds(args, &cleanup); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + 
err = validate_ns(args->flags, args->ns, args->ns_count); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + err = inherit_namespace(args, &cleanup); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + err = count_handles(args, &cleanup); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + err = collect_handles(args, &cleanup); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + err = collect_names(args, &cleanup); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + int exec_fd = open(args->path, O_RDONLY); + if (exec_fd == -1) { + int error = errno; /* saved before fail_cleanup can disturb errno */ + fail_cleanup(&cleanup); + set_error_msg("cannot open file '%s': %s", args->path, strerror(error)); + RET_ERR(error); + } + + err = mio_map_file(exec_fd, &cleanup.exec_vmo); + close(exec_fd); + + if (err != 0) { /* err is negated below, so mio_map_file presumably returns a negative errno - TODO confirm */ + fail_cleanup(&cleanup); + set_error_msg("cannot map file '%s': %s", args->path, strerror(-err)); + RET_ERR(-err); + } + + err = load_exec(args, &cleanup); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + err = allocate_stack(&cleanup); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + + mx_tunnel_create(&cleanup.bootstrap_local, &cleanup.bootstrap_remote); /* NOTE(review): result not checked */ + + err = send_bootstrap_messages(args, &cleanup); + if (err != 0) { + fail_cleanup(&cleanup); + RET_ERR(err); + } + /* success_cleanup does not touch new_task, entry_point, stack_ptr or bootstrap_remote, which are still needed below */ + success_cleanup(&cleanup); + set_error_msg("success"); + mx_task_start(cleanup.new_task, cleanup.entry_point, cleanup.stack_ptr, cleanup.bootstrap_remote, 0); /* NOTE(review): start status is not checked; 0 is returned regardless */ + mx_handle_close(cleanup.bootstrap_remote); + + *out_task = cleanup.new_task; + return 0; +} diff --git a/photon/libc/sys/horizon/launch.tar.xz b/photon/libc/sys/horizon/launch.tar.xz new file mode 100644 index 0000000..2387f8d Binary files /dev/null and b/photon/libc/sys/horizon/launch.tar.xz differ diff --git a/photon/libc/sys/horizon/sys/launch.h b/photon/libc/sys/horizon/sys/launch.h new file mode 100644 index 0000000..5e56166 --- /dev/null +++ 
b/photon/libc/sys/horizon/sys/launch.h @@ -0,0 +1,52 @@ +#ifndef SYS_HORIZON_SYS_LAUNCH_H_ +#define SYS_HORIZON_SYS_LAUNCH_H_ + +#include + /* Special values for the slots of launch_info.fd. Presumably FD_NONE gives the new task no descriptor in that slot and FD_INHERIT passes on the parent descriptor with the same index - TODO confirm against launch.c. */ +#define FD_NONE -1 +#define FD_INHERIT -2 + /* number of stdio descriptor slots in launch_info.fd */ +#define LAUNCH_MAX_FDS 3 + /* Initialisers for struct namespace_entry: NS_ENTRY_FD backs the entry at path p with descriptor f, NS_ENTRY_HANDLE with handle h, and NS_ENTRY_PATH with source path s. The unused fd is set to -1 and the unused handle to MX_NULL_HANDLE. */ +#define NS_ENTRY_FD(p, f) { .path = (p), .src = NULL, .fd = (f), .handle = MX_NULL_HANDLE } +#define NS_ENTRY_HANDLE(p, h) { .path = (p), .src = NULL, .fd = -1, .handle = (h) } +#define NS_ENTRY_PATH(p, s) { .path = (p), .src = (s), .fd = -1, .handle = MX_NULL_HANDLE } + /* bit flags for launch_info.flags */ +enum launch_flags { + /* inherit parent task's namespace. if this flag is set, + * launch_info.ns must be NULL. */ + LAUNCH_INHERIT_NS = 0x01u, + /* use launch_info.fd as the new task's stdio file descriptors */ + LAUNCH_SET_FD = 0x02u, +}; + /* One namespace entry for the new task. path names the entry; src, when not NULL, is presumably the parent-side path opened to back it when neither fd nor handle is given - TODO confirm against launch.c. */ +struct namespace_entry { + const char *path, *src; + + /* if this is not -1, handle must be MX_NULL_HANDLE */ + int fd; + /* if this is not MX_NULL_HANDLE, fd must be -1 */ + mx_handle_t handle; +}; + /* Parameters for launch(): the program path, its argument vector, stdio descriptor slots, namespace entries (ns holds ns_count elements) and extra bootstrap handles (handles holds handle_count elements). */ +struct launch_info { + enum launch_flags flags; + const char *path; + + int argc; + const char **argv; + + int fd[LAUNCH_MAX_FDS]; + + struct namespace_entry cwd; + const struct namespace_entry *ns; + size_t ns_count; + + const mx_bootstrap_handle_t *handles; + size_t handle_count; +}; + /* launch starts a new task described by info; its definition in launch.c returns 0 and stores the task handle in *out_task on success. launch_error presumably returns the message recorded for the most recent launch() call - TODO confirm. */ +extern int launch(const struct launch_info *info, mx_handle_t *out_task); +extern const char *launch_error(void); + +#endif