Reading from block devices is done using the block cache (bcache). This cache stores sectors from a block device in pages of memory marked as 'cached', which will allow them to be reclaimed when memory pressure is high (TODO). While block device drivers implement callbacks that allow reading and writing at block granularity, the device subsystem uses the block cache to implement reading and writing at byte granularity in a driver-agnostic way. Block drivers can disable the block cache for their devices, but this requires that any clients communicate with those devices at block granularity. Also added an offset parameter to the device and object read/write functions and callbacks.
347 lines
9.8 KiB
C
347 lines
9.8 KiB
C
#ifndef SOCKS_VM_H_
|
|
#define SOCKS_VM_H_
|
|
|
|
#include <stddef.h>
|
|
#include <socks/types.h>
|
|
#include <socks/status.h>
|
|
#include <socks/queue.h>
|
|
#include <socks/btree.h>
|
|
#include <socks/bitmap.h>
|
|
#include <socks/locks.h>
|
|
#include <socks/machine/vm.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* forward declaration: this header only stores pointers to struct bcache
   (see struct vm_page.p_bcache), so the full definition is not needed here. */
struct bcache;
|
|
|
|
/* maximum number of NUMA nodes */
#define VM_MAX_NODES 64
/* maximum number of memory zones per node.
   VM_ZONE_MAX is not defined in this header; presumably it is provided by
   <socks/machine/vm.h> -- TODO confirm. */
#define VM_MAX_ZONES (VM_ZONE_MAX + 1)
/* maximum number of supported page orders.
   VM_PAGE_MAX_ORDER is not defined in this header either; presumably it is
   provided by <socks/machine/vm.h> -- TODO confirm. */
#define VM_MAX_PAGE_ORDERS (VM_PAGE_MAX_ORDER + 1)
/* maximum number of sparse memory sectors */
#define VM_MAX_SECTORS 1024
|
|
|
|
/* maximum number of disk sectors that can be stored in a single
   page. AKA the number of bits in the sector bitmap.
   used by the block cache */
#define VM_MAX_SECTORS_PER_PAGE 32
|
|
|
|
/* evaluates to 1 when masking `p` with `mask` leaves it unchanged (i.e. every
   bit set in `p` is also set in `mask`), and to 0 otherwise.
   note that `p` is evaluated twice. */
#define VM_CHECK_ALIGN(p, mask) (((p) != ((p) & (mask))) ? 0 : 1)
|
|
|
|
/* true when the cache pointed to by `c` has a non-zero c_obj_count. */
#define VM_CACHE_INITIALISED(c) (0 != (c)->c_obj_count)
|
|
/* true when neither VM_PAGE_RESERVED nor VM_PAGE_ALLOC is set in the
   p_flags of the page pointed to by `pg`. other flag bits are ignored. */
#define VM_PAGE_IS_FREE(pg) (!((pg)->p_flags & (VM_PAGE_RESERVED | VM_PAGE_ALLOC)))
|
|
|
|
/* loop header that walks a chain of pages.
   `i` is declared by the macro as a struct vm_page * scoped to the loop;
   it starts at `pg` and is advanced with vm_page_get_next_tail() until
   that returns NULL. */
#define vm_page_foreach(pg, i) \
	for (struct vm_page *i = (pg); i; i = vm_page_get_next_tail(i))
|
|
|
|
/* alignment value, stored in the same integer type as a physical address.
   this is the return type of vm_page_order_to_alignment(). */
typedef phys_addr_t vm_alignment_t;
/* identifier of a NUMA node */
typedef unsigned int vm_node_id_t;
|
|
|
|
/* placeholder type: currently holds a single reserved field. */
struct vm_object {
	unsigned int reserved;
};
|
|
|
|
/* memory model selector, see vm_memory_model() / vm_set_memory_model().
   numbering starts at 1, so 0 is not a valid model. */
enum vm_model {
	VM_MODEL_FLAT = 1,
	VM_MODEL_SPARSE,
};
|
|
|
|
/* memory protection bits. every value occupies its own bit. */
enum vm_prot {
	VM_PROT_READ = 0x01u,
	VM_PROT_WRITE = 0x02u,
	VM_PROT_EXEC = 0x04u,
	VM_PROT_USER = 0x08u,
	/* NOTE(review): presumably "supervisor" -- confirm against the users
	   of this flag. */
	VM_PROT_SVR = 0x10u,
	VM_PROT_NOCACHE = 0x20u,
};
|
|
|
|
/* flags accepted by the allocation functions declared in this header
   (vm_page_alloc(), vm_zone_alloc_page(), vm_cache_alloc(), kmalloc(),
   kzalloc()). */
enum vm_flags {
	/* no special behaviour requested */
	VM_NORMAL = 0x00u,
	/* NOTE(review): presumably requests memory from the DMA zone --
	   confirm against the allocator implementation. */
	VM_GET_DMA = 0x01u,
};
|
|
|
|
enum vm_zone_id {
	/* NOTE that these are used as indices into the node_zones array in vm/zone.c
	   they need to be contiguous, and must start at 0!

	   not all of these zones are implemented for every architecture. */
	VM_ZONE_DMA = 0u,
	VM_ZONE_NORMAL = 1u,
	VM_ZONE_HIGHMEM = 2u,
};
|
|
|
|
/* page block orders. the values are consecutive, starting at 0 for
   VM_PAGE_4K; each name gives the size of a block of that order. */
enum vm_page_order {
	VM_PAGE_4K = 0u,
	VM_PAGE_8K,
	VM_PAGE_16K,
	VM_PAGE_32K,
	VM_PAGE_64K,
	VM_PAGE_128K,
	VM_PAGE_256K,
	VM_PAGE_512K,
	VM_PAGE_1M,
	VM_PAGE_2M,
	VM_PAGE_4M,
	VM_PAGE_8M,
	VM_PAGE_16M,
	VM_PAGE_32M,
	VM_PAGE_64M,
	VM_PAGE_128M,

	/* struct vm_page only has 4 bits to store the page order with.
	   the maximum order that can be stored in 4 bits is 15 (VM_PAGE_128M)
	   to use any of the page orders listed here, this field
	   will have to be expanded. */
	VM_PAGE_256M,
	VM_PAGE_512M,
	VM_PAGE_1G,
	VM_PAGE_2G,
	VM_PAGE_4G,
	VM_PAGE_8G,
	VM_PAGE_16G,
	VM_PAGE_32G,
	VM_PAGE_64G,
};
|
|
|
|
/* bits stored in struct vm_page.p_flags. */
enum vm_page_flags {
	/* page is reserved (probably by a call to memblock_reserve()) and cannot be
	   returned by any allocation function */
	VM_PAGE_RESERVED = 0x01u,
	/* page has been allocated by a zone's buddy allocator, and is in-use */
	VM_PAGE_ALLOC = 0x02u,
	/* page is the first page of a huge-page */
	VM_PAGE_HEAD = 0x04u,
	/* page is part of a huge-page */
	VM_PAGE_HUGE = 0x08u,
	/* page is holding cached data from secondary storage, and can be freed
	   if necessary (and not dirty). */
	VM_PAGE_CACHE = 0x10u,
};
|
|
|
|
/* status of a struct vm_region (stored in its r_status field). */
enum vm_memory_region_status {
	VM_REGION_FREE = 0x01u,
	VM_REGION_RESERVED = 0x02u,
};
|
|
|
|
/* flags for an object cache (struct vm_cache.c_flags, vm_cache_create()). */
enum vm_cache_flags {
	/* NOTE(review): presumably the slab header is kept outside the slab's
	   own pages -- confirm in vm/cache.c. */
	VM_CACHE_OFFSLAB = 0x01u,
	/* NOTE(review): presumably slabs are allocated from DMA-capable
	   memory -- confirm in vm/cache.c. */
	VM_CACHE_DMA = 0x02u,
};
|
|
|
|
struct vm_zone_descriptor {
|
|
enum vm_zone_id zd_id;
|
|
vm_node_id_t zd_node;
|
|
const char zd_name[32];
|
|
phys_addr_t zd_base;
|
|
phys_addr_t zd_limit;
|
|
};
|
|
|
|
struct vm_zone {
|
|
struct vm_zone_descriptor z_info;
|
|
spin_lock_t z_lock;
|
|
|
|
struct queue z_free_pages[VM_MAX_PAGE_ORDERS];
|
|
unsigned long z_size;
|
|
};
|
|
|
|
struct vm_pg_data {
|
|
struct vm_zone pg_zones[VM_MAX_ZONES];
|
|
};
|
|
|
|
struct vm_region {
|
|
enum vm_memory_region_status r_status;
|
|
phys_addr_t r_base;
|
|
phys_addr_t r_limit;
|
|
};
|
|
|
|
struct vm_cache {
|
|
const char *c_name;
|
|
enum vm_cache_flags c_flags;
|
|
struct queue_entry c_list;
|
|
|
|
struct queue c_slabs_full;
|
|
struct queue c_slabs_partial;
|
|
struct queue c_slabs_empty;
|
|
|
|
spin_lock_t c_lock;
|
|
|
|
/* number of objects that can be stored in a single slab */
|
|
unsigned int c_obj_count;
|
|
/* the size of object kept in the cache */
|
|
unsigned int c_obj_size;
|
|
/* combined size of struct vm_slab and the freelist */
|
|
unsigned int c_hdr_size;
|
|
/* power of 2 alignment for objects returned from the cache */
|
|
unsigned int c_align;
|
|
/* offset from one object to the next in a slab.
|
|
this may be different from c_obj_size depending
|
|
on the alignment settings for this cache. */
|
|
unsigned int c_stride;
|
|
/* size of page used for slabs */
|
|
unsigned int c_page_order;
|
|
};
|
|
|
|
struct vm_slab {
|
|
struct vm_cache *s_cache;
|
|
/* queue entry for struct vm_cache.c_slabs_* */
|
|
struct queue_entry s_list;
|
|
/* pointer to the first object slot. */
|
|
void *s_objects;
|
|
/* the number of objects allocated on the slab. */
|
|
unsigned int s_obj_allocated;
|
|
/* the index of the next free object.
|
|
if s_free is equal to FREELIST_END (defined in vm/cache.c)
|
|
there are no free slots left in the slab. */
|
|
unsigned int s_free;
|
|
/* list of free object slots.
|
|
when allocating:
|
|
- s_free should be set to the value of s_freelist[s_free]
|
|
when freeing:
|
|
- s_free should be set to the index of the object being freed.
|
|
- s_freelist[s_free] should be set to the previous value of s_free.
|
|
this is commented as it as flexible arrays are not supported in c++.
|
|
*/
|
|
//unsigned int s_freelist[];
|
|
};
|
|
|
|
struct vm_page {
|
|
/* order of the page block that this page belongs too */
|
|
uint32_t p_order : 4;
|
|
/* the id of the NUMA node that this page belongs to */
|
|
uint32_t p_node : 6;
|
|
/* the id of the memory zone that this page belongs to */
|
|
uint32_t p_zone : 3;
|
|
/* vm_page_flags_t bitfields. */
|
|
uint32_t p_sector : 11;
|
|
/* some unused bits */
|
|
uint32_t p_reserved : 8;
|
|
|
|
uint32_t p_flags;
|
|
|
|
/* owner-specific pointer */
|
|
union {
|
|
struct vm_slab *p_slab;
|
|
struct bcache *p_bcache;
|
|
void *p_priv0;
|
|
};
|
|
|
|
/* multi-purpose list/tree entry.
|
|
the owner of the page can decide what to do with this.
|
|
some examples:
|
|
- the buddy allocator uses this to maintain its per-zone free-page lists.
|
|
- the block cache uses this to maintain a tree of pages keyed by block number.
|
|
*/
|
|
union {
|
|
struct queue_entry p_list;
|
|
struct btree_node p_bnode;
|
|
|
|
/* btree_node contains three pointers, so provide three pointer-sized integers for
|
|
use if p_bnode isn't needed. */
|
|
uintptr_t priv1[3];
|
|
};
|
|
|
|
union {
|
|
/* used by bcache when sector size is < page size. bitmap of present/missing sectors */
|
|
DECLARE_BITMAP(p_blockbits, VM_MAX_SECTORS_PER_PAGE);
|
|
uint32_t p_priv2;
|
|
};
|
|
|
|
union {
|
|
/* sector address, used by bcache */
|
|
sectors_t p_blockid;
|
|
|
|
uint32_t p_priv3[2];
|
|
};
|
|
} __attribute__((aligned(2 * sizeof(unsigned long))));
|
|
|
|
/* represents a sector of memory, containing its own array of vm_pages.
|
|
this struct is used under the sparse memory model, instead of the
|
|
global vm_page array */
|
|
struct vm_sector {
|
|
/* sector size. this must be a power of 2.
|
|
all sectors in the system have the same size. */
|
|
enum vm_page_order s_size;
|
|
/* PFN of the first page contained in s_pages.
|
|
to find the PFN of any page contained within s_pages,
|
|
simply add its offset within the array to s_first_pfn */
|
|
size_t s_first_pfn;
|
|
/* array of pages contained in this sector */
|
|
struct vm_page *s_pages;
|
|
};
|
|
|
|
extern kern_status_t vm_bootstrap(const struct vm_zone_descriptor *zones, size_t nr_zones);
|
|
extern enum vm_model vm_memory_model(void);
|
|
extern void vm_set_memory_model(enum vm_model model);
|
|
|
|
extern struct vm_pg_data *vm_pg_data_get(vm_node_id_t node);
|
|
|
|
extern phys_addr_t vm_virt_to_phys(void *p);
|
|
extern void *vm_phys_to_virt(phys_addr_t p);
|
|
|
|
extern void vm_page_init_array();
|
|
extern struct vm_page *vm_page_get(phys_addr_t addr);
|
|
extern phys_addr_t vm_page_get_paddr(struct vm_page *pg);
|
|
extern struct vm_zone *vm_page_get_zone(struct vm_page *pg);
|
|
extern void *vm_page_get_vaddr(struct vm_page *pg);
|
|
extern size_t vm_page_get_pfn(struct vm_page *pg);
|
|
extern size_t vm_page_order_to_bytes(enum vm_page_order order);
|
|
extern size_t vm_page_order_to_pages(enum vm_page_order order);
|
|
extern vm_alignment_t vm_page_order_to_alignment(enum vm_page_order order);
|
|
extern struct vm_page *vm_page_alloc(enum vm_page_order order, enum vm_flags flags);
|
|
extern void vm_page_free(struct vm_page *pg);
|
|
|
|
extern int vm_page_split(struct vm_page *pg, struct vm_page **a, struct vm_page **b);
|
|
extern struct vm_page *vm_page_merge(struct vm_page *a, struct vm_page *b);
|
|
extern struct vm_page *vm_page_get_buddy(struct vm_page *pg);
|
|
extern struct vm_page *vm_page_get_next_tail(struct vm_page *pg);
|
|
|
|
extern size_t vm_bytes_to_pages(size_t bytes);
|
|
|
|
/* per-zone interface: initialise a zone from its descriptor, then
   allocate pages from it and free pages back to it. */
extern void vm_zone_init(struct vm_zone *z, const struct vm_zone_descriptor *zone_info);
extern struct vm_page *vm_zone_alloc_page(struct vm_zone *z, enum vm_page_order order, enum vm_flags flags);
extern void vm_zone_free_page(struct vm_zone *z, struct vm_page *pg);
|
|
|
|
/* object cache interface (see struct vm_cache).
   `objsz` is the size of the objects the cache will hand out. */
extern struct vm_cache *vm_cache_create(const char *name, size_t objsz, enum vm_cache_flags flags);
extern void vm_cache_init(struct vm_cache *cache);
extern void vm_cache_destroy(struct vm_cache *cache);
/* allocate one object from / return one object to `cache` */
extern void *vm_cache_alloc(struct vm_cache *cache, enum vm_flags flags);
extern void vm_cache_free(struct vm_cache *cache, void *p);
|
|
|
|
/* general-purpose kernel allocator.
   NOTE(review): kzalloc presumably returns zero-filled memory -- confirm
   in the implementation. */
extern void kmalloc_init(void);
extern void *kmalloc(size_t count, enum vm_flags flags);
extern void *kzalloc(size_t count, enum vm_flags flags);
extern void kfree(void *p);
|
|
|
|
/* Flat memory model functions */
|
|
extern void vm_flat_init(void);
|
|
extern struct vm_page *vm_page_get_flat(phys_addr_t addr);
|
|
extern size_t vm_page_get_pfn_flat(struct vm_page *pg);
|
|
|
|
/* Sparse memory model functions */
|
|
extern void vm_sparse_init(void);
|
|
extern struct vm_page *vm_page_get_sparse(phys_addr_t addr);
|
|
extern size_t vm_page_get_pfn_sparse(struct vm_page *pg);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
/* placement new: returns the caller-provided storage `p` unchanged so that
   an object can be constructed in it. */
inline void *operator new(size_t count, void *p) { return p; }

/* allocate storage for one `objtype` with kmalloc(sizeof(objtype), flags) and,
   if the allocation succeeded, construct the object in place, forwarding the
   remaining arguments to the constructor.
   evaluates to an `objtype *`; this is the null pointer when kmalloc()
   returned one, in which case no constructor is run.

   the temporary has a macro-specific name so that a constructor argument
   that happens to be called `p` refers to the caller's variable rather than
   to the freshly allocated storage. */
#define kmalloc_object(objtype, flags, ...)                                    \
	__extension__({                                                        \
		void *kmalloc_object_mem_ = kmalloc(sizeof(objtype), (flags)); \
		if (kmalloc_object_mem_) {                                     \
			new (kmalloc_object_mem_) objtype(__VA_ARGS__);        \
		}                                                              \
		(objtype *)kmalloc_object_mem_;                                \
	})
|
|
|
|
#endif
|
|
|
|
#endif
|