[Operating System] slab for memory allocation

1. Principle introduction

In Linux, the slab allocator is used for allocations smaller than a page. A kmem_cache structure manages the small objects carved out of the memory pages, each page points back to its kmem_cache, and a kmem_cache_node structure manages the pages (slabs) belonging to a memory node.

The SLAB allocator divides a memory page, or a group of contiguous memory pages, into blocks of the same size; each of these small blocks is a SLAB object. The pages do not contain only objects, though: they also hold the slab management header and the colouring area.
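
Before looking at the internals, here is a minimal sketch of how a kernel module might use this allocator through the exported slab API (kmem_cache_create, kmem_cache_alloc, kmem_cache_free, kmem_cache_destroy); the structure and cache names are made up for illustration:

#include <linux/module.h>
#include <linux/slab.h>

struct my_item {                       // hypothetical object type, for illustration only
    int id;
    char name[32];
};

static struct kmem_cache *my_cache;

static int __init my_slab_demo_init(void)
{
    struct my_item *item;

    // Create a dedicated cache whose objects are sizeof(struct my_item) bytes
    my_cache = kmem_cache_create("my_item_cache", sizeof(struct my_item),
                                 0, SLAB_HWCACHE_ALIGN, NULL);
    if (!my_cache)
        return -ENOMEM;

    // Take one object from the cache and return it
    item = kmem_cache_alloc(my_cache, GFP_KERNEL);
    if (item)
        kmem_cache_free(my_cache, item);

    return 0;
}

static void __exit my_slab_demo_exit(void)
{
    kmem_cache_destroy(my_cache);
}

module_init(my_slab_demo_init);
module_exit(my_slab_demo_exit);
MODULE_LICENSE("GPL");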

2. Data structure

1. Management header kmem_cache

struct array_cache {
    unsigned int avail;      // number of currently available objects
    unsigned int limit;      // maximum number of objects allowed
    unsigned int batchcount; // number of objects to move in or out in one batch
    unsigned int touched;    // set when this cache has recently been used
    void *entry[];           // array of pointers to free objects
};
struct kmem_cache {
    struct array_cache __percpu *cpu_cache;   // __percpu: one array_cache per CPU
    unsigned int size;                        // size of each object, including alignment padding
    slab_flags_t flags;                       // slab flags
    unsigned int num;                         // number of objects per slab
    unsigned int gfporder;                    // order of pages allocated per slab (2^gfporder pages)
    gfp_t allocflags;
    size_t colour;                            // cache colouring range
    unsigned int colour_off;                  // colour offset
    const char *name;                         // name of this SLAB cache
    struct list_head list;                    // links this cache into the global slab_caches list
    int refcount;                             // reference count
    int object_size;                          // object size
    int align;                                // alignment size
    struct kmem_cache_node *node[MAX_NUMNODES]; // per-node kmem_cache_node structures managing this cache's slabs
};
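
The cpu_cache field is a per-CPU pointer, so every CPU has its own array_cache and the allocation fast path needs no locking. The cpu_cache_get helper used in the listings below fetches it roughly like this (simplified from mm/slab.c):

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
    // Resolve the __percpu pointer for the CPU we are currently running on
    return this_cpu_ptr(cachep->cpu_cache);
}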

2. Initialize kmem_cache

// global static definition
static struct kmem_cache kmem_cache_boot = {
    .batchcount = 1,
    .limit = BOOT_CPUCACHE_ENTRIES,
    .shared = 1,
    .size = sizeof(struct kmem_cache),
    .name = "kmem_cache",
};

void __init kmem_cache_init(void)
{
    int i;

    kmem_cache = &kmem_cache_boot;

    for (i = 0; i < NUM_INIT_LISTS; i++)
        kmem_cache_node_init(&init_kmem_cache_node[i]);
    
    // Create a kmem_cache that holds the kmem_cache structure
    create_boot_cache(kmem_cache, "kmem_cache",
        offsetof(struct kmem_cache, node) +
                  nr_node_ids * sizeof(struct kmem_cache_node *),
                  SLAB_HWCACHE_ALIGN, 0, 0);
    
    // Add to the global slab_caches list
    list_add(&kmem_cache->list, &slab_caches);
    {
        int nid;
        for_each_online_node(nid) {
            init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
            init_list(kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE],
                      &init_kmem_cache_node[SIZE_NODE + nid], nid);
        }
    }
    
    // Create the kmem_cache used by the kmalloc function
    create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
}

3. Memory node kmem_cache_node

#define NUM_INIT_LISTS (2 * MAX_NUMNODES)
static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];

struct kmem_cache_node {
    spinlock_t list_lock;             // spin lock protecting the lists below
    struct list_head slabs_partial;   // slabs with some free objects
    struct list_head slabs_full;      // slabs with no free objects
    struct list_head slabs_free;      // slabs whose objects are all free
    unsigned long total_slabs;        // total number of slabs on this node
    unsigned long free_slabs;         // number of completely free slabs
    unsigned long free_objects;       // number of free objects
    unsigned int free_limit;
};
static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
    int nodeid)
{
    struct kmem_cache_node *ptr;
    // Allocate space for the new kmem_cache_node structure
    ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
    BUG_ON(!ptr);
    // Copy the initial static kmem_cache_node structure
    memcpy(ptr, list, sizeof(struct kmem_cache_node));
    spin_lock_init(&ptr->list_lock);
    MAKE_ALL_LISTS(cachep, ptr, nodeid);
    // Set the address of kmem_cache_node
    cachep->node[nodeid] = ptr;
}
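
The node[] array of kmem_cache is indexed by memory (NUMA) node id; the get_node helper that appears in later listings is essentially just that lookup (a sketch based on mm/slab.h):

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
    // Return the kmem_cache_node that manages this cache's slabs on the given node
    return s->node[node];
}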

4. Get the memory page

The cache_grow_begin function allocates memory pages to hold objects for a kmem_cache structure; the matching cache_grow_end function then mounts those pages on the lists of the kmem_cache_node structure and makes each page point back to the kmem_cache structure.

static void slab_map_pages(struct kmem_cache *cache, struct page *page,void *freelist)
{
    // The page structure points to the kmem_cache structure
    page->slab_cache = cache;
    // a linked list of free objects
    page->freelist = freelist;
}
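
Setting page->slab_cache is what later lets the free path work without being told which cache an object came from: from the object's address one can find its page, and from the page the owning kmem_cache. A simplified sketch of that reverse lookup (virt_to_head_page and page->slab_cache are real kernel names; the wrapper function here is only illustrative):

// Given an object pointer, find the kmem_cache it was allocated from
static struct kmem_cache *cache_from_obj_sketch(const void *objp)
{
    struct page *page = virt_to_head_page(objp);  // the page (slab) holding the object
    return page->slab_cache;                      // set by slab_map_pages above
}
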
static struct page *cache_grow_begin(struct kmem_cache *cachep,
    gfp_t flags, int nodeid)
{
    void *freelist;
    size_t offset;       // colouring offset (its calculation is omitted in this simplified listing)
    gfp_t local_flags;
    int page_node;
    struct kmem_cache_node *n;
    struct page *page;

    WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
    local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
    // get page
    page = kmem_getpages(cachep, local_flags, nodeid);
    // Get the memory node number where the page is located
    page_node = page_to_nid(page);
    // Obtain the corresponding kmem_cache_node structure according to the memory node
    n = get_node(cachep, page_node);
    // Allocate data structures that manage free objects
    freelist = alloc_slabmgmt(cachep, page, offset,
            local_flags & ~GFP_CONSTRAINT_MASK, page_node);
    // Let the relevant fields in the page point to kmem_cache and free objects
    slab_map_pages(cachep, page, freelist);
    // Initialize idle object management data
    cache_init_objs(cachep, page);
    return page;
}

static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
{
    struct kmem_cache_node *n;
    void *list = NULL;
    if (!page)
        return;
    // Initialize the slab_list linked list of the page structure
    INIT_LIST_HEAD(&page->slab_list);
    // Obtain the corresponding kmem_cache_node structure according to the memory node.
    n = get_node(cachep, page_to_nid(page));
    spin_lock(&n->list_lock);
    // slab count increase
    n->total_slabs++;
    if (!page->active) {
        // Add this page structure to the free list of the kmem_cache_node structure
        list_add_tail(&page->slab_list, &n->slabs_free);
        n->free_slabs++;
    } 
    spin_unlock(&n->list_lock);
}
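
The kmem_getpages function used by cache_grow_begin is not shown above; it asks the buddy allocator for 2^gfporder contiguous pages on the requested node. A simplified sketch (error handling and memory accounting omitted):

static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
    struct page *page;

    flags |= cachep->allocflags;    // add the cache's own GFP flags
    // Ask the buddy system for 2^gfporder contiguous pages on this node
    page = __alloc_pages_node(nodeid, flags, cachep->gfporder);
    if (!page)
        return NULL;

    return page;
}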

3. Allocation process

The slab object allocation process is as follows: find the kmem_cache structure that matches the allocation request, get its per-CPU array_cache structure, and take a free object from it;
if no free object is found there, look in the kmem_cache_node that belongs to the kmem_cache for a slab that still has free objects;
if none is found there either, allocate new memory pages and add them to the cache as a fresh slab.

1. Allocation function entry: kmalloc

The kmalloc function is the SLAB allocation interface used to allocate small buffers or instance space for data structures.
After finding the kmem_cache that matches size, kmalloc calls slab_alloc to allocate an object.

static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,unsigned long caller)
{
    struct kmem_cache *cachep;
    void *ret;
    if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
        return NULL;
    
    // Find the kmem_cache corresponding to size
    cachep = kmalloc_slab(size, flags);    
    if (unlikely(ZERO_OR_NULL_PTR(cachep)))
        return cachep;
    
    // assign object
    ret = slab_alloc(cachep, flags, caller);
    return ret;
}

void *__kmalloc(size_t size, gfp_t flags)
{
    return __do_kmalloc(size, flags, _RET_IP_);
}
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
    return __kmalloc(size, flags);
}
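
From the caller's point of view all of this machinery is hidden behind kmalloc and kfree. A minimal usage sketch (the structure name is made up; kmalloc, kzalloc and kfree are the real interface):

#include <linux/slab.h>

struct request_ctx {              // hypothetical structure, for illustration only
    int id;
    char payload[100];
};

static int handle_request(void)
{
    // 104 bytes is rounded up to the nearest kmalloc cache (e.g. kmalloc-128)
    struct request_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

    if (!ctx)
        return -ENOMEM;

    ctx->id = 1;
    kfree(ctx);                   // return the object to its slab cache
    return 0;
}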

2. Find kmem_cache: kmalloc_slab

size and flags are turned into indices into a global two-dimensional array, and the kmem_cache stored at that position is returned.

enum kmalloc_cache_type {
    KMALLOC_NORMAL = 0,
    KMALLOC_RECLAIM,
#ifdef CONFIG_ZONE_DMA
    KMALLOC_DMA,
#endif
    NR_KMALLOC_TYPES
};
struct kmem_cache *kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init;

static u8 size_index[24] __ro_after_init = {
    3,  /* 8 */
    4,  /* 16 */
    5,  /* 24 */
    5,  /* 32 */
    6,  /* 40 */
    6,  /* 48 */
    6,  /* 56 */
    6,  /* 64 */
    1,  /* 72 */
    1,  /* 80 */
    1,  /* 88 */
    1,  /* 96 */
    7,  /* 104 */
    7,  /* 112 */
    7,  /* 120 */
    7,  /* 128 */
    2,  /* 136 */
    2,  /* 144 */
    2,  /* 152 */
    2,  /* 160 */
    2,  /* 168 */
    2,  /* 176 */
    2,  /* 184 */
    2   /* 192 */
};

//Return enum type based on assignment flag
static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
{
#ifdef CONFIG_ZONE_DMA
    if (likely((flags & (__GFP_DMA | __GFP_RECLAIMABLE)) == 0))
        return KMALLOC_NORMAL;
    return flags & __GFP_DMA ? KMALLOC_DMA : KMALLOC_RECLAIM;
#else
    return flags & __GFP_RECLAIMABLE ? KMALLOC_RECLAIM : KMALLOC_NORMAL;
#endif
}

// Calculate the index from size, and determine the array subscript together with flags
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
    unsigned int index;
    //calculate index
    if (size <= 192) {
        if (!size)
            return ZERO_SIZE_PTR;
        index = size_index[size_index_elem(size)];
    } else {
        if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
            return NULL;
        index = fls(size - 1);
    }
    return kmalloc_caches[kmalloc_type(flags)][index];
}
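
The size_index_elem helper is not shown above; in the kernel it simply converts a byte count into an index into size_index in 8-byte steps, roughly:

static inline unsigned int size_index_elem(unsigned int bytes)
{
    // size_index has one entry per 8 bytes: sizes 1..8 map to 0, 9..16 to 1, and so on
    return (bytes - 1) / 8;
}

For example, kmalloc(24) gives size_index[(24 - 1) / 8] = size_index[2] = 5, i.e. the 2^5 = 32-byte cache; kmalloc(72) gives size_index[8] = 1, the special 96-byte cache; and for sizes above 192 the index is fls(size - 1), so kmalloc(1000) lands in the 2^10 = 1024-byte cache.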

The global array kmalloc_caches is built as follows:

struct kmem_cache *__init create_kmalloc_cache(const char *name,
        unsigned int size, slab_flags_t flags,
        unsigned int useroffset, unsigned int usersize)
{
    // Allocate an object from the first kmem_cache and put it in kmem_cache
    struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

    if (!s)
        panic("Out of memory when creating slab %s\n", name);
    // Initialize s: set its size and alignment and set up its per-CPU array_cache
    create_boot_cache(s, name, size, flags, useroffset, usersize);
    list_add(&s->list, &slab_caches);
    s->refcount = 1;
    return s;
}

// Create a new kmem_cache
static void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
{
    if (type == KMALLOC_RECLAIM)
        flags |= SLAB_RECLAIM_ACCOUNT;
    // Create a kmem_cache based on the information in kmalloc_info
    kmalloc_caches[type][idx] = create_kmalloc_cache(
                    kmalloc_info[idx].name[type],
                    kmalloc_info[idx].size, flags, 0,
                    kmalloc_info[idx].size);
}

//Build all kmalloc_caches in kmem_cache
void __init create_kmalloc_caches(slab_flags_t flags)
{
    int i;
    enum kmalloc_cache_type type;
    for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
        for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
            if (!kmalloc_caches[type][i])
                //Create a new kmem_cache
                new_kmalloc_cache(i, type, flags);
            if (KMALLOC_MIN_SIZE <= 32 && i == 6 &&
                    !kmalloc_caches[type][1])
                new_kmalloc_cache(1, type, flags);
            if (KMALLOC_MIN_SIZE <= 64 && i == 7 &&
                    !kmalloc_caches[type][2])
                new_kmalloc_cache(2, type, flags);
        }
    }
}

3. Allocation object: slab_alloc

The first parameter of the slab_alloc function is a pointer to the kmem_cache structure from which objects are to be allocated.

First, the pointer to the current CPU's array_cache structure is fetched from the kmem_cache. If it holds the address of a free object, that address is popped from the array_cache and returned, and the allocation succeeds.

When no free object is found there, the kmem_cache_node that the cache belongs to is looked up, and get_first_slab is called to check whether one of its lists contains a slab with free objects;
if the kmem_cache_node has no slab with free objects either, cache_grow_begin is called to get new memory pages from the buddy system and perform the necessary initialization, and finally cache_grow_end mounts the new slab on the slab lists of the kmem_cache_node structure.

The interface calling process is as follows:

slab_alloc
    __do_cache_alloc
        ____cache_alloc
            cpu_cache_get    // Get the pointer to the array_cache structure of the current cpu in the cachep structure
                // After a free object is found, the object address is returned, and the allocation process ends
            
            // When no free object is found
            cache_alloc_refill  
                numa_mem_id
                cpu_cache_get
                get_node          // Get the kmem_cache_node for this memory node
                    get_first_slab   // Look for a slab with free objects on the node's lists
                    alloc_block      // Take objects from that slab and refill the array_cache
                
                cache_grow_begin  // No such slab: allocate a new slab (pages) and initialize it
                cpu_cache_get
                cache_grow_end  // Mount the new page on the slab lists of the kmem_cache_node structure

The detailed process is as follows:

static __always_inline void *slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
{
    unsigned long save_flags;
    void *objp;
    // Disable interrupts
    local_irq_save(save_flags);
    // Allocate the object
    objp = __do_cache_alloc(cachep, flags);
    // Restore interrupts
    local_irq_restore(save_flags);
    return objp;
}


// object allocation process
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    void *objp;
    struct array_cache *ac;
    //Get the pointer to the array_cache structure of the current cpu in the cachep structure
    ac = cpu_cache_get(cachep);

    //If the avail in ac is not 0, it means that there are free objects in the freelist in the current kmem_cache structure
    if (likely(ac->avail)) {
        ac->touched = 1;
        // Free object addresses are stored in ac->entry; pop the last one
        objp = ac->entry[--ac->avail];
        goto out;
    }

    // No free object found
    objp = cache_alloc_refill(cachep, flags);    
out:
    return objp;
}
static __always_inline void *__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    return ____cache_alloc(cachep, flags);
}

static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
{
    struct page *page;
    assert_spin_locked(&n->list_lock);
    // First check the slabs_partial list of the kmem_cache_node for a page
    page = list_first_entry_or_null(&n->slabs_partial, struct page, slab_list);
    if (!page) {
        // If there is none, mark the free list as touched
        n->free_touched = 1;
        // and check the slabs_free list of the kmem_cache_node for a page
        page = list_first_entry_or_null(&n->slabs_free, struct page, slab_list);
        if (page)
            n->free_slabs--; // the free slab count is decremented by one
    }
    // Return the page (or NULL if none was found)
    return page;
}
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
    int batchcount;
    struct kmem_cache_node *n;
    struct array_cache *ac, *shared;
    int node;
    void *list = NULL;
    struct page *page;

    // get memory node
    node = numa_mem_id();
    ac = cpu_cache_get(cachep);
    batchcount = ac->batchcount;
    // Get the kmem_cache_node to which the cache belongs
    n = get_node(cachep, node);
    shared = READ_ONCE(n->shared);
    if (!n->free_objects && (!shared || !shared->avail))
        goto direct_grow;
    while (batchcount > 0) {
        // Get a slab (page) that still has free objects from the kmem_cache_node lists
        page = get_first_slab(n, false);
        if (!page)
            goto must_grow;
        batchcount = alloc_block(cachep, ac, page, batchcount);
    }
must_grow:
    n->free_objects -= ac->avail;
direct_grow:
    if (unlikely(!ac->avail)) {
        // Allocate a new slab (pages) and initialize it
        page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
        ac = cpu_cache_get(cachep);
        if (!ac->avail && page)
            alloc_block(cachep, ac, page, batchcount);
        // Mount the page on the slab lists of the kmem_cache_node structure
        cache_grow_end(cachep, page);
        if (!ac->avail)
            return NULL;
    }
    ac->touched = 1;
    // Pop and return a free object
    return ac->entry[--ac->avail];
}
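
The alloc_block function is not listed here; it moves up to batchcount free objects from the slab that get_first_slab returned into the per-CPU array_cache. A simplified sketch based on mm/slab.c (statistics and debug checks omitted; slab_get_obj pops one free object from the page's freelist):

static __always_inline int alloc_block(struct kmem_cache *cachep,
        struct array_cache *ac, struct page *page, int batchcount)
{
    // There must be at least one free object left on this slab
    BUG_ON(page->active >= cachep->num);

    // Move free objects from the slab into the per-CPU array_cache
    while (page->active < cachep->num && batchcount--)
        ac->entry[ac->avail++] = slab_get_obj(cachep, page);

    return batchcount;
}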

4. slab/slub/slob

SLAB is the original allocator, brought into Linux from SunOS;
SLOB is a minimal allocator aimed at small embedded systems;
SLUB is an optimized allocator for large multi-processor machines and is the default in modern kernels.
