Slab的初始化由kmem_cache_init和kmem_cache_init_late两个函数完成,kmem_cache_init_late在init/main.c:start_kernel中调用。kmem_cache_init的调用路径是:start_kernel->mm_init->kmem_cache_init。
/*
 * kmem_cache_init() - bootstrap the slab allocator.
 *
 * The leading numbers are the mm/slab.c line numbers that the surrounding
 * text refers to; they are not part of the C source.
 *
 * Takes no arguments and returns nothing.  Starting from the statically
 * allocated cache_cache, initkmem_list3[] and initarray_cache, it:
 *  - fills in cache_cache by hand (object size, page order, colour,
 *    slab management size),
 *  - creates the kmalloc caches described by malloc_sizes[] and
 *    cache_names[], the INDEX_AC cache (and the INDEX_L3 cache when it is
 *    a different one) first,
 *  - copies the bootstrap array caches of cache_cache and of the INDEX_AC
 *    cache into kmalloc()ed memory,
 *  - hands the bootstrap kmem_list3 entries to init_list() for every
 *    online node,
 *  - finally sets g_cpucache_up to EARLY.
 */
1497 void __init kmem_cache_init(void)
1498 {
1499     size_t left_over;
1500     struct cache_sizes *sizes;
1501     struct cache_names *names;
1502     int i;
1503     int order;
1504     int node;
1505
1506     if (num_possible_nodes() == 1)
1507         use_alien_caches = 0;
1508
1509     for (i = 0; i < NUM_INIT_LISTS; i++) {
1510         kmem_list3_init(&initkmem_list3[i]);
1511         if (i < MAX_NUMNODES)
1512             cache_cache.nodelists[i] = NULL;
1513     }
1514     set_up_list3s(&cache_cache, CACHE_CACHE);
1515
1516     /*
1517      * Fragmentation resistance on low memory - only use bigger
1518      * page orders on machines with more than 32MB of memory if
1519      * not overridden on the command line.
1520      */
1521     if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
1522         slab_max_order = SLAB_MAX_ORDER_HI;
1523
1524     /* Bootstrap is tricky, because several objects are allocated
1525      * from caches that do not exist yet:
1526      * 1) initialize the cache_cache cache: it contains the struct
1527      *    kmem_cache structures of all caches, except cache_cache itself:
1528      *    cache_cache is statically allocated.
1529      *    Initially an __init data area is used for the head array and the
1530      *    kmem_list3 structures, it's replaced with a kmalloc allocated
1531      *    array at the end of the bootstrap.
1532      * 2) Create the first kmalloc cache.
1533      *    The struct kmem_cache for the new cache is allocated normally.
1534      *    An __init data area is used for the head array.
1535      * 3) Create the remaining kmalloc caches, with minimally sized
1536      *    head arrays.
1537      * 4) Replace the __init data head arrays for cache_cache and the first
1538      *    kmalloc cache with kmalloc allocated arrays.
1539      * 5) Replace the __init data for kmem_list3 for cache_cache and
1540      *    the other cache's with kmalloc allocated memory.
1541      * 6) Resize the head arrays of the kmalloc caches to their final sizes.
1542      */
1543
1544     node = numa_mem_id();
1545
1546     /* 1) create the cache_cache */
1547     INIT_LIST_HEAD(&cache_chain);
1548     list_add(&cache_cache.next, &cache_chain);
1549     cache_cache.colour_off = cache_line_size();
1550     cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1551     cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
1552
1553     /*
1554      * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
1555      */
1556     cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
1557                               nr_node_ids * sizeof(struct kmem_list3 *);
1558 #if DEBUG
1559     cache_cache.obj_size = cache_cache.buffer_size;
1560 #endif
1561     cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1562                                     cache_line_size());
1563     cache_cache.reciprocal_buffer_size =
1564         reciprocal_value(cache_cache.buffer_size);
1565
1566     for (order = 0; order < MAX_ORDER; order++) {
1567         cache_estimate(order, cache_cache.buffer_size,
1568                        cache_line_size(), 0, &left_over, &cache_cache.num);
1569         if (cache_cache.num)
1570             break;
1571     }
1572     BUG_ON(!cache_cache.num);
1573     cache_cache.gfporder = order;
1574     cache_cache.colour = left_over / cache_cache.colour_off;
1575     cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1576                                   sizeof(struct slab), cache_line_size());
1577
1578     /* 2+3) create the kmalloc caches */
1579     sizes = malloc_sizes;
1580     names = cache_names;
1581
1582     /*
1583      * Initialize the caches that provide memory for the array cache and the
1584      * kmem_list3 structures first. Without this, further allocations will
1585      * bug.
1586      */
1587
1588     sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1589                                     sizes[INDEX_AC].cs_size,
1590                                     ARCH_KMALLOC_MINALIGN,
1591                                     ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1592                                     NULL);
1593
1594     if (INDEX_AC != INDEX_L3) {
1595         sizes[INDEX_L3].cs_cachep =
1596             kmem_cache_create(names[INDEX_L3].name,
1597                               sizes[INDEX_L3].cs_size,
1598                               ARCH_KMALLOC_MINALIGN,
1599                               ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1600                               NULL);
1601     }
1602
1603     slab_early_init = 0;
1604
1605     while (sizes->cs_size != ULONG_MAX) {
1606         /*
1607          * For performance, all the general caches are L1 aligned.
1608          * This should be particularly beneficial on SMP boxes, as it
1609          * eliminates "false sharing".
1610          * Note for systems short on memory removing the alignment will
1611          * allow tighter packing of the smaller caches.
1612          */
1613         if (!sizes->cs_cachep) {
1614             sizes->cs_cachep = kmem_cache_create(names->name,
1615                                     sizes->cs_size,
1616                                     ARCH_KMALLOC_MINALIGN,
1617                                     ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1618                                     NULL);
1619         }
1620 #ifdef CONFIG_ZONE_DMA
1621         sizes->cs_dmacachep = kmem_cache_create(
1622                                     names->name_dma,
1623                                     sizes->cs_size,
1624                                     ARCH_KMALLOC_MINALIGN,
1625                                     ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1626                                         SLAB_PANIC,
1627                                     NULL);
1628 #endif
1629         sizes++;
1630         names++;
1631     }
1632     /* 4) Replace the bootstrap head arrays */
1633     {
1634         struct array_cache *ptr;
1635
1636         ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1637
1638         BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1639         memcpy(ptr, cpu_cache_get(&cache_cache),
1640                sizeof(struct arraycache_init));
1641         /*
1642          * Do not assume that spinlocks can be initialized via memcpy:
1643          */
1644         spin_lock_init(&ptr->lock);
1645
1646         cache_cache.array[smp_processor_id()] = ptr;
1647
1648         ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1649
1650         BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1651                != &initarray_generic.cache);
1652         memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1653                sizeof(struct arraycache_init));
1654         /*
1655          * Do not assume that spinlocks can be initialized via memcpy:
1656          */
1657         spin_lock_init(&ptr->lock);
1658
1659         malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1660             ptr;
1661     }
1662     /* 5) Replace the bootstrap kmem_list3's */
1663     {
1664         int nid;
1665
1666         for_each_online_node(nid) {
1667             init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
1668
1669             init_list(malloc_sizes[INDEX_AC].cs_cachep,
1670                       &initkmem_list3[SIZE_AC + nid], nid);
1671
1672             if (INDEX_AC != INDEX_L3) {
1673                 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1674                           &initkmem_list3[SIZE_L3 + nid], nid);
1675             }
1676         }
1677     }
1678
1679     g_cpucache_up = EARLY;
1680 }
kmem_cache_init函数的实质工作是创建一系列的slab缓存,这里包含用来分配struct kmem_cache结构的缓存和通用长度缓存。关键问题是创建缓存时需要分配内存,这些内存从哪里分配呢?如果直接从伙伴系统分配,因为伙伴系统只能分配若干页,这样会造成浪费;还有一个方法就是静态分配,在系统中定义了四个有关slab的静态变量cache_cache,initkmem_list3,initarray_generic和initarray_cache。initkmem_list3定义了足以包含3个三链表数组的空间。
创建slab缓存要为struct kmem_cache结构,三链表数组和对象缓存数组分配空间。struct kmem_cache,三链表数组和对象缓存数组都是从一个slab缓存中分配空间。问题是刚开始的时候没有任何slab缓存,那这些结构的空间是从哪里来的?实际上创建第一个slab缓存的时候所有这些结构都是静态分配空间的,实际上第一个创建的是分配struct kmem_cache的slab缓存,这个slab缓存的struct kmem_cache结构用的是全局结构cache_cache,三链表使用的是initkmem_list3中的空间,对象缓存数组使用的是initarray_cache的空间。这个时候g_cpucache_up变量的值是NONE。第二个创建的是为对象缓存分配空间的slab缓存,这个时候为struct kmem_cache结构分配空间的slab缓存已经创建好,对象缓存数组使用的是initarray_generic,三链表使用的是initkmem_list3中从索引SIZE_AC开始的一段,创建第二个slab缓存的时候g_cpucache_up == NONE成立。第三个创建的是用来为三链表数组分配空间的slab缓存,这个时候用来分配对象缓存的slab已经创建好了,但这个时候要考虑一种情况,就是如果分配三链表数组和对象缓存数组的是同一个slab缓存,这时候三链表也可以直接从slab中分配了;如果分配三链表数组和对象缓存数组的不是同一个slab缓存,则第三个创建的slab缓存的三链表是以SIZE_L3为索引使用initkmem_list3的空间。
kmem_cache_init函数的代码分成5段来读:
第一段:1499-1577。1514行可以看出对第零个创建的缓存的三链表初始化是以CACHE_CACHE为下标占用initkmem_list3的一段,1550行表明缓存堆栈数组是指向全局变量initarray_cache,其他对全局量cache_cache的初始化和kmem_cache_create函数大同小异。
第二段:1579-1604。第二段的任务是创建为缓存堆栈分配空间的缓存和为三链表分配内存的缓存,1594行的判断条件(INDEX_AC != INDEX_L3)处理的是分配缓存堆栈空间的缓存和分配三链表空间的缓存不是同一个的情况,此时需要单独创建后者;两者是同一个缓存时则无需再创建。
第三段:1605-1631。这一段是创建通用长度缓存。
第四段:1633-1661。这一段的工作是从slab系统中分配内存,替换掉刚才临时使用的全局对象缓存数组。
第五段:1663-1677。这一段的工作是从slab系统中分配内存,替换掉刚才临时使用的全局三链表数组。
setup_cpu_cache函数是在mm/slab.c中实现的,代码如下:
/*
 * setup_cpu_cache() - give @cachep its per-cpu array cache and node lists.
 *
 * The leading numbers are the mm/slab.c line numbers that the surrounding
 * text refers to; they are not part of the C source.
 *
 * @cachep: cache being set up.
 * @gfp:    allocation flags passed on to kmalloc()/kmalloc_node() and
 *          enable_cpucache().
 *
 * What is used depends on g_cpucache_up:
 *  - FULL: delegate to enable_cpucache() and return its result.
 *  - NONE: point the array cache at the static initarray_generic, call
 *    set_up_list3s(cachep, SIZE_AC), then advance the state to PARTIAL_L3
 *    when INDEX_AC == INDEX_L3 and to PARTIAL_AC otherwise.
 *  - any other state: kmalloc() the array cache; in state PARTIAL_AC call
 *    set_up_list3s(cachep, SIZE_L3) and advance to PARTIAL_L3, otherwise
 *    kmalloc_node() and initialise a kmem_list3 for every online node.
 *
 * On the non-FULL paths the array cache and the cache limits are then set
 * to their boot-time values and 0 is returned.
 */
2195 static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2196 {
2197     if (g_cpucache_up == FULL)
2198         return enable_cpucache(cachep, gfp);
2199
2200     if (g_cpucache_up == NONE) {
2201         /*
2202          * Note: the first kmem_cache_create must create the cache
2203          * that's used by kmalloc(24), otherwise the creation of
2204          * further caches will BUG().
2205          */
2206         cachep->array[smp_processor_id()] = &initarray_generic.cache;
2207
2208         /*
2209          * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
2210          * the first cache, then we need to set up all its list3s,
2211          * otherwise the creation of further caches will BUG().
2212          */
2213         set_up_list3s(cachep, SIZE_AC);
2214         if (INDEX_AC == INDEX_L3)
2215             g_cpucache_up = PARTIAL_L3;
2216         else
2217             g_cpucache_up = PARTIAL_AC;
2218     } else {
2219         cachep->array[smp_processor_id()] =
2220             kmalloc(sizeof(struct arraycache_init), gfp);
2221
2222         if (g_cpucache_up == PARTIAL_AC) {
2223             set_up_list3s(cachep, SIZE_L3);
2224             g_cpucache_up = PARTIAL_L3;
2225         } else {
2226             int node;
2227             for_each_online_node(node) {
2228                 cachep->nodelists[node] =
2229                     kmalloc_node(sizeof(struct kmem_list3),
2230                                  gfp, node);
2231                 BUG_ON(!cachep->nodelists[node]);
2232                 kmem_list3_init(cachep->nodelists[node]);
2233             }
2234         }
2235     }
2236     cachep->nodelists[numa_mem_id()]->next_reap =
2237             jiffies + REAPTIMEOUT_LIST3 +
2238             ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2239
2240     cpu_cache_get(cachep)->avail = 0;
2241     cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2242     cpu_cache_get(cachep)->batchcount = 1;
2243     cpu_cache_get(cachep)->touched = 0;
2244     cachep->batchcount = 1;
2245     cachep->limit = BOOT_CPUCACHE_ENTRIES;
2246     return 0;
2247 }
setup_cpu_cache函数是在kmem_cache_create函数中调用的,函数的功能是设置缓存的缓存堆栈和三链表结构。setup_cpu_cache函数根据变量g_cpucache_up不同的值有不同的设置方法。g_cpucache_up变量的取值是NONE、PARTIAL_AC、PARTIAL_L3、EARLY、FULL之一:在创建kmem_cache缓存和堆栈缓存的时候g_cpucache_up的值为NONE,堆栈缓存创建完成后变为PARTIAL_AC(如果堆栈缓存和三链表缓存是同一个缓存,则直接变为PARTIAL_L3);在创建三链表缓存的时候g_cpucache_up的值为PARTIAL_AC,创建完成后变为PARTIAL_L3。
2200-2218行是创建堆栈缓存时的处理代码,从这段代码可以看出,堆栈缓存的对象缓存初始化时使用的是initarray_generic的空间,三链表使用的是initkmem_list3中以SIZE_AC为下标的空间,这从2213行代码可以看出来。从2214-2217行可以知道,如果堆栈缓存和三链表缓存是同一个缓存,这时候直接把g_cpucache_up置为PARTIAL_L3,因为这时候三链表也可以从slab中分配了。
2218-2235行,这是创建三链表缓存时的处理代码。2219-2220行,因为堆栈缓存已经创建好,这时候缓存堆栈的空间可以从slab中分配了。三链表的分配要看三链表缓存和堆栈缓存是不是同一个,不是则还要借用静态变量initkmem_list3的空间,是则可以直接在slab中分配空间了。
2236-2246行是对一些变量的初始化。
聯(lián)系客服