Linux記憶體初始化之夥伴系統(三)
這裡主要分析zone/zonelist的初始化,以及把系統空閒記憶體釋放到夥伴系統
1.zone的初始化
呼叫關係:start_kernel->setup_arch->paging_init->bootmem_init->zone_sizes_init->free_area_init_node->free_area_init_core->init_currently_empty_zone/memmap_init
zone_sizes_init: 計算每個zone能夠管理的頁面數以及起始pfn號,初始化zone的等待佇列hash表,以及非常重要的free list連結串列
/*
 * Initialise the per-node wait queues of @pgdat, then walk all zone types
 * and set up every zone that spans at least one page.  This is an abridged
 * listing: realsize, freesize and memmap_pages are not consumed here.
 */
static void __paginginit free_area_init_core(struct pglist_data *pgdat)
{
	int nid = pgdat->node_id;
	unsigned long zone_start_pfn = pgdat->node_start_pfn;
	enum zone_type zt;
	int err;

	pgdat_resize_init(pgdat);
	init_waitqueue_head(&pgdat->kswapd_wait);
	init_waitqueue_head(&pgdat->pfmemalloc_wait);
	pgdat_page_ext_init(pgdat);

	for (zt = 0; zt < MAX_NR_ZONES; zt++) {
		struct zone *zone = &pgdat->node_zones[zt];
		unsigned long size, realsize, freesize, memmap_pages;

		size = zone->spanned_pages;
		realsize = freesize = zone->present_pages;

		/* The lruvec is initialised even for zones that span no pages. */
		lruvec_init(&zone->lruvec);
		if (!size)
			continue;

		set_pageblock_order();
		setup_usemap(pgdat, zone, zone_start_pfn, size);

		/* Initialise the zone's wait table and its free lists. */
		err = init_currently_empty_zone(zone, zone_start_pfn, size);
		BUG_ON(err);

		memmap_init(size, nid, zt, zone_start_pfn);

		/* The next zone starts right after this zone's span. */
		zone_start_pfn += size;
	}
}
/*
 * Prepare a zone that currently holds no pages: set up its wait-queue hash
 * table, record it in the owning node, store its first pfn and initialise
 * its free lists.
 *
 * Returns 0 on success, or the non-zero value returned by
 * zone_wait_table_init() on failure.
 */
int __meminit init_currently_empty_zone(struct zone *zone,
					unsigned long zone_start_pfn,
					unsigned long size)
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	/* Initialise the wait-queue hash table first; bail out if that fails. */
	int err = zone_wait_table_init(zone, size);

	if (err)
		return err;

	pgdat->nr_zones = zone_idx(zone) + 1;
	zone->zone_start_pfn = zone_start_pfn;

	/* Initialise the free lists of the zone. */
	zone_init_free_lists(zone);

	return 0;
}
1.1.2 pageblock 遷移型別初始化
memmap_init(實際由memmap_init_zone實作):
1. 關聯page和zone/node id
2. 設定每個pageblock的migrate type
/*
 * Initialise the struct page entries for the pfn range
 * [start_pfn, start_pfn + size) of zone index @zone on node @nid.
 *
 * For a pfn whose low (pageblock_nr_pages - 1) bits are clear, i.e. the
 * first page of a pageblock, the page is initialised and its pageblock is
 * marked MIGRATE_MOVABLE.  Every other pfn goes through __init_single_pfn().
 *
 * Abridged listing: @context, z and nr_initialised are not consumed here.
 */
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
		unsigned long start_pfn, enum memmap_context context)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	unsigned long end_pfn = start_pfn + size;
	unsigned long nr_initialised = 0;
	unsigned long pfn;
	struct zone *z;

	/* Raise the recorded highest pfn if this range reaches beyond it. */
	if (highest_memmap_pfn < end_pfn - 1)
		highest_memmap_pfn = end_pfn - 1;

	z = &pgdat->node_zones[zone];

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		struct page *page;

		/* Not the first page of a pageblock: plain per-pfn init. */
		if (pfn & (pageblock_nr_pages - 1)) {
			__init_single_pfn(pfn, zone, nid);
			continue;
		}

		/*
		 * First page of a pageblock: link the page to its zone/node,
		 * then default the whole pageblock to MIGRATE_MOVABLE.
		 */
		page = pfn_to_page(pfn);
		__init_single_page(page, pfn, zone, nid);
		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
	}
}
set_pageblock_migratetype:設定page所在pageblock的遷移型別
get_pageblock_migratetype:獲取page所在pageblock的遷移型別
2. zonelist的初始化
start_kernel->build_all_zonelists->build_all_zonelists_init->__build_all_zonelists->build_zonelists
2.1關鍵資料結構
/*
 * Per-node memory descriptor, reduced to the two members discussed in this
 * section (other members such as node_id and node_start_pfn are omitted).
 */
struct pglist_data {
	/* One zone descriptor per zone type, indexed by enum zone_type. */
	struct zone node_zones[MAX_NR_ZONES];
	/* Zone lists of this node; filled in by build_zonelists(). */
	struct zonelist node_zonelists[MAX_ZONELISTS];
};
其中MAX_NR_ZONES=3,MAX_ZONELISTS=1,而MAX_ZONES_PER_ZONELIST=4
/*
 * A list of zone references, stored as a fixed-size array.
 */
struct zonelist {
/*
 * Sized one slot larger than MAX_ZONES_PER_ZONELIST.
 * NOTE(review): the extra slot is presumably reserved for an
 * end-of-list entry - confirm against the code that builds the list.
 */
struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
};
/*
 * One entry of a zonelist: a zone pointer together with that zone's index.
 */
struct zoneref {
struct zone *zone; /* Pointer to actual zone */
int zone_idx; /* zone_idx(zoneref->zone) */
};
2.2 zonelist函式
/*
 * Build the zonelist of node @pgdat.
 *
 * Only the first entry of node_zonelists[] is filled in; in the
 * configuration discussed here MAX_ZONELISTS is 1, so it is the only one.
 * The function returns nothing, so it must not use "return NULL".
 */
static void build_zonelists(pg_data_t *pgdat)
{
	struct zonelist *zonelist;
	enum zone_type j;

	/* Pick the first (and, in this configuration, only) zonelist. */
	zonelist = &pgdat->node_zonelists[0];

	/* Fill in the node's populated zones, starting at slot 0. */
	j = build_zonelists_node(pgdat, zonelist, 0);

	/*
	 * build_zonelists_node() returns the number of slots it filled;
	 * mark the slot after the last one as the end of the list.
	 */
	zonelist->_zonerefs[j].zone = NULL;
	zonelist->_zonerefs[j].zone_idx = 0;
}
/*
 * Append every populated zone of @pgdat to @zonelist, beginning at slot
 * @nr_zones and walking the zone types from MAX_NR_ZONES - 1 down to 0.
 *
 * Returns the slot index following the last entry written.
 */
static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
				int nr_zones)
{
	/* MAX_NR_ZONES is 3 in the configuration discussed by the article. */
	enum zone_type zone_type = MAX_NR_ZONES;

	do {
		struct zone *zone;
		struct zoneref *slot;

		/* First pass: zone_type becomes 2 (ZONE_MOVABLE per the article). */
		zone_type--;
		zone = &pgdat->node_zones[zone_type];

		/*
		 * Skip zones without pages.  Per the article ZONE_MOVABLE is
		 * unused, so ZONE_NORMAL is the first zone that gets added.
		 */
		if (!populated_zone(zone))
			continue;

		/* Bind the next free zoneref slot to this zone. */
		slot = &zonelist->_zonerefs[nr_zones++];
		zoneref_set_zone(zone, slot);
		check_highest_zone(zone_type);
	} while (zone_type);

	return nr_zones;
}
最終的關係:
3.釋放記憶體到夥伴系統
函式呼叫關係
start_kernel->mm_init->mem_init->free_all_bootmem->free_low_memory_core_early->__free_memory_core->__free_pages_memory
/*
 * Release the pfn range [start, end) in blocks, one
 * __free_pages_bootmem() call per block.
 *
 * Each block's order starts at the smaller of MAX_ORDER - 1 and
 * __ffs(pfn), and is lowered until the block fits in front of @end.
 */
static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
	unsigned long pfn = start;

	while (pfn < end) {
		/*
		 * __ffs() gives the index of the lowest set bit of the pfn.
		 * Example: pfn = 8 gives __ffs(8) = 3, so the 8 pages
		 * pfn 8..15 are released as a single order-3 block.
		 */
		int order = min(MAX_ORDER - 1UL, __ffs(pfn));

		/* Lower the order until the block no longer runs past @end. */
		while (pfn + (1UL << order) > end)
			order--;

		/* Hand the block over via __free_pages_bootmem(). */
		__free_pages_bootmem(pfn_to_page(pfn), pfn, order);

		pfn += 1UL << order;
	}
}
根據公式:order = min(MAX_ORDER - 1UL, __ffs(start))有以下結論:
每個記憶體塊(order)的起始實體地址都是自身記憶體塊大小的整數倍,即 (pfn << PAGE_SHIFT) 可以被 ((1 << order) << PAGE_SHIFT) 整除;或者說:
每個記憶體塊(order)的pfn都是自身頁面數的整數倍
夥伴系統記憶體的分配和釋放,以及slab/vmalloc模組,將另行分析