一、Linux伙伴系统分配器
伙伴系统的分配函数大体上分为两类:__get_free_pages()类函数返回所分配的第一个页面的线性地址;alloc_pages()类函数返回页面描述符的地址。不管使用哪一类函数,最终都会调用alloc_pages()来分配页面。
为了清楚地了解其分配机制,先给出伙伴系统数据的存储框图。
也就是说,每个order对应一个free_area结构;free_area按迁移类型(migrate type)分别用链表保存该order下的空闲内存块。
二、主分配函数
下面我们来看这个函数(在UMA模式下)
- /*
- * alloc_pages(gfp_mask, order) expands to a call to alloc_pages_node(),
- * passing the id returned by numa_node_id() as the node argument.
- */
- #define alloc_pages(gfp_mask, order) \
- alloc_pages_node(numa_node_id(), gfp_mask, order)
- /*
- * alloc_pages_node - request pages from the zonelist of node @nid.
- * @nid: node id; a negative value means "unknown" and is replaced by the
- * id returned by numa_node_id().
- * @gfp_mask: allocation flags, also used to select the node's zonelist.
- * @order: allocation order, forwarded unchanged.
- *
- * Returns whatever __alloc_pages() returns for the selected zonelist.
- */
- static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
- unsigned int order)
- {
- /* Unknown node is current node */
- if (nid < 0)
- nid = numa_node_id();
- return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
- }
- /*
- * Thin wrapper: forwards gfp_mask, order and zonelist to
- * __alloc_pages_nodemask() with a NULL nodemask and returns its result.
- */
- static inline struct page *
- __alloc_pages(gfp_t gfp_mask, unsigned int order,
- struct zonelist *zonelist)
- {
- return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
- }
上层分配函数__alloc_pages_nodemask()
- /*
- * This is the 'heart' of the zoned buddy allocator.
- *
- * @gfp_mask: allocation flags; the highest usable zone index and the
- * migrate type are derived from it before it is masked with
- * gfp_allowed_mask.
- * @order: allocation order, passed through to both allocation attempts.
- * @zonelist: zones to try; must reference at least one zone.
- * @nodemask: passed through to the zone lookup and to both attempts.
- *
- * Returns NULL when should_fail_alloc_page() reports failure, when the
- * zonelist has no zone, or when no preferred zone is found. Otherwise
- * returns the page from get_page_from_freelist(), or, if that yields
- * nothing, the result of __alloc_pages_slowpath().
- */
- /* Top-level allocator entry: tries the fast path first, then the slow path. */
- struct page *
- __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
- struct zonelist *zonelist, nodemask_t *nodemask)
- {
- enum zone_type high_zoneidx = gfp_zone(gfp_mask);
- struct zone *preferred_zone;
- struct page *page;
- /* Convert GFP flags to their corresponding migrate type */
- int migratetype = allocflags_to_migratetype(gfp_mask);
- gfp_mask &= gfp_allowed_mask;
- /* Debugging aid */
- lockdep_trace_alloc(gfp_mask);
- /* If __GFP_WAIT is set, the allocation may sleep and be rescheduled */
- might_sleep_if(gfp_mask & __GFP_WAIT);
- /* Author's note: the corresponding config macro is not set (presumably the fault-injection option; confirm) */
- if (should_fail_alloc_page(gfp_mask, order))
- return NULL;
- /*
- * Check the zones suitable for the gfp_mask contain at least one
- * valid zone. It's possible to have an empty zonelist as a result
- * of GFP_THISNODE and a memoryless node
- */
- if (unlikely(!zonelist->_zonerefs->zone))
- return NULL;
- /* The preferred zone is used for statistics later */
- /* (as the English comment above says) */
- first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
- if (!preferred_zone)
- return NULL;
- /* First allocation attempt */
- /* Normal allocation from the per-CPU page lists (pcp) and the buddy system */
- page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
- zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
- preferred_zone, migratetype);
- if (unlikely(!page))/* Nothing allocated above: fall back to the slow path, which may wait and reclaim */
- page = __alloc_pages_slowpath(gfp_mask, order,
- zonelist, high_zoneidx, nodemask,
- preferred_zone, migratetype);
- /* Debugging aid */
- trace_mm_page_alloc(page, order, gfp_mask, migratetype);
- return page;
- }
三、从pcp和伙伴系统中正常的分配内存空间
函数get_page_from_freelist()
- /*
- * get_page_from_freelist goes through the zonelist trying to allocate
- * a page.
- */
- /*为分配指定内存空间,遍历每个zone*/
- static struct page *
- get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
- struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
- struct zone *preferred_zone, int migratetype)
- {
- struct zoneref *z;
- struct page *page = NULL;
- int classzone_idx;
- struct zone *zone;
- nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
- int zlc_active = 0; /* set if using zonelist_cache */
- int did_zlc_setup = 0; /* just call zlc_setup() one time */
- /*zone对应的下标*/
- classzone_idx = zone_idx(preferred_zone);
- zonelist_scan:
- /*
- * Scan zonelist, looking for a zone with enough free.
- * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
- */
- /*遍历每个zone,进行分配*/
- for_each_zone_zonelist_nodemask(zone, z, zonelist,
- /*在UMA模式下不成立*/ high_zoneidx, nodemask) {
- if (NUMA_BUILD && zlc_active &&
- !zlc_zone_worth_trying(zonelist, z, allowednodes))
- continue;
- if ((alloc_flags & ALLOC_CPUSET) &&
- !cpuset_zone_allowed_softwall(zone, gfp_mask))
- goto try_next_zone;
- BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
- /*需要关注水位*/
- if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
- unsigned long mark;
- int ret;
- /*从flags中取的mark*/
- mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
- /*如果水位正常,从本zone中分配*/
- if (zone_watermark_ok(zone, order, mark,
- classzone_idx, alloc_flags))
- goto try_this_zone;
- if (zone_reclaim_mode == 0)/*如果上面检查的水位低于正常值,且没有设置页面回收值*/
- goto this_zone_full;
- /*在UMA模式下下面函数直接返回0*/
- ret = zone_reclaim(zone, gfp_mask, order);
- switch (ret) {
- case ZONE_RECLAIM_NOSCAN:
- /* did not scan */
- goto try_next_zone;
- case ZONE_RECLAIM_FULL:
- /* scanned but unreclaimable */