硬汉嵌入式论坛

 找回密码
 立即注册
查看: 203|回复: 0
收起左侧

轻量级、高性能的动态内存分配器LWMalloc,专为资源受限的环境而设计

[复制链接]

1万

主题

7万

回帖

11万

积分

管理员

Rank: 9Rank: 9Rank: 9

积分
118308
QQ
发表于 2025-9-14 17:25:46 | 显示全部楼层 |阅读模式
https://www.prnewswire.com/news- ... ance-302543816.html
https://github.com/taehyeon-masu/lwmalloc

代码量很小,大家在各种单片机上也是可以移植的

[C] 纯文本查看 复制代码
#include <unistd.h>
#include <string.h>
#include <stdint.h>

extern void *lw_malloc(size_t size);
extern void lw_free(void *ptr);
extern void *lw_realloc(void *ptr, size_t size);
extern void *lw_calloc(size_t nmemb, size_t size);

void *malloc(size_t size) { return lw_malloc(size); }

void free(void *ptr) { lw_free(ptr); }

void *realloc(void *ptr, size_t size) { return lw_realloc(ptr, size); }

void *calloc(size_t nmemb, size_t size) { return lw_calloc(nmemb, size); }

#define ALIGNMENT 8
#define ALIGN(size) (((size) + (ALIGNMENT - 1)) & ~0x7)

#define WSIZE 8
#define DSIZE 16
#define CHUNKSIZE (1 << 12)
#define MAX(x, y) (x > y ? x : y)
#define PACK(size, alloc) (size | alloc)
#define GET(p) (*(size_t *)(p))
#define PUT(p, val) (*(size_t *)(p) = (val))
#define GET_SIZE(p) (GET(p) & ~0x7)
#define GET_ALLOC(p) (GET(p) & 0x1)
#define HDRP(bp) ((char *)(bp) - WSIZE)
#define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE)
#define NEXT_BLKP(bp) ((char *)(bp) + GET_SIZE(((char *)(bp) - WSIZE)))
#define PREV_BLKP(bp) ((char *)(bp) - GET_SIZE(((char *)(bp) - DSIZE)))
#define NEXT_BLKP_S(bp) ((char *)(bp) + GET_SIZE(((char *)(bp) - WSIZE)))
#define GET_NEXT(bp) (*(void **)((char *)(bp) + WSIZE))
#define GET_PREV(bp) (*(void **)(bp))
#define SEGSIZE 62
#define DEFAULT_HEAP 1 * (1 << 20)
#define GET_ROOT(class) (*(void **)((char *)(free_listp) + (class * WSIZE)))
#define IS_BUFFER(p) ((GET(HDRP(p)) >> 1) & 0x1)
#define IS_BIN(p) ((GET(HDRP(p)) >> 2) & 0x1)
#define IS_BIN_N_BUF(p) (GET(HDRP(p)) & 0x6)
#define IS_BUF_N_ALOC(p) (GET(HDRP(p)) & 0x3)
#define GET_NEXT_S(bp) (*(void **)(bp))

static void *lw_find_fit(size_t size);
static void *lw_place(void *bp, size_t size);
static void lw_remove_free_block(void *bp);
static void lw_add_free_block(void *bp);
inline int lw_get_class(size_t size);
static void lw_deferred_coalescing();
inline void set_block(void* ptr, size_t size, int alloc);
static char *mem_start_brk;
static char *mem_max_addr;
static char *mem_brk;
static char *heap_listp = NULL;
static char *free_listp = NULL;

inline void set_block(void* ptr, size_t size, int alloc) {
	*(size_t *)((char *)(ptr) - WSIZE) = (size | alloc);
    *(size_t *)((char *)(ptr) + size - DSIZE) = (size | alloc);
}

void lw_mem_init(void)
{
	if ((mem_start_brk = (char *)sbrk(DEFAULT_HEAP)) == NULL)
		exit(1);

	mem_max_addr = mem_start_brk + DEFAULT_HEAP;
	mem_brk = mem_start_brk;
}

void *lw_sbrk(int incr)
{
	char *old_brk = mem_brk;
	if ((mem_brk + incr) > mem_max_addr)
	{
		int size = MAX(incr, DEFAULT_HEAP);
		sbrk(size);
		mem_max_addr = mem_max_addr + size;
	}

	mem_brk += incr;
	return (void *)old_brk;
}

inline int lw_get_class(size_t size)
{
	int ind = 7;
	while ((1 << ind) < size)
	{
		ind++;
	}

	ind = ind + 9;

	if (ind > SEGSIZE - 1)
		return SEGSIZE - 1;

	return ind;
}

static void lw_remove_free_block(void *bp)
{
	int size = (int)GET_SIZE(HDRP(bp));
	int class = lw_get_class(size);

	if (bp == GET_ROOT(class))
	{
		GET_ROOT(class) = GET_NEXT(GET_ROOT(class));
		return;
	}
	GET_NEXT(GET_PREV(bp)) = GET_NEXT(bp);

	if (GET_NEXT(bp) != NULL)
		GET_PREV(GET_NEXT(bp)) = GET_PREV(bp);
}

static void lw_add_free_block(void *bp)
{
	int class = lw_get_class(GET_SIZE(HDRP(bp)));

	GET_NEXT(bp) = GET_ROOT(class);
	if (GET_ROOT(class) != NULL)
		GET_PREV(GET_ROOT(class)) = bp;

	GET_ROOT(class) = bp;
}

void alloc_init(void)
{
	lw_mem_init();

	if ((heap_listp = lw_sbrk((SEGSIZE + 4) * WSIZE)) == (void *)-1)
		return -1;

	PUT(heap_listp, 0);
	PUT(heap_listp + (1 * WSIZE), PACK((SEGSIZE + 2) * WSIZE, 1));
	for (int i = 0; i < SEGSIZE; i++)
		PUT(heap_listp + ((2 + i) * WSIZE), NULL);
	PUT(heap_listp + ((2 + SEGSIZE) * WSIZE), PACK((SEGSIZE + 2) * WSIZE, 1));
	PUT(heap_listp + ((3 + SEGSIZE) * WSIZE), PACK(0, 1));
	free_listp = heap_listp + 2 * WSIZE;
}

void *lw_malloc(size_t size)
{
	if (heap_listp == NULL)
		alloc_init();

	size_t asize;
	size_t extendsize;
	char *bp;
	char *ptr;
	if (size <= 120)
	{
		size_t asize = ALIGN(size + WSIZE);
		if (asize == 8)
			asize = 16;

		int class = (asize >> 3);

		if ((bp = GET_ROOT(class)) != NULL)
		{
			if (!GET_ALLOC(HDRP(bp)))
			{
				size_t csize = GET_SIZE(HDRP(bp));

				if (csize < (asize << 1))
				{
					PUT(HDRP(bp), PACK(asize, 5));
					GET_ROOT(class) = NULL;
					return bp;
				}
				else
				{
					PUT(HDRP(bp), PACK(asize, 5));
					PUT(HDRP(NEXT_BLKP_S(bp)), PACK(csize - asize, 4));
					GET_ROOT(class) = NEXT_BLKP_S(bp);
					return bp;
				}
			}
			GET_ROOT(class) = GET_NEXT_S(bp);
			return bp;
		}
		else
		{
			if ((bp = lw_sbrk(CHUNKSIZE)) == NULL)
				return NULL;

			set_block(bp, CHUNKSIZE, 1);

			PUT(HDRP(NEXT_BLKP_S(bp)), PACK(0, 1));

			bp += WSIZE;

			PUT(HDRP(bp), PACK(asize, 5));

			PUT(HDRP(NEXT_BLKP_S(bp)), PACK(CHUNKSIZE - DSIZE - asize, 4));
			GET_ROOT(class) = NEXT_BLKP_S(bp);
			return bp;
		}
		return NULL;
	}

	if (size <= DSIZE)
		asize = 2 * DSIZE;
	else
		asize = ALIGN(2 * WSIZE + size);

	lw_deferred_coalescing();

	if ((bp = lw_find_fit(asize)) != NULL)
	{
		return lw_place(bp, asize);
	}
	else
	{
		size_t new_size = asize;
		if (!GET_ALLOC(HDRP(PREV_BLKP(mem_brk))))
		{
			size_t end_size = GET_SIZE(HDRP(PREV_BLKP(mem_brk)));
			if (asize >= end_size)
			{
				bp = PREV_BLKP(mem_brk);
				lw_remove_free_block(bp);
				new_size = asize - end_size;

				if (lw_sbrk(new_size) == NULL)
				{
					return NULL;
				}

				set_block(bp, asize, 1);
				PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1));
				return bp;
			}
		}
		else
		{

			if ((bp = lw_sbrk(asize)) == NULL)
			{
				return NULL;
			}
			set_block(bp, asize, 1);
			PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1));
			return bp;
		}
	}
}

void lw_free(void *bp)
{
	if (bp == NULL)
		return NULL;

	size_t size = GET_SIZE(HDRP(bp));

	if (IS_BIN(bp))
	{
		int class = (size >> 3);
		GET_NEXT_S(bp) = GET_ROOT(class);
		GET_ROOT(class) = bp;
		return;
	}

	int prev_buf_n_alloc = IS_BUF_N_ALOC(PREV_BLKP(bp));
	int next_buf_n_alloc = IS_BUF_N_ALOC(NEXT_BLKP(bp));

	if ((prev_buf_n_alloc == 2) || (next_buf_n_alloc == 2))
	{
		set_block(bp, size, 6);
		return;
	}
	else if ((prev_buf_n_alloc == 0) || (next_buf_n_alloc == 0))
	{
		set_block(bp, size, 2);
		GET_NEXT(bp) = GET_ROOT(1);
		if (GET_ROOT(1) != NULL)
			GET_PREV(GET_ROOT(1)) = bp;
		GET_ROOT(1) = bp;
	}
	else if ((prev_buf_n_alloc == 1) && (next_buf_n_alloc == 1))
	{
		set_block(bp, size, 0);
		lw_add_free_block(bp);
	}
}

void *lw_calloc(size_t nmemb, size_t size)
{
	size_t bytes = nmemb * size;
	void *new_ptr = lw_malloc(bytes);
	if (bytes <= 120)
		bytes = GET_SIZE(HDRP(new_ptr)) - WSIZE;
	else
		bytes = GET_SIZE(HDRP(new_ptr)) - DSIZE;
	if (new_ptr == NULL)
		return NULL;

	memset(new_ptr, 0, bytes);
	return new_ptr;
}

void *lw_realloc(void *ptr, size_t size)
{
	size_t asize;
	void *newptr;

	if (size <= DSIZE)
		asize = 2 * DSIZE;
	else
		asize = ALIGN(2 * WSIZE + size);

	if (ptr == NULL)
		return lw_malloc(size);

	size_t oldsize = GET_SIZE(HDRP(ptr));

	if (size <= 0)
	{
		lw_free(ptr);
		return 0;
	}

	if (asize <= oldsize)
		return ptr;

	lw_deferred_coalescing();

	if ((GET_SIZE(HDRP(ptr)) <= 128) && IS_BIN(ptr))
	{
		if (size > 120)
		{
			newptr = lw_malloc(size);
			size_t oldsize = GET_SIZE(HDRP(ptr));
			if (newptr == NULL)
				return NULL;

			memcpy(newptr, ptr, (GET_SIZE(HDRP(ptr)) - WSIZE));
			lw_free(ptr);

			return newptr;
		}
		else
		{
			newptr = lw_malloc(size);
			size_t oldsize = GET_SIZE(HDRP(ptr));
			if (newptr == NULL)
				return NULL;

			memcpy(newptr, ptr, (GET_SIZE(HDRP(ptr)) - WSIZE));
			lw_free(ptr);
			return newptr;
		}
	}

	int prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(ptr)));
	size_t prev_size = GET_SIZE(FTRP(PREV_BLKP(ptr)));
	int next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(ptr)));
	size_t next_size = GET_SIZE(HDRP(NEXT_BLKP(ptr)));
	if (PREV_BLKP(mem_brk) == ptr)
	{
		next_alloc = 1;
	}

	if (!next_alloc)
	{
		if ((oldsize + next_size) >= asize)
		{
			lw_remove_free_block(NEXT_BLKP(ptr));
			set_block(ptr, oldsize+next_size, 1);
			return ptr;
		}
	}
	else if ((!next_alloc) && (!prev_alloc) && ((oldsize + prev_size + next_size) >= asize))
	{
		void *prev_block = PREV_BLKP(ptr);
		if (prev_size >= oldsize)
		{
			lw_remove_free_block(PREV_BLKP(ptr));
			lw_remove_free_block(NEXT_BLKP(ptr));
			if ((prev_size + oldsize + next_size - asize) <= 128)
			{
				memcpy(prev_block, ptr, (GET_SIZE(HDRP(ptr)) - DSIZE));
				set_block(prev_block, prev_size + oldsize + next_size - asize, 1);
				return prev_block;
			}
			else
			{
				memcpy(prev_block, ptr, (GET_SIZE(HDRP(ptr)) - DSIZE));
				set_block(prev_block, asize, 1);
				set_block(NEXT_BLKP(prev_block), prev_size + oldsize + next_size - asize, 0);

				lw_add_free_block(NEXT_BLKP(prev_block));
				return prev_block;
			}
		}
		else if (prev_size < oldsize)
		{
			lw_remove_free_block(PREV_BLKP(ptr));
			lw_remove_free_block(NEXT_BLKP(ptr));
			int total_movesize = GET_SIZE(HDRP(ptr)) - DSIZE;
			int sep_movesize = GET_SIZE(HDRP(prev_block));
			int n = total_movesize / sep_movesize;

			for (int i = 0; i < n; i++)
				memcpy(prev_block + i * sep_movesize, ptr + i * sep_movesize, sep_movesize);
			memcpy(prev_block + n * sep_movesize, ptr + n * sep_movesize, total_movesize - (sep_movesize * n));

			if (((prev_size + oldsize + next_size) - asize) <= 128)
			{
				set_block(prev_block, prev_size + oldsize + next_size, 1);
				return prev_block;
			}
			else
			{
				set_block(prev_block, asize, 1);
				set_block(NEXT_BLKP(prev_block), prev_size + oldsize + next_size - asize, 0);
				lw_add_free_block(NEXT_BLKP(prev_block));
				return prev_block;
			}
		}
	}
	else if ((next_alloc) && (!prev_alloc) && ((oldsize + prev_size) >= asize))
	{
		void *prev_block = PREV_BLKP(ptr);
		if (prev_size >= oldsize)
		{
			lw_remove_free_block(PREV_BLKP(ptr));
			if ((prev_size + oldsize - asize) <= 128)
			{
				memcpy(prev_block, ptr, (GET_SIZE(HDRP(ptr)) - DSIZE));
				set_block(prev_block, prev_size + oldsize, 1);
				return prev_block;
			}
			else
			{
				memcpy(prev_block, ptr, (GET_SIZE(HDRP(ptr)) - DSIZE));
				set_block(prev_block, asize, 1);
				set_block(NEXT_BLKP(prev_block), prev_size + oldsize - asize, 0);

				lw_add_free_block(NEXT_BLKP(prev_block));
				return prev_block;
			}
		}
		else if (prev_size < oldsize)
		{
			lw_remove_free_block(PREV_BLKP(ptr));
			int total_movesize = GET_SIZE(HDRP(ptr)) - DSIZE;
			int sep_movesize = GET_SIZE(HDRP(prev_block));
			int n = total_movesize / sep_movesize;

			for (int i = 0; i < n; i++)
			{
				memcpy(prev_block + i * sep_movesize, ptr + i * sep_movesize, sep_movesize);
			}
			memcpy(prev_block + n * sep_movesize, ptr + n * sep_movesize, total_movesize - (sep_movesize * n));

			if (((prev_size + oldsize) - asize) <= 128)
			{
				set_block(prev_block, prev_size + oldsize, 1);
				return prev_block;
			}
			else
			{
				set_block(prev_block, asize, 1);
				set_block(NEXT_BLKP(prev_block), prev_size + oldsize - asize, 0);
				lw_add_free_block(NEXT_BLKP(prev_block));
				return prev_block;
			}
		}
	}

	newptr = lw_malloc(size);
	if (newptr == NULL)
		return NULL;

	memcpy(newptr, ptr, (GET_SIZE(HDRP(ptr)) - DSIZE));
	lw_free(ptr);

	return newptr;
}

static void lw_deferred_coalescing()
{
	int class = 1;
	char *ptr = GET_ROOT(class);
	void *bp;

	if (ptr == NULL)
		return;

	if (ptr == 0)
		return;

	while (ptr != NULL)
	{
		char *start_ptr = ptr;
		char *next_ptr = ptr;
		size_t totalsize = 0;
		while (start_ptr >= (char *)heap_listp + 2 * WSIZE)
		{
			totalsize += GET_SIZE(HDRP(start_ptr));
			if (IS_BUFFER(start_ptr) && !IS_BIN(start_ptr))
			{
				if (start_ptr == GET_ROOT(class))
				{
					GET_ROOT(class) = GET_NEXT(GET_ROOT(class));
				}
				else
				{
					GET_NEXT(GET_PREV(start_ptr)) = GET_NEXT(start_ptr);

					if (GET_NEXT(start_ptr) != NULL)
						GET_PREV(GET_NEXT(start_ptr)) = GET_PREV(start_ptr);
				}
			}

			else if (!IS_BUFFER(start_ptr) && !IS_BIN(start_ptr))
				lw_remove_free_block(start_ptr);

			if (GET_ALLOC(HDRP(PREV_BLKP(start_ptr))))
				break;

			else
			{
				if (start_ptr == PREV_BLKP(start_ptr))
					break;
				start_ptr = PREV_BLKP(start_ptr);
			}
		}

		while (next_ptr <= ((char *)mem_brk - WSIZE))
		{
			next_ptr = NEXT_BLKP(next_ptr);

			if (!GET_ALLOC(HDRP(next_ptr)))
			{
				totalsize += GET_SIZE(HDRP(next_ptr));

				if (IS_BUFFER(next_ptr) && !IS_BIN(next_ptr))
				{
					if (next_ptr == GET_ROOT(class))
					{
						GET_ROOT(class) = GET_NEXT(GET_ROOT(class));
					}
					else
					{
						GET_NEXT(GET_PREV(next_ptr)) = GET_NEXT(next_ptr);

						if (GET_NEXT(next_ptr) != NULL)
							GET_PREV(GET_NEXT(next_ptr)) = GET_PREV(next_ptr);
					}
				}

				else if (!IS_BUFFER(next_ptr) && !IS_BIN(next_ptr))
					lw_remove_free_block(next_ptr);
			}
			else
				break;
		}
		set_block(start_ptr, totalsize, 0);
		lw_add_free_block(start_ptr);

		ptr = GET_ROOT(class);
	}
}

static void *lw_find_fit(size_t asize)
{
	int class = lw_get_class(asize);
	void *bp = GET_ROOT(class);

	while (class < SEGSIZE)
	{
		bp = GET_ROOT(class);
		while (bp != NULL)
		{

			if ((asize <= GET_SIZE(HDRP(bp))))
				return bp;

			bp = GET_NEXT(bp);
		}
		class += 1;
	}
	return NULL;
}

static void *find_fit_one_class(int class)
{
	void *bp = GET_ROOT(class);
	while (bp != NULL)
		return bp;

	return NULL;
}

static void *lw_place(void *bp, size_t asize)
{
	lw_remove_free_block(bp);
	size_t csize = GET_SIZE(HDRP(bp));

	if ((csize - asize) <= 128)
	{
		set_block(bp, csize, 1);
		return bp;
	}
	else
	{
		set_block(bp, asize, 1);
		set_block(NEXT_BLKP(bp), csize - asize, 0);
		lw_add_free_block(NEXT_BLKP(bp));
		return bp;
	}
}


物联网设备和单板计算机等嵌入式系统的内存和处理能力有限,因此需要有效管理这些限制。这使得 Linux 在嵌入式系统中前景广阔。事实上,基于 Linux 的作系统,包括 Ubuntu Core、Raspberry Pi OS、BalenaOS 和 OpenWrt,通常用于各种嵌入式设备。然而,ptmalloc(其默认内存分配器)通常无法满足所有应用程序的需求。虽然专家们提出了 jemalloc、tcmalloc 和 mimalloc 等替代方案来改进内存管理,但这些通用动态内存分配器存在内存消耗大、库大小巨大、复杂性以及最终性能下降的问题。这凸显了对新的轻量级选项的迫切需求。

为了解决这个问题,以降低复杂性和优化性能,由韩国首尔国立科技大学 (SeoulTech) 智能 ICT 融合工程助理教授 Hwajung Kim 博士领导的研究人员开发了 LWMalloc,这是一种轻量级、高性能的动态内存分配器,专为资源受限的环境而设计。他们的新发现已于 2025 年 2 月 12 日在线发布,并已于 2025 年 6 月 15 日发表在 IEEE 物联网杂志第 12 卷第 12 期上

LWMalloc 基于轻量级数据结构,并拥有延迟合并策略和用于内存分配优化的专用小块池。其数据结构有助于减少元数据开销,以实现紧凑高效的实施。值得注意的是,DC 策略将冗余作推迟到分配,从而降低了执行开销并保持了效率以及低响应时间。此外,专用的小块池将动态分配模式中常见的小内存请求分离到固定大小的池中,从而促进其 O1(恒定时间复杂度)分配。

研究人员通过在 Raspberry Pi Zero W、Raspberry Pi 4 和 Jetson Orin Nano 上的广泛实验性实际应用,证明了 LWMalloc 的优越性。“我们的提案优于 Linux 中的默认分配器 ptmalloc,执行时间缩短了 53%,内存使用量降低了 23%。LWMalloc 的紧凑实现仅为 530 行代码,大小为 20 KB,明显小于 ptmalloc 的 4838 行和 116 KB,在性能和内存效率之间实现了有效平衡,使其非常适合资源受限的环境,“Kim 博士评论道。

LWMalloc 可以使任何在严格内存和性能限制下运行的嵌入式或物联网系统受益。其中包括消费电子产品,如智能电视、机顶盒、家用电器、移动和可穿戴设备、具有实时限制的汽车系统,以及处理人工智能或数据处理工作负载的边缘计算节点。

SEOULTECH_Infographic_final.jpg
回复

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

QQ|小黑屋|Archiver|手机版|硬汉嵌入式论坛

GMT+8, 2025-9-26 06:18 , Processed in 0.039195 second(s), 27 queries .

Powered by Discuz! X3.4 Licensed

Copyright © 2001-2023, Tencent Cloud.

快速回复 返回顶部 返回列表