在用gperftools优化gko_pool的时候我发现一个问题,malloc竟然成了性能瓶颈
由于在每个连接建立的时候gko_pool默认会为读写各分配2KB的buf备用,这个是比较固定的
每个连接的生命周期会伴随着4KB大小的内存malloc & free
正好可以写个只能分配固定大小内存的“内存池”,基本思路就是每次分配一个大内存bucket(64MB),需要4KB的块的时候就从bucket中取,当bucket没有可用slot就再分配一个新的bucket,当bucket的所有slot都空闲就把bucket整体free。bucket中寻找槽位用的二分查找实现,每个bit对应一个slot是否被占用,废话不说 上代码:
memory.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
/*
* memory.h
*
* Created on: Jun 8, 2012
* Author: auxten
*/
#ifndef GKO_MEMORY_H_
#define GKO_MEMORY_H_
#include <sys/types.h>
#include <pthread.h>
/// Size in bytes of one slot handed out by the pool (4KB, one page is good).
static const u_int32_t SLOT_SIZE = 4 * 1024;
/// Total number of slots the pool can track across all buckets (1M).
static const u_int32_t SLOT_COUNT = 1 * 1024 * 1024;
/// Size in bytes of the occupancy bitmap array.
/// NOTE(review): sizeof(u_int8_t) is 1, so this reserves SLOT_COUNT bytes,
/// while a one-bit-per-slot bitmap only needs SLOT_COUNT / 8 bytes. The value
/// is kept as-is so the class layout does not change; confirm before shrinking.
static const u_int32_t M_MAP_SIZE = SLOT_COUNT / sizeof(u_int8_t);
/// Size in bytes of one bucket, the unit requested from the system (64MB).
static const u_int32_t BUCKET_SIZE = 64 * 1024 * 1024;
/// Number of slots in one bucket: 64MB / 4KB = 16384.
static const int32_t BUCKET_CAPACITY = BUCKET_SIZE / SLOT_SIZE;
/// Maximum number of buckets: 1M slots / 16384 slots per bucket = 64.
static const int32_t BUCKET_COUNT = SLOT_COUNT / BUCKET_CAPACITY;
/// Block id used to signal "no block" (the identifier spelling is kept for
/// compatibility with existing callers).
static const int INVILID_BLOCK = -1;
/**
 * Fixed-size block pool.
 *
 * Memory is requested from the system one bucket (BUCKET_SIZE bytes) at a
 * time and handed out in SLOT_SIZE-byte blocks identified by an integer
 * block id. A bitmap records which slots are in use.
 *
 * get_block() and free_block() serialize on alloc_lock; id2addr() does not
 * take the lock.
 *
 * NOTE(review): no destructor is declared, so buckets that are still
 * allocated and alloc_lock itself are not released when an instance goes
 * away; the class is also copyable although it holds raw bucket pointers.
 */
class gkoAlloc
{
private:
    /// protects m_map, bucket_s, bucket_used and latest_bucket
    pthread_mutex_t alloc_lock;
    /// occupancy bitmap, one bit per slot, most significant bit first in each byte
    /// (original note: 1MB can fit L2 cache)
    u_int8_t m_map[M_MAP_SIZE];
    /// base address of every bucket, NULL while the bucket is not allocated
    void * bucket_s[BUCKET_COUNT];
    /// number of slots currently in use in every bucket
    int16_t bucket_used[BUCKET_COUNT];
    /// bucket at which the next search for a free slot starts
    int latest_bucket;

    /// mark the first clear bit of *b (counting from the most significant bit) and return its index
    int get_bit(u_int8_t * b);
    /// release bit `index` of *b (index 0 is the most significant bit) and return index
    int free_bit(u_int8_t * b, int index);

public:
    /// create an empty pool with no bucket allocated
    gkoAlloc(void);
    /// reserve one block; returns its block id or INVILID_BLOCK on failure
    int get_block(void);
    /// NOTE(review): declared only, no definition is visible next to the other methods
    int get_clear_block(void);
    /// NOTE(review): declared only, no definition is visible next to the other methods
    int get2x_block(int block_id);
    /// give a block back to the pool; negative ids are ignored
    void free_block(int block_id);
    /// NOTE(review): declared only, no definition is visible next to the other methods
    int clear_block(void *block, int c, size_t size);
    /// translate a block id into the address of its slot; NULL for negative ids
    void * id2addr(int block_id);
};
#endif /* GKO_MEMORY_H_ */
|
memory.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
|
/*
* memory.cpp
*
* Created on: Jun 8, 2012
* Author: auxten
*
* only support little endian : x86
*/
// #define MEM_TEST
#define _XOPEN_SOURCE 600
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <sys/types.h>
#include "memory.h"
/**
 * Build an empty pool: the lock is initialized with default attributes,
 * every bitmap bit is clear, no bucket is allocated, all usage counters
 * are zero and the search for free slots starts at bucket 0.
 */
gkoAlloc::gkoAlloc(void)
    : latest_bucket(0)
{
    pthread_mutex_init(&alloc_lock, NULL);

    /// the arrays are plain members, so sizeof covers each of them entirely
    memset(m_map, 0, sizeof(m_map));
    memset(bucket_s, 0, sizeof(bucket_s));
    memset(bucket_used, 0, sizeof(bucket_used));
}
void * gkoAlloc::id2addr(int block_id)
{
if
(block_id < 0)
return
NULL;
int bucket_no = block_id / BUCKET_CAPACITY;
int bucket_offset = SLOT_SIZE * (block_id % BUCKET_CAPACITY);
return
((char *)bucket_s[bucket_no]) + bucket_offset;
}
/**
 * Find the first clear bit in *b, set it and return its index.
 *
 * Bits are numbered from the most significant one:
 *   idx   01234567
 *   byte  11111010   -> returns 5, byte becomes 11111110
 *
 * @param b  bitmap byte to search and update
 * @return index (0..7) of the bit that was set, or -1 when every bit of
 *         *b is already set (in that case *b is left untouched)
 */
int gkoAlloc::get_bit(u_int8_t * b)
{
    for (int i = 0; i < 8; i++)
    {
        const u_int8_t mask = (u_int8_t) (1u << (7 - i));
        if ((*b & mask) == 0)
        {
            *b |= mask;
            return i;
        }
    }
    /// full byte: shifting by (7 - 8) would be undefined, so report failure
    return -1;
}
/**
 * Clear one bit of *b.
 *
 * Bits are numbered from the most significant one:
 *   idx   01234567
 *   byte  11111110   -> free_bit(b, 5) returns 5, byte becomes 11111010
 *
 * @param b      bitmap byte to update
 * @param index  bit to clear, 0..7
 * @return index, or -1 when index is outside 0..7 (*b is left untouched)
 */
int gkoAlloc::free_bit(u_int8_t * b, int index)
{
    /// an out-of-range index would make the shift below undefined
    if (index < 0 || index > 7)
        return -1;

    /// clear instead of toggle, so releasing an already clear bit cannot mark it used
    *b &= (u_int8_t) ~(1u << (7 - index));
    return index;
}
int gkoAlloc::get_block(void)
{
int i;
int the_bucket;
int idx = INVILID_BLOCK;
u_int8_t * p_idx;
u_int64_t * bucket_start_idx;
u_int64_t * bucket_end_idx;
u_int64_t * bucket_idx;
pthread_mutex_lock(&alloc_lock);
for
(i = 0; i < BUCKET_COUNT; i++)
{
the_bucket = (latest_bucket + i) % BUCKET_COUNT;
if
(bucket_used[the_bucket] < BUCKET_CAPACITY)
{
latest_bucket = the_bucket;
break
;
}
}
if
(i == BUCKET_COUNT)
{
fprintf(stderr,
"out of memory in pool\n"
);
//
GKOLOG(FATAL,
"out of memory in pool"
);
idx = INVILID_BLOCK;
goto GET_BLOCK_RET;
}
if
(!bucket_s[the_bucket])
{
void * ptr;
if
(!posix_memalign(&ptr, SLOT_SIZE, BUCKET_SIZE))
{
bucket_s[the_bucket] = ptr;
bucket_used[the_bucket] = 0;
}
else
{
fprintf(stderr,
"posix_memalign fail\n"
);
//
GKOLOG(FATAL,
"posix_memalign fail"
);
idx = INVILID_BLOCK;
goto GET_BLOCK_RET;
}
}
bucket_start_idx = (u_int64_t *) &(this->m_map[the_bucket * BUCKET_CAPACITY / 8]);
bucket_end_idx = (u_int64_t *) &(this->m_map[(the_bucket + 1) * BUCKET_CAPACITY / 8]);
for
(bucket_idx = bucket_start_idx;
bucket_idx < bucket_end_idx;
bucket_idx++)
{
if
(*(u_int64_t *) bucket_idx != ~0uLL)
{
if
(*(u_int32_t *) bucket_idx != ~0u)
{
if
(*((u_int16_t *) bucket_idx) != (u_int16_t) ~0u)
{
if
(*(u_int8_t *) bucket_idx != (u_int8_t) ~0u)
{
p_idx = (u_int8_t *) bucket_idx + 0;
}
else
{
p_idx = (u_int8_t *) bucket_idx + 1;
}
}
else
{
if
(*((u_int8_t *) bucket_idx + 2) != (u_int8_t) ~0u)
{
p_idx = (u_int8_t *) bucket_idx + 2;
}
else
{
p_idx = (u_int8_t *) bucket_idx + 3;
}
}
}
else
{
if
(*((u_int16_t *) bucket_idx + 2) != (u_int16_t) ~0u)
{
if
(*((u_int8_t *) bucket_idx + 4) != (u_int8_t) ~0u)
{
p_idx = (u_int8_t *) bucket_idx + 4;
}
else
{
p_idx = (u_int8_t *) bucket_idx + 5;
}
}
else
{
if
(*((u_int8_t *) bucket_idx + 6) != (u_int8_t) ~0u)
{
p_idx = (u_int8_t *) bucket_idx + 6;
}
else
{
p_idx = (u_int8_t *) bucket_idx + 7;
}
}
}
idx = get_bit(p_idx) +
8 * (p_idx - (u_int8_t *) bucket_start_idx) +
the_bucket * BUCKET_CAPACITY;
bucket_used[the_bucket] ++;
break
;
}
else
{
continue
;
}
}
GET_BLOCK_RET:
pthread_mutex_unlock(&alloc_lock);
return
idx;
}
void gkoAlloc::free_block(int block_id)
{
if
(block_id < 0)
return
;
int bucket_no = block_id / BUCKET_CAPACITY;
pthread_mutex_lock(&alloc_lock);
free_bit(&m_map[block_id / 8], block_id % 8);
if
(--bucket_used[bucket_no] == 0)
{
free
(bucket_s[bucket_no]);
bucket_s[bucket_no] = NULL;
}
else
{
latest_bucket = bucket_no;
}
pthread_mutex_unlock(&alloc_lock);
}
#ifdef MEM_TEST
/**
 * Self-test, built only when MEM_TEST is defined.
 *
 * Reserves up to BUCKET_CAPACITY - 1 blocks, printing "expected, got" for
 * each and stopping at the first id that differs from its sequence number.
 * Then reserves three more blocks, prints their addresses and frees those
 * three again.
 */
int main()
{
    gkoAlloc pool;

    int expected = 0;
    while (expected < BUCKET_CAPACITY - 1)
    {
        const int got = pool.get_block();
        printf("%d, %d\n", expected, got);
        if (got != expected)
            break;
        ++expected;
    }

    /// three extra blocks: reserve all, print all, then release all
    int extra[3];
    for (int n = 0; n < 3; ++n)
        extra[n] = pool.get_block();
    for (int n = 0; n < 3; ++n)
        printf("%p\n", pool.id2addr(extra[n]));
    for (int n = 0; n < 3; ++n)
        pool.free_block(extra[n]);

    return 0;
}
#endif
|
编译测试:
g++ -DMEM_TEST memory.cpp -o memory && ./memory
需要的拿去
本文转自 xjtuhit 51CTO博客,原文链接:http://blog.51cto.com/51reboot/1560498