/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _LINUX_OBJPOOL_H
#define _LINUX_OBJPOOL_H

#include <linux/types.h>
#include <linux/refcount.h>
#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <linux/irqflags.h>
#include <linux/smp.h>

/*
 * objpool: ring-array based lockless MPMC queue
 *
 * Copyright: [email protected],[email protected]
 *
 * objpool is a scalable implementation of a high-performance queue for
 * object allocation and reclamation, such as kretprobe instances.
 *
 * By leveraging a percpu ring-array to mitigate hot spots of memory
 * contention, it delivers near-linear scalability in highly parallel
 * scenarios. The objpool is best suited for the following cases:
 * 1) Memory allocation or reclamation is prohibited or too expensive
 * 2) Consumers are of different priorities, such as irqs and threads
 *
 * Limitations:
 * 1) Maximum number of objects (capacity) is fixed after objpool creation
 * 2) All pre-allocated objects are managed in a percpu ring array,
 *    which consumes more memory than a linked list
 */

/**
 * struct objpool_slot - percpu ring array of objpool
 * @head: head sequence of the local ring array (to retrieve at)
 * @tail: tail sequence of the local ring array (to append at)
 * @last: the last sequence number marked as ready for retrieval
 * @mask: bits mask for modulo capacity to compute array indexes
 * @entries: object entries on this slot
 *
 * Represents a cpu-local, array-based ring buffer whose size is fixed
 * during initialization of the object pool. On NUMA systems each percpu
 * slot is allocated from node-local memory and kept compact in contiguous
 * memory: the objects assigned to this CPU are stored right after the
 * body of the objpool_slot.
 *
 * The real size of the ring array is far smaller than the value range of
 * head and tail, which are typed uint32_t: [0, 2^32), so only the lower
 * bits (mask) of head and tail are used as the actual position in the
 * ring array. In general the ring array acts like a small sliding window
 * that keeps moving forward within [0, 2^32).
 */
struct objpool_slot {
        uint32_t head;
        uint32_t tail;
        uint32_t last;
        uint32_t mask;
        void *entries[];
} __packed;
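
/*
 * Illustrative sketch of the index computation (not from the original
 * source): with a capacity of 8 the mask is 7, and a 32-bit sequence
 * number simply wraps into the array:
 *
 *      head = 0xfffffffe  ->  index = head & mask = 6
 *      head = 0xffffffff  ->  index = 7
 *      head = 0x00000000  ->  index = 0   (uint32_t wrap-around is harmless)
 */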

struct objpool_head;

/*
 * caller-specified callback for initial object setup; it is called only
 * once per object (just after the memory allocation of the object)
 */
typedef int (*objpool_init_obj_cb)(void *obj, void *context);

/* caller-specified cleanup callback for objpool destruction */
typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context);

/**
 * struct objpool_head - object pooling metadata
 * @obj_size: object size, aligned to sizeof(void *)
 * @nr_objs: total objects (to be pre-allocated with objpool)
 * @nr_cpus: local copy of nr_cpu_ids
 * @capacity: max objects that can be managed by one objpool_slot
 * @gfp: gfp flags for kmalloc & vmalloc
 * @ref: refcount of objpool
 * @flags: flags for objpool management
 * @cpu_slots: pointer to the array of objpool_slot
 * @release: resource cleanup callback
 * @context: caller-provided context
 */
struct objpool_head {
        int obj_size;
        int nr_objs;
        int nr_cpus;
        int capacity;
        gfp_t gfp;
        refcount_t ref;
        unsigned long flags;
        struct objpool_slot **cpu_slots;
        objpool_fini_cb release;
        void *context;
};

#define OBJPOOL_NR_OBJECT_MAX  (1UL << 24) /* maximum number of total objects */
#define OBJPOOL_OBJECT_SIZE_MAX (1UL << 16) /* maximum size of an object */

/**
 * objpool_init() - initialize objpool and pre-allocated objects
 * @pool: the object pool to be initialized, declared by caller
 * @nr_objs: total objects to be pre-allocated by this object pool
 * @object_size: size of an object (should be > 0)
 * @gfp: flags for memory allocation (via kmalloc or vmalloc)
 * @context: user context for object initialization callback
 * @objinit: object initialization callback for extra setup
 * @release: cleanup callback for extra cleanup task
 *
 * return value: 0 for success, otherwise error code
 *
 * All pre-allocated objects are zeroed after memory allocation. The
 * caller can do extra initialization in the objinit callback. objinit()
 * is called just after slot allocation and only once for each object.
 * After that the objpool won't touch the contents of the objects. It is
 * the caller's duty to perform re-initialization after each pop (object
 * allocation) or to do clearance before each push (object reclamation).
 */
int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
                 gfp_t gfp, void *context, objpool_init_obj_cb objinit,
                 objpool_fini_cb release);
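
/*
 * Illustrative usage sketch (not part of this header; "struct my_obj",
 * "my_obj_init" and "ctx" are hypothetical caller-side names):
 *
 *      static int my_obj_init(void *obj, void *context)
 *      {
 *              struct my_obj *o = obj;
 *
 *              o->owner = context;
 *              return 0;
 *      }
 *
 *      struct objpool_head pool;
 *      int rc = objpool_init(&pool, 512, sizeof(struct my_obj),
 *                            GFP_KERNEL, ctx, my_obj_init, NULL);
 *      if (rc)
 *              return rc;
 */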

/* try to retrieve object from slot */
static inline void *__objpool_try_get_slot(struct objpool_head *pool, int cpu)
{
        struct objpool_slot *slot = pool->cpu_slots[cpu];
        /* load head snapshot, other cpus may change it */
        uint32_t head = smp_load_acquire(&slot->head);

        while (head != READ_ONCE(slot->last)) {
                void *obj;

                /*
                 * data visibility of 'last' and 'head' could be out of
                 * order since memory updating of 'last' and 'head' are
                 * performed in push() and pop() independently
                 *
                 * before any retrieving attempts, pop() must guarantee
                 * 'head' is behind 'last', that is to say, there must
                 * be available objects in the slot, which is ensured by
                 * the condition 'last != head && last - head <= nr_objs'
                 * that is equivalent to 'last - head - 1 < nr_objs' as
                 * 'last' and 'head' are both unsigned int32
                 */
                if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) {
                        head = READ_ONCE(slot->head);
                        continue;
                }

                /* obj must be retrieved before moving forward head */
                obj = READ_ONCE(slot->entries[head & slot->mask]);

                /* move head forward to mark its consumption */
                if (try_cmpxchg_release(&slot->head, &head, head + 1))
                        return obj;
        }

        return NULL;
}

/**
 * objpool_pop() - allocate an object from objpool
 * @pool: object pool
 *
 * return value: object ptr or NULL if failed
 */
static inline void *objpool_pop(struct objpool_head *pool)
{
        void *obj = NULL;
        unsigned long flags;
        int i, cpu;

        /* disable local irq to avoid preemption & interruption */
        raw_local_irq_save(flags);

        cpu = raw_smp_processor_id();
        for (i = 0; i < num_possible_cpus(); i++) {
                obj = __objpool_try_get_slot(pool, cpu);
                if (obj)
                        break;
                cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1);
        }
        raw_local_irq_restore(flags);

        return obj;
}
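
/*
 * Illustrative usage sketch (hypothetical caller code, reusing the names
 * from the objpool_init() sketch above): objpool never touches object
 * contents after objinit(), so the caller re-initializes whatever state
 * it relies on after each successful pop:
 *
 *      struct my_obj *o = objpool_pop(&pool);
 *
 *      if (o) {
 *              o->owner = NULL;        // caller-side re-initialization
 *              // ... use the object ...
 *      }
 */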

/* adding object to slot, abort if the slot was already full */
static inline int
__objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu)
{
        struct objpool_slot *slot = pool->cpu_slots[cpu];
        uint32_t head, tail;

        /* loading tail and head as a local snapshot, tail first */
        tail = READ_ONCE(slot->tail);

        do {
                head = READ_ONCE(slot->head);
                /* fault caught: something must be wrong */
                WARN_ON_ONCE(tail - head > pool->nr_objs);
        } while (!try_cmpxchg_acquire(&slot->tail, &tail, tail + 1));

        /* now the tail position is reserved for the given obj */
        WRITE_ONCE(slot->entries[tail & slot->mask], obj);
        /* update sequence to make this obj available for pop() */
        smp_store_release(&slot->last, tail + 1);

        return 0;
}

/**
 * objpool_push() - reclaim the object and return it back to objpool
 * @obj: object ptr to be pushed to objpool
 * @pool: object pool
 *
 * return: 0 or error code (it fails only when the user tries to push
 * the same object multiple times or wrong "objects" into objpool)
 */
static inline int objpool_push(void *obj, struct objpool_head *pool)
{
        unsigned long flags;
        int rc;

        /* disable local irq to avoid preemption & interruption */
        raw_local_irq_save(flags);
        rc = __objpool_try_add_slot(obj, pool, raw_smp_processor_id());
        raw_local_irq_restore(flags);

        return rc;
}
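
/*
 * Illustrative usage sketch (hypothetical caller code): push the object
 * back only once it is no longer referenced anywhere else; pushing the
 * same object twice corrupts the pool's accounting:
 *
 *      // ... done with the object ...
 *      objpool_push(o, &pool);
 *      o = NULL;       // guard against accidental reuse or double push
 */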


/**
 * objpool_drop() - discard the object and deref objpool
 * @obj: object ptr to be discarded
 * @pool: object pool
 *
 * return: 0 if objpool was released; -EAGAIN if there are still
 * outstanding objects
 *
 * objpool_drop() is normally used to release outstanding objects after
 * objpool cleanup (objpool_fini()). Consider this example: a kretprobe
 * is unregistered and objpool_fini() is called to release all remaining
 * objects, but there are still objects being used by unfinished
 * kretprobes (e.g. inside a blockable function such as sys_accept). Only
 * when the last outstanding object is dropped via objpool_drop() can the
 * whole objpool be released.
 */
int objpool_drop(void *obj, struct objpool_head *pool);
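
/*
 * Illustrative sketch of the asynchronous teardown path (hypothetical
 * caller code): after objpool_fini() has run, a late-returning object is
 * dropped instead of pushed, and the objpool memory goes away with the
 * last drop:
 *
 *      // object finally released after objpool_fini()
 *      rc = objpool_drop(o, &pool);
 *      if (!rc)
 *              pr_debug("objpool fully released\n");
 */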

/**
 * objpool_free() - release objpool forcibly (all objects to be freed)
 * @pool: object pool to be released
 */
void objpool_free(struct objpool_head *pool);

/**
 * objpool_fini() - deref object pool (also releasing unused objects)
 * @pool: object pool to be dereferenced
 *
 * objpool_fini() tries to release all remaining free objects and then
 * drops an extra reference of the objpool. If all objects have already
 * been returned to the objpool (so-called synchronous use cases), the
 * objpool itself is freed as well. But if there are still outstanding
 * objects (so-called asynchronous use cases, such as a blockable
 * kretprobe), the objpool won't be released until all the outstanding
 * objects are dropped; the caller must ensure there are no concurrent
 * objpool_push() calls in flight. Normally RCU is required to make sure
 * all ongoing objpool_push() calls have finished before calling
 * objpool_fini(), as is done by test_objpool, kretprobe and rethook.
 */
void objpool_fini(struct objpool_head *pool);
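
/*
 * Illustrative teardown sketch (hypothetical caller code): for purely
 * synchronous users all objects are already back in the pool, so one
 * objpool_fini() releases everything; asynchronous users combine it with
 * objpool_drop() as described above:
 *
 *      // make sure no objpool_push() can still be running, e.g. via RCU
 *      synchronize_rcu();
 *      objpool_fini(&pool);
 */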

#endif /* _LINUX_OBJPOOL_H */