/*
 * Copyright (C) 2016 Facebook
 * Copyright (C) 2013-2014 Jens Axboe
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

#include <linux/random.h>
#include <linux/sbitmap.h>

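/*
 * Set up a scalable bitmap: the @depth bits are split across ->map_nr
 * words of at most (1U << shift) bits each, so that allocations spread
 * over several cachelines instead of contending on a single word.
 */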
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
		      gfp_t flags, int node)
{
	unsigned int bits_per_word;
	unsigned int i;

	if (shift < 0) {
		shift = ilog2(BITS_PER_LONG);
		/*
		 * If the bitmap is small, shrink the number of bits per word so
		 * we spread over a few cachelines, at least. If less than 4
		 * bits, just forget about it, it's not going to work optimally
		 * anyway.
		 */
		if (depth >= 4) {
			while ((4U << shift) > depth)
				shift--;
		}
	}
	bits_per_word = 1U << shift;
	if (bits_per_word > BITS_PER_LONG)
		return -EINVAL;

	sb->shift = shift;
	sb->depth = depth;
	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);

	if (depth == 0) {
		sb->map = NULL;
		return 0;
	}

	sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
	if (!sb->map)
		return -ENOMEM;

	for (i = 0; i < sb->map_nr; i++) {
		sb->map[i].depth = min(depth, bits_per_word);
		depth -= sb->map[i].depth;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_init_node);

void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
{
	unsigned int bits_per_word = 1U << sb->shift;
	unsigned int i;

	sb->depth = depth;
	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);

	for (i = 0; i < sb->map_nr; i++) {
		sb->map[i].depth = min(depth, bits_per_word);
		depth -= sb->map[i].depth;
	}
}
EXPORT_SYMBOL_GPL(sbitmap_resize);

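/*
 * Try to grab a free bit within a single word, starting the search at
 * @hint. find_next_zero_bit() is not atomic, so the bit is claimed with
 * test_and_set_bit() and the search retries on a lost race. If @wrap is
 * set, a failed search restarts once from bit 0 before giving up.
 */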
static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
			      bool wrap)
{
	unsigned int orig_hint = hint;
	int nr;

	while (1) {
		nr = find_next_zero_bit(&word->word, word->depth, hint);
		if (unlikely(nr >= word->depth)) {
			/*
			 * We started with an offset, and we didn't reset the
			 * offset to 0 in a failure case, so start from 0 to
			 * exhaust the map.
			 */
			if (orig_hint && hint && wrap) {
				hint = orig_hint = 0;
				continue;
			}
			return -1;
		}

		if (!test_and_set_bit(nr, &word->word))
			break;

		hint = nr + 1;
		if (hint >= word->depth - 1)
			hint = 0;
	}

	return nr;
}

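/*
 * Allocate a free bit anywhere in the map: start in the word that
 * @alloc_hint falls into and walk the remaining words, wrapping around
 * to word 0, until a bit is found or every word has been tried.
 */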
int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
{
	unsigned int i, index;
	int nr = -1;

	index = SB_NR_TO_INDEX(sb, alloc_hint);

	for (i = 0; i < sb->map_nr; i++) {
		nr = __sbitmap_get_word(&sb->map[index],
					SB_NR_TO_BIT(sb, alloc_hint),
					!round_robin);
		if (nr != -1) {
			nr += index << sb->shift;
			break;
		}

		/* Jump to next index. */
		index++;
		alloc_hint = index << sb->shift;

		if (index >= sb->map_nr) {
			index = 0;
			alloc_hint = 0;
		}
	}

	return nr;
}
EXPORT_SYMBOL_GPL(sbitmap_get);

bool sbitmap_any_bit_set(const struct sbitmap *sb)
{
	unsigned int i;

	for (i = 0; i < sb->map_nr; i++) {
		if (sb->map[i].word)
			return true;
	}
	return false;
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);

bool sbitmap_any_bit_clear(const struct sbitmap *sb)
{
	unsigned int i;

	for (i = 0; i < sb->map_nr; i++) {
		const struct sbitmap_word *word = &sb->map[i];
		unsigned long ret;

		ret = find_first_zero_bit(&word->word, word->depth);
		if (ret < word->depth)
			return true;
	}
	return false;
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);

unsigned int sbitmap_weight(const struct sbitmap *sb)
{
	unsigned int i, weight = 0;

	for (i = 0; i < sb->map_nr; i++) {
		const struct sbitmap_word *word = &sb->map[i];

		weight += bitmap_weight(&word->word, word->depth);
	}
	return weight;
}
EXPORT_SYMBOL_GPL(sbitmap_weight);

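/*
 * Clamp the wake batch so that @depth bits are enough to wake every wait
 * queue at least once. For example, assuming SBQ_WAIT_QUEUES is 8 and
 * SBQ_WAKE_BATCH is 8, a depth of 16 gives a batch of 16 / 8 = 2.
 */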
static unsigned int sbq_calc_wake_batch(unsigned int depth)
{
	unsigned int wake_batch;

	/*
	 * For each batch, we wake up one queue. We need to make sure that our
	 * batch size is small enough that the full depth of the bitmap is
	 * enough to wake up all of the queues.
	 */
	wake_batch = SBQ_WAKE_BATCH;
	if (wake_batch > depth / SBQ_WAIT_QUEUES)
		wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);

	return wake_batch;
}

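/*
 * Initialize the waitqueue-backed variant: the underlying sbitmap plus
 * per-cpu allocation hints and SBQ_WAIT_QUEUES wait queues, each charged
 * with a full wake batch.
 */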
int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
			    int shift, bool round_robin, gfp_t flags, int node)
{
	int ret;
	int i;

	ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
	if (ret)
		return ret;

	sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
	if (!sbq->alloc_hint) {
		sbitmap_free(&sbq->sb);
		return -ENOMEM;
	}

	if (depth && !round_robin) {
		for_each_possible_cpu(i)
			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
	}

	sbq->wake_batch = sbq_calc_wake_batch(depth);
	atomic_set(&sbq->wake_index, 0);

	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
	if (!sbq->ws) {
		free_percpu(sbq->alloc_hint);
		sbitmap_free(&sbq->sb);
		return -ENOMEM;
	}

	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		init_waitqueue_head(&sbq->ws[i].wait);
		atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
	}

	sbq->round_robin = round_robin;
	return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);

void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
	sbq->wake_batch = sbq_calc_wake_batch(depth);
	sbitmap_resize(&sbq->sb, depth);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_resize);

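/*
 * Allocate a bit using this cpu's cached hint. A stale hint (>= depth,
 * e.g. after a resize) is replaced with a random one, and the hint is
 * only advanced when the allocation actually used it.
 */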
int __sbitmap_queue_get(struct sbitmap_queue *sbq)
{
	unsigned int hint, depth;
	int nr;

	hint = this_cpu_read(*sbq->alloc_hint);
	depth = READ_ONCE(sbq->sb.depth);
	if (unlikely(hint >= depth)) {
		hint = depth ? prandom_u32() % depth : 0;
		this_cpu_write(*sbq->alloc_hint, hint);
	}
	nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);

	if (nr == -1) {
		/* If the map is full, a hint won't do us much good. */
		this_cpu_write(*sbq->alloc_hint, 0);
	} else if (nr == hint || unlikely(sbq->round_robin)) {
		/* Only update the hint if we used it. */
		hint = nr + 1;
		if (hint >= depth - 1)
			hint = 0;
		this_cpu_write(*sbq->alloc_hint, hint);
	}

	return nr;
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get);

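/*
 * Pick the next wait queue that actually has waiters, scanning round-robin
 * from ->wake_index. ->wake_index is nudged toward the queue found with a
 * best-effort cmpxchg so that later wakeups start their scan there.
 */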
static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
{
	int i, wake_index;

	wake_index = atomic_read(&sbq->wake_index);
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[wake_index];

		if (waitqueue_active(&ws->wait)) {
			int o = atomic_read(&sbq->wake_index);

			if (wake_index != o)
				atomic_cmpxchg(&sbq->wake_index, o, wake_index);
			return ws;
		}

		wake_index = sbq_index_inc(wake_index);
	}

	return NULL;
}

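/*
 * Called after a bit is cleared: charge the freed bit against the current
 * wait queue's wait_cnt, and once a full wake_batch has been freed,
 * recharge the counter, advance ->wake_index, and wake up that queue.
 */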
static void sbq_wake_up(struct sbitmap_queue *sbq)
{
	struct sbq_wait_state *ws;
	int wait_cnt;

	/* Ensure that the wait list checks occur after clear_bit(). */
	smp_mb();

	ws = sbq_wake_ptr(sbq);
	if (!ws)
		return;

	wait_cnt = atomic_dec_return(&ws->wait_cnt);
	if (unlikely(wait_cnt < 0))
		wait_cnt = atomic_inc_return(&ws->wait_cnt);
	if (wait_cnt == 0) {
		atomic_add(sbq->wake_batch, &ws->wait_cnt);
		sbq_index_atomic_inc(&sbq->wake_index);
		wake_up(&ws->wait);
	}
}

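/*
 * Free bit @nr: clear it, kick the wakeup machinery, and remember the bit
 * as this cpu's next allocation hint unless round-robin mode is in use.
 */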
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
			 unsigned int cpu)
{
	sbitmap_clear_bit(&sbq->sb, nr);
	sbq_wake_up(sbq);
	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
}
EXPORT_SYMBOL_GPL(sbitmap_queue_clear);

void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
{
	int i, wake_index;

	/*
	 * Make sure all changes prior to this are visible from other CPUs.
	 */
	smp_mb();
	wake_index = atomic_read(&sbq->wake_index);
	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
		struct sbq_wait_state *ws = &sbq->ws[wake_index];

		if (waitqueue_active(&ws->wait))
			wake_up_all(&ws->wait);

		wake_index = sbq_index_inc(wake_index);
	}
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);