kernel/bpf/syscall.c

   1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   2  *
   3  * This program is free software; you can redistribute it and/or
   4  * modify it under the terms of version 2 of the GNU General Public
   5  * License as published by the Free Software Foundation.
   6  *
   7  * This program is distributed in the hope that it will be useful, but
   8  * WITHOUT ANY WARRANTY; without even the implied warranty of
   9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10  * General Public License for more details.
  11  */
  12 #include <linux/bpf.h>
  13 #include <linux/bpf_trace.h>
  14 #include <linux/syscalls.h>
  15 #include <linux/slab.h>
  16 #include <linux/sched/signal.h>
  17 #include <linux/vmalloc.h>
  18 #include <linux/mmzone.h>
  19 #include <linux/anon_inodes.h>
  20 #include <linux/file.h>
  21 #include <linux/license.h>
  22 #include <linux/filter.h>
  23 #include <linux/version.h>
  24 #include <linux/kernel.h>
  25
  26 DEFINE_PER_CPU(int, bpf_prog_active);
  27
  28 int sysctl_unprivileged_bpf_disabled __read_mostly;
  29
  30 static const struct bpf_map_ops * const bpf_map_types[] = {
  31 #define BPF_PROG_TYPE(_id, _ops)
  32 #define BPF_MAP_TYPE(_id, _ops) \
  33         [_id] = &_ops,
  34 #include <linux/bpf_types.h>
  35 #undef BPF_PROG_TYPE
  36 #undef BPF_MAP_TYPE
  37 };
  38
  39 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
  40 {
  41         struct bpf_map *map;
  42
  43         if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
  44             !bpf_map_types[attr->map_type])
  45                 return ERR_PTR(-EINVAL);
  46
  47         map = bpf_map_types[attr->map_type]->map_alloc(attr);
  48         if (IS_ERR(map))
  49                 return map;
  50         map->ops = bpf_map_types[attr->map_type];
  51         map->map_type = attr->map_type;
  52         return map;
  53 }
  54
  55 void *bpf_map_area_alloc(size_t size)
  56 {
  57         /* We definitely need __GFP_NORETRY, so OOM killer doesn't
  58          * trigger under memory pressure as we really just want to
  59          * fail instead.
  60          */
  61         const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
  62         void *area;
  63
  64         if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
  65                 area = kmalloc(size, GFP_USER | flags);
  66                 if (area != NULL)
  67                         return area;
  68         }
  69
  70         return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
  71 }
  72
  73 void bpf_map_area_free(void *area)
  74 {
  75         kvfree(area);
  76 }
  77
  78 int bpf_map_precharge_memlock(u32 pages)
  79 {
  80         struct user_struct *user = get_current_user();
  81         unsigned long memlock_limit, cur;
  82
  83         memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
  84         cur = atomic_long_read(&user->locked_vm);
  85         free_uid(user);
  86         if (cur + pages > memlock_limit)
  87                 return -EPERM;
  88         return 0;
  89 }
  90
  91 static int bpf_map_charge_memlock(struct bpf_map *map)
  92 {
  93         struct user_struct *user = get_current_user();
  94         unsigned long memlock_limit;
  95
  96         memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
  97
  98         atomic_long_add(map->pages, &user->locked_vm);
  99
 100         if (atomic_long_read(&user->locked_vm) > memlock_limit) {
 101                 atomic_long_sub(map->pages, &user->locked_vm);
 102                 free_uid(user);
 103                 return -EPERM;
 104         }
 105         map->user = user;
 106         return 0;
 107 }
 108
 109 static void bpf_map_uncharge_memlock(struct bpf_map *map)
 110 {
 111         struct user_struct *user = map->user;
 112
 113         atomic_long_sub(map->pages, &user->locked_vm);
 114         free_uid(user);
 115 }
 116
 117 /* called from workqueue */
 118 static void bpf_map_free_deferred(struct work_struct *work)
 119 {
 120         struct bpf_map *map = container_of(work, struct bpf_map, work);
 121
 122         bpf_map_uncharge_memlock(map);
 123         /* implementation dependent freeing */
 124         map->ops->map_free(map);
 125 }
 126
 127 static void bpf_map_put_uref(struct bpf_map *map)
 128 {
 129         if (atomic_dec_and_test(&map->usercnt)) {
 130                 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
 131                         bpf_fd_array_map_clear(map);
 132         }
 133 }
 134
 135 /* decrement map refcnt and schedule it for freeing via workqueue
 136  * (unrelying map implementation ops->map_free() might sleep)
 137  */
 138 void bpf_map_put(struct bpf_map *map)
 139 {
 140         if (atomic_dec_and_test(&map->refcnt)) {
 141                 INIT_WORK(&map->work, bpf_map_free_deferred);
 142                 schedule_work(&map->work);
 143         }
 144 }
 145
 146 void bpf_map_put_with_uref(struct bpf_map *map)
 147 {
 148         bpf_map_put_uref(map);
 149         bpf_map_put(map);
 150 }
 151
 152 static int bpf_map_release(struct inode *inode, struct file *filp)
 153 {
 154         struct bpf_map *map = filp->private_data;
 155
 156         if (map->ops->map_release)
 157                 map->ops->map_release(map, filp);
 158
 159         bpf_map_put_with_uref(map);
 160         return 0;
 161 }
 162
 163 #ifdef CONFIG_PROC_FS
 164 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 165 {
 166         const struct bpf_map *map = filp->private_data;
 167         const struct bpf_array *array;
 168         u32 owner_prog_type = 0;
 169
 170         if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
 171                 array = container_of(map, struct bpf_array, map);
 172                 owner_prog_type = array->owner_prog_type;
 173         }
 174
 175         seq_printf(m,
 176                    "map_type:\t%u\n"
 177                    "key_size:\t%u\n"
 178                    "value_size:\t%u\n"
 179                    "max_entries:\t%u\n"
 180                    "map_flags:\t%#x\n"
 181                    "memlock:\t%llu\n",
 182                    map->map_type,
 183                    map->key_size,
 184                    map->value_size,
 185                    map->max_entries,
 186                    map->map_flags,
 187                    map->pages * 1ULL << PAGE_SHIFT);
 188
 189         if (owner_prog_type)
 190                 seq_printf(m, "owner_prog_type:\t%u\n",
 191                            owner_prog_type);
 192 }
 193 #endif
 194
 195 static const struct file_operations bpf_map_fops = {
 196 #ifdef CONFIG_PROC_FS
 197         .show_fdinfo    = bpf_map_show_fdinfo,
 198 #endif
 199         .release        = bpf_map_release,
 200 };
 201
 202 int bpf_map_new_fd(struct bpf_map *map)
 203 {
 204         return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
 205                                 O_RDWR | O_CLOEXEC);
 206 }
 207
 208 /* helper macro to check that unused fields 'union bpf_attr' are zero */
 209 #define CHECK_ATTR(CMD) \
 210         memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
 211                    sizeof(attr->CMD##_LAST_FIELD), 0, \
 212                    sizeof(*attr) - \
 213                    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
 214                    sizeof(attr->CMD##_LAST_FIELD)) != NULL
 215
 216 #define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
 217 /* called via syscall */
 218 static int map_create(union bpf_attr *attr)
 219 {
 220         struct bpf_map *map;
 221         int err;
 222
 223         err = CHECK_ATTR(BPF_MAP_CREATE);
 224         if (err)
 225                 return -EINVAL;
 226
 227         /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
 228         map = find_and_alloc_map(attr);
 229         if (IS_ERR(map))
 230                 return PTR_ERR(map);
 231
 232         atomic_set(&map->refcnt, 1);
 233         atomic_set(&map->usercnt, 1);
 234
 235         err = bpf_map_charge_memlock(map);
 236         if (err)
 237                 goto free_map_nouncharge;
 238
 239         err = bpf_map_new_fd(map);
 240         if (err < 0)
 241                 /* failed to allocate fd */
 242                 goto free_map;
 243
 244         trace_bpf_map_create(map, err);
 245         return err;
 246
 247 free_map:
 248         bpf_map_uncharge_memlock(map);
 249 free_map_nouncharge:
 250         map->ops->map_free(map);
 251         return err;
 252 }
 253
 254 /* if error is returned, fd is released.
 255  * On success caller should complete fd access with matching fdput()
 256  */
 257 struct bpf_map *__bpf_map_get(struct fd f)
 258 {
 259         if (!f.file)
 260                 return ERR_PTR(-EBADF);
 261         if (f.file->f_op != &bpf_map_fops) {
 262                 fdput(f);
 263                 return ERR_PTR(-EINVAL);
 264         }
 265
 266         return f.file->private_data;
 267 }
 268
 269 /* prog's and map's refcnt limit */
 270 #define BPF_MAX_REFCNT 32768
 271
 272 struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 273 {
 274         if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
 275                 atomic_dec(&map->refcnt);
 276                 return ERR_PTR(-EBUSY);
 277         }
 278         if (uref)
 279                 atomic_inc(&map->usercnt);
 280         return map;
 281 }
 282
 283 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 284 {
 285         struct fd f = fdget(ufd);
 286         struct bpf_map *map;
 287
 288         map = __bpf_map_get(f);
 289         if (IS_ERR(map))
 290                 return map;
 291
 292         map = bpf_map_inc(map, true);
 293         fdput(f);
 294
 295         return map;
 296 }
 297
 298 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 299 {
 300         return -ENOTSUPP;
 301 }
 302
 303 /* last field in 'union bpf_attr' used by this command */
 304 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 305
 306 static int map_lookup_elem(union bpf_attr *attr)
 307 {
 308         void __user *ukey = u64_to_user_ptr(attr->key);
 309         void __user *uvalue = u64_to_user_ptr(attr->value);
 310         int ufd = attr->map_fd;
 311         struct bpf_map *map;
 312         void *key, *value, *ptr;
 313         u32 value_size;
 314         struct fd f;
 315         int err;
 316
 317         if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 318                 return -EINVAL;
 319
 320         f = fdget(ufd);
 321         map = __bpf_map_get(f);
 322         if (IS_ERR(map))
 323                 return PTR_ERR(map);
 324
 325         key = memdup_user(ukey, map->key_size);
 326         if (IS_ERR(key)) {
 327                 err = PTR_ERR(key);
 328                 goto err_put;
 329         }
 330
 331         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 332             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 333             map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 334                 value_size = round_up(map->value_size, 8) * num_possible_cpus();
 335         else
 336                 value_size = map->value_size;
 337
 338         err = -ENOMEM;
 339         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 340         if (!value)
 341                 goto free_key;
 342
 343         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 344             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 345                 err = bpf_percpu_hash_copy(map, key, value);
 346         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 347                 err = bpf_percpu_array_copy(map, key, value);
 348         } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 349                 err = bpf_stackmap_copy(map, key, value);
 350         } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
 351                    map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
 352                 err = -ENOTSUPP;
 353         } else {
 354                 rcu_read_lock();
 355                 ptr = map->ops->map_lookup_elem(map, key);
 356                 if (ptr)
 357                         memcpy(value, ptr, value_size);
 358                 rcu_read_unlock();
 359                 err = ptr ? 0 : -ENOENT;
 360         }
 361
 362         if (err)
 363                 goto free_value;
 364
 365         err = -EFAULT;
 366         if (copy_to_user(uvalue, value, value_size) != 0)
 367                 goto free_value;
 368
 369         trace_bpf_map_lookup_elem(map, ufd, key, value);
 370         err = 0;
 371
 372 free_value:
 373         kfree(value);
 374 free_key:
 375         kfree(key);
 376 err_put:
 377         fdput(f);
 378         return err;
 379 }
 380
 381 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
 382
 383 static int map_update_elem(union bpf_attr *attr)
 384 {
 385         void __user *ukey = u64_to_user_ptr(attr->key);
 386         void __user *uvalue = u64_to_user_ptr(attr->value);
 387         int ufd = attr->map_fd;
 388         struct bpf_map *map;
 389         void *key, *value;
 390         u32 value_size;
 391         struct fd f;
 392         int err;
 393
 394         if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
 395                 return -EINVAL;
 396
 397         f = fdget(ufd);
 398         map = __bpf_map_get(f);
 399         if (IS_ERR(map))
 400                 return PTR_ERR(map);
 401
 402         key = memdup_user(ukey, map->key_size);
 403         if (IS_ERR(key)) {
 404                 err = PTR_ERR(key);
 405                 goto err_put;
 406         }
 407
 408         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 409             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 410             map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 411                 value_size = round_up(map->value_size, 8) * num_possible_cpus();
 412         else
 413                 value_size = map->value_size;
 414
 415         err = -ENOMEM;
 416         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 417         if (!value)
 418                 goto free_key;
 419
 420         err = -EFAULT;
 421         if (copy_from_user(value, uvalue, value_size) != 0)
 422                 goto free_value;
 423
 424         /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
 425          * inside bpf map update or delete otherwise deadlocks are possible
 426          */
 427         preempt_disable();
 428         __this_cpu_inc(bpf_prog_active);
 429         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 430             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 431                 err = bpf_percpu_hash_update(map, key, value, attr->flags);
 432         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 433                 err = bpf_percpu_array_update(map, key, value, attr->flags);
 434         } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
 435                    map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
 436                    map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
 437                    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
 438                 rcu_read_lock();
 439                 err = bpf_fd_array_map_update_elem(map, f.file, key, value,
 440                                                    attr->flags);
 441                 rcu_read_unlock();
 442         } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
 443                 rcu_read_lock();
 444                 err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
 445                                                   attr->flags);
 446                 rcu_read_unlock();
 447         } else {
 448                 rcu_read_lock();
 449                 err = map->ops->map_update_elem(map, key, value, attr->flags);
 450                 rcu_read_unlock();
 451         }
 452         __this_cpu_dec(bpf_prog_active);
 453         preempt_enable();
 454
 455         if (!err)
 456                 trace_bpf_map_update_elem(map, ufd, key, value);
 457 free_value:
 458         kfree(value);
 459 free_key:
 460         kfree(key);
 461 err_put:
 462         fdput(f);
 463         return err;
 464 }
 465
 466 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
 467
 468 static int map_delete_elem(union bpf_attr *attr)
 469 {
 470         void __user *ukey = u64_to_user_ptr(attr->key);
 471         int ufd = attr->map_fd;
 472         struct bpf_map *map;
 473         struct fd f;
 474         void *key;
 475         int err;
 476
 477         if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
 478                 return -EINVAL;
 479
 480         f = fdget(ufd);
 481         map = __bpf_map_get(f);
 482         if (IS_ERR(map))
 483                 return PTR_ERR(map);
 484
 485         key = memdup_user(ukey, map->key_size);
 486         if (IS_ERR(key)) {
 487                 err = PTR_ERR(key);
 488                 goto err_put;
 489         }
 490
 491         preempt_disable();
 492         __this_cpu_inc(bpf_prog_active);
 493         rcu_read_lock();
 494         err = map->ops->map_delete_elem(map, key);
 495         rcu_read_unlock();
 496         __this_cpu_dec(bpf_prog_active);
 497         preempt_enable();
 498
 499         if (!err)
 500                 trace_bpf_map_delete_elem(map, ufd, key);
 501         kfree(key);
 502 err_put:
 503         fdput(f);
 504         return err;
 505 }
 506
 507 /* last field in 'union bpf_attr' used by this command */
 508 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
 509
 510 static int map_get_next_key(union bpf_attr *attr)
 511 {
 512         void __user *ukey = u64_to_user_ptr(attr->key);
 513         void __user *unext_key = u64_to_user_ptr(attr->next_key);
 514         int ufd = attr->map_fd;
 515         struct bpf_map *map;
 516         void *key, *next_key;
 517         struct fd f;
 518         int err;
 519
 520         if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
 521                 return -EINVAL;
 522
 523         f = fdget(ufd);
 524         map = __bpf_map_get(f);
 525         if (IS_ERR(map))
 526                 return PTR_ERR(map);
 527
 528         if (ukey) {
 529                 key = memdup_user(ukey, map->key_size);
 530                 if (IS_ERR(key)) {
 531                         err = PTR_ERR(key);
 532                         goto err_put;
 533                 }
 534         } else {
 535                 key = NULL;
 536         }
 537
 538         err = -ENOMEM;
 539         next_key = kmalloc(map->key_size, GFP_USER);
 540         if (!next_key)
 541                 goto free_key;
 542
 543         rcu_read_lock();
 544         err = map->ops->map_get_next_key(map, key, next_key);
 545         rcu_read_unlock();
 546         if (err)
 547                 goto free_next_key;
 548
 549         err = -EFAULT;
 550         if (copy_to_user(unext_key, next_key, map->key_size) != 0)
 551                 goto free_next_key;
 552
 553         trace_bpf_map_next_key(map, ufd, key, next_key);
 554         err = 0;
 555
 556 free_next_key:
 557         kfree(next_key);
 558 free_key:
 559         kfree(key);
 560 err_put:
 561         fdput(f);
 562         return err;
 563 }
 564
 565 static const struct bpf_verifier_ops * const bpf_prog_types[] = {
 566 #define BPF_PROG_TYPE(_id, _ops) \
 567         [_id] = &_ops,
 568 #define BPF_MAP_TYPE(_id, _ops)
 569 #include <linux/bpf_types.h>
 570 #undef BPF_PROG_TYPE
 571 #undef BPF_MAP_TYPE
 572 };
 573
 574 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 575 {
 576         if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
 577                 return -EINVAL;
 578
 579         prog->aux->ops = bpf_prog_types[type];
 580         prog->type = type;
 581         return 0;
 582 }
 583
 584 /* drop refcnt on maps used by eBPF program and free auxilary data */
 585 static void free_used_maps(struct bpf_prog_aux *aux)
 586 {
 587         int i;
 588
 589         for (i = 0; i < aux->used_map_cnt; i++)
 590                 bpf_map_put(aux->used_maps[i]);
 591
 592         kfree(aux->used_maps);
 593 }
 594
 595 int __bpf_prog_charge(struct user_struct *user, u32 pages)
 596 {
 597         unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 598         unsigned long user_bufs;
 599
 600         if (user) {
 601                 user_bufs = atomic_long_add_return(pages, &user->locked_vm);
 602                 if (user_bufs > memlock_limit) {
 603                         atomic_long_sub(pages, &user->locked_vm);
 604                         return -EPERM;
 605                 }
 606         }
 607
 608         return 0;
 609 }
 610
 611 void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
 612 {
 613         if (user)
 614                 atomic_long_sub(pages, &user->locked_vm);
 615 }
 616
 617 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
 618 {
 619         struct user_struct *user = get_current_user();
 620         int ret;
 621
 622         ret = __bpf_prog_charge(user, prog->pages);
 623         if (ret) {
 624                 free_uid(user);
 625                 return ret;
 626         }
 627
 628         prog->aux->user = user;
 629         return 0;
 630 }
 631
 632 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
 633 {
 634         struct user_struct *user = prog->aux->user;
 635
 636         __bpf_prog_uncharge(user, prog->pages);
 637         free_uid(user);
 638 }
 639
 640 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 641 {
 642         struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
 643
 644         free_used_maps(aux);
 645         bpf_prog_uncharge_memlock(aux->prog);
 646         bpf_prog_free(aux->prog);
 647 }
 648
 649 void bpf_prog_put(struct bpf_prog *prog)
 650 {
 651         if (atomic_dec_and_test(&prog->aux->refcnt)) {
 652                 trace_bpf_prog_put_rcu(prog);
 653                 bpf_prog_kallsyms_del(prog);
 654                 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 655         }
 656 }
 657 EXPORT_SYMBOL_GPL(bpf_prog_put);
 658
 659 static int bpf_prog_release(struct inode *inode, struct file *filp)
 660 {
 661         struct bpf_prog *prog = filp->private_data;
 662
 663         bpf_prog_put(prog);
 664         return 0;
 665 }
 666
 667 #ifdef CONFIG_PROC_FS
 668 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 669 {
 670         const struct bpf_prog *prog = filp->private_data;
 671         char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
 672
 673         bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 674         seq_printf(m,
 675                    "prog_type:\t%u\n"
 676                    "prog_jited:\t%u\n"
 677                    "prog_tag:\t%s\n"
 678                    "memlock:\t%llu\n",
 679                    prog->type,
 680                    prog->jited,
 681                    prog_tag,
 682                    prog->pages * 1ULL << PAGE_SHIFT);
 683 }
 684 #endif
 685
 686 static const struct file_operations bpf_prog_fops = {
 687 #ifdef CONFIG_PROC_FS
 688         .show_fdinfo    = bpf_prog_show_fdinfo,
 689 #endif
 690         .release        = bpf_prog_release,
 691 };
 692
 693 int bpf_prog_new_fd(struct bpf_prog *prog)
 694 {
 695         return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
 696                                 O_RDWR | O_CLOEXEC);
 697 }
 698
 699 static struct bpf_prog *____bpf_prog_get(struct fd f)
 700 {
 701         if (!f.file)
 702                 return ERR_PTR(-EBADF);
 703         if (f.file->f_op != &bpf_prog_fops) {
 704                 fdput(f);
 705                 return ERR_PTR(-EINVAL);
 706         }
 707
 708         return f.file->private_data;
 709 }
 710
 711 struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
 712 {
 713         if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
 714                 atomic_sub(i, &prog->aux->refcnt);
 715                 return ERR_PTR(-EBUSY);
 716         }
 717         return prog;
 718 }
 719 EXPORT_SYMBOL_GPL(bpf_prog_add);
 720
 721 void bpf_prog_sub(struct bpf_prog *prog, int i)
 722 {
 723         /* Only to be used for undoing previous bpf_prog_add() in some
 724          * error path. We still know that another entity in our call
 725          * path holds a reference to the program, thus atomic_sub() can
 726          * be safely used in such cases!
 727          */
 728         WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
 729 }
 730 EXPORT_SYMBOL_GPL(bpf_prog_sub);
 731
 732 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
 733 {
 734         return bpf_prog_add(prog, 1);
 735 }
 736 EXPORT_SYMBOL_GPL(bpf_prog_inc);
 737
 738 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
 739 {
 740         struct fd f = fdget(ufd);
 741         struct bpf_prog *prog;
 742
 743         prog = ____bpf_prog_get(f);
 744         if (IS_ERR(prog))
 745                 return prog;
 746         if (type && prog->type != *type) {
 747                 prog = ERR_PTR(-EINVAL);
 748                 goto out;
 749         }
 750
 751         prog = bpf_prog_inc(prog);
 752 out:
 753         fdput(f);
 754         return prog;
 755 }
 756
 757 struct bpf_prog *bpf_prog_get(u32 ufd)
 758 {
 759         return __bpf_prog_get(ufd, NULL);
 760 }
 761
 762 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
 763 {
 764         struct bpf_prog *prog = __bpf_prog_get(ufd, &type);
 765
 766         if (!IS_ERR(prog))
 767                 trace_bpf_prog_get_type(prog);
 768         return prog;
 769 }
 770 EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 771
 772 /* last field in 'union bpf_attr' used by this command */
 773 #define BPF_PROG_LOAD_LAST_FIELD kern_version
 774
 775 static int bpf_prog_load(union bpf_attr *attr)
 776 {
 777         enum bpf_prog_type type = attr->prog_type;
 778         struct bpf_prog *prog;
 779         int err;
 780         char license[128];
 781         bool is_gpl;
 782
 783         if (CHECK_ATTR(BPF_PROG_LOAD))
 784                 return -EINVAL;
 785
 786         /* copy eBPF program license from user space */
 787         if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
 788                               sizeof(license) - 1) < 0)
 789                 return -EFAULT;
 790         license[sizeof(license) - 1] = 0;
 791
 792         /* eBPF programs must be GPL compatible to use GPL-ed functions */
 793         is_gpl = license_is_gpl_compatible(license);
 794
 795         if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
 796                 return -E2BIG;
 797
 798         if (type == BPF_PROG_TYPE_KPROBE &&
 799             attr->kern_version != LINUX_VERSION_CODE)
 800                 return -EINVAL;
 801
 802         if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
 803                 return -EPERM;
 804
 805         /* plain bpf_prog allocation */
 806         prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
 807         if (!prog)
 808                 return -ENOMEM;
 809
 810         err = bpf_prog_charge_memlock(prog);
 811         if (err)
 812                 goto free_prog_nouncharge;
 813
 814         prog->len = attr->insn_cnt;
 815
 816         err = -EFAULT;
 817         if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
 818                            bpf_prog_insn_size(prog)) != 0)
 819                 goto free_prog;
 820
 821         prog->orig_prog = NULL;
 822         prog->jited = 0;
 823
 824         atomic_set(&prog->aux->refcnt, 1);
 825         prog->gpl_compatible = is_gpl ? 1 : 0;
 826
 827         /* find program type: socket_filter vs tracing_filter */
 828         err = find_prog_type(type, prog);
 829         if (err < 0)
 830                 goto free_prog;
 831
 832         /* run eBPF verifier */
 833         err = bpf_check(&prog, attr);
 834         if (err < 0)
 835                 goto free_used_maps;
 836
 837         /* eBPF program is ready to be JITed */
 838         prog = bpf_prog_select_runtime(prog, &err);
 839         if (err < 0)
 840                 goto free_used_maps;
 841
 842         err = bpf_prog_new_fd(prog);
 843         if (err < 0)
 844                 /* failed to allocate fd */
 845                 goto free_used_maps;
 846
 847         bpf_prog_kallsyms_add(prog);
 848         trace_bpf_prog_load(prog, err);
 849         return err;
 850
 851 free_used_maps:
 852         free_used_maps(prog->aux);
 853 free_prog:
 854         bpf_prog_uncharge_memlock(prog);
 855 free_prog_nouncharge:
 856         bpf_prog_free(prog);
 857         return err;
 858 }
 859
 860 #define BPF_OBJ_LAST_FIELD bpf_fd
 861
 862 static int bpf_obj_pin(const union bpf_attr *attr)
 863 {
 864         if (CHECK_ATTR(BPF_OBJ))
 865                 return -EINVAL;
 866
 867         return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
 868 }
 869
 870 static int bpf_obj_get(const union bpf_attr *attr)
 871 {
 872         if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
 873                 return -EINVAL;
 874
 875         return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
 876 }
 877
 878 #ifdef CONFIG_CGROUP_BPF
 879
 880 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 881
 882 static int bpf_prog_attach(const union bpf_attr *attr)
 883 {
 884         enum bpf_prog_type ptype;
 885         struct bpf_prog *prog;
 886         struct cgroup *cgrp;
 887         int ret;
 888
 889         if (!capable(CAP_NET_ADMIN))
 890                 return -EPERM;
 891
 892         if (CHECK_ATTR(BPF_PROG_ATTACH))
 893                 return -EINVAL;
 894
 895         if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
 896                 return -EINVAL;
 897
 898         switch (attr->attach_type) {
 899         case BPF_CGROUP_INET_INGRESS:
 900         case BPF_CGROUP_INET_EGRESS:
 901                 ptype = BPF_PROG_TYPE_CGROUP_SKB;
 902                 break;
 903         case BPF_CGROUP_INET_SOCK_CREATE:
 904                 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 905                 break;
 906         default:
 907                 return -EINVAL;
 908         }
 909
 910         prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
 911         if (IS_ERR(prog))
 912                 return PTR_ERR(prog);
 913
 914         cgrp = cgroup_get_from_fd(attr->target_fd);
 915         if (IS_ERR(cgrp)) {
 916                 bpf_prog_put(prog);
 917                 return PTR_ERR(cgrp);
 918         }
 919
 920         ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
 921                                 attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
 922         if (ret)
 923                 bpf_prog_put(prog);
 924         cgroup_put(cgrp);
 925
 926         return ret;
 927 }
 928
 929 #define BPF_PROG_DETACH_LAST_FIELD attach_type
 930
 931 static int bpf_prog_detach(const union bpf_attr *attr)
 932 {
 933         struct cgroup *cgrp;
 934         int ret;
 935
 936         if (!capable(CAP_NET_ADMIN))
 937                 return -EPERM;
 938
 939         if (CHECK_ATTR(BPF_PROG_DETACH))
 940                 return -EINVAL;
 941
 942         switch (attr->attach_type) {
 943         case BPF_CGROUP_INET_INGRESS:
 944         case BPF_CGROUP_INET_EGRESS:
 945         case BPF_CGROUP_INET_SOCK_CREATE:
 946                 cgrp = cgroup_get_from_fd(attr->target_fd);
 947                 if (IS_ERR(cgrp))
 948                         return PTR_ERR(cgrp);
 949
 950                 ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
 951                 cgroup_put(cgrp);
 952                 break;
 953
 954         default:
 955                 return -EINVAL;
 956         }
 957
 958         return ret;
 959 }
 960 #endif /* CONFIG_CGROUP_BPF */
 961
 962 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
 963
 964 static int bpf_prog_test_run(const union bpf_attr *attr,
 965                              union bpf_attr __user *uattr)
 966 {
 967         struct bpf_prog *prog;
 968         int ret = -ENOTSUPP;
 969
 970         if (CHECK_ATTR(BPF_PROG_TEST_RUN))
 971                 return -EINVAL;
 972
 973         prog = bpf_prog_get(attr->test.prog_fd);
 974         if (IS_ERR(prog))
 975                 return PTR_ERR(prog);
 976
 977         if (prog->aux->ops->test_run)
 978                 ret = prog->aux->ops->test_run(prog, attr, uattr);
 979
 980         bpf_prog_put(prog);
 981         return ret;
 982 }
 983
 984 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 985 {
 986         union bpf_attr attr = {};
 987         int err;
 988
 989         if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
 990                 return -EPERM;
 991
 992         if (!access_ok(VERIFY_READ, uattr, 1))
 993                 return -EFAULT;
 994
 995         if (size > PAGE_SIZE)   /* silly large */
 996                 return -E2BIG;
 997
 998         /* If we're handed a bigger struct than we know of,
 999          * ensure all the unknown bits are 0 - i.e. new
1000          * user-space does not rely on any kernel feature
1001          * extensions we dont know about yet.
1002          */
1003         if (size > sizeof(attr)) {
1004                 unsigned char __user *addr;
1005                 unsigned char __user *end;
1006                 unsigned char val;
1007
1008                 addr = (void __user *)uattr + sizeof(attr);
1009                 end  = (void __user *)uattr + size;
1010
1011                 for (; addr < end; addr++) {
1012                         err = get_user(val, addr);
1013                         if (err)
1014                                 return err;
1015                         if (val)
1016                                 return -E2BIG;
1017                 }
1018                 size = sizeof(attr);
1019         }
1020
1021         /* copy attributes from user space, may be less than sizeof(bpf_attr) */
1022         if (copy_from_user(&attr, uattr, size) != 0)
1023                 return -EFAULT;
1024
1025         switch (cmd) {
1026         case BPF_MAP_CREATE:
1027                 err = map_create(&attr);
1028                 break;
1029         case BPF_MAP_LOOKUP_ELEM:
1030                 err = map_lookup_elem(&attr);
1031                 break;
1032         case BPF_MAP_UPDATE_ELEM:
1033                 err = map_update_elem(&attr);
1034                 break;
1035         case BPF_MAP_DELETE_ELEM:
1036                 err = map_delete_elem(&attr);
1037                 break;
1038         case BPF_MAP_GET_NEXT_KEY:
1039                 err = map_get_next_key(&attr);
1040                 break;
1041         case BPF_PROG_LOAD:
1042                 err = bpf_prog_load(&attr);
1043                 break;
1044         case BPF_OBJ_PIN:
1045                 err = bpf_obj_pin(&attr);
1046                 break;
1047         case BPF_OBJ_GET:
1048                 err = bpf_obj_get(&attr);
1049                 break;
1050 #ifdef CONFIG_CGROUP_BPF
1051         case BPF_PROG_ATTACH:
1052                 err = bpf_prog_attach(&attr);
1053                 break;
1054         case BPF_PROG_DETACH:
1055                 err = bpf_prog_detach(&attr);
1056                 break;
1057 #endif
1058         case BPF_PROG_TEST_RUN:
1059                 err = bpf_prog_test_run(&attr, uattr);
1060                 break;
1061         default:
1062                 err = -EINVAL;
1063                 break;
1064         }
1065
1066         return err;
1067 }