Merge branch 'for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

author Linus Torvalds <[email protected]>

Fri, 10 Oct 2014 11:26:02 +0000 (07:26 -0400)

committer Linus Torvalds <[email protected]>

Fri, 10 Oct 2014 11:26:02 +0000 (07:26 -0400)
author Linus Torvalds <[email protected]>
Fri, 10 Oct 2014 11:26:02 +0000 (07:26 -0400)
committer Linus Torvalds <[email protected]>
Fri, 10 Oct 2014 11:26:02 +0000 (07:26 -0400)
diff --combined arch/x86/kvm/mmu.c

index 3201e93ebd07ca336e7e15c046c8596b73fdcbc7,5bd53f206f4f2df2cd9777fb692702781c199407..ac1c4de3a48491d9b0cf939897e9af57238b3f71
--- 1/arch/x86/kvm/mmu.c
--- 2/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@@ -199,20 -199,16 +199,20 @@@ void kvm_mmu_set_mmio_spte_mask(u64 mmi
   EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
   
   /*
- - * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number,
- - * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation
- - * number.
+ + * the low bit of the generation number is always presumed to be zero.
+ + * This disables mmio caching during memslot updates.  The concept is
+ + * similar to a seqcount but instead of retrying the access we just punt
+ + * and ignore the cache.
+ + *
+ + * spte bits 3-11 are used as bits 1-9 of the generation number,
+ + * the bits 52-61 are used as bits 10-19 of the generation number.
    */
- -#define MMIO_SPTE_GEN_LOW_SHIFT               3
+ +#define MMIO_SPTE_GEN_LOW_SHIFT               2
   #define MMIO_SPTE_GEN_HIGH_SHIFT      52
   
- -#define MMIO_GEN_SHIFT                        19
- -#define MMIO_GEN_LOW_SHIFT            9
- -#define MMIO_GEN_LOW_MASK             ((1 << MMIO_GEN_LOW_SHIFT) - 1)
+ +#define MMIO_GEN_SHIFT                        20
+ +#define MMIO_GEN_LOW_SHIFT            10
+ +#define MMIO_GEN_LOW_MASK             ((1 << MMIO_GEN_LOW_SHIFT) - 2)
   #define MMIO_GEN_MASK                 ((1 << MMIO_GEN_SHIFT) - 1)
   #define MMIO_MAX_GEN                  ((1 << MMIO_GEN_SHIFT) - 1)
   
@@@ -240,7 -236,12 +240,7 @@@ static unsigned int get_mmio_spte_gener
   
   static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
   {
- -      /*
- -       * Init kvm generation close to MMIO_MAX_GEN to easily test the
- -       * code of handling generation number wrap-around.
- -       */
- -      return (kvm_memslots(kvm)->generation +
- -                    MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
+ +      return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
   }
   
   static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
@@@ -295,6 -296,11 +295,6 @@@ static bool check_mmio_spte(struct kvm 
         return likely(kvm_gen == spte_gen);
   }
   
- -static inline u64 rsvd_bits(int s, int e)
- -{
- -      return ((1ULL << (e - s + 1)) - 1) << s;
- -}
- -
   void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
                 u64 dirty_mask, u64 nx_mask, u64 x_mask)
   {
@@@ -1174,7 -1180,7 +1174,7 @@@ static void drop_large_spte(struct kvm_
    * Write-protect on the specified @sptep, @pt_protect indicates whether
    * spte write-protection is caused by protecting shadow page table.
    *
- - * Note: write protection is difference between drity logging and spte
+ + * Note: write protection is different between dirty logging and spte
    * protection:
    * - for dirty logging, the spte can be set to writable at anytime if
    *   its dirty bitmap is properly set.
@@@ -1262,8 -1268,7 +1262,8 @@@ static bool rmap_write_protect(struct k
   }
   
   static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
- -                         struct kvm_memory_slot *slot, unsigned long data)
+ +                         struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ +                         unsigned long data)
   {
         u64 *sptep;
         struct rmap_iterator iter;
@@@ -1271,8 -1276,7 +1271,8 @@@
   
         while ((sptep = rmap_get_first(*rmapp, &iter))) {
                 BUG_ON(!(*sptep & PT_PRESENT_MASK));
- -              rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+ +              rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n",
+ +                           sptep, *sptep, gfn, level);
   
                 drop_spte(kvm, sptep);
                 need_tlb_flush = 1;
@@@ -1282,8 -1286,7 +1282,8 @@@
   }
   
   static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
- -                           struct kvm_memory_slot *slot, unsigned long data)
+ +                           struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ +                           unsigned long data)
   {
         u64 *sptep;
         struct rmap_iterator iter;
@@@ -1297,8 -1300,7 +1297,8 @@@
   
         for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
                 BUG_ON(!is_shadow_present_pte(*sptep));
- -              rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
+ +              rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
+ +                           sptep, *sptep, gfn, level);
   
                 need_flush = 1;
   
@@@ -1332,8 -1334,6 +1332,8 @@@ static int kvm_handle_hva_range(struct 
                                 int (*handler)(struct kvm *kvm,
                                                unsigned long *rmapp,
                                                struct kvm_memory_slot *slot,
+ +                                             gfn_t gfn,
+ +                                             int level,
                                                unsigned long data))
   {
         int j;
@@@ -1363,7 -1363,6 +1363,7 @@@
                      j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
                         unsigned long idx, idx_end;
                         unsigned long *rmapp;
+ +                      gfn_t gfn = gfn_start;
   
                         /*
                          * {idx(page_j) | page_j intersects with
@@@ -1374,10 -1373,8 +1374,10 @@@
   
                         rmapp = __gfn_to_rmap(gfn_start, j, memslot);
   
- -                      for (; idx <= idx_end; ++idx)
- -                              ret |= handler(kvm, rmapp++, memslot, data);
+ +                      for (; idx <= idx_end;
+ +                             ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j)))
+ +                              ret |= handler(kvm, rmapp++, memslot,
+ +                                             gfn, j, data);
                 }
         }
   
@@@ -1388,7 -1385,6 +1388,7 @@@ static int kvm_handle_hva(struct kvm *k
                           unsigned long data,
                           int (*handler)(struct kvm *kvm, unsigned long *rmapp,
                                          struct kvm_memory_slot *slot,
+ +                                       gfn_t gfn, int level,
                                          unsigned long data))
   {
         return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
@@@ -1410,14 -1406,24 +1410,14 @@@ void kvm_set_spte_hva(struct kvm *kvm, 
   }
   
   static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- -                       struct kvm_memory_slot *slot, unsigned long data)
+ +                       struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ +                       unsigned long data)
   {
         u64 *sptep;
         struct rmap_iterator uninitialized_var(iter);
         int young = 0;
   
- -      /*
- -       * In case of absence of EPT Access and Dirty Bits supports,
- -       * emulate the accessed bit for EPT, by checking if this page has
- -       * an EPT mapping, and clearing it if it does. On the next access,
- -       * a new EPT mapping will be established.
- -       * This has some overhead, but not as much as the cost of swapping
- -       * out actively used pages or breaking up actively used hugepages.
- -       */
- -      if (!shadow_accessed_mask) {
- -              young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
- -              goto out;
- -      }
+ +      BUG_ON(!shadow_accessed_mask);
   
         for (sptep = rmap_get_first(*rmapp, &iter); sptep;
              sptep = rmap_get_next(&iter)) {
@@@ -1429,13 -1435,14 +1429,13 @@@
                                  (unsigned long *)sptep);
                 }
         }
- -out:
- -      /* @data has hva passed to kvm_age_hva(). */
- -      trace_kvm_age_page(data, slot, young);
+ +      trace_kvm_age_page(gfn, level, slot, young);
         return young;
   }
   
   static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- -                            struct kvm_memory_slot *slot, unsigned long data)
+ +                            struct kvm_memory_slot *slot, gfn_t gfn,
+ +                            int level, unsigned long data)
   {
         u64 *sptep;
         struct rmap_iterator iter;
@@@ -1473,33 -1480,13 +1473,33 @@@ static void rmap_recycle(struct kvm_vcp
   
         rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
   
- -      kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+ +      kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
         kvm_flush_remote_tlbs(vcpu->kvm);
   }
   
- -int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+ +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
   {
- -      return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
+ +      /*
+ +       * In case of absence of EPT Access and Dirty Bits supports,
+ +       * emulate the accessed bit for EPT, by checking if this page has
+ +       * an EPT mapping, and clearing it if it does. On the next access,
+ +       * a new EPT mapping will be established.
+ +       * This has some overhead, but not as much as the cost of swapping
+ +       * out actively used pages or breaking up actively used hugepages.
+ +       */
+ +      if (!shadow_accessed_mask) {
+ +              /*
+ +               * We are holding the kvm->mmu_lock, and we are blowing up
+ +               * shadow PTEs. MMU notifier consumers need to be kept at bay.
+ +               * This is correct as long as we don't decouple the mmu_lock
+ +               * protected regions (like invalidate_range_start|end does).
+ +               */
+ +              kvm->mmu_notifier_seq++;
+ +              return kvm_handle_hva_range(kvm, start, end, 0,
+ +                                          kvm_unmap_rmapp);
+ +      }
+ +
+ +      return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
   }
   
   int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@@ -1762,7 -1749,7 +1762,7 @@@ static int __kvm_sync_page(struct kvm_v
                 return 1;
         }
   
- -      kvm_mmu_flush_tlb(vcpu);
+ +      kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
         return 0;
   }
   
@@@ -1815,7 -1802,7 +1815,7 @@@ static void kvm_sync_pages(struct kvm_v
   
         kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
         if (flush)
- -              kvm_mmu_flush_tlb(vcpu);
+ +              kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
   }
   
   struct mmu_page_path {
@@@ -2549,7 -2536,7 +2549,7 @@@ static void mmu_set_spte(struct kvm_vcp
               true, host_writable)) {
                 if (write_fault)
                         *emulate = 1;
- -              kvm_mmu_flush_tlb(vcpu);
+ +              kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
         }
   
         if (unlikely(is_mmio_spte(*sptep) && emulate))
@@@ -3176,7 -3163,7 +3176,7 @@@ static void mmu_sync_roots(struct kvm_v
         if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
                 return;
   
- -      vcpu_clear_mmio_info(vcpu, ~0ul);
+ +      vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
         kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
         if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
                 hpa_t root = vcpu->arch.mmu.root_hpa;
@@@ -3219,7 -3206,7 +3219,7 @@@ static gpa_t nonpaging_gva_to_gpa_neste
   {
         if (exception)
                 exception->error_code = 0;
- -      return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access);
+ +      return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
   }
   
   static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
@@@ -3463,6 -3450,13 +3463,6 @@@ static void nonpaging_init_context(stru
         context->nx = false;
   }
   
- -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
- -{
- -      ++vcpu->stat.tlb_flush;
- -      kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- -}
- -EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
- -
   void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
   {
         mmu_free_roots(vcpu);
@@@ -3524,7 -3518,6 +3524,7 @@@ static void reset_rsvds_bits_mask(struc
         int maxphyaddr = cpuid_maxphyaddr(vcpu);
         u64 exb_bit_rsvd = 0;
         u64 gbpages_bit_rsvd = 0;
+ +      u64 nonleaf_bit8_rsvd = 0;
   
         context->bad_mt_xwr = 0;
   
@@@ -3532,14 -3525,6 +3532,14 @@@
                 exb_bit_rsvd = rsvd_bits(63, 63);
         if (!guest_cpuid_has_gbpages(vcpu))
                 gbpages_bit_rsvd = rsvd_bits(7, 7);
+ +
+ +      /*
+ +       * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
+ +       * leaf entries) on AMD CPUs only.
+ +       */
+ +      if (guest_cpuid_is_amd(vcpu))
+ +              nonleaf_bit8_rsvd = rsvd_bits(8, 8);
+ +
         switch (context->root_level) {
         case PT32_ROOT_LEVEL:
                 /* no rsvd bits for 2 level 4K page table entries */
@@@ -3574,9 -3559,9 +3574,9 @@@
                 break;
         case PT64_ROOT_LEVEL:
                 context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
- -                      rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7);
+ +                      nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
                 context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
- -                      gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
+ +                      nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
                 context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
                         rsvd_bits(maxphyaddr, 51);
                 context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
@@@ -3977,7 -3962,7 +3977,7 @@@ static void mmu_pte_write_flush_tlb(str
         if (remote_flush)
                 kvm_flush_remote_tlbs(vcpu->kvm);
         else if (local_flush)
- -              kvm_mmu_flush_tlb(vcpu);
+ +              kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
   }
   
   static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
@@@ -4238,7 -4223,7 +4238,7 @@@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault)
   void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
   {
         vcpu->arch.mmu.invlpg(vcpu, gva);
- -      kvm_mmu_flush_tlb(vcpu);
+ +      kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
         ++vcpu->stat.invlpg;
   }
   EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@@@ -4448,7 -4433,7 +4448,7 @@@ void kvm_mmu_invalidate_mmio_sptes(stru
          * The very rare case: if the generation number wraps around,
          * zap all shadow pages.
          */
- -      if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) {
+ +      if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
                 printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
                 kvm_mmu_invalidate_zap_all_pages(kvm);
         }
@@@ -4549,7 -4534,7 +4549,7 @@@ int kvm_mmu_module_init(void
         if (!mmu_page_header_cache)
                 goto nomem;
   
-       if (percpu_counter_init(&kvm_total_used_mmu_pages, 0))
+       if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
                 goto nomem;
   
         register_shrinker(&mmu_shrinker);
diff --combined include/linux/percpu-refcount.h

index 68a64f11ce0215e34f7e1e0fbeefd42c5e49b40c,00c01fc6d88c58cfa53abe4ceb380d940a6bafbe..d5c89e0dd0e6725c614b491c78b5bfafe9cc46f4
--- 1/include/linux/percpu-refcount.h
--- 2/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@@ -13,7 -13,7 +13,7 @@@
    *
    * The refcount will have a range of 0 to ((1U << 31) - 1), i.e. one bit less
    * than an atomic_t - this is because of the way shutdown works, see
-  * percpu_ref_kill()/PCPU_COUNT_BIAS.
+  * percpu_ref_kill()/PERCPU_COUNT_BIAS.
    *
    * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the
    * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill()
@@@ -29,7 -29,7 +29,7 @@@
    * calls io_destroy() or the process exits.
    *
    * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it
- - * calls percpu_ref_kill(), then hlist_del_rcu() and sychronize_rcu() to remove
+ + * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove
    * the kioctx from the process's list of kioctxs - after that, there can't be
    * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop
    * the initial ref with percpu_ref_put().
@@@ -49,29 -49,60 +49,60 @@@
   #include <linux/kernel.h>
   #include <linux/percpu.h>
   #include <linux/rcupdate.h>
+ #include <linux/gfp.h>
   
   struct percpu_ref;
   typedef void (percpu_ref_func_t)(struct percpu_ref *);
   
+ /* flags set in the lower bits of percpu_ref->percpu_count_ptr */
+ enum {
+       __PERCPU_REF_ATOMIC     = 1LU << 0,     /* operating in atomic mode */
+       __PERCPU_REF_DEAD       = 1LU << 1,     /* (being) killed */
+       __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD,
+ 
+       __PERCPU_REF_FLAG_BITS  = 2,
+ };
+ 
+ /* @flags for percpu_ref_init() */
+ enum {
+       /*
+        * Start w/ ref == 1 in atomic mode.  Can be switched to percpu
+        * operation using percpu_ref_switch_to_percpu().  If initialized
+        * with this flag, the ref will stay in atomic mode until
+        * percpu_ref_switch_to_percpu() is invoked on it.
+        */
+       PERCPU_REF_INIT_ATOMIC  = 1 << 0,
+ 
+       /*
+        * Start dead w/ ref == 0 in atomic mode.  Must be revived with
+        * percpu_ref_reinit() before used.  Implies INIT_ATOMIC.
+        */
+       PERCPU_REF_INIT_DEAD    = 1 << 1,
+ };
+ 
   struct percpu_ref {
-       atomic_t                count;
+       atomic_long_t           count;
         /*
          * The low bit of the pointer indicates whether the ref is in percpu
          * mode; if set, then get/put will manipulate the atomic_t.
          */
-       unsigned long           pcpu_count_ptr;
+       unsigned long           percpu_count_ptr;
         percpu_ref_func_t       *release;
-       percpu_ref_func_t       *confirm_kill;
+       percpu_ref_func_t       *confirm_switch;
+       bool                    force_atomic:1;
         struct rcu_head         rcu;
   };
   
   int __must_check percpu_ref_init(struct percpu_ref *ref,
-                                percpu_ref_func_t *release);
- void percpu_ref_reinit(struct percpu_ref *ref);
+                                percpu_ref_func_t *release, unsigned int flags,
+                                gfp_t gfp);
   void percpu_ref_exit(struct percpu_ref *ref);
+ void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+                                percpu_ref_func_t *confirm_switch);
+ void percpu_ref_switch_to_percpu(struct percpu_ref *ref);
   void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
                                  percpu_ref_func_t *confirm_kill);
- void __percpu_ref_kill_expedited(struct percpu_ref *ref);
+ void percpu_ref_reinit(struct percpu_ref *ref);
   
   /**
    * percpu_ref_kill - drop the initial ref
@@@ -88,26 -119,24 +119,24 @@@ static inline void percpu_ref_kill(stru
         return percpu_ref_kill_and_confirm(ref, NULL);
   }
   
- #define PCPU_REF_DEAD         1
- 
   /*
    * Internal helper.  Don't use outside percpu-refcount proper.  The
    * function doesn't return the pointer and let the caller test it for NULL
    * because doing so forces the compiler to generate two conditional
-  * branches as it can't assume that @ref->pcpu_count is not NULL.
+  * branches as it can't assume that @ref->percpu_count is not NULL.
    */
- static inline bool __pcpu_ref_alive(struct percpu_ref *ref,
-                                   unsigned __percpu **pcpu_countp)
+ static inline bool __ref_is_percpu(struct percpu_ref *ref,
+                                         unsigned long __percpu **percpu_countp)
   {
-       unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr);
+       unsigned long percpu_ptr = ACCESS_ONCE(ref->percpu_count_ptr);
   
         /* paired with smp_store_release() in percpu_ref_reinit() */
         smp_read_barrier_depends();
   
-       if (unlikely(pcpu_ptr & PCPU_REF_DEAD))
+       if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC))
                 return false;
   
-       *pcpu_countp = (unsigned __percpu *)pcpu_ptr;
+       *percpu_countp = (unsigned long __percpu *)percpu_ptr;
         return true;
   }
   
@@@ -115,18 -144,20 +144,20 @@@
    * percpu_ref_get - increment a percpu refcount
    * @ref: percpu_ref to get
    *
-  * Analagous to atomic_inc().
-   */
+  * Analogous to atomic_long_inc().
+  *
+  * This function is safe to call as long as @ref is between init and exit.
+  */
   static inline void percpu_ref_get(struct percpu_ref *ref)
   {
-       unsigned __percpu *pcpu_count;
+       unsigned long __percpu *percpu_count;
   
         rcu_read_lock_sched();
   
-       if (__pcpu_ref_alive(ref, &pcpu_count))
-               this_cpu_inc(*pcpu_count);
+       if (__ref_is_percpu(ref, &percpu_count))
+               this_cpu_inc(*percpu_count);
         else
-               atomic_inc(&ref->count);
+               atomic_long_inc(&ref->count);
   
         rcu_read_unlock_sched();
   }
@@@ -138,20 -169,20 +169,20 @@@
    * Increment a percpu refcount unless its count already reached zero.
    * Returns %true on success; %false on failure.
    *
-  * The caller is responsible for ensuring that @ref stays accessible.
+  * This function is safe to call as long as @ref is between init and exit.
    */
   static inline bool percpu_ref_tryget(struct percpu_ref *ref)
   {
-       unsigned __percpu *pcpu_count;
-       int ret = false;
+       unsigned long __percpu *percpu_count;
+       int ret;
   
         rcu_read_lock_sched();
   
-       if (__pcpu_ref_alive(ref, &pcpu_count)) {
-               this_cpu_inc(*pcpu_count);
+       if (__ref_is_percpu(ref, &percpu_count)) {
+               this_cpu_inc(*percpu_count);
                 ret = true;
         } else {
-               ret = atomic_inc_not_zero(&ref->count);
+               ret = atomic_long_inc_not_zero(&ref->count);
         }
   
         rcu_read_unlock_sched();
@@@ -166,23 -197,26 +197,26 @@@
    * Increment a percpu refcount unless it has already been killed.  Returns
    * %true on success; %false on failure.
    *
-  * Completion of percpu_ref_kill() in itself doesn't guarantee that tryget
-  * will fail.  For such guarantee, percpu_ref_kill_and_confirm() should be
-  * used.  After the confirm_kill callback is invoked, it's guaranteed that
-  * no new reference will be given out by percpu_ref_tryget().
+  * Completion of percpu_ref_kill() in itself doesn't guarantee that this
+  * function will fail.  For such guarantee, percpu_ref_kill_and_confirm()
+  * should be used.  After the confirm_kill callback is invoked, it's
+  * guaranteed that no new reference will be given out by
+  * percpu_ref_tryget_live().
    *
-  * The caller is responsible for ensuring that @ref stays accessible.
+  * This function is safe to call as long as @ref is between init and exit.
    */
   static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
   {
-       unsigned __percpu *pcpu_count;
+       unsigned long __percpu *percpu_count;
         int ret = false;
   
         rcu_read_lock_sched();
   
-       if (__pcpu_ref_alive(ref, &pcpu_count)) {
-               this_cpu_inc(*pcpu_count);
+       if (__ref_is_percpu(ref, &percpu_count)) {
+               this_cpu_inc(*percpu_count);
                 ret = true;
+       } else if (!(ACCESS_ONCE(ref->percpu_count_ptr) & __PERCPU_REF_DEAD)) {
+               ret = atomic_long_inc_not_zero(&ref->count);
         }
   
         rcu_read_unlock_sched();
@@@ -196,16 -230,18 +230,18 @@@
    *
    * Decrement the refcount, and if 0, call the release function (which was passed
    * to percpu_ref_init())
+  *
+  * This function is safe to call as long as @ref is between init and exit.
    */
   static inline void percpu_ref_put(struct percpu_ref *ref)
   {
-       unsigned __percpu *pcpu_count;
+       unsigned long __percpu *percpu_count;
   
         rcu_read_lock_sched();
   
-       if (__pcpu_ref_alive(ref, &pcpu_count))
-               this_cpu_dec(*pcpu_count);
-       else if (unlikely(atomic_dec_and_test(&ref->count)))
+       if (__ref_is_percpu(ref, &percpu_count))
+               this_cpu_dec(*percpu_count);
+       else if (unlikely(atomic_long_dec_and_test(&ref->count)))
                 ref->release(ref);
   
         rcu_read_unlock_sched();
@@@ -216,14 -252,16 +252,16 @@@
    * @ref: percpu_ref to test
    *
    * Returns %true if @ref reached zero.
+  *
+  * This function is safe to call as long as @ref is between init and exit.
    */
   static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
   {
-       unsigned __percpu *pcpu_count;
+       unsigned long __percpu *percpu_count;
   
-       if (__pcpu_ref_alive(ref, &pcpu_count))
+       if (__ref_is_percpu(ref, &percpu_count))
                 return false;
-       return !atomic_read(&ref->count);
+       return !atomic_long_read(&ref->count);
   }
   
   #endif
diff --combined kernel/cgroup.c

index cab7dc4284dcb332748169525440951ee9254247,753df01a9831ed13a6ebecccf2550540f2cf93af..136eceadeed138b71fd55dfed22a9850b38dd776
--- 1/kernel/cgroup.c
--- 2/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@@ -185,6 -185,7 +185,6 @@@ static int need_forkexit_callback __rea
   static struct cftype cgroup_dfl_base_files[];
   static struct cftype cgroup_legacy_base_files[];
   
- -static void cgroup_put(struct cgroup *cgrp);
   static int rebind_subsystems(struct cgroup_root *dst_root,
                              unsigned int ss_mask);
   static int cgroup_destroy_locked(struct cgroup *cgrp);
@@@ -194,6 -195,7 +194,6 @@@ static void css_release(struct percpu_r
   static void kill_css(struct cgroup_subsys_state *css);
   static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
                               bool is_add);
- -static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
   
   /* IDR wrappers which synchronize using cgroup_idr_lock */
   static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
@@@ -329,6 -331,14 +329,6 @@@ bool cgroup_is_descendant(struct cgrou
         return false;
   }
   
- -static int cgroup_is_releasable(const struct cgroup *cgrp)
- -{
- -      const int bits =
- -              (1 << CGRP_RELEASABLE) |
- -              (1 << CGRP_NOTIFY_ON_RELEASE);
- -      return (cgrp->flags & bits) == bits;
- -}
- -
   static int notify_on_release(const struct cgroup *cgrp)
   {
         return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@@ -384,7 -394,12 +384,7 @@@
                         ;                                               \
                 else
   
- -/* the list of cgroups eligible for automatic release. Protected by
- - * release_list_lock */
- -static LIST_HEAD(release_list);
- -static DEFINE_RAW_SPINLOCK(release_list_lock);
   static void cgroup_release_agent(struct work_struct *work);
- -static DECLARE_WORK(release_agent_work, cgroup_release_agent);
   static void check_for_release(struct cgroup *cgrp);
   
   /*
@@@ -483,7 -498,7 +483,7 @@@ static unsigned long css_set_hash(struc
         return key;
   }
   
- -static void put_css_set_locked(struct css_set *cset, bool taskexit)
+ +static void put_css_set_locked(struct css_set *cset)
   {
         struct cgrp_cset_link *link, *tmp_link;
         struct cgroup_subsys *ss;
@@@ -509,7 -524,11 +509,7 @@@
                 /* @cgrp can't go away while we're holding css_set_rwsem */
                 if (list_empty(&cgrp->cset_links)) {
                         cgroup_update_populated(cgrp, false);
- -                      if (notify_on_release(cgrp)) {
- -                              if (taskexit)
- -                                      set_bit(CGRP_RELEASABLE, &cgrp->flags);
- -                              check_for_release(cgrp);
- -                      }
+ +                      check_for_release(cgrp);
                 }
   
                 kfree(link);
@@@ -518,7 -537,7 +518,7 @@@
         kfree_rcu(cset, rcu_head);
   }
   
- -static void put_css_set(struct css_set *cset, bool taskexit)
+ +static void put_css_set(struct css_set *cset)
   {
         /*
          * Ensure that the refcount doesn't hit zero while any readers
@@@ -529,7 -548,7 +529,7 @@@
                 return;
   
         down_write(&css_set_rwsem);
- -      put_css_set_locked(cset, taskexit);
+ +      put_css_set_locked(cset);
         up_write(&css_set_rwsem);
   }
   
@@@ -950,6 -969,14 +950,6 @@@ static struct cgroup *task_cgroup_from_
    * knows that the cgroup won't be removed, as cgroup_rmdir()
    * needs that mutex.
    *
- - * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
- - * (usually) take cgroup_mutex.  These are the two most performance
- - * critical pieces of code here.  The exception occurs on cgroup_exit(),
- - * when a task in a notify_on_release cgroup exits.  Then cgroup_mutex
- - * is taken, and if the cgroup count is zero, a usermode call made
- - * to the release agent with the name of the cgroup (path relative to
- - * the root of cgroup file system) as the argument.
- - *
    * A cgroup can only be deleted if both its 'count' of using tasks
    * is zero, and its list of 'children' cgroups is empty.  Since all
    * tasks in the system use _some_ cgroup, and since there is always at
@@@ -1560,6 -1587,7 +1560,6 @@@ static void init_cgroup_housekeeping(st
         INIT_LIST_HEAD(&cgrp->self.sibling);
         INIT_LIST_HEAD(&cgrp->self.children);
         INIT_LIST_HEAD(&cgrp->cset_links);
- -      INIT_LIST_HEAD(&cgrp->release_list);
         INIT_LIST_HEAD(&cgrp->pidlists);
         mutex_init(&cgrp->pidlist_mutex);
         cgrp->self.cgroup = cgrp;
@@@ -1569,7 -1597,6 +1569,7 @@@
                 INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
   
         init_waitqueue_head(&cgrp->offline_waitq);
+ +      INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent);
   }
   
   static void init_cgroup_root(struct cgroup_root *root,
@@@ -1607,7 -1634,8 +1607,8 @@@ static int cgroup_setup_root(struct cgr
                 goto out;
         root_cgrp->id = ret;
   
-       ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release);
+       ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
+                             GFP_KERNEL);
         if (ret)
                 goto out;
   
@@@ -2025,7 -2053,8 +2026,7 @@@ static void cgroup_task_migrate(struct 
          * task. As trading it for new_cset is protected by cgroup_mutex,
          * we're safe to drop it here; it will be freed under RCU.
          */
- -      set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
- -      put_css_set_locked(old_cset, false);
+ +      put_css_set_locked(old_cset);
   }
   
   /**
@@@ -2046,7 -2075,7 +2047,7 @@@ static void cgroup_migrate_finish(struc
                 cset->mg_src_cgrp = NULL;
                 cset->mg_dst_cset = NULL;
                 list_del_init(&cset->mg_preload_node);
- -              put_css_set_locked(cset, false);
+ +              put_css_set_locked(cset);
         }
         up_write(&css_set_rwsem);
   }
@@@ -2140,8 -2169,8 +2141,8 @@@ static int cgroup_migrate_prepare_dst(s
                 if (src_cset == dst_cset) {
                         src_cset->mg_src_cgrp = NULL;
                         list_del_init(&src_cset->mg_preload_node);
- -                      put_css_set(src_cset, false);
- -                      put_css_set(dst_cset, false);
+ +                      put_css_set(src_cset);
+ +                      put_css_set(dst_cset);
                         continue;
                 }
   
@@@ -2150,7 -2179,7 +2151,7 @@@
                 if (list_empty(&dst_cset->mg_preload_node))
                         list_add(&dst_cset->mg_preload_node, &csets);
                 else
- -                      put_css_set(dst_cset, false);
+ +                      put_css_set(dst_cset);
         }
   
         list_splice_tail(&csets, preloaded_csets);
@@@ -4145,6 -4174,7 +4146,6 @@@ static u64 cgroup_read_notify_on_releas
   static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
                                           struct cftype *cft, u64 val)
   {
- -      clear_bit(CGRP_RELEASABLE, &css->cgroup->flags);
         if (val)
                 set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
         else
@@@ -4322,7 -4352,6 +4323,7 @@@ static void css_free_work_fn(struct wor
                 /* cgroup free path */
                 atomic_dec(&cgrp->root->nr_cgrps);
                 cgroup_pidlist_destroy_all(cgrp);
+ +              cancel_work_sync(&cgrp->release_agent_work);
   
                 if (cgroup_parent(cgrp)) {
                         /*
@@@ -4482,7 -4511,7 +4483,7 @@@ static int create_css(struct cgroup *cg
   
         init_and_link_css(css, ss, cgrp);
   
-       err = percpu_ref_init(&css->refcnt, css_release);
+       err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
         if (err)
                 goto err_free_css;
   
@@@ -4555,7 -4584,7 +4556,7 @@@ static int cgroup_mkdir(struct kernfs_n
                 goto out_unlock;
         }
   
-       ret = percpu_ref_init(&cgrp->self.refcnt, css_release);
+       ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
         if (ret)
                 goto out_free_cgrp;
   
@@@ -4785,12 -4814,19 +4786,12 @@@ static int cgroup_destroy_locked(struc
         for_each_css(css, ssid, cgrp)
                 kill_css(css);
   
- -      /* CSS_ONLINE is clear, remove from ->release_list for the last time */
- -      raw_spin_lock(&release_list_lock);
- -      if (!list_empty(&cgrp->release_list))
- -              list_del_init(&cgrp->release_list);
- -      raw_spin_unlock(&release_list_lock);
- -
         /*
          * Remove @cgrp directory along with the base files.  @cgrp has an
          * extra ref on its kn.
          */
         kernfs_remove(cgrp->kn);
   
- -      set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags);
         check_for_release(cgroup_parent(cgrp));
   
         /* put the base reference */
@@@ -4807,10 -4843,13 +4808,10 @@@ static int cgroup_rmdir(struct kernfs_n
         cgrp = cgroup_kn_lock_live(kn);
         if (!cgrp)
                 return 0;
- -      cgroup_get(cgrp);       /* for @kn->priv clearing */
   
         ret = cgroup_destroy_locked(cgrp);
   
         cgroup_kn_unlock(kn);
- -
- -      cgroup_put(cgrp);
         return ret;
   }
   
@@@ -5014,9 -5053,12 +5015,9 @@@ core_initcall(cgroup_wq_init)
    *  - Print task's cgroup paths into seq_file, one line for each hierarchy
    *  - Used for /proc/<pid>/cgroup.
    */
- -
- -/* TODO: Use a proper seq_file iterator */
- -int proc_cgroup_show(struct seq_file *m, void *v)
+ +int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
+ +                   struct pid *pid, struct task_struct *tsk)
   {
- -      struct pid *pid;
- -      struct task_struct *tsk;
         char *buf, *path;
         int retval;
         struct cgroup_root *root;
@@@ -5026,6 -5068,14 +5027,6 @@@
         if (!buf)
                 goto out;
   
- -      retval = -ESRCH;
- -      pid = m->private;
- -      tsk = get_pid_task(pid, PIDTYPE_PID);
- -      if (!tsk)
- -              goto out_free;
- -
- -      retval = 0;
- -
         mutex_lock(&cgroup_mutex);
         down_read(&css_set_rwsem);
   
@@@ -5055,10 -5105,11 +5056,10 @@@
                 seq_putc(m, '\n');
         }
   
+ +      retval = 0;
   out_unlock:
         up_read(&css_set_rwsem);
         mutex_unlock(&cgroup_mutex);
- -      put_task_struct(tsk);
- -out_free:
         kfree(buf);
   out:
         return retval;
@@@ -5129,7 -5180,7 +5130,7 @@@ void cgroup_post_fork(struct task_struc
         int i;
   
         /*
- -       * This may race against cgroup_enable_task_cg_links().  As that
+ +       * This may race against cgroup_enable_task_cg_lists().  As that
          * function sets use_task_css_set_links before grabbing
          * tasklist_lock and we just went through tasklist_lock to add
          * @child, it's guaranteed that either we see the set
@@@ -5144,7 -5195,7 +5145,7 @@@
          * when implementing operations which need to migrate all tasks of
          * a cgroup to another.
          *
- -       * Note that if we lose to cgroup_enable_task_cg_links(), @child
+ +       * Note that if we lose to cgroup_enable_task_cg_lists(), @child
          * will remain in init_css_set.  This is safe because all tasks are
          * in the init_css_set before cg_links is enabled and there's no
          * operation which transfers all tasks out of init_css_set.
@@@ -5228,14 -5279,30 +5229,14 @@@ void cgroup_exit(struct task_struct *ts
         }
   
         if (put_cset)
- -              put_css_set(cset, true);
+ +              put_css_set(cset);
   }
   
   static void check_for_release(struct cgroup *cgrp)
   {
- -      if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) &&
- -          !css_has_online_children(&cgrp->self)) {
- -              /*
- -               * Control Group is currently removeable. If it's not
- -               * already queued for a userspace notification, queue
- -               * it now
- -               */
- -              int need_schedule_work = 0;
- -
- -              raw_spin_lock(&release_list_lock);
- -              if (!cgroup_is_dead(cgrp) &&
- -                  list_empty(&cgrp->release_list)) {
- -                      list_add(&cgrp->release_list, &release_list);
- -                      need_schedule_work = 1;
- -              }
- -              raw_spin_unlock(&release_list_lock);
- -              if (need_schedule_work)
- -                      schedule_work(&release_agent_work);
- -      }
+ +      if (notify_on_release(cgrp) && !cgroup_has_tasks(cgrp) &&
+ +          !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
+ +              schedule_work(&cgrp->release_agent_work);
   }
   
   /*
@@@ -5263,39 -5330,52 +5264,39 @@@
    */
   static void cgroup_release_agent(struct work_struct *work)
   {
- -      BUG_ON(work != &release_agent_work);
+ +      struct cgroup *cgrp =
+ +              container_of(work, struct cgroup, release_agent_work);
+ +      char *pathbuf = NULL, *agentbuf = NULL, *path;
+ +      char *argv[3], *envp[3];
+ +
         mutex_lock(&cgroup_mutex);
- -      raw_spin_lock(&release_list_lock);
- -      while (!list_empty(&release_list)) {
- -              char *argv[3], *envp[3];
- -              int i;
- -              char *pathbuf = NULL, *agentbuf = NULL, *path;
- -              struct cgroup *cgrp = list_entry(release_list.next,
- -                                                  struct cgroup,
- -                                                  release_list);
- -              list_del_init(&cgrp->release_list);
- -              raw_spin_unlock(&release_list_lock);
- -              pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
- -              if (!pathbuf)
- -                      goto continue_free;
- -              path = cgroup_path(cgrp, pathbuf, PATH_MAX);
- -              if (!path)
- -                      goto continue_free;
- -              agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
- -              if (!agentbuf)
- -                      goto continue_free;
- -
- -              i = 0;
- -              argv[i++] = agentbuf;
- -              argv[i++] = path;
- -              argv[i] = NULL;
- -
- -              i = 0;
- -              /* minimal command environment */
- -              envp[i++] = "HOME=/";
- -              envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
- -              envp[i] = NULL;
- -
- -              /* Drop the lock while we invoke the usermode helper,
- -               * since the exec could involve hitting disk and hence
- -               * be a slow process */
- -              mutex_unlock(&cgroup_mutex);
- -              call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
- -              mutex_lock(&cgroup_mutex);
- - continue_free:
- -              kfree(pathbuf);
- -              kfree(agentbuf);
- -              raw_spin_lock(&release_list_lock);
- -      }
- -      raw_spin_unlock(&release_list_lock);
+ +
+ +      pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+ +      agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+ +      if (!pathbuf || !agentbuf)
+ +              goto out;
+ +
+ +      path = cgroup_path(cgrp, pathbuf, PATH_MAX);
+ +      if (!path)
+ +              goto out;
+ +
+ +      argv[0] = agentbuf;
+ +      argv[1] = path;
+ +      argv[2] = NULL;
+ +
+ +      /* minimal command environment */
+ +      envp[0] = "HOME=/";
+ +      envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+ +      envp[2] = NULL;
+ +
+ +      mutex_unlock(&cgroup_mutex);
+ +      call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+ +      goto out_free;
+ +out:
         mutex_unlock(&cgroup_mutex);
+ +out_free:
+ +      kfree(agentbuf);
+ +      kfree(pathbuf);
   }
   
   static int __init cgroup_disable(char *str)
@@@ -5483,8 -5563,7 +5484,8 @@@ static int cgroup_css_links_read(struc
   
   static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
   {
- -      return test_bit(CGRP_RELEASABLE, &css->cgroup->flags);
+ +      return (!cgroup_has_tasks(css->cgroup) &&
+ +              !css_has_online_children(&css->cgroup->self));
   }
   
   static struct cftype debug_files[] =  {
diff --combined mm/backing-dev.c

index b27714f1b40fbef79c547ebeedcdcb5863a0b49d,64ec49d1772be9dc75cd34ddfb8872d78470e166..12a992b625765b6b43b34a184e7e62536ae3e321
--- 1/mm/backing-dev.c
--- 2/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@@ -455,7 -455,7 +455,7 @@@ int bdi_init(struct backing_dev_info *b
         bdi_wb_init(&bdi->wb, bdi);
   
         for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
-               err = percpu_counter_init(&bdi->bdi_stat[i], 0);
+               err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL);
                 if (err)
                         goto err;
         }
@@@ -470,7 -470,7 +470,7 @@@
         bdi->write_bandwidth = INIT_BW;
         bdi->avg_write_bandwidth = INIT_BW;
   
-       err = fprop_local_init_percpu(&bdi->completions);
+       err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL);
   
         if (err) {
   err:
@@@ -631,7 -631,7 +631,7 @@@ long wait_iff_congested(struct zone *zo
          * of sleeping on the congestion queue
          */
         if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
- -                      !zone_is_reclaim_congested(zone)) {
+ +          !test_bit(ZONE_CONGESTED, &zone->flags)) {
                 cond_resched();
   
                 /* In case we scheduled, work out time remaining */
diff --combined mm/mmap.c

index 16d19b48e2ad749bf03a6fc73181efadf41da9f1,650a1f14a945f6686aca8ad00839c8ee738e1999..93d28c7e54201de5549dab1a3dd696d785a06302
--- 1/mm/mmap.c
--- 2/mm/mmap.c
+++ b/mm/mmap.c
@@@ -70,7 -70,7 +70,7 @@@ static void unmap_region(struct mm_stru
    * MAP_SHARED r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
    *            w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
    *            x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
- - *            
+ + *
    * MAP_PRIVATE        r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
    *            w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
    *            x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
@@@ -268,7 -268,7 +268,7 @@@ static unsigned long do_brk(unsigned lo
   
   SYSCALL_DEFINE1(brk, unsigned long, brk)
   {
- -      unsigned long rlim, retval;
+ +      unsigned long retval;
         unsigned long newbrk, oldbrk;
         struct mm_struct *mm = current->mm;
         unsigned long min_brk;
@@@ -298,8 -298,9 +298,8 @@@
          * segment grow beyond its set limit the in case where the limit is
          * not page aligned -Ram Gupta
          */
- -      rlim = rlimit(RLIMIT_DATA);
- -      if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
- -                      (mm->end_data - mm->start_data) > rlim)
+ +      if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
+ +                            mm->end_data, mm->start_data))
                 goto out;
   
         newbrk = PAGE_ALIGN(brk);
@@@ -368,18 -369,16 +368,18 @@@ static int browse_rb(struct rb_root *ro
                 struct vm_area_struct *vma;
                 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
                 if (vma->vm_start < prev) {
- -                      pr_emerg("vm_start %lx prev %lx\n", vma->vm_start, prev);
+ +                      pr_emerg("vm_start %lx < prev %lx\n",
+ +                                vma->vm_start, prev);
                         bug = 1;
                 }
                 if (vma->vm_start < pend) {
- -                      pr_emerg("vm_start %lx pend %lx\n", vma->vm_start, pend);
+ +                      pr_emerg("vm_start %lx < pend %lx\n",
+ +                                vma->vm_start, pend);
                         bug = 1;
                 }
                 if (vma->vm_start > vma->vm_end) {
- -                      pr_emerg("vm_end %lx < vm_start %lx\n",
- -                              vma->vm_end, vma->vm_start);
+ +                      pr_emerg("vm_start %lx > vm_end %lx\n",
+ +                                vma->vm_start, vma->vm_end);
                         bug = 1;
                 }
                 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
@@@ -410,9 -409,8 +410,9 @@@ static void validate_mm_rb(struct rb_ro
         for (nd = rb_first(root); nd; nd = rb_next(nd)) {
                 struct vm_area_struct *vma;
                 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
- -              BUG_ON(vma != ignore &&
- -                     vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
+ +              VM_BUG_ON_VMA(vma != ignore &&
+ +                      vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
+ +                      vma);
         }
   }
   
@@@ -422,10 -420,8 +422,10 @@@ static void validate_mm(struct mm_struc
         int i = 0;
         unsigned long highest_address = 0;
         struct vm_area_struct *vma = mm->mmap;
+ +
         while (vma) {
                 struct anon_vma_chain *avc;
+ +
                 vma_lock_anon_vma(vma);
                 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
                         anon_vma_interval_tree_verify(avc);
@@@ -440,16 -436,15 +440,16 @@@
         }
         if (highest_address != mm->highest_vm_end) {
                 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
- -                     mm->highest_vm_end, highest_address);
+ +                        mm->highest_vm_end, highest_address);
                 bug = 1;
         }
         i = browse_rb(&mm->mm_rb);
         if (i != mm->map_count) {
- -              pr_emerg("map_count %d rb %d\n", mm->map_count, i);
+ +              if (i != -1)
+ +                      pr_emerg("map_count %d rb %d\n", mm->map_count, i);
                 bug = 1;
         }
- -      BUG_ON(bug);
+ +      VM_BUG_ON_MM(bug, mm);
   }
   #else
   #define validate_mm_rb(root, ignore) do { } while (0)
@@@ -746,7 -741,7 +746,7 @@@ again:                     remove_next = 1 + (end > next-
                          * split_vma inserting another: so it must be
                          * mprotect case 4 shifting the boundary down.
                          */
- -                      adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
+ +                      adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
                         exporter = vma;
                         importer = next;
                 }
@@@ -792,8 -787,8 +792,8 @@@
         if (!anon_vma && adjust_next)
                 anon_vma = next->anon_vma;
         if (anon_vma) {
- -              VM_BUG_ON(adjust_next && next->anon_vma &&
- -                        anon_vma != next->anon_vma);
+ +              VM_BUG_ON_VMA(adjust_next && next->anon_vma &&
+ +                        anon_vma != next->anon_vma, next);
                 anon_vma_lock_write(anon_vma);
                 anon_vma_interval_tree_pre_update_vma(vma);
                 if (adjust_next)
@@@ -1015,7 -1010,7 +1015,7 @@@ can_vma_merge_after(struct vm_area_stru
   struct vm_area_struct *vma_merge(struct mm_struct *mm,
                         struct vm_area_struct *prev, unsigned long addr,
                         unsigned long end, unsigned long vm_flags,
- -                      struct anon_vma *anon_vma, struct file *file,
+ +                      struct anon_vma *anon_vma, struct file *file,
                         pgoff_t pgoff, struct mempolicy *policy)
   {
         pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
@@@ -1041,7 -1036,7 +1041,7 @@@
          * Can it merge with the predecessor?
          */
         if (prev && prev->vm_end == addr &&
- -                      mpol_equal(vma_policy(prev), policy) &&
+ +                      mpol_equal(vma_policy(prev), policy) &&
                         can_vma_merge_after(prev, vm_flags,
                                                 anon_vma, file, pgoff)) {
                 /*
@@@ -1069,7 -1064,7 +1069,7 @@@
          * Can this new request be merged in front of next?
          */
         if (next && end == next->vm_start &&
- -                      mpol_equal(policy, vma_policy(next)) &&
+ +                      mpol_equal(policy, vma_policy(next)) &&
                         can_vma_merge_before(next, vm_flags,
                                         anon_vma, file, pgoff+pglen)) {
                 if (prev && addr < prev->vm_end)        /* case 4 */
@@@ -1240,7 -1235,7 +1240,7 @@@ unsigned long do_mmap_pgoff(struct fil
                         unsigned long flags, unsigned long pgoff,
                         unsigned long *populate)
   {
- -      struct mm_struct * mm = current->mm;
+ +      struct mm_struct *mm = current->mm;
         vm_flags_t vm_flags;
   
         *populate = 0;
@@@ -1268,7 -1263,7 +1268,7 @@@
   
         /* offset overflow? */
         if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
- -               return -EOVERFLOW;
+ +              return -EOVERFLOW;
   
         /* Too many mappings? */
         if (mm->map_count > sysctl_max_map_count)
@@@ -1926,7 -1921,7 +1926,7 @@@ arch_get_unmapped_area(struct file *fil
         info.align_mask = 0;
         return vm_unmapped_area(&info);
   }
- -#endif        
+ +#endif
   
   /*
    * This mmap-allocator allocates new areas top-down from below the
@@@ -2326,13 -2321,13 +2326,13 @@@ int expand_stack(struct vm_area_struct 
   }
   
   struct vm_area_struct *
- -find_extend_vma(struct mm_struct * mm, unsigned long addr)
+ +find_extend_vma(struct mm_struct *mm, unsigned long addr)
   {
- -      struct vm_area_struct * vma;
+ +      struct vm_area_struct *vma;
         unsigned long start;
   
         addr &= PAGE_MASK;
- -      vma = find_vma(mm,addr);
+ +      vma = find_vma(mm, addr);
         if (!vma)
                 return NULL;
         if (vma->vm_start <= addr)
@@@ -2381,7 -2376,7 +2381,7 @@@ static void unmap_region(struct mm_stru
                 struct vm_area_struct *vma, struct vm_area_struct *prev,
                 unsigned long start, unsigned long end)
   {
- -      struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
+ +      struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
         struct mmu_gather tlb;
   
         lru_add_drain();
@@@ -2428,7 -2423,7 +2428,7 @@@ detach_vmas_to_be_unmapped(struct mm_st
    * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
    * munmap path where it doesn't make sense to fail.
    */
- -static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+ +static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
               unsigned long addr, int new_below)
   {
         struct vm_area_struct *new;
@@@ -2517,8 -2512,7 +2517,8 @@@ int do_munmap(struct mm_struct *mm, uns
         if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
                 return -EINVAL;
   
- -      if ((len = PAGE_ALIGN(len)) == 0)
+ +      len = PAGE_ALIGN(len);
+ +      if (len == 0)
                 return -EINVAL;
   
         /* Find the first overlapping VMA */
@@@ -2564,7 -2558,7 +2564,7 @@@
                 if (error)
                         return error;
         }
- -      vma = prev? prev->vm_next: mm->mmap;
+ +      vma = prev ? prev->vm_next : mm->mmap;
   
         /*
          * unlock any mlock()ed ranges before detaching vmas
@@@ -2627,10 -2621,10 +2627,10 @@@ static inline void verify_mm_writelocke
    */
   static unsigned long do_brk(unsigned long addr, unsigned long len)
   {
- -      struct mm_struct * mm = current->mm;
- -      struct vm_area_struct * vma, * prev;
+ +      struct mm_struct *mm = current->mm;
+ +      struct vm_area_struct *vma, *prev;
         unsigned long flags;
- -      struct rb_node ** rb_link, * rb_parent;
+ +      struct rb_node **rb_link, *rb_parent;
         pgoff_t pgoff = addr >> PAGE_SHIFT;
         int error;
   
@@@ -2854,7 -2848,7 +2854,7 @@@ struct vm_area_struct *copy_vma(struct 
                          * safe. It is only safe to keep the vm_pgoff
                          * linear if there are no pages mapped yet.
                          */
- -                      VM_BUG_ON(faulted_in_anon_vma);
+ +                      VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
                         *vmap = vma = new_vma;
                 }
                 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
@@@ -3202,7 -3196,7 +3202,7 @@@ void __init mmap_init(void
   {
         int ret;
   
-       ret = percpu_counter_init(&vm_committed_as, 0);
+       ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
         VM_BUG_ON(ret);
   }
   
diff --combined mm/page-writeback.c

index 35ca7102d421ee3bc49aab644f18b00ed1935153,5085994037210a8aff433e10290662f0d4b5ddcb..ff24c9d83112ece05dab0d3b5c941a92252deef0
--- 1/mm/page-writeback.c
--- 2/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@@ -1075,13 -1075,13 +1075,13 @@@ static void bdi_update_dirty_ratelimit(
         }
   
         if (dirty < setpoint) {
- -              x = min(bdi->balanced_dirty_ratelimit,
- -                       min(balanced_dirty_ratelimit, task_ratelimit));
+ +              x = min3(bdi->balanced_dirty_ratelimit,
+ +                       balanced_dirty_ratelimit, task_ratelimit);
                 if (dirty_ratelimit < x)
                         step = x - dirty_ratelimit;
         } else {
- -              x = max(bdi->balanced_dirty_ratelimit,
- -                       max(balanced_dirty_ratelimit, task_ratelimit));
+ +              x = max3(bdi->balanced_dirty_ratelimit,
+ +                       balanced_dirty_ratelimit, task_ratelimit);
                 if (dirty_ratelimit > x)
                         step = dirty_ratelimit - x;
         }
@@@ -1777,7 -1777,7 +1777,7 @@@ void __init page_writeback_init(void
         writeback_set_ratelimit();
         register_cpu_notifier(&ratelimit_nb);
   
-       fprop_global_init(&writeout_completions);
+       fprop_global_init(&writeout_completions, GFP_KERNEL);
   }
   
   /**
diff --combined mm/shmem.c

index 4fad61bb41e522053a6c7fd9cea2a04d650361b4,d4bc55d3f10741cdbef823d23a2e227c4346b380..cd6fc7590e54f758adacdcfc9cdcb402c7cd3005
--- 1/mm/shmem.c
--- 2/mm/shmem.c
+++ b/mm/shmem.c
@@@ -2367,10 -2367,8 +2367,10 @@@ static int shmem_rename2(struct inode *
   
         if (new_dentry->d_inode) {
                 (void) shmem_unlink(new_dir, new_dentry);
- -              if (they_are_dirs)
+ +              if (they_are_dirs) {
+ +                      drop_nlink(new_dentry->d_inode);
                         drop_nlink(old_dir);
+ +              }
         } else if (they_are_dirs) {
                 drop_nlink(old_dir);
                 inc_nlink(new_dir);
@@@ -2995,7 -2993,7 +2995,7 @@@ int shmem_fill_super(struct super_bloc
   #endif
   
         spin_lock_init(&sbinfo->stat_lock);
-       if (percpu_counter_init(&sbinfo->used_blocks, 0))
+       if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
                 goto failed;
         sbinfo->free_inodes = sbinfo->max_inodes;
   
@@@ -3077,9 -3075,7 +3077,9 @@@ static const struct address_space_opera
         .write_begin    = shmem_write_begin,
         .write_end      = shmem_write_end,
   #endif
+ +#ifdef CONFIG_MIGRATION
         .migratepage    = migrate_page,
+ +#endif
         .error_remove_page = generic_error_remove_page,
   };
   
diff --combined net/dccp/proto.c

index 97b0fcc79547aa48476feda4e0c6ef95054bd5e7,e421eddf67b4a8e87258d5efa1b6eb58ae7987b0..5ab6627cf3704a8479d35170546d1d683185dbbd
--- 1/net/dccp/proto.c
--- 2/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@@ -848,7 -848,7 +848,7 @@@ int dccp_recvmsg(struct kiocb *iocb, st
                 default:
                         dccp_pr_debug("packet_type=%s\n",
                                       dccp_packet_name(dh->dccph_type));
- -                      sk_eat_skb(sk, skb, false);
+ +                      sk_eat_skb(sk, skb);
                 }
   verify_sock_status:
                 if (sock_flag(sk, SOCK_DONE)) {
@@@ -905,7 -905,7 +905,7 @@@
                         len = skb->len;
         found_fin_ok:
                 if (!(flags & MSG_PEEK))
- -                      sk_eat_skb(sk, skb, false);
+ +                      sk_eat_skb(sk, skb);
                 break;
         } while (1);
   out:
@@@ -1082,7 -1082,7 +1082,7 @@@ void dccp_shutdown(struct sock *sk, in
   
   EXPORT_SYMBOL_GPL(dccp_shutdown);
   
- -static inline int dccp_mib_init(void)
+ +static inline int __init dccp_mib_init(void)
   {
         dccp_statistics = alloc_percpu(struct dccp_mib);
         if (!dccp_statistics)
@@@ -1115,7 -1115,7 +1115,7 @@@ static int __init dccp_init(void
   
         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                      FIELD_SIZEOF(struct sk_buff, cb));
-       rc = percpu_counter_init(&dccp_orphan_count, 0);
+       rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
         if (rc)
                 goto out_fail;
         rc = -ENOBUFS;
diff --combined net/ipv4/tcp.c

index 461003d258ba4030b1a8aa1c49930b7a1ac94b9c,d59c2604c2470ed05ff1122fb426e50c2b2068ba..86023b9be47f46eeef7373935af024c1e7c4fa9d
--- 1/net/ipv4/tcp.c
--- 2/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@@ -274,6 -274,7 +274,6 @@@
   #include <net/tcp.h>
   #include <net/xfrm.h>
   #include <net/ip.h>
- -#include <net/netdma.h>
   #include <net/sock.h>
   
   #include <asm/uaccess.h>
@@@ -404,7 -405,7 +404,7 @@@ void tcp_init_sock(struct sock *sk
   
         tp->reordering = sysctl_tcp_reordering;
         tcp_enable_early_retrans(tp);
- -      icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+ +      tcp_assign_congestion_control(sk);
   
         tp->tsoffset = 0;
   
@@@ -608,7 -609,7 +608,7 @@@ static inline bool forced_push(const st
         return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
   }
   
- -static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
+ +static void skb_entail(struct sock *sk, struct sk_buff *skb)
   {
         struct tcp_sock *tp = tcp_sk(sk);
         struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@@ -617,7 -618,7 +617,7 @@@
         tcb->seq     = tcb->end_seq = tp->write_seq;
         tcb->tcp_flags = TCPHDR_ACK;
         tcb->sacked  = 0;
- -      skb_header_release(skb);
+ +      __skb_header_release(skb);
         tcp_add_write_queue_tail(sk, skb);
         sk->sk_wmem_queued += skb->truesize;
         sk_mem_charge(sk, skb->truesize);
@@@ -962,7 -963,7 +962,7 @@@ new_segment
                 skb->ip_summed = CHECKSUM_PARTIAL;
                 tp->write_seq += copy;
                 TCP_SKB_CB(skb)->end_seq += copy;
- -              skb_shinfo(skb)->gso_segs = 0;
+ +              tcp_skb_pcount_set(skb, 0);
   
                 if (!copied)
                         TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
@@@ -1260,7 -1261,7 +1260,7 @@@ new_segment
   
                         tp->write_seq += copy;
                         TCP_SKB_CB(skb)->end_seq += copy;
- -                      skb_shinfo(skb)->gso_segs = 0;
+ +                      tcp_skb_pcount_set(skb, 0);
   
                         from += copy;
                         copied += copy;
@@@ -1393,7 -1394,7 +1393,7 @@@ static int tcp_peek_sndq(struct sock *s
    * calculation of whether or not we must ACK for the sake of
    * a window update.
    */
- -void tcp_cleanup_rbuf(struct sock *sk, int copied)
+ +static void tcp_cleanup_rbuf(struct sock *sk, int copied)
   {
         struct tcp_sock *tp = tcp_sk(sk);
         bool time_to_ack = false;
@@@ -1469,6 -1470,39 +1469,6 @@@ static void tcp_prequeue_process(struc
         tp->ucopy.memory = 0;
   }
   
- -#ifdef CONFIG_NET_DMA
- -static void tcp_service_net_dma(struct sock *sk, bool wait)
- -{
- -      dma_cookie_t done, used;
- -      dma_cookie_t last_issued;
- -      struct tcp_sock *tp = tcp_sk(sk);
- -
- -      if (!tp->ucopy.dma_chan)
- -              return;
- -
- -      last_issued = tp->ucopy.dma_cookie;
- -      dma_async_issue_pending(tp->ucopy.dma_chan);
- -
- -      do {
- -              if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
- -                                            last_issued, &done,
- -                                            &used) == DMA_COMPLETE) {
- -                      /* Safe to free early-copied skbs now */
- -                      __skb_queue_purge(&sk->sk_async_wait_queue);
- -                      break;
- -              } else {
- -                      struct sk_buff *skb;
- -                      while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
- -                             (dma_async_is_complete(skb->dma_cookie, done,
- -                                                    used) == DMA_COMPLETE)) {
- -                              __skb_dequeue(&sk->sk_async_wait_queue);
- -                              kfree_skb(skb);
- -                      }
- -              }
- -      } while (wait);
- -}
- -#endif
- -
   static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
   {
         struct sk_buff *skb;
@@@ -1476,9 -1510,9 +1476,9 @@@
   
         while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
                 offset = seq - TCP_SKB_CB(skb)->seq;
- -              if (tcp_hdr(skb)->syn)
+ +              if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
                         offset--;
- -              if (offset < skb->len || tcp_hdr(skb)->fin) {
+ +              if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
                         *off = offset;
                         return skb;
                 }
@@@ -1486,7 -1520,7 +1486,7 @@@
                  * splitted a fat GRO packet, while we released socket lock
                  * in skb_splice_bits()
                  */
- -              sk_eat_skb(sk, skb, false);
+ +              sk_eat_skb(sk, skb);
         }
         return NULL;
   }
@@@ -1551,12 -1585,12 +1551,12 @@@ int tcp_read_sock(struct sock *sk, read
                         if (offset + 1 != skb->len)
                                 continue;
                 }
- -              if (tcp_hdr(skb)->fin) {
- -                      sk_eat_skb(sk, skb, false);
+ +              if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+ +                      sk_eat_skb(sk, skb);
                         ++seq;
                         break;
                 }
- -              sk_eat_skb(sk, skb, false);
+ +              sk_eat_skb(sk, skb);
                 if (!desc->count)
                         break;
                 tp->copied_seq = seq;
@@@ -1594,6 -1628,7 +1594,6 @@@ int tcp_recvmsg(struct kiocb *iocb, str
         int target;             /* Read at least this many bytes */
         long timeo;
         struct task_struct *user_recv = NULL;
- -      bool copied_early = false;
         struct sk_buff *skb;
         u32 urg_hole = 0;
   
@@@ -1639,6 -1674,28 +1639,6 @@@
   
         target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
   
- -#ifdef CONFIG_NET_DMA
- -      tp->ucopy.dma_chan = NULL;
- -      preempt_disable();
- -      skb = skb_peek_tail(&sk->sk_receive_queue);
- -      {
- -              int available = 0;
- -
- -              if (skb)
- -                      available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
- -              if ((available < target) &&
- -                  (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
- -                  !sysctl_tcp_low_latency &&
- -                  net_dma_find_channel()) {
- -                      preempt_enable();
- -                      tp->ucopy.pinned_list =
- -                                      dma_pin_iovec_pages(msg->msg_iov, len);
- -              } else {
- -                      preempt_enable();
- -              }
- -      }
- -#endif
- -
         do {
                 u32 offset;
   
@@@ -1665,11 -1722,11 +1665,11 @@@
                                 break;
   
                         offset = *seq - TCP_SKB_CB(skb)->seq;
- -                      if (tcp_hdr(skb)->syn)
+ +                      if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
                                 offset--;
                         if (offset < skb->len)
                                 goto found_ok_skb;
- -                      if (tcp_hdr(skb)->fin)
+ +                      if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                                 goto found_fin_ok;
                         WARN(!(flags & MSG_PEEK),
                              "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
@@@ -1769,6 -1826,16 +1769,6 @@@
                         /* __ Set realtime policy in scheduler __ */
                 }
   
- -#ifdef CONFIG_NET_DMA
- -              if (tp->ucopy.dma_chan) {
- -                      if (tp->rcv_wnd == 0 &&
- -                          !skb_queue_empty(&sk->sk_async_wait_queue)) {
- -                              tcp_service_net_dma(sk, true);
- -                              tcp_cleanup_rbuf(sk, copied);
- -                      } else
- -                              dma_async_issue_pending(tp->ucopy.dma_chan);
- -              }
- -#endif
                 if (copied >= target) {
                         /* Do not sleep, just process backlog. */
                         release_sock(sk);
@@@ -1776,6 -1843,11 +1776,6 @@@
                 } else
                         sk_wait_data(sk, &timeo);
   
- -#ifdef CONFIG_NET_DMA
- -              tcp_service_net_dma(sk, false);  /* Don't block */
- -              tp->ucopy.wakeup = 0;
- -#endif
- -
                 if (user_recv) {
                         int chunk;
   
@@@ -1833,13 -1905,43 +1833,13 @@@ do_prequeue
                 }
   
                 if (!(flags & MSG_TRUNC)) {
- -#ifdef CONFIG_NET_DMA
- -                      if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
- -                              tp->ucopy.dma_chan = net_dma_find_channel();
- -
- -                      if (tp->ucopy.dma_chan) {
- -                              tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
- -                                      tp->ucopy.dma_chan, skb, offset,
- -                                      msg->msg_iov, used,
- -                                      tp->ucopy.pinned_list);
- -
- -                              if (tp->ucopy.dma_cookie < 0) {
- -
- -                                      pr_alert("%s: dma_cookie < 0\n",
- -                                               __func__);
- -
- -                                      /* Exception. Bailout! */
- -                                      if (!copied)
- -                                              copied = -EFAULT;
- -                                      break;
- -                              }
- -
- -                              dma_async_issue_pending(tp->ucopy.dma_chan);
- -
- -                              if ((offset + used) == skb->len)
- -                                      copied_early = true;
- -
- -                      } else
- -#endif
- -                      {
- -                              err = skb_copy_datagram_iovec(skb, offset,
- -                                              msg->msg_iov, used);
- -                              if (err) {
- -                                      /* Exception. Bailout! */
- -                                      if (!copied)
- -                                              copied = -EFAULT;
- -                                      break;
- -                              }
+ +                      err = skb_copy_datagram_iovec(skb, offset,
+ +                                                    msg->msg_iov, used);
+ +                      if (err) {
+ +                              /* Exception. Bailout! */
+ +                              if (!copied)
+ +                                      copied = -EFAULT;
+ +                              break;
                         }
                 }
   
@@@ -1857,17 -1959,21 +1857,17 @@@ skip_copy
                 if (used + offset < skb->len)
                         continue;
   
- -              if (tcp_hdr(skb)->fin)
+ +              if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                         goto found_fin_ok;
- -              if (!(flags & MSG_PEEK)) {
- -                      sk_eat_skb(sk, skb, copied_early);
- -                      copied_early = false;
- -              }
+ +              if (!(flags & MSG_PEEK))
+ +                      sk_eat_skb(sk, skb);
                 continue;
   
         found_fin_ok:
                 /* Process the FIN. */
                 ++*seq;
- -              if (!(flags & MSG_PEEK)) {
- -                      sk_eat_skb(sk, skb, copied_early);
- -                      copied_early = false;
- -              }
+ +              if (!(flags & MSG_PEEK))
+ +                      sk_eat_skb(sk, skb);
                 break;
         } while (len > 0);
   
@@@ -1890,6 -1996,16 +1890,6 @@@
                 tp->ucopy.len = 0;
         }
   
- -#ifdef CONFIG_NET_DMA
- -      tcp_service_net_dma(sk, true);  /* Wait for queue to drain */
- -      tp->ucopy.dma_chan = NULL;
- -
- -      if (tp->ucopy.pinned_list) {
- -              dma_unpin_iovec_pages(tp->ucopy.pinned_list);
- -              tp->ucopy.pinned_list = NULL;
- -      }
- -#endif
- -
         /* According to UNIX98, msg_name/msg_namelen are ignored
          * on connected socket. I was just happy when found this 8) --ANK
          */
@@@ -2044,10 -2160,8 +2044,10 @@@ void tcp_close(struct sock *sk, long ti
          *  reader process may not have drained the data yet!
          */
         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- -              u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
- -                        tcp_hdr(skb)->fin;
+ +              u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
+ +
+ +              if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ +                      len--;
                 data_was_unread += len;
                 __kfree_skb(skb);
         }
@@@ -2235,6 -2349,9 +2235,6 @@@ int tcp_disconnect(struct sock *sk, in
         __skb_queue_purge(&sk->sk_receive_queue);
         tcp_write_queue_purge(sk);
         __skb_queue_purge(&tp->out_of_order_queue);
- -#ifdef CONFIG_NET_DMA
- -      __skb_queue_purge(&sk->sk_async_wait_queue);
- -#endif
   
         inet->inet_dport = 0;
   
@@@ -2574,7 -2691,7 +2574,7 @@@ static int do_tcp_setsockopt(struct soc
                 break;
   #endif
         case TCP_USER_TIMEOUT:
- -              /* Cap the max timeout in ms TCP will retry/retrans
+ +              /* Cap the max time in ms TCP will retry or probe the window
                  * before giving up and aborting (ETIMEDOUT) a connection.
                  */
                 if (val < 0)
@@@ -3053,7 -3170,7 +3053,7 @@@ static int __init set_thash_entries(cha
   }
   __setup("thash_entries=", set_thash_entries);
   
- -static void tcp_init_mem(void)
+ +static void __init tcp_init_mem(void)
   {
         unsigned long limit = nr_free_buffer_pages() / 8;
         limit = max(limit, 128UL);
@@@ -3071,8 -3188,8 +3071,8 @@@ void __init tcp_init(void
   
         BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
   
-       percpu_counter_init(&tcp_sockets_allocated, 0);
-       percpu_counter_init(&tcp_orphan_count, 0);
+       percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
+       percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
         tcp_hashinfo.bind_bucket_cachep =
                 kmem_cache_create("tcp_bind_bucket",
                                   sizeof(struct inet_bind_bucket), 0,
@@@ -3139,6 -3256,8 +3139,6 @@@
                 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
   
         tcp_metrics_init();
- -
- -      tcp_register_congestion_control(&tcp_reno);
- -
+ +      BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
         tcp_tasklet_init();
   }
diff --combined net/sctp/protocol.c

index 9d2c6c9facb6a4f4dabee29d8430aaf55b323636,f00a85a3fdddf58732624925c7bb73966c1a38c7..8f34b27d5775f053ffde8a763f724c0d8b4f6e1f
--- 1/net/sctp/protocol.c
--- 2/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@@ -366,7 -366,7 +366,7 @@@ static int sctp_v4_available(union sctp
         if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
            ret != RTN_LOCAL &&
            !sp->inet.freebind &&
- -         !sysctl_ip_nonlocal_bind)
+ +         !net->ipv4.sysctl_ip_nonlocal_bind)
                 return 0;
   
         if (ipv6_only_sock(sctp_opt2sk(sp)))
@@@ -1341,7 -1341,7 +1341,7 @@@ static __init int sctp_init(void
         if (!sctp_chunk_cachep)
                 goto err_chunk_cachep;
   
-       status = percpu_counter_init(&sctp_sockets_allocated, 0);
+       status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL);
         if (status)
                 goto err_percpu_counter_init;
author	Linus Torvalds <[email protected]>
	Fri, 10 Oct 2014 11:26:02 +0000 (07:26 -0400)
committer	Linus Torvalds <[email protected]>
	Fri, 10 Oct 2014 11:26:02 +0000 (07:26 -0400)
		1	2
arch/x86/kvm/mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/percpu-refcount.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/cgroup.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/backing-dev.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/mmap.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/page-writeback.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/shmem.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/dccp/proto.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv4/tcp.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/sctp/protocol.c	patch \|	diff1 \|	diff2 \|	blob \| history