Merge branch 'for-linus' into for-next

author Tejun Heo <[email protected]>

Tue, 8 Dec 2009 01:02:12 +0000 (10:02 +0900)

committer Tejun Heo <[email protected]>

Tue, 8 Dec 2009 01:02:12 +0000 (10:02 +0900)
author Tejun Heo <[email protected]>
Tue, 8 Dec 2009 01:02:12 +0000 (10:02 +0900)
committer Tejun Heo <[email protected]>
Tue, 8 Dec 2009 01:02:12 +0000 (10:02 +0900)
diff --combined include/linux/percpu.h

index 522f421ec2130262f75bf6dbe2778ddcfa78a63e,8e4ead6435fbd07099e78b582859f47e712b35e4..cf5efbcf716c8cecf74d4d315e2619f6fdcfa1f4
--- 1/include/linux/percpu.h
--- 2/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@@ -34,6 -34,8 +34,6 @@@
   
   #ifdef CONFIG_SMP
   
- -#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
- -
   /* minimum unit size, also is the maximum supported allocation size */
   #define PCPU_MIN_UNIT_SIZE            PFN_ALIGN(64 << 10)
   
@@@ -128,8 -130,31 +128,9 @@@ extern int __init pcpu_page_first_chunk
   #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
   
   extern void *__alloc_reserved_percpu(size_t size, size_t align);
- -
- -#else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
- -
- -struct percpu_data {
- -      void *ptrs[1];
- -};
- -
- -/* pointer disguising messes up the kmemleak objects tracking */
- -#ifndef CONFIG_DEBUG_KMEMLEAK
- -#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
- -#else
- -#define __percpu_disguise(pdata) (struct percpu_data *)(pdata)
- -#endif
- -
- -#define per_cpu_ptr(ptr, cpu)                                         \
- -({                                                                    \
- -        struct percpu_data *__p = __percpu_disguise(ptr);             \
- -        (__typeof__(ptr))__p->ptrs[(cpu)];                            \
- -})
- -
- -#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
- -
   extern void *__alloc_percpu(size_t size, size_t align);
   extern void free_percpu(void *__pdata);
+ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
   
   #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
   extern void __init setup_per_cpu_areas(void);
@@@ -155,6 -180,11 +156,11 @@@ static inline void free_percpu(void *p
         kfree(p);
   }
   
+ static inline phys_addr_t per_cpu_ptr_to_phys(void *addr)
+ {
+       return __pa(addr);
+ }
+ 
   static inline void __init setup_per_cpu_areas(void) { }
   
   static inline void *pcpu_lpage_remapped(void *kaddr)
@@@ -164,8 -194,8 +170,8 @@@
   
   #endif /* CONFIG_SMP */
   
- -#define alloc_percpu(type)    (type *)__alloc_percpu(sizeof(type), \
- -                                                     __alignof__(type))
+ +#define alloc_percpu(type)    \
+ +      (typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type))
   
   /*
    * Optional methods for optimized non-lvalue per-cpu variable access.
@@@ -219,404 -249,4 +225,404 @@@ do {                                                                   
   # define percpu_xor(var, val)         __percpu_generic_to_op(var, (val), ^=)
   #endif
   
+ +/*
+ + * Branching function to split up a function into a set of functions that
+ + * are called for different scalar sizes of the objects handled.
+ + */
+ +
+ +extern void __bad_size_call_parameter(void);
+ +
+ +#define __pcpu_size_call_return(stem, variable)                               \
+ +({    typeof(variable) pscr_ret__;                                    \
+ +      switch(sizeof(variable)) {                                      \
+ +      case 1: pscr_ret__ = stem##1(variable);break;                   \
+ +      case 2: pscr_ret__ = stem##2(variable);break;                   \
+ +      case 4: pscr_ret__ = stem##4(variable);break;                   \
+ +      case 8: pscr_ret__ = stem##8(variable);break;                   \
+ +      default:                                                        \
+ +              __bad_size_call_parameter();break;                      \
+ +      }                                                               \
+ +      pscr_ret__;                                                     \
+ +})
+ +
+ +#define __pcpu_size_call(stem, variable, ...)                         \
+ +do {                                                                  \
+ +      switch(sizeof(variable)) {                                      \
+ +              case 1: stem##1(variable, __VA_ARGS__);break;           \
+ +              case 2: stem##2(variable, __VA_ARGS__);break;           \
+ +              case 4: stem##4(variable, __VA_ARGS__);break;           \
+ +              case 8: stem##8(variable, __VA_ARGS__);break;           \
+ +              default:                                                \
+ +                      __bad_size_call_parameter();break;              \
+ +      }                                                               \
+ +} while (0)
+ +
+ +/*
+ + * Optimized manipulation for memory allocated through the per cpu
+ + * allocator or for addresses of per cpu variables (can be determined
+ + * using per_cpu_var(xx)).
+ + *
+ + * These operations guarantee exclusivity of access for other operations
+ + * on the *same* processor. The assumption is that per cpu data is only
+ + * accessed by a single processor instance (the current one).
+ + *
+ + * The first group is used for accesses that must be done in a
+ + * preemption safe way since we know that the context is not preempt
+ + * safe. Interrupts may occur. If the interrupt modifies the variable
+ + * too then RMW actions will not be reliable.
+ + *
+ + * The arch code can provide optimized functions in two ways:
+ + *
+ + * 1. Override the function completely. F.e. define this_cpu_add().
+ + *    The arch must then ensure that the various scalar formats passed
+ + *    are handled correctly.
+ + *
+ + * 2. Provide functions for certain scalar sizes. F.e. provide
+ + *    this_cpu_add_2() to provide per cpu atomic operations for 2 byte
+ + *    sized RMW actions. If arch code does not provide operations for
+ + *    a scalar size then the fallback in the generic code will be
+ + *    used.
+ + */
+ +
+ +#define _this_cpu_generic_read(pcp)                                   \
+ +({    typeof(pcp) ret__;                                              \
+ +      preempt_disable();                                              \
+ +      ret__ = *this_cpu_ptr(&(pcp));                                  \
+ +      preempt_enable();                                               \
+ +      ret__;                                                          \
+ +})
+ +
+ +#ifndef this_cpu_read
+ +# ifndef this_cpu_read_1
+ +#  define this_cpu_read_1(pcp)        _this_cpu_generic_read(pcp)
+ +# endif
+ +# ifndef this_cpu_read_2
+ +#  define this_cpu_read_2(pcp)        _this_cpu_generic_read(pcp)
+ +# endif
+ +# ifndef this_cpu_read_4
+ +#  define this_cpu_read_4(pcp)        _this_cpu_generic_read(pcp)
+ +# endif
+ +# ifndef this_cpu_read_8
+ +#  define this_cpu_read_8(pcp)        _this_cpu_generic_read(pcp)
+ +# endif
+ +# define this_cpu_read(pcp)   __pcpu_size_call_return(this_cpu_read_, (pcp))
+ +#endif
+ +
+ +/*
+ + * Generic preemption-safe fallback: apply "op val" to this CPU's instance
+ + * of @pcp with preemption disabled around the access.  @pcp is
+ + * parenthesized before taking its address so that any lvalue expression
+ + * can be passed, matching the __this_cpu and irqsafe_cpu helpers.
+ + */
+ +#define _this_cpu_generic_to_op(pcp, val, op)                         \
+ +do {                                                                  \
+ +      preempt_disable();                                              \
+ +      *__this_cpu_ptr(&(pcp)) op val;                                 \
+ +      preempt_enable();                                               \
+ +} while (0)
+ +
+ +#ifndef this_cpu_write
+ +# ifndef this_cpu_write_1
+ +#  define this_cpu_write_1(pcp, val)  _this_cpu_generic_to_op((pcp), (val), =)
+ +# endif
+ +# ifndef this_cpu_write_2
+ +#  define this_cpu_write_2(pcp, val)  _this_cpu_generic_to_op((pcp), (val), =)
+ +# endif
+ +# ifndef this_cpu_write_4
+ +#  define this_cpu_write_4(pcp, val)  _this_cpu_generic_to_op((pcp), (val), =)
+ +# endif
+ +# ifndef this_cpu_write_8
+ +#  define this_cpu_write_8(pcp, val)  _this_cpu_generic_to_op((pcp), (val), =)
+ +# endif
+ +# define this_cpu_write(pcp, val)     __pcpu_size_call(this_cpu_write_, (pcp), (val))
+ +#endif
+ +
+ +#ifndef this_cpu_add
+ +# ifndef this_cpu_add_1
+ +#  define this_cpu_add_1(pcp, val)    _this_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# ifndef this_cpu_add_2
+ +#  define this_cpu_add_2(pcp, val)    _this_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# ifndef this_cpu_add_4
+ +#  define this_cpu_add_4(pcp, val)    _this_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# ifndef this_cpu_add_8
+ +#  define this_cpu_add_8(pcp, val)    _this_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# define this_cpu_add(pcp, val)               __pcpu_size_call(this_cpu_add_, (pcp), (val))
+ +#endif
+ +
+ +#ifndef this_cpu_sub
+ +# define this_cpu_sub(pcp, val)               this_cpu_add((pcp), -(val))
+ +#endif
+ +
+ +#ifndef this_cpu_inc
+ +# define this_cpu_inc(pcp)            this_cpu_add((pcp), 1)
+ +#endif
+ +
+ +#ifndef this_cpu_dec
+ +# define this_cpu_dec(pcp)            this_cpu_sub((pcp), 1)
+ +#endif
+ +
+ +#ifndef this_cpu_and
+ +# ifndef this_cpu_and_1
+ +#  define this_cpu_and_1(pcp, val)    _this_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# ifndef this_cpu_and_2
+ +#  define this_cpu_and_2(pcp, val)    _this_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# ifndef this_cpu_and_4
+ +#  define this_cpu_and_4(pcp, val)    _this_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# ifndef this_cpu_and_8
+ +#  define this_cpu_and_8(pcp, val)    _this_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# define this_cpu_and(pcp, val)               __pcpu_size_call(this_cpu_and_, (pcp), (val))
+ +#endif
+ +
+ +#ifndef this_cpu_or
+ +# ifndef this_cpu_or_1
+ +#  define this_cpu_or_1(pcp, val)     _this_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# ifndef this_cpu_or_2
+ +#  define this_cpu_or_2(pcp, val)     _this_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# ifndef this_cpu_or_4
+ +#  define this_cpu_or_4(pcp, val)     _this_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# ifndef this_cpu_or_8
+ +#  define this_cpu_or_8(pcp, val)     _this_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# define this_cpu_or(pcp, val)                __pcpu_size_call(this_cpu_or_, (pcp), (val))
+ +#endif
+ +
+ +/* Preemption-safe XOR; dispatches on sizeof(pcp) to this_cpu_xor_{1,2,4,8}. */
+ +#ifndef this_cpu_xor
+ +# ifndef this_cpu_xor_1
+ +#  define this_cpu_xor_1(pcp, val)    _this_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# ifndef this_cpu_xor_2
+ +#  define this_cpu_xor_2(pcp, val)    _this_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# ifndef this_cpu_xor_4
+ +#  define this_cpu_xor_4(pcp, val)    _this_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# ifndef this_cpu_xor_8
+ +#  define this_cpu_xor_8(pcp, val)    _this_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# define this_cpu_xor(pcp, val)               __pcpu_size_call(this_cpu_xor_, (pcp), (val))
+ +#endif
+ +
+ +/*
+ + * Generic percpu operations that do not require preemption handling.
+ + * Either we do not care about races or the caller has the
+ + * responsibility of handling preemptions issues. Arch code can still
+ + * override these instructions since the arch per cpu code may be more
+ + * efficient and may actually get race freeness for free (that is the
+ + * case for x86 for example).
+ + *
+ + * If there is no other protection through preempt disable and/or
+ + * disabling interrupts then one of these RMW operations can show unexpected
+ + * behavior because the execution thread was rescheduled on another processor
+ + * or an interrupt occurred and the same percpu variable was modified from
+ + * the interrupt context.
+ + */
+ +#ifndef __this_cpu_read
+ +# ifndef __this_cpu_read_1
+ +#  define __this_cpu_read_1(pcp)      (*__this_cpu_ptr(&(pcp)))
+ +# endif
+ +# ifndef __this_cpu_read_2
+ +#  define __this_cpu_read_2(pcp)      (*__this_cpu_ptr(&(pcp)))
+ +# endif
+ +# ifndef __this_cpu_read_4
+ +#  define __this_cpu_read_4(pcp)      (*__this_cpu_ptr(&(pcp)))
+ +# endif
+ +# ifndef __this_cpu_read_8
+ +#  define __this_cpu_read_8(pcp)      (*__this_cpu_ptr(&(pcp)))
+ +# endif
+ +# define __this_cpu_read(pcp) __pcpu_size_call_return(__this_cpu_read_, (pcp))
+ +#endif
+ +
+ +#define __this_cpu_generic_to_op(pcp, val, op)                                \
+ +do {                                                                  \
+ +      *__this_cpu_ptr(&(pcp)) op val;                                 \
+ +} while (0)
+ +
+ +#ifndef __this_cpu_write
+ +# ifndef __this_cpu_write_1
+ +#  define __this_cpu_write_1(pcp, val)        __this_cpu_generic_to_op((pcp), (val), =)
+ +# endif
+ +# ifndef __this_cpu_write_2
+ +#  define __this_cpu_write_2(pcp, val)        __this_cpu_generic_to_op((pcp), (val), =)
+ +# endif
+ +# ifndef __this_cpu_write_4
+ +#  define __this_cpu_write_4(pcp, val)        __this_cpu_generic_to_op((pcp), (val), =)
+ +# endif
+ +# ifndef __this_cpu_write_8
+ +#  define __this_cpu_write_8(pcp, val)        __this_cpu_generic_to_op((pcp), (val), =)
+ +# endif
+ +# define __this_cpu_write(pcp, val)   __pcpu_size_call(__this_cpu_write_, (pcp), (val))
+ +#endif
+ +
+ +#ifndef __this_cpu_add
+ +# ifndef __this_cpu_add_1
+ +#  define __this_cpu_add_1(pcp, val)  __this_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# ifndef __this_cpu_add_2
+ +#  define __this_cpu_add_2(pcp, val)  __this_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# ifndef __this_cpu_add_4
+ +#  define __this_cpu_add_4(pcp, val)  __this_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# ifndef __this_cpu_add_8
+ +#  define __this_cpu_add_8(pcp, val)  __this_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# define __this_cpu_add(pcp, val)     __pcpu_size_call(__this_cpu_add_, (pcp), (val))
+ +#endif
+ +
+ +#ifndef __this_cpu_sub
+ +# define __this_cpu_sub(pcp, val)     __this_cpu_add((pcp), -(val))
+ +#endif
+ +
+ +#ifndef __this_cpu_inc
+ +# define __this_cpu_inc(pcp)          __this_cpu_add((pcp), 1)
+ +#endif
+ +
+ +#ifndef __this_cpu_dec
+ +# define __this_cpu_dec(pcp)          __this_cpu_sub((pcp), 1)
+ +#endif
+ +
+ +#ifndef __this_cpu_and
+ +# ifndef __this_cpu_and_1
+ +#  define __this_cpu_and_1(pcp, val)  __this_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# ifndef __this_cpu_and_2
+ +#  define __this_cpu_and_2(pcp, val)  __this_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# ifndef __this_cpu_and_4
+ +#  define __this_cpu_and_4(pcp, val)  __this_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# ifndef __this_cpu_and_8
+ +#  define __this_cpu_and_8(pcp, val)  __this_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# define __this_cpu_and(pcp, val)     __pcpu_size_call(__this_cpu_and_, (pcp), (val))
+ +#endif
+ +
+ +#ifndef __this_cpu_or
+ +# ifndef __this_cpu_or_1
+ +#  define __this_cpu_or_1(pcp, val)   __this_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# ifndef __this_cpu_or_2
+ +#  define __this_cpu_or_2(pcp, val)   __this_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# ifndef __this_cpu_or_4
+ +#  define __this_cpu_or_4(pcp, val)   __this_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# ifndef __this_cpu_or_8
+ +#  define __this_cpu_or_8(pcp, val)   __this_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# define __this_cpu_or(pcp, val)      __pcpu_size_call(__this_cpu_or_, (pcp), (val))
+ +#endif
+ +
+ +#ifndef __this_cpu_xor
+ +# ifndef __this_cpu_xor_1
+ +#  define __this_cpu_xor_1(pcp, val)  __this_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# ifndef __this_cpu_xor_2
+ +#  define __this_cpu_xor_2(pcp, val)  __this_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# ifndef __this_cpu_xor_4
+ +#  define __this_cpu_xor_4(pcp, val)  __this_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# ifndef __this_cpu_xor_8
+ +#  define __this_cpu_xor_8(pcp, val)  __this_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# define __this_cpu_xor(pcp, val)     __pcpu_size_call(__this_cpu_xor_, (pcp), (val))
+ +#endif
+ +
+ +/*
+ + * IRQ safe versions of the per cpu RMW operations. Note that these operations
+ + * are *not* safe against modification of the same variable from other
+ + * processors (which one gets when using regular atomic operations).
+ + * They are guaranteed to be atomic vs. local interrupts and
+ + * preemption only.
+ + */
+ +#define irqsafe_cpu_generic_to_op(pcp, val, op)                               \
+ +do {                                                                  \
+ +      unsigned long flags;                                            \
+ +      local_irq_save(flags);                                          \
+ +      *__this_cpu_ptr(&(pcp)) op val;                                 \
+ +      local_irq_restore(flags);                                       \
+ +} while (0)
+ +
+ +#ifndef irqsafe_cpu_add
+ +# ifndef irqsafe_cpu_add_1
+ +#  define irqsafe_cpu_add_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# ifndef irqsafe_cpu_add_2
+ +#  define irqsafe_cpu_add_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# ifndef irqsafe_cpu_add_4
+ +#  define irqsafe_cpu_add_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# ifndef irqsafe_cpu_add_8
+ +#  define irqsafe_cpu_add_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=)
+ +# endif
+ +# define irqsafe_cpu_add(pcp, val) __pcpu_size_call(irqsafe_cpu_add_, (pcp), (val))
+ +#endif
+ +
+ +#ifndef irqsafe_cpu_sub
+ +# define irqsafe_cpu_sub(pcp, val)    irqsafe_cpu_add((pcp), -(val))
+ +#endif
+ +
+ +#ifndef irqsafe_cpu_inc
+ +# define irqsafe_cpu_inc(pcp) irqsafe_cpu_add((pcp), 1)
+ +#endif
+ +
+ +#ifndef irqsafe_cpu_dec
+ +# define irqsafe_cpu_dec(pcp) irqsafe_cpu_sub((pcp), 1)
+ +#endif
+ +
+ +/* IRQ-safe AND; dispatches on sizeof(pcp) to irqsafe_cpu_and_{1,2,4,8}. */
+ +#ifndef irqsafe_cpu_and
+ +# ifndef irqsafe_cpu_and_1
+ +#  define irqsafe_cpu_and_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# ifndef irqsafe_cpu_and_2
+ +#  define irqsafe_cpu_and_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# ifndef irqsafe_cpu_and_4
+ +#  define irqsafe_cpu_and_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# ifndef irqsafe_cpu_and_8
+ +#  define irqsafe_cpu_and_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=)
+ +# endif
+ +# define irqsafe_cpu_and(pcp, val) __pcpu_size_call(irqsafe_cpu_and_, (pcp), (val))
+ +#endif
+ +
+ +/* IRQ-safe OR; dispatches on sizeof(pcp) to irqsafe_cpu_or_{1,2,4,8}. */
+ +#ifndef irqsafe_cpu_or
+ +# ifndef irqsafe_cpu_or_1
+ +#  define irqsafe_cpu_or_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# ifndef irqsafe_cpu_or_2
+ +#  define irqsafe_cpu_or_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# ifndef irqsafe_cpu_or_4
+ +#  define irqsafe_cpu_or_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# ifndef irqsafe_cpu_or_8
+ +#  define irqsafe_cpu_or_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=)
+ +# endif
+ +# define irqsafe_cpu_or(pcp, val) __pcpu_size_call(irqsafe_cpu_or_, (pcp), (val))
+ +#endif
+ +
+ +/* IRQ-safe XOR; dispatches on sizeof(pcp) to irqsafe_cpu_xor_{1,2,4,8}. */
+ +#ifndef irqsafe_cpu_xor
+ +# ifndef irqsafe_cpu_xor_1
+ +#  define irqsafe_cpu_xor_1(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# ifndef irqsafe_cpu_xor_2
+ +#  define irqsafe_cpu_xor_2(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# ifndef irqsafe_cpu_xor_4
+ +#  define irqsafe_cpu_xor_4(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# ifndef irqsafe_cpu_xor_8
+ +#  define irqsafe_cpu_xor_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=)
+ +# endif
+ +# define irqsafe_cpu_xor(pcp, val) __pcpu_size_call(irqsafe_cpu_xor_, (pcp), (val))
+ +#endif
+ +
   #endif /* __LINUX_PERCPU_H */
diff --combined kernel/sched.c

index 854ab418fd42b524925b12af75d9859899762247,526d237b8ce50c66c909d07a380f8da58b91bac3..eecf070ffd1a1deeef02a1588b38c5cbcfe35968
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -298,7 -298,7 +298,7 @@@ static DEFINE_PER_CPU_SHARED_ALIGNED(st
   
   #ifdef CONFIG_RT_GROUP_SCHED
   static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
- -static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
+ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var);
   #endif /* CONFIG_RT_GROUP_SCHED */
   #else /* !CONFIG_USER_SCHED */
   #define root_task_group init_task_group
@@@ -780,7 -780,7 +780,7 @@@ static int sched_feat_open(struct inod
         return single_open(filp, sched_feat_show, NULL);
   }
   
- -static struct file_operations sched_feat_fops = {
+ +static const struct file_operations sched_feat_fops = {
         .open           = sched_feat_open,
         .write          = sched_feat_write,
         .read           = seq_read,
@@@ -1563,11 -1563,7 +1563,7 @@@ static unsigned long cpu_avg_load_per_t
   
   #ifdef CONFIG_FAIR_GROUP_SCHED
   
- struct update_shares_data {
-       unsigned long rq_weight[NR_CPUS];
- };
- 
- static DEFINE_PER_CPU(struct update_shares_data, update_shares_data);
+ static __read_mostly unsigned long *update_shares_data;
   
   static void __set_se_shares(struct sched_entity *se, unsigned long shares);
   
@@@ -1577,12 -1573,12 +1573,12 @@@
   static void update_group_shares_cpu(struct task_group *tg, int cpu,
                                     unsigned long sd_shares,
                                     unsigned long sd_rq_weight,
-                                   struct update_shares_data *usd)
+                                   unsigned long *usd_rq_weight)
   {
         unsigned long shares, rq_weight;
         int boost = 0;
   
-       rq_weight = usd->rq_weight[cpu];
+       rq_weight = usd_rq_weight[cpu];
         if (!rq_weight) {
                 boost = 1;
                 rq_weight = NICE_0_LOAD;
@@@ -1617,7 -1613,7 +1613,7 @@@
   static int tg_shares_up(struct task_group *tg, void *data)
   {
         unsigned long weight, rq_weight = 0, shares = 0;
-       struct update_shares_data *usd;
+       unsigned long *usd_rq_weight;
         struct sched_domain *sd = data;
         unsigned long flags;
         int i;
@@@ -1626,11 -1622,11 +1622,11 @@@
                 return 0;
   
         local_irq_save(flags);
-       usd = &__get_cpu_var(update_shares_data);
+       usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
   
         for_each_cpu(i, sched_domain_span(sd)) {
                 weight = tg->cfs_rq[i]->load.weight;
-               usd->rq_weight[i] = weight;
+               usd_rq_weight[i] = weight;
   
                 /*
                  * If there are currently no tasks on the cpu pretend there
@@@ -1651,7 -1647,7 +1647,7 @@@
                 shares = tg->shares;
   
         for_each_cpu(i, sched_domain_span(sd))
-               update_group_shares_cpu(tg, i, shares, rq_weight, usd);
+               update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
   
         local_irq_restore(flags);
   
@@@ -8199,14 -8195,14 +8195,14 @@@ enum s_alloc 
    */
   #ifdef CONFIG_SCHED_SMT
   static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
- -static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
+ +static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
   
   static int
   cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
                  struct sched_group **sg, struct cpumask *unused)
   {
         if (sg)
- -              *sg = &per_cpu(sched_group_cpus, cpu).sg;
+ +              *sg = &per_cpu(sched_groups, cpu).sg;
         return cpu;
   }
   #endif /* CONFIG_SCHED_SMT */
@@@ -9406,6 -9402,10 +9402,10 @@@ void __init sched_init(void
   #endif /* CONFIG_USER_SCHED */
   #endif /* CONFIG_GROUP_SCHED */
   
+ #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
+       update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
+                                           __alignof__(unsigned long));
+ #endif
         for_each_possible_cpu(i) {
                 struct rq *rq;
   
@@@ -9470,7 -9470,7 +9470,7 @@@
   #elif defined CONFIG_USER_SCHED
                 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
                 init_tg_rt_entry(&init_task_group,
- -                              &per_cpu(init_rt_rq, i),
+ +                              &per_cpu(init_rt_rq_var, i),
                                 &per_cpu(init_sched_rt_entity, i), i, 1,
                                 root_task_group.rt_se[i]);
   #endif
diff --combined mm/percpu.c

index e2e80fc786018270b78766f55531bb8001a7ce76,008fbd9e6fa4c30bf5c234741b6151d0fdc75d3f..442010cc91c6c82eb8489e64d21500baa52b5911
--- 1/mm/percpu.c
--- 2/mm/percpu.c
+++ b/mm/percpu.c
@@@ -46,6 -46,8 +46,6 @@@
    *
    * To use this allocator, arch code should do the followings.
    *
- - * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA
- - *
    * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
    *   regular address to percpu pointer and back if they need to be
    *   different from the default
@@@ -72,6 -74,7 +72,7 @@@
   #include <asm/cacheflush.h>
   #include <asm/sections.h>
   #include <asm/tlbflush.h>
+ #include <asm/io.h>
   
   #define PCPU_SLOT_BASE_SHIFT          5       /* 1-31 shares the same slot */
   #define PCPU_DFL_MAP_ALLOC            16      /* start a map with 16 ents */
@@@ -151,7 -154,10 +152,10 @@@ static int pcpu_reserved_chunk_limit
    *
    * During allocation, pcpu_alloc_mutex is kept locked all the time and
    * pcpu_lock is grabbed and released as necessary.  All actual memory
-  * allocations are done using GFP_KERNEL with pcpu_lock released.
+  * allocations are done using GFP_KERNEL with pcpu_lock released.  In
+  * general, percpu memory can't be allocated with irq off but
+  * irqsave/restore are still used in alloc path so that it can be used
+  * from early init path - sched_init() specifically.
    *
    * Free path accesses and alters only the index data structures, so it
    * can be safely called from atomic context.  When memory needs to be
@@@ -350,63 -356,86 +354,86 @@@ static struct pcpu_chunk *pcpu_chunk_ad
   }
   
   /**
-  * pcpu_extend_area_map - extend area map for allocation
-  * @chunk: target chunk
+  * pcpu_need_to_extend - determine whether chunk area map needs to be extended
+  * @chunk: chunk of interest
    *
-  * Extend area map of @chunk so that it can accomodate an allocation.
-  * A single allocation can split an area into three areas, so this
-  * function makes sure that @chunk->map has at least two extra slots.
+  * Determine whether area map of @chunk needs to be extended to
+  * accommodate a new allocation.
    *
    * CONTEXT:
-  * pcpu_alloc_mutex, pcpu_lock.  pcpu_lock is released and reacquired
-  * if area map is extended.
+  * pcpu_lock.
    *
    * RETURNS:
-  * 0 if noop, 1 if successfully extended, -errno on failure.
+  * New target map allocation length if extension is necessary, 0
+  * otherwise.
    */
- static int pcpu_extend_area_map(struct pcpu_chunk *chunk)
-       __releases(lock) __acquires(lock)
+ static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
   {
         int new_alloc;
-       int *new;
-       size_t size;
   
-       /* has enough? */
         if (chunk->map_alloc >= chunk->map_used + 2)
                 return 0;
   
-       spin_unlock_irq(&pcpu_lock);
- 
         new_alloc = PCPU_DFL_MAP_ALLOC;
         while (new_alloc < chunk->map_used + 2)
                 new_alloc *= 2;
   
-       new = pcpu_mem_alloc(new_alloc * sizeof(new[0]));
-       if (!new) {
-               spin_lock_irq(&pcpu_lock);
+       return new_alloc;
+ }
+ 
+ /**
+  * pcpu_extend_area_map - extend area map of a chunk
+  * @chunk: chunk of interest
+  * @new_alloc: new target allocation length of the area map
+  *
+  * Extend area map of @chunk to have @new_alloc entries.
+  *
+  * CONTEXT:
+  * Does GFP_KERNEL allocation.  Grabs and releases pcpu_lock.
+  *
+  * RETURNS:
+  * 0 on success, -errno on failure.
+  */
+ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
+ {
+       int *old = NULL, *new = NULL;
+       size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
+       unsigned long flags;
+ 
+       new = pcpu_mem_alloc(new_size);
+       if (!new)
                 return -ENOMEM;
-       }
   
-       /*
-        * Acquire pcpu_lock and switch to new area map.  Only free
-        * could have happened inbetween, so map_used couldn't have
-        * grown.
-        */
-       spin_lock_irq(&pcpu_lock);
-       BUG_ON(new_alloc < chunk->map_used + 2);
+       /* acquire pcpu_lock and switch to new area map */
+       spin_lock_irqsave(&pcpu_lock, flags);
+ 
+       if (new_alloc <= chunk->map_alloc)
+               goto out_unlock;
   
-       size = chunk->map_alloc * sizeof(chunk->map[0]);
-       memcpy(new, chunk->map, size);
+       old_size = chunk->map_alloc * sizeof(chunk->map[0]);
+       memcpy(new, chunk->map, old_size);
   
         /*
          * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
          * one of the first chunks and still using static map.
          */
         if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-               pcpu_mem_free(chunk->map, size);
+               old = chunk->map;
   
         chunk->map_alloc = new_alloc;
         chunk->map = new;
+       new = NULL;
+ 
+ out_unlock:
+       spin_unlock_irqrestore(&pcpu_lock, flags);
+ 
+       /*
+        * pcpu_mem_free() might end up calling vfree() which uses
+        * IRQ-unsafe lock and thus can't be called under pcpu_lock.
+        */
+       pcpu_mem_free(old, old_size);
+       pcpu_mem_free(new, new_size);
+ 
         return 0;
   }
   
@@@ -1045,7 -1074,8 +1072,8 @@@ static void *pcpu_alloc(size_t size, si
         static int warn_limit = 10;
         struct pcpu_chunk *chunk;
         const char *err;
-       int slot, off;
+       int slot, off, new_alloc;
+       unsigned long flags;
   
         if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
                 WARN(true, "illegal size (%zu) or align (%zu) for "
@@@ -1054,19 -1084,30 +1082,30 @@@
         }
   
         mutex_lock(&pcpu_alloc_mutex);
-       spin_lock_irq(&pcpu_lock);
+       spin_lock_irqsave(&pcpu_lock, flags);
   
         /* serve reserved allocations from the reserved chunk if available */
         if (reserved && pcpu_reserved_chunk) {
                 chunk = pcpu_reserved_chunk;
-               if (size > chunk->contig_hint ||
-                   pcpu_extend_area_map(chunk) < 0) {
-                       err = "failed to extend area map of reserved chunk";
+ 
+               if (size > chunk->contig_hint) {
+                       err = "alloc from reserved chunk failed";
                         goto fail_unlock;
                 }
+ 
+               while ((new_alloc = pcpu_need_to_extend(chunk))) {
+                       spin_unlock_irqrestore(&pcpu_lock, flags);
+                       if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
+                               err = "failed to extend area map of reserved chunk";
+                               goto fail_unlock_mutex;
+                       }
+                       spin_lock_irqsave(&pcpu_lock, flags);
+               }
+ 
                 off = pcpu_alloc_area(chunk, size, align);
                 if (off >= 0)
                         goto area_found;
+ 
                 err = "alloc from reserved chunk failed";
                 goto fail_unlock;
         }
@@@ -1078,14 -1119,20 +1117,20 @@@ restart
                         if (size > chunk->contig_hint)
                                 continue;
   
-                       switch (pcpu_extend_area_map(chunk)) {
-                       case 0:
-                               break;
-                       case 1:
-                               goto restart;   /* pcpu_lock dropped, restart */
-                       default:
-                               err = "failed to extend area map";
-                               goto fail_unlock;
+                       new_alloc = pcpu_need_to_extend(chunk);
+                       if (new_alloc) {
+                               spin_unlock_irqrestore(&pcpu_lock, flags);
+                               if (pcpu_extend_area_map(chunk,
+                                                        new_alloc) < 0) {
+                                       err = "failed to extend area map";
+                                       goto fail_unlock_mutex;
+                               }
+                               spin_lock_irqsave(&pcpu_lock, flags);
+                               /*
+                                * pcpu_lock has been dropped, need to
+                                * restart cpu_slot list walking.
+                                */
+                               goto restart;
                         }
   
                         off = pcpu_alloc_area(chunk, size, align);
@@@ -1095,7 -1142,7 +1140,7 @@@
         }
   
         /* hmmm... no space left, create a new chunk */
-       spin_unlock_irq(&pcpu_lock);
+       spin_unlock_irqrestore(&pcpu_lock, flags);
   
         chunk = alloc_pcpu_chunk();
         if (!chunk) {
@@@ -1103,16 -1150,16 +1148,16 @@@
                 goto fail_unlock_mutex;
         }
   
-       spin_lock_irq(&pcpu_lock);
+       spin_lock_irqsave(&pcpu_lock, flags);
         pcpu_chunk_relocate(chunk, -1);
         goto restart;
   
   area_found:
-       spin_unlock_irq(&pcpu_lock);
+       spin_unlock_irqrestore(&pcpu_lock, flags);
   
         /* populate, map and clear the area */
         if (pcpu_populate_chunk(chunk, off, size)) {
-               spin_lock_irq(&pcpu_lock);
+               spin_lock_irqsave(&pcpu_lock, flags);
                 pcpu_free_area(chunk, off);
                 err = "failed to populate";
                 goto fail_unlock;
@@@ -1124,7 -1171,7 +1169,7 @@@
         return __addr_to_pcpu_ptr(chunk->base_addr + off);
   
   fail_unlock:
-       spin_unlock_irq(&pcpu_lock);
+       spin_unlock_irqrestore(&pcpu_lock, flags);
   fail_unlock_mutex:
         mutex_unlock(&pcpu_alloc_mutex);
         if (warn_limit) {
@@@ -1256,6 -1303,27 +1301,27 @@@ void free_percpu(void *ptr
   }
   EXPORT_SYMBOL_GPL(free_percpu);
   
+ /**
+  * per_cpu_ptr_to_phys - convert translated percpu address to physical address
+  * @addr: the address to be converted to physical address
+  *
+  * Given @addr which is a dereferenceable address obtained via one of
+  * percpu access macros, this function translates it into its physical
+  * address.  The caller is responsible for ensuring @addr stays valid
+  * until this function finishes.
+  *
+  * Addresses outside the vmalloc range are translated with __pa().
+  * Addresses inside it are resolved through vmalloc_to_page(); as
+  * page_to_phys() yields the physical address of the start of the page,
+  * the offset of @addr within its page is added back so that
+  * non-page-aligned addresses translate correctly.
+  *
+  * RETURNS:
+  * The physical address for @addr.
+  */
+ phys_addr_t per_cpu_ptr_to_phys(void *addr)
+ {
+       if ((unsigned long)addr < VMALLOC_START ||
+                       (unsigned long)addr >= VMALLOC_END)
+               return __pa(addr);
+       else
+               return page_to_phys(vmalloc_to_page(addr)) +
+                      ((unsigned long)addr & (PAGE_SIZE - 1));
+ }
+ 
   static inline size_t pcpu_calc_fc_sizes(size_t static_size,
                                         size_t reserved_size,
                                         ssize_t *dyn_sizep)
author	Tejun Heo <[email protected]>
	Tue, 8 Dec 2009 01:02:12 +0000 (10:02 +0900)
committer	Tejun Heo <[email protected]>
	Tue, 8 Dec 2009 01:02:12 +0000 (10:02 +0900)
		1	2
include/linux/percpu.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/percpu.c	patch \|	diff1 \|	diff2 \|	blob \| history