Merge branch 'nohz/printk-v8' into irq/core

author Frederic Weisbecker <[email protected]>

Mon, 4 Feb 2013 23:48:46 +0000 (00:48 +0100)

committer Frederic Weisbecker <[email protected]>

Mon, 4 Feb 2013 23:48:46 +0000 (00:48 +0100)
author Frederic Weisbecker <[email protected]>
Mon, 4 Feb 2013 23:48:46 +0000 (00:48 +0100)
committer Frederic Weisbecker <[email protected]>
Mon, 4 Feb 2013 23:48:46 +0000 (00:48 +0100)
diff --combined include/linux/irq_work.h

index ce60c084635b5a9cef539ed79c8b34fe0b583a15,b28eb60c8bf6379765db6514c5cfbfb0462cfa4c..f5dbce50466e6546fc72f4084ed0d0c91d91fe1e
--- 1/include/linux/irq_work.h
--- 2/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@@ -3,6 -3,20 +3,20 @@@
   
   #include <linux/llist.h>
   
+ /*
+  * An entry can be in one of four states:
+  *
+  * free            NULL, 0 -> {claimed}       : free to be used
+  * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
+  * pending   next, 3 -> {busy}          : queued, pending callback
+  * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+  */
+ 
+ #define IRQ_WORK_PENDING      1UL
+ #define IRQ_WORK_BUSY         2UL
+ #define IRQ_WORK_FLAGS                3UL
+ #define IRQ_WORK_LAZY         4UL /* Doesn't want IPI, wait for tick */
+ 
   struct irq_work {
         unsigned long flags;
         struct llist_node llnode;
@@@ -16,8 -30,14 +30,14 @@@ void init_irq_work(struct irq_work *wor
         work->func = func;
   }
   
- -bool irq_work_queue(struct irq_work *work);
+ +void irq_work_queue(struct irq_work *work);
   void irq_work_run(void);
   void irq_work_sync(struct irq_work *work);
   
+ #ifdef CONFIG_IRQ_WORK
+ bool irq_work_needs_cpu(void);
+ #else
+ static bool irq_work_needs_cpu(void) { return false; }
+ #endif
+ 
   #endif /* _LINUX_IRQ_WORK_H */
diff --combined include/linux/tick.h

index 1a6567b48492d3dd782b0646a9d87d496b8c95f1,2307dd31b966014d18ca19f5a9e8666389029e87..553272e6af554844fbea920f0c3b08707c52e13a
--- 1/include/linux/tick.h
--- 2/include/linux/tick.h
+++ b/include/linux/tick.h
@@@ -8,6 -8,8 +8,8 @@@
   
   #include <linux/clockchips.h>
   #include <linux/irqflags.h>
+ #include <linux/percpu.h>
+ #include <linux/hrtimer.h>
   
   #ifdef CONFIG_GENERIC_CLOCKEVENTS
   
@@@ -122,13 -124,26 +124,26 @@@ static inline int tick_oneshot_mode_act
   #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
   
   # ifdef CONFIG_NO_HZ
+ DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
+ 
+ static inline int tick_nohz_tick_stopped(void)
+ {
+       return __this_cpu_read(tick_cpu_sched.tick_stopped);
+ }
+ 
   extern void tick_nohz_idle_enter(void);
   extern void tick_nohz_idle_exit(void);
   extern void tick_nohz_irq_exit(void);
   extern ktime_t tick_nohz_get_sleep_length(void);
   extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
   extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
- # else
+ 
+ # else /* !CONFIG_NO_HZ */
+ static inline int tick_nohz_tick_stopped(void)
+ {
+       return 0;
+ }
+ 
   static inline void tick_nohz_idle_enter(void) { }
   static inline void tick_nohz_idle_exit(void) { }
   
@@@ -142,10 -157,4 +157,10 @@@ static inline u64 get_cpu_idle_time_us(
   static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
   # endif /* !NO_HZ */
   
+ +# ifdef CONFIG_CPU_IDLE_GOV_MENU
+ +extern void menu_hrtimer_cancel(void);
+ +# else
+ +static inline void menu_hrtimer_cancel(void) {}
+ +# endif /* CONFIG_CPU_IDLE_GOV_MENU */
+ +
   #endif
diff --combined init/Kconfig

index e3227d7ba35d0710d65c09de08e12b263b8a9378,c575566be47d46801ac3af27c8ecf30494644696..a98e1acc122de8fc84912ab805e19fa0f395cfdd
--- 1/init/Kconfig
--- 2/init/Kconfig
+++ b/init/Kconfig
@@@ -482,35 -482,35 +482,35 @@@ config PREEMPT_RC
           This option enables preemptible-RCU code that is common between
           the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
   
+ +config CONTEXT_TRACKING
+ +       bool
+ +
   config RCU_USER_QS
         bool "Consider userspace as in RCU extended quiescent state"
- -      depends on HAVE_RCU_USER_QS && SMP
+ +      depends on HAVE_CONTEXT_TRACKING && SMP
+ +      select CONTEXT_TRACKING
         help
           This option sets hooks on kernel / userspace boundaries and
           puts RCU in extended quiescent state when the CPU runs in
           userspace. It means that when a CPU runs in userspace, it is
           excluded from the global RCU state machine and thus doesn't
- -        to keep the timer tick on for RCU.
+ +        try to keep the timer tick on for RCU.
   
           Unless you want to hack and help the development of the full
- -        tickless feature, you shouldn't enable this option. It adds
- -        unnecessary overhead.
+ +        dynticks mode, you shouldn't enable this option.  It also
+ +        adds unnecessary overhead.
   
           If unsure say N
   
- -config RCU_USER_QS_FORCE
- -      bool "Force userspace extended QS by default"
- -      depends on RCU_USER_QS
+ +config CONTEXT_TRACKING_FORCE
+ +      bool "Force context tracking"
+ +      depends on CONTEXT_TRACKING
         help
- -        Set the hooks in user/kernel boundaries by default in order to
- -        test this feature that treats userspace as an extended quiescent
- -        state until we have a real user like a full adaptive nohz option.
- -
- -        Unless you want to hack and help the development of the full
- -        tickless feature, you shouldn't enable this option. It adds
- -        unnecessary overhead.
- -
- -        If unsure say N
+ +        Probe on user/kernel boundaries by default in order to
+ +        test the features that rely on it such as userspace RCU extended
+ +        quiescent states.
+ +        This test is there for debugging until we have a real user like the
+ +        full dynticks mode.
   
   config RCU_FANOUT
         int "Tree-based hierarchical RCU fanout value"
@@@ -578,13 -578,14 +578,13 @@@ config RCU_FAST_NO_H
         depends on NO_HZ && SMP
         default n
         help
- -        This option causes RCU to attempt to accelerate grace periods
- -        in order to allow CPUs to enter dynticks-idle state more
- -        quickly.  On the other hand, this option increases the overhead
- -        of the dynticks-idle checking, particularly on systems with
- -        large numbers of CPUs.
+ +        This option causes RCU to attempt to accelerate grace periods in
+ +        order to allow CPUs to enter dynticks-idle state more quickly.
+ +        On the other hand, this option increases the overhead of the
+ +        dynticks-idle checking, thus degrading scheduling latency.
   
- -        Say Y if energy efficiency is critically important, particularly
- -              if you have relatively few CPUs.
+ +        Say Y if energy efficiency is critically important, and you don't
+ +              care about real-time response.
   
           Say N if you are unsure.
   
@@@ -650,28 -651,6 +650,28 @@@ config RCU_BOOST_DELA
   
           Accept the default if unsure.
   
+ +config RCU_NOCB_CPU
+ +      bool "Offload RCU callback processing from boot-selected CPUs"
+ +      depends on TREE_RCU || TREE_PREEMPT_RCU
+ +      default n
+ +      help
+ +        Use this option to reduce OS jitter for aggressive HPC or
+ +        real-time workloads.  It can also be used to offload RCU
+ +        callback invocation to energy-efficient CPUs in battery-powered
+ +        asymmetric multiprocessors.
+ +
+ +        This option offloads callback invocation from the set of
+ +        CPUs specified at boot time by the rcu_nocbs parameter.
+ +        For each such CPU, a kthread ("rcuoN") will be created to
+ +        invoke callbacks, where the "N" is the CPU being offloaded.
+ +        Nothing prevents this kthread from running on the specified
+ +        CPUs, but (1) the kthreads may be preempted between each
+ +        callback, and (2) affinity or cgroups can be used to force
+ +        the kthreads to run on whatever set of CPUs is desired.
+ +
+ +        Say Y here if you want reduced OS jitter on selected CPUs.
+ +        Say N here if you are unsure.
+ +
   endmenu # "RCU Subsystem"
   
   config IKCONFIG
@@@ -713,50 -692,6 +713,50 @@@ config LOG_BUF_SHIF
   config HAVE_UNSTABLE_SCHED_CLOCK
         bool
   
+ +#
+ +# For architectures that want to enable the support for NUMA-affine scheduler
+ +# balancing logic:
+ +#
+ +config ARCH_SUPPORTS_NUMA_BALANCING
+ +      bool
+ +
+ +# For architectures that (ab)use NUMA to represent different memory regions
+ +# all cpu-local but of different latencies, such as SuperH.
+ +#
+ +config ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ +      bool
+ +
+ +#
+ +# For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE
+ +config ARCH_WANTS_PROT_NUMA_PROT_NONE
+ +      bool
+ +
+ +config ARCH_USES_NUMA_PROT_NONE
+ +      bool
+ +      default y
+ +      depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
+ +      depends on NUMA_BALANCING
+ +
+ +config NUMA_BALANCING_DEFAULT_ENABLED
+ +      bool "Automatically enable NUMA aware memory/task placement"
+ +      default y
+ +      depends on NUMA_BALANCING
+ +      help
+ +        If set, automatic NUMA balancing will be enabled if running on a NUMA
+ +        machine.
+ +
+ +config NUMA_BALANCING
+ +      bool "Memory placement aware NUMA scheduler"
+ +      depends on ARCH_SUPPORTS_NUMA_BALANCING
+ +      depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ +      depends on SMP && NUMA && MIGRATION
+ +      help
+ +        This option adds support for automatic NUMA aware memory/task placement.
+ +        The mechanism is quite primitive and is based on migrating memory when
+ +        it has references to the node the task is running on.
+ +
+ +        This system will be inactive on UMA systems.
+ +
   menuconfig CGROUPS
         boolean "Control Group support"
         depends on EVENTFD
@@@ -878,7 -813,7 +878,7 @@@ config MEMCG_SWAP_ENABLE
   config MEMCG_KMEM
         bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
         depends on MEMCG && EXPERIMENTAL
- -      default n
+ +      depends on SLUB || SLAB
         help
           The Kernel Memory extension for Memory Resource Controller can limit
           the amount of memory used by kernel objects in the system. Those are
@@@ -1065,9 -1000,11 +1065,9 @@@ config UIDGID_CONVERTE
         # Filesystems
         depends on 9P_FS = n
         depends on AFS_FS = n
- -      depends on AUTOFS4_FS = n
         depends on CEPH_FS = n
         depends on CIFS = n
         depends on CODA_FS = n
- -      depends on FUSE_FS = n
         depends on GFS2_FS = n
         depends on NCP_FS = n
         depends on NFSD = n
@@@ -1178,7 -1115,7 +1178,7 @@@ config CC_OPTIMIZE_FOR_SIZ
           Enabling this option will pass "-Os" instead of "-O2" to gcc
           resulting in a smaller kernel.
   
- -        If unsure, say Y.
+ +        If unsure, say N.
   
   config SYSCTL
         bool
@@@ -1259,6 -1196,7 +1259,7 @@@ config HOTPLU
   config PRINTK
         default y
         bool "Enable support for printk" if EXPERT
+       select IRQ_WORK
         help
           This option enables normal printk support. Removing it
           eliminates most of the message strings from the kernel image
diff --combined kernel/irq_work.c

index c9d7478e4889937e053e283ede32a0ad321764e0,7f3a59bc8e3d97660a8e75fdf6847fd4124f96c6..55fcce6065cf6bc3213829fd8188cae90b61b778
--- 1/kernel/irq_work.c
--- 2/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@@ -12,22 -12,15 +12,15 @@@
   #include <linux/percpu.h>
   #include <linux/hardirq.h>
   #include <linux/irqflags.h>
+ #include <linux/sched.h>
+ #include <linux/tick.h>
+ #include <linux/cpu.h>
+ #include <linux/notifier.h>
   #include <asm/processor.h>
   
- /*
-  * An entry can be in one of four states:
-  *
-  * free            NULL, 0 -> {claimed}       : free to be used
-  * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
-  * pending   next, 3 -> {busy}          : queued, pending callback
-  * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
-  */
- 
- #define IRQ_WORK_PENDING      1UL
- #define IRQ_WORK_BUSY         2UL
- #define IRQ_WORK_FLAGS                3UL
   
   static DEFINE_PER_CPU(struct llist_head, irq_work_list);
+ static DEFINE_PER_CPU(int, irq_work_raised);
   
   /*
    * Claim the entry so that no one else will poke at it.
@@@ -63,46 -56,80 +56,69 @@@ void __weak arch_irq_work_raise(void
   }
   
   /*
- - * Queue the entry and raise the IPI if needed.
+ + * Enqueue the irq_work @entry unless it's already pending
+ + * somewhere.
+ + *
+ + * Can be re-enqueued while the callback is still in progress.
    */
- -static void __irq_work_queue(struct irq_work *work)
+ +void irq_work_queue(struct irq_work *work)
   {
-       bool empty;
- 
+ +      /* Only queue if not already pending */
+ +      if (!irq_work_claim(work))
+ +              return;
+ +
+ +      /* Queue the entry and raise the IPI if needed. */
         preempt_disable();
   
-       empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-       /* The list was empty, raise self-interrupt to start processing. */
-       if (empty)
-               arch_irq_work_raise();
+       llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+ 
+       /*
+        * If the work is not "lazy" or the tick is stopped, raise the irq
+        * work interrupt (if supported by the arch), otherwise, just wait
+        * for the next tick.
+        */
+       if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
+               if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+                       arch_irq_work_raise();
+       }
   
         preempt_enable();
   }
- -
- -/*
- - * Enqueue the irq_work @entry, returns true on success, failure when the
- - * @entry was already enqueued by someone else.
- - *
- - * Can be re-enqueued while the callback is still in progress.
- - */
- -bool irq_work_queue(struct irq_work *work)
- -{
- -      if (!irq_work_claim(work)) {
- -              /*
- -               * Already enqueued, can't do!
- -               */
- -              return false;
- -      }
- -
- -      __irq_work_queue(work);
- -      return true;
- -}
   EXPORT_SYMBOL_GPL(irq_work_queue);
   
- /*
-  * Run the irq_work entries on this cpu. Requires to be ran from hardirq
-  * context with local IRQs disabled.
-  */
- void irq_work_run(void)
+ bool irq_work_needs_cpu(void)
+ {
+       struct llist_head *this_list;
+ 
+       this_list = &__get_cpu_var(irq_work_list);
+       if (llist_empty(this_list))
+               return false;
+ 
+       /* All work should have been flushed before going offline */
+       WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
+ 
+       return true;
+ }
+ 
+ static void __irq_work_run(void)
   {
+       unsigned long flags;
         struct irq_work *work;
         struct llist_head *this_list;
         struct llist_node *llnode;
   
+ 
+       /*
+        * Reset the "raised" state right before we check the list because
+        * an NMI may enqueue after we find the list empty from the runner.
+        */
+       __this_cpu_write(irq_work_raised, 0);
+       barrier();
+ 
         this_list = &__get_cpu_var(irq_work_list);
         if (llist_empty(this_list))
                 return;
   
-       BUG_ON(!in_irq());
         BUG_ON(!irqs_disabled());
   
         llnode = llist_del_all(this_list);
@@@ -118,15 -145,27 +134,27 @@@
                  * to claim that work don't rely on us to handle their data
                  * while we are in the middle of the func.
                  */
-               xchg(&work->flags, IRQ_WORK_BUSY);
+               flags = work->flags & ~IRQ_WORK_PENDING;
+               xchg(&work->flags, flags);
+ 
                 work->func(work);
                 /*
                  * Clear the BUSY bit and return to the free state if
                  * no-one else claimed it meanwhile.
                  */
-               (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
+               (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
         }
   }
+ 
+ /*
+  * Run the irq_work entries on this cpu. Must be run from hardirq
+  * context with local IRQs disabled.
+  */
+ void irq_work_run(void)
+ {
+       BUG_ON(!in_irq());
+       __irq_work_run();
+ }
   EXPORT_SYMBOL_GPL(irq_work_run);
   
   /*
@@@ -141,3 -180,35 +169,35 @@@ void irq_work_sync(struct irq_work *wor
                 cpu_relax();
   }
   EXPORT_SYMBOL_GPL(irq_work_sync);
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
+ static int irq_work_cpu_notify(struct notifier_block *self,
+                              unsigned long action, void *hcpu)
+ {
+       long cpu = (long)hcpu;
+ 
+       switch (action) {
+       case CPU_DYING:
+               /* Called from stop_machine */
+               if (WARN_ON_ONCE(cpu != smp_processor_id()))
+                       break;
+               __irq_work_run();
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+ }
+ 
+ static struct notifier_block cpu_notify;
+ 
+ static __init int irq_work_init_cpu_notifier(void)
+ {
+       cpu_notify.notifier_call = irq_work_cpu_notify;
+       cpu_notify.priority = 0;
+       register_cpu_notifier(&cpu_notify);
+       return 0;
+ }
+ device_initcall(irq_work_init_cpu_notifier);
+ 
+ #endif /* CONFIG_HOTPLUG_CPU */
diff --combined kernel/printk.c

index 357f714ddd4983e75e3816577b237d8c8d38bd3f,c9104feba5ece1b0a6edea7309ed0cdb6b7ab5ff..0b31715f335a7a8ba1a846fe3b93fe71d97cd8d7
--- 1/kernel/printk.c
--- 2/kernel/printk.c
+++ b/kernel/printk.c
@@@ -42,6 -42,7 +42,7 @@@
   #include <linux/notifier.h>
   #include <linux/rculist.h>
   #include <linux/poll.h>
+ #include <linux/irq_work.h>
   
   #include <asm/uaccess.h>
   
@@@ -87,12 -88,6 +88,12 @@@ static DEFINE_SEMAPHORE(console_sem)
   struct console *console_drivers;
   EXPORT_SYMBOL_GPL(console_drivers);
   
+ +#ifdef CONFIG_LOCKDEP
+ +static struct lockdep_map console_lock_dep_map = {
+ +      .name = "console_lock"
+ +};
+ +#endif
+ +
   /*
    * This is used for debugging the mess that is the VT code by
    * keeping track if we have the console semaphore held. It's
@@@ -747,21 -742,6 +748,21 @@@ void __init setup_log_buf(int early
                 free, (free * 100) / __LOG_BUF_LEN);
   }
   
+ +static bool __read_mostly ignore_loglevel;
+ +
+ +static int __init ignore_loglevel_setup(char *str)
+ +{
+ +      ignore_loglevel = 1;
+ +      printk(KERN_INFO "debug: ignoring loglevel setting.\n");
+ +
+ +      return 0;
+ +}
+ +
+ +early_param("ignore_loglevel", ignore_loglevel_setup);
+ +module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
+ +MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
+ +      "print all kernel messages to the console.");
+ +
   #ifdef CONFIG_BOOT_PRINTK_DELAY
   
   static int boot_delay; /* msecs delay after each printk during bootup */
@@@ -785,15 -765,13 +786,15 @@@ static int __init boot_delay_setup(cha
   }
   __setup("boot_delay=", boot_delay_setup);
   
- -static void boot_delay_msec(void)
+ +static void boot_delay_msec(int level)
   {
         unsigned long long k;
         unsigned long timeout;
   
- -      if (boot_delay == 0 || system_state != SYSTEM_BOOTING)
+ +      if ((boot_delay == 0 || system_state != SYSTEM_BOOTING)
+ +              || (level >= console_loglevel && !ignore_loglevel)) {
                 return;
+ +      }
   
         k = (unsigned long long)loops_per_msec * boot_delay;
   
@@@ -812,7 -790,7 +813,7 @@@
         }
   }
   #else
- -static inline void boot_delay_msec(void)
+ +static inline void boot_delay_msec(int level)
   {
   }
   #endif
@@@ -870,11 -848,10 +871,11 @@@ static size_t print_time(u64 ts, char *
         if (!printk_time)
                 return 0;
   
+ +      rem_nsec = do_div(ts, 1000000000);
+ +
         if (!buf)
- -              return 15;
+ +              return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts);
   
- -      rem_nsec = do_div(ts, 1000000000);
         return sprintf(buf, "[%5lu.%06lu] ",
                        (unsigned long)ts, rem_nsec / 1000);
   }
@@@ -1256,6 -1233,21 +1257,6 @@@ SYSCALL_DEFINE3(syslog, int, type, cha
         return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
   }
   
- -static bool __read_mostly ignore_loglevel;
- -
- -static int __init ignore_loglevel_setup(char *str)
- -{
- -      ignore_loglevel = 1;
- -      printk(KERN_INFO "debug: ignoring loglevel setting.\n");
- -
- -      return 0;
- -}
- -
- -early_param("ignore_loglevel", ignore_loglevel_setup);
- -module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
- -MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
- -      "print all kernel messages to the console.");
- -
   /*
    * Call the console drivers, asking them to write out
    * log_buf[start] to log_buf[end - 1].
@@@ -1501,7 -1493,7 +1502,7 @@@ asmlinkage int vprintk_emit(int facilit
         int this_cpu;
         int printed_len = 0;
   
- -      boot_delay_msec();
+ +      boot_delay_msec(level);
         printk_delay();
   
         /* This stops the holder of console_sem just where we want him */
@@@ -1917,14 -1909,12 +1918,14 @@@ static int __cpuinit console_cpu_notify
    */
   void console_lock(void)
   {
- -      BUG_ON(in_interrupt());
+ +      might_sleep();
+ +
         down(&console_sem);
         if (console_suspended)
                 return;
         console_locked = 1;
         console_may_schedule = 1;
+ +      mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
   }
   EXPORT_SYMBOL(console_lock);
   
@@@ -1946,7 -1936,6 +1947,7 @@@ int console_trylock(void
         }
         console_locked = 1;
         console_may_schedule = 0;
+ +      mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_);
         return 1;
   }
   EXPORT_SYMBOL(console_trylock);
@@@ -1967,30 -1956,32 +1968,32 @@@ int is_console_locked(void
   static DEFINE_PER_CPU(int, printk_pending);
   static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
   
- void printk_tick(void)
+ static void wake_up_klogd_work_func(struct irq_work *irq_work)
   {
-       if (__this_cpu_read(printk_pending)) {
-               int pending = __this_cpu_xchg(printk_pending, 0);
-               if (pending & PRINTK_PENDING_SCHED) {
-                       char *buf = __get_cpu_var(printk_sched_buf);
-                       printk(KERN_WARNING "[sched_delayed] %s", buf);
-               }
-               if (pending & PRINTK_PENDING_WAKEUP)
-                       wake_up_interruptible(&log_wait);
+       int pending = __this_cpu_xchg(printk_pending, 0);
+ 
+       if (pending & PRINTK_PENDING_SCHED) {
+               char *buf = __get_cpu_var(printk_sched_buf);
+               printk(KERN_WARNING "[sched_delayed] %s", buf);
         }
- }
   
- int printk_needs_cpu(int cpu)
- {
-       if (cpu_is_offline(cpu))
-               printk_tick();
-       return __this_cpu_read(printk_pending);
+       if (pending & PRINTK_PENDING_WAKEUP)
+               wake_up_interruptible(&log_wait);
   }
   
+ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+       .func = wake_up_klogd_work_func,
+       .flags = IRQ_WORK_LAZY,
+ };
+ 
   void wake_up_klogd(void)
   {
-       if (waitqueue_active(&log_wait))
+       preempt_disable();
+       if (waitqueue_active(&log_wait)) {
                 this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+               irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
+       }
+       preempt_enable();
   }
   
   static void console_cont_flush(char *text, size_t size)
@@@ -2107,7 -2098,6 +2110,7 @@@ skip
                 local_irq_restore(flags);
         }
         console_locked = 0;
+ +      mutex_release(&console_lock_dep_map, 1, _RET_IP_);
   
         /* Release the exclusive_console once it is used */
         if (unlikely(exclusive_console))
@@@ -2471,6 -2461,7 +2474,7 @@@ int printk_sched(const char *fmt, ...
         va_end(args);
   
         __this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+       irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
         local_irq_restore(flags);
   
         return r;
diff --combined kernel/time/tick-sched.c

index d58e552d9fd154b39fffaa012a6028838b9955fd,822d7572bf2dcb35163ff6e8aacc5bd440fd6d2b..fb8e5e469d1cdf82c559f6fe15835d3b70410aef
--- 1/kernel/time/tick-sched.c
--- 2/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@@ -20,6 -20,7 +20,7 @@@
   #include <linux/profile.h>
   #include <linux/sched.h>
   #include <linux/module.h>
+ #include <linux/irq_work.h>
   
   #include <asm/irq_regs.h>
   
@@@ -28,10 -29,10 +29,10 @@@
   /*
    * Per cpu nohz control structure
    */
- static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+ DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
   
   /*
- - * The time, when the last jiffy update happened. Protected by xtime_lock.
+ + * The time, when the last jiffy update happened. Protected by jiffies_lock.
    */
   static ktime_t last_jiffies_update;
   
@@@ -49,14 -50,14 +50,14 @@@ static void tick_do_update_jiffies64(kt
         ktime_t delta;
   
         /*
- -       * Do a quick check without holding xtime_lock:
+ +       * Do a quick check without holding jiffies_lock:
          */
         delta = ktime_sub(now, last_jiffies_update);
         if (delta.tv64 < tick_period.tv64)
                 return;
   
- -      /* Reevalute with xtime_lock held */
- -      write_seqlock(&xtime_lock);
+ +      /* Reevaluate with jiffies_lock held */
+ +      write_seqlock(&jiffies_lock);
   
         delta = ktime_sub(now, last_jiffies_update);
         if (delta.tv64 >= tick_period.tv64) {
@@@ -79,7 -80,7 +80,7 @@@
                 /* Keep the tick_next_period variable up to date */
                 tick_next_period = ktime_add(last_jiffies_update, tick_period);
         }
- -      write_sequnlock(&xtime_lock);
+ +      write_sequnlock(&jiffies_lock);
   }
   
   /*
@@@ -89,58 -90,15 +90,58 @@@ static ktime_t tick_init_jiffy_update(v
   {
         ktime_t period;
   
- -      write_seqlock(&xtime_lock);
+ +      write_seqlock(&jiffies_lock);
         /* Did we start the jiffies update yet ? */
         if (last_jiffies_update.tv64 == 0)
                 last_jiffies_update = tick_next_period;
         period = last_jiffies_update;
- -      write_sequnlock(&xtime_lock);
+ +      write_sequnlock(&jiffies_lock);
         return period;
   }
   
+ +
+ +static void tick_sched_do_timer(ktime_t now)
+ +{
+ +      int cpu = smp_processor_id();
+ +
+ +#ifdef CONFIG_NO_HZ
+ +      /*
+ +       * Check if the do_timer duty was dropped. We don't care about
+ +       * concurrency: This happens only when the cpu in charge went
+ +       * into a long sleep. If two cpus happen to assign themselves to
+ +       * this duty, then the jiffies update is still serialized by
+ +       * jiffies_lock.
+ +       */
+ +      if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ +              tick_do_timer_cpu = cpu;
+ +#endif
+ +
+ +      /* Check, if the jiffies need an update */
+ +      if (tick_do_timer_cpu == cpu)
+ +              tick_do_update_jiffies64(now);
+ +}
+ +
+ +static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
+ +{
+ +#ifdef CONFIG_NO_HZ
+ +      /*
+ +       * When we are idle and the tick is stopped, we have to touch
+ +       * the watchdog as we might not schedule for a really long
+ +       * time. This happens on complete idle SMP systems while
+ +       * waiting on the login prompt. We also increment the "start of
+ +       * idle" jiffy stamp so the idle accounting adjustment we do
+ +       * when we go busy again does not account too much ticks.
+ +       */
+ +      if (ts->tick_stopped) {
+ +              touch_softlockup_watchdog();
+ +              if (is_idle_task(current))
+ +                      ts->idle_jiffies++;
+ +      }
+ +#endif
+ +      update_process_times(user_mode(regs));
+ +      profile_tick(CPU_PROFILING);
+ +}
+ +
   /*
    * NOHZ - aka dynamic tick functionality
    */
@@@ -325,14 -283,14 +326,14 @@@ static ktime_t tick_nohz_stop_sched_tic
   
         /* Read jiffies and the time when jiffies were updated last */
         do {
- -              seq = read_seqbegin(&xtime_lock);
+ +              seq = read_seqbegin(&jiffies_lock);
                 last_update = last_jiffies_update;
                 last_jiffies = jiffies;
                 time_delta = timekeeping_max_deferment();
- -      } while (read_seqretry(&xtime_lock, seq));
+ +      } while (read_seqretry(&jiffies_lock, seq));
   
-       if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
-           arch_needs_cpu(cpu)) {
+       if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
+           arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
                 next_jiffies = last_jiffies + 1;
                 delta_jiffies = 1;
         } else {
@@@ -569,8 -527,6 +570,8 @@@ void tick_nohz_irq_exit(void
         if (!ts->inidle)
                 return;
   
+ +      /* Cancel the timer: the CPU already woke up from the C-states */
+ +      menu_hrtimer_cancel();
         __tick_nohz_idle_enter(ts);
   }
   
@@@ -666,8 -622,6 +667,8 @@@ void tick_nohz_idle_exit(void
   
         ts->inidle = 0;
   
+ +      /* Cancel the timer: the CPU already woke up from the C-states */
+ +      menu_hrtimer_cancel();
         if (ts->idle_active || ts->tick_stopped)
                 now = ktime_get();
   
@@@ -695,12 -649,40 +696,12 @@@ static void tick_nohz_handler(struct cl
   {
         struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
         struct pt_regs *regs = get_irq_regs();
- -      int cpu = smp_processor_id();
         ktime_t now = ktime_get();
   
         dev->next_event.tv64 = KTIME_MAX;
   
- -      /*
- -       * Check if the do_timer duty was dropped. We don't care about
- -       * concurrency: This happens only when the cpu in charge went
- -       * into a long sleep. If two cpus happen to assign themself to
- -       * this duty, then the jiffies update is still serialized by
- -       * xtime_lock.
- -       */
- -      if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
- -              tick_do_timer_cpu = cpu;
- -
- -      /* Check, if the jiffies need an update */
- -      if (tick_do_timer_cpu == cpu)
- -              tick_do_update_jiffies64(now);
- -
- -      /*
- -       * When we are idle and the tick is stopped, we have to touch
- -       * the watchdog as we might not schedule for a really long
- -       * time. This happens on complete idle SMP systems while
- -       * waiting on the login prompt. We also increment the "start
- -       * of idle" jiffy stamp so the idle accounting adjustment we
- -       * do when we go busy again does not account too much ticks.
- -       */
- -      if (ts->tick_stopped) {
- -              touch_softlockup_watchdog();
- -              ts->idle_jiffies++;
- -      }
- -
- -      update_process_times(user_mode(regs));
- -      profile_tick(CPU_PROFILING);
+ +      tick_sched_do_timer(now);
+ +      tick_sched_handle(ts, regs);
   
         while (tick_nohz_reprogram(ts, now)) {
                 now = ktime_get();
@@@ -813,7 -795,7 +814,7 @@@ void tick_check_idle(int cpu
   #ifdef CONFIG_HIGH_RES_TIMERS
   /*
    * We rearm the timer until we get disabled by the idle code.
- - * Called with interrupts disabled and timer->base->cpu_base->lock held.
+ + * Called with interrupts disabled.
    */
   static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
   {
@@@ -821,15 -803,45 +822,15 @@@
                 container_of(timer, struct tick_sched, sched_timer);
         struct pt_regs *regs = get_irq_regs();
         ktime_t now = ktime_get();
- -      int cpu = smp_processor_id();
   
- -#ifdef CONFIG_NO_HZ
- -      /*
- -       * Check if the do_timer duty was dropped. We don't care about
- -       * concurrency: This happens only when the cpu in charge went
- -       * into a long sleep. If two cpus happen to assign themself to
- -       * this duty, then the jiffies update is still serialized by
- -       * xtime_lock.
- -       */
- -      if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
- -              tick_do_timer_cpu = cpu;
- -#endif
- -
- -      /* Check, if the jiffies need an update */
- -      if (tick_do_timer_cpu == cpu)
- -              tick_do_update_jiffies64(now);
+ +      tick_sched_do_timer(now);
   
         /*
          * Do not call, when we are not in irq context and have
          * no valid regs pointer
          */
- -      if (regs) {
- -              /*
- -               * When we are idle and the tick is stopped, we have to touch
- -               * the watchdog as we might not schedule for a really long
- -               * time. This happens on complete idle SMP systems while
- -               * waiting on the login prompt. We also increment the "start of
- -               * idle" jiffy stamp so the idle accounting adjustment we do
- -               * when we go busy again does not account too much ticks.
- -               */
- -              if (ts->tick_stopped) {
- -                      touch_softlockup_watchdog();
- -                      if (is_idle_task(current))
- -                              ts->idle_jiffies++;
- -              }
- -              update_process_times(user_mode(regs));
- -              profile_tick(CPU_PROFILING);
- -      }
+ +      if (regs)
+ +              tick_sched_handle(ts, regs);
   
         hrtimer_forward(timer, now, tick_period);
   
@@@ -863,7 -875,7 +864,7 @@@ void tick_setup_sched_timer(void
         /* Get the next period (per cpu) */
         hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
   
- -      /* Offset the tick to avert xtime_lock contention. */
+ +      /* Offset the tick to avert jiffies_lock contention. */
         if (sched_skew_tick) {
                 u64 offset = ktime_to_ns(tick_period) >> 1;
                 do_div(offset, num_possible_cpus());
author	Frederic Weisbecker <[email protected]>
	Mon, 4 Feb 2013 23:48:46 +0000 (00:48 +0100)
committer	Frederic Weisbecker <[email protected]>
	Mon, 4 Feb 2013 23:48:46 +0000 (00:48 +0100)
		1	2
include/linux/irq_work.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/tick.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/irq_work.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/printk.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/tick-sched.c	patch \|	diff1 \|	diff2 \|	blob \| history