Git Repo - linux.git/commitdiff
Merge branch 'linus' into sched/core
author Ingo Molnar <[email protected]>
Fri, 2 Apr 2010 18:02:55 +0000 (20:02 +0200)
committer Ingo Molnar <[email protected]>
Fri, 2 Apr 2010 18:03:08 +0000 (20:03 +0200)
Merge reason: update to latest upstream

Signed-off-by: Ingo Molnar <[email protected]>
include/linux/sched.h
kernel/sched.c
kernel/sched_fair.c
kernel/sched_rt.c

diff --combined include/linux/sched.h
index 8604884cee87c01e5363d2ecf3fff06a005173c3,dad7f668ebf70041f3897102a0ff13a1a456edad..43c9451527321ecdbb945a275b45c39d500d1deb
@@@ -258,6 -258,10 +258,10 @@@ extern spinlock_t mmlist_lock
  
  struct task_struct;
  
+ #ifdef CONFIG_PROVE_RCU
+ extern int lockdep_tasklist_lock_is_held(void);
+ #endif /* #ifdef CONFIG_PROVE_RCU */
  extern void sched_init(void);
  extern void sched_init_smp(void);
  extern asmlinkage void schedule_tail(struct task_struct *prev);
@@@ -271,17 -275,11 +275,17 @@@ extern cpumask_var_t nohz_cpu_mask
  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
  extern int select_nohz_load_balancer(int cpu);
  extern int get_nohz_load_balancer(void);
 +extern int nohz_ratelimit(int cpu);
  #else
  static inline int select_nohz_load_balancer(int cpu)
  {
        return 0;
  }
 +
 +static inline int nohz_ratelimit(int cpu)
 +{
 +      return 0;
 +}
  #endif
  
  /*
@@@ -402,60 -400,6 +406,6 @@@ extern void arch_unmap_area_topdown(str
  static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
  #endif
  
- #if USE_SPLIT_PTLOCKS
- /*
-  * The mm counters are not protected by its page_table_lock,
-  * so must be incremented atomically.
-  */
- #define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
- #define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
- #define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
- #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
- #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
- #else  /* !USE_SPLIT_PTLOCKS */
- /*
-  * The mm counters are protected by its page_table_lock,
-  * so can be incremented directly.
-  */
- #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
- #define get_mm_counter(mm, member) ((mm)->_##member)
- #define add_mm_counter(mm, member, value) (mm)->_##member += (value)
- #define inc_mm_counter(mm, member) (mm)->_##member++
- #define dec_mm_counter(mm, member) (mm)->_##member--
- #endif /* !USE_SPLIT_PTLOCKS */
- #define get_mm_rss(mm)                                        \
-       (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
- #define update_hiwater_rss(mm)        do {                    \
-       unsigned long _rss = get_mm_rss(mm);            \
-       if ((mm)->hiwater_rss < _rss)                   \
-               (mm)->hiwater_rss = _rss;               \
- } while (0)
- #define update_hiwater_vm(mm) do {                    \
-       if ((mm)->hiwater_vm < (mm)->total_vm)          \
-               (mm)->hiwater_vm = (mm)->total_vm;      \
- } while (0)
- static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
- {
-       return max(mm->hiwater_rss, get_mm_rss(mm));
- }
- static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
-                                        struct mm_struct *mm)
- {
-       unsigned long hiwater_rss = get_mm_hiwater_rss(mm);
-       if (*maxrss < hiwater_rss)
-               *maxrss = hiwater_rss;
- }
- static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
- {
-       return max(mm->hiwater_vm, mm->total_vm);
- }
  
  extern void set_dumpable(struct mm_struct *mm, int value);
  extern int get_dumpable(struct mm_struct *mm);
@@@ -1133,8 -1077,36 +1083,8 @@@ struct load_weight 
        unsigned long weight, inv_weight;
  };
  
 -/*
 - * CFS stats for a schedulable entity (task, task-group etc)
 - *
 - * Current field usage histogram:
 - *
 - *     4 se->block_start
 - *     4 se->run_node
 - *     4 se->sleep_start
 - *     6 se->load.weight
 - */
 -struct sched_entity {
 -      struct load_weight      load;           /* for load-balancing */
 -      struct rb_node          run_node;
 -      struct list_head        group_node;
 -      unsigned int            on_rq;
 -
 -      u64                     exec_start;
 -      u64                     sum_exec_runtime;
 -      u64                     vruntime;
 -      u64                     prev_sum_exec_runtime;
 -
 -      u64                     last_wakeup;
 -      u64                     avg_overlap;
 -
 -      u64                     nr_migrations;
 -
 -      u64                     start_runtime;
 -      u64                     avg_wakeup;
 -
  #ifdef CONFIG_SCHEDSTATS
 +struct sched_statistics {
        u64                     wait_start;
        u64                     wait_max;
        u64                     wait_count;
        u64                     nr_wakeups_affine_attempts;
        u64                     nr_wakeups_passive;
        u64                     nr_wakeups_idle;
 +};
 +#endif
 +
 +struct sched_entity {
 +      struct load_weight      load;           /* for load-balancing */
 +      struct rb_node          run_node;
 +      struct list_head        group_node;
 +      unsigned int            on_rq;
 +
 +      u64                     exec_start;
 +      u64                     sum_exec_runtime;
 +      u64                     vruntime;
 +      u64                     prev_sum_exec_runtime;
 +
 +      u64                     nr_migrations;
 +
 +#ifdef CONFIG_SCHEDSTATS
 +      struct sched_statistics statistics;
  #endif
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
@@@ -1270,7 -1224,9 +1220,9 @@@ struct task_struct 
        struct plist_node pushable_tasks;
  
        struct mm_struct *mm, *active_mm;
+ #if defined(SPLIT_RSS_COUNTING)
+       struct task_rss_stat    rss_stat;
+ #endif
  /* task state */
        int exit_state;
        int exit_code, exit_signal;
  
        struct list_head        *scm_work_list;
  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       /* Index of current stored adress in ret_stack */
+       /* Index of current stored address in ret_stack */
        int curr_ret_stack;
        /* Stack of return addresses for return function tracing */
        struct ftrace_ret_stack *ret_stack;
@@@ -2439,9 -2395,7 +2391,7 @@@ void thread_group_cputimer(struct task_
  
  static inline void thread_group_cputime_init(struct signal_struct *sig)
  {
-       sig->cputimer.cputime = INIT_CPUTIME;
        spin_lock_init(&sig->cputimer.lock);
-       sig->cputimer.running = 0;
  }
  
  static inline void thread_group_cputime_free(struct signal_struct *sig)
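Note on the sched.h hunks above: the per-entity schedstat fields are folded out of struct sched_entity into a new struct sched_statistics, which the entity embeds only when CONFIG_SCHEDSTATS is set, so every schedstat access in the rest of this merge changes from se.<field> to se.statistics.<field>. A minimal sketch of the resulting access pattern, assuming the usual schedstat_set() helper from kernel/sched_stats.h (not shown in this diff); the function name is illustrative only:

  #ifdef CONFIG_SCHEDSTATS
  # define schedstat_set(var, val)      do { var = (val); } while (0)
  #else
  # define schedstat_set(var, val)      do { } while (0)
  #endif

  static void sketch_update_wait_start(struct sched_entity *se, u64 now)
  {
        /* before this merge: schedstat_set(se->wait_start, now); */
        /* after this merge, the field lives one level down:      */
        schedstat_set(se->statistics.wait_start, now);
  }

Either way the call compiles to nothing when CONFIG_SCHEDSTATS is off, which is why the new statistics member itself can stay inside the #ifdef.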
diff --combined kernel/sched.c
index cc6dc8caa3809f6e73e8780a39b8f6a3c29e715e,49d2fa7b687a6cd9956e5d7179ab26822bcff6f1..52b7efd274167faf094605665ba37ca73f714804
@@@ -492,11 -492,8 +492,11 @@@ struct rq 
        #define CPU_LOAD_IDX_MAX 5
        unsigned long cpu_load[CPU_LOAD_IDX_MAX];
  #ifdef CONFIG_NO_HZ
 +      u64 nohz_stamp;
        unsigned char in_nohz_recently;
  #endif
 +      unsigned int skip_clock_update;
 +
        /* capture load from *all* tasks on this cpu: */
        struct load_weight load;
        unsigned long nr_load_updates;
@@@ -594,13 -591,6 +594,13 @@@ static inlin
  void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
  {
        rq->curr->sched_class->check_preempt_curr(rq, p, flags);
 +
 +      /*
 +       * A queue event has occurred, and we're going to schedule.  In
 +       * this case, we can save a useless back to back clock update.
 +       */
 +      if (test_tsk_need_resched(p))
 +              rq->skip_clock_update = 1;
  }
  
  static inline int cpu_of(struct rq *rq)
  
  inline void update_rq_clock(struct rq *rq)
  {
 -      rq->clock = sched_clock_cpu(cpu_of(rq));
 +      if (!rq->skip_clock_update)
 +              rq->clock = sched_clock_cpu(cpu_of(rq));
  }
  
  /*
@@@ -1239,17 -1228,6 +1239,17 @@@ void wake_up_idle_cpu(int cpu
        if (!tsk_is_polling(rq->idle))
                smp_send_reschedule(cpu);
  }
 +
 +int nohz_ratelimit(int cpu)
 +{
 +      struct rq *rq = cpu_rq(cpu);
 +      u64 diff = rq->clock - rq->nohz_stamp;
 +
 +      rq->nohz_stamp = rq->clock;
 +
 +      return diff < (NSEC_PER_SEC / HZ) >> 1;
 +}
 +
  #endif /* CONFIG_NO_HZ */
  
  static u64 sched_avg_period(void)
@@@ -1543,7 -1521,7 +1543,7 @@@ static unsigned long cpu_avg_load_per_t
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
- static __read_mostly unsigned long *update_shares_data;
+ static __read_mostly unsigned long __percpu *update_shares_data;
  
  static void __set_se_shares(struct sched_entity *se, unsigned long shares);
  
@@@ -1792,6 -1770,8 +1792,6 @@@ static void double_rq_lock(struct rq *r
                        raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
                }
        }
 -      update_rq_clock(rq1);
 -      update_rq_clock(rq2);
  }
  
  /*
@@@ -1888,7 -1868,9 +1888,7 @@@ static void update_avg(u64 *avg, u64 sa
  static void
  enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
  {
 -      if (wakeup)
 -              p->se.start_runtime = p->se.sum_exec_runtime;
 -
 +      update_rq_clock(rq);
        sched_info_queued(p);
        p->sched_class->enqueue_task(rq, p, wakeup, head);
        p->se.on_rq = 1;
  
  static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
  {
 -      if (sleep) {
 -              if (p->se.last_wakeup) {
 -                      update_avg(&p->se.avg_overlap,
 -                              p->se.sum_exec_runtime - p->se.last_wakeup);
 -                      p->se.last_wakeup = 0;
 -              } else {
 -                      update_avg(&p->se.avg_wakeup,
 -                              sysctl_sched_wakeup_granularity);
 -              }
 -      }
 -
 +      update_rq_clock(rq);
        sched_info_dequeued(p);
        p->sched_class->dequeue_task(rq, p, sleep);
        p->se.on_rq = 0;
@@@ -2369,10 -2361,14 +2369,10 @@@ static int try_to_wake_up(struct task_s
        unsigned long flags;
        struct rq *rq;
  
 -      if (!sched_feat(SYNC_WAKEUPS))
 -              wake_flags &= ~WF_SYNC;
 -
        this_cpu = get_cpu();
  
        smp_wmb();
        rq = task_rq_lock(p, &flags);
 -      update_rq_clock(rq);
        if (!(p->state & state))
                goto out;
  
  
        rq = cpu_rq(cpu);
        raw_spin_lock(&rq->lock);
 -      update_rq_clock(rq);
  
        /*
         * We migrated the task without holding either rq->lock, however
  
  out_activate:
  #endif /* CONFIG_SMP */
 -      schedstat_inc(p, se.nr_wakeups);
 +      schedstat_inc(p, se.statistics.nr_wakeups);
        if (wake_flags & WF_SYNC)
 -              schedstat_inc(p, se.nr_wakeups_sync);
 +              schedstat_inc(p, se.statistics.nr_wakeups_sync);
        if (orig_cpu != cpu)
 -              schedstat_inc(p, se.nr_wakeups_migrate);
 +              schedstat_inc(p, se.statistics.nr_wakeups_migrate);
        if (cpu == this_cpu)
 -              schedstat_inc(p, se.nr_wakeups_local);
 +              schedstat_inc(p, se.statistics.nr_wakeups_local);
        else
 -              schedstat_inc(p, se.nr_wakeups_remote);
 +              schedstat_inc(p, se.statistics.nr_wakeups_remote);
        activate_task(rq, p, 1);
        success = 1;
  
 -      /*
 -       * Only attribute actual wakeups done by this task.
 -       */
 -      if (!in_interrupt()) {
 -              struct sched_entity *se = &current->se;
 -              u64 sample = se->sum_exec_runtime;
 -
 -              if (se->last_wakeup)
 -                      sample -= se->last_wakeup;
 -              else
 -                      sample -= se->start_runtime;
 -              update_avg(&se->avg_wakeup, sample);
 -
 -              se->last_wakeup = se->sum_exec_runtime;
 -      }
 -
  out_running:
        trace_sched_wakeup(rq, p, success);
        check_preempt_curr(rq, p, wake_flags);
@@@ -2513,9 -2526,42 +2513,9 @@@ static void __sched_fork(struct task_st
        p->se.sum_exec_runtime          = 0;
        p->se.prev_sum_exec_runtime     = 0;
        p->se.nr_migrations             = 0;
 -      p->se.last_wakeup               = 0;
 -      p->se.avg_overlap               = 0;
 -      p->se.start_runtime             = 0;
 -      p->se.avg_wakeup                = sysctl_sched_wakeup_granularity;
  
  #ifdef CONFIG_SCHEDSTATS
 -      p->se.wait_start                        = 0;
 -      p->se.wait_max                          = 0;
 -      p->se.wait_count                        = 0;
 -      p->se.wait_sum                          = 0;
 -
 -      p->se.sleep_start                       = 0;
 -      p->se.sleep_max                         = 0;
 -      p->se.sum_sleep_runtime                 = 0;
 -
 -      p->se.block_start                       = 0;
 -      p->se.block_max                         = 0;
 -      p->se.exec_max                          = 0;
 -      p->se.slice_max                         = 0;
 -
 -      p->se.nr_migrations_cold                = 0;
 -      p->se.nr_failed_migrations_affine       = 0;
 -      p->se.nr_failed_migrations_running      = 0;
 -      p->se.nr_failed_migrations_hot          = 0;
 -      p->se.nr_forced_migrations              = 0;
 -
 -      p->se.nr_wakeups                        = 0;
 -      p->se.nr_wakeups_sync                   = 0;
 -      p->se.nr_wakeups_migrate                = 0;
 -      p->se.nr_wakeups_local                  = 0;
 -      p->se.nr_wakeups_remote                 = 0;
 -      p->se.nr_wakeups_affine                 = 0;
 -      p->se.nr_wakeups_affine_attempts        = 0;
 -      p->se.nr_wakeups_passive                = 0;
 -      p->se.nr_wakeups_idle                   = 0;
 -
 +      memset(&p->se.statistics, 0, sizeof(p->se.statistics));
  #endif
  
        INIT_LIST_HEAD(&p->rt.run_list);
@@@ -2604,7 -2650,7 +2604,7 @@@ void wake_up_new_task(struct task_struc
  {
        unsigned long flags;
        struct rq *rq;
-       int cpu = get_cpu();
+       int cpu __maybe_unused = get_cpu();
  
  #ifdef CONFIG_SMP
        /*
  
        BUG_ON(p->state != TASK_WAKING);
        p->state = TASK_RUNNING;
 -      update_rq_clock(rq);
        activate_task(rq, p, 0);
        trace_sched_wakeup_new(rq, p, 1);
        check_preempt_curr(rq, p, WF_FORK);
@@@ -3582,9 -3629,23 +3582,9 @@@ static inline void schedule_debug(struc
  
  static void put_prev_task(struct rq *rq, struct task_struct *prev)
  {
 -      if (prev->state == TASK_RUNNING) {
 -              u64 runtime = prev->se.sum_exec_runtime;
 -
 -              runtime -= prev->se.prev_sum_exec_runtime;
 -              runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
 -
 -              /*
 -               * In order to avoid avg_overlap growing stale when we are
 -               * indeed overlapping and hence not getting put to sleep, grow
 -               * the avg_overlap on preemption.
 -               *
 -               * We use the average preemption runtime because that
 -               * correlates to the amount of cache footprint a task can
 -               * build up.
 -               */
 -              update_avg(&prev->se.avg_overlap, runtime);
 -      }
 +      if (prev->se.on_rq)
 +              update_rq_clock(rq);
 +      rq->skip_clock_update = 0;
        prev->sched_class->put_prev_task(rq, prev);
  }
  
@@@ -3647,6 -3708,7 +3647,6 @@@ need_resched_nonpreemptible
                hrtick_clear(rq);
  
        raw_spin_lock_irq(&rq->lock);
 -      update_rq_clock(rq);
        clear_tsk_need_resched(prev);
  
        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@@ -4203,6 -4265,7 +4203,6 @@@ void rt_mutex_setprio(struct task_struc
        BUG_ON(prio < 0 || prio > MAX_PRIO);
  
        rq = task_rq_lock(p, &flags);
 -      update_rq_clock(rq);
  
        oldprio = p->prio;
        prev_class = p->sched_class;
@@@ -4245,6 -4308,7 +4245,6 @@@ void set_user_nice(struct task_struct *
         * the task might be in the middle of scheduling on another CPU.
         */
        rq = task_rq_lock(p, &flags);
 -      update_rq_clock(rq);
        /*
         * The RT priorities are set via sched_setscheduler(), but we still
         * allow the 'normal' nice value to be set - but as expected
@@@ -4289,7 -4353,7 +4289,7 @@@ int can_nice(const struct task_struct *
        /* convert nice value [19,-20] to rlimit style value [1,40] */
        int nice_rlim = 20 - nice;
  
-       return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
+       return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
                capable(CAP_SYS_NICE));
  }
  
@@@ -4466,7 -4530,7 +4466,7 @@@ recheck
  
                        if (!lock_task_sighand(p, &flags))
                                return -ESRCH;
-                       rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
+                       rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
                        unlock_task_sighand(p, &flags);
  
                        /* can't set/change the rt policy */
                raw_spin_unlock_irqrestore(&p->pi_lock, flags);
                goto recheck;
        }
 -      update_rq_clock(rq);
        on_rq = p->se.on_rq;
        running = task_current(rq, p);
        if (on_rq)
@@@ -4837,7 -4902,9 +4837,9 @@@ SYSCALL_DEFINE3(sched_getaffinity, pid_
        int ret;
        cpumask_var_t mask;
  
-       if (len < cpumask_size())
+       if (len < nr_cpu_ids)
+               return -EINVAL;
+       if (len & (sizeof(unsigned long)-1))
                return -EINVAL;
  
        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
  
        ret = sched_getaffinity(pid, mask);
        if (ret == 0) {
-               if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
+               size_t retlen = min_t(size_t, len, cpumask_size());
+               if (copy_to_user(user_mask_ptr, mask, retlen))
                        ret = -EFAULT;
                else
-                       ret = cpumask_size();
+                       ret = retlen;
        }
        free_cpumask_var(mask);
  
@@@ -5533,6 -5602,7 +5537,6 @@@ void sched_idle_next(void
  
        __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
  
 -      update_rq_clock(rq);
        activate_task(rq, p, 0);
  
        raw_spin_unlock_irqrestore(&rq->lock, flags);
@@@ -5587,6 -5657,7 +5591,6 @@@ static void migrate_dead_tasks(unsigne
        for ( ; ; ) {
                if (!rq->nr_running)
                        break;
 -              update_rq_clock(rq);
                next = pick_next_task(rq);
                if (!next)
                        break;
@@@ -5870,6 -5941,7 +5874,6 @@@ migration_call(struct notifier_block *n
                rq->migration_thread = NULL;
                /* Idle task back to normal (off runqueue, low prio) */
                raw_spin_lock_irq(&rq->lock);
 -              update_rq_clock(rq);
                deactivate_task(rq, rq->idle, 0);
                __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
                rq->idle->sched_class = &idle_sched_class;
@@@ -7338,11 -7410,13 +7342,13 @@@ static ssize_t sched_power_savings_stor
  
  #ifdef CONFIG_SCHED_MC
  static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
+                                          struct sysdev_class_attribute *attr,
                                           char *page)
  {
        return sprintf(page, "%u\n", sched_mc_power_savings);
  }
  static ssize_t sched_mc_power_savings_store(struct sysdev_class *class,
+                                           struct sysdev_class_attribute *attr,
                                            const char *buf, size_t count)
  {
        return sched_power_savings_store(buf, count, 0);
@@@ -7354,11 -7428,13 +7360,13 @@@ static SYSDEV_CLASS_ATTR(sched_mc_power
  
  #ifdef CONFIG_SCHED_SMT
  static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev,
+                                           struct sysdev_class_attribute *attr,
                                            char *page)
  {
        return sprintf(page, "%u\n", sched_smt_power_savings);
  }
  static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev,
+                                            struct sysdev_class_attribute *attr,
                                             const char *buf, size_t count)
  {
        return sched_power_savings_store(buf, count, 1);
@@@ -7815,6 -7891,7 +7823,6 @@@ static void normalize_task(struct rq *r
  {
        int on_rq;
  
 -      update_rq_clock(rq);
        on_rq = p->se.on_rq;
        if (on_rq)
                deactivate_task(rq, p, 0);
@@@ -7841,9 -7918,9 +7849,9 @@@ void normalize_rt_tasks(void
  
                p->se.exec_start                = 0;
  #ifdef CONFIG_SCHEDSTATS
 -              p->se.wait_start                = 0;
 -              p->se.sleep_start               = 0;
 -              p->se.block_start               = 0;
 +              p->se.statistics.wait_start     = 0;
 +              p->se.statistics.sleep_start    = 0;
 +              p->se.statistics.block_start    = 0;
  #endif
  
                if (!rt_task(p)) {
@@@ -8176,6 -8253,8 +8184,6 @@@ void sched_move_task(struct task_struc
  
        rq = task_rq_lock(tsk, &flags);
  
 -      update_rq_clock(rq);
 -
        running = task_current(rq, tsk);
        on_rq = tsk->se.on_rq;
  
@@@ -8742,7 -8821,7 +8750,7 @@@ struct cgroup_subsys cpu_cgroup_subsys 
  struct cpuacct {
        struct cgroup_subsys_state css;
        /* cpuusage holds pointer to a u64-type object on every cpu */
-       u64 *cpuusage;
+       u64 __percpu *cpuusage;
        struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
        struct cpuacct *parent;
  };
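Note on the new nohz_ratelimit() in kernel/sched.c above: it records rq->clock into rq->nohz_stamp on every call and returns true when less than half a tick has passed since the previous call on that CPU, presumably so the nohz idle path can avoid repeatedly stopping and restarting the tick across very short idle periods. The real caller lives in the tick/nohz code and is not part of this combined diff, so the sketch below is illustrative only, with HZ=1000 as an assumption:

  /*
   * With HZ = 1000 (a config choice, not fixed by this diff):
   *   NSEC_PER_SEC / HZ        = 1,000,000,000 / 1,000 = 1,000,000 ns (one tick)
   *   (NSEC_PER_SEC / HZ) >> 1 = 500,000 ns             (half a tick)
   * so nohz_ratelimit(cpu) is true when the CPU woke again within 0.5 ms.
   */
  static bool sketch_can_stop_tick(int cpu)
  {
        if (nohz_ratelimit(cpu))
                return false;   /* woke within half a tick: keep the tick running */
        return true;
  }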
diff --combined kernel/sched_fair.c
index 35a5c649638b8cdcbd9901c9a9dd659465ac823f,5a5ea2cd924fa8494abfa21f8203f919f40ff1ca..49ad99378f82b064258d67829f1443d569716067
@@@ -35,8 -35,8 +35,8 @@@
   * (to see the precise effective timeslice length of your workload,
   *  run vmstat and monitor the context-switches (cs) field)
   */
 -unsigned int sysctl_sched_latency = 5000000ULL;
 -unsigned int normalized_sysctl_sched_latency = 5000000ULL;
 +unsigned int sysctl_sched_latency = 6000000ULL;
 +unsigned int normalized_sysctl_sched_latency = 6000000ULL;
  
  /*
   * The initial- and re-scaling of tunables is configurable
@@@ -52,15 -52,15 +52,15 @@@ enum sched_tunable_scaling sysctl_sched
  
  /*
   * Minimal preemption granularity for CPU-bound tasks:
 - * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
 + * (default: 2 msec * (1 + ilog(ncpus)), units: nanoseconds)
   */
 -unsigned int sysctl_sched_min_granularity = 1000000ULL;
 -unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL;
 +unsigned int sysctl_sched_min_granularity = 2000000ULL;
 +unsigned int normalized_sysctl_sched_min_granularity = 2000000ULL;
  
  /*
   * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
   */
 -static unsigned int sched_nr_latency = 5;
 +static unsigned int sched_nr_latency = 3;
  
  /*
   * After fork, child runs first. If set to 0 (default) then
@@@ -505,8 -505,7 +505,8 @@@ __update_curr(struct cfs_rq *cfs_rq, st
  {
        unsigned long delta_exec_weighted;
  
 -      schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 +      schedstat_set(curr->statistics.exec_max,
 +                    max((u64)delta_exec, curr->statistics.exec_max));
  
        curr->sum_exec_runtime += delta_exec;
        schedstat_add(cfs_rq, exec_clock, delta_exec);
@@@ -549,7 -548,7 +549,7 @@@ static void update_curr(struct cfs_rq *
  static inline void
  update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
 -      schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
 +      schedstat_set(se->statistics.wait_start, rq_of(cfs_rq)->clock);
  }
  
  /*
@@@ -568,18 -567,18 +568,18 @@@ static void update_stats_enqueue(struc
  static void
  update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
 -      schedstat_set(se->wait_max, max(se->wait_max,
 -                      rq_of(cfs_rq)->clock - se->wait_start));
 -      schedstat_set(se->wait_count, se->wait_count + 1);
 -      schedstat_set(se->wait_sum, se->wait_sum +
 -                      rq_of(cfs_rq)->clock - se->wait_start);
 +      schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
 +                      rq_of(cfs_rq)->clock - se->statistics.wait_start));
 +      schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
 +      schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
 +                      rq_of(cfs_rq)->clock - se->statistics.wait_start);
  #ifdef CONFIG_SCHEDSTATS
        if (entity_is_task(se)) {
                trace_sched_stat_wait(task_of(se),
 -                      rq_of(cfs_rq)->clock - se->wait_start);
 +                      rq_of(cfs_rq)->clock - se->statistics.wait_start);
        }
  #endif
 -      schedstat_set(se->wait_start, 0);
 +      schedstat_set(se->statistics.wait_start, 0);
  }
  
  static inline void
@@@ -658,39 -657,39 +658,39 @@@ static void enqueue_sleeper(struct cfs_
        if (entity_is_task(se))
                tsk = task_of(se);
  
 -      if (se->sleep_start) {
 -              u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
 +      if (se->statistics.sleep_start) {
 +              u64 delta = rq_of(cfs_rq)->clock - se->statistics.sleep_start;
  
                if ((s64)delta < 0)
                        delta = 0;
  
 -              if (unlikely(delta > se->sleep_max))
 -                      se->sleep_max = delta;
 +              if (unlikely(delta > se->statistics.sleep_max))
 +                      se->statistics.sleep_max = delta;
  
 -              se->sleep_start = 0;
 -              se->sum_sleep_runtime += delta;
 +              se->statistics.sleep_start = 0;
 +              se->statistics.sum_sleep_runtime += delta;
  
                if (tsk) {
                        account_scheduler_latency(tsk, delta >> 10, 1);
                        trace_sched_stat_sleep(tsk, delta);
                }
        }
 -      if (se->block_start) {
 -              u64 delta = rq_of(cfs_rq)->clock - se->block_start;
 +      if (se->statistics.block_start) {
 +              u64 delta = rq_of(cfs_rq)->clock - se->statistics.block_start;
  
                if ((s64)delta < 0)
                        delta = 0;
  
 -              if (unlikely(delta > se->block_max))
 -                      se->block_max = delta;
 +              if (unlikely(delta > se->statistics.block_max))
 +                      se->statistics.block_max = delta;
  
 -              se->block_start = 0;
 -              se->sum_sleep_runtime += delta;
 +              se->statistics.block_start = 0;
 +              se->statistics.sum_sleep_runtime += delta;
  
                if (tsk) {
                        if (tsk->in_iowait) {
 -                              se->iowait_sum += delta;
 -                              se->iowait_count++;
 +                              se->statistics.iowait_sum += delta;
 +                              se->statistics.iowait_count++;
                                trace_sched_stat_iowait(tsk, delta);
                        }
  
@@@ -738,9 -737,19 +738,9 @@@ place_entity(struct cfs_rq *cfs_rq, str
                vruntime += sched_vslice(cfs_rq, se);
  
        /* sleeps up to a single latency don't count. */
 -      if (!initial && sched_feat(FAIR_SLEEPERS)) {
 +      if (!initial) {
                unsigned long thresh = sysctl_sched_latency;
  
 -              /*
 -               * Convert the sleeper threshold into virtual time.
 -               * SCHED_IDLE is a special sub-class.  We care about
 -               * fairness only relative to other SCHED_IDLE tasks,
 -               * all of which have the same weight.
 -               */
 -              if (sched_feat(NORMALIZED_SLEEPER) && (!entity_is_task(se) ||
 -                               task_of(se)->policy != SCHED_IDLE))
 -                      thresh = calc_delta_fair(thresh, se);
 -
                /*
                 * Halve their sleep time's effect, to allow
                 * for a gentler effect of sleepers:
@@@ -817,9 -826,9 +817,9 @@@ dequeue_entity(struct cfs_rq *cfs_rq, s
                        struct task_struct *tsk = task_of(se);
  
                        if (tsk->state & TASK_INTERRUPTIBLE)
 -                              se->sleep_start = rq_of(cfs_rq)->clock;
 +                              se->statistics.sleep_start = rq_of(cfs_rq)->clock;
                        if (tsk->state & TASK_UNINTERRUPTIBLE)
 -                              se->block_start = rq_of(cfs_rq)->clock;
 +                              se->statistics.block_start = rq_of(cfs_rq)->clock;
                }
  #endif
        }
@@@ -903,7 -912,7 +903,7 @@@ set_next_entity(struct cfs_rq *cfs_rq, 
         * when there are only lesser-weight tasks around):
         */
        if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
 -              se->slice_max = max(se->slice_max,
 +              se->statistics.slice_max = max(se->statistics.slice_max,
                        se->sum_exec_runtime - se->prev_sum_exec_runtime);
        }
  #endif
@@@ -1231,6 -1240,7 +1231,6 @@@ static inline unsigned long effective_l
  
  static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
  {
 -      struct task_struct *curr = current;
        unsigned long this_load, load;
        int idx, this_cpu, prev_cpu;
        unsigned long tl_per_task;
        load      = source_load(prev_cpu, idx);
        this_load = target_load(this_cpu, idx);
  
 -      if (sync) {
 -             if (sched_feat(SYNC_LESS) &&
 -                 (curr->se.avg_overlap > sysctl_sched_migration_cost ||
 -                  p->se.avg_overlap > sysctl_sched_migration_cost))
 -                     sync = 0;
 -      } else {
 -              if (sched_feat(SYNC_MORE) &&
 -                  (curr->se.avg_overlap < sysctl_sched_migration_cost &&
 -                   p->se.avg_overlap < sysctl_sched_migration_cost))
 -                      sync = 1;
 -      }
 -
        /*
         * If sync wakeup then subtract the (maximum possible)
         * effect of the currently running task from the load
        if (sync && balanced)
                return 1;
  
 -      schedstat_inc(p, se.nr_wakeups_affine_attempts);
 +      schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
        tl_per_task = cpu_avg_load_per_task(this_cpu);
  
        if (balanced ||
                 * there is no bad imbalance.
                 */
                schedstat_inc(sd, ttwu_move_affine);
 -              schedstat_inc(p, se.nr_wakeups_affine);
 +              schedstat_inc(p, se.statistics.nr_wakeups_affine);
  
                return 1;
        }
@@@ -1429,12 -1451,13 +1429,12 @@@ static int select_task_rq_fair(struct t
        int cpu = smp_processor_id();
        int prev_cpu = task_cpu(p);
        int new_cpu = cpu;
 -      int want_affine = 0;
 +      int want_affine = 0, cpu_idle = !current->pid;
        int want_sd = 1;
        int sync = wake_flags & WF_SYNC;
  
        if (sd_flag & SD_BALANCE_WAKE) {
 -              if (sched_feat(AFFINE_WAKEUPS) &&
 -                  cpumask_test_cpu(cpu, &p->cpus_allowed))
 +              if (cpumask_test_cpu(cpu, &p->cpus_allowed))
                        want_affine = 1;
                new_cpu = prev_cpu;
        }
                         * If there's an idle sibling in this domain, make that
                         * the wake_affine target instead of the current cpu.
                         */
 -                      if (tmp->flags & SD_SHARE_PKG_RESOURCES)
 +                      if (!cpu_idle && tmp->flags & SD_SHARE_PKG_RESOURCES)
                                target = select_idle_sibling(p, tmp, target);
  
                        if (target >= 0) {
                                if (tmp->flags & SD_WAKE_AFFINE) {
                                        affine_sd = tmp;
                                        want_affine = 0;
 +                                      if (target != cpu)
 +                                              cpu_idle = 1;
                                }
                                cpu = target;
                        }
                        sd = tmp;
        }
  
 +#ifdef CONFIG_FAIR_GROUP_SCHED
        if (sched_feat(LB_SHARES_UPDATE)) {
                /*
                 * Pick the largest domain to update shares over
                if (tmp)
                        update_shares(tmp);
        }
 +#endif
  
 -      if (affine_sd && wake_affine(affine_sd, p, sync))
 -              return cpu;
 +      if (affine_sd) {
 +              if (cpu_idle || cpu == prev_cpu || wake_affine(affine_sd, p, sync))
 +                      return cpu;
 +      }
  
        while (sd) {
                int load_idx = sd->forkexec_idx;
  }
  #endif /* CONFIG_SMP */
  
 -/*
 - * Adaptive granularity
 - *
 - * se->avg_wakeup gives the average time a task runs until it does a wakeup,
 - * with the limit of wakeup_gran -- when it never does a wakeup.
 - *
 - * So the smaller avg_wakeup is the faster we want this task to preempt,
 - * but we don't want to treat the preemptee unfairly and therefore allow it
 - * to run for at least the amount of time we'd like to run.
 - *
 - * NOTE: we use 2*avg_wakeup to increase the probability of actually doing one
 - *
 - * NOTE: we use *nr_running to scale with load, this nicely matches the
 - *       degrading latency on load.
 - */
 -static unsigned long
 -adaptive_gran(struct sched_entity *curr, struct sched_entity *se)
 -{
 -      u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
 -      u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running;
 -      u64 gran = 0;
 -
 -      if (this_run < expected_wakeup)
 -              gran = expected_wakeup - this_run;
 -
 -      return min_t(s64, gran, sysctl_sched_wakeup_granularity);
 -}
 -
  static unsigned long
  wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
  {
        unsigned long gran = sysctl_sched_wakeup_granularity;
  
 -      if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN))
 -              gran = adaptive_gran(curr, se);
 -
        /*
         * Since its curr running now, convert the gran from real-time
         * to virtual-time in his units.
 +       *
 +       * By using 'se' instead of 'curr' we penalize light tasks, so
 +       * they get preempted easier. That is, if 'se' < 'curr' then
 +       * the resulting gran will be larger, therefore penalizing the
 +       * lighter, if otoh 'se' > 'curr' then the resulting gran will
 +       * be smaller, again penalizing the lighter task.
 +       *
 +       * This is especially important for buddies when the leftmost
 +       * task is higher priority than the buddy.
         */
 -      if (sched_feat(ASYM_GRAN)) {
 -              /*
 -               * By using 'se' instead of 'curr' we penalize light tasks, so
 -               * they get preempted easier. That is, if 'se' < 'curr' then
 -               * the resulting gran will be larger, therefore penalizing the
 -               * lighter, if otoh 'se' > 'curr' then the resulting gran will
 -               * be smaller, again penalizing the lighter task.
 -               *
 -               * This is especially important for buddies when the leftmost
 -               * task is higher priority than the buddy.
 -               */
 -              if (unlikely(se->load.weight != NICE_0_LOAD))
 -                      gran = calc_delta_fair(gran, se);
 -      } else {
 -              if (unlikely(curr->load.weight != NICE_0_LOAD))
 -                      gran = calc_delta_fair(gran, curr);
 -      }
 +      if (unlikely(se->load.weight != NICE_0_LOAD))
 +              gran = calc_delta_fair(gran, se);
  
        return gran;
  }
@@@ -1651,6 -1705,7 +1651,6 @@@ static void check_preempt_wakeup(struc
        struct task_struct *curr = rq->curr;
        struct sched_entity *se = &curr->se, *pse = &p->se;
        struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 -      int sync = wake_flags & WF_SYNC;
        int scale = cfs_rq->nr_running >= sched_nr_latency;
  
        if (unlikely(rt_prio(p->prio)))
        if (unlikely(curr->policy == SCHED_IDLE))
                goto preempt;
  
 -      if (sched_feat(WAKEUP_SYNC) && sync)
 -              goto preempt;
 -
 -      if (sched_feat(WAKEUP_OVERLAP) &&
 -                      se->avg_overlap < sysctl_sched_migration_cost &&
 -                      pse->avg_overlap < sysctl_sched_migration_cost)
 -              goto preempt;
 -
        if (!sched_feat(WAKEUP_PREEMPT))
                return;
  
@@@ -1781,13 -1844,13 +1781,13 @@@ int can_migrate_task(struct task_struc
         * 3) are cache-hot on their current CPU.
         */
        if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
 -              schedstat_inc(p, se.nr_failed_migrations_affine);
 +              schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
                return 0;
        }
        *all_pinned = 0;
  
        if (task_running(rq, p)) {
 -              schedstat_inc(p, se.nr_failed_migrations_running);
 +              schedstat_inc(p, se.statistics.nr_failed_migrations_running);
                return 0;
        }
  
  #ifdef CONFIG_SCHEDSTATS
                if (tsk_cache_hot) {
                        schedstat_inc(sd, lb_hot_gained[idle]);
 -                      schedstat_inc(p, se.nr_forced_migrations);
 +                      schedstat_inc(p, se.statistics.nr_forced_migrations);
                }
  #endif
                return 1;
        }
  
        if (tsk_cache_hot) {
 -              schedstat_inc(p, se.nr_failed_migrations_hot);
 +              schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
                return 0;
        }
        return 1;
@@@ -3049,6 -3112,8 +3049,6 @@@ static void active_load_balance(struct 
  
        /* move a task from busiest_rq to target_rq */
        double_lock_balance(busiest_rq, target_rq);
 -      update_rq_clock(busiest_rq);
 -      update_rq_clock(target_rq);
  
        /* Search for an sd spanning us and the target CPU. */
        for_each_domain(target_cpu, sd) {
@@@ -3411,7 -3476,7 +3411,7 @@@ static void run_rebalance_domains(struc
  
  static inline int on_null_domain(int cpu)
  {
-       return !rcu_dereference(cpu_rq(cpu)->sd);
+       return !rcu_dereference_sched(cpu_rq(cpu)->sd);
  }
  
  /*
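Note on the tunable changes at the top of kernel/sched_fair.c above: they preserve the invariant stated at the sched_nr_latency definition, namely that it is kept at sysctl_sched_latency / sysctl_sched_min_granularity. Checking the arithmetic for the default values (before the "1 + ilog(ncpus)" runtime scaling mentioned in the comments is applied):

  old:  5,000,000 ns / 1,000,000 ns = 5   (sched_nr_latency was 5)
  new:  6,000,000 ns / 2,000,000 ns = 3   (sched_nr_latency is now 3)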
diff --combined kernel/sched_rt.c
index 0335e87f5204f096378dd6ef40a7027fff4d13a2,b5b920ae2ea7fe83ca17d2c94d0a7b638574144c..012d69bb67c7a0e41a18141fe7d338026a9491be
@@@ -613,7 -613,7 +613,7 @@@ static void update_curr_rt(struct rq *r
        if (unlikely((s64)delta_exec < 0))
                delta_exec = 0;
  
 -      schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 +      schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec));
  
        curr->se.sum_exec_runtime += delta_exec;
        account_group_exec_runtime(curr, delta_exec);
@@@ -1667,8 -1667,9 +1667,9 @@@ static void watchdog(struct rq *rq, str
        if (!p->signal)
                return;
  
-       soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur;
-       hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max;
+       /* max may change after cur was read, this will be fixed next tick */
+       soft = task_rlimit(p, RLIMIT_RTTIME);
+       hard = task_rlimit_max(p, RLIMIT_RTTIME);
  
        if (soft != RLIM_INFINITY) {
                unsigned long next;
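Note on the rlimit hunks in this merge (can_nice(), sched_setscheduler() and the RT watchdog above): open-coded p->signal->rlim[...] reads are replaced by the task_rlimit() / task_rlimit_max() helpers. Their definitions are not part of this diff; a sketch of what such accessors presumably look like, with the ACCESS_ONCE() detail treated as an assumption:

  static inline unsigned long task_rlimit(const struct task_struct *tsk,
                                          unsigned int limit)
  {
        return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
  }

  static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
                                              unsigned int limit)
  {
        return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
  }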