Git Repo - linux.git/commitdiff
Merge branches 'pm-sleep' and 'pm-cpufreq'
author Rafael J. Wysocki <[email protected]>
Thu, 26 Jan 2017 23:08:59 +0000 (00:08 +0100)
committer Rafael J. Wysocki <[email protected]>
Thu, 26 Jan 2017 23:08:59 +0000 (00:08 +0100)
* pm-sleep:
  Revert "PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag"

* pm-cpufreq:
  cpufreq: intel_pstate: Fix sysfs limits enforcement for performance policy

drivers/acpi/sleep.c
drivers/cpufreq/intel_pstate.c
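
A note on the cpufreq side of this merge: the fix makes the driver's verify
callback clamp an incoming policy to the global sysfs percentage limits (see
the intel_pstate_verify_policy() hunk below). The following standalone sketch
reproduces that clamping arithmetic with example values standing in for the
driver's cpuinfo.max_freq and max/min_sysfs_pct; it is an illustration, not
the driver code itself:

	#include <stdio.h>

	int main(void)
	{
		unsigned int cpuinfo_max_freq = 3200000;  /* kHz, example value */
		int max_sysfs_pct = 80, min_sysfs_pct = 20;
		unsigned int max_freq = cpuinfo_max_freq; /* requested policy max */
		unsigned int min_freq = 0;                /* requested policy min */

		/* Same arithmetic as the verify hunk: percent of cpuinfo.max_freq. */
		unsigned int hi = cpuinfo_max_freq * max_sysfs_pct / 100;
		unsigned int lo = cpuinfo_max_freq * min_sysfs_pct / 100;

		/* cpufreq_verify_within_limits() effectively clamps like this. */
		if (max_freq > hi)
			max_freq = hi;
		if (min_freq < lo)
			min_freq = lo;

		printf("policy clamped to [%u, %u] kHz\n", min_freq, max_freq);
		return 0;
	}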

diff --combined drivers/acpi/sleep.c
index 9b6cebe227a08562985ce304463dcbe1f1ee1154,deb0ff78eba8705b56292d1acf2e206541ee8bf8,deb0ff78eba8705b56292d1acf2e206541ee8bf8..54abb26b736639ca54aa7051ae742d6657a501bc
@@@@ -47,15 -47,32 -47,32 +47,15 @@@@ static void acpi_sleep_tts_switch(u32 a
        }
   }
   
 --static void acpi_sleep_pts_switch(u32 acpi_state)
 --{
 --     acpi_status status;
 --
 --     status = acpi_execute_simple_method(NULL, "\\_PTS", acpi_state);
 --     if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
 --             /*
 --              * OS can't evaluate the _PTS object correctly. Some warning
 --              * message will be printed. But it won't break anything.
 --              */
 --             printk(KERN_NOTICE "Failure in evaluating _PTS object\n");
 --     }
 --}
 --
 --static int sleep_notify_reboot(struct notifier_block *this,
 ++static int tts_notify_reboot(struct notifier_block *this,
                        unsigned long code, void *x)
   {
        acpi_sleep_tts_switch(ACPI_STATE_S5);
 --
 --     acpi_sleep_pts_switch(ACPI_STATE_S5);
 --
        return NOTIFY_DONE;
   }
   
 --static struct notifier_block sleep_notifier = {
 --     .notifier_call  = sleep_notify_reboot,
 ++static struct notifier_block tts_notifier = {
 ++     .notifier_call  = tts_notify_reboot,
        .next           = NULL,
        .priority       = 0,
   };
@@@@ -674,14 -691,6 -691,6 +674,6 @@@@ static void acpi_sleep_suspend_setup(vo
                if (acpi_sleep_state_supported(i))
                        sleep_states[i] = 1;
   
-       /*
-        * Use suspend-to-idle by default if ACPI_FADT_LOW_POWER_S0 is set and
-        * the default suspend mode was not selected from the command line.
-        */
-       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0 &&
-           mem_sleep_default > PM_SUSPEND_MEM)
-               mem_sleep_default = PM_SUSPEND_FREEZE;
-  
        suspend_set_ops(old_suspend_ordering ?
                &acpi_suspend_ops_old : &acpi_suspend_ops);
        freeze_set_ops(&acpi_freeze_ops);
@@@@ -907,9 -916,9 -916,9 +899,9 @@@@ int __init acpi_sleep_init(void
        pr_info(PREFIX "(supports%s)\n", supported);
   
        /*
 --      * Register the sleep_notifier to reboot notifier list so that the _TTS
 --      * and _PTS object can also be evaluated when the system enters S5.
 ++      * Register tts_notifier on the reboot notifier list so that the _TTS
 ++      * object can also be evaluated when the system enters S5.
         */
 --     register_reboot_notifier(&sleep_notifier);
 ++     register_reboot_notifier(&tts_notifier);
        return 0;
   }
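
The revert above removes the _PTS evaluation and renames the notifier back to
tts_notifier. For readers unfamiliar with the mechanism, here is a userspace
mock of the notifier_block callback wiring that register_reboot_notifier()
relies on; the names mirror the kernel code, but this is an illustration,
not kernel code:

	#include <stdio.h>

	#define NOTIFY_DONE   0
	#define ACPI_STATE_S5 5

	struct notifier_block {
		int (*notifier_call)(struct notifier_block *nb,
				     unsigned long code, void *x);
		struct notifier_block *next;
		int priority;
	};

	static int tts_notify_reboot(struct notifier_block *this,
				     unsigned long code, void *x)
	{
		/* The real callback evaluates the ACPI _TTS object for S5. */
		printf("evaluating \\_TTS for S%d\n", ACPI_STATE_S5);
		return NOTIFY_DONE;
	}

	static struct notifier_block tts_notifier = {
		.notifier_call = tts_notify_reboot,
	};

	int main(void)
	{
		/* Stand-in for register_reboot_notifier() plus a reboot event. */
		tts_notifier.notifier_call(&tts_notifier, 0, NULL);
		return 0;
	}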
diff --combined drivers/cpufreq/intel_pstate.c
index f91c25718d164c9d9339acf671d67937995fe076,4737520ec8230a830d80e81c0dbc9dbaa96d0dc7,ed215c5592f6245309ee38cc96487fdbf25fbd56..a54d65aa776d064f025036cf9aa35790d83d82e8
   #include <asm/cpufeature.h>
   #include <asm/intel-family.h>
   
 + #define INTEL_CPUFREQ_TRANSITION_LATENCY     20000
 + 
   #define ATOM_RATIOS          0x66a
   #define ATOM_VIDS            0x66b
   #define ATOM_TURBO_RATIOS    0x66c
   
   #ifdef CONFIG_ACPI
   #include <acpi/processor.h>
 ++#include <acpi/cppc_acpi.h>
   #endif
   
   #define FRAC_BITS 8
   
   #define EXT_BITS 6
   #define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
 + #define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
 + #define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)
   
   static inline int32_t mul_fp(int32_t x, int32_t y)
   {
@@@@ -127,8 -122,6 -126,8 +127,8 @@@@ struct sample 
    * @scaling:         Scaling factor to  convert frequency to cpufreq
    *                   frequency units
    * @turbo_pstate:    Max Turbo P state possible for this platform
 +  * @max_freq:                @max_pstate frequency in cpufreq units
 +  * @turbo_freq:              @turbo_pstate frequency in cpufreq units
    *
    * Stores the per cpu model P state limits and current P state.
    */
@@@@ -139,8 -132,6 -138,8 +139,8 @@@@ struct pstate_data 
        int     max_pstate_physical;
        int     scaling;
        int     turbo_pstate;
 +      unsigned int max_freq;
 +      unsigned int turbo_freq;
   };
   
   /**
@@@@ -185,48 -176,6 -184,48 +185,48 @@@@ struct _pid 
        int32_t last_err;
   };
   
 + /**
 +  * struct perf_limits - Store user and policy limits
 +  * @no_turbo:                User requested turbo state from intel_pstate sysfs
 +  * @turbo_disabled:  Platform turbo status either from msr
 +  *                   MSR_IA32_MISC_ENABLE or when maximum available pstate
 +  *                   matches the maximum turbo pstate
 +  * @max_perf_pct:    Effective maximum performance limit in percentage, this
 +  *                   is minimum of either limits enforced by cpufreq policy
 +  *                   or limits from user set limits via intel_pstate sysfs
 +  * @min_perf_pct:    Effective minimum performance limit in percentage, this
 +  *                   is maximum of either limits enforced by cpufreq policy
 +  *                   or limits from user set limits via intel_pstate sysfs
 +  * @max_perf:                This is a scaled value between 0 to 255 for max_perf_pct
 +  *                   This value is used to limit max pstate
 +  * @min_perf:                This is a scaled value between 0 to 255 for min_perf_pct
 +  *                   This value is used to limit min pstate
 +  * @max_policy_pct:  The maximum performance in percentage enforced by
 +  *                   cpufreq setpolicy interface
 +  * @max_sysfs_pct:   The maximum performance in percentage enforced by
 +  *                   intel pstate sysfs interface, unused when per cpu
 +  *                   controls are enforced
 +  * @min_policy_pct:  The minimum performance in percentage enforced by
 +  *                   cpufreq setpolicy interface
 +  * @min_sysfs_pct:   The minimum performance in percentage enforced by
 +  *                   intel pstate sysfs interface, unused when per cpu
 +  *                   controls are enforced
 +  *
 +  * Storage for user and policy defined limits.
 +  */
 + struct perf_limits {
 +      int no_turbo;
 +      int turbo_disabled;
 +      int max_perf_pct;
 +      int min_perf_pct;
 +      int32_t max_perf;
 +      int32_t min_perf;
 +      int max_policy_pct;
 +      int max_sysfs_pct;
 +      int min_policy_pct;
 +      int min_sysfs_pct;
 + };
 + 
   /**
    * struct cpudata -  Per CPU instance data storage
    * @cpu:             CPU number for this instance data
    * @prev_cummulative_iowait: IO Wait time difference from last and
    *                   current sample
    * @sample:          Storage for storing last Sample data
 +  * @perf_limits:     Pointer to the perf_limits unique to this CPU.
 +  *                   Not all fields in the structure are applicable
 +  *                   when per cpu controls are enforced
    * @acpi_perf_data:  Stores ACPI perf information read from _PSS
    * @valid_pss_table: Set to true for valid ACPI _PSS entries found
 +  * @epp_powersave:   Last saved HWP energy performance preference
 +  *                   (EPP) or energy performance bias (EPB),
 +  *                   when policy switched to performance
 +  * @epp_policy:              Last saved policy used to set EPP/EPB
 +  * @epp_default:     Power on default HWP energy performance
 +  *                   preference/bias
 +  * @epp_saved:               Saved EPP/EPB during system suspend or CPU offline
 +  *                   operation
    *
    * This structure stores per CPU instance data for all CPUs.
    */
@@@@ -279,16 -217,11 -278,16 +279,16 @@@@ struct cpudata 
        u64     prev_tsc;
        u64     prev_cummulative_iowait;
        struct sample sample;
 +      struct perf_limits *perf_limits;
   #ifdef CONFIG_ACPI
        struct acpi_processor_performance acpi_perf_data;
        bool valid_pss_table;
   #endif
        unsigned int iowait_boost;
 +      s16 epp_powersave;
 +      s16 epp_policy;
 +      s16 epp_default;
 +      s16 epp_saved;
   };
   
   static struct cpudata **all_cpu_data;
    * @p_gain_pct:              PID proportional gain
    * @i_gain_pct:              PID integral gain
    * @d_gain_pct:              PID derivative gain
 -  * @boost_iowait:    Whether or not to use iowait boosting.
    *
    * Stores per CPU model static PID configuration data.
    */
@@@@ -313,6 -247,7 -312,6 +313,6 @@@@ struct pstate_adjust_policy 
        int p_gain_pct;
        int d_gain_pct;
        int i_gain_pct;
 -      bool boost_iowait;
   };
   
   /**
@@@@ -356,19 -291,58 -355,19 +356,19 @@@@ static inline int32_t get_target_pstate
   static struct pstate_adjust_policy pid_params __read_mostly;
   static struct pstate_funcs pstate_funcs __read_mostly;
   static int hwp_active __read_mostly;
 + static bool per_cpu_limits __read_mostly;
   
   #ifdef CONFIG_ACPI
   static bool acpi_ppc;
   #endif
   
 - /**
 -  * struct perf_limits - Store user and policy limits
 -  * @no_turbo:                User requested turbo state from intel_pstate sysfs
 -  * @turbo_disabled:  Platform turbo status either from msr
 -  *                   MSR_IA32_MISC_ENABLE or when maximum available pstate
 -  *                   matches the maximum turbo pstate
 -  * @max_perf_pct:    Effective maximum performance limit in percentage, this
 -  *                   is minimum of either limits enforced by cpufreq policy
 -  *                   or limits from user set limits via intel_pstate sysfs
 -  * @min_perf_pct:    Effective minimum performance limit in percentage, this
 -  *                   is maximum of either limits enforced by cpufreq policy
 -  *                   or limits from user set limits via intel_pstate sysfs
 -  * @max_perf:                This is a scaled value between 0 to 255 for max_perf_pct
 -  *                   This value is used to limit max pstate
 -  * @min_perf:                This is a scaled value between 0 to 255 for min_perf_pct
 -  *                   This value is used to limit min pstate
 -  * @max_policy_pct:  The maximum performance in percentage enforced by
 -  *                   cpufreq setpolicy interface
 -  * @max_sysfs_pct:   The maximum performance in percentage enforced by
 -  *                   intel pstate sysfs interface
 -  * @min_policy_pct:  The minimum performance in percentage enforced by
 -  *                   cpufreq setpolicy interface
 -  * @min_sysfs_pct:   The minimum performance in percentage enforced by
 -  *                   intel pstate sysfs interface
 -  *
 -  * Storage for user and policy defined limits.
 -  */
 - struct perf_limits {
 -      int no_turbo;
 -      int turbo_disabled;
 -      int max_perf_pct;
 -      int min_perf_pct;
 -      int32_t max_perf;
 -      int32_t min_perf;
 -      int max_policy_pct;
 -      int max_sysfs_pct;
 -      int min_policy_pct;
 -      int min_sysfs_pct;
 - };
 - 
   static struct perf_limits performance_limits = {
        .no_turbo = 0,
        .turbo_disabled = 0,
        .max_perf_pct = 100,
 -      .max_perf = int_tofp(1),
 +      .max_perf = int_ext_tofp(1),
        .min_perf_pct = 100,
 -      .min_perf = int_tofp(1),
 +      .min_perf = int_ext_tofp(1),
        .max_policy_pct = 100,
        .max_sysfs_pct = 100,
        .min_policy_pct = 0,
@@@@ -379,7 -353,7 -378,7 +379,7 @@@@ static struct perf_limits powersave_lim
        .no_turbo = 0,
        .turbo_disabled = 0,
        .max_perf_pct = 100,
 -      .max_perf = int_tofp(1),
 +      .max_perf = int_ext_tofp(1),
        .min_perf_pct = 0,
        .min_perf = 0,
        .max_policy_pct = 100,
@@@@ -394,8 -368,6 -393,8 +394,8 @@@@ static struct perf_limits *limits = &pe
   static struct perf_limits *limits = &powersave_limits;
   #endif
   
 + static DEFINE_MUTEX(intel_pstate_limits_lock);
 + 
   #ifdef CONFIG_ACPI
   
   static bool intel_pstate_get_ppc_enable_status(void)
        return acpi_ppc;
   }
   
 ++#ifdef CONFIG_ACPI_CPPC_LIB
 ++
 ++/* The work item is needed to avoid CPU hotplug locking issues */
 ++static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
 ++{
 ++     sched_set_itmt_support();
 ++}
 ++
 ++static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
 ++
 ++static void intel_pstate_set_itmt_prio(int cpu)
 ++{
 ++     struct cppc_perf_caps cppc_perf;
 ++     static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
 ++     int ret;
 ++
 ++     ret = cppc_get_perf_caps(cpu, &cppc_perf);
 ++     if (ret)
 ++             return;
 ++
 ++     /*
 ++      * The priorities can be set regardless of whether or not
 ++      * sched_set_itmt_support(true) has been called and it is valid to
 ++      * update them at any time after it has been called.
 ++      */
 ++     sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
 ++
 ++     if (max_highest_perf <= min_highest_perf) {
 ++             if (cppc_perf.highest_perf > max_highest_perf)
 ++                     max_highest_perf = cppc_perf.highest_perf;
 ++
 ++             if (cppc_perf.highest_perf < min_highest_perf)
 ++                     min_highest_perf = cppc_perf.highest_perf;
 ++
 ++             if (max_highest_perf > min_highest_perf) {
 ++                     /*
 ++                      * This code can be run during CPU online under the
 ++                      * CPU hotplug locks, so sched_set_itmt_support()
 ++                      * cannot be called from here.  Queue up a work item
 ++                      * to invoke it.
 ++                      */
 ++                     schedule_work(&sched_itmt_work);
 ++             }
 ++     }
 ++}
 ++#else
 ++static void intel_pstate_set_itmt_prio(int cpu)
 ++{
 ++}
 ++#endif
 ++
   static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
   {
        struct cpudata *cpu;
        int ret;
        int i;
   
 --     if (hwp_active)
 ++     if (hwp_active) {
 ++             intel_pstate_set_itmt_prio(policy->cpu);
                return;
 ++     }
   
        if (!intel_pstate_get_ppc_enable_status())
                return;
@@@@ -540,11 -459,11 -486,11 +540,11 @@@@ static void intel_pstate_exit_perf_limi
   }
   
   #else
 - static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 + static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
   {
   }
   
 - static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 + static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
   {
   }
   #endif
@@@@ -640,252 -559,24 -586,252 +640,252 @@@@ static inline void update_turbo_state(v
                 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
   }
   
 - static void intel_pstate_hwp_set(const struct cpumask *cpumask)
 + static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
 + {
 +      u64 epb;
 +      int ret;
 + 
 +      if (!static_cpu_has(X86_FEATURE_EPB))
 +              return -ENXIO;
 + 
 +      ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
 +      if (ret)
 +              return (s16)ret;
 + 
 +      return (s16)(epb & 0x0f);
 + }
 + 
 + static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
 + {
 +      s16 epp;
 + 
 +      if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
 +              /*
 +               * When hwp_req_data is 0, the caller didn't read
 +               * MSR_HWP_REQUEST, so read it here to get the EPP.
 +               */
 +              if (!hwp_req_data) {
 +                      epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
 +                                          &hwp_req_data);
 +                      if (epp)
 +                              return epp;
 +              }
 +              epp = (hwp_req_data >> 24) & 0xff;
 +      } else {
 +              /* When there is no EPP present, HWP uses EPB settings */
 +              epp = intel_pstate_get_epb(cpu_data);
 +      }
 + 
 +      return epp;
 + }
 + 
 + static int intel_pstate_set_epb(int cpu, s16 pref)
 + {
 +      u64 epb;
 +      int ret;
 + 
 +      if (!static_cpu_has(X86_FEATURE_EPB))
 +              return -ENXIO;
 + 
 +      ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
 +      if (ret)
 +              return ret;
 + 
 +      epb = (epb & ~0x0f) | pref;
 +      wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
 + 
 +      return 0;
 + }
 + 
 + /*
 +  * EPP/EPB display strings corresponding to EPP index in the
 +  * energy_perf_strings[]
 +  *   index           String
 +  *-------------------------------------
 +  *   0               default
 +  *   1               performance
 +  *   2               balance_performance
 +  *   3               balance_power
 +  *   4               power
 +  */
 + static const char * const energy_perf_strings[] = {
 +      "default",
 +      "performance",
 +      "balance_performance",
 +      "balance_power",
 +      "power",
 +      NULL
 + };
 + 
 + static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
 + {
 +      s16 epp;
 +      int index = -EINVAL;
 + 
 +      epp = intel_pstate_get_epp(cpu_data, 0);
 +      if (epp < 0)
 +              return epp;
 + 
 +      if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
 +              /*
 +               * Range:
 +               *      0x00-0x3F       :       Performance
 +               *      0x40-0x7F       :       Balance performance
 +               *      0x80-0xBF       :       Balance power
 +               *      0xC0-0xFF       :       Power
 +               * The EPP is an 8-bit value, but our ranges restrict
 +               * the values that can be set, so effectively only the
 +               * top two bits are used here.
 +               */
 +              index = (epp >> 6) + 1;
 +      } else if (static_cpu_has(X86_FEATURE_EPB)) {
 +              /*
 +               * Range:
 +               *      0x00-0x03       :       Performance
 +               *      0x04-0x07       :       Balance performance
 +               *      0x08-0x0B       :       Balance power
 +               *      0x0C-0x0F       :       Power
 +               * The EPB is a 4-bit value, but our ranges restrict
 +               * the values that can be set, so effectively only the
 +               * top two bits are used here.
 +               */
 +              index = (epp >> 2) + 1;
 +      }
 + 
 +      return index;
 + }
 + 
 + static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
 +                                            int pref_index)
 + {
 +      int epp = -EINVAL;
 +      int ret;
 + 
 +      if (!pref_index)
 +              epp = cpu_data->epp_default;
 + 
 +      mutex_lock(&intel_pstate_limits_lock);
 + 
 +      if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
 +              u64 value;
 + 
 +              ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
 +              if (ret)
 +                      goto return_pref;
 + 
 +              value &= ~GENMASK_ULL(31, 24);
 + 
 +              /*
 +               * If epp is not the default, convert the index into
 +               * energy_perf_strings to an epp value by shifting it
 +               * 6 bits left so that only the top two bits of epp are
 +               * used.  The result then needs to be shifted left by
 +               * 24 bits into the EPP position in MSR_HWP_REQUEST.
 +               */
 +              if (epp == -EINVAL)
 +                      epp = (pref_index - 1) << 6;
 + 
 +              value |= (u64)epp << 24;
 +              ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
 +      } else {
 +              if (epp == -EINVAL)
 +                      epp = (pref_index - 1) << 2;
 +              ret = intel_pstate_set_epb(cpu_data->cpu, epp);
 +      }
 + return_pref:
 +      mutex_unlock(&intel_pstate_limits_lock);
 + 
 +      return ret;
 + }
 + 
 + static ssize_t show_energy_performance_available_preferences(
 +                              struct cpufreq_policy *policy, char *buf)
 + {
 +      int i = 0;
 +      int ret = 0;
 + 
 +      while (energy_perf_strings[i] != NULL)
 +              ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
 + 
 +      ret += sprintf(&buf[ret], "\n");
 + 
 +      return ret;
 + }
 + 
 + cpufreq_freq_attr_ro(energy_performance_available_preferences);
 + 
 + static ssize_t store_energy_performance_preference(
 +              struct cpufreq_policy *policy, const char *buf, size_t count)
 + {
 +      struct cpudata *cpu_data = all_cpu_data[policy->cpu];
 +      char str_preference[21];
 +      int ret, i = 0;
 + 
 +      ret = sscanf(buf, "%20s", str_preference);
 +      if (ret != 1)
 +              return -EINVAL;
 + 
 +      while (energy_perf_strings[i] != NULL) {
 +              if (!strcmp(str_preference, energy_perf_strings[i])) {
 +                      intel_pstate_set_energy_pref_index(cpu_data, i);
 +                      return count;
 +              }
 +              ++i;
 +      }
 + 
 +      return -EINVAL;
 + }
 + 
 + static ssize_t show_energy_performance_preference(
 +                              struct cpufreq_policy *policy, char *buf)
 + {
 +      struct cpudata *cpu_data = all_cpu_data[policy->cpu];
 +      int preference;
 + 
 +      preference = intel_pstate_get_energy_pref_index(cpu_data);
 +      if (preference < 0)
 +              return preference;
 + 
 +      return  sprintf(buf, "%s\n", energy_perf_strings[preference]);
 + }
 + 
 + cpufreq_freq_attr_rw(energy_performance_preference);
 + 
 + static struct freq_attr *hwp_cpufreq_attrs[] = {
 +      &energy_performance_preference,
 +      &energy_performance_available_preferences,
 +      NULL,
 + };
 + 
 + static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
   {
        int min, hw_min, max, hw_max, cpu, range, adj_range;
 +      struct perf_limits *perf_limits = limits;
        u64 value, cap;
   
 -      for_each_cpu(cpu, cpumask) {
 +      for_each_cpu(cpu, policy->cpus) {
 +              int max_perf_pct, min_perf_pct;
 +              struct cpudata *cpu_data = all_cpu_data[cpu];
 +              s16 epp;
 + 
 +              if (per_cpu_limits)
 +                      perf_limits = all_cpu_data[cpu]->perf_limits;
 + 
                rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
                hw_min = HWP_LOWEST_PERF(cap);
                hw_max = HWP_HIGHEST_PERF(cap);
                range = hw_max - hw_min;
   
 +              max_perf_pct = perf_limits->max_perf_pct;
 +              min_perf_pct = perf_limits->min_perf_pct;
 + 
                rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 -              adj_range = limits->min_perf_pct * range / 100;
 +              adj_range = min_perf_pct * range / 100;
                min = hw_min + adj_range;
                value &= ~HWP_MIN_PERF(~0L);
                value |= HWP_MIN_PERF(min);
   
 -              adj_range = limits->max_perf_pct * range / 100;
 +              adj_range = max_perf_pct * range / 100;
                max = hw_min + adj_range;
                if (limits->no_turbo) {
                        hw_max = HWP_GUARANTEED_PERF(cap);
   
                value &= ~HWP_MAX_PERF(~0L);
                value |= HWP_MAX_PERF(max);
 + 
 +              if (cpu_data->epp_policy == cpu_data->policy)
 +                      goto skip_epp;
 + 
 +              cpu_data->epp_policy = cpu_data->policy;
 + 
 +              if (cpu_data->epp_saved >= 0) {
 +                      epp = cpu_data->epp_saved;
 +                      cpu_data->epp_saved = -EINVAL;
 +                      goto update_epp;
 +              }
 + 
 +              if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
 +                      epp = intel_pstate_get_epp(cpu_data, value);
 +                      cpu_data->epp_powersave = epp;
 +                      /* If the EPP read failed, don't try to write */
 +                      if (epp < 0)
 +                              goto skip_epp;
 + 
 +                      epp = 0;
 +              } else {
 +                      /* Skip setting EPP when the saved value is invalid */
 +                      if (cpu_data->epp_powersave < 0)
 +                              goto skip_epp;
 + 
 +                      /*
 +                       * No need to restore EPP when it is not zero.
 +                       * This means one of:
 +                       *  - the policy has not changed,
 +                       *  - the user changed it manually, or
 +                       *  - the EPB could not be read
 +                       */
 +                      epp = intel_pstate_get_epp(cpu_data, value);
 +                      if (epp)
 +                              goto skip_epp;
 + 
 +                      epp = cpu_data->epp_powersave;
 +              }
 + update_epp:
 +              if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
 +                      value &= ~GENMASK_ULL(31, 24);
 +                      value |= (u64)epp << 24;
 +              } else {
 +                      intel_pstate_set_epb(cpu, epp);
 +              }
 + skip_epp:
                wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
        }
   }
   static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
   {
        if (hwp_active)
 -              intel_pstate_hwp_set(policy->cpus);
 +              intel_pstate_hwp_set(policy);
   
        return 0;
   }
   
 - static void intel_pstate_hwp_set_online_cpus(void)
 + static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
   {
 -      get_online_cpus();
 -      intel_pstate_hwp_set(cpu_online_mask);
 -      put_online_cpus();
 +      struct cpudata *cpu_data = all_cpu_data[policy->cpu];
 + 
 +      if (!hwp_active)
 +              return 0;
 + 
 +      cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
 + 
 +      return 0;
 + }
 + 
 + static int intel_pstate_resume(struct cpufreq_policy *policy)
 + {
 +      int ret;
 + 
 +      if (!hwp_active)
 +              return 0;
 + 
 +      mutex_lock(&intel_pstate_limits_lock);
 + 
 +      all_cpu_data[policy->cpu]->epp_policy = 0;
 + 
 +      ret = intel_pstate_hwp_set_policy(policy);
 + 
 +      mutex_unlock(&intel_pstate_limits_lock);
 + 
 +      return ret;
 + }
 + 
 + static void intel_pstate_update_policies(void)
 + {
 +      int cpu;
 + 
 +      for_each_possible_cpu(cpu)
 +              cpufreq_update_policy(cpu);
   }
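
The sysfs EPP interface added above maps the hardware value to an index into
energy_perf_strings[] using only its top two bits: (epp >> 6) + 1 for the
8-bit EPP, (epp >> 2) + 1 for the 4-bit EPB. A standalone check of the EPP
mapping, reusing the string table and range boundaries documented in the
hunk above:

	#include <stdio.h>

	static const char * const energy_perf_strings[] = {
		"default", "performance", "balance_performance",
		"balance_power", "power",
	};

	int main(void)
	{
		/* Start of each documented EPP range: 0x00, 0x40, 0x80, 0xC0. */
		unsigned int samples[] = { 0x00, 0x40, 0x80, 0xC0 };
		unsigned int i;

		for (i = 0; i < 4; i++)
			printf("EPP 0x%02X -> %s\n", samples[i],
			       energy_perf_strings[(samples[i] >> 6) + 1]);
		return 0;
	}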
   
   /************************** debugfs begin ************************/
@@@@ -1027,6 -640,8 -973,6 +1027,6 @@@@ static void __init intel_pstate_debug_e
        struct dentry *debugfs_parent;
        int i = 0;
   
 -      if (hwp_active)
 -              return;
        debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
        if (IS_ERR_OR_NULL(debugfs_parent))
                return;
@@@@ -1099,20 -714,16 -1045,20 +1099,20 @@@@ static ssize_t store_no_turbo(struct ko
        if (ret != 1)
                return -EINVAL;
   
 +      mutex_lock(&intel_pstate_limits_lock);
 + 
        update_turbo_state();
        if (limits->turbo_disabled) {
                pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
 +              mutex_unlock(&intel_pstate_limits_lock);
                return -EPERM;
        }
   
        limits->no_turbo = clamp_t(int, input, 0, 1);
   
 -      if (hwp_active)
 -              intel_pstate_hwp_set_online_cpus();
 +      mutex_unlock(&intel_pstate_limits_lock);
 + 
 +      intel_pstate_update_policies();
   
        return count;
   }
@@@@ -1127,8 -738,6 -1073,8 +1127,8 @@@@ static ssize_t store_max_perf_pct(struc
        if (ret != 1)
                return -EINVAL;
   
 +      mutex_lock(&intel_pstate_limits_lock);
 + 
        limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
        limits->max_perf_pct = min(limits->max_policy_pct,
                                   limits->max_sysfs_pct);
       limits->max_perf_pct = min(limits->max_sysfs_pct,
                                  limits->max_perf_pct);
        limits->max_perf_pct = max(limits->min_perf_pct,
                                   limits->max_perf_pct);
 -      limits->max_perf = div_fp(limits->max_perf_pct, 100);
 +      limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
 + 
 +      mutex_unlock(&intel_pstate_limits_lock);
 + 
 +      intel_pstate_update_policies();
   
 -      if (hwp_active)
 -              intel_pstate_hwp_set_online_cpus();
        return count;
   }
   
@@@@ -1155,8 -762,6 -1101,8 +1155,8 @@@@ static ssize_t store_min_perf_pct(struc
        if (ret != 1)
                return -EINVAL;
   
 +      mutex_lock(&intel_pstate_limits_lock);
 + 
        limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
        limits->min_perf_pct = max(limits->min_policy_pct,
                                   limits->min_sysfs_pct);
       limits->min_perf_pct = max(limits->min_sysfs_pct,
                                  limits->min_perf_pct);
        limits->min_perf_pct = min(limits->max_perf_pct,
                                   limits->min_perf_pct);
 -      limits->min_perf = div_fp(limits->min_perf_pct, 100);
 +      limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
 + 
 +      mutex_unlock(&intel_pstate_limits_lock);
 + 
 +      intel_pstate_update_policies();
   
 -      if (hwp_active)
 -              intel_pstate_hwp_set_online_cpus();
        return count;
   }
   
@@@@ -1184,6 -787,8 -1130,6 +1184,6 @@@@ define_one_global_ro(num_pstates)
   
   static struct attribute *intel_pstate_attributes[] = {
        &no_turbo.attr,
 -      &max_perf_pct.attr,
 -      &min_perf_pct.attr,
        &turbo_pct.attr,
        &num_pstates.attr,
        NULL
@@@@ -1200,26 -805,9 -1146,26 +1200,26 @@@@ static void __init intel_pstate_sysfs_e
   
        intel_pstate_kobject = kobject_create_and_add("intel_pstate",
                                                &cpu_subsys.dev_root->kobj);
 -      BUG_ON(!intel_pstate_kobject);
 +      if (WARN_ON(!intel_pstate_kobject))
 +              return;
 + 
        rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
 -      BUG_ON(rc);
 +      if (WARN_ON(rc))
 +              return;
 + 
 +      /*
 +       * If per cpu limits are enforced, there are no global limits, so
 +       * return without creating max/min_perf_pct attributes
 +       */
 +      if (per_cpu_limits)
 +              return;
 + 
 +      rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
 +      WARN_ON(rc);
 + 
 +      rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
 +      WARN_ON(rc);
 + 
   }
   /************************** sysfs end ************************/
   
@@@@ -1230,9 -818,6 -1176,9 +1230,9 @@@@ static void intel_pstate_hwp_enable(str
                wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
   
        wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
 +      cpudata->epp_policy = 0;
 +      if (cpudata->epp_default == -EINVAL)
 +              cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
   }
   
   static int atom_get_min_pstate(void)
@@@@ -1460,6 -1045,7 -1406,6 +1460,6 @@@@ static const struct cpu_defaults silver
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
 -              .boost_iowait = true,
        },
        .funcs = {
                .get_max = atom_get_max_pstate,
@@@@ -1481,6 -1067,7 -1427,6 +1481,6 @@@@ static const struct cpu_defaults airmon
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
 -              .boost_iowait = true,
        },
        .funcs = {
                .get_max = atom_get_max_pstate,
@@@@ -1522,6 -1109,7 -1468,6 +1522,6 @@@@ static const struct cpu_defaults bxt_pa
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
 -              .boost_iowait = true,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
@@@@ -1539,24 -1127,20 -1485,24 +1539,24 @@@@ static void intel_pstate_get_min_max(st
        int max_perf = cpu->pstate.turbo_pstate;
        int max_perf_adj;
        int min_perf;
 +      struct perf_limits *perf_limits = limits;
   
        if (limits->no_turbo || limits->turbo_disabled)
                max_perf = cpu->pstate.max_pstate;
   
 +      if (per_cpu_limits)
 +              perf_limits = cpu->perf_limits;
 + 
        /*
         * performance can be limited by user through sysfs, by cpufreq
         * policy, or by cpu specific default values determined through
         * experimentation.
         */
 -      max_perf_adj = fp_toint(max_perf * limits->max_perf);
 +      max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf);
        *max = clamp_t(int, max_perf_adj,
                        cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
   
 -      min_perf = fp_toint(max_perf * limits->min_perf);
 +      min_perf = fp_ext_toint(max_perf * perf_limits->min_perf);
        *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
   }
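
The min/max computation above relies on the extended fixed-point helpers
introduced at the top of the file: limits such as max_perf are stored as
fractions with EXT_FRAC_BITS = 14 bits (FRAC_BITS 8 + EXT_BITS 6), and
fp_ext_toint() converts a pstate * fraction product back to an integer
pstate. A standalone arithmetic check follows; div_ext_fp() is written here
with an assumed shape, since its definition is not shown in this diff:

	#include <stdio.h>
	#include <stdint.h>

	#define FRAC_BITS 8
	#define EXT_BITS 6
	#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
	#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
	#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)
	#define div_ext_fp(X, Y) (int_ext_tofp(X) / (Y))  /* assumed shape */

	int main(void)
	{
		int64_t max_perf = div_ext_fp(80, 100);  /* 80% as a Q.14 fraction */
		int turbo_pstate = 36;

		/* Prints 28: an 80% cap limits pstate 36 to pstate 28. */
		printf("80%% cap on pstate %d -> pstate %d\n", turbo_pstate,
		       (int)fp_ext_toint(turbo_pstate * max_perf));
		return 0;
	}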
   
@@@@ -1594,8 -1178,6 -1540,8 +1594,8 @@@@ static void intel_pstate_get_cpu_pstate
        cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
        cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
        cpu->pstate.scaling = pstate_funcs.get_scaling();
 +      cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
 +      cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
   
        if (pstate_funcs.get_vid)
                pstate_funcs.get_vid(cpu);
@@@@ -1734,19 -1316,15 -1680,19 +1734,19 @@@@ static inline int32_t get_target_pstate
        return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
   }
   
 - static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 + static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
   {
        int max_perf, min_perf;
   
 -      update_turbo_state();
 - 
        intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
        pstate = clamp_t(int, pstate, min_perf, max_perf);
        trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
 +      return pstate;
 + }
 + 
 + static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 + {
 +      pstate = intel_pstate_prepare_request(cpu, pstate);
        if (pstate == cpu->pstate.current_pstate)
                return;
   
@@@@ -1764,8 -1342,6 -1710,8 +1764,8 @@@@ static inline void intel_pstate_adjust_
        target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ?
                cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu);
   
 +      update_turbo_state();
 + 
        intel_pstate_update_pstate(cpu, target_pstate);
   
        sample = &cpu->sample;
@@@@ -1786,7 -1362,7 -1732,7 +1786,7 @@@@ static void intel_pstate_update_util(st
        struct cpudata *cpu = container_of(data, struct cpudata, update_util);
        u64 delta_ns;
   
 -      if (pid_params.boost_iowait) {
 +      if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) {
                if (flags & SCHED_CPUFREQ_IOWAIT) {
                        cpu->iowait_boost = int_tofp(1);
                } else if (cpu->iowait_boost) {
@@@@ -1832,7 -1408,6 -1778,7 +1832,7 @@@@ static const struct x86_cpu_id intel_ps
        ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,        core_params),
        ICPU(INTEL_FAM6_BROADWELL_XEON_D,       core_params),
        ICPU(INTEL_FAM6_XEON_PHI_KNL,           knl_params),
 +      ICPU(INTEL_FAM6_XEON_PHI_KNM,           knl_params),
        ICPU(INTEL_FAM6_ATOM_GOLDMONT,          bxt_params),
        {}
   };
@@@@ -1849,26 -1424,11 -1795,26 +1849,26 @@@@ static int intel_pstate_init_cpu(unsign
   {
        struct cpudata *cpu;
   
 -      if (!all_cpu_data[cpunum])
 -              all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
 -                                             GFP_KERNEL);
 -      if (!all_cpu_data[cpunum])
 -              return -ENOMEM;
 +      cpu = all_cpu_data[cpunum];
 + 
 +      if (!cpu) {
 +              unsigned int size = sizeof(struct cpudata);
 + 
 +              if (per_cpu_limits)
 +                      size += sizeof(struct perf_limits);
 + 
 +              cpu = kzalloc(size, GFP_KERNEL);
 +              if (!cpu)
 +                      return -ENOMEM;
 + 
 +              all_cpu_data[cpunum] = cpu;
 +              if (per_cpu_limits)
 +                      cpu->perf_limits = (struct perf_limits *)(cpu + 1);
 + 
 +              cpu->epp_default = -EINVAL;
 +              cpu->epp_powersave = -EINVAL;
 +              cpu->epp_saved = -EINVAL;
 +      }
   
        cpu = all_cpu_data[cpunum];
   
@@@@ -1927,57 -1487,18 -1873,57 +1927,57 @@@@ static void intel_pstate_set_performanc
        limits->no_turbo = 0;
        limits->turbo_disabled = 0;
        limits->max_perf_pct = 100;
 -      limits->max_perf = int_tofp(1);
 +      limits->max_perf = int_ext_tofp(1);
        limits->min_perf_pct = 100;
 -      limits->min_perf = int_tofp(1);
 +      limits->min_perf = int_ext_tofp(1);
        limits->max_policy_pct = 100;
        limits->max_sysfs_pct = 100;
        limits->min_policy_pct = 0;
        limits->min_sysfs_pct = 0;
   }
   
 + static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 +                                          struct perf_limits *limits)
 + {
 + 
 +      limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
 +                                            policy->cpuinfo.max_freq);
 +      limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
 +      if (policy->max == policy->min) {
 +              limits->min_policy_pct = limits->max_policy_pct;
 +      } else {
 +              limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
 +                                                    policy->cpuinfo.max_freq);
 +              limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
 +                                               0, 100);
 +      }
 + 
 +      /* Normalize user input to [min_policy_pct, max_policy_pct] */
 +      limits->min_perf_pct = max(limits->min_policy_pct,
 +                                 limits->min_sysfs_pct);
 +      limits->min_perf_pct = min(limits->max_policy_pct,
 +                                 limits->min_perf_pct);
 +      limits->max_perf_pct = min(limits->max_policy_pct,
 +                                 limits->max_sysfs_pct);
 +      limits->max_perf_pct = max(limits->min_policy_pct,
 +                                 limits->max_perf_pct);
 + 
 +      /* Make sure min_perf_pct <= max_perf_pct */
 +      limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
 + 
 +      limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
 +      limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
 +      limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
 +      limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
 + 
 +      pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
 +               limits->max_perf_pct, limits->min_perf_pct);
 + }
 + 
   static int intel_pstate_set_policy(struct cpufreq_policy *policy)
   {
        struct cpudata *cpu;
 +      struct perf_limits *perf_limits = NULL;
   
        if (!policy->cpuinfo.max_freq)
                return -ENODEV;
                policy->max = policy->cpuinfo.max_freq;
        }
   
 -      if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
 -              limits = &performance_limits;
 -              if (policy->max >= policy->cpuinfo.max_freq) {
 +      if (per_cpu_limits)
 +              perf_limits = cpu->perf_limits;
 + 
 +      mutex_lock(&intel_pstate_limits_lock);
 + 
 +      if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
 +              if (!perf_limits) {
 +                      limits = &performance_limits;
 +                      perf_limits = limits;
 +              }
-               if (policy->max >= policy->cpuinfo.max_freq) {
++              if (policy->max >= policy->cpuinfo.max_freq &&
++                  !limits->no_turbo) {
                        pr_debug("set performance\n");
 -                      intel_pstate_set_performance_limits(limits);
 +                      intel_pstate_set_performance_limits(perf_limits);
                        goto out;
                }
        } else {
                pr_debug("set powersave\n");
 -              limits = &powersave_limits;
 -      }
 - 
 -      limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
 -      limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
 -      limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
 -                                            policy->cpuinfo.max_freq);
 -      limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
 - 
 -      /* Normalize user input to [min_policy_pct, max_policy_pct] */
 -      limits->min_perf_pct = max(limits->min_policy_pct,
 -                                 limits->min_sysfs_pct);
 -      limits->min_perf_pct = min(limits->max_policy_pct,
 -                                 limits->min_perf_pct);
 -      limits->max_perf_pct = min(limits->max_policy_pct,
 -                                 limits->max_sysfs_pct);
 -      limits->max_perf_pct = max(limits->min_policy_pct,
 -                                 limits->max_perf_pct);
 - 
 -      /* Make sure min_perf_pct <= max_perf_pct */
 -      limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
 +              if (!perf_limits) {
 +                      limits = &powersave_limits;
 +                      perf_limits = limits;
 +              }
   
 -      limits->min_perf = div_fp(limits->min_perf_pct, 100);
 -      limits->max_perf = div_fp(limits->max_perf_pct, 100);
 -      limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
 +      }
   
 +      intel_pstate_update_perf_limits(policy, perf_limits);
    out:
        if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
                /*
   
        intel_pstate_hwp_set_policy(policy);
   
 +      mutex_unlock(&intel_pstate_limits_lock);
 + 
        return 0;
   }
   
@@@@ -2047,35 -1576,25 -1994,46 +2048,46 @@@@ static int intel_pstate_verify_policy(s
            policy->policy != CPUFREQ_POLICY_PERFORMANCE)
                return -EINVAL;
   
++      /* When per-CPU limits are used, sysfs limits are not used */
++      if (!per_cpu_limits) {
++              unsigned int max_freq, min_freq;
++ 
++              max_freq = policy->cpuinfo.max_freq *
++                                              limits->max_sysfs_pct / 100;
++              min_freq = policy->cpuinfo.max_freq *
++                                              limits->min_sysfs_pct / 100;
++              cpufreq_verify_within_limits(policy, min_freq, max_freq);
++      }
++ 
        return 0;
   }
   
 + static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 + {
 +      intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
 + }
 + 
   static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
   {
 -      int cpu_num = policy->cpu;
 -      struct cpudata *cpu = all_cpu_data[cpu_num];
 +      pr_debug("CPU %d exiting\n", policy->cpu);
   
 -      pr_debug("CPU %d exiting\n", cpu_num);
 +      intel_pstate_clear_update_util_hook(policy->cpu);
 +      if (hwp_active)
 +              intel_pstate_hwp_save_state(policy);
 +      else
 +              intel_cpufreq_stop_cpu(policy);
 + }
   
 -      intel_pstate_clear_update_util_hook(cpu_num);
 + static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
 + {
 +      intel_pstate_exit_perf_limits(policy);
   
 -      if (hwp_active)
 -              return;
 +      policy->fast_switch_possible = false;
   
 -      intel_pstate_set_min_pstate(cpu);
 +      return 0;
   }
   
 - static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 + static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
   {
        struct cpudata *cpu;
        int rc;
   
        cpu = all_cpu_data[policy->cpu];
   
 -      if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
 -              policy->policy = CPUFREQ_POLICY_PERFORMANCE;
 -      else
 -              policy->policy = CPUFREQ_POLICY_POWERSAVE;
 +      /*
 +       * We need sane values in cpu->perf_limits, so inherit from the
 +       * global perf_limits, which are seeded during boot with values
 +       * based on CONFIG_CPU_FREQ_DEFAULT_GOV_*.
 +       */
 +      if (per_cpu_limits)
 +              memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));
   
        policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
        policy->cpuinfo.max_freq *= cpu->pstate.scaling;
   
        intel_pstate_init_acpi_perf_limits(policy);
 -      policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
        cpumask_set_cpu(policy->cpu, policy->cpus);
   
 +      policy->fast_switch_possible = true;
 + 
        return 0;
   }
   
 - static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
 + static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
   {
 -      intel_pstate_exit_perf_limits(policy);
 +      int ret = __intel_pstate_cpu_init(policy);
 + 
 +      if (ret)
 +              return ret;
 + 
 +      policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 +      if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
 +              policy->policy = CPUFREQ_POLICY_PERFORMANCE;
 +      else
 +              policy->policy = CPUFREQ_POLICY_POWERSAVE;
   
        return 0;
   }
   
 - static struct cpufreq_driver intel_pstate_driver = {
 + static struct cpufreq_driver intel_pstate = {
        .flags          = CPUFREQ_CONST_LOOPS,
        .verify         = intel_pstate_verify_policy,
        .setpolicy      = intel_pstate_set_policy,
 -      .resume         = intel_pstate_hwp_set_policy,
 +      .suspend        = intel_pstate_hwp_save_state,
 +      .resume         = intel_pstate_resume,
        .get            = intel_pstate_get,
        .init           = intel_pstate_cpu_init,
        .exit           = intel_pstate_cpu_exit,
        .name           = "intel_pstate",
   };
   
 + static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
 + {
 +      struct cpudata *cpu = all_cpu_data[policy->cpu];
 +      struct perf_limits *perf_limits = limits;
 + 
 +      update_turbo_state();
 +      policy->cpuinfo.max_freq = limits->turbo_disabled ?
 +                      cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 + 
 +      cpufreq_verify_within_cpu_limits(policy);
 + 
 +      if (per_cpu_limits)
 +              perf_limits = cpu->perf_limits;
 + 
 +      mutex_lock(&intel_pstate_limits_lock);
 + 
 +      intel_pstate_update_perf_limits(policy, perf_limits);
 + 
 +      mutex_unlock(&intel_pstate_limits_lock);
 + 
 +      return 0;
 + }
 + 
 + static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
 +                                             struct cpufreq_policy *policy,
 +                                             unsigned int target_freq)
 + {
 +      unsigned int max_freq;
 + 
 +      update_turbo_state();
 + 
 +      max_freq = limits->no_turbo || limits->turbo_disabled ?
 +                      cpu->pstate.max_freq : cpu->pstate.turbo_freq;
 +      policy->cpuinfo.max_freq = max_freq;
 +      if (policy->max > max_freq)
 +              policy->max = max_freq;
 + 
 +      if (target_freq > max_freq)
 +              target_freq = max_freq;
 + 
 +      return target_freq;
 + }
 + 
 + static int intel_cpufreq_target(struct cpufreq_policy *policy,
 +                              unsigned int target_freq,
 +                              unsigned int relation)
 + {
 +      struct cpudata *cpu = all_cpu_data[policy->cpu];
 +      struct cpufreq_freqs freqs;
 +      int target_pstate;
 + 
 +      freqs.old = policy->cur;
 +      freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
 + 
 +      cpufreq_freq_transition_begin(policy, &freqs);
 +      switch (relation) {
 +      case CPUFREQ_RELATION_L:
 +              target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
 +              break;
 +      case CPUFREQ_RELATION_H:
 +              target_pstate = freqs.new / cpu->pstate.scaling;
 +              break;
 +      default:
 +              target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
 +              break;
 +      }
 +      target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
 +      if (target_pstate != cpu->pstate.current_pstate) {
 +              cpu->pstate.current_pstate = target_pstate;
 +              wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
 +                            pstate_funcs.get_val(cpu, target_pstate));
 +      }
 +      cpufreq_freq_transition_end(policy, &freqs, false);
 + 
 +      return 0;
 + }
 + 
 + static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
 +                                            unsigned int target_freq)
 + {
 +      struct cpudata *cpu = all_cpu_data[policy->cpu];
 +      int target_pstate;
 + 
 +      target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
 +      target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
 +      intel_pstate_update_pstate(cpu, target_pstate);
 +      return target_freq;
 + }
 + 
 + static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 + {
 +      int ret = __intel_pstate_cpu_init(policy);
 + 
 +      if (ret)
 +              return ret;
 + 
 +      policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
 +      /* This reflects the intel_pstate_get_cpu_pstates() setting. */
 +      policy->cur = policy->cpuinfo.min_freq;
 + 
 +      return 0;
 + }
 + 
 + static struct cpufreq_driver intel_cpufreq = {
 +      .flags          = CPUFREQ_CONST_LOOPS,
 +      .verify         = intel_cpufreq_verify_policy,
 +      .target         = intel_cpufreq_target,
 +      .fast_switch    = intel_cpufreq_fast_switch,
 +      .init           = intel_cpufreq_cpu_init,
 +      .exit           = intel_pstate_cpu_exit,
 +      .stop_cpu       = intel_cpufreq_stop_cpu,
 +      .name           = "intel_cpufreq",
 + };
 + 
 + static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
 + 
   static int no_load __initdata;
   static int no_hwp __initdata;
   static int hwp_only __initdata;
@@@@ -2283,19 -1672,6 -2241,19 +2295,19 @@@@ static void __init copy_pid_params(stru
        pid_params.setpoint = policy->setpoint;
   }
   
 + #ifdef CONFIG_ACPI
 + static void intel_pstate_use_acpi_profile(void)
 + {
 +      if (acpi_gbl_FADT.preferred_profile == PM_MOBILE)
 +              pstate_funcs.get_target_pstate =
 +                              get_target_pstate_use_cpu_load;
 + }
 + #else
 + static void intel_pstate_use_acpi_profile(void)
 + {
 + }
 + #endif
 + 
   static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
   {
        pstate_funcs.get_max   = funcs->get_max;
        pstate_funcs.get_vid   = funcs->get_vid;
        pstate_funcs.get_target_pstate = funcs->get_target_pstate;
   
 +      intel_pstate_use_acpi_profile();
   }
   
   #ifdef CONFIG_ACPI
@@@@ -2421,20 -1796,9 -2379,20 +2433,20 @@@@ static bool __init intel_pstate_platfor
   
        return false;
   }
 + 
 + static void intel_pstate_request_control_from_smm(void)
 + {
 +      /*
 +       * It may be unsafe to request P-states control from SMM if _PPC support
 +       * has not been enabled.
 +       */
 +      if (acpi_ppc)
 +              acpi_processor_pstate_control();
 + }
   #else /* CONFIG_ACPI not enabled */
   static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
   static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
 + static inline void intel_pstate_request_control_from_smm(void) {}
   #endif /* CONFIG_ACPI */
   
   static const struct x86_cpu_id hwp_support_ids[] __initconst = {
@@@@ -2454,7 -1818,6 -2412,7 +2466,7 @@@@ static int __init intel_pstate_init(voi
        if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
                copy_cpu_funcs(&core_params.funcs);
                hwp_active++;
 +              intel_pstate.attr = hwp_cpufreq_attrs;
                goto hwp_cpu_matched;
        }
   
@@@@ -2487,16 -1850,11 -2445,16 +2499,16 @@@@ hwp_cpu_matched
        if (!hwp_active && hwp_only)
                goto out;
   
 -      rc = cpufreq_register_driver(&intel_pstate_driver);
 +      intel_pstate_request_control_from_smm();
 + 
 +      rc = cpufreq_register_driver(intel_pstate_driver);
        if (rc)
                goto out;
   
 -      intel_pstate_debug_expose_params();
 +      if (intel_pstate_driver == &intel_pstate && !hwp_active &&
 +          pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
 +              intel_pstate_debug_expose_params();
 + 
        intel_pstate_sysfs_expose_params();
   
        if (hwp_active)
        get_online_cpus();
        for_each_online_cpu(cpu) {
                if (all_cpu_data[cpu]) {
 -                      intel_pstate_clear_update_util_hook(cpu);
 +                      if (intel_pstate_driver == &intel_pstate)
 +                              intel_pstate_clear_update_util_hook(cpu);
 + 
                        kfree(all_cpu_data[cpu]);
                }
        }
@@@@ -2525,13 -1881,8 -2483,13 +2537,13 @@@@ static int __init intel_pstate_setup(ch
        if (!str)
                return -EINVAL;
   
 -      if (!strcmp(str, "disable"))
 +      if (!strcmp(str, "disable")) {
                no_load = 1;
 +      } else if (!strcmp(str, "passive")) {
 +              pr_info("Passive mode enabled\n");
 +              intel_pstate_driver = &intel_cpufreq;
 +              no_hwp = 1;
 +      }
        if (!strcmp(str, "no_hwp")) {
                pr_info("HWP disabled\n");
                no_hwp = 1;
                force_load = 1;
        if (!strcmp(str, "hwp_only"))
                hwp_only = 1;
 +      if (!strcmp(str, "per_cpu_perf_limits"))
 +              per_cpu_limits = true;
   
   #ifdef CONFIG_ACPI
        if (!strcmp(str, "support_acpi_ppc"))
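
For reference, the keywords parsed above are passed on the kernel command
line, e.g. intel_pstate=passive or intel_pstate=per_cpu_perf_limits; the
parser compares whole strings, so each intel_pstate= instance selects a
single keyword.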