" falling back to system-wide allocation\n",
__func__, nid);
arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
+ if (!arena)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(*arena));
}
arena->ptes = memblock_alloc_node(sizeof(*arena), align, nid);
printk("%s: couldn't allocate arena ptes from node %d\n"
" falling back to system-wide allocation\n",
__func__, nid);
- arena->ptes = memblock_alloc_from(mem_size, align, 0);
+ arena->ptes = memblock_alloc(mem_size, align);
+ if (!arena->ptes)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, mem_size, align);
}
#else /* CONFIG_DISCONTIGMEM */
arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
- arena->ptes = memblock_alloc_from(mem_size, align, 0);
+ if (!arena)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(*arena));
+ arena->ptes = memblock_alloc(mem_size, align);
+ if (!arena->ptes)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, mem_size, align);
#endif /* CONFIG_DISCONTIGMEM */
ok = 0;
/* If both conditions above are met, we are fine. */
- DBGA("pci_dac_dma_supported %s from %pf\n",
+ DBGA("pci_dac_dma_supported %s from %ps\n",
ok ? "yes" : "no", __builtin_return_address(0));
return ok;
&& paddr + size <= __direct_map_size) {
ret = paddr + __direct_map_base;
- DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %pf\n",
+ DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %ps\n",
cpu_addr, size, ret, __builtin_return_address(0));
return ret;
if (dac_allowed) {
ret = paddr + alpha_mv.pci_dac_offset;
- DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %pf\n",
+ DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %ps\n",
cpu_addr, size, ret, __builtin_return_address(0));
return ret;
ret = arena->dma_base + dma_ofs * PAGE_SIZE;
ret += (unsigned long)cpu_addr & ~PAGE_MASK;
- DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %pf\n",
+ DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %ps\n",
cpu_addr, size, npages, ret, __builtin_return_address(0));
return ret;
&& dma_addr < __direct_map_base + __direct_map_size) {
/* Nothing to do. */
- DBGA2("pci_unmap_single: direct [%llx,%zx] from %pf\n",
+ DBGA2("pci_unmap_single: direct [%llx,%zx] from %ps\n",
dma_addr, size, __builtin_return_address(0));
return;
}
if (dma_addr > 0xffffffff) {
- DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %pf\n",
+ DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %ps\n",
dma_addr, size, __builtin_return_address(0));
return;
}
spin_unlock_irqrestore(&arena->lock, flags);
- DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %pf\n",
+ DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %ps\n",
dma_addr, size, npages, __builtin_return_address(0));
}
cpu_addr = (void *)__get_free_pages(gfp | __GFP_ZERO, order);
if (! cpu_addr) {
printk(KERN_INFO "pci_alloc_consistent: "
- "get_free_pages failed from %pf\n",
+ "get_free_pages failed from %ps\n",
__builtin_return_address(0));
/* ??? Really atomic allocation? Otherwise we could play
with vmalloc and sg if we can't find contiguous memory. */
goto try_again;
}
- DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %pf\n",
+ DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %ps\n",
size, cpu_addr, *dma_addrp, __builtin_return_address(0));
return cpu_addr;
pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
free_pages((unsigned long)cpu_addr, get_order(size));
- DBGA2("pci_free_consistent: [%llx,%zx] from %pf\n",
+ DBGA2("pci_free_consistent: [%llx,%zx] from %ps\n",
dma_addr, size, __builtin_return_address(0));
}
if (adev->wakeup.flags.notifier_present) {
pm_wakeup_ws_event(adev->wakeup.ws, 0, acpi_s2idle_wakeup());
if (adev->wakeup.context.func) {
- acpi_handle_debug(handle, "Running %pF for %s\n",
+ acpi_handle_debug(handle, "Running %pS for %s\n",
adev->wakeup.context.func,
dev_name(adev->wakeup.context.dev));
adev->wakeup.context.func(&adev->wakeup.context);
goto out;
}
+ acpi_handle_debug(adev->handle, "GPE%2X enabled for wakeup\n",
+ (unsigned int)wakeup->gpe_number);
+
inc:
wakeup->enable_count++;
* subsystem list maintains.
*/
+#define pr_fmt(fmt) "PM: " fmt
+
#include <linux/device.h>
#include <linux/export.h>
#include <linux/mutex.h>
if (device_pm_not_required(dev))
return;
- pr_debug("PM: Adding info for %s:%s\n",
+ pr_debug("Adding info for %s:%s\n",
dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
device_pm_check_callbacks(dev);
mutex_lock(&dpm_list_mtx);
if (device_pm_not_required(dev))
return;
- pr_debug("PM: Removing info for %s:%s\n",
+ pr_debug("Removing info for %s:%s\n",
dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
complete_all(&dev->power.completion);
mutex_lock(&dpm_list_mtx);
*/
void device_pm_move_before(struct device *deva, struct device *devb)
{
- pr_debug("PM: Moving %s:%s before %s:%s\n",
+ pr_debug("Moving %s:%s before %s:%s\n",
deva->bus ? deva->bus->name : "No Bus", dev_name(deva),
devb->bus ? devb->bus->name : "No Bus", dev_name(devb));
/* Delete deva from dpm_list and reinsert before devb. */
*/
void device_pm_move_after(struct device *deva, struct device *devb)
{
- pr_debug("PM: Moving %s:%s after %s:%s\n",
+ pr_debug("Moving %s:%s after %s:%s\n",
deva->bus ? deva->bus->name : "No Bus", dev_name(deva),
devb->bus ? devb->bus->name : "No Bus", dev_name(devb));
/* Delete deva from dpm_list and reinsert after devb. */
*/
void device_pm_move_last(struct device *dev)
{
- pr_debug("PM: Moving %s:%s to end of list\n",
+ pr_debug("Moving %s:%s to end of list\n",
dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
list_move_tail(&dev->power.entry, &dpm_list);
}
if (!pm_print_times_enabled)
return 0;
- dev_info(dev, "calling %pF @ %i, parent: %s\n", cb,
+ dev_info(dev, "calling %pS @ %i, parent: %s\n", cb,
task_pid_nr(current),
dev->parent ? dev_name(dev->parent) : "none");
return ktime_get();
rettime = ktime_get();
nsecs = (s64) ktime_to_ns(ktime_sub(rettime, calltime));
- dev_info(dev, "%pF returned %d after %Ld usecs\n", cb, error,
+ dev_info(dev, "%pS returned %d after %Ld usecs\n", cb, error,
(unsigned long long)nsecs >> 10);
}
static void pm_dev_err(struct device *dev, pm_message_t state, const char *info,
int error)
{
- printk(KERN_ERR "PM: Device %s failed to %s%s: error %d\n",
- dev_name(dev), pm_verb(state.event), info, error);
+ pr_err("Device %s failed to %s%s: error %d\n",
+ dev_name(dev), pm_verb(state.event), info, error);
}
static void dpm_show_time(ktime_t starttime, pm_message_t state, int error,
/**
* dpm_watchdog_handler - Driver suspend / resume watchdog handler.
- * @data: Watchdog object address.
+ * @t: The timer that PM watchdog depends on.
*
* Called when a driver has timed out suspending or resuming.
* There's not much we can do here to recover so panic() to
&& !pm_trace_is_enabled();
}
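+/*
+ * Schedule @func to run asynchronously for @dev if the device is
+ * enabled for asynchronous suspend/resume.  Return true if @func was
+ * scheduled, or false if the caller must run it synchronously.
+ */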
+static bool dpm_async_fn(struct device *dev, async_func_t func)
+{
+ reinit_completion(&dev->power.completion);
+
+ if (is_async(dev)) {
+ get_device(dev);
+ async_schedule(func, dev);
+ return true;
+ }
+
+ return false;
+}
+
static void async_resume_noirq(void *data, async_cookie_t cookie)
{
struct device *dev = (struct device *)data;
* in case the starting of async threads is
* delayed by non-async resuming devices.
*/
- list_for_each_entry(dev, &dpm_noirq_list, power.entry) {
- reinit_completion(&dev->power.completion);
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_resume_noirq, dev);
- }
- }
+ list_for_each_entry(dev, &dpm_noirq_list, power.entry)
+ dpm_async_fn(dev, async_resume_noirq);
while (!list_empty(&dpm_noirq_list)) {
dev = to_device(dpm_noirq_list.next);
* in case the starting of async threads is
* delayed by non-async resuming devices.
*/
- list_for_each_entry(dev, &dpm_late_early_list, power.entry) {
- reinit_completion(&dev->power.completion);
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_resume_early, dev);
- }
- }
+ list_for_each_entry(dev, &dpm_late_early_list, power.entry)
+ dpm_async_fn(dev, async_resume_early);
while (!list_empty(&dpm_late_early_list)) {
dev = to_device(dpm_late_early_list.next);
pm_transition = state;
async_error = 0;
- list_for_each_entry(dev, &dpm_suspended_list, power.entry) {
- reinit_completion(&dev->power.completion);
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_resume, dev);
- }
- }
+ list_for_each_entry(dev, &dpm_suspended_list, power.entry)
+ dpm_async_fn(dev, async_resume);
while (!list_empty(&dpm_suspended_list)) {
dev = to_device(dpm_suspended_list.next);
static int device_suspend_noirq(struct device *dev)
{
- reinit_completion(&dev->power.completion);
-
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_suspend_noirq, dev);
+ if (dpm_async_fn(dev, async_suspend_noirq))
return 0;
- }
+
return __device_suspend_noirq(dev, pm_transition, false);
}
static int device_suspend_late(struct device *dev)
{
- reinit_completion(&dev->power.completion);
-
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_suspend_late, dev);
+ if (dpm_async_fn(dev, async_suspend_late))
return 0;
- }
return __device_suspend_late(dev, pm_transition, false);
}
if (dev->power.syscore)
goto Complete;
+ /* Avoid direct_complete to let wakeup_path propagate. */
+ if (device_may_wakeup(dev) || dev->power.wakeup_path)
+ dev->power.direct_complete = false;
+
if (dev->power.direct_complete) {
if (pm_runtime_status_suspended(dev)) {
pm_runtime_disable(dev);
static int device_suspend(struct device *dev)
{
- reinit_completion(&dev->power.completion);
-
- if (is_async(dev)) {
- get_device(dev);
- async_schedule_dev(async_suspend, dev);
+ if (dpm_async_fn(dev, async_suspend))
return 0;
- }
return __device_suspend(dev, pm_transition, false);
}
error = 0;
continue;
}
- printk(KERN_INFO "PM: Device %s not prepared "
- "for power transition: code %d\n",
+ pr_info("Device %s not prepared for power transition: code %d\n",
dev_name(dev), error);
put_device(dev);
break;
void __suspend_report_result(const char *function, void *fn, int ret)
{
if (ret)
- pr_err("%s(): %pF returns %d\n", function, fn, ret);
- printk(KERN_ERR "%s(): %pS returns %d\n", function, fn, ret);
++ pr_err("%s(): %pS returns %d\n", function, fn, ret);
}
EXPORT_SYMBOL_GPL(__suspend_report_result);
/**
* device_pm_wait_for_dev - Wait for suspend/resume of a device to complete.
- * @dev: Device to wait for.
* @subordinate: Device that needs to wait for @dev.
+ * @dev: Device to wait for.
*/
int device_pm_wait_for_dev(struct device *subordinate, struct device *dev)
{
rcu_read_unlock();
desc->tfm = connection->cram_hmac_tfm;
- desc->flags = 0;
rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
if (rv) {
err = cmd->fn(connection, &pi);
if (err) {
- drbd_err(connection, "%pf failed\n", cmd->fn);
+ drbd_err(connection, "%ps failed\n", cmd->fn);
goto reconnect;
}
static LIST_HEAD(cpufreq_policy_list);
-static inline bool policy_is_inactive(struct cpufreq_policy *policy)
-{
- return cpumask_empty(policy->cpus);
-}
-
/* Macros to iterate over CPU policies */
#define for_each_suitable_policy(__policy, __active) \
list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \
EXPORT_SYMBOL_GPL(cpufreq_generic_get);
/**
- * cpufreq_cpu_get: returns policy for a cpu and marks it busy.
- *
- * @cpu: cpu to find policy for.
+ * cpufreq_cpu_get - Return policy for a CPU and mark it as busy.
+ * @cpu: CPU to find the policy for.
*
- * This returns policy for 'cpu', returns NULL if it doesn't exist.
- * It also increments the kobject reference count to mark it busy and so would
- * require a corresponding call to cpufreq_cpu_put() to decrement it back.
- * If corresponding call cpufreq_cpu_put() isn't made, the policy wouldn't be
- * freed as that depends on the kobj count.
+ * Call cpufreq_cpu_get_raw() to obtain a cpufreq policy for @cpu and increment
+ * the kobject reference counter of that policy. Return a valid policy on
+ * success or NULL on failure.
*
- * Return: A valid policy on success, otherwise NULL on failure.
+ * The policy returned by this function has to be released with the help of
+ * cpufreq_cpu_put() to balance its kobject reference counter properly.
*/
struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
/**
- * cpufreq_cpu_put: Decrements the usage count of a policy
- *
- * @policy: policy earlier returned by cpufreq_cpu_get().
- *
- * This decrements the kobject reference count incremented earlier by calling
- * cpufreq_cpu_get().
+ * cpufreq_cpu_put - Decrement kobject usage counter for cpufreq policy.
+ * @policy: cpufreq policy returned by cpufreq_cpu_get().
*/
void cpufreq_cpu_put(struct cpufreq_policy *policy)
{
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
+/**
+ * cpufreq_cpu_release - Unlock a policy and decrement its usage counter.
+ * @policy: cpufreq policy returned by cpufreq_cpu_acquire().
+ */
+void cpufreq_cpu_release(struct cpufreq_policy *policy)
+{
+ if (WARN_ON(!policy))
+ return;
+
+ lockdep_assert_held(&policy->rwsem);
+
+ up_write(&policy->rwsem);
+
+ cpufreq_cpu_put(policy);
+}
+
+/**
+ * cpufreq_cpu_acquire - Find policy for a CPU, mark it as busy and lock it.
+ * @cpu: CPU to find the policy for.
+ *
+ * Call cpufreq_cpu_get() to get a reference on the cpufreq policy for @cpu and
+ * if the policy returned by it is not NULL, acquire its rwsem for writing.
+ * Return the policy if it is active or release it and return NULL otherwise.
+ *
+ * The policy returned by this function has to be released with the help of
+ * cpufreq_cpu_release() in order to release its rwsem and balance its usage
+ * counter properly.
+ */
+struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+ if (!policy)
+ return NULL;
+
+ down_write(&policy->rwsem);
+
+ if (policy_is_inactive(policy)) {
+ cpufreq_cpu_release(policy);
+ return NULL;
+ }
+
+ return policy;
+}
+
/*********************************************************************
* EXTERNALLY AFFECTING FREQUENCY CHANGES *
*********************************************************************/
mutex_lock(&cpufreq_transition_notifier_list.mutex);
for (nb = cpufreq_transition_notifier_list.head; nb; nb = nb->next)
- pr_info("%pF\n", nb->notifier_call);
+ pr_info("%pS\n", nb->notifier_call);
mutex_unlock(&cpufreq_transition_notifier_list.mutex);
}
return ret;
}
-static int cpufreq_set_policy(struct cpufreq_policy *policy,
- struct cpufreq_policy *new_policy);
-
/**
* cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
*/
{
unsigned int limit;
int ret;
- if (cpufreq_driver->bios_limit) {
- ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
- if (!ret)
- return sprintf(buf, "%u\n", limit);
- }
+ ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
+ if (!ret)
+ return sprintf(buf, "%u\n", limit);
return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}
cpufreq_global_kobject, "policy%u", cpu);
if (ret) {
pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret);
+ kobject_put(&policy->kobj);
goto err_free_real_cpus;
}
{
unsigned int ret_freq = 0;
- if (unlikely(policy_is_inactive(policy)) || !cpufreq_driver->get)
+ if (unlikely(policy_is_inactive(policy)))
return ret_freq;
ret_freq = cpufreq_driver->get(policy->cpu);
if (policy) {
down_read(&policy->rwsem);
- ret_freq = __cpufreq_get(policy);
+ if (cpufreq_driver->get)
+ ret_freq = __cpufreq_get(policy);
up_read(&policy->rwsem);
cpufreq_cpu_put(policy);
*
* The cpuinfo part of @policy is not updated by this function.
*/
-static int cpufreq_set_policy(struct cpufreq_policy *policy,
- struct cpufreq_policy *new_policy)
+int cpufreq_set_policy(struct cpufreq_policy *policy,
+ struct cpufreq_policy *new_policy)
{
struct cpufreq_governor *old_gov;
int ret;
*/
void cpufreq_update_policy(unsigned int cpu)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
struct cpufreq_policy new_policy;
if (!policy)
return;
- down_write(&policy->rwsem);
-
- if (policy_is_inactive(policy))
- goto unlock;
-
/*
* BIOS might change freq behind our back
* -> ask driver for current freq and notify governors about a change
cpufreq_set_policy(policy, &new_policy);
unlock:
- up_write(&policy->rwsem);
-
- cpufreq_cpu_put(policy);
+ cpufreq_cpu_release(policy);
}
EXPORT_SYMBOL(cpufreq_update_policy);
+/**
+ * cpufreq_update_limits - Update policy limits for a given CPU.
+ * @cpu: CPU to update the policy limits for.
+ *
+ * Invoke the driver's ->update_limits callback if present or call
+ * cpufreq_update_policy() for @cpu.
+ */
+void cpufreq_update_limits(unsigned int cpu)
+{
+ if (cpufreq_driver->update_limits)
+ cpufreq_driver->update_limits(cpu);
+ else
+ cpufreq_update_policy(cpu);
+}
+EXPORT_SYMBOL_GPL(cpufreq_update_limits);
+
/*********************************************************************
* BOOST *
*********************************************************************/
static bool cpufreq_boost_supported(void)
{
- return likely(cpufreq_driver) && cpufreq_driver->set_boost;
+ return cpufreq_driver->set_boost;
}
static int create_boost_sysfs_file(void)
* General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/moduleparam.h>
#include <linux/vmalloc.h>
#include <linux/device.h>
#include <linux/ndctl.h>
static DEFINE_IDA(dimm_ida);
+static bool noblk;
+module_param(noblk, bool, 0444);
+MODULE_PARM_DESC(noblk, "force disable BLK / local alias support");
+
/*
* Retrieve bus and dimm handle and return if this bus supports
* get_config_data commands
rc = nvdimm_check_config_data(ndd->dev);
if (rc)
- dev_dbg(ndd->dev, "%pf: %s error: %d\n",
+ dev_dbg(ndd->dev, "%ps: %s error: %d\n",
__builtin_return_address(0), __func__, rc);
return rc;
}
nvdimm->dimm_id = dimm_id;
nvdimm->provider_data = provider_data;
+ if (noblk)
+ flags |= 1 << NDD_NOBLK;
nvdimm->flags = flags;
nvdimm->cmd_mask = cmd_mask;
nvdimm->num_flush = num_flush;
{
struct pci_driver *pdrv = to_pci_driver(driver);
const struct pci_device_id *ids = pdrv->id_table;
- __u32 vendor, device, subvendor = PCI_ANY_ID,
+ u32 vendor, device, subvendor = PCI_ANY_ID,
subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
unsigned long driver_data = 0;
int fields = 0;
{
struct pci_dynid *dynid, *n;
struct pci_driver *pdrv = to_pci_driver(driver);
- __u32 vendor, device, subvendor = PCI_ANY_ID,
+ u32 vendor, device, subvendor = PCI_ANY_ID,
subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
int fields = 0;
size_t retval = -ENODEV;
if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
&& pci_dev->current_state != PCI_UNKNOWN) {
WARN_ONCE(pci_dev->current_state != prev,
- "PCI PM: Device state not saved by %pF\n",
+ "PCI PM: Device state not saved by %pS\n",
drv->suspend);
}
}
if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
&& pci_dev->current_state != PCI_UNKNOWN) {
WARN_ONCE(pci_dev->current_state != prev,
- "PCI PM: Device state not saved by %pF\n",
+ "PCI PM: Device state not saved by %pS\n",
drv->suspend_late);
goto Fixup;
}
if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
&& pci_dev->current_state != PCI_UNKNOWN) {
WARN_ONCE(pci_dev->current_state != prev,
- "PCI PM: State of device not saved by %pF\n",
+ "PCI PM: State of device not saved by %pS\n",
pm->suspend);
}
}
if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
&& pci_dev->current_state != PCI_UNKNOWN) {
WARN_ONCE(pci_dev->current_state != prev,
- "PCI PM: State of device not saved by %pF\n",
+ "PCI PM: State of device not saved by %pS\n",
pm->suspend_noirq);
goto Fixup;
}
* log level.
*/
if (error == -EBUSY || error == -EAGAIN) {
- dev_dbg(dev, "can't suspend now (%pf returned %d)\n",
+ dev_dbg(dev, "can't suspend now (%ps returned %d)\n",
pm->runtime_suspend, error);
return error;
} else if (error) {
- dev_err(dev, "can't suspend (%pf returned %d)\n",
+ dev_err(dev, "can't suspend (%ps returned %d)\n",
pm->runtime_suspend, error);
return error;
}
&& !pci_dev->state_saved && pci_dev->current_state != PCI_D0
&& pci_dev->current_state != PCI_UNKNOWN) {
WARN_ONCE(pci_dev->current_state != prev,
- "PCI PM: State of device not saved by %pF\n",
+ "PCI PM: State of device not saved by %pS\n",
pm->runtime_suspend);
return 0;
}
void (*fn)(struct pci_dev *dev))
{
if (initcall_debug)
- pci_info(dev, "calling %pF @ %i\n", fn, task_pid_nr(current));
+ pci_info(dev, "calling %pS @ %i\n", fn, task_pid_nr(current));
return ktime_get();
}
delta = ktime_sub(rettime, calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
if (initcall_debug || duration > 10000)
- pci_info(dev, "%pF took %lld usecs\n", fn, duration);
+ pci_info(dev, "%pS took %lld usecs\n", fn, duration);
}
static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM &&
dev->subsystem_device == 0x0299)
return;
- /* else: fall through */
+ /* else, fall through */
case PCI_DEVICE_ID_NETMOS_9735:
case PCI_DEVICE_ID_NETMOS_9745:
case PCI_DEVICE_ID_NETMOS_9845:
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130,
quirk_dma_func1_alias);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9170,
+ quirk_dma_func1_alias);
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c47 + c57 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172,
quirk_dma_func1_alias);
/* QCOM QDF2xxx root ports */
{ PCI_VENDOR_ID_QCOM, 0x0400, pci_quirk_qcom_rp_acs },
{ PCI_VENDOR_ID_QCOM, 0x0401, pci_quirk_qcom_rp_acs },
+ /* HXT SD4800 root ports. The ACS design is the same as QCOM QDF2xxx */
+ { PCI_VENDOR_ID_HXT, 0x0401, pci_quirk_qcom_rp_acs },
/* Intel PCH root ports */
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_intel_pch_acs },
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_quirk_intel_spt_pch_acs },
#include <linux/quotaops.h>
#include <crypto/hash.h>
-#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION)
#include <linux/fscrypt.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
+#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */
+#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
struct cp_control {
int reason;
/* max discard pend list number */
#define MAX_PLIST_NUM 512
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
- (MAX_PLIST_NUM - 1) : (blk_num - 1))
+ (MAX_PLIST_NUM - 1) : ((blk_num) - 1))
enum {
D_PREP, /* initial */
bool sync; /* submit discard with REQ_SYNC flag */
bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
+ int timeout; /* discard timeout for put_super */
};
struct discard_cmd_control {
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
-#define DEF_MIN_INLINE_SIZE 1
static inline int get_extra_isize(struct inode *inode);
static inline int get_inline_xattr_addrs(struct inode *inode);
#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
SBI_IS_SHUTDOWN, /* shutdown by ioctl */
SBI_IS_RECOVERED, /* recovered orphan/data */
SBI_CP_DISABLED, /* CP was disabled last mount */
+ SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */
SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
DISCARD_TIME,
GC_TIME,
DISABLE_TIME,
+ UMOUNT_DISCARD_TIMEOUT,
MAX_TIME,
};
FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */
};
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
#define DUMMY_ENCRYPTION_ENABLED(sbi) \
(unlikely(F2FS_OPTION(sbi).test_dummy_encryption))
#else
unsigned int nquota_files; /* # of quota sysfile */
- u32 s_next_generation; /* for NFS support */
-
/* # of pages, see count_type */
atomic_t nr_pages[NR_COUNT_TYPE];
/* # of allocated blocks */
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
- printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \
+ printk_ratelimited("%sF2FS-fs : inject %s in %s of %pS\n", \
KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx));
desc.shash.tfm = sbi->s_chksum_driver;
- desc.shash.flags = 0;
*(u32 *)desc.ctx = crc;
err = crypto_shash_update(&desc.shash, address, length);
{
atomic_inc(&sbi->nr_pages[count_type]);
- if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
- count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA ||
- count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE ||
- count_type == F2FS_RD_META)
- return;
-
- set_sbi_flag(sbi, SBI_IS_DIRTY);
+ if (count_type == F2FS_DIRTY_DENTS ||
+ count_type == F2FS_DIRTY_NODES ||
+ count_type == F2FS_DIRTY_META ||
+ count_type == F2FS_DIRTY_QDATA ||
+ count_type == F2FS_DIRTY_IMETA)
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
}
static inline void inode_inc_dirty_pages(struct inode *inode)
get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
get_pages(sbi, F2FS_WB_CP_DATA) ||
get_pages(sbi, F2FS_DIO_READ) ||
- get_pages(sbi, F2FS_DIO_WRITE) ||
- atomic_read(&SM_I(sbi)->dcc_info->queued_discard) ||
- atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
+ get_pages(sbi, F2FS_DIO_WRITE))
+ return false;
+
+ if (SM_I(sbi) && SM_I(sbi)->dcc_info &&
+ atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
+ return false;
+
+ if (SM_I(sbi) && SM_I(sbi)->fcc_info &&
+ atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
return false;
+
return f2fs_time_over(sbi, type);
}
#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
+#define F2FS_NOCOW_FL 0x00800000 /* Do not cow file */
#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define F2FS_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
+#define F2FS_FL_USER_VISIBLE 0x30CBDFFF /* User visible flags */
#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */
#define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr))
#define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \
- ((offsetof(typeof(*f2fs_inode), field) + \
+ ((offsetof(typeof(*(f2fs_inode)), field) + \
sizeof((f2fs_inode)->field)) \
- <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \
+ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \
static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
-#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
- (!is_read_io(fio->op) || fio->is_meta))
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META && \
+ (!is_read_io((fio)->op) || (fio)->is_meta))
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
return true;
}
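+/*
+ * Attach @data as @page's private data, taking an extra page reference
+ * and setting PG_private; f2fs_clear_page_private() below undoes this.
+ */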
+static inline void f2fs_set_page_private(struct page *page,
+ unsigned long data)
+{
+ if (PagePrivate(page))
+ return;
+
+ get_page(page);
+ SetPagePrivate(page);
+ set_page_private(page, data);
+}
+
+static inline void f2fs_clear_page_private(struct page *page)
+{
+ if (!PagePrivate(page))
+ return;
+
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+ f2fs_put_page(page, 0);
+}
+
/*
* file.c
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
- bool buf_write);
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
int f2fs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
-bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
/*
* crypto support
*/
-static inline bool f2fs_encrypted_inode(struct inode *inode)
-{
- return file_is_encrypt(inode);
-}
-
static inline bool f2fs_encrypted_file(struct inode *inode)
{
- return f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode);
+ return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode);
}
static inline void f2fs_set_encrypted_inode(struct inode *inode)
{
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
file_set_encrypt(inode);
f2fs_set_inode_flags(inode);
#endif
static inline bool f2fs_may_encrypt(struct inode *inode)
{
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
umode_t mode = inode->i_mode;
return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0)
#endif
-#endif
-
static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_QUOTA
#endif
return false;
}
+
+#endif
)
);
+TRACE_DEFINE_ENUM(RPC_TASK_ASYNC);
+TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
+TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
+TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
+TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
+TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
+TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
+TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
+TRACE_DEFINE_ENUM(RPC_TASK_SENT);
+TRACE_DEFINE_ENUM(RPC_TASK_TIMEOUT);
+TRACE_DEFINE_ENUM(RPC_TASK_NOCONNECT);
+TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
+
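+/* Decode the RPC_TASK_* flags stored in tk_flags symbolically for trace output. */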
+#define rpc_show_task_flags(flags) \
+ __print_flags(flags, "|", \
+ { RPC_TASK_ASYNC, "ASYNC" }, \
+ { RPC_TASK_SWAPPER, "SWAPPER" }, \
+ { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \
+ { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \
+ { RPC_TASK_DYNAMIC, "DYNAMIC" }, \
+ { RPC_TASK_KILLED, "KILLED" }, \
+ { RPC_TASK_SOFT, "SOFT" }, \
+ { RPC_TASK_SOFTCONN, "SOFTCONN" }, \
+ { RPC_TASK_SENT, "SENT" }, \
+ { RPC_TASK_TIMEOUT, "TIMEOUT" }, \
+ { RPC_TASK_NOCONNECT, "NOCONNECT" }, \
+ { RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" })
+
+TRACE_DEFINE_ENUM(RPC_TASK_RUNNING);
+TRACE_DEFINE_ENUM(RPC_TASK_QUEUED);
+TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
+TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
+TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
+TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
+
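+/* Decode the tk_runstate bits symbolically for trace output. */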
+#define rpc_show_runstate(flags) \
+ __print_flags(flags, "|", \
+ { (1UL << RPC_TASK_RUNNING), "RUNNING" }, \
+ { (1UL << RPC_TASK_QUEUED), "QUEUED" }, \
+ { (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \
+ { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \
+ { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \
+ { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
+
DECLARE_EVENT_CLASS(rpc_task_running,
TP_PROTO(const struct rpc_task *task, const void *action),
__entry->flags = task->tk_flags;
),
- TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%pf",
- TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%ps",
++ TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%ps",
__entry->task_id, __entry->client_id,
- __entry->flags,
- __entry->runstate,
+ rpc_show_task_flags(__entry->flags),
+ rpc_show_runstate(__entry->runstate),
__entry->status,
__entry->action
)
__assign_str(q_name, rpc_qname(q));
),
- TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
+ TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s",
__entry->task_id, __entry->client_id,
- __entry->flags,
- __entry->runstate,
+ rpc_show_task_flags(__entry->flags),
+ rpc_show_runstate(__entry->runstate),
__entry->status,
__entry->timeout,
__get_str(q_name)
DEFINE_RPC_QUEUED_EVENT(sleep);
DEFINE_RPC_QUEUED_EVENT(wakeup);
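+/*
+ * Event class for tracepoints reporting a malformed or rejected RPC
+ * message; only the task and client IDs are recorded.
+ */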
+DECLARE_EVENT_CLASS(rpc_failure,
+
+ TP_PROTO(const struct rpc_task *task),
+
+ TP_ARGS(task),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ ),
+
+ TP_printk("task:%u@%u",
+ __entry->task_id, __entry->client_id)
+);
+
+#define DEFINE_RPC_FAILURE(name) \
+ DEFINE_EVENT(rpc_failure, rpc_bad_##name, \
+ TP_PROTO( \
+ const struct rpc_task *task \
+ ), \
+ TP_ARGS(task))
+
+DEFINE_RPC_FAILURE(callhdr);
+DEFINE_RPC_FAILURE(verifier);
+
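+/*
+ * Event class for RPC-level error replies; records the program, version,
+ * procedure, server name and XID of the failing call.
+ */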
+DECLARE_EVENT_CLASS(rpc_reply_event,
+
+ TP_PROTO(
+ const struct rpc_task *task
+ ),
+
+ TP_ARGS(task),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __string(progname, task->tk_client->cl_program->name)
+ __field(u32, version)
+ __string(procname, rpc_proc_name(task))
+ __string(servername, task->tk_xprt->servername)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+ __assign_str(progname, task->tk_client->cl_program->name)
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procname, rpc_proc_name(task))
+ __assign_str(servername, task->tk_xprt->servername)
+ ),
+
+ TP_printk("task:%u@%d server=%s xid=0x%08x %sv%d %s",
+ __entry->task_id, __entry->client_id, __get_str(servername),
+ __entry->xid, __get_str(progname), __entry->version,
+ __get_str(procname))
+)
+
+#define DEFINE_RPC_REPLY_EVENT(name) \
+ DEFINE_EVENT(rpc_reply_event, rpc__##name, \
+ TP_PROTO( \
+ const struct rpc_task *task \
+ ), \
+ TP_ARGS(task))
+
+DEFINE_RPC_REPLY_EVENT(prog_unavail);
+DEFINE_RPC_REPLY_EVENT(prog_mismatch);
+DEFINE_RPC_REPLY_EVENT(proc_unavail);
+DEFINE_RPC_REPLY_EVENT(garbage_args);
+DEFINE_RPC_REPLY_EVENT(unparsable);
+DEFINE_RPC_REPLY_EVENT(mismatch);
+DEFINE_RPC_REPLY_EVENT(stale_creds);
+DEFINE_RPC_REPLY_EVENT(bad_creds);
+DEFINE_RPC_REPLY_EVENT(auth_tooweak);
+
TRACE_EVENT(rpc_stats_latency,
TP_PROTO(
__entry->backlog, __entry->rtt, __entry->execute)
);
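+/*
+ * Fires when an xdr_stream request exceeds the space remaining in the
+ * buffer; records the requested size, the current stream position and
+ * the layout of the underlying xdr_buf.
+ */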
+TRACE_EVENT(rpc_xdr_overflow,
+ TP_PROTO(
+ const struct xdr_stream *xdr,
+ size_t requested
+ ),
+
+ TP_ARGS(xdr, requested),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(int, version)
+ __field(size_t, requested)
+ __field(const void *, end)
+ __field(const void *, p)
+ __field(const void *, head_base)
+ __field(size_t, head_len)
+ __field(const void *, tail_base)
+ __field(size_t, tail_len)
+ __field(unsigned int, page_len)
+ __field(unsigned int, len)
+ __string(progname,
+ xdr->rqst->rq_task->tk_client->cl_program->name)
+ __string(procedure,
+ xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_fast_assign(
+ if (xdr->rqst) {
+ const struct rpc_task *task = xdr->rqst->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __assign_str(progname,
+ task->tk_client->cl_program->name)
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+ } else {
+ __entry->task_id = 0;
+ __entry->client_id = 0;
+ __assign_str(progname, "unknown")
+ __entry->version = 0;
+ __assign_str(procedure, "unknown")
+ }
+ __entry->requested = requested;
+ __entry->end = xdr->end;
+ __entry->p = xdr->p;
+ __entry->head_base = xdr->buf->head[0].iov_base,
+ __entry->head_len = xdr->buf->head[0].iov_len,
+ __entry->page_len = xdr->buf->page_len,
+ __entry->tail_base = xdr->buf->tail[0].iov_base,
+ __entry->tail_len = xdr->buf->tail[0].iov_len,
+ __entry->len = xdr->buf->len;
+ ),
+
+ TP_printk(
+ "task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+ __entry->task_id, __entry->client_id,
+ __get_str(progname), __entry->version, __get_str(procedure),
+ __entry->requested, __entry->p, __entry->end,
+ __entry->head_base, __entry->head_len,
+ __entry->page_len,
+ __entry->tail_base, __entry->tail_len,
+ __entry->len
+ )
+);
+
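+/*
+ * Fires when decoded XDR data is shifted within the receive buffer;
+ * records the offset and the number of bytes copied.
+ */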
+TRACE_EVENT(rpc_xdr_alignment,
+ TP_PROTO(
+ const struct xdr_stream *xdr,
+ size_t offset,
+ unsigned int copied
+ ),
+
+ TP_ARGS(xdr, offset, copied),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(int, version)
+ __field(size_t, offset)
+ __field(unsigned int, copied)
+ __field(const void *, head_base)
+ __field(size_t, head_len)
+ __field(const void *, tail_base)
+ __field(size_t, tail_len)
+ __field(unsigned int, page_len)
+ __field(unsigned int, len)
+ __string(progname,
+ xdr->rqst->rq_task->tk_client->cl_program->name)
+ __string(procedure,
+ xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_fast_assign(
+ const struct rpc_task *task = xdr->rqst->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __assign_str(progname,
+ task->tk_client->cl_program->name)
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+
+ __entry->offset = offset;
+ __entry->copied = copied;
+ __entry->head_base = xdr->buf->head[0].iov_base,
+ __entry->head_len = xdr->buf->head[0].iov_len,
+ __entry->page_len = xdr->buf->page_len,
+ __entry->tail_base = xdr->buf->tail[0].iov_base,
+ __entry->tail_len = xdr->buf->tail[0].iov_len,
+ __entry->len = xdr->buf->len;
+ ),
+
+ TP_printk(
+ "task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+ __entry->task_id, __entry->client_id,
+ __get_str(progname), __entry->version, __get_str(procedure),
+ __entry->offset, __entry->copied,
+ __entry->head_base, __entry->head_len,
+ __entry->page_len,
+ __entry->tail_base, __entry->tail_len,
+ __entry->len
+ )
+);
+
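+/*
+ * Records the head/page/tail layout of the receive buffer prepared for
+ * an RPC reply.
+ */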
+TRACE_EVENT(rpc_reply_pages,
+ TP_PROTO(
+ const struct rpc_rqst *req
+ ),
+
+ TP_ARGS(req),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, head_base)
+ __field(size_t, head_len)
+ __field(const void *, tail_base)
+ __field(size_t, tail_len)
+ __field(unsigned int, page_len)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = req->rq_task->tk_pid;
+ __entry->client_id = req->rq_task->tk_client->cl_clid;
+
+ __entry->head_base = req->rq_rcv_buf.head[0].iov_base;
+ __entry->head_len = req->rq_rcv_buf.head[0].iov_len;
+ __entry->page_len = req->rq_rcv_buf.page_len;
+ __entry->tail_base = req->rq_rcv_buf.tail[0].iov_base;
+ __entry->tail_len = req->rq_rcv_buf.tail[0].iov_len;
+ ),
+
+ TP_printk(
+ "task:%u@%u xdr=[%p,%zu]/%u/[%p,%zu]\n",
+ __entry->task_id, __entry->client_id,
+ __entry->head_base, __entry->head_len,
+ __entry->page_len,
+ __entry->tail_base, __entry->tail_len
+ )
+);
+
/*
* First define the enums in the below macros to be exported to userspace
* via TRACE_DEFINE_ENUM().
DEFINE_RPC_XPRT_EVENT(timer);
DEFINE_RPC_XPRT_EVENT(lookup_rqst);
-DEFINE_RPC_XPRT_EVENT(transmit);
DEFINE_RPC_XPRT_EVENT(complete_rqst);
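+/*
+ * Records the XID, sequence number and status of a request as it is
+ * handed to the transport.
+ */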
+TRACE_EVENT(xprt_transmit,
+ TP_PROTO(
+ const struct rpc_rqst *rqst,
+ int status
+ ),
+
+ TP_ARGS(rqst, status),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, seqno)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client ?
+ rqst->rq_task->tk_client->cl_clid : -1;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->seqno = rqst->rq_seqno;
+ __entry->status = status;
+ ),
+
+ TP_printk(
+ "task:%u@%u xid=0x%08x seqno=%u status=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->seqno, __entry->status)
+);
+
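+/*
+ * Records a request being placed on the transmit queue; @stage
+ * identifies which code path queued it.
+ */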
+TRACE_EVENT(xprt_enq_xmit,
+ TP_PROTO(
+ const struct rpc_task *task,
+ int stage
+ ),
+
+ TP_ARGS(task, stage),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, seqno)
+ __field(int, stage)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+ __entry->seqno = task->tk_rqstp->rq_seqno;
+ __entry->stage = stage;
+ ),
+
+ TP_printk(
+ "task:%u@%u xid=0x%08x seqno=%u stage=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->seqno, __entry->stage)
+);
+
TRACE_EVENT(xprt_ping,
TP_PROTO(const struct rpc_xprt *xprt, int status),
*/
static void __init setup_command_line(char *command_line)
{
- saved_command_line =
- memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
- initcall_command_line =
- memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
- static_command_line = memblock_alloc(strlen(command_line) + 1,
- SMP_CACHE_BYTES);
+ size_t len = strlen(boot_command_line) + 1;
+
+ saved_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+ if (!saved_command_line)
+ panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
+ initcall_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+ if (!initcall_command_line)
+ panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
+ static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
+ if (!static_command_line)
+ panic("%s: Failed to allocate %zu bytes\n", __func__, len);
+
strcpy(saved_command_line, boot_command_line);
strcpy(static_command_line, command_line);
}
void __init __weak mem_encrypt_init(void) { }
+void __init __weak poking_init(void) { }
+
+void __init __weak pgd_cache_init(void) { }
+
bool initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
init_espfix_bsp();
/* Should be run after espfix64 is set up. */
pti_init();
+ pgd_cache_init();
}
void __init __weak arch_call_rest_init(void)
page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line);
+ /* parameters may set static keys */
+ jump_label_init();
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);
- jump_label_init();
-
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
taskstats_init_early();
delayacct_init();
+ poking_init();
check_bugs();
acpi_subsystem_init();
pr_debug("blacklisting initcall %s\n", str_entry);
entry = memblock_alloc(sizeof(*entry),
SMP_CACHE_BYTES);
+ if (!entry)
+ panic("%s: Failed to allocate %zu bytes\n",
+ __func__, sizeof(*entry));
entry->buf = memblock_alloc(strlen(str_entry) + 1,
SMP_CACHE_BYTES);
+ if (!entry->buf)
+ panic("%s: Failed to allocate %zu bytes\n",
+ __func__, strlen(str_entry) + 1);
strcpy(entry->buf, str_entry);
list_add(&entry->next, &blacklisted_initcalls);
}
{
ktime_t *calltime = (ktime_t *)data;
- printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
+ printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current));
*calltime = ktime_get();
}
rettime = ktime_get();
delta = ktime_sub(rettime, *calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n",
+ printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
fn, ret, duration);
}
strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
local_irq_enable();
}
- WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf);
+ WARN(msgbuf[0], "initcall %pS returned with %s\n", fn, msgbuf);
add_latent_entropy();
return ret;
struct uprobe {
struct rb_node rb_node; /* node in the rb tree */
- atomic_t ref;
+ refcount_t ref;
struct rw_semaphore register_rwsem;
struct rw_semaphore consumer_rwsem;
struct list_head pending_list;
static struct uprobe *get_uprobe(struct uprobe *uprobe)
{
- atomic_inc(&uprobe->ref);
+ refcount_inc(&uprobe->ref);
return uprobe;
}
static void put_uprobe(struct uprobe *uprobe)
{
- if (atomic_dec_and_test(&uprobe->ref)) {
+ if (refcount_dec_and_test(&uprobe->ref)) {
/*
* If application munmap(exec_vma) before uprobe_unregister()
* gets called, we don't get a chance to remove uprobe from
rb_link_node(&uprobe->rb_node, parent, p);
rb_insert_color(&uprobe->rb_node, &uprobes_tree);
/* get access + creation ref */
- atomic_set(&uprobe->ref, 2);
+ refcount_set(&uprobe->ref, 2);
return u;
}
if (uc->handler) {
rc = uc->handler(uc, regs);
WARN(rc & ~UPROBE_HANDLER_MASK,
- "bad rc=0x%x from %pf()\n", rc, uc->handler);
+ "bad rc=0x%x from %ps()\n", rc, uc->handler);
}
if (uc->ret_handler)
.priority = INT_MAX-1, /* notified after kprobes, kgdb */
};
-static int __init init_uprobes(void)
+void __init uprobes_init(void)
{
int i;
for (i = 0; i < UPROBES_HASH_SZ; i++)
mutex_init(&uprobes_mmap_mutex[i]);
- if (percpu_init_rwsem(&dup_mmap_sem))
- return -ENOMEM;
+ BUG_ON(percpu_init_rwsem(&dup_mmap_sem));
- return register_die_notifier(&uprobe_exception_nb);
+ BUG_ON(register_die_notifier(&uprobe_exception_nb));
}
-__initcall(init_uprobes);
case IRQ_SET_MASK_OK:
case IRQ_SET_MASK_OK_DONE:
cpumask_copy(desc->irq_common_data.affinity, mask);
+ /* fall through */
case IRQ_SET_MASK_OK_NOCOPY:
irq_validate_effective_affinity(data);
irq_set_thread_affinity(desc);
desc->affinity_notify = notify;
raw_spin_unlock_irqrestore(&desc->lock, flags);
- if (old_notify)
+ if (old_notify) {
+ cancel_work_sync(&old_notify->work);
kref_put(&old_notify->kref, old_notify->release);
+ }
return 0;
}
ret = 0;
break;
default:
- pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
+ pr_err("Setting trigger mode %lu for irq %u failed (%pS)\n",
flags, irq_desc_get_irq(desc), chip->irq_set_type);
}
if (unmask)
/* Number of rcu_nodes at specified level. */
int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
-/* panic() on RCU Stall sysctl. */
-int sysctl_panic_on_rcu_stall __read_mostly;
-/* Commandeer a sysrq key to dump RCU's tree. */
-static bool sysrq_rcu;
-module_param(sysrq_rcu, bool, 0444);
/*
* The rcu_scheduler_active variable is initialized to the value
/* rcuc/rcub kthread realtime priority */
static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
-module_param(kthread_prio, int, 0644);
+module_param(kthread_prio, int, 0444);
/* Delay in jiffies for grace-period initialization delays, debug only. */
*/
static ulong jiffies_till_sched_qs = ULONG_MAX;
module_param(jiffies_till_sched_qs, ulong, 0444);
-static ulong jiffies_to_sched_qs; /* Adjusted version of above if not default */
+static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */
module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */
/*
WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
return;
}
+ /* Otherwise, set to third fqs scan, but bound below on large systems. */
j = READ_ONCE(jiffies_till_first_fqs) +
2 * READ_ONCE(jiffies_till_next_fqs);
if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)
return gp_state_names[gs];
}
-/*
- * Show the state of the grace-period kthreads.
- */
-void show_rcu_gp_kthreads(void)
-{
- int cpu;
- unsigned long j;
- unsigned long ja;
- unsigned long jr;
- unsigned long jw;
- struct rcu_data *rdp;
- struct rcu_node *rnp;
-
- j = jiffies;
- ja = j - READ_ONCE(rcu_state.gp_activity);
- jr = j - READ_ONCE(rcu_state.gp_req_activity);
- jw = j - READ_ONCE(rcu_state.gp_wake_time);
- pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
- rcu_state.name, gp_state_getname(rcu_state.gp_state),
- rcu_state.gp_state,
- rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL,
- ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
- (long)READ_ONCE(rcu_state.gp_seq),
- (long)READ_ONCE(rcu_get_root()->gp_seq_needed),
- READ_ONCE(rcu_state.gp_flags));
- rcu_for_each_node_breadth_first(rnp) {
- if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed))
- continue;
- pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
- rnp->grplo, rnp->grphi, (long)rnp->gp_seq,
- (long)rnp->gp_seq_needed);
- if (!rcu_is_leaf_node(rnp))
- continue;
- for_each_leaf_node_possible_cpu(rnp, cpu) {
- rdp = per_cpu_ptr(&rcu_data, cpu);
- if (rdp->gpwrap ||
- ULONG_CMP_GE(rcu_state.gp_seq,
- rdp->gp_seq_needed))
- continue;
- pr_info("\tcpu %d ->gp_seq_needed %ld\n",
- cpu, (long)rdp->gp_seq_needed);
- }
- }
- /* sched_show_task(rcu_state.gp_kthread); */
-}
-EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
-
-/* Dump grace-period-request information due to commandeered sysrq. */
-static void sysrq_show_rcu(int key)
-{
- show_rcu_gp_kthreads();
-}
-
-static struct sysrq_key_op sysrq_rcudump_op = {
- .handler = sysrq_show_rcu,
- .help_msg = "show-rcu(y)",
- .action_msg = "Show RCU tree",
- .enable_mask = SYSRQ_ENABLE_DUMP,
-};
-
-static int __init rcu_sysrq_init(void)
-{
- if (sysrq_rcu)
- return register_sysrq_key('y', &sysrq_rcudump_op);
- return 0;
-}
-early_initcall(rcu_sysrq_init);
-
/*
* Send along grace-period-related data for rcutorture diagnostics.
*/
return 0;
}
-/*
- * Handler for the irq_work request posted when a grace period has
- * gone on for too long, but not yet long enough for an RCU CPU
- * stall warning. Set state appropriately, but just complain if
- * there is unexpected state on entry.
- */
-static void rcu_iw_handler(struct irq_work *iwp)
-{
- struct rcu_data *rdp;
- struct rcu_node *rnp;
-
- rdp = container_of(iwp, struct rcu_data, rcu_iw);
- rnp = rdp->mynode;
- raw_spin_lock_rcu_node(rnp);
- if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) {
- rdp->rcu_iw_gp_seq = rnp->gp_seq;
- rdp->rcu_iw_pending = false;
- }
- raw_spin_unlock_rcu_node(rnp);
-}
-
/*
* Return true if the specified CPU has passed through a quiescent
* state by virtue of being in or having passed through a dynticks
return 0;
}
-static void record_gp_stall_check_time(void)
-{
- unsigned long j = jiffies;
- unsigned long j1;
-
- rcu_state.gp_start = j;
- j1 = rcu_jiffies_till_stall_check();
- /* Record ->gp_start before ->jiffies_stall. */
- smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */
- rcu_state.jiffies_resched = j + j1 / 2;
- rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
-}
-
-/*
- * Complain about starvation of grace-period kthread.
- */
-static void rcu_check_gp_kthread_starvation(void)
-{
- struct task_struct *gpk = rcu_state.gp_kthread;
- unsigned long j;
-
- j = jiffies - READ_ONCE(rcu_state.gp_activity);
- if (j > 2 * HZ) {
- pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
- rcu_state.name, j,
- (long)rcu_seq_current(&rcu_state.gp_seq),
- READ_ONCE(rcu_state.gp_flags),
- gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
- gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
- if (gpk) {
- pr_err("RCU grace-period kthread stack dump:\n");
- sched_show_task(gpk);
- wake_up_process(gpk);
- }
- }
-}
-
-/*
- * Dump stacks of all tasks running on stalled CPUs. First try using
- * NMIs, but fall back to manual remote stack tracing on architectures
- * that don't support NMI-based stack dumps. The NMI-triggered stack
- * traces are more accurate because they are printed by the target CPU.
- */
-static void rcu_dump_cpu_stacks(void)
-{
- int cpu;
- unsigned long flags;
- struct rcu_node *rnp;
-
- rcu_for_each_leaf_node(rnp) {
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- for_each_leaf_node_possible_cpu(rnp, cpu)
- if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
- if (!trigger_single_cpu_backtrace(cpu))
- dump_cpu_task(cpu);
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- }
-}
-
-/*
- * If too much time has passed in the current grace period, and if
- * so configured, go kick the relevant kthreads.
- */
-static void rcu_stall_kick_kthreads(void)
-{
- unsigned long j;
-
- if (!rcu_kick_kthreads)
- return;
- j = READ_ONCE(rcu_state.jiffies_kick_kthreads);
- if (time_after(jiffies, j) && rcu_state.gp_kthread &&
- (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) {
- WARN_ONCE(1, "Kicking %s grace-period kthread\n",
- rcu_state.name);
- rcu_ftrace_dump(DUMP_ALL);
- wake_up_process(rcu_state.gp_kthread);
- WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ);
- }
-}
-
-static void panic_on_rcu_stall(void)
-{
- if (sysctl_panic_on_rcu_stall)
- panic("RCU Stall\n");
-}
-
-static void print_other_cpu_stall(unsigned long gp_seq)
-{
- int cpu;
- unsigned long flags;
- unsigned long gpa;
- unsigned long j;
- int ndetected = 0;
- struct rcu_node *rnp = rcu_get_root();
- long totqlen = 0;
-
- /* Kick and suppress, if so configured. */
- rcu_stall_kick_kthreads();
- if (rcu_cpu_stall_suppress)
- return;
-
- /*
- * OK, time to rat on our buddy...
- * See Documentation/RCU/stallwarn.txt for info on how to debug
- * RCU CPU stall warnings.
- */
- pr_err("INFO: %s detected stalls on CPUs/tasks:", rcu_state.name);
- print_cpu_stall_info_begin();
- rcu_for_each_leaf_node(rnp) {
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- ndetected += rcu_print_task_stall(rnp);
- if (rnp->qsmask != 0) {
- for_each_leaf_node_possible_cpu(rnp, cpu)
- if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
- print_cpu_stall_info(cpu);
- ndetected++;
- }
- }
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- }
-
- print_cpu_stall_info_end();
- for_each_possible_cpu(cpu)
- totqlen += rcu_get_n_cbs_cpu(cpu);
- pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
- smp_processor_id(), (long)(jiffies - rcu_state.gp_start),
- (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
- if (ndetected) {
- rcu_dump_cpu_stacks();
-
- /* Complain about tasks blocking the grace period. */
- rcu_print_detail_task_stall();
- } else {
- if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) {
- pr_err("INFO: Stall ended before state dump start\n");
- } else {
- j = jiffies;
- gpa = READ_ONCE(rcu_state.gp_activity);
- pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
- rcu_state.name, j - gpa, j, gpa,
- READ_ONCE(jiffies_till_next_fqs),
- rcu_get_root()->qsmask);
- /* In this case, the current CPU might be at fault. */
- sched_show_task(current);
- }
- }
- /* Rewrite if needed in case of slow consoles. */
- if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
- WRITE_ONCE(rcu_state.jiffies_stall,
- jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
-
- rcu_check_gp_kthread_starvation();
-
- panic_on_rcu_stall();
-
- rcu_force_quiescent_state(); /* Kick them all. */
-}
-
-static void print_cpu_stall(void)
-{
- int cpu;
- unsigned long flags;
- struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
- struct rcu_node *rnp = rcu_get_root();
- long totqlen = 0;
-
- /* Kick and suppress, if so configured. */
- rcu_stall_kick_kthreads();
- if (rcu_cpu_stall_suppress)
- return;
-
- /*
- * OK, time to rat on ourselves...
- * See Documentation/RCU/stallwarn.txt for info on how to debug
- * RCU CPU stall warnings.
- */
- pr_err("INFO: %s self-detected stall on CPU", rcu_state.name);
- print_cpu_stall_info_begin();
- raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
- print_cpu_stall_info(smp_processor_id());
- raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
- print_cpu_stall_info_end();
- for_each_possible_cpu(cpu)
- totqlen += rcu_get_n_cbs_cpu(cpu);
- pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n",
- jiffies - rcu_state.gp_start,
- (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
-
- rcu_check_gp_kthread_starvation();
-
- rcu_dump_cpu_stacks();
-
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- /* Rewrite if needed in case of slow consoles. */
- if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
- WRITE_ONCE(rcu_state.jiffies_stall,
- jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-
- panic_on_rcu_stall();
-
- /*
- * Attempt to revive the RCU machinery by forcing a context switch.
- *
- * A context switch would normally allow the RCU state machine to make
- * progress and it could be we're stuck in kernel space without context
- * switches for an entirely unreasonable amount of time.
- */
- set_tsk_need_resched(current);
- set_preempt_need_resched();
-}
-
-static void check_cpu_stall(struct rcu_data *rdp)
-{
- unsigned long gs1;
- unsigned long gs2;
- unsigned long gps;
- unsigned long j;
- unsigned long jn;
- unsigned long js;
- struct rcu_node *rnp;
-
- if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
- !rcu_gp_in_progress())
- return;
- rcu_stall_kick_kthreads();
- j = jiffies;
-
- /*
- * Lots of memory barriers to reject false positives.
- *
- * The idea is to pick up rcu_state.gp_seq, then
- * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally
- * another copy of rcu_state.gp_seq. These values are updated in
- * the opposite order with memory barriers (or equivalent) during
- * grace-period initialization and cleanup. Now, a false positive
- * can occur if we get an new value of rcu_state.gp_start and a old
- * value of rcu_state.jiffies_stall. But given the memory barriers,
- * the only way that this can happen is if one grace period ends
- * and another starts between these two fetches. This is detected
- * by comparing the second fetch of rcu_state.gp_seq with the
- * previous fetch from rcu_state.gp_seq.
- *
- * Given this check, comparisons of jiffies, rcu_state.jiffies_stall,
- * and rcu_state.gp_start suffice to forestall false positives.
- */
- gs1 = READ_ONCE(rcu_state.gp_seq);
- smp_rmb(); /* Pick up ->gp_seq first... */
- js = READ_ONCE(rcu_state.jiffies_stall);
- smp_rmb(); /* ...then ->jiffies_stall before the rest... */
- gps = READ_ONCE(rcu_state.gp_start);
- smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */
- gs2 = READ_ONCE(rcu_state.gp_seq);
- if (gs1 != gs2 ||
- ULONG_CMP_LT(j, js) ||
- ULONG_CMP_GE(gps, js))
- return; /* No stall or GP completed since entering function. */
- rnp = rdp->mynode;
- jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
- if (rcu_gp_in_progress() &&
- (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
- cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
-
- /* We haven't checked in, so go dump stack. */
- print_cpu_stall();
-
- } else if (rcu_gp_in_progress() &&
- ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
- cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
-
- /* They had a few time units to dump stack, so complain. */
- print_other_cpu_stall(gs2);
- }
-}
-
-/**
- * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
- *
- * Set the stall-warning timeout way off into the future, thus preventing
- * any RCU CPU stall-warning messages from appearing in the current set of
- * RCU grace periods.
- *
- * The caller must disable hard irqs.
- */
-void rcu_cpu_stall_reset(void)
-{
- WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2);
-}
-
/* Trace-event wrapper function for trace_rcu_future_grace_period. */
static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
unsigned long gp_seq_req, const char *s)
static void rcu_gp_kthread_wake(void)
{
if ((current == rcu_state.gp_kthread &&
- !in_interrupt() && !in_serving_softirq()) ||
+ !in_irq() && !in_serving_softirq()) ||
!READ_ONCE(rcu_state.gp_flags) ||
!rcu_state.gp_kthread)
return;
return;
}
mask = rdp->grpmask;
+ rdp->core_needs_qs = false;
if ((rnp->qsmask & mask) == 0) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
} else {
- rdp->core_needs_qs = false;
-
/*
* This GP can't end until cpu checks in, so all of our
* callbacks can be processed during the next GP.
}
/*
- * Scan the leaf rcu_node structures, processing dyntick state for any that
- * have not yet encountered a quiescent state, using the function specified.
- * Also initiate boosting for any threads blocked on the root rcu_node.
- *
- * The caller must have suppressed start of new grace periods.
+ * Scan the leaf rcu_node structures. For each structure on which all
+ * CPUs have reported a quiescent state and on which there are tasks
+ * blocking the current grace period, initiate RCU priority boosting.
+ * Otherwise, invoke the specified function to check dyntick state for
+ * each CPU that has not yet reported a quiescent state.
*/
static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
{
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
-/*
- * This function checks for grace-period requests that fail to motivate
- * RCU to come out of its idle mode.
- */
-void
-rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
- const unsigned long gpssdelay)
-{
- unsigned long flags;
- unsigned long j;
- struct rcu_node *rnp_root = rcu_get_root();
- static atomic_t warned = ATOMIC_INIT(0);
-
- if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() ||
- ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
- return;
- j = jiffies; /* Expensive access, and in common case don't get here. */
- if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
- time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
- atomic_read(&warned))
- return;
-
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- j = jiffies;
- if (rcu_gp_in_progress() ||
- ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
- time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
- time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
- atomic_read(&warned)) {
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- return;
- }
- /* Hold onto the leaf lock to make others see warned==1. */
-
- if (rnp_root != rnp)
- raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
- j = jiffies;
- if (rcu_gp_in_progress() ||
- ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
- time_before(j, rcu_state.gp_req_activity + gpssdelay) ||
- time_before(j, rcu_state.gp_activity + gpssdelay) ||
- atomic_xchg(&warned, 1)) {
- raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- return;
- }
- WARN_ON(1);
- if (rnp_root != rnp)
- raw_spin_unlock_rcu_node(rnp_root);
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- show_rcu_gp_kthreads();
-}
-
-/*
- * Do a forward-progress check for rcutorture. This is normally invoked
- * due to an OOM event. The argument "j" gives the time period during
- * which rcutorture would like progress to have been made.
- */
-void rcu_fwd_progress_check(unsigned long j)
-{
- unsigned long cbs;
- int cpu;
- unsigned long max_cbs = 0;
- int max_cpu = -1;
- struct rcu_data *rdp;
-
- if (rcu_gp_in_progress()) {
- pr_info("%s: GP age %lu jiffies\n",
- __func__, jiffies - rcu_state.gp_start);
- show_rcu_gp_kthreads();
- } else {
- pr_info("%s: Last GP end %lu jiffies ago\n",
- __func__, jiffies - rcu_state.gp_end);
- preempt_disable();
- rdp = this_cpu_ptr(&rcu_data);
- rcu_check_gp_start_stall(rdp->mynode, rdp, j);
- preempt_enable();
- }
- for_each_possible_cpu(cpu) {
- cbs = rcu_get_n_cbs_cpu(cpu);
- if (!cbs)
- continue;
- if (max_cpu < 0)
- pr_info("%s: callbacks", __func__);
- pr_cont(" %d: %lu", cpu, cbs);
- if (cbs <= max_cbs)
- continue;
- max_cbs = cbs;
- max_cpu = cpu;
- }
- if (max_cpu >= 0)
- pr_cont("\n");
-}
-EXPORT_SYMBOL_GPL(rcu_fwd_progress_check);
-
/* Perform RCU core processing work for the current CPU. */
static __latent_entropy void rcu_core(struct softirq_action *unused)
{
* Use rcu:rcu_callback trace event to find the previous
* time callback was passed to __call_rcu().
*/
- WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pF()!!!\n",
+ WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pS()!!!\n",
head, head->func);
WRITE_ONCE(head->func, rcu_leak_callback);
return;
switch (action) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
- if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
- rcu_expedite_gp();
+ rcu_expedite_gp();
break;
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
- if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
- rcu_unexpedite_gp();
+ rcu_unexpedite_gp();
break;
default:
break;
jiffies_till_first_fqs = d;
if (jiffies_till_next_fqs == ULONG_MAX)
jiffies_till_next_fqs = d;
- if (jiffies_till_sched_qs == ULONG_MAX)
- adjust_jiffies_till_sched_qs();
+ adjust_jiffies_till_sched_qs();
/* If the compile-time values are accurate, just leave. */
if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
srcu_init();
}
+#include "tree_stall.h"
#include "tree_exp.h"
#include "tree_plugin.h"
unsigned long long notrace sched_clock(void)
{
u64 cyc, res;
- unsigned long seq;
+ unsigned int seq;
struct clock_read_data *rd;
do {
if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
enable_sched_clock_irqtime();
- pr_debug("Registered %pF as sched_clock source\n", read);
+ pr_debug("Registered %pS as sched_clock source\n", read);
}
void __init generic_sched_clock_init(void)
*/
static u64 notrace suspended_sched_clock_read(void)
{
- unsigned long seq = raw_read_seqcount(&cd.seq);
+ unsigned int seq = raw_read_seqcount(&cd.seq);
return cd.read_data[seq & 1].epoch_cyc;
}
-static int sched_clock_suspend(void)
+int sched_clock_suspend(void)
{
struct clock_read_data *rd = &cd.read_data[0];
return 0;
}
-static void sched_clock_resume(void)
+void sched_clock_resume(void)
{
struct clock_read_data *rd = &cd.read_data[0];
hlist_add_head(&timer->entry, base->vectors + idx);
__set_bit(idx, base->pending_map);
timer_set_idx(timer, idx);
+
+ trace_timer_start(timer, timer->expires, timer->flags);
}
static void
trace_timer_init(timer);
}
-static inline void
-debug_activate(struct timer_list *timer, unsigned long expires)
-{
- debug_timer_activate(timer);
- trace_timer_start(timer, expires, timer->flags);
-}
-
static inline void debug_deactivate(struct timer_list *timer)
{
debug_timer_deactivate(timer);
}
}
- debug_activate(timer, expires);
+ debug_timer_activate(timer);
timer->expires = expires;
/*
}
forward_timer_base(base);
- debug_activate(timer, timer->expires);
+ debug_timer_activate(timer);
internal_add_timer(base, timer);
raw_spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL(del_timer_sync);
#endif
-static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *))
+static void call_timer_fn(struct timer_list *timer,
+ void (*fn)(struct timer_list *),
+ unsigned long baseclk)
{
int count = preempt_count();
*/
lock_map_acquire(&lockdep_map);
- trace_timer_expire_entry(timer);
+ trace_timer_expire_entry(timer, baseclk);
fn(timer);
trace_timer_expire_exit(timer);
lock_map_release(&lockdep_map);
if (count != preempt_count()) {
- WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+ WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",
fn, count, preempt_count());
/*
* Restore the preempt count. That gives us a decent
static void expire_timers(struct timer_base *base, struct hlist_head *head)
{
+ /*
+ * This value is required only for tracing. base->clk was
+ * incremented directly before expire_timers was called. But expiry
+ * is related to the old base->clk value.
+ */
+ unsigned long baseclk = base->clk - 1;
+
while (!hlist_empty(head)) {
struct timer_list *timer;
void (*fn)(struct timer_list *);
if (timer->flags & TIMER_IRQSAFE) {
raw_spin_unlock(&base->lock);
- call_timer_fn(timer, fn);
+ call_timer_fn(timer, fn, baseclk);
raw_spin_lock(&base->lock);
} else {
raw_spin_unlock_irq(&base->lock);
- call_timer_fn(timer, fn);
+ call_timer_fn(timer, fn, baseclk);
raw_spin_lock_irq(&base->lock);
}
}
}
/**
- * wq_worker_waking_up - a worker is waking up
+ * wq_worker_running - a worker is running again
* @task: task waking up
- * @cpu: CPU @task is waking up to
*
- * This function is called during try_to_wake_up() when a worker is
- * being awoken.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
+ * This function is called when a worker returns from schedule()
*/
-void wq_worker_waking_up(struct task_struct *task, int cpu)
+void wq_worker_running(struct task_struct *task)
{
struct worker *worker = kthread_data(task);
- if (!(worker->flags & WORKER_NOT_RUNNING)) {
- WARN_ON_ONCE(worker->pool->cpu != cpu);
+ if (!worker->sleeping)
+ return;
+ if (!(worker->flags & WORKER_NOT_RUNNING))
atomic_inc(&worker->pool->nr_running);
- }
+ worker->sleeping = 0;
}
/**
* wq_worker_sleeping - a worker is going to sleep
* @task: task going to sleep
*
- * This function is called during schedule() when a busy worker is
- * going to sleep. Worker on the same cpu can be woken up by
- * returning pointer to its task.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
- *
- * Return:
- * Worker task on @cpu to wake up, %NULL if none.
+ * This function is called from schedule() when a busy worker is
+ * going to sleep.
*/
-struct task_struct *wq_worker_sleeping(struct task_struct *task)
+void wq_worker_sleeping(struct task_struct *task)
{
- struct worker *worker = kthread_data(task), *to_wakeup = NULL;
+ struct worker *next, *worker = kthread_data(task);
struct worker_pool *pool;
/*
* checking NOT_RUNNING.
*/
if (worker->flags & WORKER_NOT_RUNNING)
- return NULL;
+ return;
pool = worker->pool;
- /* this can only happen on the local cpu */
- if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
- return NULL;
+ if (WARN_ON_ONCE(worker->sleeping))
+ return;
+
+ worker->sleeping = 1;
+ spin_lock_irq(&pool->lock);
/*
* The counterpart of the following dec_and_test, implied mb,
* lock is safe.
*/
if (atomic_dec_and_test(&pool->nr_running) &&
- !list_empty(&pool->worklist))
- to_wakeup = first_idle_worker(pool);
- return to_wakeup ? to_wakeup->task : NULL;
+ !list_empty(&pool->worklist)) {
+ next = first_idle_worker(pool);
+ if (next)
+ wake_up_process(next->task);
+ }
+ spin_unlock_irq(&pool->lock);
}
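
The scheduler is now expected to call these two hooks symmetrically around schedule(), outside the runqueue lock, since wq_worker_sleeping() takes pool->lock and performs the wakeup itself. Below is a minimal sketch of that pairing, assuming the PF_WQ_WORKER flag and the declarations normally provided by kernel/workqueue_internal.h; the helper names are illustrative, not a verbatim copy of kernel/sched/core.c.

#include <linux/sched.h>

/* Normally declared in kernel/workqueue_internal.h. */
void wq_worker_running(struct task_struct *task);
void wq_worker_sleeping(struct task_struct *task);

/* Called just before a task blocks in schedule(). */
static void sched_submit_work_sketch(struct task_struct *tsk)
{
	if (!tsk->state)		/* still runnable, nothing to do */
		return;
	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_sleeping(tsk);	/* may wake an idle worker */
}

/* Called once the task is running again after schedule(). */
static void sched_update_worker_sketch(struct task_struct *tsk)
{
	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_running(tsk);		/* restore nr_running accounting */
}
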
/**
if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
- " last function: %pf\n",
+ " last function: %ps\n",
current->comm, preempt_count(), task_pid_nr(current),
worker->current_func);
debug_show_held_locks(current);
worker = current_wq_worker();
WARN_ONCE(current->flags & PF_MEMALLOC,
- "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
current->pid, current->comm, target_wq->name, target_func);
WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
(WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
- "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+ "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
worker->current_pwq->wq->name, worker->current_func,
target_wq->name, target_func);
}
lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
if (!lock_name)
lock_name = wq->name;
+
+ wq->lock_name = lock_name;
lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
}
INIT_LIST_HEAD(&wq->list);
if (alloc_and_link_pwqs(wq) < 0)
- goto err_free_wq;
+ goto err_unreg_lockdep;
if (wq_online && init_rescuer(wq) < 0)
goto err_destroy;
return wq;
+err_unreg_lockdep:
+ wq_unregister_lockdep(wq);
+ wq_free_lockdep(wq);
err_free_wq:
free_workqueue_attrs(wq->unbound_attrs);
kfree(wq);
probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
if (fn || name[0] || desc[0]) {
- printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
+ printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
if (strcmp(name, desc))
pr_cont(" (%s)", desc);
pr_cont("\n");
pr_cont("%s BAR(%d)", comma ? "," : "",
task_pid_nr(barr->task));
} else {
- pr_cont("%s %pf", comma ? "," : "", work->func);
+ pr_cont("%s %ps", comma ? "," : "", work->func);
}
}
if (worker->current_pwq != pwq)
continue;
- pr_cont("%s %d%s:%pf", comma ? "," : "",
+ pr_cont("%s %d%s:%ps", comma ? "," : "",
task_pid_nr(worker->task),
worker == pwq->wq->rescuer ? "(RESCUER)" : "",
worker->current_func);
*
* WRITE_ONCE() is necessary because @worker->flags may be
* tested without holding any lock in
- * wq_worker_waking_up(). Without it, NOT_RUNNING test may
+ * wq_worker_running(). Without it, NOT_RUNNING test may
* fail incorrectly leading to premature concurrency
* management operations.
*/
#include <linux/gfp.h>
#include <linux/mm.h>
+#include "../tools/testing/selftests/kselftest_module.h"
+
#define BUF_SIZE 256
#define PAD_SIZE 16
#define FILL_CHAR '$'
#define PTR ((void *)0x456789ab)
#define PTR_STR "456789ab"
#define PTR_VAL_NO_CRNG "(ptrval)"
+ #define ZEROS ""
static int __init
plain_format(void)
return 0;
}
-
static int __init
plain_hash(void)
{
test(buf, fmt, p);
}
+ static void __init
+ null_pointer(void)
+ {
+ test_hashed("%p", NULL);
+ test(ZEROS "00000000", "%px", NULL);
+ test("(null)", "%pE", NULL);
+ }
+
+ #define PTR_INVALID ((void *)0x000000ab)
+
+ static void __init
+ invalid_pointer(void)
+ {
+ test_hashed("%p", PTR_INVALID);
+ test(ZEROS "000000ab", "%px", PTR_INVALID);
+ test("(efault)", "%pE", PTR_INVALID);
+ }
+
static void __init
symbol_ptr(void)
{
.tm_year = 118,
};
- test_hashed("%pt", &tm);
-
+ test("(%ptR?)", "%pt", &tm);
test("2018-11-26T05:35:43", "%ptR", &tm);
test("0118-10-26T05:35:43", "%ptRr", &tm);
test("05:35:43|2018-11-26", "%ptRt|%ptRd", &tm, &tm);
large_bitmap(void)
{
const int nbits = 1 << 16;
- unsigned long *bits = kcalloc(BITS_TO_LONGS(nbits), sizeof(long), GFP_KERNEL);
+ unsigned long *bits = bitmap_zalloc(nbits, GFP_KERNEL);
if (!bits)
return;
bitmap_set(bits, 1, 20);
bitmap_set(bits, 60000, 15);
test("1-20,60000-60014", "%*pbl", nbits, bits);
- kfree(bits);
+ bitmap_free(bits);
}
static void __init
test_pointer(void)
{
plain();
+ null_pointer();
+ invalid_pointer();
symbol_ptr();
kernel_ptr();
struct_resource();
flags();
}
-static int __init
-test_printf_init(void)
+static void __init selftest(void)
{
alloced_buffer = kmalloc(BUF_SIZE + 2*PAD_SIZE, GFP_KERNEL);
if (!alloced_buffer)
- return -ENOMEM;
+ return;
test_buffer = alloced_buffer + PAD_SIZE;
test_basic();
test_pointer();
kfree(alloced_buffer);
-
- if (failed_tests == 0)
- pr_info("all %u tests passed\n", total_tests);
- else
- pr_warn("failed %u out of %u tests\n", failed_tests, total_tests);
-
- return failed_tests ? -EINVAL : 0;
}
-module_init(test_printf_init);
-
+KSTM_MODULE_LOADERS(test_printf);
MODULE_LICENSE("GPL");
* :c:func:`memblock_set_node`. The :c:func:`memblock_add_node`
* performs such an assignment directly.
*
- * Once memblock is setup the memory can be allocated using either
- * memblock or bootmem APIs.
+ * Once memblock is set up, memory can be allocated using one of the
+ * API variants:
+ *
+ * * :c:func:`memblock_phys_alloc*` - these functions return the
+ * **physical** address of the allocated memory
+ * * :c:func:`memblock_alloc*` - these functions return the **virtual**
+ * address of the allocated memory.
+ *
+ * Note that both API variants use implicit assumptions about allowed
+ * memory ranges and the fallback methods. Consult the documentation
+ * of the :c:func:`memblock_alloc_internal` and
+ * :c:func:`memblock_alloc_range_nid` functions for a more elaborate
+ * description.
*
* As the system boot progresses, the architecture specific
* :c:func:`mem_init` function frees all the memory to the buddy page
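
As a rough illustration of the two API variants described in the comment above, here is a minimal boot-time sketch; the function name, sizes and alignments are made up for illustration, while memblock_alloc() and memblock_phys_alloc() are the real interfaces.

#include <linux/cache.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/printk.h>

static void __init memblock_variants_example(void)
{
	void *vaddr;
	phys_addr_t paddr;

	/* memblock_alloc*(): returns a zeroed *virtual* address. */
	vaddr = memblock_alloc(PAGE_SIZE, SMP_CACHE_BYTES);
	if (!vaddr) {
		pr_warn("%s: virtual-variant allocation failed\n", __func__);
		return;
	}

	/* memblock_phys_alloc*(): returns a *physical* address. */
	paddr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
	if (!paddr) {
		pr_warn("%s: physical-variant allocation failed\n", __func__);
		return;
	}

	pr_info("%s: virt=%p phys=%pa\n", __func__, vaddr, &paddr);
}
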
static int memblock_memory_in_slab __initdata_memblock = 0;
static int memblock_reserved_in_slab __initdata_memblock = 0;
-enum memblock_flags __init_memblock choose_memblock_flags(void)
+static enum memblock_flags __init_memblock choose_memblock_flags(void)
{
return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
}
* Return:
* Found address on success, 0 on failure.
*/
-phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
+static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
phys_addr_t end, int nid,
enum memblock_flags flags)
else
in_slab = &memblock_reserved_in_slab;
- /* Try to find some space for it.
- *
- * WARNING: We assume that either slab_is_available() and we use it or
- * we use MEMBLOCK for allocations. That means that this is unsafe to
- * use when bootmem is currently active (unless bootmem itself is
- * implemented on top of MEMBLOCK which isn't the case yet)
- *
- * This should however not be an issue for now, as we currently only
- * call into MEMBLOCK while it's still active, or much later when slab
- * is active for memory hotplug operations
- */
+ /* Try to find some space for it */
if (use_slab) {
new_array = kmalloc(new_size, GFP_KERNEL);
addr = new_array ? __pa(new_array) : 0;
{
phys_addr_t end = base + size - 1;
- memblock_dbg("memblock_add: [%pa-%pa] %pF\n",
+ memblock_dbg("memblock_add: [%pa-%pa] %pS\n",
&base, &end, (void *)_RET_IP_);
return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
{
phys_addr_t end = base + size - 1;
- memblock_dbg(" memblock_free: [%pa-%pa] %pF\n",
+ memblock_dbg(" memblock_free: [%pa-%pa] %pS\n",
&base, &end, (void *)_RET_IP_);
kmemleak_free_part_phys(base, size);
{
phys_addr_t end = base + size - 1;
- memblock_dbg("memblock_reserve: [%pa-%pa] %pF\n",
+ memblock_dbg("memblock_reserve: [%pa-%pa] %pS\n",
&base, &end, (void *)_RET_IP_);
return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0);
if (ret)
return ret;
- for (i = start_rgn; i < end_rgn; i++)
+ for (i = start_rgn; i < end_rgn; i++) {
+ struct memblock_region *r = &type->regions[i];
+
if (set)
- memblock_set_region_flags(&type->regions[i], flag);
+ r->flags |= flag;
else
- memblock_clear_region_flags(&type->regions[i], flag);
+ r->flags &= ~flag;
+ }
memblock_merge_regions(type);
return 0;
*idx = ULLONG_MAX;
}
+static bool should_skip_region(struct memblock_region *m, int nid, int flags)
+{
+ int m_nid = memblock_get_region_node(m);
+
+ /* only memory regions are associated with nodes, check it */
+ if (nid != NUMA_NO_NODE && nid != m_nid)
+ return true;
+
+ /* skip hotpluggable memory regions if needed */
+ if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
+ return true;
+
+ /* if we want mirror memory skip non-mirror memory regions */
+ if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
+ return true;
+
+ /* skip nomap memory unless we were asked for it explicitly */
+ if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
+ return true;
+
+ return false;
+}
+
/**
- * __next__mem_range - next function for for_each_free_mem_range() etc.
+ * __next_mem_range - next function for for_each_free_mem_range() etc.
* @idx: pointer to u64 loop variable
* @nid: node selector, %NUMA_NO_NODE for all nodes
* @flags: pick from blocks based on memory attributes
phys_addr_t m_end = m->base + m->size;
int m_nid = memblock_get_region_node(m);
- /* only memory regions are associated with nodes, check it */
- if (nid != NUMA_NO_NODE && nid != m_nid)
- continue;
-
- /* skip hotpluggable memory regions if needed */
- if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
- continue;
-
- /* if we want mirror memory skip non-mirror memory regions */
- if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
- continue;
-
- /* skip nomap memory unless we were asked for it explicitly */
- if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
+ if (should_skip_region(m, nid, flags))
continue;
if (!type_b) {
phys_addr_t m_end = m->base + m->size;
int m_nid = memblock_get_region_node(m);
- /* only memory regions are associated with nodes, check it */
- if (nid != NUMA_NO_NODE && nid != m_nid)
- continue;
-
- /* skip hotpluggable memory regions if needed */
- if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
- continue;
-
- /* if we want mirror memory skip non-mirror memory regions */
- if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
- continue;
-
- /* skip nomap memory unless we were asked for it explicitly */
- if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
+ if (should_skip_region(m, nid, flags))
continue;
if (!type_b) {
}
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+/**
+ * memblock_alloc_range_nid - allocate boot memory block
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @start: the lower bound of the memory region to allocate (phys address)
+ * @end: the upper bound of the memory region to allocate (phys address)
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * The allocation is performed from the memory region limited by
+ * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
+ *
+ * If the specified node cannot hold the requested memory, the
+ * allocation falls back to any node in the system.
+ *
+ * For systems with memory mirroring, the allocation is attempted first
+ * from the regions with mirroring enabled and then retried from any
+ * memory region.
+ *
+ * In addition, the function sets the min_count to 0 using
+ * kmemleak_alloc_phys() for the allocated boot memory block, so that it
+ * is never reported as a leak.
+ *
+ * Return:
+ * Physical address of allocated memory block on success, %0 on failure.
+ */
static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
- phys_addr_t end, int nid,
- enum memblock_flags flags)
+ phys_addr_t end, int nid)
{
+ enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
+ if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+ nid = NUMA_NO_NODE;
+
if (!align) {
/* Can't use WARNs this early in boot on powerpc */
dump_stack();
align = SMP_CACHE_BYTES;
}
+ if (end > memblock.current_limit)
+ end = memblock.current_limit;
+
+again:
found = memblock_find_in_range_node(size, align, start, end, nid,
flags);
- if (found && !memblock_reserve(found, size)) {
- /*
- * The min_count is set to 0 so that memblock allocations are
- * never reported as leaks.
- */
- kmemleak_alloc_phys(found, size, 0, 0);
- return found;
- }
- return 0;
-}
-
-phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
- phys_addr_t start, phys_addr_t end,
- enum memblock_flags flags)
-{
- return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
- flags);
-}
-
-phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
- phys_addr_t align, phys_addr_t max_addr,
- int nid, enum memblock_flags flags)
-{
- return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags);
-}
-
-phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
-{
- enum memblock_flags flags = choose_memblock_flags();
- phys_addr_t ret;
+ if (found && !memblock_reserve(found, size))
+ goto done;
-again:
- ret = memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE,
- nid, flags);
+ if (nid != NUMA_NO_NODE) {
+ found = memblock_find_in_range_node(size, align, start,
+ end, NUMA_NO_NODE,
+ flags);
+ if (found && !memblock_reserve(found, size))
+ goto done;
+ }
- if (!ret && (flags & MEMBLOCK_MIRROR)) {
+ if (flags & MEMBLOCK_MIRROR) {
flags &= ~MEMBLOCK_MIRROR;
+ pr_warn("Could not allocate %pap bytes of mirrored memory\n",
+ &size);
goto again;
}
- return ret;
-}
-
-phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
-{
- return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE,
- MEMBLOCK_NONE);
-}
-phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
-{
- phys_addr_t alloc;
-
- alloc = __memblock_alloc_base(size, align, max_addr);
+ return 0;
- if (alloc == 0)
- panic("ERROR: Failed to allocate %pa bytes below %pa.\n",
- &size, &max_addr);
+done:
+ /* Skip kmemleak for kasan_init() due to high volume. */
+ if (end != MEMBLOCK_ALLOC_KASAN)
+ /*
+ * The min_count is set to 0 so that memblock allocated
+ * blocks are never reported as leaks. This is because many
+ * of these blocks are only referred via the physical
+ * address which is not looked up by kmemleak.
+ */
+ kmemleak_alloc_phys(found, size, 0, 0);
- return alloc;
+ return found;
}
-phys_addr_t __init memblock_phys_alloc(phys_addr_t size, phys_addr_t align)
+/**
+ * memblock_phys_alloc_range - allocate a memory block inside specified range
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @start: the lower bound of the memory region to allocate (physical address)
+ * @end: the upper bound of the memory region to allocate (physical address)
+ *
+ * Allocate @size bytes in the range between @start and @end.
+ *
+ * Return: physical address of the allocated memory block on success,
+ * %0 on failure.
+ */
+phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
+ phys_addr_t align,
+ phys_addr_t start,
+ phys_addr_t end)
{
- return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+ return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
}
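
A hedged usage sketch for the new range-limited helper follows; the function name and the 16 MiB bound are invented for illustration (think of a device with a 24-bit DMA mask), and only memblock_phys_alloc_range() itself comes from the patch.

#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/sizes.h>

static phys_addr_t __init alloc_low_dma_buffer(phys_addr_t size)
{
	phys_addr_t phys;

	/* Both bounds are physical addresses, as documented above. */
	phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0, SZ_16M);
	if (!phys)
		pr_warn("%s: no memory below 16 MiB for %pa bytes\n",
			__func__, &size);

	return phys;	/* 0 means failure, as for the other variants */
}
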
+/**
+ * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * Allocates a memory block from the specified NUMA node. If the node
+ * has no available memory, it attempts to allocate from any node in the
+ * system.
+ *
+ * Return: physical address of the allocated memory block on success,
+ * %0 on failure.
+ */
phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
- phys_addr_t res = memblock_phys_alloc_nid(size, align, nid);
-
- if (res)
- return res;
- return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+ return memblock_alloc_range_nid(size, align, 0,
+ MEMBLOCK_ALLOC_ACCESSIBLE, nid);
}
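
And a sketch of the per-node variant; struct node_scratch and the loop are hypothetical, and the trailing comment reflects the fallback behaviour documented above (the returned block may be off-node if the preferred node has no memory).

#include <linux/cache.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/nodemask.h>

struct node_scratch {			/* hypothetical per-node descriptor */
	unsigned long counters[16];
};

static void __init alloc_per_node_scratch(void)
{
	int nid;

	for_each_online_node(nid) {
		phys_addr_t phys;

		phys = memblock_phys_alloc_try_nid(sizeof(struct node_scratch),
						   SMP_CACHE_BYTES, nid);
		if (!phys)
			panic("%s: cannot allocate %zu bytes for node %d\n",
			      __func__, sizeof(struct node_scratch), nid);
		/* phys may be off-node if @nid has no memory; see above. */
	}
}
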
/**
* @max_addr: the upper bound of the memory region to allocate (phys address)
* @nid: nid of the free area to find, %NUMA_NO_NODE for any node
*
- * The @min_addr limit is dropped if it can not be satisfied and the allocation
- * will fall back to memory below @min_addr. Also, allocation may fall back
- * to any node in the system if the specified node can not
- * hold the requested memory.
+ * Allocates a memory block using memblock_alloc_range_nid() and
+ * converts the returned physical address to a virtual one.
*
- * The allocation is performed from memory region limited by
- * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE.
- *
- * The phys address of allocated boot memory block is converted to virtual and
- * allocated memory is reset to 0.
- *
- * In addition, function sets the min_count to 0 using kmemleak_alloc for
- * allocated boot memory block, so that it is never reported as leaks.
+ * The @min_addr limit is dropped if it cannot be satisfied and the
+ * allocation will fall back to memory below @min_addr. Other constraints,
+ * such as node and mirrored memory, will be handled again in
+ * memblock_alloc_range_nid().
*
* Return:
* Virtual address of allocated memory block on success, NULL on failure.
int nid)
{
phys_addr_t alloc;
- void *ptr;
- enum memblock_flags flags = choose_memblock_flags();
-
- if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
- nid = NUMA_NO_NODE;
/*
* Detect any accidental use of these APIs after slab is ready, as at
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, nid);
- if (!align) {
- dump_stack();
- align = SMP_CACHE_BYTES;
- }
+ alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
- if (max_addr > memblock.current_limit)
- max_addr = memblock.current_limit;
-again:
- alloc = memblock_find_in_range_node(size, align, min_addr, max_addr,
- nid, flags);
- if (alloc && !memblock_reserve(alloc, size))
- goto done;
+ /* retry allocation without lower limit */
+ if (!alloc && min_addr)
+ alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid);
- if (nid != NUMA_NO_NODE) {
- alloc = memblock_find_in_range_node(size, align, min_addr,
- max_addr, NUMA_NO_NODE,
- flags);
- if (alloc && !memblock_reserve(alloc, size))
- goto done;
- }
-
- if (min_addr) {
- min_addr = 0;
- goto again;
- }
+ if (!alloc)
+ return NULL;
- if (flags & MEMBLOCK_MIRROR) {
- flags &= ~MEMBLOCK_MIRROR;
- pr_warn("Could not allocate %pap bytes of mirrored memory\n",
- &size);
- goto again;
- }
-
- return NULL;
-done:
- ptr = phys_to_virt(alloc);
-
- /* Skip kmemleak for kasan_init() due to high volume. */
- if (max_addr != MEMBLOCK_ALLOC_KASAN)
- /*
- * The min_count is set to 0 so that bootmem allocated
- * blocks are never reported as leaks. This is because many
- * of these blocks are only referred via the physical
- * address which is not looked up by kmemleak.
- */
- kmemleak_alloc(ptr, size, 0, 0);
-
- return ptr;
+ return phys_to_virt(alloc);
}
/**
{
void *ptr;
- memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n",
+ memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
__func__, (u64)size, (u64)align, nid, &min_addr,
&max_addr, (void *)_RET_IP_);
}
/**
- * memblock_alloc_try_nid_nopanic - allocate boot memory block
+ * memblock_alloc_try_nid - allocate boot memory block
* @size: size of memory block to be allocated in bytes
* @align: alignment of the region and block's size
* @min_addr: the lower bound of the memory region from where the allocation
* Return:
* Virtual address of allocated memory block on success, NULL on failure.
*/
-void * __init memblock_alloc_try_nid_nopanic(
- phys_addr_t size, phys_addr_t align,
- phys_addr_t min_addr, phys_addr_t max_addr,
- int nid)
-{
- void *ptr;
-
- memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
- __func__, (u64)size, (u64)align, nid, &min_addr,
- &max_addr, (void *)_RET_IP_);
-
- ptr = memblock_alloc_internal(size, align,
- min_addr, max_addr, nid);
- if (ptr)
- memset(ptr, 0, size);
- return ptr;
-}
-
-/**
- * memblock_alloc_try_nid - allocate boot memory block with panicking
- * @size: size of memory block to be allocated in bytes
- * @align: alignment of the region and block's size
- * @min_addr: the lower bound of the memory region from where the allocation
- * is preferred (phys address)
- * @max_addr: the upper bound of the memory region from where the allocation
- * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
- * allocate only from memory limited by memblock.current_limit value
- * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
- *
- * Public panicking version of memblock_alloc_try_nid_nopanic()
- * which provides debug information (including caller info), if enabled,
- * and panics if the request can not be satisfied.
- *
- * Return:
- * Virtual address of allocated memory block on success, NULL on failure.
- */
void * __init memblock_alloc_try_nid(
phys_addr_t size, phys_addr_t align,
phys_addr_t min_addr, phys_addr_t max_addr,
{
void *ptr;
- memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n",
+ memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
__func__, (u64)size, (u64)align, nid, &min_addr,
&max_addr, (void *)_RET_IP_);
ptr = memblock_alloc_internal(size, align,
min_addr, max_addr, nid);
- if (ptr) {
+ if (ptr)
memset(ptr, 0, size);
- return ptr;
- }
- panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa\n",
- __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr);
- return NULL;
+ return ptr;
}
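
With the panicking wrapper gone, memblock_alloc_try_nid() behaves like the former _nopanic variant and simply returns NULL on failure, so the decision of how fatal that is moves to the call site. A sketch of the resulting caller pattern, with an invented helper name:

#include <linux/cache.h>
#include <linux/kernel.h>
#include <linux/memblock.h>

static void * __init early_table_alloc(size_t size, int nid)
{
	void *ptr;

	ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, 0,
				     MEMBLOCK_ALLOC_ACCESSIBLE, nid);
	if (!ptr)
		panic("%s: failed to allocate %zu bytes on node %d\n",
		      __func__, size, nid);

	return ptr;	/* already zeroed by memblock_alloc_try_nid() */
}
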
/**
- * __memblock_free_late - free bootmem block pages directly to buddy allocator
+ * __memblock_free_late - free pages directly to buddy allocator
* @base: phys starting address of the boot memory block
* @size: size of the boot memory block in bytes
*
- * This is only useful when the bootmem allocator has already been torn
+ * This is only useful when the memblock allocator has already been torn
* down, but we are still initializing the system. Pages are released directly
- * to the buddy allocator, no bootmem metadata is updated because it is gone.
+ * to the buddy allocator.
*/
void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
{
phys_addr_t cursor, end;
end = base + size - 1;
- memblock_dbg("%s: [%pa-%pa] %pF\n",
+ memblock_dbg("%s: [%pa-%pa] %pS\n",
__func__, &base, &end, (void *)_RET_IP_);
kmemleak_free_part_phys(base, size);
cursor = PFN_UP(base);
* We add page table cache pages with PAGE_SIZE,
* (see pte_free_tlb()), flush the tlb if we need
*/
- tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
+ tlb_change_page_size(tlb, PAGE_SIZE);
pgd = pgd_offset(tlb->mm, addr);
do {
next = pgd_addr_end(addr, end);
dump_page(page, "bad pte");
pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
- pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n",
+ pr_alert("file:%pD fault:%ps mmap:%ps readpage:%ps\n",
vma->vm_file,
vma->vm_ops ? vma->vm_ops->fault : NULL,
vma->vm_file ? vma->vm_file->f_op->mmap : NULL,
pte_t *pte;
swp_entry_t entry;
- tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
+ tlb_change_page_size(tlb, PAGE_SIZE);
again:
init_rss_vec(rss);
start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
*/
if (force_flush) {
force_flush = 0;
- tlb_flush_mmu_free(tlb);
+ tlb_flush_mmu(tlb);
if (addr != end)
goto again;
}
WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte)));
goto out_unlock;
}
- entry = *pte;
- goto out_mkwrite;
- } else
- goto out_unlock;
+ entry = pte_mkyoung(*pte);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ if (ptep_set_access_flags(vma, addr, pte, entry, 1))
+ update_mmu_cache(vma, addr, pte);
+ }
+ goto out_unlock;
}
/* Ok, finally just insert the thing.. */
else
entry = pte_mkspecial(pfn_t_pte(pfn, prot));
-out_mkwrite:
if (mkwrite) {
entry = pte_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
total_scan += delta;
if (total_scan < 0) {
- pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
+ pr_err("shrink_slab: %pS negative objects to delete nr=%ld\n",
shrinker->scan_objects, total_scan);
total_scan = freeable;
next_deferred = nr;
* 10TB 320 32GB
*/
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
- struct mem_cgroup *memcg,
struct scan_control *sc, bool actual_reclaim)
{
enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
- if (memcg)
- refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
- else
- refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
/*
* When refaults are being observed, it means a new workingset
* is being established. Disable active list protection to get
* rid of the stale workingset quickly.
*/
+ refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
if (file && actual_reclaim && lruvec->refaults != refaults) {
inactive_ratio = 0;
} else {
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
- struct lruvec *lruvec, struct mem_cgroup *memcg,
- struct scan_control *sc)
+ struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru),
- memcg, sc, true))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
* anonymous pages on the LRU in eligible zones.
* Otherwise, the small LRU gets thrashed.
*/
- if (!inactive_list_is_low(lruvec, false, memcg, sc, false) &&
+ if (!inactive_list_is_low(lruvec, false, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
>> sc->priority) {
scan_balance = SCAN_ANON;
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
+ if (!inactive_list_is_low(lruvec, true, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan,
- lruvec, memcg, sc);
+ lruvec, sc);
}
}
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
}
unsigned long refaults;
struct lruvec *lruvec;
- if (memcg)
- refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
- else
- refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
lruvec->refaults = refaults;
} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
}
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);