1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
4 #include <linux/init.h>
5 #include <linux/kernel.h>
6 #include <linux/module.h>
8 #include <linux/device.h>
9 #include <linux/iommu.h>
10 #include <uapi/linux/idxd.h>
11 #include <linux/highmem.h>
12 #include <linux/sched/smt.h>
13 #include <crypto/internal/acompress.h>
16 #include "iaa_crypto.h"
17 #include "iaa_crypto_stats.h"
23 #define pr_fmt(fmt) "idxd: " IDXD_SUBDRIVER_NAME ": " fmt
25 #define IAA_ALG_PRIORITY 300
27 /* number of iaa instances probed */
28 static unsigned int nr_iaa;
29 static unsigned int nr_cpus;
30 static unsigned int nr_nodes;
31 static unsigned int nr_cpus_per_node;
33 /* Number of physical cpus sharing each iaa instance */
34 static unsigned int cpus_per_iaa;
36 static struct crypto_comp *deflate_generic_tfm;
38 /* Per-cpu lookup table for balanced wqs */
39 static struct wq_table_entry __percpu *wq_table;
41 static struct idxd_wq *wq_table_next_wq(int cpu)
43 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
45 if (++entry->cur_wq >= entry->n_wqs)
48 if (!entry->wqs[entry->cur_wq])
51 pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
52 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
53 entry->wqs[entry->cur_wq]->id, cpu);
55 return entry->wqs[entry->cur_wq];
58 static void wq_table_add(int cpu, struct idxd_wq *wq)
60 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
62 if (WARN_ON(entry->n_wqs == entry->max_wqs))
65 entry->wqs[entry->n_wqs++] = wq;
67 pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
68 entry->wqs[entry->n_wqs - 1]->idxd->id,
69 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
72 static void wq_table_free_entry(int cpu)
74 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
77 memset(entry, 0, sizeof(*entry));
80 static void wq_table_clear_entry(int cpu)
82 struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);
86 memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
89 LIST_HEAD(iaa_devices);
90 DEFINE_MUTEX(iaa_devices_lock);
92 /* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
93 static bool iaa_crypto_enabled;
94 static bool iaa_crypto_registered;
96 /* Verify results of IAA compress or not */
97 static bool iaa_verify_compress = true;
99 static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
101 return sprintf(buf, "%d\n", iaa_verify_compress);
104 static ssize_t verify_compress_store(struct device_driver *driver,
105 const char *buf, size_t count)
109 mutex_lock(&iaa_devices_lock);
111 if (iaa_crypto_enabled)
114 ret = kstrtobool(buf, &iaa_verify_compress);
120 mutex_unlock(&iaa_devices_lock);
124 static DRIVER_ATTR_RW(verify_compress);
127 * The iaa crypto driver supports three 'sync' methods determining how
128 * compressions and decompressions are performed:
130 * - sync: the compression or decompression completes before
131 * returning. This is the mode used by the async crypto
132 * interface when the sync mode is set to 'sync' and by
133 * the sync crypto interface regardless of setting.
135 * - async: the compression or decompression is submitted and returns
136 * immediately. Completion interrupts are not used so
137 * the caller is responsible for polling the descriptor
138 * for completion. This mode is applicable to only the
139 * async crypto interface and is ignored for anything else.
142 * - async_irq: the compression or decompression is submitted and
143 * returns immediately. Completion interrupts are
144 * enabled so the caller can wait for the completion and
145 * yield to other threads. When the compression or
146 * decompression completes, the completion is signaled
147 * and the caller awakened. This mode is applicable to
148 * only the async crypto interface and is ignored for anything else.
151 * These modes can be set using the iaa_crypto sync_mode driver attribute.
156 static bool async_mode;
161 * set_iaa_sync_mode - Set IAA sync mode
162 * @name: The name of the sync mode
164 * Make the IAA sync mode named @name the current sync mode used by
165 * compression/decompression.
168 static int set_iaa_sync_mode(const char *name)
172 if (sysfs_streq(name, "sync")) {
175 } else if (sysfs_streq(name, "async")) {
178 } else if (sysfs_streq(name, "async_irq")) {
188 static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
192 if (!async_mode && !use_irq)
193 ret = sprintf(buf, "%s\n", "sync");
194 else if (async_mode && !use_irq)
195 ret = sprintf(buf, "%s\n", "async");
196 else if (async_mode && use_irq)
197 ret = sprintf(buf, "%s\n", "async_irq");
202 static ssize_t sync_mode_store(struct device_driver *driver,
203 const char *buf, size_t count)
207 mutex_lock(&iaa_devices_lock);
209 if (iaa_crypto_enabled)
212 ret = set_iaa_sync_mode(buf);
216 mutex_unlock(&iaa_devices_lock);
220 static DRIVER_ATTR_RW(sync_mode);
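/*
 * Summary of the mapping shown by sync_mode_show() above: "sync" leaves
 * async_mode and use_irq false, "async" sets async_mode only, and
 * "async_irq" sets both.  The current values are copied into each tfm's
 * iaa_compression_ctx by compression_ctx_init() at tfm init time.
 * Assuming the driver is bound at the usual idxd sub-driver sysfs path,
 * the mode can be changed with, e.g.:
 *
 *	echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 */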
222 static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];
224 static int find_empty_iaa_compression_mode(void)
228 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
229 if (iaa_compression_modes[i])
237 static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
239 struct iaa_compression_mode *mode;
242 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
243 mode = iaa_compression_modes[i];
247 if (!strcmp(mode->name, name)) {
249 return iaa_compression_modes[i];
256 static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
259 kfree(mode->ll_table);
260 kfree(mode->d_table);
266 * IAA Compression modes are defined by an ll_table and a d_table.
267 * These tables are typically generated and captured using statistics
268 * collected from running actual compress/decompress workloads.
270 * A module or other kernel code can add and remove compression modes
271 * with a given name using the exported @add_iaa_compression_mode()
272 * and @remove_iaa_compression_mode() functions.
274 * When a new compression mode is added, the tables are saved in a
275 * global compression mode list. When IAA devices are added, a
276 * per-IAA device dma mapping is created for each IAA device, for each
277 * compression mode. These are the tables used to do the actual
278 * compression/decompression and are unmapped if/when the devices are
279 * removed. Currently, compression modes must be added before any
280 * device is added, and removed after all devices have been removed.
284 * remove_iaa_compression_mode - Remove an IAA compression mode
285 * @name: The name of the compression mode to remove
287 * Remove the IAA compression mode named @name.
289 void remove_iaa_compression_mode(const char *name)
291 struct iaa_compression_mode *mode;
294 mutex_lock(&iaa_devices_lock);
296 if (!list_empty(&iaa_devices))
299 mode = find_iaa_compression_mode(name, &idx);
301 free_iaa_compression_mode(mode);
302 iaa_compression_modes[idx] = NULL;
305 mutex_unlock(&iaa_devices_lock);
307 EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
310 * add_iaa_compression_mode - Add an IAA compression mode
311 * @name: The name the compression mode will be known as
312 * @ll_table: The ll table
313 * @ll_table_size: The ll table size in bytes
314 * @d_table: The d table
315 * @d_table_size: The d table size in bytes
316 * @init: Optional callback function to init the compression mode data
317 * @free: Optional callback function to free the compression mode data
319 * Add a new IAA compression mode named @name.
321 * Returns 0 if successful, errcode otherwise.
323 int add_iaa_compression_mode(const char *name,
328 iaa_dev_comp_init_fn_t init,
329 iaa_dev_comp_free_fn_t free)
331 struct iaa_compression_mode *mode;
332 int idx, ret = -ENOMEM;
334 mutex_lock(&iaa_devices_lock);
336 if (!list_empty(&iaa_devices)) {
341 mode = kzalloc(sizeof(*mode), GFP_KERNEL);
345 mode->name = kstrdup(name, GFP_KERNEL);
350 mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL);
353 mode->ll_table_size = ll_table_size;
357 mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL);
360 mode->d_table_size = d_table_size;
366 idx = find_empty_iaa_compression_mode();
370 pr_debug("IAA compression mode %s added at idx %d\n",
373 iaa_compression_modes[idx] = mode;
377 mutex_unlock(&iaa_devices_lock);
381 free_iaa_compression_mode(mode);
384 EXPORT_SYMBOL_GPL(add_iaa_compression_mode);
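/*
 * Usage sketch (hypothetical caller, not part of this file): a module
 * supplying its own canned Huffman tables could register a custom mode
 * and later drop it roughly as follows, where "my-mode", my_ll_table[]
 * and my_d_table[] are made-up names for tables generated offline from
 * workload statistics:
 *
 *	ret = add_iaa_compression_mode("my-mode",
 *				       my_ll_table, sizeof(my_ll_table),
 *				       my_d_table, sizeof(my_d_table),
 *				       NULL, NULL);
 *	...
 *	remove_iaa_compression_mode("my-mode");
 *
 * As noted above, modes must be added before any IAA device is added
 * and removed only after all devices have been removed.
 */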
386 static struct iaa_device_compression_mode *
387 get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
389 return iaa_device->compression_modes[idx];
392 static void free_device_compression_mode(struct iaa_device *iaa_device,
393 struct iaa_device_compression_mode *device_mode)
395 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
396 struct device *dev = &iaa_device->idxd->pdev->dev;
398 kfree(device_mode->name);
400 if (device_mode->aecs_comp_table)
401 dma_free_coherent(dev, size, device_mode->aecs_comp_table,
402 device_mode->aecs_comp_table_dma_addr);
406 #define IDXD_OP_FLAG_AECS_RW_TGLS 0x400000
407 #define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
408 #define IAX_AECS_COMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
409 #define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
410 #define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
411 IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
412 IDXD_OP_FLAG_AECS_RW_TGLS)
414 static int check_completion(struct device *dev,
415 struct iax_completion_record *comp,
419 static int init_device_compression_mode(struct iaa_device *iaa_device,
420 struct iaa_compression_mode *mode,
421 int idx, struct idxd_wq *wq)
423 size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
424 struct device *dev = &iaa_device->idxd->pdev->dev;
425 struct iaa_device_compression_mode *device_mode;
428 device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
432 device_mode->name = kstrdup(mode->name, GFP_KERNEL);
433 if (!device_mode->name)
436 device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
437 &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
438 if (!device_mode->aecs_comp_table)
441 /* Add Huffman table to aecs */
442 memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
443 memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
444 memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
447 ret = mode->init(device_mode);
452 /* mode index should match iaa_compression_modes idx */
453 iaa_device->compression_modes[idx] = device_mode;
455 pr_debug("IAA %s compression mode initialized for iaa device %d\n",
456 mode->name, iaa_device->idxd->id);
462 pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
463 mode->name, iaa_device->idxd->id);
465 free_device_compression_mode(iaa_device, device_mode);
469 static int init_device_compression_modes(struct iaa_device *iaa_device,
472 struct iaa_compression_mode *mode;
475 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
476 mode = iaa_compression_modes[i];
480 ret = init_device_compression_mode(iaa_device, mode, i, wq);
488 static void remove_device_compression_modes(struct iaa_device *iaa_device)
490 struct iaa_device_compression_mode *device_mode;
493 for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
494 device_mode = iaa_device->compression_modes[i];
498 if (iaa_compression_modes[i]->free)
499 iaa_compression_modes[i]->free(device_mode);
500 free_device_compression_mode(iaa_device, device_mode);
501 iaa_device->compression_modes[i] = NULL;
505 static struct iaa_device *iaa_device_alloc(void)
507 struct iaa_device *iaa_device;
509 iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
513 INIT_LIST_HEAD(&iaa_device->wqs);
518 static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
520 struct iaa_wq *iaa_wq;
522 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
523 if (iaa_wq->wq == wq)
530 static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
532 struct iaa_device *iaa_device;
534 iaa_device = iaa_device_alloc();
538 iaa_device->idxd = idxd;
540 list_add_tail(&iaa_device->list, &iaa_devices);
547 static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
551 ret = init_device_compression_modes(iaa_device, iaa_wq->wq);
558 static void del_iaa_device(struct iaa_device *iaa_device)
560 list_del(&iaa_device->list);
565 static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
566 struct iaa_wq **new_wq)
568 struct idxd_device *idxd = iaa_device->idxd;
569 struct pci_dev *pdev = idxd->pdev;
570 struct device *dev = &pdev->dev;
571 struct iaa_wq *iaa_wq;
573 iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
578 iaa_wq->iaa_device = iaa_device;
579 idxd_wq_set_private(wq, iaa_wq);
581 list_add_tail(&iaa_wq->list, &iaa_device->wqs);
588 dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
589 wq->id, iaa_device->idxd->id, iaa_device->n_wq);
594 static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
596 struct idxd_device *idxd = iaa_device->idxd;
597 struct pci_dev *pdev = idxd->pdev;
598 struct device *dev = &pdev->dev;
599 struct iaa_wq *iaa_wq;
601 list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
602 if (iaa_wq->wq == wq) {
603 list_del(&iaa_wq->list);
606 dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
607 wq->id, iaa_device->idxd->id,
608 iaa_device->n_wq, nr_iaa);
610 if (iaa_device->n_wq == 0)
611 del_iaa_device(iaa_device);
617 static void clear_wq_table(void)
621 for (cpu = 0; cpu < nr_cpus; cpu++)
622 wq_table_clear_entry(cpu);
624 pr_debug("cleared wq table\n");
627 static void free_iaa_device(struct iaa_device *iaa_device)
632 remove_device_compression_modes(iaa_device);
636 static void __free_iaa_wq(struct iaa_wq *iaa_wq)
638 struct iaa_device *iaa_device;
643 iaa_device = iaa_wq->iaa_device;
644 if (iaa_device->n_wq == 0)
645 free_iaa_device(iaa_wq->iaa_device);
648 static void free_iaa_wq(struct iaa_wq *iaa_wq)
652 __free_iaa_wq(iaa_wq);
657 idxd_wq_set_private(wq, NULL);
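/*
 * iaa_wq_get()/iaa_wq_put() pin an iaa_wq across a request under
 * idxd->dev_lock.  Once ->remove has been flagged and the last
 * reference is dropped, iaa_wq_put() clears the wq private data and
 * releases the iaa_wq (and, via __free_iaa_wq(), the parent iaa_device
 * once its last wq is gone).
 */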
660 static int iaa_wq_get(struct idxd_wq *wq)
662 struct idxd_device *idxd = wq->idxd;
663 struct iaa_wq *iaa_wq;
666 spin_lock(&idxd->dev_lock);
667 iaa_wq = idxd_wq_get_private(wq);
668 if (iaa_wq && !iaa_wq->remove) {
674 spin_unlock(&idxd->dev_lock);
679 static int iaa_wq_put(struct idxd_wq *wq)
681 struct idxd_device *idxd = wq->idxd;
682 struct iaa_wq *iaa_wq;
686 spin_lock(&idxd->dev_lock);
687 iaa_wq = idxd_wq_get_private(wq);
690 if (iaa_wq->ref == 0 && iaa_wq->remove) {
691 idxd_wq_set_private(wq, NULL);
698 spin_unlock(&idxd->dev_lock);
700 __free_iaa_wq(iaa_wq);
707 static void free_wq_table(void)
711 for (cpu = 0; cpu < nr_cpus; cpu++)
712 wq_table_free_entry(cpu);
714 free_percpu(wq_table);
716 pr_debug("freed wq table\n");
719 static int alloc_wq_table(int max_wqs)
721 struct wq_table_entry *entry;
724 wq_table = alloc_percpu(struct wq_table_entry);
728 for (cpu = 0; cpu < nr_cpus; cpu++) {
729 entry = per_cpu_ptr(wq_table, cpu);
730 entry->wqs = kcalloc(max_wqs, sizeof(struct idxd_wq *), GFP_KERNEL);
736 entry->max_wqs = max_wqs;
739 pr_debug("initialized wq table\n");
744 static int save_iaa_wq(struct idxd_wq *wq)
746 struct iaa_device *iaa_device, *found = NULL;
747 struct idxd_device *idxd;
748 struct pci_dev *pdev;
752 list_for_each_entry(iaa_device, &iaa_devices, list) {
753 if (iaa_device->idxd == wq->idxd) {
754 idxd = iaa_device->idxd;
758 * Check to see that we don't already have this wq.
759 * Shouldn't happen but we don't control probing.
761 if (iaa_has_wq(iaa_device, wq)) {
762 dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
769 ret = add_iaa_wq(iaa_device, wq, NULL);
778 struct iaa_device *new_device;
779 struct iaa_wq *new_wq;
781 new_device = add_iaa_device(wq->idxd);
787 ret = add_iaa_wq(new_device, wq, &new_wq);
789 del_iaa_device(new_device);
790 free_iaa_device(new_device);
794 ret = init_iaa_device(new_device, new_wq);
796 del_iaa_wq(new_device, new_wq->wq);
797 del_iaa_device(new_device);
803 if (WARN_ON(nr_iaa == 0))
806 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
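	/*
	 * Illustrative numbers: with 2 nodes, 64 cpus per node and 8 IAA
	 * instances, cpus_per_iaa = (2 * 64) / 8 = 16, i.e. each IAA
	 * instance ends up serving 16 cpus in the rebalanced wq table.
	 */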
813 static void remove_iaa_wq(struct idxd_wq *wq)
815 struct iaa_device *iaa_device;
817 list_for_each_entry(iaa_device, &iaa_devices, list) {
818 if (iaa_has_wq(iaa_device, wq)) {
819 del_iaa_wq(iaa_device, wq);
825 cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
832 static int wq_table_add_wqs(int iaa, int cpu)
834 struct iaa_device *iaa_device, *found_device = NULL;
835 int ret = 0, cur_iaa = 0, n_wqs_added = 0;
836 struct idxd_device *idxd;
837 struct iaa_wq *iaa_wq;
838 struct pci_dev *pdev;
841 list_for_each_entry(iaa_device, &iaa_devices, list) {
842 idxd = iaa_device->idxd;
846 if (cur_iaa != iaa) {
851 found_device = iaa_device;
852 dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
853 found_device->idxd->id, cur_iaa);
858 found_device = list_first_entry_or_null(&iaa_devices,
859 struct iaa_device, list);
861 pr_debug("couldn't find any iaa devices with wqs!\n");
867 idxd = found_device->idxd;
870 dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
871 found_device->idxd->id, cur_iaa);
874 list_for_each_entry(iaa_wq, &found_device->wqs, list) {
875 wq_table_add(cpu, iaa_wq->wq);
876 pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
877 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
882 pr_debug("couldn't find any iaa wqs!\n");
891 * Rebalance the wq table so that given a cpu, it's easy to find the
892 * closest IAA instance. The idea is to try to choose the most
893 * appropriate IAA instance for a caller and spread available
894 * workqueues around to clients.
896 static void rebalance_wq_table(void)
898 const struct cpumask *node_cpus;
899 int node, cpu, iaa = -1;
904 pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
905 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);
910 for (cpu = 0; cpu < nr_cpus; cpu++) {
911 if (WARN_ON(wq_table_add_wqs(0, cpu))) {
912 pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu);
920 for_each_node_with_cpus(node) {
921 node_cpus = cpumask_of_node(node);
923 for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) {
924 int node_cpu = cpumask_nth(cpu, node_cpus);
926 if (WARN_ON(node_cpu >= nr_cpu_ids)) {
927 pr_debug("node_cpu %d doesn't exist!\n", node_cpu);
931 if ((cpu % cpus_per_iaa) == 0)
934 if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) {
935 pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
942 static inline int check_completion(struct device *dev,
943 struct iax_completion_record *comp,
947 char *op_str = compress ? "compress" : "decompress";
950 while (!comp->status) {
956 if (comp->status != IAX_COMP_SUCCESS) {
957 if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
959 dev_dbg(dev, "%s timed out, size=0x%x\n",
960 op_str, comp->output_size);
961 update_completion_timeout_errs();
965 if (comp->status == IAA_ANALYTICS_ERROR &&
966 comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
968 dev_dbg(dev, "compressed > uncompressed size,"
969 " not compressing, size=0x%x\n",
971 update_completion_comp_buf_overflow_errs();
975 if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
981 dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
982 op_str, comp->status, comp->error_code, comp->output_size);
983 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
984 update_completion_einval_errs();
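/*
 * Software fallback: decompress using the kernel's generic deflate
 * tfm (deflate_generic_tfm, allocated at module init).  The decompress
 * paths below fall back to this when the hardware completion reports
 * IAA_ANALYTICS_ERROR.
 */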
992 static int deflate_generic_decompress(struct acomp_req *req)
997 src = kmap_local_page(sg_page(req->src)) + req->src->offset;
998 dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
1000 ret = crypto_comp_decompress(deflate_generic_tfm,
1001 src, req->slen, dst, &req->dlen);
1006 update_total_sw_decomp_calls();
1011 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1012 struct acomp_req *req,
1013 dma_addr_t *src_addr, dma_addr_t *dst_addr);
1015 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1017 dma_addr_t src_addr, unsigned int slen,
1018 dma_addr_t dst_addr, unsigned int *dlen,
1019 u32 compression_crc);
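/*
 * Descriptor completion callback used for requests submitted in the
 * async/async_irq modes: check the completion record, fall back to
 * deflate_generic_decompress() on an analytics error, optionally run
 * the compress-verify pass, then unmap the scatterlists, complete the
 * acomp request and release the descriptor and wq reference.
 */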
1021 static void iaa_desc_complete(struct idxd_desc *idxd_desc,
1022 enum idxd_complete_type comp_type,
1023 bool free_desc, void *__ctx,
1026 struct iaa_device_compression_mode *active_compression_mode;
1027 struct iaa_compression_ctx *compression_ctx;
1028 struct crypto_ctx *ctx = __ctx;
1029 struct iaa_device *iaa_device;
1030 struct idxd_device *idxd;
1031 struct iaa_wq *iaa_wq;
1032 struct pci_dev *pdev;
1036 compression_ctx = crypto_tfm_ctx(ctx->tfm);
1038 iaa_wq = idxd_wq_get_private(idxd_desc->wq);
1039 iaa_device = iaa_wq->iaa_device;
1040 idxd = iaa_device->idxd;
1044 active_compression_mode = get_iaa_device_compression_mode(iaa_device,
1045 compression_ctx->mode);
1046 dev_dbg(dev, "%s: compression mode %s,"
1047 " ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
1048 active_compression_mode->name,
1049 ctx->src_addr, ctx->dst_addr);
1051 ret = check_completion(dev, idxd_desc->iax_completion,
1052 ctx->compress, false);
1054 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1055 if (!ctx->compress &&
1056 idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1057 pr_warn("%s: falling back to deflate-generic decompress, "
1058 "analytics error code %x\n", __func__,
1059 idxd_desc->iax_completion->error_code);
1060 ret = deflate_generic_decompress(ctx->req);
1062 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1072 ctx->req->dlen = idxd_desc->iax_completion->output_size;
1076 if (ctx->compress) {
1077 update_total_comp_bytes_out(ctx->req->dlen);
1078 update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
1080 update_total_decomp_bytes_in(ctx->req->slen);
1081 update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen);
1084 if (ctx->compress && compression_ctx->verify_compress) {
1085 dma_addr_t src_addr, dst_addr;
1086 u32 compression_crc;
1088 compression_crc = idxd_desc->iax_completion->crc;
1090 ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
1092 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1097 ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
1098 ctx->req->slen, dst_addr, &ctx->req->dlen,
1101 dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
1105 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
1106 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);
1111 dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
1112 dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
1115 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1117 if (ctx->req->base.complete)
1118 acomp_request_complete(ctx->req, err);
1121 idxd_free_desc(idxd_desc->wq, idxd_desc);
1122 iaa_wq_put(idxd_desc->wq);
1125 static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
1127 dma_addr_t src_addr, unsigned int slen,
1128 dma_addr_t dst_addr, unsigned int *dlen,
1129 u32 *compression_crc,
1132 struct iaa_device_compression_mode *active_compression_mode;
1133 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1134 struct iaa_device *iaa_device;
1135 struct idxd_desc *idxd_desc;
1136 struct iax_hw_desc *desc;
1137 struct idxd_device *idxd;
1138 struct iaa_wq *iaa_wq;
1139 struct pci_dev *pdev;
1143 iaa_wq = idxd_wq_get_private(wq);
1144 iaa_device = iaa_wq->iaa_device;
1145 idxd = iaa_device->idxd;
1149 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1151 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1152 if (IS_ERR(idxd_desc)) {
1153 dev_dbg(dev, "idxd descriptor allocation failed\n");
1154 dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
1155 return PTR_ERR(idxd_desc);
1157 desc = idxd_desc->iax_hw;
1159 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
1160 IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
1161 desc->opcode = IAX_OPCODE_COMPRESS;
1162 desc->compr_flags = IAA_COMP_FLAGS;
1165 desc->src1_addr = (u64)src_addr;
1166 desc->src1_size = slen;
1167 desc->dst_addr = (u64)dst_addr;
1168 desc->max_dst_size = *dlen;
1169 desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
1170 desc->src2_size = sizeof(struct aecs_comp_table_record);
1171 desc->completion_addr = idxd_desc->compl_dma;
1173 if (ctx->use_irq && !disable_async) {
1174 desc->flags |= IDXD_OP_FLAG_RCI;
1176 idxd_desc->crypto.req = req;
1177 idxd_desc->crypto.tfm = tfm;
1178 idxd_desc->crypto.src_addr = src_addr;
1179 idxd_desc->crypto.dst_addr = dst_addr;
1180 idxd_desc->crypto.compress = true;
1182 dev_dbg(dev, "%s use_async_irq: compression mode %s,"
1183 " src_addr %llx, dst_addr %llx\n", __func__,
1184 active_compression_mode->name,
1185 src_addr, dst_addr);
1186 } else if (ctx->async_mode && !disable_async)
1187 req->base.data = idxd_desc;
1189 dev_dbg(dev, "%s: compression mode %s,"
1190 " desc->src1_addr %llx, desc->src1_size %d,"
1191 " desc->dst_addr %llx, desc->max_dst_size %d,"
1192 " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1193 active_compression_mode->name,
1194 desc->src1_addr, desc->src1_size, desc->dst_addr,
1195 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1197 ret = idxd_submit_desc(wq, idxd_desc);
1199 dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1204 update_total_comp_calls();
1205 update_wq_comp_calls(wq);
1207 if (ctx->async_mode && !disable_async) {
1209 dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1213 ret = check_completion(dev, idxd_desc->iax_completion, true, false);
1215 dev_dbg(dev, "check_completion failed ret=%d\n", ret);
1219 *dlen = idxd_desc->iax_completion->output_size;
1222 update_total_comp_bytes_out(*dlen);
1223 update_wq_comp_bytes(wq, *dlen);
1225 *compression_crc = idxd_desc->iax_completion->crc;
1227 if (!ctx->async_mode || disable_async)
1228 idxd_free_desc(wq, idxd_desc);
1232 idxd_free_desc(wq, idxd_desc);
1233 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1238 static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
1239 struct acomp_req *req,
1240 dma_addr_t *src_addr, dma_addr_t *dst_addr)
1245 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1246 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1248 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1249 if (nr_sgs <= 0 || nr_sgs > 1) {
1250 dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
1251 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1252 iaa_wq->wq->id, ret);
1256 *src_addr = sg_dma_address(req->src);
1257 dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1258 " req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
1259 req->src, req->slen, sg_dma_len(req->src));
1261 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1262 if (nr_sgs <= 0 || nr_sgs > 1) {
1263 dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
1264 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1265 iaa_wq->wq->id, ret);
1267 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1270 *dst_addr = sg_dma_address(req->dst);
1271 dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1272 " req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
1273 req->dst, req->dlen, sg_dma_len(req->dst));
1278 static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
1280 dma_addr_t src_addr, unsigned int slen,
1281 dma_addr_t dst_addr, unsigned int *dlen,
1282 u32 compression_crc)
1284 struct iaa_device_compression_mode *active_compression_mode;
1285 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1286 struct iaa_device *iaa_device;
1287 struct idxd_desc *idxd_desc;
1288 struct iax_hw_desc *desc;
1289 struct idxd_device *idxd;
1290 struct iaa_wq *iaa_wq;
1291 struct pci_dev *pdev;
1295 iaa_wq = idxd_wq_get_private(wq);
1296 iaa_device = iaa_wq->iaa_device;
1297 idxd = iaa_device->idxd;
1301 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1303 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1304 if (IS_ERR(idxd_desc)) {
1305 dev_dbg(dev, "idxd descriptor allocation failed\n");
1306 dev_dbg(dev, "iaa compress failed: ret=%ld\n",
1307 PTR_ERR(idxd_desc));
1308 return PTR_ERR(idxd_desc);
1310 desc = idxd_desc->iax_hw;
1312 /* Verify (optional) - decompress and check crc, suppress dest write */
1314 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1315 desc->opcode = IAX_OPCODE_DECOMPRESS;
1316 desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
1319 desc->src1_addr = (u64)dst_addr;
1320 desc->src1_size = *dlen;
1321 desc->dst_addr = (u64)src_addr;
1322 desc->max_dst_size = slen;
1323 desc->completion_addr = idxd_desc->compl_dma;
1325 dev_dbg(dev, "(verify) compression mode %s,"
1326 " desc->src1_addr %llx, desc->src1_size %d,"
1327 " desc->dst_addr %llx, desc->max_dst_size %d,"
1328 " desc->src2_addr %llx, desc->src2_size %d\n",
1329 active_compression_mode->name,
1330 desc->src1_addr, desc->src1_size, desc->dst_addr,
1331 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1333 ret = idxd_submit_desc(wq, idxd_desc);
1335 dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
1339 ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1341 dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
1345 if (compression_crc != idxd_desc->iax_completion->crc) {
1347 dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
1348 " comp=0x%x, decomp=0x%x\n", compression_crc,
1349 idxd_desc->iax_completion->crc);
1350 print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
1351 8, 1, idxd_desc->iax_completion, 64, 0);
1355 idxd_free_desc(wq, idxd_desc);
1359 idxd_free_desc(wq, idxd_desc);
1360 dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);
1365 static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
1367 dma_addr_t src_addr, unsigned int slen,
1368 dma_addr_t dst_addr, unsigned int *dlen,
1371 struct iaa_device_compression_mode *active_compression_mode;
1372 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1373 struct iaa_device *iaa_device;
1374 struct idxd_desc *idxd_desc;
1375 struct iax_hw_desc *desc;
1376 struct idxd_device *idxd;
1377 struct iaa_wq *iaa_wq;
1378 struct pci_dev *pdev;
1382 iaa_wq = idxd_wq_get_private(wq);
1383 iaa_device = iaa_wq->iaa_device;
1384 idxd = iaa_device->idxd;
1388 active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);
1390 idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
1391 if (IS_ERR(idxd_desc)) {
1392 dev_dbg(dev, "idxd descriptor allocation failed\n");
1393 dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
1394 PTR_ERR(idxd_desc));
1395 return PTR_ERR(idxd_desc);
1397 desc = idxd_desc->iax_hw;
1399 desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
1400 desc->opcode = IAX_OPCODE_DECOMPRESS;
1401 desc->max_dst_size = PAGE_SIZE;
1402 desc->decompr_flags = IAA_DECOMP_FLAGS;
1405 desc->src1_addr = (u64)src_addr;
1406 desc->dst_addr = (u64)dst_addr;
1407 desc->max_dst_size = *dlen;
1408 desc->src1_size = slen;
1409 desc->completion_addr = idxd_desc->compl_dma;
1411 if (ctx->use_irq && !disable_async) {
1412 desc->flags |= IDXD_OP_FLAG_RCI;
1414 idxd_desc->crypto.req = req;
1415 idxd_desc->crypto.tfm = tfm;
1416 idxd_desc->crypto.src_addr = src_addr;
1417 idxd_desc->crypto.dst_addr = dst_addr;
1418 idxd_desc->crypto.compress = false;
1420 dev_dbg(dev, "%s: use_async_irq compression mode %s,"
1421 " src_addr %llx, dst_addr %llx\n", __func__,
1422 active_compression_mode->name,
1423 src_addr, dst_addr);
1424 } else if (ctx->async_mode && !disable_async)
1425 req->base.data = idxd_desc;
1427 dev_dbg(dev, "%s: decompression mode %s,"
1428 " desc->src1_addr %llx, desc->src1_size %d,"
1429 " desc->dst_addr %llx, desc->max_dst_size %d,"
1430 " desc->src2_addr %llx, desc->src2_size %d\n", __func__,
1431 active_compression_mode->name,
1432 desc->src1_addr, desc->src1_size, desc->dst_addr,
1433 desc->max_dst_size, desc->src2_addr, desc->src2_size);
1435 ret = idxd_submit_desc(wq, idxd_desc);
1437 dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
1442 update_total_decomp_calls();
1443 update_wq_decomp_calls(wq);
1445 if (ctx->async_mode && !disable_async) {
1447 dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
1451 ret = check_completion(dev, idxd_desc->iax_completion, false, false);
1453 dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
1454 if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
1455 pr_warn("%s: falling back to deflate-generic decompress, "
1456 "analytics error code %x\n", __func__,
1457 idxd_desc->iax_completion->error_code);
1458 ret = deflate_generic_decompress(req);
1460 dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
1468 req->dlen = idxd_desc->iax_completion->output_size;
1473 if (!ctx->async_mode || disable_async)
1474 idxd_free_desc(wq, idxd_desc);
1477 update_total_decomp_bytes_in(slen);
1478 update_wq_decomp_bytes(wq, slen);
1482 idxd_free_desc(wq, idxd_desc);
1483 dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);
1488 static int iaa_comp_acompress(struct acomp_req *req)
1490 struct iaa_compression_ctx *compression_ctx;
1491 struct crypto_tfm *tfm = req->base.tfm;
1492 dma_addr_t src_addr, dst_addr;
1493 bool disable_async = false;
1494 int nr_sgs, cpu, ret = 0;
1495 struct iaa_wq *iaa_wq;
1496 u32 compression_crc;
1501 compression_ctx = crypto_tfm_ctx(tfm);
1503 if (!iaa_crypto_enabled) {
1504 pr_debug("iaa_crypto disabled, not compressing\n");
1508 if (!req->src || !req->slen) {
1509 pr_debug("invalid src, not compressing\n");
1514 wq = wq_table_next_wq(cpu);
1517 pr_debug("no wq configured for cpu=%d\n", cpu);
1521 ret = iaa_wq_get(wq);
1523 pr_debug("no wq available for cpu=%d\n", cpu);
1527 iaa_wq = idxd_wq_get_private(wq);
1530 gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
1532 /* incompressible data will always be < 2 * slen */
1533 req->dlen = 2 * req->slen;
1534 order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
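		/*
		 * e.g. assuming 4 KB pages, slen = 4 KB gives dlen = 8 KB
		 * and order = 1 (a two-page destination buffer).
		 */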
1535 req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1541 disable_async = true;
1544 dev = &wq->idxd->pdev->dev;
1546 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1547 if (nr_sgs <= 0 || nr_sgs > 1) {
1548 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1549 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1550 iaa_wq->wq->id, ret);
1554 src_addr = sg_dma_address(req->src);
1555 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1556 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1557 req->src, req->slen, sg_dma_len(req->src));
1559 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1560 if (nr_sgs <= 0 || nr_sgs > 1) {
1561 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1562 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1563 iaa_wq->wq->id, ret);
1567 dst_addr = sg_dma_address(req->dst);
1568 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1569 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1570 req->dst, req->dlen, sg_dma_len(req->dst));
1572 ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
1573 &req->dlen, &compression_crc, disable_async);
1574 if (ret == -EINPROGRESS)
1577 if (!ret && compression_ctx->verify_compress) {
1578 ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
1580 dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
1584 ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
1585 dst_addr, &req->dlen, compression_crc);
1587 dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);
1589 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
1590 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
1596 dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);
1598 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1600 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1605 sgl_free_order(req->dst, order);
1610 static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
1612 gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
1613 GFP_KERNEL : GFP_ATOMIC;
1614 struct crypto_tfm *tfm = req->base.tfm;
1615 dma_addr_t src_addr, dst_addr;
1616 int nr_sgs, cpu, ret = 0;
1617 struct iaa_wq *iaa_wq;
1623 wq = wq_table_next_wq(cpu);
1626 pr_debug("no wq configured for cpu=%d\n", cpu);
1630 ret = iaa_wq_get(wq);
1632 pr_debug("no wq available for cpu=%d\n", cpu);
1636 iaa_wq = idxd_wq_get_private(wq);
1638 dev = &wq->idxd->pdev->dev;
1640 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1641 if (nr_sgs <= 0 || nr_sgs > 1) {
1642 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1643 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1644 iaa_wq->wq->id, ret);
1648 src_addr = sg_dma_address(req->src);
1649 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1650 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1651 req->src, req->slen, sg_dma_len(req->src));
1653 req->dlen = 4 * req->slen; /* start with ~avg comp ratio */
1655 order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE);
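	/*
	 * e.g. assuming 4 KB pages, slen = 4 KB gives an initial dlen of
	 * 16 KB and order = 2 (a four-page destination buffer).
	 */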
1656 req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL);
1663 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1664 if (nr_sgs <= 0 || nr_sgs > 1) {
1665 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1666 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1667 iaa_wq->wq->id, ret);
1672 dst_addr = sg_dma_address(req->dst);
1673 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1674 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1675 req->dst, req->dlen, sg_dma_len(req->dst));
1676 ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1677 dst_addr, &req->dlen, true);
1678 if (ret == -EOVERFLOW) {
1679 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1681 if (req->dlen > CRYPTO_ACOMP_DST_MAX)
1687 dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1689 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1691 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1696 sgl_free_order(req->dst, order);
1701 static int iaa_comp_adecompress(struct acomp_req *req)
1703 struct crypto_tfm *tfm = req->base.tfm;
1704 dma_addr_t src_addr, dst_addr;
1705 int nr_sgs, cpu, ret = 0;
1706 struct iaa_wq *iaa_wq;
1710 if (!iaa_crypto_enabled) {
1711 pr_debug("iaa_crypto disabled, not decompressing\n");
1715 if (!req->src || !req->slen) {
1716 pr_debug("invalid src, not decompressing\n");
1721 return iaa_comp_adecompress_alloc_dest(req);
1724 wq = wq_table_next_wq(cpu);
1727 pr_debug("no wq configured for cpu=%d\n", cpu);
1731 ret = iaa_wq_get(wq);
1733 pr_debug("no wq available for cpu=%d\n", cpu);
1737 iaa_wq = idxd_wq_get_private(wq);
1739 dev = &wq->idxd->pdev->dev;
1741 nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1742 if (nr_sgs <= 0 || nr_sgs > 1) {
1743 dev_dbg(dev, "couldn't map src sg for iaa device %d,"
1744 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1745 iaa_wq->wq->id, ret);
1749 src_addr = sg_dma_address(req->src);
1750 dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
1751 " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
1752 req->src, req->slen, sg_dma_len(req->src));
1754 nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1755 if (nr_sgs <= 0 || nr_sgs > 1) {
1756 dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
1757 " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
1758 iaa_wq->wq->id, ret);
1762 dst_addr = sg_dma_address(req->dst);
1763 dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
1764 " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
1765 req->dst, req->dlen, sg_dma_len(req->dst));
1767 ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
1768 dst_addr, &req->dlen, false);
1769 if (ret == -EINPROGRESS)
1773 dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);
1775 dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
1777 dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
1784 static void compression_ctx_init(struct iaa_compression_ctx *ctx)
1786 ctx->verify_compress = iaa_verify_compress;
1787 ctx->async_mode = async_mode;
1788 ctx->use_irq = use_irq;
1791 static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
1793 struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
1794 struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
1796 compression_ctx_init(ctx);
1798 ctx->mode = IAA_MODE_FIXED;
1803 static void dst_free(struct scatterlist *sgl)
1806 * Called for req->dst = NULL cases but we free elsewhere
1807 * using sgl_free_order().
1811 static struct acomp_alg iaa_acomp_fixed_deflate = {
1812 .init = iaa_comp_init_fixed,
1813 .compress = iaa_comp_acompress,
1814 .decompress = iaa_comp_adecompress,
1815 .dst_free = dst_free,
1817 .cra_name = "deflate",
1818 .cra_driver_name = "deflate-iaa",
1819 .cra_flags = CRYPTO_ALG_ASYNC,
1820 .cra_ctxsize = sizeof(struct iaa_compression_ctx),
1821 .cra_module = THIS_MODULE,
1822 .cra_priority = IAA_ALG_PRIORITY,
1826 static int iaa_register_compression_device(void)
1830 ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
1832 pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
1836 iaa_crypto_registered = true;
1841 static int iaa_unregister_compression_device(void)
1843 if (iaa_crypto_registered)
1844 crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
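/*
 * Usage sketch (hypothetical kernel client, not part of this driver):
 * once registered, the "deflate-iaa" algorithm is reached through the
 * normal acomp API, roughly:
 *
 *	struct crypto_acomp *tfm = crypto_alloc_acomp("deflate-iaa", 0, 0);
 *	struct acomp_req *req = acomp_request_alloc(tfm);
 *
 *	acomp_request_set_params(req, src_sgl, dst_sgl, slen, dlen);
 *	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 *				   my_complete, my_data);
 *	ret = crypto_acomp_compress(req);
 *	(in the async modes this may return -EINPROGRESS and complete
 *	 later via the callback)
 *	...
 *	acomp_request_free(req);
 *	crypto_free_acomp(tfm);
 *
 * where src_sgl/dst_sgl, my_complete and my_data are placeholders
 * supplied by the caller (error handling omitted).
 */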
1849 static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
1851 struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1852 struct idxd_device *idxd = wq->idxd;
1853 struct idxd_driver_data *data = idxd->data;
1854 struct device *dev = &idxd_dev->conf_dev;
1855 bool first_wq = false;
1858 if (idxd->state != IDXD_DEV_ENABLED)
1861 if (data->type != IDXD_TYPE_IAX)
1864 mutex_lock(&wq->wq_lock);
1866 if (idxd_wq_get_private(wq)) {
1867 mutex_unlock(&wq->wq_lock);
1871 if (!idxd_wq_driver_name_match(wq, dev)) {
1872 dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
1873 idxd->id, wq->id, wq->driver_name, dev->driver->name);
1874 idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
1879 wq->type = IDXD_WQT_KERNEL;
1881 ret = idxd_drv_enable_wq(wq);
1883 dev_dbg(dev, "enable wq %d.%d failed: %d\n",
1884 idxd->id, wq->id, ret);
1889 mutex_lock(&iaa_devices_lock);
1891 if (list_empty(&iaa_devices)) {
1892 ret = alloc_wq_table(wq->idxd->max_wqs);
1898 ret = save_iaa_wq(wq);
1902 rebalance_wq_table();
1905 iaa_crypto_enabled = true;
1906 ret = iaa_register_compression_device();
1908 iaa_crypto_enabled = false;
1909 dev_dbg(dev, "IAA compression device registration failed\n");
1912 try_module_get(THIS_MODULE);
1914 pr_info("iaa_crypto now ENABLED\n");
1917 mutex_unlock(&iaa_devices_lock);
1919 mutex_unlock(&wq->wq_lock);
1925 free_iaa_wq(idxd_wq_get_private(wq));
1930 mutex_unlock(&iaa_devices_lock);
1931 idxd_drv_disable_wq(wq);
1933 wq->type = IDXD_WQT_NONE;
1938 static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
1940 struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
1941 struct idxd_device *idxd = wq->idxd;
1942 struct iaa_wq *iaa_wq;
1945 idxd_wq_quiesce(wq);
1947 mutex_lock(&wq->wq_lock);
1948 mutex_lock(&iaa_devices_lock);
1952 spin_lock(&idxd->dev_lock);
1953 iaa_wq = idxd_wq_get_private(wq);
1955 spin_unlock(&idxd->dev_lock);
1956 pr_err("%s: no iaa_wq available to remove\n", __func__);
1961 iaa_wq->remove = true;
1964 idxd_wq_set_private(wq, NULL);
1967 spin_unlock(&idxd->dev_lock);
1969 __free_iaa_wq(iaa_wq);
1973 idxd_drv_disable_wq(wq);
1974 rebalance_wq_table();
1977 iaa_crypto_enabled = false;
1979 module_put(THIS_MODULE);
1981 pr_info("iaa_crypto now DISABLED\n");
1984 mutex_unlock(&iaa_devices_lock);
1985 mutex_unlock(&wq->wq_lock);
1988 static enum idxd_dev_type dev_types[] = {
1993 static struct idxd_device_driver iaa_crypto_driver = {
1994 .probe = iaa_crypto_probe,
1995 .remove = iaa_crypto_remove,
1996 .name = IDXD_SUBDRIVER_NAME,
1998 .desc_complete = iaa_desc_complete,
2001 static int __init iaa_crypto_init_module(void)
2006 nr_cpus = num_possible_cpus();
2007 for_each_node_with_cpus(node)
2010 pr_err("IAA couldn't find any nodes with cpus\n");
2013 nr_cpus_per_node = nr_cpus / nr_nodes;
2015 if (crypto_has_comp("deflate-generic", 0, 0))
2016 deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0);
2018 if (IS_ERR_OR_NULL(deflate_generic_tfm)) {
2019 pr_err("IAA could not alloc %s tfm: errcode = %ld\n",
2020 "deflate-generic", PTR_ERR(deflate_generic_tfm));
2024 ret = iaa_aecs_init_fixed();
2026 pr_debug("IAA fixed compression mode init failed\n");
2030 ret = idxd_driver_register(&iaa_crypto_driver);
2032 pr_debug("IAA wq sub-driver registration failed\n");
2033 goto err_driver_reg;
2036 ret = driver_create_file(&iaa_crypto_driver.drv,
2037 &driver_attr_verify_compress);
2039 pr_debug("IAA verify_compress attr creation failed\n");
2040 goto err_verify_attr_create;
2043 ret = driver_create_file(&iaa_crypto_driver.drv,
2044 &driver_attr_sync_mode);
2046 pr_debug("IAA sync mode attr creation failed\n");
2047 goto err_sync_attr_create;
2050 if (iaa_crypto_debugfs_init())
2051 pr_warn("debugfs init failed, stats not available\n");
2053 pr_debug("initialized\n");
2057 err_sync_attr_create:
2058 driver_remove_file(&iaa_crypto_driver.drv,
2059 &driver_attr_verify_compress);
2060 err_verify_attr_create:
2061 idxd_driver_unregister(&iaa_crypto_driver);
2063 iaa_aecs_cleanup_fixed();
2065 crypto_free_comp(deflate_generic_tfm);
2070 static void __exit iaa_crypto_cleanup_module(void)
2072 if (iaa_unregister_compression_device())
2073 pr_debug("IAA compression device unregister failed\n");
2075 iaa_crypto_debugfs_cleanup();
2076 driver_remove_file(&iaa_crypto_driver.drv,
2077 &driver_attr_sync_mode);
2078 driver_remove_file(&iaa_crypto_driver.drv,
2079 &driver_attr_verify_compress);
2080 idxd_driver_unregister(&iaa_crypto_driver);
2081 iaa_aecs_cleanup_fixed();
2082 crypto_free_comp(deflate_generic_tfm);
2084 pr_debug("cleaned up\n");
2087 MODULE_IMPORT_NS(IDXD);
2088 MODULE_LICENSE("GPL");
2089 MODULE_ALIAS_IDXD_DEVICE(0);
2090 MODULE_AUTHOR("Intel Corporation");
2091 MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");
2093 module_init(iaa_crypto_init_module);
2094 module_exit(iaa_crypto_cleanup_module);