diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1f040d85ac47fe336f609a0cf9d39ac59b94f665..3b355aeb62fd353320fd47260bb05263e0e998ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -126,6 +126,30 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
        return 0;
 }
 
+struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
+                                               struct drm_file *filp,
+                                               struct amdgpu_ctx *ctx,
+                                               struct amdgpu_ib *ibs,
+                                               uint32_t num_ibs)
+{
+       struct amdgpu_cs_parser *parser;
+       int i;
+
+       parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
+       if (!parser)
+               return NULL;
+
+       parser->adev = adev;
+       parser->filp = filp;
+       parser->ctx = ctx;
+       parser->ibs = ibs;
+       parser->num_ibs = num_ibs;
+       for (i = 0; i < num_ibs; i++)
+               ibs[i].ctx = ctx;
+
+       return parser;
+}
+
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
        union drm_amdgpu_cs *cs = data;
@@ -147,13 +171,13 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
        /* get chunks */
        INIT_LIST_HEAD(&p->validated);
-       chunk_array = kcalloc(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
+       chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (chunk_array == NULL) {
                r = -ENOMEM;
                goto out;
        }
 
-       chunk_array_user = (uint64_t *)(unsigned long)(cs->in.chunks);
+       chunk_array_user = (uint64_t __user *)(cs->in.chunks);
        if (copy_from_user(chunk_array, chunk_array_user,
                           sizeof(uint64_t)*cs->in.num_chunks)) {
                r = -EFAULT;
@@ -161,7 +185,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
        }
 
        p->nchunks = cs->in.num_chunks;
-       p->chunks = kcalloc(p->nchunks, sizeof(struct amdgpu_cs_chunk),
+       p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
                            GFP_KERNEL);
        if (p->chunks == NULL) {
                r = -ENOMEM;
@@ -173,7 +197,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                struct drm_amdgpu_cs_chunk user_chunk;
                uint32_t __user *cdata;
 
-               chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
+               chunk_ptr = (void __user *)chunk_array[i];
                if (copy_from_user(&user_chunk, chunk_ptr,
                                       sizeof(struct drm_amdgpu_cs_chunk))) {
                        r = -EFAULT;
@@ -183,7 +207,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                p->chunks[i].length_dw = user_chunk.length_dw;
 
                size = p->chunks[i].length_dw;
-               cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
+               cdata = (void __user *)user_chunk.chunk_data;
                p->chunks[i].user_ptr = cdata;
 
                p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
@@ -235,11 +259,10 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                }
        }
 
+
        p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
-       if (!p->ibs) {
+       if (!p->ibs)
                r = -ENOMEM;
-               goto out;
-       }
 
 out:
        kfree(chunk_array);
@@ -331,7 +354,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p)
                         * into account. We don't want to disallow buffer moves
                         * completely.
                         */
-                       if (current_domain != AMDGPU_GEM_DOMAIN_CPU &&
+                       if ((lobj->allowed_domains & current_domain) != 0 &&
                            (domain & current_domain) == 0 && /* will be moved */
                            bytes_moved > bytes_moved_threshold) {
                                /* don't move it */
@@ -415,18 +438,8 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
        return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
 }
 
-/**
- * cs_parser_fini() - clean parser states
- * @parser:    parser structure holding parsing context.
- * @error:     error number
- *
- * If error is set, then unvalidate the buffers; otherwise just free the
- * memory used by the parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
 {
-       unsigned i;
-
        if (!error) {
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
@@ -447,21 +460,45 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }
+}
 
+static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
+{
+       unsigned i;
        if (parser->ctx)
                amdgpu_ctx_put(parser->ctx);
        if (parser->bo_list)
                amdgpu_bo_list_put(parser->bo_list);
+
        drm_free_large(parser->vm_bos);
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
-       if (parser->ibs)
-               for (i = 0; i < parser->num_ibs; i++)
-                       amdgpu_ib_free(parser->adev, &parser->ibs[i]);
-       kfree(parser->ibs);
-       if (parser->uf.bo)
-               drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
+       if (!amdgpu_enable_scheduler)
+       {
+               if (parser->ibs)
+                       for (i = 0; i < parser->num_ibs; i++)
+                               amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+               kfree(parser->ibs);
+               if (parser->uf.bo)
+                       drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
+       }
+
+       kfree(parser);
+}
+
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser:    parser structure holding parsing context.
+ * @error:     error number
+ *
+ * If error is set, then unvalidate the buffers; otherwise just free the
+ * memory used by the parsing context.
+ **/
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+{
+       amdgpu_cs_parser_fini_early(parser, error, backoff);
+       amdgpu_cs_parser_fini_late(parser);
 }
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
@@ -476,12 +513,18 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
        if (r)
                return r;
 
+       r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
+       if (r)
+               return r;
+
        r = amdgpu_vm_clear_freed(adev, vm);
        if (r)
                return r;
 
        if (p->bo_list) {
                for (i = 0; i < p->bo_list->num_entries; i++) {
+                       struct fence *f;
+
                        /* ignore duplicates */
                        bo = p->bo_list->array[i].robj;
                        if (!bo)
@@ -495,7 +538,10 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
                        if (r)
                                return r;
 
-                       amdgpu_sync_fence(&p->ibs[0].sync, bo_va->last_pt_update);
+                       f = bo_va->last_pt_update;
+                       r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
+                       if (r)
+                               return r;
                }
        }
 
@@ -529,9 +575,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
                goto out;
        }
        amdgpu_cs_sync_rings(parser);
-
-       r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
-                              parser->filp);
+       if (!amdgpu_enable_scheduler)
+               r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
+                                      parser->filp);
 
 out:
        mutex_unlock(&vm->mutex);
@@ -650,7 +696,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
                        ib->oa_size = amdgpu_bo_size(oa);
                }
        }
-
        /* wrap the last IB with user fence */
        if (parser->uf.bo) {
                struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];
@@ -693,9 +738,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
                        sizeof(struct drm_amdgpu_cs_chunk_dep);
 
                for (j = 0; j < num_deps; ++j) {
-                       struct amdgpu_fence *fence;
                        struct amdgpu_ring *ring;
                        struct amdgpu_ctx *ctx;
+                       struct fence *fence;
 
                        r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
                                               deps[j].ip_instance,
@@ -707,85 +752,137 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
                        if (ctx == NULL)
                                return -EINVAL;
 
-                       r = amdgpu_fence_recreate(ring, p->filp,
-                                                 deps[j].handle,
-                                                 &fence);
-                       if (r) {
+                       fence = amdgpu_ctx_get_fence(ctx, ring,
+                                                    deps[j].handle);
+                       if (IS_ERR(fence)) {
+                               r = PTR_ERR(fence);
                                amdgpu_ctx_put(ctx);
                                return r;
-                       }
 
-                       amdgpu_sync_fence(&ib->sync, fence);
-                       amdgpu_fence_unref(&fence);
-                       amdgpu_ctx_put(ctx);
+                       } else if (fence) {
+                               r = amdgpu_sync_fence(adev, &ib->sync, fence);
+                               fence_put(fence);
+                               amdgpu_ctx_put(ctx);
+                               if (r)
+                                       return r;
+                       }
                }
        }
 
        return 0;
 }
 
+static int amdgpu_cs_free_job(struct amdgpu_job *sched_job)
+{
+       int i;
+       if (sched_job->ibs)
+               for (i = 0; i < sched_job->num_ibs; i++)
+                       amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
+       kfree(sched_job->ibs);
+       if (sched_job->uf.bo)
+               drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base);
+       return 0;
+}
+
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
        struct amdgpu_device *adev = dev->dev_private;
        union drm_amdgpu_cs *cs = data;
-       struct amdgpu_cs_parser parser;
-       int r, i;
+       struct amdgpu_cs_parser *parser;
        bool reserved_buffers = false;
+       int i, r;
 
        down_read(&adev->exclusive_lock);
        if (!adev->accel_working) {
                up_read(&adev->exclusive_lock);
                return -EBUSY;
        }
-       /* initialize parser */
-       memset(&parser, 0, sizeof(struct amdgpu_cs_parser));
-       parser.filp = filp;
-       parser.adev = adev;
-       r = amdgpu_cs_parser_init(&parser, data);
+
+       parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
+       if (!parser)
+               return -ENOMEM;
+       r = amdgpu_cs_parser_init(parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser !\n");
-               amdgpu_cs_parser_fini(&parser, r, false);
+               amdgpu_cs_parser_fini(parser, r, false);
                up_read(&adev->exclusive_lock);
                r = amdgpu_cs_handle_lockup(adev, r);
                return r;
        }
 
-       r = amdgpu_cs_parser_relocs(&parser);
-       if (r) {
-               if (r != -ERESTARTSYS) {
-                       if (r == -ENOMEM)
-                               DRM_ERROR("Not enough memory for command submission!\n");
-                       else
-                               DRM_ERROR("Failed to process the buffer list %d!\n", r);
-               }
+       r = amdgpu_cs_parser_relocs(parser);
+       if (r == -ENOMEM)
+               DRM_ERROR("Not enough memory for command submission!\n");
+       else if (r && r != -ERESTARTSYS)
+               DRM_ERROR("Failed to process the buffer list %d!\n", r);
+       else if (!r) {
+               reserved_buffers = true;
+               r = amdgpu_cs_ib_fill(adev, parser);
        }
 
        if (!r) {
-               reserved_buffers = true;
-               r = amdgpu_cs_ib_fill(adev, &parser);
+               r = amdgpu_cs_dependencies(adev, parser);
+               if (r)
+                       DRM_ERROR("Failed in the dependencies handling %d!\n", r);
        }
 
-       if (!r)
-               r = amdgpu_cs_dependencies(adev, &parser);
-
-       if (r) {
-               amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
-               up_read(&adev->exclusive_lock);
-               r = amdgpu_cs_handle_lockup(adev, r);
-               return r;
-       }
+       if (r)
+               goto out;
 
-       for (i = 0; i < parser.num_ibs; i++)
-               trace_amdgpu_cs(&parser, i);
+       for (i = 0; i < parser->num_ibs; i++)
+               trace_amdgpu_cs(parser, i);
 
-       r = amdgpu_cs_ib_vm_chunk(adev, &parser);
-       if (r) {
+       r = amdgpu_cs_ib_vm_chunk(adev, parser);
+       if (r)
                goto out;
+
+       if (amdgpu_enable_scheduler && parser->num_ibs) {
+               struct amdgpu_job *job;
+               struct amdgpu_ring * ring =  parser->ibs->ring;
+               job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
+               if (!job)
+                       return -ENOMEM;
+               job->base.sched = ring->scheduler;
+               job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
+               job->adev = parser->adev;
+               job->ibs = parser->ibs;
+               job->num_ibs = parser->num_ibs;
+               job->base.owner = parser->filp;
+               mutex_init(&job->job_lock);
+               if (job->ibs[job->num_ibs - 1].user) {
+                       memcpy(&job->uf,  &parser->uf,
+                              sizeof(struct amdgpu_user_fence));
+                       job->ibs[job->num_ibs - 1].user = &job->uf;
+               }
+
+               job->free_job = amdgpu_cs_free_job;
+               mutex_lock(&job->job_lock);
+               r = amd_sched_entity_push_job((struct amd_sched_job *)job);
+               if (r) {
+                       mutex_unlock(&job->job_lock);
+                       amdgpu_cs_free_job(job);
+                       kfree(job);
+                       goto out;
+               }
+               cs->out.handle =
+                       amdgpu_ctx_add_fence(parser->ctx, ring,
+                                            &job->base.s_fence->base);
+               parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+
+               list_sort(NULL, &parser->validated, cmp_size_smaller_first);
+               ttm_eu_fence_buffer_objects(&parser->ticket,
+                               &parser->validated,
+                               &job->base.s_fence->base);
+
+               mutex_unlock(&job->job_lock);
+               amdgpu_cs_parser_fini_late(parser);
+               up_read(&adev->exclusive_lock);
+               return 0;
        }
 
-       cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq;
+       cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
 out:
-       amdgpu_cs_parser_fini(&parser, r, true);
+       amdgpu_cs_parser_fini(parser, r, reserved_buffers);
        up_read(&adev->exclusive_lock);
        r = amdgpu_cs_handle_lockup(adev, r);
        return r;
@@ -806,30 +903,29 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
        union drm_amdgpu_wait_cs *wait = data;
        struct amdgpu_device *adev = dev->dev_private;
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-       struct amdgpu_fence *fence = NULL;
        struct amdgpu_ring *ring = NULL;
        struct amdgpu_ctx *ctx;
+       struct fence *fence;
        long r;
 
-       ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
-       if (ctx == NULL)
-               return -EINVAL;
-
        r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
                               wait->in.ring, &ring);
-       if (r) {
-               amdgpu_ctx_put(ctx);
+       if (r)
                return r;
-       }
 
-       r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
-       if (r) {
-               amdgpu_ctx_put(ctx);
-               return r;
-       }
+       ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
+       if (ctx == NULL)
+               return -EINVAL;
+
+       fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+       if (IS_ERR(fence))
+               r = PTR_ERR(fence);
+       else if (fence) {
+               r = fence_wait_timeout(fence, true, timeout);
+               fence_put(fence);
+       } else
+               r = 1;
 
-       r = fence_wait_timeout(&fence->base, true, timeout);
-       amdgpu_fence_unref(&fence);
        amdgpu_ctx_put(ctx);
        if (r < 0)
                return r;
@@ -864,7 +960,16 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                if (!reloc->bo_va)
                        continue;
 
-               list_for_each_entry(mapping, &reloc->bo_va->mappings, list) {
+               list_for_each_entry(mapping, &reloc->bo_va->valids, list) {
+                       if (mapping->it.start > addr ||
+                           addr > mapping->it.last)
+                               continue;
+
+                       *bo = reloc->bo_va->bo;
+                       return mapping;
+               }
+
+               list_for_each_entry(mapping, &reloc->bo_va->invalids, list) {
                        if (mapping->it.start > addr ||
                            addr > mapping->it.last)
                                continue;