drivers/gpu/drm/msm/msm_gpu.c

   1 /*
   2  * Copyright (C) 2013 Red Hat
   3  * Author: Rob Clark <[email protected]>
   4  *
   5  * This program is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 as published by
   7  * the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  12  * more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along with
  15  * this program.  If not, see <http://www.gnu.org/licenses/>.
  16  */
  17
#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"

#include <linux/string_helpers.h>
#include <linux/pm_opp.h>
#include <linux/devfreq.h>
#include <linux/sched/task.h>
  26
  27
  28 /*
  29  * Power Management:
  30  */
  31
  32 static int msm_devfreq_target(struct device *dev, unsigned long *freq,
  33                 u32 flags)
  34 {
  35         struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
  36         struct dev_pm_opp *opp;
  37
  38         opp = devfreq_recommended_opp(dev, freq, flags);
  39
  40         if (IS_ERR(opp))
  41                 return PTR_ERR(opp);
  42
  43         clk_set_rate(gpu->core_clk, *freq);
  44         dev_pm_opp_put(opp);
  45
  46         return 0;
  47 }
  48
  49 static int msm_devfreq_get_dev_status(struct device *dev,
  50                 struct devfreq_dev_status *status)
  51 {
  52         struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
  53         u64 cycles;
  54         u32 freq = ((u32) status->current_frequency) / 1000000;
  55         ktime_t time;
  56
  57         status->current_frequency = (unsigned long) clk_get_rate(gpu->core_clk);
  58         gpu->funcs->gpu_busy(gpu, &cycles);
  59
  60         status->busy_time = ((u32) (cycles - gpu->devfreq.busy_cycles)) / freq;
  61
  62         gpu->devfreq.busy_cycles = cycles;
  63
  64         time = ktime_get();
  65         status->total_time = ktime_us_delta(time, gpu->devfreq.time);
  66         gpu->devfreq.time = time;
  67
  68         return 0;
  69 }
  70
  71 static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
  72 {
  73         struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
  74
  75         *freq = (unsigned long) clk_get_rate(gpu->core_clk);
  76
  77         return 0;
  78 }
  79
  80 static struct devfreq_dev_profile msm_devfreq_profile = {
  81         .polling_ms = 10,
  82         .target = msm_devfreq_target,
  83         .get_dev_status = msm_devfreq_get_dev_status,
  84         .get_cur_freq = msm_devfreq_get_cur_freq,
  85 };
  86
  87 static void msm_devfreq_init(struct msm_gpu *gpu)
  88 {
  89         /* We need target support to do devfreq */
  90         if (!gpu->funcs->gpu_busy)
  91                 return;
  92
  93         msm_devfreq_profile.initial_freq = gpu->fast_rate;
  94
  95         /*
  96          * Don't set the freq_table or max_state and let devfreq build the table
  97          * from OPP
  98          */
  99
 100         gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
 101                         &msm_devfreq_profile, "simple_ondemand", NULL);
 102
 103         if (IS_ERR(gpu->devfreq.devfreq)) {
 104                 dev_err(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
 105                 gpu->devfreq.devfreq = NULL;
 106         }
 107 }
 108
 109 static int enable_pwrrail(struct msm_gpu *gpu)
 110 {
 111         struct drm_device *dev = gpu->dev;
 112         int ret = 0;
 113
 114         if (gpu->gpu_reg) {
 115                 ret = regulator_enable(gpu->gpu_reg);
 116                 if (ret) {
 117                         dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
 118                         return ret;
 119                 }
 120         }
 121
 122         if (gpu->gpu_cx) {
 123                 ret = regulator_enable(gpu->gpu_cx);
 124                 if (ret) {
 125                         dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
 126                         return ret;
 127                 }
 128         }
 129
 130         return 0;
 131 }
 132
 133 static int disable_pwrrail(struct msm_gpu *gpu)
 134 {
 135         if (gpu->gpu_cx)
 136                 regulator_disable(gpu->gpu_cx);
 137         if (gpu->gpu_reg)
 138                 regulator_disable(gpu->gpu_reg);
 139         return 0;
 140 }
 141
 142 static int enable_clk(struct msm_gpu *gpu)
 143 {
 144         int i;
 145
 146         if (gpu->core_clk && gpu->fast_rate)
 147                 clk_set_rate(gpu->core_clk, gpu->fast_rate);
 148
 149         /* Set the RBBM timer rate to 19.2Mhz */
 150         if (gpu->rbbmtimer_clk)
 151                 clk_set_rate(gpu->rbbmtimer_clk, 19200000);
 152
 153         for (i = gpu->nr_clocks - 1; i >= 0; i--)
 154                 if (gpu->grp_clks[i])
 155                         clk_prepare(gpu->grp_clks[i]);
 156
 157         for (i = gpu->nr_clocks - 1; i >= 0; i--)
 158                 if (gpu->grp_clks[i])
 159                         clk_enable(gpu->grp_clks[i]);
 160
 161         return 0;
 162 }
 163
 164 static int disable_clk(struct msm_gpu *gpu)
 165 {
 166         int i;
 167
 168         for (i = gpu->nr_clocks - 1; i >= 0; i--)
 169                 if (gpu->grp_clks[i])
 170                         clk_disable(gpu->grp_clks[i]);
 171
 172         for (i = gpu->nr_clocks - 1; i >= 0; i--)
 173                 if (gpu->grp_clks[i])
 174                         clk_unprepare(gpu->grp_clks[i]);
 175
 176         /*
 177          * Set the clock to a deliberately low rate. On older targets the clock
 178          * speed had to be non zero to avoid problems. On newer targets this
 179          * will be rounded down to zero anyway so it all works out.
 180          */
 181         if (gpu->core_clk)
 182                 clk_set_rate(gpu->core_clk, 27000000);
 183
 184         if (gpu->rbbmtimer_clk)
 185                 clk_set_rate(gpu->rbbmtimer_clk, 0);
 186
 187         return 0;
 188 }
 189
 190 static int enable_axi(struct msm_gpu *gpu)
 191 {
 192         if (gpu->ebi1_clk)
 193                 clk_prepare_enable(gpu->ebi1_clk);
 194         return 0;
 195 }
 196
 197 static int disable_axi(struct msm_gpu *gpu)
 198 {
 199         if (gpu->ebi1_clk)
 200                 clk_disable_unprepare(gpu->ebi1_clk);
 201         return 0;
 202 }
 203
 204 int msm_gpu_pm_resume(struct msm_gpu *gpu)
 205 {
 206         int ret;
 207
 208         DBG("%s", gpu->name);
 209
 210         ret = enable_pwrrail(gpu);
 211         if (ret)
 212                 return ret;
 213
 214         ret = enable_clk(gpu);
 215         if (ret)
 216                 return ret;
 217
 218         ret = enable_axi(gpu);
 219         if (ret)
 220                 return ret;
 221
 222         if (gpu->devfreq.devfreq) {
 223                 gpu->devfreq.busy_cycles = 0;
 224                 gpu->devfreq.time = ktime_get();
 225
 226                 devfreq_resume_device(gpu->devfreq.devfreq);
 227         }
 228
 229         gpu->needs_hw_init = true;
 230
 231         return 0;
 232 }
 233
 234 int msm_gpu_pm_suspend(struct msm_gpu *gpu)
 235 {
 236         int ret;
 237
 238         DBG("%s", gpu->name);
 239
 240         if (gpu->devfreq.devfreq)
 241                 devfreq_suspend_device(gpu->devfreq.devfreq);
 242
 243         ret = disable_axi(gpu);
 244         if (ret)
 245                 return ret;
 246
 247         ret = disable_clk(gpu);
 248         if (ret)
 249                 return ret;
 250
 251         ret = disable_pwrrail(gpu);
 252         if (ret)
 253                 return ret;
 254
 255         return 0;
 256 }
 257
 258 int msm_gpu_hw_init(struct msm_gpu *gpu)
 259 {
 260         int ret;
 261
 262         WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));
 263
 264         if (!gpu->needs_hw_init)
 265                 return 0;
 266
 267         disable_irq(gpu->irq);
 268         ret = gpu->funcs->hw_init(gpu);
 269         if (!ret)
 270                 gpu->needs_hw_init = false;
 271         enable_irq(gpu->irq);
 272
 273         return ret;
 274 }
 275
 276 /*
 277  * Hangcheck detection for locked gpu:
 278  */
 279
 280 static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
 281                 uint32_t fence)
 282 {
 283         struct msm_gem_submit *submit;
 284
 285         list_for_each_entry(submit, &ring->submits, node) {
 286                 if (submit->seqno > fence)
 287                         break;
 288
 289                 msm_update_fence(submit->ring->fctx,
 290                         submit->fence->seqno);
 291         }
 292 }
 293
 294 static struct msm_gem_submit *
 295 find_submit(struct msm_ringbuffer *ring, uint32_t fence)
 296 {
 297         struct msm_gem_submit *submit;
 298
 299         WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));
 300
 301         list_for_each_entry(submit, &ring->submits, node)
 302                 if (submit->seqno == fence)
 303                         return submit;
 304
 305         return NULL;
 306 }
 307
 308 static void retire_submits(struct msm_gpu *gpu);
 309
 310 static void recover_worker(struct work_struct *work)
 311 {
 312         struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
 313         struct drm_device *dev = gpu->dev;
 314         struct msm_drm_private *priv = dev->dev_private;
 315         struct msm_gem_submit *submit;
 316         struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
 317         int i;
 318
 319         mutex_lock(&dev->struct_mutex);
 320
 321         dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
 322
 323         submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
 324         if (submit) {
 325                 struct task_struct *task;
 326
 327                 rcu_read_lock();
 328                 task = pid_task(submit->pid, PIDTYPE_PID);
 329                 if (task) {
 330                         char *cmd;
 331
 332                         /*
 333                          * So slightly annoying, in other paths like
 334                          * mmap'ing gem buffers, mmap_sem is acquired
 335                          * before struct_mutex, which means we can't
 336                          * hold struct_mutex across the call to
 337                          * get_cmdline().  But submits are retired
 338                          * from the same in-order workqueue, so we can
 339                          * safely drop the lock here without worrying
 340                          * about the submit going away.
 341                          */
 342                         mutex_unlock(&dev->struct_mutex);
 343                         cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
 344                         mutex_lock(&dev->struct_mutex);
 345
 346                         dev_err(dev->dev, "%s: offending task: %s (%s)\n",
 347                                 gpu->name, task->comm, cmd);
 348
 349                         msm_rd_dump_submit(priv->hangrd, submit,
 350                                 "offending task: %s (%s)", task->comm, cmd);
 351
 352                         kfree(cmd);
 353                 } else {
 354                         msm_rd_dump_submit(priv->hangrd, submit, NULL);
 355                 }
 356                 rcu_read_unlock();
 357         }
 358
 359
 360         /*
 361          * Update all the rings with the latest and greatest fence.. this
 362          * needs to happen after msm_rd_dump_submit() to ensure that the
 363          * bo's referenced by the offending submit are still around.
 364          */
 365         for (i = 0; i < gpu->nr_rings; i++) {
 366                 struct msm_ringbuffer *ring = gpu->rb[i];
 367
 368                 uint32_t fence = ring->memptrs->fence;
 369
 370                 /*
 371                  * For the current (faulting?) ring/submit advance the fence by
 372                  * one more to clear the faulting submit
 373                  */
 374                 if (ring == cur_ring)
 375                         fence++;
 376
 377                 update_fences(gpu, ring, fence);
 378         }
 379
 380         if (msm_gpu_active(gpu)) {
 381                 /* retire completed submits, plus the one that hung: */
 382                 retire_submits(gpu);
 383
 384                 pm_runtime_get_sync(&gpu->pdev->dev);
 385                 gpu->funcs->recover(gpu);
 386                 pm_runtime_put_sync(&gpu->pdev->dev);
 387
 388                 /*
 389                  * Replay all remaining submits starting with highest priority
 390                  * ring
 391                  */
 392                 for (i = 0; i < gpu->nr_rings; i++) {
 393                         struct msm_ringbuffer *ring = gpu->rb[i];
 394
 395                         list_for_each_entry(submit, &ring->submits, node)
 396                                 gpu->funcs->submit(gpu, submit, NULL);
 397                 }
 398         }
 399
 400         mutex_unlock(&dev->struct_mutex);
 401
 402         msm_gpu_retire(gpu);
 403 }
 404
 405 static void hangcheck_timer_reset(struct msm_gpu *gpu)
 406 {
 407         DBG("%s", gpu->name);
 408         mod_timer(&gpu->hangcheck_timer,
 409                         round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
 410 }
 411
 412 static void hangcheck_handler(struct timer_list *t)
 413 {
 414         struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
 415         struct drm_device *dev = gpu->dev;
 416         struct msm_drm_private *priv = dev->dev_private;
 417         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
 418         uint32_t fence = ring->memptrs->fence;
 419
 420         if (fence != ring->hangcheck_fence) {
 421                 /* some progress has been made.. ya! */
 422                 ring->hangcheck_fence = fence;
 423         } else if (fence < ring->seqno) {
 424                 /* no progress and not done.. hung! */
 425                 ring->hangcheck_fence = fence;
 426                 dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
 427                                 gpu->name, ring->id);
 428                 dev_err(dev->dev, "%s:     completed fence: %u\n",
 429                                 gpu->name, fence);
 430                 dev_err(dev->dev, "%s:     submitted fence: %u\n",
 431                                 gpu->name, ring->seqno);
 432
 433                 queue_work(priv->wq, &gpu->recover_work);
 434         }
 435
 436         /* if still more pending work, reset the hangcheck timer: */
 437         if (ring->seqno > ring->hangcheck_fence)
 438                 hangcheck_timer_reset(gpu);
 439
 440         /* workaround for missing irq: */
 441         queue_work(priv->wq, &gpu->retire_work);
 442 }
 443
 444 /*
 445  * Performance Counters:
 446  */
 447
 448 /* called under perf_lock */
 449 static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
 450 {
 451         uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
 452         int i, n = min(ncntrs, gpu->num_perfcntrs);
 453
 454         /* read current values: */
 455         for (i = 0; i < gpu->num_perfcntrs; i++)
 456                 current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);
 457
 458         /* update cntrs: */
 459         for (i = 0; i < n; i++)
 460                 cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];
 461
 462         /* save current values: */
 463         for (i = 0; i < gpu->num_perfcntrs; i++)
 464                 gpu->last_cntrs[i] = current_cntrs[i];
 465
 466         return n;
 467 }
 468
 469 static void update_sw_cntrs(struct msm_gpu *gpu)
 470 {
 471         ktime_t time;
 472         uint32_t elapsed;
 473         unsigned long flags;
 474
 475         spin_lock_irqsave(&gpu->perf_lock, flags);
 476         if (!gpu->perfcntr_active)
 477                 goto out;
 478
 479         time = ktime_get();
 480         elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));
 481
 482         gpu->totaltime += elapsed;
 483         if (gpu->last_sample.active)
 484                 gpu->activetime += elapsed;
 485
 486         gpu->last_sample.active = msm_gpu_active(gpu);
 487         gpu->last_sample.time = time;
 488
 489 out:
 490         spin_unlock_irqrestore(&gpu->perf_lock, flags);
 491 }
 492
 493 void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
 494 {
 495         unsigned long flags;
 496
 497         pm_runtime_get_sync(&gpu->pdev->dev);
 498
 499         spin_lock_irqsave(&gpu->perf_lock, flags);
 500         /* we could dynamically enable/disable perfcntr registers too.. */
 501         gpu->last_sample.active = msm_gpu_active(gpu);
 502         gpu->last_sample.time = ktime_get();
 503         gpu->activetime = gpu->totaltime = 0;
 504         gpu->perfcntr_active = true;
 505         update_hw_cntrs(gpu, 0, NULL);
 506         spin_unlock_irqrestore(&gpu->perf_lock, flags);
 507 }
 508
 509 void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
 510 {
 511         gpu->perfcntr_active = false;
 512         pm_runtime_put_sync(&gpu->pdev->dev);
 513 }
 514
 515 /* returns -errno or # of cntrs sampled */
 516 int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
 517                 uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
 518 {
 519         unsigned long flags;
 520         int ret;
 521
 522         spin_lock_irqsave(&gpu->perf_lock, flags);
 523
 524         if (!gpu->perfcntr_active) {
 525                 ret = -EINVAL;
 526                 goto out;
 527         }
 528
 529         *activetime = gpu->activetime;
 530         *totaltime = gpu->totaltime;
 531
 532         gpu->activetime = gpu->totaltime = 0;
 533
 534         ret = update_hw_cntrs(gpu, ncntrs, cntrs);
 535
 536 out:
 537         spin_unlock_irqrestore(&gpu->perf_lock, flags);
 538
 539         return ret;
 540 }
 541
 542 /*
 543  * Cmdstream submission/retirement:
 544  */
 545
 546 static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 547 {
 548         int i;
 549
 550         for (i = 0; i < submit->nr_bos; i++) {
 551                 struct msm_gem_object *msm_obj = submit->bos[i].obj;
 552                 /* move to inactive: */
 553                 msm_gem_move_to_inactive(&msm_obj->base);
 554                 msm_gem_put_iova(&msm_obj->base, gpu->aspace);
 555                 drm_gem_object_put(&msm_obj->base);
 556         }
 557
 558         pm_runtime_mark_last_busy(&gpu->pdev->dev);
 559         pm_runtime_put_autosuspend(&gpu->pdev->dev);
 560         msm_gem_submit_free(submit);
 561 }
 562
 563 static void retire_submits(struct msm_gpu *gpu)
 564 {
 565         struct drm_device *dev = gpu->dev;
 566         struct msm_gem_submit *submit, *tmp;
 567         int i;
 568
 569         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 570
 571         /* Retire the commits starting with highest priority */
 572         for (i = 0; i < gpu->nr_rings; i++) {
 573                 struct msm_ringbuffer *ring = gpu->rb[i];
 574
 575                 list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
 576                         if (dma_fence_is_signaled(submit->fence))
 577                                 retire_submit(gpu, submit);
 578                 }
 579         }
 580 }
 581
 582 static void retire_worker(struct work_struct *work)
 583 {
 584         struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
 585         struct drm_device *dev = gpu->dev;
 586         int i;
 587
 588         for (i = 0; i < gpu->nr_rings; i++)
 589                 update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);
 590
 591         mutex_lock(&dev->struct_mutex);
 592         retire_submits(gpu);
 593         mutex_unlock(&dev->struct_mutex);
 594 }
 595
 596 /* call from irq handler to schedule work to retire bo's */
 597 void msm_gpu_retire(struct msm_gpu *gpu)
 598 {
 599         struct msm_drm_private *priv = gpu->dev->dev_private;
 600         queue_work(priv->wq, &gpu->retire_work);
 601         update_sw_cntrs(gpu);
 602 }
 603
 604 /* add bo's to gpu's ring, and kick gpu: */
 605 void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 606                 struct msm_file_private *ctx)
 607 {
 608         struct drm_device *dev = gpu->dev;
 609         struct msm_drm_private *priv = dev->dev_private;
 610         struct msm_ringbuffer *ring = submit->ring;
 611         int i;
 612
 613         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 614
 615         pm_runtime_get_sync(&gpu->pdev->dev);
 616
 617         msm_gpu_hw_init(gpu);
 618
 619         submit->seqno = ++ring->seqno;
 620
 621         list_add_tail(&submit->node, &ring->submits);
 622
 623         msm_rd_dump_submit(priv->rd, submit, NULL);
 624
 625         update_sw_cntrs(gpu);
 626
 627         for (i = 0; i < submit->nr_bos; i++) {
 628                 struct msm_gem_object *msm_obj = submit->bos[i].obj;
 629                 uint64_t iova;
 630
 631                 /* can't happen yet.. but when we add 2d support we'll have
 632                  * to deal w/ cross-ring synchronization:
 633                  */
 634                 WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));
 635
 636                 /* submit takes a reference to the bo and iova until retired: */
 637                 drm_gem_object_get(&msm_obj->base);
 638                 msm_gem_get_iova(&msm_obj->base,
 639                                 submit->gpu->aspace, &iova);
 640
 641                 if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
 642                         msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
 643                 else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
 644                         msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
 645         }
 646
 647         gpu->funcs->submit(gpu, submit, ctx);
 648         priv->lastctx = ctx;
 649
 650         hangcheck_timer_reset(gpu);
 651 }
 652
 653 /*
 654  * Init/Cleanup:
 655  */
 656
 657 static irqreturn_t irq_handler(int irq, void *data)
 658 {
 659         struct msm_gpu *gpu = data;
 660         return gpu->funcs->irq(gpu);
 661 }
 662
 663 static struct clk *get_clock(struct device *dev, const char *name)
 664 {
 665         struct clk *clk = devm_clk_get(dev, name);
 666
 667         return IS_ERR(clk) ? NULL : clk;
 668 }
 669
 670 static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
 671 {
 672         struct device *dev = &pdev->dev;
 673         struct property *prop;
 674         const char *name;
 675         int i = 0;
 676
 677         gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names");
 678         if (gpu->nr_clocks < 1) {
 679                 gpu->nr_clocks = 0;
 680                 return 0;
 681         }
 682
 683         gpu->grp_clks = devm_kcalloc(dev, sizeof(struct clk *), gpu->nr_clocks,
 684                 GFP_KERNEL);
 685         if (!gpu->grp_clks) {
 686                 gpu->nr_clocks = 0;
 687                 return -ENOMEM;
 688         }
 689
 690         of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
 691                 gpu->grp_clks[i] = get_clock(dev, name);
 692
 693                 /* Remember the key clocks that we need to control later */
 694                 if (!strcmp(name, "core") || !strcmp(name, "core_clk"))
 695                         gpu->core_clk = gpu->grp_clks[i];
 696                 else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk"))
 697                         gpu->rbbmtimer_clk = gpu->grp_clks[i];
 698
 699                 ++i;
 700         }
 701
 702         return 0;
 703 }
 704
 705 static struct msm_gem_address_space *
 706 msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
 707                 uint64_t va_start, uint64_t va_end)
 708 {
 709         struct iommu_domain *iommu;
 710         struct msm_gem_address_space *aspace;
 711         int ret;
 712
 713         /*
 714          * Setup IOMMU.. eventually we will (I think) do this once per context
 715          * and have separate page tables per context.  For now, to keep things
 716          * simple and to get something working, just use a single address space:
 717          */
 718         iommu = iommu_domain_alloc(&platform_bus_type);
 719         if (!iommu)
 720                 return NULL;
 721
 722         iommu->geometry.aperture_start = va_start;
 723         iommu->geometry.aperture_end = va_end;
 724
 725         dev_info(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);
 726
 727         aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
 728         if (IS_ERR(aspace)) {
 729                 dev_err(gpu->dev->dev, "failed to init iommu: %ld\n",
 730                         PTR_ERR(aspace));
 731                 iommu_domain_free(iommu);
 732                 return ERR_CAST(aspace);
 733         }
 734
 735         ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
 736         if (ret) {
 737                 msm_gem_address_space_put(aspace);
 738                 return ERR_PTR(ret);
 739         }
 740
 741         return aspace;
 742 }
 743
 744 int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 745                 struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
 746                 const char *name, struct msm_gpu_config *config)
 747 {
 748         int i, ret, nr_rings = config->nr_rings;
 749         void *memptrs;
 750         uint64_t memptrs_iova;
 751
 752         if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
 753                 gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);
 754
 755         gpu->dev = drm;
 756         gpu->funcs = funcs;
 757         gpu->name = name;
 758
 759         INIT_LIST_HEAD(&gpu->active_list);
 760         INIT_WORK(&gpu->retire_work, retire_worker);
 761         INIT_WORK(&gpu->recover_work, recover_worker);
 762
 763
 764         timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);
 765
 766         spin_lock_init(&gpu->perf_lock);
 767
 768
 769         /* Map registers: */
 770         gpu->mmio = msm_ioremap(pdev, config->ioname, name);
 771         if (IS_ERR(gpu->mmio)) {
 772                 ret = PTR_ERR(gpu->mmio);
 773                 goto fail;
 774         }
 775
 776         /* Get Interrupt: */
 777         gpu->irq = platform_get_irq_byname(pdev, config->irqname);
 778         if (gpu->irq < 0) {
 779                 ret = gpu->irq;
 780                 dev_err(drm->dev, "failed to get irq: %d\n", ret);
 781                 goto fail;
 782         }
 783
 784         ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
 785                         IRQF_TRIGGER_HIGH, gpu->name, gpu);
 786         if (ret) {
 787                 dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
 788                 goto fail;
 789         }
 790
 791         ret = get_clocks(pdev, gpu);
 792         if (ret)
 793                 goto fail;
 794
 795         gpu->ebi1_clk = msm_clk_get(pdev, "bus");
 796         DBG("ebi1_clk: %p", gpu->ebi1_clk);
 797         if (IS_ERR(gpu->ebi1_clk))
 798                 gpu->ebi1_clk = NULL;
 799
 800         /* Acquire regulators: */
 801         gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
 802         DBG("gpu_reg: %p", gpu->gpu_reg);
 803         if (IS_ERR(gpu->gpu_reg))
 804                 gpu->gpu_reg = NULL;
 805
 806         gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
 807         DBG("gpu_cx: %p", gpu->gpu_cx);
 808         if (IS_ERR(gpu->gpu_cx))
 809                 gpu->gpu_cx = NULL;
 810
 811         gpu->pdev = pdev;
 812         platform_set_drvdata(pdev, gpu);
 813
 814         msm_devfreq_init(gpu);
 815
 816         gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
 817                 config->va_start, config->va_end);
 818
 819         if (gpu->aspace == NULL)
 820                 dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
 821         else if (IS_ERR(gpu->aspace)) {
 822                 ret = PTR_ERR(gpu->aspace);
 823                 goto fail;
 824         }
 825
 826         memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo),
 827                 MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
 828                 &memptrs_iova);
 829
 830         if (IS_ERR(memptrs)) {
 831                 ret = PTR_ERR(memptrs);
 832                 dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
 833                 goto fail;
 834         }
 835
 836         if (nr_rings > ARRAY_SIZE(gpu->rb)) {
 837                 DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
 838                         ARRAY_SIZE(gpu->rb));
 839                 nr_rings = ARRAY_SIZE(gpu->rb);
 840         }
 841
 842         /* Create ringbuffer(s): */
 843         for (i = 0; i < nr_rings; i++) {
 844                 gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);
 845
 846                 if (IS_ERR(gpu->rb[i])) {
 847                         ret = PTR_ERR(gpu->rb[i]);
 848                         dev_err(drm->dev,
 849                                 "could not create ringbuffer %d: %d\n", i, ret);
 850                         goto fail;
 851                 }
 852
 853                 memptrs += sizeof(struct msm_rbmemptrs);
 854                 memptrs_iova += sizeof(struct msm_rbmemptrs);
 855         }
 856
 857         gpu->nr_rings = nr_rings;
 858
 859         return 0;
 860
 861 fail:
 862         for (i = 0; i < ARRAY_SIZE(gpu->rb); i++)  {
 863                 msm_ringbuffer_destroy(gpu->rb[i]);
 864                 gpu->rb[i] = NULL;
 865         }
 866
 867         if (gpu->memptrs_bo) {
 868                 msm_gem_put_vaddr(gpu->memptrs_bo);
 869                 msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
 870                 drm_gem_object_put_unlocked(gpu->memptrs_bo);
 871         }
 872
 873         platform_set_drvdata(pdev, NULL);
 874         return ret;
 875 }
 876
 877 void msm_gpu_cleanup(struct msm_gpu *gpu)
 878 {
 879         int i;
 880
 881         DBG("%s", gpu->name);
 882
 883         WARN_ON(!list_empty(&gpu->active_list));
 884
 885         for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
 886                 msm_ringbuffer_destroy(gpu->rb[i]);
 887                 gpu->rb[i] = NULL;
 888         }
 889
 890         if (gpu->memptrs_bo) {
 891                 msm_gem_put_vaddr(gpu->memptrs_bo);
 892                 msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
 893                 drm_gem_object_put_unlocked(gpu->memptrs_bo);
 894         }
 895
 896         if (!IS_ERR_OR_NULL(gpu->aspace)) {
 897                 gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
 898                         NULL, 0);
 899                 msm_gem_address_space_put(gpu->aspace);
 900         }
 901 }