[linux.git] drivers/gpu/drm/msm/adreno/a5xx_gpu.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18
19 #define GPU_PAS_ID 13
20
21 void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
22                 bool sync)
23 {
24         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
25         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
26         uint32_t wptr;
27         unsigned long flags;
28
29         /*
30          * Most flush operations need to issue a WHERE_AM_I opcode to sync up
31          * the rptr shadow
32          */
33         if (a5xx_gpu->has_whereami && sync) {
34                 OUT_PKT7(ring, CP_WHERE_AM_I, 2);
35                 OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring)));
36                 OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
37         }
38
39         spin_lock_irqsave(&ring->lock, flags);
40
41         /* Copy the shadow to the actual register */
42         ring->cur = ring->next;
43
44         /* Make sure to wrap wptr if we need to */
45         wptr = get_wptr(ring);
46
47         spin_unlock_irqrestore(&ring->lock, flags);
48
49         /* Make sure everything is posted before making a decision */
50         mb();
51
52         /* Update HW if this is the current ring and we are not in preempt */
53         if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
54                 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
55 }
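/*
 * For reference: a minimal sketch of the two helpers a5xx_flush() leans on.
 * These are assumptions about (not copies of) the definitions in a5xx_gpu.h -
 * the rptr shadow is assumed to be one u32 slot per ring, and the wptr the
 * write offset into the ring in dwords. The example_* names are made up.
 */
#if 0	/* illustrative only, not built as part of this file */
static inline u64 example_shadowptr(struct a5xx_gpu *a5xx_gpu,
		struct msm_ringbuffer *ring)
{
	/* assumed layout: one 32-bit rptr slot per ring in the shadow BO */
	return a5xx_gpu->shadow_iova + ring->id * sizeof(u32);
}

static inline u32 example_get_wptr(struct msm_ringbuffer *ring)
{
	/* distance from the start of the RB in dwords, wrapped to the RB size */
	return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2);
}
#endif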
56
57 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit)
58 {
59         struct msm_drm_private *priv = gpu->dev->dev_private;
60         struct msm_ringbuffer *ring = submit->ring;
61         struct msm_gem_object *obj;
62         uint32_t *ptr, dwords;
63         unsigned int i;
64
65         for (i = 0; i < submit->nr_cmds; i++) {
66                 switch (submit->cmd[i].type) {
67                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
68                         break;
69                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
70                         if (priv->lastctx == submit->queue->ctx)
71                                 break;
72                         fallthrough;
73                 case MSM_SUBMIT_CMD_BUF:
74                         /* copy commands into RB: */
75                         obj = submit->bos[submit->cmd[i].idx].obj;
76                         dwords = submit->cmd[i].size;
77
78                         ptr = msm_gem_get_vaddr(&obj->base);
79
80                         /* _get_vaddr() shouldn't fail at this point,
81                          * since we've already mapped it once in
82                          * submit_reloc()
83                          */
84                         if (WARN_ON(!ptr))
85                                 return;
86
87                         for (i = 0; i < dwords; i++) {
88                                 /* normally the OUT_PKTn() would wait
89                                  * for space for the packet.  But since
90                                  * we just OUT_RING() the whole thing,
91                                  * need to call adreno_wait_ring()
92                                  * ourself:
93                                  */
94                                 adreno_wait_ring(ring, 1);
95                                 OUT_RING(ring, ptr[i]);
96                         }
97
98                         msm_gem_put_vaddr(&obj->base);
99
100                         break;
101                 }
102         }
103
104         a5xx_flush(gpu, ring, true);
105         a5xx_preempt_trigger(gpu);
106
107         /* we might not necessarily have a cmd from userspace to
108          * trigger an event to know that submit has completed, so
109          * do this manually:
110          */
111         a5xx_idle(gpu, ring);
112         ring->memptrs->fence = submit->seqno;
113         msm_gpu_retire(gpu);
114 }
115
116 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
117 {
118         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
119         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
120         struct msm_drm_private *priv = gpu->dev->dev_private;
121         struct msm_ringbuffer *ring = submit->ring;
122         unsigned int i, ibs = 0;
123
124         if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
125                 priv->lastctx = NULL;
126                 a5xx_submit_in_rb(gpu, submit);
127                 return;
128         }
129
130         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
131         OUT_RING(ring, 0x02);
132
133         /* Turn off protected mode to write to special registers */
134         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135         OUT_RING(ring, 0);
136
137         /* Set the save preemption record for the ring/command */
138         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
139         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
140         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
141
142         /* Turn back on protected mode */
143         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
144         OUT_RING(ring, 1);
145
146         /* Enable local preemption for fine-grained preemption */
147         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
148         OUT_RING(ring, 0x02);
149
150         /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
151         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
152         OUT_RING(ring, 0x02);
153
154         /* Submit the commands */
155         for (i = 0; i < submit->nr_cmds; i++) {
156                 switch (submit->cmd[i].type) {
157                 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
158                         break;
159                 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
160                         if (priv->lastctx == submit->queue->ctx)
161                                 break;
162                         fallthrough;
163                 case MSM_SUBMIT_CMD_BUF:
164                         OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
165                         OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
166                         OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
167                         OUT_RING(ring, submit->cmd[i].size);
168                         ibs++;
169                         break;
170                 }
171         }
172
173         /*
174          * Write the render mode to NULL (0) to indicate to the CP that the IBs
175          * are done rendering - otherwise a lucky preemption would start
176          * replaying from the last checkpoint
177          */
178         OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
179         OUT_RING(ring, 0);
180         OUT_RING(ring, 0);
181         OUT_RING(ring, 0);
182         OUT_RING(ring, 0);
183         OUT_RING(ring, 0);
184
185         /* Turn off IB level preemptions */
186         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
187         OUT_RING(ring, 0x01);
188
189         /* Write the fence to the scratch register */
190         OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
191         OUT_RING(ring, submit->seqno);
192
193         /*
194          * Execute a CACHE_FLUSH_TS event. This will ensure that the
195          * timestamp is written to the memory and then triggers the interrupt
196          */
197         OUT_PKT7(ring, CP_EVENT_WRITE, 4);
198         OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
199                 CP_EVENT_WRITE_0_IRQ);
200         OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
201         OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
202         OUT_RING(ring, submit->seqno);
203
204         /* Yield the floor on command completion */
205         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
206         /*
207          * If dword[2:1] are non zero, they specify an address for the CP to
208          * write the value of dword[3] to on preemption complete. Write 0 to
209          * skip the write
210          */
211         OUT_RING(ring, 0x00);
212         OUT_RING(ring, 0x00);
213         /* Data value - not used if the address above is 0 */
214         OUT_RING(ring, 0x01);
215         /* Set bit 0 to trigger an interrupt on preempt complete */
216         OUT_RING(ring, 0x01);
217
218         /* A WHERE_AM_I packet is not needed after a YIELD */
219         a5xx_flush(gpu, ring, false);
220
221         /* Check to see if we need to start preemption */
222         a5xx_preempt_trigger(gpu);
223 }
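/*
 * For reference: how the CACHE_FLUSH_TS fence write above is consumed. This is
 * a sketch under the assumption that ring->memptrs->fence (written by the CP to
 * rbmemptr(ring, fence)) is compared against submit->seqno by the retire path;
 * the helper name below is made up for illustration.
 */
#if 0	/* illustrative only, not built as part of this file */
static bool example_submit_retired(struct msm_ringbuffer *ring, uint32_t seqno)
{
	/* the CP advances memptrs->fence via the CACHE_FLUSH_TS event above */
	/* (ignoring 32-bit wrap-around for brevity) */
	return ring->memptrs->fence >= seqno;
}
#endif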
224
225 static const struct {
226         u32 offset;
227         u32 value;
228 } a5xx_hwcg[] = {
229         {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
230         {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
231         {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
232         {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
233         {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
234         {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
235         {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
236         {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
237         {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
238         {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
239         {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
240         {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
241         {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
242         {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
243         {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
244         {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
245         {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
246         {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
247         {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
248         {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
249         {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
250         {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
251         {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
252         {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
253         {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
254         {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
255         {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
256         {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
257         {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
258         {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
259         {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
260         {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
261         {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
262         {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
263         {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
264         {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
265         {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
266         {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
267         {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
268         {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
269         {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
270         {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
271         {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
272         {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
273         {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
274         {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
275         {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
276         {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
277         {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
278         {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
279         {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
280         {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
281         {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
282         {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
283         {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
284         {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
285         {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
286         {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
287         {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
288         {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
289         {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
290         {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
291         {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
292         {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
293         {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
294         {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
295         {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
296         {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
297         {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
298         {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
299         {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
300         {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
301         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
302         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
303         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
304         {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
305         {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
306         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
307         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
308         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
309         {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
310         {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
311         {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
312         {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
313         {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
314         {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
315         {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
316         {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
317         {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
318         {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
319         {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
320         {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
321 };
322
323 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
324 {
325         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
326         unsigned int i;
327
328         for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
329                 gpu_write(gpu, a5xx_hwcg[i].offset,
330                         state ? a5xx_hwcg[i].value : 0);
331
332         if (adreno_is_a540(adreno_gpu)) {
333                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
334                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
335         }
336
337         gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
338         gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
339 }
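/*
 * Usage note: besides hardware init, a5xx_set_hwcg() is used to bracket
 * register reads that are unreliable while clock gating is active - see
 * a5xx_gpu_state_get() further down, which follows this pattern:
 */
#if 0	/* illustrative only, not built as part of this file */
	a5xx_set_hwcg(gpu, false);	/* stop clock gating while sampling */
	/* ... read RBBM/HLSQ state ... */
	a5xx_set_hwcg(gpu, true);	/* restore the power savings */
#endif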
340
341 static int a5xx_me_init(struct msm_gpu *gpu)
342 {
343         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
344         struct msm_ringbuffer *ring = gpu->rb[0];
345
346         OUT_PKT7(ring, CP_ME_INIT, 8);
347
348         OUT_RING(ring, 0x0000002F);
349
350         /* Enable multiple hardware contexts */
351         OUT_RING(ring, 0x00000003);
352
353         /* Enable error detection */
354         OUT_RING(ring, 0x20000000);
355
356         /* Don't enable header dump */
357         OUT_RING(ring, 0x00000000);
358         OUT_RING(ring, 0x00000000);
359
360         /* Specify workarounds for various microcode issues */
361         if (adreno_is_a530(adreno_gpu)) {
362                 /* Workaround for token end syncs
363                  * Force a WFI after every direct-render 3D mode draw and every
364                  * 2D mode 3 draw
365                  */
366                 OUT_RING(ring, 0x0000000B);
367         } else if (adreno_is_a510(adreno_gpu)) {
368                 /* Workaround for token and syncs */
369                 OUT_RING(ring, 0x00000001);
370         } else {
371                 /* No workarounds enabled */
372                 OUT_RING(ring, 0x00000000);
373         }
374
375         OUT_RING(ring, 0x00000000);
376         OUT_RING(ring, 0x00000000);
377
378         a5xx_flush(gpu, ring, true);
379         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
380 }
381
382 static int a5xx_preempt_start(struct msm_gpu *gpu)
383 {
384         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
385         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
386         struct msm_ringbuffer *ring = gpu->rb[0];
387
388         if (gpu->nr_rings == 1)
389                 return 0;
390
391         /* Turn off protected mode to write to special registers */
392         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
393         OUT_RING(ring, 0);
394
395         /* Set the save preemption record for the ring/command */
396         OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
397         OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
398         OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
399
400         /* Turn back on protected mode */
401         OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
402         OUT_RING(ring, 1);
403
404         OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
405         OUT_RING(ring, 0x00);
406
407         OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
408         OUT_RING(ring, 0x01);
409
410         OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
411         OUT_RING(ring, 0x01);
412
413         /* Yield the floor on command completion */
414         OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
415         OUT_RING(ring, 0x00);
416         OUT_RING(ring, 0x00);
417         OUT_RING(ring, 0x01);
418         OUT_RING(ring, 0x01);
419
420         /* The WHERE_AM_I packet is not needed after a YIELD is issued */
421         a5xx_flush(gpu, ring, false);
422
423         return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
424 }
425
426 static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu,
427                 struct drm_gem_object *obj)
428 {
429         u32 *buf = msm_gem_get_vaddr_active(obj);
430
431         if (IS_ERR(buf))
432                 return;
433
434         /*
435          * If the lowest nibble is 0xa that is an indication that this microcode
436          * has been patched. The actual version is in dword [3] but we only care
437          * about the patchlevel which is the lowest nibble of dword [3]
438          */
439         if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
440                 a5xx_gpu->has_whereami = true;
441
442         msm_gem_put_vaddr(obj);
443 }
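/*
 * The check above, spelled out: this only restates the test performed in
 * a5xx_ucode_check_version(), with a hypothetical helper name.
 */
#if 0	/* illustrative only, not built as part of this file */
static bool example_pfp_has_whereami(const u32 *buf)
{
	/* lowest nibble of dword 0 being 0xa marks a patched microcode image */
	bool patched = (buf[0] & 0xf) == 0xa;

	/* and the patchlevel (lowest nibble of buf[2]) must be at least 1 */
	return patched && (buf[2] & 0xf) >= 1;
}
#endif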
444
445 static int a5xx_ucode_init(struct msm_gpu *gpu)
446 {
447         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
448         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
449         int ret;
450
451         if (!a5xx_gpu->pm4_bo) {
452                 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
453                         adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
454
455
456                 if (IS_ERR(a5xx_gpu->pm4_bo)) {
457                         ret = PTR_ERR(a5xx_gpu->pm4_bo);
458                         a5xx_gpu->pm4_bo = NULL;
459                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
460                                 ret);
461                         return ret;
462                 }
463
464                 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
465         }
466
467         if (!a5xx_gpu->pfp_bo) {
468                 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
469                         adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
470
471                 if (IS_ERR(a5xx_gpu->pfp_bo)) {
472                         ret = PTR_ERR(a5xx_gpu->pfp_bo);
473                         a5xx_gpu->pfp_bo = NULL;
474                         DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
475                                 ret);
476                         return ret;
477                 }
478
479                 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
480                 a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
481         }
482
483         gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
484                 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
485
486         gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
487                 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
488
489         return 0;
490 }
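/*
 * For reference: the assumed behaviour of the gpu_write64() helper used above
 * (and throughout this file) - it splits a 64-bit value across a LO/HI
 * register pair. Sketch only; the real helper lives in msm_gpu.h.
 */
#if 0	/* illustrative only, not built as part of this file */
static inline void example_gpu_write64(struct msm_gpu *gpu, u32 reg_lo,
		u32 reg_hi, u64 value)
{
	gpu_write(gpu, reg_lo, lower_32_bits(value));
	gpu_write(gpu, reg_hi, upper_32_bits(value));
}
#endif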
491
492 #define SCM_GPU_ZAP_SHADER_RESUME 0
493
494 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
495 {
496         int ret;
497
498         ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
499         if (ret)
500                 DRM_ERROR("%s: zap-shader resume failed: %d\n",
501                         gpu->name, ret);
502
503         return ret;
504 }
505
506 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
507 {
508         static bool loaded;
509         int ret;
510
511         /*
512          * If the zap shader is already loaded into memory we just need to kick
513          * the remote processor to reinitialize it
514          */
515         if (loaded)
516                 return a5xx_zap_shader_resume(gpu);
517
518         ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
519
520         loaded = !ret;
521         return ret;
522 }
523
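/*
 * A5XX_INT_MASK below is written to REG_A5XX_RBBM_INT_0_MASK in a5xx_hw_init();
 * only the sources listed here can raise the GPU IRQ that a5xx_irq() later
 * decodes from REG_A5XX_RBBM_INT_0_STATUS.
 */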
524 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
525           A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
526           A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
527           A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
528           A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
529           A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
530           A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
531           A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
532           A5XX_RBBM_INT_0_MASK_CP_SW | \
533           A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
534           A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
535           A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
536
537 static int a5xx_hw_init(struct msm_gpu *gpu)
538 {
539         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
540         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
541         int ret;
542
543         gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
544
545         if (adreno_is_a540(adreno_gpu))
546                 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
547
548         /* Make all blocks contribute to the GPU BUSY perf counter */
549         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
550
551         /* Enable RBBM error reporting bits */
552         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
553
554         if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
555                 /*
556                  * Mask out the activity signals from RB1-3 to avoid false
557                  * positives
558                  */
559
560                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
561                         0xF0000000);
562                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
563                         0xFFFFFFFF);
564                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
565                         0xFFFFFFFF);
566                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
567                         0xFFFFFFFF);
568                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
569                         0xFFFFFFFF);
570                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
571                         0xFFFFFFFF);
572                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
573                         0xFFFFFFFF);
574                 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
575                         0xFFFFFFFF);
576         }
577
578         /* Enable fault detection */
579         gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
580                 (1 << 30) | 0xFFFF);
581
582         /* Turn on performance counters */
583         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
584
585         /* Select CP0 to always count cycles */
586         gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
587
588         /* Select countable 6 for RBBM perfcounter 0 to get the busy status for devfreq */
589         gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
590
591         /* Increase VFD cache access so LRZ and other data gets evicted less */
592         gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
593
594         /* Disable L2 bypass in the UCHE */
595         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
596         gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
597         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
598         gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
599
600         /* Set the GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem) */
601         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
602         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
603         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
604                 0x00100000 + adreno_gpu->gmem - 1);
605         gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
606
607         if (adreno_is_a510(adreno_gpu)) {
608                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
609                 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
610                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
611                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
612                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
613                           (0x200 << 11 | 0x200 << 22));
614         } else {
615                 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
616                 if (adreno_is_a530(adreno_gpu))
617                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
618                 if (adreno_is_a540(adreno_gpu))
619                         gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
620                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
621                 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
622                 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
623                           (0x400 << 11 | 0x300 << 22));
624         }
625
626         if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
627                 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
628
629         gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
630
631         /* Enable USE_RETENTION_FLOPS */
632         gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
633
634         /* Enable ME/PFP split notification */
635         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
636
637         /*
638          *  On A5x, the CCU can send the context_done event of a particular
639          *  context to the UCHE, which ultimately reaches the CP even while a
640          *  valid transaction for that context is still inside the CCU. This can
641          *  let the CP program config registers, causing the "valid transaction"
642          *  inside the CCU to be interpreted differently and leading to a GPU
643          *  fault. This bug is fixed in the latest A510 revision. To enable the
644          *  fix, bit[11] of RB_DBG_ECO_CNTL needs to be set to 0 (the default is
645          *  1, i.e. disabled). On older A510 revisions this bit is unused.
646          */
647         if (adreno_is_a510(adreno_gpu))
648                 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
649
650         /* Enable HWCG */
651         a5xx_set_hwcg(gpu, true);
652
653         gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
654
655         /* Set the highest bank bit */
656         gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
657         gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
658         if (adreno_is_a540(adreno_gpu))
659                 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
660
661         /* Protect registers from the CP */
662         gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
663
664         /* RBBM */
665         gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
666         gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
667         gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
668         gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
669         gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
670         gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
671
672         /* Content protect */
673         gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
674                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
675                         16));
676         gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
677                 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
678
679         /* CP */
680         gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
681         gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
682         gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
683         gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
684
685         /* RB */
686         gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
687         gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
688
689         /* VPC */
690         gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
691         gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
692
693         /* UCHE */
694         gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
695
696         if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
697                 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
698                         ADRENO_PROTECT_RW(0x10000, 0x8000));
699
700         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
701         /*
702          * Disable the trusted memory range - we don't actually support secure
703          * memory rendering at this point in time and we don't want to block off
704          * part of the virtual memory space.
705          */
706         gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
707                 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
708         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
709
710         /* Put the GPU into 64 bit by default */
711         gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
712         gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
713         gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
714         gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
715         gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
716         gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
717         gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
718         gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
719         gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
720         gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
721         gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
722         gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
723
724         /*
725          * A VPC corner case with local memory load kill leads to corrupt
726          * internal state. A normal disable does not work on all a5x chips,
727          * so use the following setting to disable it.
728          */
729         if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
730                 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
731                 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
732         }
733
734         ret = adreno_hw_init(gpu);
735         if (ret)
736                 return ret;
737
738         if (!adreno_is_a510(adreno_gpu))
739                 a5xx_gpmu_ucode_init(gpu);
740
741         ret = a5xx_ucode_init(gpu);
742         if (ret)
743                 return ret;
744
745         /* Set the ringbuffer address */
746         gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
747                 gpu->rb[0]->iova);
748
749         /*
750          * If the microcode supports the WHERE_AM_I opcode then we can use that
751          * in lieu of the RPTR shadow and enable preemption. Otherwise, we
752          * can't safely use the RPTR shadow or preemption. In either case, the
753          * RPTR shadow should be disabled in hardware.
754          */
755         gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
756                 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
757
758         /* Disable preemption if WHERE_AM_I isn't available */
759         if (!a5xx_gpu->has_whereami && gpu->nr_rings > 1) {
760                 a5xx_preempt_fini(gpu);
761                 gpu->nr_rings = 1;
762         } else {
763                 /* Create a privileged buffer for the RPTR shadow */
764                 if (!a5xx_gpu->shadow_bo) {
765                         a5xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
766                                 sizeof(u32) * gpu->nr_rings,
767                                 MSM_BO_UNCACHED | MSM_BO_MAP_PRIV,
768                                 gpu->aspace, &a5xx_gpu->shadow_bo,
769                                 &a5xx_gpu->shadow_iova);
770
771                         if (IS_ERR(a5xx_gpu->shadow))
772                                 return PTR_ERR(a5xx_gpu->shadow);
773                 }
774
775                 gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
776                         REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
777         }
778
779         a5xx_preempt_hw_init(gpu);
780
781         /* Set the interrupt mask - only these sources will raise the GPU IRQ */
782         gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
783
784         /* Clear ME_HALT to start the micro engine */
785         gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
786         ret = a5xx_me_init(gpu);
787         if (ret)
788                 return ret;
789
790         ret = a5xx_power_init(gpu);
791         if (ret)
792                 return ret;
793
794         /*
795          * Send a pipeline event stat to get misbehaving counters to start
796          * ticking correctly
797          */
798         if (adreno_is_a530(adreno_gpu)) {
799                 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
800                 OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
801
802                 a5xx_flush(gpu, gpu->rb[0], true);
803                 if (!a5xx_idle(gpu, gpu->rb[0]))
804                         return -EINVAL;
805         }
806
807         /*
808          * If the chip we are using supports loading a zap shader, then try
809          * to load it into the secure world. If successful we can use the CP
810          * to switch out of secure mode. If not, we have no recourse but to
811          * try to switch ourselves out manually. If we guessed wrong then
812          * access to the RBBM_SECVID_TRUST_CNTL register will be blocked and
813          * a permissions violation will soon follow.
814          */
815         ret = a5xx_zap_shader_init(gpu);
816         if (!ret) {
817                 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
818                 OUT_RING(gpu->rb[0], 0x00000000);
819
820                 a5xx_flush(gpu, gpu->rb[0], true);
821                 if (!a5xx_idle(gpu, gpu->rb[0]))
822                         return -EINVAL;
823         } else if (ret == -ENODEV) {
824                 /*
825                  * This device does not use zap shader (but print a warning
826                  * just in case someone got their dt wrong.. hopefully they
827                  * have a debug UART to realize the error of their ways...
828                  * if you mess this up you are about to crash horribly)
829                  */
830                 dev_warn_once(gpu->dev->dev,
831                         "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
832                 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
833         } else {
834                 return ret;
835         }
836
837         /* Last step - yield the ringbuffer */
838         a5xx_preempt_start(gpu);
839
840         return 0;
841 }
842
843 static void a5xx_recover(struct msm_gpu *gpu)
844 {
845         int i;
846
847         adreno_dump_info(gpu);
848
849         for (i = 0; i < 8; i++) {
850                 printk("CP_SCRATCH_REG%d: %u\n", i,
851                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
852         }
853
854         if (hang_debug)
855                 a5xx_dump(gpu);
856
857         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
858         gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
859         gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
860         adreno_recover(gpu);
861 }
862
863 static void a5xx_destroy(struct msm_gpu *gpu)
864 {
865         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
866         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
867
868         DBG("%s", gpu->name);
869
870         a5xx_preempt_fini(gpu);
871
872         if (a5xx_gpu->pm4_bo) {
873                 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
874                 drm_gem_object_put(a5xx_gpu->pm4_bo);
875         }
876
877         if (a5xx_gpu->pfp_bo) {
878                 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
879                 drm_gem_object_put(a5xx_gpu->pfp_bo);
880         }
881
882         if (a5xx_gpu->gpmu_bo) {
883                 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
884                 drm_gem_object_put(a5xx_gpu->gpmu_bo);
885         }
886
887         if (a5xx_gpu->shadow_bo) {
888                 msm_gem_unpin_iova(a5xx_gpu->shadow_bo, gpu->aspace);
889                 drm_gem_object_put(a5xx_gpu->shadow_bo);
890         }
891
892         adreno_gpu_cleanup(adreno_gpu);
893         kfree(a5xx_gpu);
894 }
895
896 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
897 {
898         if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
899                 return false;
900
901         /*
902          * Nearly every abnormality ends up pausing the GPU and triggering a
903          * fault so we can safely just watch for this one interrupt to fire
904          */
905         return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
906                 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
907 }
908
909 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
910 {
911         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
912         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
913
914         if (ring != a5xx_gpu->cur_ring) {
915                 WARN(1, "Tried to idle a non-current ringbuffer\n");
916                 return false;
917         }
918
919         /* wait for CP to drain ringbuffer: */
920         if (!adreno_idle(gpu, ring))
921                 return false;
922
923         if (spin_until(_a5xx_check_idle(gpu))) {
924                 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
925                         gpu->name, __builtin_return_address(0),
926                         gpu_read(gpu, REG_A5XX_RBBM_STATUS),
927                         gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
928                         gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
929                         gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
930                 return false;
931         }
932
933         return true;
934 }
935
936 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
937 {
938         struct msm_gpu *gpu = arg;
939         pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
940                         iova, flags,
941                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
942                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
943                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
944                         gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
945
946         return -EFAULT;
947 }
948
949 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
950 {
951         u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
952
953         if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
954                 u32 val;
955
956                 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
957
958                 /*
959                  * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
960                  * read it twice
961                  */
962
963                 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
964                 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
965
966                 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
967                         val);
968         }
969
970         if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
971                 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
972                         gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
973
974         if (status & A5XX_CP_INT_CP_DMA_ERROR)
975                 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
976
977         if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
978                 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
979
980                 dev_err_ratelimited(gpu->dev->dev,
981                         "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
982                         val & (1 << 24) ? "WRITE" : "READ",
983                         (val & 0xFFFFF) >> 2, val);
984         }
985
986         if (status & A5XX_CP_INT_CP_AHB_ERROR) {
987                 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
988                 const char *access[16] = { "reserved", "reserved",
989                         "timestamp lo", "timestamp hi", "pfp read", "pfp write",
990                         "", "", "me read", "me write", "", "", "crashdump read",
991                         "crashdump write" };
992
993                 dev_err_ratelimited(gpu->dev->dev,
994                         "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
995                         status & 0xFFFFF, access[(status >> 24) & 0xF],
996                         (status & (1 << 31)), status);
997         }
998 }
999
1000 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
1001 {
1002         if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
1003                 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
1004
1005                 dev_err_ratelimited(gpu->dev->dev,
1006                         "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
1007                         val & (1 << 28) ? "WRITE" : "READ",
1008                         (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
1009                         (val >> 24) & 0xF);
1010
1011                 /* Clear the error */
1012                 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
1013
1014                 /* Clear the interrupt */
1015                 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1016                         A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1017         }
1018
1019         if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
1020                 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
1021
1022         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
1023                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
1024                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
1025
1026         if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
1027                 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
1028                         gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
1029
1030         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1031                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1032                         gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1033
1034         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1035                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1036
1037         if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1038                 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1039 }
1040
1041 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1042 {
1043         uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI) << 32;
1044
1045         addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1046
1047         dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1048                 addr);
1049 }
1050
1051 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1052 {
1053         dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1054 }
1055
1056 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1057 {
1058         struct drm_device *dev = gpu->dev;
1059         struct msm_drm_private *priv = dev->dev_private;
1060         struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1061
1062         DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1063                 ring ? ring->id : -1, ring ? ring->seqno : 0,
1064                 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1065                 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1066                 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1067                 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1068                 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1069                 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1070                 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1071
1072         /* Turn off the hangcheck timer to keep it from bothering us */
1073         del_timer(&gpu->hangcheck_timer);
1074
1075         queue_work(priv->wq, &gpu->recover_work);
1076 }
1077
1078 #define RBBM_ERROR_MASK \
1079         (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1080         A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1081         A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1082         A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1083         A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1084         A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1085
1086 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1087 {
1088         u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1089
1090         /*
1091          * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1092          * before the source is cleared the interrupt will storm.
1093          */
1094         gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1095                 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1096
1097         /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1098         if (status & RBBM_ERROR_MASK)
1099                 a5xx_rbbm_err_irq(gpu, status);
1100
1101         if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1102                 a5xx_cp_err_irq(gpu);
1103
1104         if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1105                 a5xx_fault_detect_irq(gpu);
1106
1107         if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1108                 a5xx_uche_err_irq(gpu);
1109
1110         if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1111                 a5xx_gpmu_err_irq(gpu);
1112
1113         if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1114                 a5xx_preempt_trigger(gpu);
1115                 msm_gpu_retire(gpu);
1116         }
1117
1118         if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1119                 a5xx_preempt_irq(gpu);
1120
1121         return IRQ_HANDLED;
1122 }
1123
1124 static const u32 a5xx_registers[] = {
1125         0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1126         0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1127         0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1128         0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1129         0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1130         0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1131         0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1132         0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1133         0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1134         0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1135         0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1136         0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1137         0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1138         0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1139         0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1140         0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1141         0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1142         0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1143         0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1144         0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1145         0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1146         0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1147         0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1148         0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1149         0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1150         0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1151         0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1152         0xAC60, 0xAC60, ~0,
1153 };
1154
1155 static void a5xx_dump(struct msm_gpu *gpu)
1156 {
1157         DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1158                 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1159         adreno_dump(gpu);
1160 }
1161
1162 static int a5xx_pm_resume(struct msm_gpu *gpu)
1163 {
1164         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1165         int ret;
1166
1167         /* Turn on the core power */
1168         ret = msm_gpu_pm_resume(gpu);
1169         if (ret)
1170                 return ret;
1171
1172         if (adreno_is_a510(adreno_gpu)) {
1173                 /* Halt the sp_input_clk at HM level */
1174                 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1175                 a5xx_set_hwcg(gpu, true);
1176                 /* Turn on sp_input_clk at HM level */
1177                 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1178                 return 0;
1179         }
1180
1181         /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1182         gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1183
1184         /* Wait 3 usecs before polling */
1185         udelay(3);
1186
1187         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1188                 (1 << 20), (1 << 20));
1189         if (ret) {
1190                 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1191                         gpu->name,
1192                         gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1193                 return ret;
1194         }
1195
1196         /* Turn on the SP domain */
1197         gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1198         ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1199                 (1 << 20), (1 << 20));
1200         if (ret)
1201                 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1202                         gpu->name);
1203
1204         return ret;
1205 }
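/*
 * For reference: the assumed shape of the spin_usecs() poll helper used above -
 * it polls a register until the masked value matches, giving up after roughly
 * 'usecs' microseconds. Sketch only; the real helper is assumed to live in
 * a5xx_gpu.h and the example_* name is made up.
 */
#if 0	/* illustrative only, not built as part of this file */
static inline int example_spin_usecs(struct msm_gpu *gpu, u32 usecs,
		u32 reg, u32 mask, u32 value)
{
	while (usecs--) {
		if ((gpu_read(gpu, reg) & mask) == value)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
#endif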
1206
1207 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1208 {
1209         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1210         u32 mask = 0xf;
1211
1212         /* A510 has 3 XIN ports in VBIF */
1213         if (adreno_is_a510(adreno_gpu))
1214                 mask = 0x7;
1215
1216         /* Clear the VBIF pipe before shutting down */
1217         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1218         spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1219                                 mask) == mask);
1220
1221         gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1222
1223         /*
1224          * Reset the VBIF before power collapse to avoid issue with FIFO
1225          * entries
1226          */
1227         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1228         gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1229
1230         return msm_gpu_pm_suspend(gpu);
1231 }
1232
1233 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1234 {
1235         *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1236                 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1237
1238         return 0;
1239 }
1240
1241 struct a5xx_crashdumper {
1242         void *ptr;
1243         struct drm_gem_object *bo;
1244         u64 iova;
1245 };
1246
1247 struct a5xx_gpu_state {
1248         struct msm_gpu_state base;
1249         u32 *hlsqregs;
1250 };
1251
1252 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1253                 struct a5xx_crashdumper *dumper)
1254 {
1255         dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1256                 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1257                 &dumper->bo, &dumper->iova);
1258
1259         if (!IS_ERR(dumper->ptr))
1260                 msm_gem_object_set_name(dumper->bo, "crashdump");
1261
1262         return PTR_ERR_OR_ZERO(dumper->ptr);
1263 }
1264
1265 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1266                 struct a5xx_crashdumper *dumper)
1267 {
1268         u32 val;
1269
1270         if (IS_ERR_OR_NULL(dumper->ptr))
1271                 return -EINVAL;
1272
1273         gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1274                 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1275
1276         gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1277
1278         return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1279                 val & 0x04, 100, 10000);
1280 }
1281
1282 /*
1283  * This is a list of the registers that need to be read through the HLSQ
1284  * aperture via the crashdumper.  These are not normally accessible from
1285  * the CPU on a secure platform.
1286  */
1287 static const struct {
1288         u32 type;
1289         u32 regoffset;
1290         u32 count;
1291 } a5xx_hlsq_aperture_regs[] = {
1292         { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1293         { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1294         { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1295         { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1296         { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1297         { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1298         { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1299         { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1300         { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1301         { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1302         { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1303         { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1304         { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1305         { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1306         { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1307 };
1308
1309 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1310                 struct a5xx_gpu_state *a5xx_state)
1311 {
1312         struct a5xx_crashdumper dumper = { 0 };
1313         u32 offset, count = 0;
1314         u64 *ptr;
1315         int i;
1316
1317         if (a5xx_crashdumper_init(gpu, &dumper))
1318                 return;
1319
1320         /* The script will be written at offset 0 */
1321         ptr = dumper.ptr;
1322
1323         /* Start writing the data at offset 256k */
1324         offset = dumper.iova + (256 * SZ_1K);
1325
1326         /* Count how many additional registers to get from the HLSQ aperture */
1327         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1328                 count += a5xx_hlsq_aperture_regs[i].count;
1329
1330         a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1331         if (!a5xx_state->hlsqregs)
1332                 return;
1333
1334         /* Build the crashdump script */
1335         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1336                 u32 type = a5xx_hlsq_aperture_regs[i].type;
1337                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1338
1339                 /* Write the register to select the desired bank */
1340                 *ptr++ = ((u64) type << 8);
1341                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1342                         (1 << 21) | 1;
1343
1344                 *ptr++ = offset;
1345                 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1346                         | c;
1347
1348                 offset += c * sizeof(u32);
1349         }
1350
1351         /* Write two zeros to close off the script */
1352         *ptr++ = 0;
1353         *ptr++ = 0;
1354
1355         if (a5xx_crashdumper_run(gpu, &dumper)) {
1356                 kfree(a5xx_state->hlsqregs);
1357                 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1358                 return;
1359         }
1360
1361         /* Copy the data from the crashdumper to the state */
1362         memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1363                 count * sizeof(u32));
1364
1365         msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1366 }
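/*
 * Sketch of the crashdump script built above (the format is inferred from
 * this code, so treat it as an assumption rather than documented hardware
 * behaviour): the script is a sequence of 64-bit word pairs.  A pair with
 * bit 21 set in the second word writes the value in the first word to the
 * register encoded in bits 63:44; a pair without it reads 'count' dwords
 * (low bits of the second word) from that register into the GPU address in
 * the first word.  The script lives at offset 0 of the 1MB dumper BO and the
 * captured registers start at offset 256K, so each bank contributes:
 *
 *     { type << 8,  (HLSQ_DBG_READ_SEL << 44) | (1 << 21) | 1 }    // select bank
 *     { dest_iova,  (HLSQ_DBG_AHB_READ_APERTURE << 44) | count }   // dump it
 *
 * Two zero words terminate the script.
 */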
1367
1368 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1369 {
1370         struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1371                         GFP_KERNEL);
1372
1373         if (!a5xx_state)
1374                 return ERR_PTR(-ENOMEM);
1375
1376         /* Temporarily disable hardware clock gating before reading the hw */
1377         a5xx_set_hwcg(gpu, false);
1378
1379         /* First get the generic state from the adreno core */
1380         adreno_gpu_state_get(gpu, &(a5xx_state->base));
1381
1382         a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1383
1384         /* Get the HLSQ regs with the help of the crashdumper */
1385         a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1386
1387         a5xx_set_hwcg(gpu, true);
1388
1389         return &a5xx_state->base;
1390 }
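/*
 * Note: hardware clock gating is switched off for the duration of the state
 * capture and re-enabled afterwards, presumably because register reads from
 * gated blocks are unreliable while HWCG is active, so the snapshot is taken
 * with all clocks forced on.
 */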
1391
1392 static void a5xx_gpu_state_destroy(struct kref *kref)
1393 {
1394         struct msm_gpu_state *state = container_of(kref,
1395                 struct msm_gpu_state, ref);
1396         struct a5xx_gpu_state *a5xx_state = container_of(state,
1397                 struct a5xx_gpu_state, base);
1398
1399         kfree(a5xx_state->hlsqregs);
1400
1401         adreno_gpu_state_destroy(state);
1402         kfree(a5xx_state);
1403 }
1404
1405 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1406 {
1407         if (IS_ERR_OR_NULL(state))
1408                 return 1;
1409
1410         return kref_put(&state->ref, a5xx_gpu_state_destroy);
1411 }
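/*
 * Note: returning 1 for an ERR/NULL state appears to mirror kref_put()'s
 * "object was released" return value, so callers can treat a missing state
 * and a just-freed state the same way.
 */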
1412
1413
1414 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1415 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1416                       struct drm_printer *p)
1417 {
1418         int i, j;
1419         u32 pos = 0;
1420         struct a5xx_gpu_state *a5xx_state = container_of(state,
1421                 struct a5xx_gpu_state, base);
1422
1423         if (IS_ERR_OR_NULL(state))
1424                 return;
1425
1426         adreno_show(gpu, state, p);
1427
1428         /* Dump the additional a5xx HLSQ registers */
1429         if (!a5xx_state->hlsqregs)
1430                 return;
1431
1432         drm_printf(p, "registers-hlsq:\n");
1433
1434         for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1435                 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1436                 u32 c = a5xx_hlsq_aperture_regs[i].count;
1437
1438                 for (j = 0; j < c; j++, pos++, o++) {
1439                         /*
1440                          * To keep the crashdump simple we pull the entire range
1441                          * for each register type, but not all of the registers
1442                          * in the range are valid.  Fortunately, invalid registers
1443                          * stick out like a sore thumb with a value of
1444                          * 0xdeadbeef.
1445                          */
1446                         if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1447                                 continue;
1448
1449                         drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1450                                 o << 2, a5xx_state->hlsqregs[pos]);
1451                 }
1452         }
1453 }
1454 #endif
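/*
 * Note: a5xx_show() emits the HLSQ registers in the same
 * "- { offset: ..., value: ... }" style used for the rest of the register
 * dump, so the devcoredump/debugfs output stays consistent.  The 'o << 2'
 * converts the dword offset from the table into a byte offset for the
 * printed address.
 */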
1455
1456 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1457 {
1458         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1459         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1460
1461         return a5xx_gpu->cur_ring;
1462 }
1463
1464 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1465 {
1466         u64 busy_cycles, busy_time;
1467
1468         /* Only read the gpu busy if the hardware is already active */
1469         if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1470                 return 0;
1471
1472         busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1473                         REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1474
1475         busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1476         do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1477
1478         gpu->devfreq.busy_cycles = busy_cycles;
1479
1480         pm_runtime_put(&gpu->pdev->dev);
1481
1482         if (WARN_ON(busy_time > ~0LU))
1483                 return ~0LU;
1484
1485         return (unsigned long)busy_time;
1486 }
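/*
 * Note: busy time is derived as
 *
 *     busy_time_us = (busy_cycles - prev_busy_cycles) /
 *                    (core_clk_rate_hz / 1000000)
 *
 * i.e. the counter delta divided by clock ticks per microsecond.  As a
 * purely illustrative example (the 600 MHz rate is an assumption, not a
 * value from this file): a delta of 300,000,000 cycles at 600 MHz works out
 * to 300000000 / 600 = 500,000 us of busy time since the last sample.
 */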
1487
1488 static uint32_t a5xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1489 {
1490         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1491         struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1492
1493         if (a5xx_gpu->has_whereami)
1494                 return a5xx_gpu->shadow[ring->id];
1495
1496         return ring->memptrs->rptr = gpu_read(gpu, REG_A5XX_CP_RB_RPTR);
1497 }
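/*
 * Note: with WHERE_AM_I support the CP keeps an up-to-date copy of the read
 * pointer in the per-ring shadow buffer, so the kernel can read it straight
 * from memory; without it the driver falls back to an MMIO read of
 * CP_RB_RPTR, which also refreshes the cached memptrs->rptr value.
 */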
1498
1499 static const struct adreno_gpu_funcs funcs = {
1500         .base = {
1501                 .get_param = adreno_get_param,
1502                 .hw_init = a5xx_hw_init,
1503                 .pm_suspend = a5xx_pm_suspend,
1504                 .pm_resume = a5xx_pm_resume,
1505                 .recover = a5xx_recover,
1506                 .submit = a5xx_submit,
1507                 .active_ring = a5xx_active_ring,
1508                 .irq = a5xx_irq,
1509                 .destroy = a5xx_destroy,
1510 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1511                 .show = a5xx_show,
1512 #endif
1513 #if defined(CONFIG_DEBUG_FS)
1514                 .debugfs_init = a5xx_debugfs_init,
1515 #endif
1516                 .gpu_busy = a5xx_gpu_busy,
1517                 .gpu_state_get = a5xx_gpu_state_get,
1518                 .gpu_state_put = a5xx_gpu_state_put,
1519                 .create_address_space = adreno_iommu_create_address_space,
1520                 .get_rptr = a5xx_get_rptr,
1521         },
1522         .get_timestamp = a5xx_get_timestamp,
1523 };
1524
1525 static void check_speed_bin(struct device *dev)
1526 {
1527         struct nvmem_cell *cell;
1528         u32 val;
1529
1530         /*
1531          * If the OPP table specifies an opp-supported-hw property then we have
1532          * to set something with dev_pm_opp_set_supported_hw() or the table
1533          * doesn't get populated, so pick an arbitrary value that ensures the
1534          * default frequencies are selected but doesn't conflict with any
1535          * actual speed bins.
1536          */
1537         val = 0x80;
1538
1539         cell = nvmem_cell_get(dev, "speed_bin");
1540
1541         if (!IS_ERR(cell)) {
1542                 void *buf = nvmem_cell_read(cell, NULL);
1543
1544                 if (!IS_ERR(buf)) {
1545                         u8 bin = *((u8 *) buf);
1546
1547                         val = (1 << bin);
1548                         kfree(buf);
1549                 }
1550
1551                 nvmem_cell_put(cell);
1552         }
1553
1554         dev_pm_opp_set_supported_hw(dev, &val, 1);
1555 }
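/*
 * Note: the fused speed bin N is turned into the bitmask (1 << N) and handed
 * to the OPP layer, which then only enables table entries whose
 * opp-supported-hw value has that bit set.  A hypothetical device tree
 * fragment (illustrative only, not taken from a real DT) might look like:
 *
 *     opp-430000000 {
 *             opp-hz = /bits/ 64 <430000000>;
 *             opp-supported-hw = <0x06>;   // bins 1 and 2 only
 *     };
 *
 * The 0x80 fallback selects bit 7, which is assumed not to match any real
 * bin and therefore leaves only the unrestricted OPPs enabled.
 */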
1556
1557 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1558 {
1559         struct msm_drm_private *priv = dev->dev_private;
1560         struct platform_device *pdev = priv->gpu_pdev;
1561         struct a5xx_gpu *a5xx_gpu = NULL;
1562         struct adreno_gpu *adreno_gpu;
1563         struct msm_gpu *gpu;
1564         int ret;
1565
1566         if (!pdev) {
1567                 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1568                 return ERR_PTR(-ENXIO);
1569         }
1570
1571         a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1572         if (!a5xx_gpu)
1573                 return ERR_PTR(-ENOMEM);
1574
1575         adreno_gpu = &a5xx_gpu->base;
1576         gpu = &adreno_gpu->base;
1577
1578         adreno_gpu->registers = a5xx_registers;
1579
1580         a5xx_gpu->lm_leakage = 0x4E001A;
1581
1582         check_speed_bin(&pdev->dev);
1583
1584         ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1585         if (ret) {
1586                 a5xx_destroy(&(a5xx_gpu->base.base));
1587                 return ERR_PTR(ret);
1588         }
1589
1590         if (gpu->aspace)
1591                 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1592
1593         /* Set up the preemption specific bits and pieces for each ringbuffer */
1594         a5xx_preempt_init(gpu);
1595
1596         return gpu;
1597 }
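/*
 * Note: the '4' passed to adreno_gpu_init() is the number of ringbuffers to
 * create; a5xx uses multiple rings so that a5xx_preempt_init() can set up
 * per-ring preemption state and higher-priority submissions can preempt
 * lower-priority ones.
 */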