drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
   3
   4 #include <linux/ascii85.h>
   5 #include "msm_gem.h"
   6 #include "a6xx_gpu.h"
   7 #include "a6xx_gmu.h"
   8 #include "a6xx_gpu_state.h"
   9 #include "a6xx_gmu.xml.h"
  10
  11 struct a6xx_gpu_state_obj {
  12         const void *handle;
  13         u32 *data;
  14 };
  15
  16 struct a6xx_gpu_state {
  17         struct msm_gpu_state base;
  18
  19         struct a6xx_gpu_state_obj *gmu_registers;
  20         int nr_gmu_registers;
  21
  22         struct a6xx_gpu_state_obj *registers;
  23         int nr_registers;
  24
  25         struct a6xx_gpu_state_obj *shaders;
  26         int nr_shaders;
  27
  28         struct a6xx_gpu_state_obj *clusters;
  29         int nr_clusters;
  30
  31         struct a6xx_gpu_state_obj *dbgahb_clusters;
  32         int nr_dbgahb_clusters;
  33
  34         struct a6xx_gpu_state_obj *indexed_regs;
  35         int nr_indexed_regs;
  36
  37         struct a6xx_gpu_state_obj *debugbus;
  38         int nr_debugbus;
  39
  40         struct a6xx_gpu_state_obj *vbif_debugbus;
  41
  42         struct a6xx_gpu_state_obj *cx_debugbus;
  43         int nr_cx_debugbus;
  44
  45         struct msm_gpu_state_bo *gmu_log;
  46         struct msm_gpu_state_bo *gmu_hfi;
  47         struct msm_gpu_state_bo *gmu_debug;
  48
  49         s32 hfi_queue_history[2][HFI_HISTORY_SZ];
  50
  51         struct list_head objs;
  52
  53         bool gpu_initialized;
  54 };
  55
  56 static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
  57 {
  58         in[0] = val;
  59         in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
  60
  61         return 2;
  62 }
  63
  64 static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
  65 {
  66         in[0] = target;
  67         in[1] = (((u64) reg) << 44 | dwords);
  68
  69         return 2;
  70 }
  71
  72 static inline int CRASHDUMP_FINI(u64 *in)
  73 {
  74         in[0] = 0;
  75         in[1] = 0;
  76
  77         return 2;
  78 }
  79
  80 struct a6xx_crashdumper {
  81         void *ptr;
  82         struct drm_gem_object *bo;
  83         u64 iova;
  84 };
  85
  86 struct a6xx_state_memobj {
  87         struct list_head node;
  88         unsigned long long data[];
  89 };
  90
  91 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
  92 {
  93         struct a6xx_state_memobj *obj =
  94                 kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
  95
  96         if (!obj)
  97                 return NULL;
  98
  99         list_add_tail(&obj->node, &a6xx_state->objs);
 100         return &obj->data;
 101 }
 102
 103 static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
 104                 size_t size)
 105 {
 106         void *dst = state_kcalloc(a6xx_state, 1, size);
 107
 108         if (dst)
 109                 memcpy(dst, src, size);
 110         return dst;
 111 }
 112
 113 /*
 114  * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 115  * the rest for the data
 116  */
 117 #define A6XX_CD_DATA_OFFSET 8192
 118 #define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
 119
 120 static int a6xx_crashdumper_init(struct msm_gpu *gpu,
 121                 struct a6xx_crashdumper *dumper)
 122 {
 123         dumper->ptr = msm_gem_kernel_new(gpu->dev,
 124                 SZ_1M, MSM_BO_WC, gpu->aspace,
 125                 &dumper->bo, &dumper->iova);
 126
 127         if (!IS_ERR(dumper->ptr))
 128                 msm_gem_object_set_name(dumper->bo, "crashdump");
 129
 130         return PTR_ERR_OR_ZERO(dumper->ptr);
 131 }
 132
 133 static int a6xx_crashdumper_run(struct msm_gpu *gpu,
 134                 struct a6xx_crashdumper *dumper)
 135 {
 136         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 137         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 138         u32 val;
 139         int ret;
 140
 141         if (IS_ERR_OR_NULL(dumper->ptr))
 142                 return -EINVAL;
 143
 144         if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
 145                 return -EINVAL;
 146
 147         /* Make sure all pending memory writes are posted */
 148         wmb();
 149
 150         gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);
 151
 152         gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
 153
 154         ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
 155                 val & 0x02, 100, 10000);
 156
 157         gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
 158
 159         return ret;
 160 }
 161
 162 /* read a value from the GX debug bus */
 163 static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
 164                 u32 *data)
 165 {
 166         u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
 167                 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
 168
 169         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
 170         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
 171         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
 172         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
 173
 174         /* Wait 1 us to make sure the data is flowing */
 175         udelay(1);
 176
 177         data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 178         data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 179
 180         return 2;
 181 }
 182
 183 #define cxdbg_write(ptr, offset, val) \
 184         msm_writel((val), (ptr) + ((offset) << 2))
 185
 186 #define cxdbg_read(ptr, offset) \
 187         msm_readl((ptr) + ((offset) << 2))
 188
 189 /* read a value from the CX debug bus */
 190 static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
 191                 u32 *data)
 192 {
 193         u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
 194                 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
 195
 196         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
 197         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
 198         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
 199         cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
 200
 201         /* Wait 1 us to make sure the data is flowing */
 202         udelay(1);
 203
 204         data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 205         data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 206
 207         return 2;
 208 }
 209
 210 /* Read a chunk of data from the VBIF debug bus */
 211 static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
 212                 u32 reg, int count, u32 *data)
 213 {
 214         int i;
 215
 216         gpu_write(gpu, ctrl0, reg);
 217
 218         for (i = 0; i < count; i++) {
 219                 gpu_write(gpu, ctrl1, i);
 220                 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
 221         }
 222
 223         return count;
 224 }
 225
 226 #define AXI_ARB_BLOCKS 2
 227 #define XIN_AXI_BLOCKS 5
 228 #define XIN_CORE_BLOCKS 4
 229
 230 #define VBIF_DEBUGBUS_BLOCK_SIZE \
 231         ((16 * AXI_ARB_BLOCKS) + \
 232          (18 * XIN_AXI_BLOCKS) + \
 233          (12 * XIN_CORE_BLOCKS))
 234
 235 static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
 236                 struct a6xx_gpu_state *a6xx_state,
 237                 struct a6xx_gpu_state_obj *obj)
 238 {
 239         u32 clk, *ptr;
 240         int i;
 241
 242         obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
 243                 sizeof(u32));
 244         if (!obj->data)
 245                 return;
 246
 247         obj->handle = NULL;
 248
 249         /* Get the current clock setting */
 250         clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
 251
 252         /* Force on the bus so we can read it */
 253         gpu_write(gpu, REG_A6XX_VBIF_CLKON,
 254                 clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
 255
 256         /* We will read from BUS2 first, so disable BUS1 */
 257         gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
 258
 259         /* Enable the VBIF bus for reading */
 260         gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
 261
 262         ptr = obj->data;
 263
 264         for (i = 0; i < AXI_ARB_BLOCKS; i++)
 265                 ptr += vbif_debugbus_read(gpu,
 266                         REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 267                         REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 268                         1 << (i + 16), 16, ptr);
 269
 270         for (i = 0; i < XIN_AXI_BLOCKS; i++)
 271                 ptr += vbif_debugbus_read(gpu,
 272                         REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 273                         REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 274                         1 << i, 18, ptr);
 275
 276         /* Stop BUS2 so we can turn on BUS1 */
 277         gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
 278
 279         for (i = 0; i < XIN_CORE_BLOCKS; i++)
 280                 ptr += vbif_debugbus_read(gpu,
 281                         REG_A6XX_VBIF_TEST_BUS1_CTRL0,
 282                         REG_A6XX_VBIF_TEST_BUS1_CTRL1,
 283                         1 << i, 12, ptr);
 284
 285         /* Restore the VBIF clock setting */
 286         gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
 287 }
 288
 289 static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
 290                 struct a6xx_gpu_state *a6xx_state,
 291                 const struct a6xx_debugbus_block *block,
 292                 struct a6xx_gpu_state_obj *obj)
 293 {
 294         int i;
 295         u32 *ptr;
 296
 297         obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 298         if (!obj->data)
 299                 return;
 300
 301         obj->handle = block;
 302
 303         for (ptr = obj->data, i = 0; i < block->count; i++)
 304                 ptr += debugbus_read(gpu, block->id, i, ptr);
 305 }
 306
 307 static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
 308                 struct a6xx_gpu_state *a6xx_state,
 309                 const struct a6xx_debugbus_block *block,
 310                 struct a6xx_gpu_state_obj *obj)
 311 {
 312         int i;
 313         u32 *ptr;
 314
 315         obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 316         if (!obj->data)
 317                 return;
 318
 319         obj->handle = block;
 320
 321         for (ptr = obj->data, i = 0; i < block->count; i++)
 322                 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
 323 }
 324
 325 static void a6xx_get_debugbus(struct msm_gpu *gpu,
 326                 struct a6xx_gpu_state *a6xx_state)
 327 {
 328         struct resource *res;
 329         void __iomem *cxdbg = NULL;
 330         int nr_debugbus_blocks;
 331
 332         /* Set up the GX debug bus */
 333
 334         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
 335                 A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 336
 337         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
 338                 A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 339
 340         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 341         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 342         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 343         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 344
 345         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
 346         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
 347
 348         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 349         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 350         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 351         gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 352
 353         /* Set up the CX debug bus - it lives elsewhere in the system so do a
 354          * temporary ioremap for the registers
 355          */
 356         res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
 357                         "cx_dbgc");
 358
 359         if (res)
 360                 cxdbg = ioremap(res->start, resource_size(res));
 361
 362         if (cxdbg) {
 363                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
 364                         A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 365
 366                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
 367                         A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 368
 369                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 370                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 371                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 372                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 373
 374                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
 375                         0x76543210);
 376                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
 377                         0xFEDCBA98);
 378
 379                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 380                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 381                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 382                 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 383         }
 384
 385         nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
 386                 (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
 387
 388         if (adreno_is_a650_family(to_adreno_gpu(gpu)))
 389                 nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
 390
 391         a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
 392                         sizeof(*a6xx_state->debugbus));
 393
 394         if (a6xx_state->debugbus) {
 395                 int i;
 396
 397                 for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
 398                         a6xx_get_debugbus_block(gpu,
 399                                 a6xx_state,
 400                                 &a6xx_debugbus_blocks[i],
 401                                 &a6xx_state->debugbus[i]);
 402
 403                 a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
 404
 405                 /*
 406                  * GBIF has same debugbus as of other GPU blocks, fall back to
 407                  * default path if GPU uses GBIF, also GBIF uses exactly same
 408                  * ID as of VBIF.
 409                  */
 410                 if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
 411                         a6xx_get_debugbus_block(gpu, a6xx_state,
 412                                 &a6xx_gbif_debugbus_block,
 413                                 &a6xx_state->debugbus[i]);
 414
 415                         a6xx_state->nr_debugbus += 1;
 416                 }
 417
 418
 419                 if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
 420                         for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
 421                                 a6xx_get_debugbus_block(gpu,
 422                                         a6xx_state,
 423                                         &a650_debugbus_blocks[i],
 424                                         &a6xx_state->debugbus[i]);
 425                 }
 426         }
 427
 428         /*  Dump the VBIF debugbus on applicable targets */
 429         if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
 430                 a6xx_state->vbif_debugbus =
 431                         state_kcalloc(a6xx_state, 1,
 432                                         sizeof(*a6xx_state->vbif_debugbus));
 433
 434                 if (a6xx_state->vbif_debugbus)
 435                         a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
 436                                         a6xx_state->vbif_debugbus);
 437         }
 438
 439         if (cxdbg) {
 440                 a6xx_state->cx_debugbus =
 441                         state_kcalloc(a6xx_state,
 442                         ARRAY_SIZE(a6xx_cx_debugbus_blocks),
 443                         sizeof(*a6xx_state->cx_debugbus));
 444
 445                 if (a6xx_state->cx_debugbus) {
 446                         int i;
 447
 448                         for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
 449                                 a6xx_get_cx_debugbus_block(cxdbg,
 450                                         a6xx_state,
 451                                         &a6xx_cx_debugbus_blocks[i],
 452                                         &a6xx_state->cx_debugbus[i]);
 453
 454                         a6xx_state->nr_cx_debugbus =
 455                                 ARRAY_SIZE(a6xx_cx_debugbus_blocks);
 456                 }
 457
 458                 iounmap(cxdbg);
 459         }
 460 }
 461
 462 #define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
 463
 464 /* Read a data cluster from behind the AHB aperture */
 465 static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
 466                 struct a6xx_gpu_state *a6xx_state,
 467                 const struct a6xx_dbgahb_cluster *dbgahb,
 468                 struct a6xx_gpu_state_obj *obj,
 469                 struct a6xx_crashdumper *dumper)
 470 {
 471         u64 *in = dumper->ptr;
 472         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 473         size_t datasize;
 474         int i, regcount = 0;
 475
 476         for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 477                 int j;
 478
 479                 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 480                         (dbgahb->statetype + i * 2) << 8);
 481
 482                 for (j = 0; j < dbgahb->count; j += 2) {
 483                         int count = RANGE(dbgahb->registers, j);
 484                         u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 485                                 dbgahb->registers[j] - (dbgahb->base >> 2);
 486
 487                         in += CRASHDUMP_READ(in, offset, count, out);
 488
 489                         out += count * sizeof(u32);
 490
 491                         if (i == 0)
 492                                 regcount += count;
 493                 }
 494         }
 495
 496         CRASHDUMP_FINI(in);
 497
 498         datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 499
 500         if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 501                 return;
 502
 503         if (a6xx_crashdumper_run(gpu, dumper))
 504                 return;
 505
 506         obj->handle = dbgahb;
 507         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 508                 datasize);
 509 }
 510
 511 static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
 512                 struct a6xx_gpu_state *a6xx_state,
 513                 struct a6xx_crashdumper *dumper)
 514 {
 515         int i;
 516
 517         a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
 518                 ARRAY_SIZE(a6xx_dbgahb_clusters),
 519                 sizeof(*a6xx_state->dbgahb_clusters));
 520
 521         if (!a6xx_state->dbgahb_clusters)
 522                 return;
 523
 524         a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
 525
 526         for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
 527                 a6xx_get_dbgahb_cluster(gpu, a6xx_state,
 528                         &a6xx_dbgahb_clusters[i],
 529                         &a6xx_state->dbgahb_clusters[i], dumper);
 530 }
 531
 532 /* Read a data cluster from the CP aperture with the crashdumper */
 533 static void a6xx_get_cluster(struct msm_gpu *gpu,
 534                 struct a6xx_gpu_state *a6xx_state,
 535                 const struct a6xx_cluster *cluster,
 536                 struct a6xx_gpu_state_obj *obj,
 537                 struct a6xx_crashdumper *dumper)
 538 {
 539         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 540         u64 *in = dumper->ptr;
 541         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 542         size_t datasize;
 543         int i, regcount = 0;
 544         u32 id = cluster->id;
 545
 546         /* Skip registers that are not present on older generation */
 547         if (!adreno_is_a660_family(adreno_gpu) &&
 548                         cluster->registers == a660_fe_cluster)
 549                 return;
 550
 551         if (adreno_is_a650_family(adreno_gpu) &&
 552                         cluster->registers == a6xx_ps_cluster)
 553                 id = CLUSTER_VPC_PS;
 554
 555         /* Some clusters need a selector register to be programmed too */
 556         if (cluster->sel_reg)
 557                 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
 558
 559         for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 560                 int j;
 561
 562                 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
 563                         (id << 8) | (i << 4) | i);
 564
 565                 for (j = 0; j < cluster->count; j += 2) {
 566                         int count = RANGE(cluster->registers, j);
 567
 568                         in += CRASHDUMP_READ(in, cluster->registers[j],
 569                                 count, out);
 570
 571                         out += count * sizeof(u32);
 572
 573                         if (i == 0)
 574                                 regcount += count;
 575                 }
 576         }
 577
 578         CRASHDUMP_FINI(in);
 579
 580         datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 581
 582         if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 583                 return;
 584
 585         if (a6xx_crashdumper_run(gpu, dumper))
 586                 return;
 587
 588         obj->handle = cluster;
 589         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 590                 datasize);
 591 }
 592
 593 static void a6xx_get_clusters(struct msm_gpu *gpu,
 594                 struct a6xx_gpu_state *a6xx_state,
 595                 struct a6xx_crashdumper *dumper)
 596 {
 597         int i;
 598
 599         a6xx_state->clusters = state_kcalloc(a6xx_state,
 600                 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
 601
 602         if (!a6xx_state->clusters)
 603                 return;
 604
 605         a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
 606
 607         for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
 608                 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
 609                         &a6xx_state->clusters[i], dumper);
 610 }
 611
 612 /* Read a shader / debug block from the HLSQ aperture with the crashdumper */
 613 static void a6xx_get_shader_block(struct msm_gpu *gpu,
 614                 struct a6xx_gpu_state *a6xx_state,
 615                 const struct a6xx_shader_block *block,
 616                 struct a6xx_gpu_state_obj *obj,
 617                 struct a6xx_crashdumper *dumper)
 618 {
 619         u64 *in = dumper->ptr;
 620         size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
 621         int i;
 622
 623         if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 624                 return;
 625
 626         for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
 627                 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 628                         (block->type << 8) | i);
 629
 630                 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
 631                         block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
 632         }
 633
 634         CRASHDUMP_FINI(in);
 635
 636         if (a6xx_crashdumper_run(gpu, dumper))
 637                 return;
 638
 639         obj->handle = block;
 640         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 641                 datasize);
 642 }
 643
 644 static void a6xx_get_shaders(struct msm_gpu *gpu,
 645                 struct a6xx_gpu_state *a6xx_state,
 646                 struct a6xx_crashdumper *dumper)
 647 {
 648         int i;
 649
 650         a6xx_state->shaders = state_kcalloc(a6xx_state,
 651                 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
 652
 653         if (!a6xx_state->shaders)
 654                 return;
 655
 656         a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
 657
 658         for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
 659                 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
 660                         &a6xx_state->shaders[i], dumper);
 661 }
 662
 663 /* Read registers from behind the HLSQ aperture with the crashdumper */
 664 static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
 665                 struct a6xx_gpu_state *a6xx_state,
 666                 const struct a6xx_registers *regs,
 667                 struct a6xx_gpu_state_obj *obj,
 668                 struct a6xx_crashdumper *dumper)
 669
 670 {
 671         u64 *in = dumper->ptr;
 672         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 673         int i, regcount = 0;
 674
 675         in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
 676
 677         for (i = 0; i < regs->count; i += 2) {
 678                 u32 count = RANGE(regs->registers, i);
 679                 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 680                         regs->registers[i] - (regs->val0 >> 2);
 681
 682                 in += CRASHDUMP_READ(in, offset, count, out);
 683
 684                 out += count * sizeof(u32);
 685                 regcount += count;
 686         }
 687
 688         CRASHDUMP_FINI(in);
 689
 690         if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
 691                 return;
 692
 693         if (a6xx_crashdumper_run(gpu, dumper))
 694                 return;
 695
 696         obj->handle = regs;
 697         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 698                 regcount * sizeof(u32));
 699 }
 700
 701 /* Read a block of registers using the crashdumper */
 702 static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
 703                 struct a6xx_gpu_state *a6xx_state,
 704                 const struct a6xx_registers *regs,
 705                 struct a6xx_gpu_state_obj *obj,
 706                 struct a6xx_crashdumper *dumper)
 707
 708 {
 709         u64 *in = dumper->ptr;
 710         u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 711         int i, regcount = 0;
 712
 713         /* Skip unsupported registers on older generations */
 714         if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
 715                         (regs->registers == a660_registers))
 716                 return;
 717
 718         /* Some blocks might need to program a selector register first */
 719         if (regs->val0)
 720                 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
 721
 722         for (i = 0; i < regs->count; i += 2) {
 723                 u32 count = RANGE(regs->registers, i);
 724
 725                 in += CRASHDUMP_READ(in, regs->registers[i], count, out);
 726
 727                 out += count * sizeof(u32);
 728                 regcount += count;
 729         }
 730
 731         CRASHDUMP_FINI(in);
 732
 733         if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
 734                 return;
 735
 736         if (a6xx_crashdumper_run(gpu, dumper))
 737                 return;
 738
 739         obj->handle = regs;
 740         obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 741                 regcount * sizeof(u32));
 742 }
 743
 744 /* Read a block of registers via AHB */
 745 static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
 746                 struct a6xx_gpu_state *a6xx_state,
 747                 const struct a6xx_registers *regs,
 748                 struct a6xx_gpu_state_obj *obj)
 749 {
 750         int i, regcount = 0, index = 0;
 751
 752         /* Skip unsupported registers on older generations */
 753         if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
 754                         (regs->registers == a660_registers))
 755                 return;
 756
 757         for (i = 0; i < regs->count; i += 2)
 758                 regcount += RANGE(regs->registers, i);
 759
 760         obj->handle = (const void *) regs;
 761         obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
 762         if (!obj->data)
 763                 return;
 764
 765         for (i = 0; i < regs->count; i += 2) {
 766                 u32 count = RANGE(regs->registers, i);
 767                 int j;
 768
 769                 for (j = 0; j < count; j++)
 770                         obj->data[index++] = gpu_read(gpu,
 771                                 regs->registers[i] + j);
 772         }
 773 }
 774
 775 /* Read a block of GMU registers */
 776 static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
 777                 struct a6xx_gpu_state *a6xx_state,
 778                 const struct a6xx_registers *regs,
 779                 struct a6xx_gpu_state_obj *obj,
 780                 bool rscc)
 781 {
 782         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 783         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 784         struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 785         int i, regcount = 0, index = 0;
 786
 787         for (i = 0; i < regs->count; i += 2)
 788                 regcount += RANGE(regs->registers, i);
 789
 790         obj->handle = (const void *) regs;
 791         obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
 792         if (!obj->data)
 793                 return;
 794
 795         for (i = 0; i < regs->count; i += 2) {
 796                 u32 count = RANGE(regs->registers, i);
 797                 int j;
 798
 799                 for (j = 0; j < count; j++) {
 800                         u32 offset = regs->registers[i] + j;
 801                         u32 val;
 802
 803                         if (rscc)
 804                                 val = gmu_read_rscc(gmu, offset);
 805                         else
 806                                 val = gmu_read(gmu, offset);
 807
 808                         obj->data[index++] = val;
 809                 }
 810         }
 811 }
 812
 813 static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
 814                 struct a6xx_gpu_state *a6xx_state)
 815 {
 816         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 817         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 818
 819         a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
 820                 3, sizeof(*a6xx_state->gmu_registers));
 821
 822         if (!a6xx_state->gmu_registers)
 823                 return;
 824
 825         a6xx_state->nr_gmu_registers = 3;
 826
 827         /* Get the CX GMU registers from AHB */
 828         _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
 829                 &a6xx_state->gmu_registers[0], false);
 830         _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
 831                 &a6xx_state->gmu_registers[1], true);
 832
 833         if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
 834                 return;
 835
 836         /* Set the fence to ALLOW mode so we can access the registers */
 837         gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
 838
 839         _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
 840                 &a6xx_state->gmu_registers[2], false);
 841 }
 842
 843 static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
 844                 struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
 845 {
 846         struct msm_gpu_state_bo *snapshot;
 847
 848         if (!bo->size)
 849                 return NULL;
 850
 851         snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
 852         if (!snapshot)
 853                 return NULL;
 854
 855         snapshot->iova = bo->iova;
 856         snapshot->size = bo->size;
 857         snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
 858         if (!snapshot->data)
 859                 return NULL;
 860
 861         memcpy(snapshot->data, bo->virt, bo->size);
 862
 863         return snapshot;
 864 }
 865
 866 static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
 867                                           struct a6xx_gpu_state *a6xx_state)
 868 {
 869         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 870         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 871         struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 872         unsigned i, j;
 873
 874         BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
 875
 876         for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
 877                 struct a6xx_hfi_queue *queue = &gmu->queues[i];
 878                 for (j = 0; j < HFI_HISTORY_SZ; j++) {
 879                         unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
 880                         a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
 881                 }
 882         }
 883 }
 884
 885 #define A6XX_GBIF_REGLIST_SIZE   1
 886 static void a6xx_get_registers(struct msm_gpu *gpu,
 887                 struct a6xx_gpu_state *a6xx_state,
 888                 struct a6xx_crashdumper *dumper)
 889 {
 890         int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
 891                 ARRAY_SIZE(a6xx_reglist) +
 892                 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
 893         int index = 0;
 894         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 895
 896         a6xx_state->registers = state_kcalloc(a6xx_state,
 897                 count, sizeof(*a6xx_state->registers));
 898
 899         if (!a6xx_state->registers)
 900                 return;
 901
 902         a6xx_state->nr_registers = count;
 903
 904         for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
 905                 a6xx_get_ahb_gpu_registers(gpu,
 906                         a6xx_state, &a6xx_ahb_reglist[i],
 907                         &a6xx_state->registers[index++]);
 908
 909         if (a6xx_has_gbif(adreno_gpu))
 910                 a6xx_get_ahb_gpu_registers(gpu,
 911                                 a6xx_state, &a6xx_gbif_reglist,
 912                                 &a6xx_state->registers[index++]);
 913         else
 914                 a6xx_get_ahb_gpu_registers(gpu,
 915                                 a6xx_state, &a6xx_vbif_reglist,
 916                                 &a6xx_state->registers[index++]);
 917         if (!dumper) {
 918                 /*
 919                  * We can't use the crashdumper when the SMMU is stalled,
 920                  * because the GPU has no memory access until we resume
 921                  * translation (but we don't want to do that until after
 922                  * we have captured as much useful GPU state as possible).
 923                  * So instead collect registers via the CPU:
 924                  */
 925                 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 926                         a6xx_get_ahb_gpu_registers(gpu,
 927                                 a6xx_state, &a6xx_reglist[i],
 928                                 &a6xx_state->registers[index++]);
 929                 return;
 930         }
 931
 932         for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 933                 a6xx_get_crashdumper_registers(gpu,
 934                         a6xx_state, &a6xx_reglist[i],
 935                         &a6xx_state->registers[index++],
 936                         dumper);
 937
 938         for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
 939                 a6xx_get_crashdumper_hlsq_registers(gpu,
 940                         a6xx_state, &a6xx_hlsq_reglist[i],
 941                         &a6xx_state->registers[index++],
 942                         dumper);
 943 }
 944
 945 static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
 946 {
 947         /* The value at [16:31] is in 4dword units. Convert it to dwords */
 948         return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
 949 }
 950
 951 /* Read a block of data from an indexed register pair */
 952 static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
 953                 struct a6xx_gpu_state *a6xx_state,
 954                 struct a6xx_indexed_registers *indexed,
 955                 struct a6xx_gpu_state_obj *obj)
 956 {
 957         int i;
 958
 959         obj->handle = (const void *) indexed;
 960         if (indexed->count_fn)
 961                 indexed->count = indexed->count_fn(gpu);
 962
 963         obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
 964         if (!obj->data)
 965                 return;
 966
 967         /* All the indexed banks start at address 0 */
 968         gpu_write(gpu, indexed->addr, 0);
 969
 970         /* Read the data - each read increments the internal address by 1 */
 971         for (i = 0; i < indexed->count; i++)
 972                 obj->data[i] = gpu_read(gpu, indexed->data);
 973 }
 974
 975 static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
 976                 struct a6xx_gpu_state *a6xx_state)
 977 {
 978         u32 mempool_size;
 979         int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
 980         int i;
 981
 982         a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
 983                 sizeof(*a6xx_state->indexed_regs));
 984         if (!a6xx_state->indexed_regs)
 985                 return;
 986
 987         for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
 988                 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
 989                         &a6xx_state->indexed_regs[i]);
 990
 991         if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
 992                 u32 val;
 993
 994                 val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
 995                 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
 996
 997                 /* Get the contents of the CP mempool */
 998                 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
 999                         &a6xx_state->indexed_regs[i]);
1000
1001                 gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1002                 a6xx_state->nr_indexed_regs = count;
1003                 return;
1004         }
1005
1006         /* Set the CP mempool size to 0 to stabilize it while dumping */
1007         mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1008         gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1009
1010         /* Get the contents of the CP mempool */
1011         a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1012                 &a6xx_state->indexed_regs[i]);
1013
1014         /*
1015          * Offset 0x2000 in the mempool is the size - copy the saved size over
1016          * so the data is consistent
1017          */
1018         a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1019
1020         /* Restore the size in the hardware */
1021         gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
1022
1023         a6xx_state->nr_indexed_regs = count;
1024 }
1025
1026 struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1027 {
1028         struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1029         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1030         struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1031         struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1032                 GFP_KERNEL);
1033         bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1034                         A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1035
1036         if (!a6xx_state)
1037                 return ERR_PTR(-ENOMEM);
1038
1039         INIT_LIST_HEAD(&a6xx_state->objs);
1040
1041         /* Get the generic state from the adreno core */
1042         adreno_gpu_state_get(gpu, &a6xx_state->base);
1043
1044         a6xx_get_gmu_registers(gpu, a6xx_state);
1045
1046         a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1047         a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1048         a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1049
1050         a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1051
1052         /* If GX isn't on the rest of the data isn't going to be accessible */
1053         if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1054                 return &a6xx_state->base;
1055
1056         /* Get the banks of indexed registers */
1057         a6xx_get_indexed_registers(gpu, a6xx_state);
1058
1059         /*
1060          * Try to initialize the crashdumper, if we are not dumping state
1061          * with the SMMU stalled.  The crashdumper needs memory access to
1062          * write out GPU state, so we need to skip this when the SMMU is
1063          * stalled in response to an iova fault
1064          */
1065         if (!stalled && !gpu->needs_hw_init &&
1066             !a6xx_crashdumper_init(gpu, &_dumper)) {
1067                 dumper = &_dumper;
1068         }
1069
1070         a6xx_get_registers(gpu, a6xx_state, dumper);
1071
1072         if (dumper) {
1073                 a6xx_get_shaders(gpu, a6xx_state, dumper);
1074                 a6xx_get_clusters(gpu, a6xx_state, dumper);
1075                 a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1076
1077                 msm_gem_kernel_put(dumper->bo, gpu->aspace);
1078         }
1079
1080         if (snapshot_debugbus)
1081                 a6xx_get_debugbus(gpu, a6xx_state);
1082
1083         a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1084
1085         return  &a6xx_state->base;
1086 }
1087
1088 static void a6xx_gpu_state_destroy(struct kref *kref)
1089 {
1090         struct a6xx_state_memobj *obj, *tmp;
1091         struct msm_gpu_state *state = container_of(kref,
1092                         struct msm_gpu_state, ref);
1093         struct a6xx_gpu_state *a6xx_state = container_of(state,
1094                         struct a6xx_gpu_state, base);
1095
1096         if (a6xx_state->gmu_log)
1097                 kvfree(a6xx_state->gmu_log->data);
1098
1099         if (a6xx_state->gmu_hfi)
1100                 kvfree(a6xx_state->gmu_hfi->data);
1101
1102         if (a6xx_state->gmu_debug)
1103                 kvfree(a6xx_state->gmu_debug->data);
1104
1105         list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1106                 list_del(&obj->node);
1107                 kvfree(obj);
1108         }
1109
1110         adreno_gpu_state_destroy(state);
1111         kfree(a6xx_state);
1112 }
1113
1114 int a6xx_gpu_state_put(struct msm_gpu_state *state)
1115 {
1116         if (IS_ERR_OR_NULL(state))
1117                 return 1;
1118
1119         return kref_put(&state->ref, a6xx_gpu_state_destroy);
1120 }
1121
1122 static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1123                 struct drm_printer *p)
1124 {
1125         int i, index = 0;
1126
1127         if (!data)
1128                 return;
1129
1130         for (i = 0; i < count; i += 2) {
1131                 u32 count = RANGE(registers, i);
1132                 u32 offset = registers[i];
1133                 int j;
1134
1135                 for (j = 0; j < count; index++, offset++, j++) {
1136                         if (data[index] == 0xdeafbead)
1137                                 continue;
1138
1139                         drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1140                                 offset << 2, data[index]);
1141                 }
1142         }
1143 }
1144
1145 static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1146 {
1147         char out[ASCII85_BUFSZ];
1148         long i, l, datalen = 0;
1149
1150         for (i = 0; i < len >> 2; i++) {
1151                 if (data[i])
1152                         datalen = (i + 1) << 2;
1153         }
1154
1155         if (datalen == 0)
1156                 return;
1157
1158         drm_puts(p, "    data: !!ascii85 |\n");
1159         drm_puts(p, "      ");
1160
1161
1162         l = ascii85_encode_len(datalen);
1163
1164         for (i = 0; i < l; i++)
1165                 drm_puts(p, ascii85_encode(data[i], out));
1166
1167         drm_puts(p, "\n");
1168 }
1169
/*
 * Print @fmt, then @name, then a newline.  Despite its name, @fmt is emitted
 * verbatim with drm_puts() and is not interpreted as a format string here.
 */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
        drm_puts(p, fmt);
        drm_puts(p, name);
        drm_puts(p, "\n");
}
1176
1177 static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1178                 struct drm_printer *p)
1179 {
1180         const struct a6xx_shader_block *block = obj->handle;
1181         int i;
1182
1183         if (!obj->handle)
1184                 return;
1185
1186         print_name(p, "  - type: ", block->name);
1187
1188         for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1189                 drm_printf(p, "    - bank: %d\n", i);
1190                 drm_printf(p, "      size: %d\n", block->size);
1191
1192                 if (!obj->data)
1193                         continue;
1194
1195                 print_ascii85(p, block->size << 2,
1196                         obj->data + (block->size * i));
1197         }
1198 }
1199
1200 static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1201                 struct drm_printer *p)
1202 {
1203         int ctx, index = 0;
1204
1205         for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1206                 int j;
1207
1208                 drm_printf(p, "    - context: %d\n", ctx);
1209
1210                 for (j = 0; j < size; j += 2) {
1211                         u32 count = RANGE(registers, j);
1212                         u32 offset = registers[j];
1213                         int k;
1214
1215                         for (k = 0; k < count; index++, offset++, k++) {
1216                                 if (data[index] == 0xdeafbead)
1217                                         continue;
1218
1219                                 drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1220                                         offset << 2, data[index]);
1221                         }
1222                 }
1223         }
1224 }
1225
1226 static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1227                 struct drm_printer *p)
1228 {
1229         const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1230
1231         if (dbgahb) {
1232                 print_name(p, "  - cluster-name: ", dbgahb->name);
1233                 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1234                         obj->data, p);
1235         }
1236 }
1237
1238 static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1239                 struct drm_printer *p)
1240 {
1241         const struct a6xx_cluster *cluster = obj->handle;
1242
1243         if (cluster) {
1244                 print_name(p, "  - cluster-name: ", cluster->name);
1245                 a6xx_show_cluster_data(cluster->registers, cluster->count,
1246                         obj->data, p);
1247         }
1248 }
1249
1250 static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1251                 struct drm_printer *p)
1252 {
1253         const struct a6xx_indexed_registers *indexed = obj->handle;
1254
1255         if (!indexed)
1256                 return;
1257
1258         print_name(p, "  - regs-name: ", indexed->name);
1259         drm_printf(p, "    dwords: %d\n", indexed->count);
1260
1261         print_ascii85(p, indexed->count << 2, obj->data);
1262 }
1263
1264 static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1265                 u32 *data, struct drm_printer *p)
1266 {
1267         if (block) {
1268                 print_name(p, "  - debugbus-block: ", block->name);
1269
1270                 /*
1271                  * count for regular debugbus data is in quadwords,
1272                  * but print the size in dwords for consistency
1273                  */
1274                 drm_printf(p, "    count: %d\n", block->count << 1);
1275
1276                 print_ascii85(p, block->count << 3, data);
1277         }
1278 }
1279
1280 static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1281                 struct drm_printer *p)
1282 {
1283         int i;
1284
1285         for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1286                 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1287
1288                 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1289         }
1290
1291         if (a6xx_state->vbif_debugbus) {
1292                 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1293
1294                 drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1295                 drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1296
1297                 /* vbif debugbus data is in dwords.  Confusing, huh? */
1298                 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1299         }
1300
1301         for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1302                 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1303
1304                 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1305         }
1306 }
1307
1308 void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1309                 struct drm_printer *p)
1310 {
1311         struct a6xx_gpu_state *a6xx_state = container_of(state,
1312                         struct a6xx_gpu_state, base);
1313         int i;
1314
1315         if (IS_ERR_OR_NULL(state))
1316                 return;
1317
1318         drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1319
1320         adreno_show(gpu, state, p);
1321
1322         drm_puts(p, "gmu-log:\n");
1323         if (a6xx_state->gmu_log) {
1324                 struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1325
1326                 drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1327                 drm_printf(p, "    size: %zu\n", gmu_log->size);
1328                 adreno_show_object(p, &gmu_log->data, gmu_log->size,
1329                                 &gmu_log->encoded);
1330         }
1331
1332         drm_puts(p, "gmu-hfi:\n");
1333         if (a6xx_state->gmu_hfi) {
1334                 struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1335                 unsigned i, j;
1336
1337                 drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1338                 drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1339                 for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1340                         drm_printf(p, "    queue-history[%u]:", i);
1341                         for (j = 0; j < HFI_HISTORY_SZ; j++) {
1342                                 drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1343                         }
1344                         drm_printf(p, "\n");
1345                 }
1346                 adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1347                                 &gmu_hfi->encoded);
1348         }
1349
1350         drm_puts(p, "gmu-debug:\n");
1351         if (a6xx_state->gmu_debug) {
1352                 struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1353
1354                 drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1355                 drm_printf(p, "    size: %zu\n", gmu_debug->size);
1356                 adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1357                                 &gmu_debug->encoded);
1358         }
1359
1360         drm_puts(p, "registers:\n");
1361         for (i = 0; i < a6xx_state->nr_registers; i++) {
1362                 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1363                 const struct a6xx_registers *regs = obj->handle;
1364
1365                 if (!obj->handle)
1366                         continue;
1367
1368                 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1369         }
1370
1371         drm_puts(p, "registers-gmu:\n");
1372         for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1373                 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1374                 const struct a6xx_registers *regs = obj->handle;
1375
1376                 if (!obj->handle)
1377                         continue;
1378
1379                 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1380         }
1381
1382         drm_puts(p, "indexed-registers:\n");
1383         for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1384                 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1385
1386         drm_puts(p, "shader-blocks:\n");
1387         for (i = 0; i < a6xx_state->nr_shaders; i++)
1388                 a6xx_show_shader(&a6xx_state->shaders[i], p);
1389
1390         drm_puts(p, "clusters:\n");
1391         for (i = 0; i < a6xx_state->nr_clusters; i++)
1392                 a6xx_show_cluster(&a6xx_state->clusters[i], p);
1393
1394         for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
1395                 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
1396
1397         drm_puts(p, "debugbus:\n");
1398         a6xx_show_debugbus(a6xx_state, p);
1399 }