]> Git Repo - linux.git/blob - drivers/gpu/drm/radeon/evergreen_cs.c
Linux 6.14-rc3
[linux.git] / drivers / gpu / drm / radeon / evergreen_cs.c
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include "r600.h"
32 #include "evergreend.h"
33 #include "evergreen_reg_safe.h"
34 #include "cayman_reg_safe.h"
35
/*
 * Fallback MIN/MAX helpers.  Each macro has its own guard so that an
 * environment which already provides only one of them neither loses the
 * other nor triggers a redefinition.  Both macros evaluate their arguments
 * more than once: do not pass expressions with side effects.
 */
#ifndef MAX
#define MAX(a, b)                   (((a) > (b)) ? (a) : (b))
#endif
#ifndef MIN
#define MIN(a, b)                   (((a) < (b)) ? (a) : (b))
#endif

/* Number of elements in the evergreen_reg_safe_bm[] table. */
#define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm)
42
43 struct evergreen_cs_track {
44         u32                     group_size;
45         u32                     nbanks;
46         u32                     npipes;
47         u32                     row_size;
48         /* value we track */
49         u32                     nsamples;               /* unused */
50         struct radeon_bo        *cb_color_bo[12];
51         u32                     cb_color_bo_offset[12];
52         struct radeon_bo        *cb_color_fmask_bo[8];  /* unused */
53         struct radeon_bo        *cb_color_cmask_bo[8];  /* unused */
54         u32                     cb_color_info[12];
55         u32                     cb_color_view[12];
56         u32                     cb_color_pitch[12];
57         u32                     cb_color_slice[12];
58         u32                     cb_color_slice_idx[12];
59         u32                     cb_color_attrib[12];
60         u32                     cb_color_cmask_slice[8];/* unused */
61         u32                     cb_color_fmask_slice[8];/* unused */
62         u32                     cb_target_mask;
63         u32                     cb_shader_mask; /* unused */
64         u32                     vgt_strmout_config;
65         u32                     vgt_strmout_buffer_config;
66         struct radeon_bo        *vgt_strmout_bo[4];
67         u32                     vgt_strmout_bo_offset[4];
68         u32                     vgt_strmout_size[4];
69         u32                     db_depth_control;
70         u32                     db_depth_view;
71         u32                     db_depth_slice;
72         u32                     db_depth_size;
73         u32                     db_z_info;
74         u32                     db_z_read_offset;
75         u32                     db_z_write_offset;
76         struct radeon_bo        *db_z_read_bo;
77         struct radeon_bo        *db_z_write_bo;
78         u32                     db_s_info;
79         u32                     db_s_read_offset;
80         u32                     db_s_write_offset;
81         struct radeon_bo        *db_s_read_bo;
82         struct radeon_bo        *db_s_write_bo;
83         bool                    sx_misc_kill_all_prims;
84         bool                    cb_dirty;
85         bool                    db_dirty;
86         bool                    streamout_dirty;
87         u32                     htile_offset;
88         u32                     htile_surface;
89         struct radeon_bo        *htile_bo;
90         unsigned long           indirect_draw_buffer_size;
91         const unsigned          *reg_safe_bm;
92 };
93
94 static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
95 {
96         if (tiling_flags & RADEON_TILING_MACRO)
97                 return ARRAY_2D_TILED_THIN1;
98         else if (tiling_flags & RADEON_TILING_MICRO)
99                 return ARRAY_1D_TILED_THIN1;
100         else
101                 return ARRAY_LINEAR_GENERAL;
102 }
103
104 static u32 evergreen_cs_get_num_banks(u32 nbanks)
105 {
106         switch (nbanks) {
107         case 2:
108                 return ADDR_SURF_2_BANK;
109         case 4:
110                 return ADDR_SURF_4_BANK;
111         case 8:
112         default:
113                 return ADDR_SURF_8_BANK;
114         case 16:
115                 return ADDR_SURF_16_BANK;
116         }
117 }
118
119 static void evergreen_cs_track_init(struct evergreen_cs_track *track)
120 {
121         int i;
122
123         for (i = 0; i < 8; i++) {
124                 track->cb_color_fmask_bo[i] = NULL;
125                 track->cb_color_cmask_bo[i] = NULL;
126                 track->cb_color_cmask_slice[i] = 0;
127                 track->cb_color_fmask_slice[i] = 0;
128         }
129
130         for (i = 0; i < 12; i++) {
131                 track->cb_color_bo[i] = NULL;
132                 track->cb_color_bo_offset[i] = 0xFFFFFFFF;
133                 track->cb_color_info[i] = 0;
134                 track->cb_color_view[i] = 0xFFFFFFFF;
135                 track->cb_color_pitch[i] = 0;
136                 track->cb_color_slice[i] = 0xfffffff;
137                 track->cb_color_slice_idx[i] = 0;
138         }
139         track->cb_target_mask = 0xFFFFFFFF;
140         track->cb_shader_mask = 0xFFFFFFFF;
141         track->cb_dirty = true;
142
143         track->db_depth_slice = 0xffffffff;
144         track->db_depth_view = 0xFFFFC000;
145         track->db_depth_size = 0xFFFFFFFF;
146         track->db_depth_control = 0xFFFFFFFF;
147         track->db_z_info = 0xFFFFFFFF;
148         track->db_z_read_offset = 0xFFFFFFFF;
149         track->db_z_write_offset = 0xFFFFFFFF;
150         track->db_z_read_bo = NULL;
151         track->db_z_write_bo = NULL;
152         track->db_s_info = 0xFFFFFFFF;
153         track->db_s_read_offset = 0xFFFFFFFF;
154         track->db_s_write_offset = 0xFFFFFFFF;
155         track->db_s_read_bo = NULL;
156         track->db_s_write_bo = NULL;
157         track->db_dirty = true;
158         track->htile_bo = NULL;
159         track->htile_offset = 0xFFFFFFFF;
160         track->htile_surface = 0;
161
162         for (i = 0; i < 4; i++) {
163                 track->vgt_strmout_size[i] = 0;
164                 track->vgt_strmout_bo[i] = NULL;
165                 track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
166         }
167         track->streamout_dirty = true;
168         track->sx_misc_kill_all_prims = false;
169 }
170
/*
 * Description of one surface as seen by the checker.  The first group is
 * filled in by the caller from command-stream state; the second group is
 * written by the evergreen_surface_check*() helpers.
 */
struct eg_surface {
	/* inputs gathered from the command stream */
	unsigned	nbx;
	unsigned	nby;
	unsigned	format;
	unsigned	mode;
	unsigned	nbanks;
	unsigned	bankw;
	unsigned	bankh;
	unsigned	tsplit;
	unsigned	mtilea;
	unsigned	nsamples;

	/* outputs computed by the surface checks */
	unsigned	bpe;
	unsigned	layer_size;
	unsigned	palign;
	unsigned	halign;
	unsigned long	base_align;
};
190
191 static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
192                                           struct eg_surface *surf,
193                                           const char *prefix)
194 {
195         surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
196         surf->base_align = surf->bpe;
197         surf->palign = 1;
198         surf->halign = 1;
199         return 0;
200 }
201
202 static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
203                                                   struct eg_surface *surf,
204                                                   const char *prefix)
205 {
206         struct evergreen_cs_track *track = p->track;
207         unsigned palign;
208
209         palign = MAX(64, track->group_size / surf->bpe);
210         surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
211         surf->base_align = track->group_size;
212         surf->palign = palign;
213         surf->halign = 1;
214         if (surf->nbx & (palign - 1)) {
215                 if (prefix) {
216                         dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
217                                  __func__, __LINE__, prefix, surf->nbx, palign);
218                 }
219                 return -EINVAL;
220         }
221         return 0;
222 }
223
224 static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
225                                       struct eg_surface *surf,
226                                       const char *prefix)
227 {
228         struct evergreen_cs_track *track = p->track;
229         unsigned palign;
230
231         palign = track->group_size / (8 * surf->bpe * surf->nsamples);
232         palign = MAX(8, palign);
233         surf->layer_size = surf->nbx * surf->nby * surf->bpe;
234         surf->base_align = track->group_size;
235         surf->palign = palign;
236         surf->halign = 8;
237         if ((surf->nbx & (palign - 1))) {
238                 if (prefix) {
239                         dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
240                                  __func__, __LINE__, prefix, surf->nbx, palign,
241                                  track->group_size, surf->bpe, surf->nsamples);
242                 }
243                 return -EINVAL;
244         }
245         if ((surf->nby & (8 - 1))) {
246                 if (prefix) {
247                         dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
248                                  __func__, __LINE__, prefix, surf->nby);
249                 }
250                 return -EINVAL;
251         }
252         return 0;
253 }
254
255 static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
256                                       struct eg_surface *surf,
257                                       const char *prefix)
258 {
259         struct evergreen_cs_track *track = p->track;
260         unsigned palign, halign, tileb, slice_pt;
261         unsigned mtile_pr, mtile_ps, mtileb;
262
263         tileb = 64 * surf->bpe * surf->nsamples;
264         slice_pt = 1;
265         if (tileb > surf->tsplit) {
266                 slice_pt = tileb / surf->tsplit;
267         }
268         tileb = tileb / slice_pt;
269         /* macro tile width & height */
270         palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
271         halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
272         mtileb = (palign / 8) * (halign / 8) * tileb;
273         mtile_pr = surf->nbx / palign;
274         mtile_ps = (mtile_pr * surf->nby) / halign;
275         surf->layer_size = mtile_ps * mtileb * slice_pt;
276         surf->base_align = (palign / 8) * (halign / 8) * tileb;
277         surf->palign = palign;
278         surf->halign = halign;
279
280         if ((surf->nbx & (palign - 1))) {
281                 if (prefix) {
282                         dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
283                                  __func__, __LINE__, prefix, surf->nbx, palign);
284                 }
285                 return -EINVAL;
286         }
287         if ((surf->nby & (halign - 1))) {
288                 if (prefix) {
289                         dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
290                                  __func__, __LINE__, prefix, surf->nby, halign);
291                 }
292                 return -EINVAL;
293         }
294
295         return 0;
296 }
297
298 static int evergreen_surface_check(struct radeon_cs_parser *p,
299                                    struct eg_surface *surf,
300                                    const char *prefix)
301 {
302         /* some common value computed here */
303         surf->bpe = r600_fmt_get_blocksize(surf->format);
304
305         switch (surf->mode) {
306         case ARRAY_LINEAR_GENERAL:
307                 return evergreen_surface_check_linear(p, surf, prefix);
308         case ARRAY_LINEAR_ALIGNED:
309                 return evergreen_surface_check_linear_aligned(p, surf, prefix);
310         case ARRAY_1D_TILED_THIN1:
311                 return evergreen_surface_check_1d(p, surf, prefix);
312         case ARRAY_2D_TILED_THIN1:
313                 return evergreen_surface_check_2d(p, surf, prefix);
314         default:
315                 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
316                                 __func__, __LINE__, prefix, surf->mode);
317                 return -EINVAL;
318         }
319         return -EINVAL;
320 }
321
322 static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
323                                               struct eg_surface *surf,
324                                               const char *prefix)
325 {
326         switch (surf->mode) {
327         case ARRAY_2D_TILED_THIN1:
328                 break;
329         case ARRAY_LINEAR_GENERAL:
330         case ARRAY_LINEAR_ALIGNED:
331         case ARRAY_1D_TILED_THIN1:
332                 return 0;
333         default:
334                 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
335                                 __func__, __LINE__, prefix, surf->mode);
336                 return -EINVAL;
337         }
338
339         switch (surf->nbanks) {
340         case 0: surf->nbanks = 2; break;
341         case 1: surf->nbanks = 4; break;
342         case 2: surf->nbanks = 8; break;
343         case 3: surf->nbanks = 16; break;
344         default:
345                 dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
346                          __func__, __LINE__, prefix, surf->nbanks);
347                 return -EINVAL;
348         }
349         switch (surf->bankw) {
350         case 0: surf->bankw = 1; break;
351         case 1: surf->bankw = 2; break;
352         case 2: surf->bankw = 4; break;
353         case 3: surf->bankw = 8; break;
354         default:
355                 dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
356                          __func__, __LINE__, prefix, surf->bankw);
357                 return -EINVAL;
358         }
359         switch (surf->bankh) {
360         case 0: surf->bankh = 1; break;
361         case 1: surf->bankh = 2; break;
362         case 2: surf->bankh = 4; break;
363         case 3: surf->bankh = 8; break;
364         default:
365                 dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
366                          __func__, __LINE__, prefix, surf->bankh);
367                 return -EINVAL;
368         }
369         switch (surf->mtilea) {
370         case 0: surf->mtilea = 1; break;
371         case 1: surf->mtilea = 2; break;
372         case 2: surf->mtilea = 4; break;
373         case 3: surf->mtilea = 8; break;
374         default:
375                 dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
376                          __func__, __LINE__, prefix, surf->mtilea);
377                 return -EINVAL;
378         }
379         switch (surf->tsplit) {
380         case 0: surf->tsplit = 64; break;
381         case 1: surf->tsplit = 128; break;
382         case 2: surf->tsplit = 256; break;
383         case 3: surf->tsplit = 512; break;
384         case 4: surf->tsplit = 1024; break;
385         case 5: surf->tsplit = 2048; break;
386         case 6: surf->tsplit = 4096; break;
387         default:
388                 dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
389                          __func__, __LINE__, prefix, surf->tsplit);
390                 return -EINVAL;
391         }
392         return 0;
393 }
394
395 static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
396 {
397         struct evergreen_cs_track *track = p->track;
398         struct eg_surface surf;
399         unsigned pitch, slice, mslice;
400         u64 offset;
401         int r;
402
403         mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
404         pitch = track->cb_color_pitch[id];
405         slice = track->cb_color_slice[id];
406         surf.nbx = (pitch + 1) * 8;
407         surf.nby = ((slice + 1) * 64) / surf.nbx;
408         surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
409         surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
410         surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
411         surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
412         surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
413         surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
414         surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
415         surf.nsamples = 1;
416
417         if (!r600_fmt_is_valid_color(surf.format)) {
418                 dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
419                          __func__, __LINE__, surf.format,
420                         id, track->cb_color_info[id]);
421                 return -EINVAL;
422         }
423
424         r = evergreen_surface_value_conv_check(p, &surf, "cb");
425         if (r) {
426                 return r;
427         }
428
429         r = evergreen_surface_check(p, &surf, "cb");
430         if (r) {
431                 dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
432                          __func__, __LINE__, id, track->cb_color_pitch[id],
433                          track->cb_color_slice[id], track->cb_color_attrib[id],
434                          track->cb_color_info[id]);
435                 return r;
436         }
437
438         offset = (u64)track->cb_color_bo_offset[id] << 8;
439         if (offset & (surf.base_align - 1)) {
440                 dev_warn(p->dev, "%s:%d cb[%d] bo base %llu not aligned with %ld\n",
441                          __func__, __LINE__, id, offset, surf.base_align);
442                 return -EINVAL;
443         }
444
445         offset += (u64)surf.layer_size * mslice;
446         if (offset > radeon_bo_size(track->cb_color_bo[id])) {
447                 /* old ddx are broken they allocate bo with w*h*bpp but
448                  * program slice with ALIGN(h, 8), catch this and patch
449                  * command stream.
450                  */
451                 if (!surf.mode) {
452                         uint32_t *ib = p->ib.ptr;
453                         u64 tmp, nby, bsize, size, min = 0;
454
455                         /* find the height the ddx wants */
456                         if (surf.nby > 8) {
457                                 min = surf.nby - 8;
458                         }
459                         bsize = radeon_bo_size(track->cb_color_bo[id]);
460                         tmp = (u64)track->cb_color_bo_offset[id] << 8;
461                         for (nby = surf.nby; nby > min; nby--) {
462                                 size = nby * surf.nbx * surf.bpe * surf.nsamples;
463                                 if ((tmp + size * mslice) <= bsize) {
464                                         break;
465                                 }
466                         }
467                         if (nby > min) {
468                                 surf.nby = nby;
469                                 slice = ((nby * surf.nbx) / 64) - 1;
470                                 if (!evergreen_surface_check(p, &surf, "cb")) {
471                                         /* check if this one works */
472                                         tmp += (u64)surf.layer_size * mslice;
473                                         if (tmp <= bsize) {
474                                                 ib[track->cb_color_slice_idx[id]] = slice;
475                                                 goto old_ddx_ok;
476                                         }
477                                 }
478                         }
479                 }
480                 dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
481                          "offset %llu, max layer %d, bo size %ld, slice %d)\n",
482                          __func__, __LINE__, id, surf.layer_size,
483                         (u64)track->cb_color_bo_offset[id] << 8, mslice,
484                         radeon_bo_size(track->cb_color_bo[id]), slice);
485                 dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
486                          __func__, __LINE__, surf.nbx, surf.nby,
487                         surf.mode, surf.bpe, surf.nsamples,
488                         surf.bankw, surf.bankh,
489                         surf.tsplit, surf.mtilea);
490                 return -EINVAL;
491         }
492 old_ddx_ok:
493
494         return 0;
495 }
496
497 static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
498                                                 unsigned nbx, unsigned nby)
499 {
500         struct evergreen_cs_track *track = p->track;
501         unsigned long size;
502
503         if (track->htile_bo == NULL) {
504                 dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
505                                 __func__, __LINE__, track->db_z_info);
506                 return -EINVAL;
507         }
508
509         if (G_028ABC_LINEAR(track->htile_surface)) {
510                 /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
511                 nbx = round_up(nbx, 16 * 8);
512                 /* height is npipes htiles aligned == npipes * 8 pixel aligned */
513                 nby = round_up(nby, track->npipes * 8);
514         } else {
515                 /* always assume 8x8 htile */
516                 /* align is htile align * 8, htile align vary according to
517                  * number of pipe and tile width and nby
518                  */
519                 switch (track->npipes) {
520                 case 8:
521                         /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
522                         nbx = round_up(nbx, 64 * 8);
523                         nby = round_up(nby, 64 * 8);
524                         break;
525                 case 4:
526                         /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
527                         nbx = round_up(nbx, 64 * 8);
528                         nby = round_up(nby, 32 * 8);
529                         break;
530                 case 2:
531                         /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
532                         nbx = round_up(nbx, 32 * 8);
533                         nby = round_up(nby, 32 * 8);
534                         break;
535                 case 1:
536                         /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
537                         nbx = round_up(nbx, 32 * 8);
538                         nby = round_up(nby, 16 * 8);
539                         break;
540                 default:
541                         dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
542                                         __func__, __LINE__, track->npipes);
543                         return -EINVAL;
544                 }
545         }
546         /* compute number of htile */
547         nbx = nbx >> 3;
548         nby = nby >> 3;
549         /* size must be aligned on npipes * 2K boundary */
550         size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
551         size += track->htile_offset;
552
553         if (size > radeon_bo_size(track->htile_bo)) {
554                 dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
555                                 __func__, __LINE__, radeon_bo_size(track->htile_bo),
556                                 size, nbx, nby);
557                 return -EINVAL;
558         }
559         return 0;
560 }
561
562 static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
563 {
564         struct evergreen_cs_track *track = p->track;
565         struct eg_surface surf;
566         unsigned pitch, slice, mslice;
567         u64 offset;
568         int r;
569
570         mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
571         pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
572         slice = track->db_depth_slice;
573         surf.nbx = (pitch + 1) * 8;
574         surf.nby = ((slice + 1) * 64) / surf.nbx;
575         surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
576         surf.format = G_028044_FORMAT(track->db_s_info);
577         surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
578         surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
579         surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
580         surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
581         surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
582         surf.nsamples = 1;
583
584         if (surf.format != 1) {
585                 dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
586                          __func__, __LINE__, surf.format);
587                 return -EINVAL;
588         }
589         /* replace by color format so we can use same code */
590         surf.format = V_028C70_COLOR_8;
591
592         r = evergreen_surface_value_conv_check(p, &surf, "stencil");
593         if (r) {
594                 return r;
595         }
596
597         r = evergreen_surface_check(p, &surf, NULL);
598         if (r) {
599                 /* old userspace doesn't compute proper depth/stencil alignment
600                  * check that alignment against a bigger byte per elements and
601                  * only report if that alignment is wrong too.
602                  */
603                 surf.format = V_028C70_COLOR_8_8_8_8;
604                 r = evergreen_surface_check(p, &surf, "stencil");
605                 if (r) {
606                         dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
607                                  __func__, __LINE__, track->db_depth_size,
608                                  track->db_depth_slice, track->db_s_info, track->db_z_info);
609                 }
610                 return r;
611         }
612
613         offset = (u64)track->db_s_read_offset << 8;
614         if (offset & (surf.base_align - 1)) {
615                 dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n",
616                          __func__, __LINE__, offset, surf.base_align);
617                 return -EINVAL;
618         }
619         offset += (u64)surf.layer_size * mslice;
620         if (offset > radeon_bo_size(track->db_s_read_bo)) {
621                 dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
622                          "offset %llu, max layer %d, bo size %ld)\n",
623                          __func__, __LINE__, surf.layer_size,
624                         (u64)track->db_s_read_offset << 8, mslice,
625                         radeon_bo_size(track->db_s_read_bo));
626                 dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
627                          __func__, __LINE__, track->db_depth_size,
628                          track->db_depth_slice, track->db_s_info, track->db_z_info);
629                 return -EINVAL;
630         }
631
632         offset = (u64)track->db_s_write_offset << 8;
633         if (offset & (surf.base_align - 1)) {
634                 dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n",
635                          __func__, __LINE__, offset, surf.base_align);
636                 return -EINVAL;
637         }
638         offset += (u64)surf.layer_size * mslice;
639         if (offset > radeon_bo_size(track->db_s_write_bo)) {
640                 dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
641                          "offset %llu, max layer %d, bo size %ld)\n",
642                          __func__, __LINE__, surf.layer_size,
643                         (u64)track->db_s_write_offset << 8, mslice,
644                         radeon_bo_size(track->db_s_write_bo));
645                 return -EINVAL;
646         }
647
648         /* hyperz */
649         if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
650                 r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
651                 if (r) {
652                         return r;
653                 }
654         }
655
656         return 0;
657 }
658
659 static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
660 {
661         struct evergreen_cs_track *track = p->track;
662         struct eg_surface surf;
663         unsigned pitch, slice, mslice;
664         u64 offset;
665         int r;
666
667         mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
668         pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
669         slice = track->db_depth_slice;
670         surf.nbx = (pitch + 1) * 8;
671         surf.nby = ((slice + 1) * 64) / surf.nbx;
672         surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
673         surf.format = G_028040_FORMAT(track->db_z_info);
674         surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
675         surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
676         surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
677         surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
678         surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
679         surf.nsamples = 1;
680
681         switch (surf.format) {
682         case V_028040_Z_16:
683                 surf.format = V_028C70_COLOR_16;
684                 break;
685         case V_028040_Z_24:
686         case V_028040_Z_32_FLOAT:
687                 surf.format = V_028C70_COLOR_8_8_8_8;
688                 break;
689         default:
690                 dev_warn(p->dev, "%s:%d depth invalid format %d\n",
691                          __func__, __LINE__, surf.format);
692                 return -EINVAL;
693         }
694
695         r = evergreen_surface_value_conv_check(p, &surf, "depth");
696         if (r) {
697                 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
698                          __func__, __LINE__, track->db_depth_size,
699                          track->db_depth_slice, track->db_z_info);
700                 return r;
701         }
702
703         r = evergreen_surface_check(p, &surf, "depth");
704         if (r) {
705                 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
706                          __func__, __LINE__, track->db_depth_size,
707                          track->db_depth_slice, track->db_z_info);
708                 return r;
709         }
710
711         offset = (u64)track->db_z_read_offset << 8;
712         if (offset & (surf.base_align - 1)) {
713                 dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n",
714                          __func__, __LINE__, offset, surf.base_align);
715                 return -EINVAL;
716         }
717         offset += (u64)surf.layer_size * mslice;
718         if (offset > radeon_bo_size(track->db_z_read_bo)) {
719                 dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
720                          "offset %llu, max layer %d, bo size %ld)\n",
721                          __func__, __LINE__, surf.layer_size,
722                         (u64)track->db_z_read_offset << 8, mslice,
723                         radeon_bo_size(track->db_z_read_bo));
724                 return -EINVAL;
725         }
726
727         offset = (u64)track->db_z_write_offset << 8;
728         if (offset & (surf.base_align - 1)) {
729                 dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n",
730                          __func__, __LINE__, offset, surf.base_align);
731                 return -EINVAL;
732         }
733         offset += (u64)surf.layer_size * mslice;
734         if (offset > radeon_bo_size(track->db_z_write_bo)) {
735                 dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
736                          "offset %llu, max layer %d, bo size %ld)\n",
737                          __func__, __LINE__, surf.layer_size,
738                         (u64)track->db_z_write_offset << 8, mslice,
739                         radeon_bo_size(track->db_z_write_bo));
740                 return -EINVAL;
741         }
742
743         /* hyperz */
744         if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
745                 r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
746                 if (r) {
747                         return r;
748                 }
749         }
750
751         return 0;
752 }
753
754 static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
755                                                struct radeon_bo *texture,
756                                                struct radeon_bo *mipmap,
757                                                unsigned idx)
758 {
759         struct eg_surface surf;
760         unsigned long toffset, moffset;
761         unsigned dim, llevel, mslice, width, height, depth, i;
762         u32 texdw[8];
763         int r;
764
765         texdw[0] = radeon_get_ib_value(p, idx + 0);
766         texdw[1] = radeon_get_ib_value(p, idx + 1);
767         texdw[2] = radeon_get_ib_value(p, idx + 2);
768         texdw[3] = radeon_get_ib_value(p, idx + 3);
769         texdw[4] = radeon_get_ib_value(p, idx + 4);
770         texdw[5] = radeon_get_ib_value(p, idx + 5);
771         texdw[6] = radeon_get_ib_value(p, idx + 6);
772         texdw[7] = radeon_get_ib_value(p, idx + 7);
773         dim = G_030000_DIM(texdw[0]);
774         llevel = G_030014_LAST_LEVEL(texdw[5]);
775         mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
776         width = G_030000_TEX_WIDTH(texdw[0]) + 1;
777         height =  G_030004_TEX_HEIGHT(texdw[1]) + 1;
778         depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
779         surf.format = G_03001C_DATA_FORMAT(texdw[7]);
780         surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
781         surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
782         surf.nby = r600_fmt_get_nblocksy(surf.format, height);
783         surf.mode = G_030004_ARRAY_MODE(texdw[1]);
784         surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
785         surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
786         surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
787         surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
788         surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
789         surf.nsamples = 1;
790         toffset = texdw[2] << 8;
791         moffset = texdw[3] << 8;
792
793         if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
794                 dev_warn(p->dev, "%s:%d texture invalid format %d\n",
795                          __func__, __LINE__, surf.format);
796                 return -EINVAL;
797         }
798         switch (dim) {
799         case V_030000_SQ_TEX_DIM_1D:
800         case V_030000_SQ_TEX_DIM_2D:
801         case V_030000_SQ_TEX_DIM_CUBEMAP:
802         case V_030000_SQ_TEX_DIM_1D_ARRAY:
803         case V_030000_SQ_TEX_DIM_2D_ARRAY:
804                 depth = 1;
805                 break;
806         case V_030000_SQ_TEX_DIM_2D_MSAA:
807         case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
808                 surf.nsamples = 1 << llevel;
809                 llevel = 0;
810                 depth = 1;
811                 break;
812         case V_030000_SQ_TEX_DIM_3D:
813                 break;
814         default:
815                 dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
816                          __func__, __LINE__, dim);
817                 return -EINVAL;
818         }
819
820         r = evergreen_surface_value_conv_check(p, &surf, "texture");
821         if (r) {
822                 return r;
823         }
824
825         /* align height */
826         evergreen_surface_check(p, &surf, NULL);
827         surf.nby = ALIGN(surf.nby, surf.halign);
828
829         r = evergreen_surface_check(p, &surf, "texture");
830         if (r) {
831                 dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
832                          __func__, __LINE__, texdw[0], texdw[1], texdw[4],
833                          texdw[5], texdw[6], texdw[7]);
834                 return r;
835         }
836
837         /* check texture size */
838         if (toffset & (surf.base_align - 1)) {
839                 dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
840                          __func__, __LINE__, toffset, surf.base_align);
841                 return -EINVAL;
842         }
843         if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
844                 dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
845                          __func__, __LINE__, moffset, surf.base_align);
846                 return -EINVAL;
847         }
848         if (dim == SQ_TEX_DIM_3D) {
849                 toffset += surf.layer_size * depth;
850         } else {
851                 toffset += surf.layer_size * mslice;
852         }
853         if (toffset > radeon_bo_size(texture)) {
854                 dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
855                          "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
856                          __func__, __LINE__, surf.layer_size,
857                         (unsigned long)texdw[2] << 8, mslice,
858                         depth, radeon_bo_size(texture),
859                         surf.nbx, surf.nby);
860                 return -EINVAL;
861         }
862
863         if (!mipmap) {
864                 if (llevel) {
865                         dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
866                                  __func__, __LINE__);
867                         return -EINVAL;
868                 } else {
869                         return 0; /* everything's ok */
870                 }
871         }
872
873         /* check mipmap size */
874         for (i = 1; i <= llevel; i++) {
875                 unsigned w, h, d;
876
877                 w = r600_mip_minify(width, i);
878                 h = r600_mip_minify(height, i);
879                 d = r600_mip_minify(depth, i);
880                 surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
881                 surf.nby = r600_fmt_get_nblocksy(surf.format, h);
882
883                 switch (surf.mode) {
884                 case ARRAY_2D_TILED_THIN1:
885                         if (surf.nbx < surf.palign || surf.nby < surf.halign) {
886                                 surf.mode = ARRAY_1D_TILED_THIN1;
887                         }
888                         /* recompute alignment */
889                         evergreen_surface_check(p, &surf, NULL);
890                         break;
891                 case ARRAY_LINEAR_GENERAL:
892                 case ARRAY_LINEAR_ALIGNED:
893                 case ARRAY_1D_TILED_THIN1:
894                         break;
895                 default:
896                         dev_warn(p->dev, "%s:%d invalid array mode %d\n",
897                                  __func__, __LINE__, surf.mode);
898                         return -EINVAL;
899                 }
900                 surf.nbx = ALIGN(surf.nbx, surf.palign);
901                 surf.nby = ALIGN(surf.nby, surf.halign);
902
903                 r = evergreen_surface_check(p, &surf, "mipmap");
904                 if (r) {
905                         return r;
906                 }
907
908                 if (dim == SQ_TEX_DIM_3D) {
909                         moffset += surf.layer_size * d;
910                 } else {
911                         moffset += surf.layer_size * mslice;
912                 }
913                 if (moffset > radeon_bo_size(mipmap)) {
914                         dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
915                                         "offset %ld, coffset %ld, max layer %d, depth %d, "
916                                         "bo size %ld) level0 (%d %d %d)\n",
917                                         __func__, __LINE__, i, surf.layer_size,
918                                         (unsigned long)texdw[3] << 8, moffset, mslice,
919                                         d, radeon_bo_size(mipmap),
920                                         width, height, depth);
921                         dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
922                                  __func__, __LINE__, surf.nbx, surf.nby,
923                                 surf.mode, surf.bpe, surf.nsamples,
924                                 surf.bankw, surf.bankh,
925                                 surf.tsplit, surf.mtilea);
926                         return -EINVAL;
927                 }
928         }
929
930         return 0;
931 }
932
933 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
934 {
935         struct evergreen_cs_track *track = p->track;
936         unsigned tmp, i;
937         int r;
938         unsigned buffer_mask = 0;
939
940         /* check streamout */
941         if (track->streamout_dirty && track->vgt_strmout_config) {
942                 for (i = 0; i < 4; i++) {
943                         if (track->vgt_strmout_config & (1 << i)) {
944                                 buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
945                         }
946                 }
947
948                 for (i = 0; i < 4; i++) {
949                         if (buffer_mask & (1 << i)) {
950                                 if (track->vgt_strmout_bo[i]) {
951                                         u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
952                                                         (u64)track->vgt_strmout_size[i];
953                                         if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
954                                                 DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
955                                                           i, offset,
956                                                           radeon_bo_size(track->vgt_strmout_bo[i]));
957                                                 return -EINVAL;
958                                         }
959                                 } else {
960                                         dev_warn(p->dev, "No buffer for streamout %d\n", i);
961                                         return -EINVAL;
962                                 }
963                         }
964                 }
965                 track->streamout_dirty = false;
966         }
967
968         if (track->sx_misc_kill_all_prims)
969                 return 0;
970
971         /* check that we have a cb for each enabled target
972          */
973         if (track->cb_dirty) {
974                 tmp = track->cb_target_mask;
975                 for (i = 0; i < 8; i++) {
976                         u32 format = G_028C70_FORMAT(track->cb_color_info[i]);
977
978                         if (format != V_028C70_COLOR_INVALID &&
979                             (tmp >> (i * 4)) & 0xF) {
980                                 /* at least one component is enabled */
981                                 if (track->cb_color_bo[i] == NULL) {
982                                         dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
983                                                 __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
984                                         return -EINVAL;
985                                 }
986                                 /* check cb */
987                                 r = evergreen_cs_track_validate_cb(p, i);
988                                 if (r) {
989                                         return r;
990                                 }
991                         }
992                 }
993                 track->cb_dirty = false;
994         }
995
996         if (track->db_dirty) {
997                 /* Check stencil buffer */
998                 if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
999                     G_028800_STENCIL_ENABLE(track->db_depth_control)) {
1000                         r = evergreen_cs_track_validate_stencil(p);
1001                         if (r)
1002                                 return r;
1003                 }
1004                 /* Check depth buffer */
1005                 if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
1006                     G_028800_Z_ENABLE(track->db_depth_control)) {
1007                         r = evergreen_cs_track_validate_depth(p);
1008                         if (r)
1009                                 return r;
1010                 }
1011                 track->db_dirty = false;
1012         }
1013
1014         return 0;
1015 }
1016
1017 /**
1018  * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
1019  * @p:          parser structure holding parsing context.
1020  *
1021  * This is an Evergreen(+)-specific function for parsing VLINE packets.
1022  * Real work is done by r600_cs_common_vline_parse function.
1023  * Here we just set up ASIC-specific register table and call
1024  * the common implementation function.
1025  */
1026 static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
1027 {
1028
1029         static uint32_t vline_start_end[6] = {
1030                 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
1031                 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
1032                 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
1033                 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
1034                 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
1035                 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
1036         };
1037         static uint32_t vline_status[6] = {
1038                 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
1039                 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
1040                 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
1041                 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
1042                 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
1043                 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
1044         };
1045
1046         return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
1047 }
1048
1049 static int evergreen_packet0_check(struct radeon_cs_parser *p,
1050                                    struct radeon_cs_packet *pkt,
1051                                    unsigned idx, unsigned reg)
1052 {
1053         int r;
1054
1055         switch (reg) {
1056         case EVERGREEN_VLINE_START_END:
1057                 r = evergreen_cs_packet_parse_vline(p);
1058                 if (r) {
1059                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1060                                         idx, reg);
1061                         return r;
1062                 }
1063                 break;
1064         default:
1065                 pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
1066                 return -EINVAL;
1067         }
1068         return 0;
1069 }
1070
1071 static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
1072                                       struct radeon_cs_packet *pkt)
1073 {
1074         unsigned reg, i;
1075         unsigned idx;
1076         int r;
1077
1078         idx = pkt->idx + 1;
1079         reg = pkt->reg;
1080         for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
1081                 r = evergreen_packet0_check(p, pkt, idx, reg);
1082                 if (r) {
1083                         return r;
1084                 }
1085         }
1086         return 0;
1087 }
1088
1089 /**
1090  * evergreen_cs_handle_reg() - process registers that need special handling.
1091  * @p: parser structure holding parsing context
1092  * @reg: register we are testing
1093  * @idx: index into the cs buffer
1094  */
1095 static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1096 {
1097         struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
1098         struct radeon_bo_list *reloc;
1099         u32 tmp, *ib;
1100         int r;
1101
1102         ib = p->ib.ptr;
1103         switch (reg) {
1104         /* force following reg to 0 in an attempt to disable out buffer
1105          * which will need us to better understand how it works to perform
1106          * security check on it (Jerome)
1107          */
1108         case SQ_ESGS_RING_SIZE:
1109         case SQ_GSVS_RING_SIZE:
1110         case SQ_ESTMP_RING_SIZE:
1111         case SQ_GSTMP_RING_SIZE:
1112         case SQ_HSTMP_RING_SIZE:
1113         case SQ_LSTMP_RING_SIZE:
1114         case SQ_PSTMP_RING_SIZE:
1115         case SQ_VSTMP_RING_SIZE:
1116         case SQ_ESGS_RING_ITEMSIZE:
1117         case SQ_ESTMP_RING_ITEMSIZE:
1118         case SQ_GSTMP_RING_ITEMSIZE:
1119         case SQ_GSVS_RING_ITEMSIZE:
1120         case SQ_GS_VERT_ITEMSIZE:
1121         case SQ_GS_VERT_ITEMSIZE_1:
1122         case SQ_GS_VERT_ITEMSIZE_2:
1123         case SQ_GS_VERT_ITEMSIZE_3:
1124         case SQ_GSVS_RING_OFFSET_1:
1125         case SQ_GSVS_RING_OFFSET_2:
1126         case SQ_GSVS_RING_OFFSET_3:
1127         case SQ_HSTMP_RING_ITEMSIZE:
1128         case SQ_LSTMP_RING_ITEMSIZE:
1129         case SQ_PSTMP_RING_ITEMSIZE:
1130         case SQ_VSTMP_RING_ITEMSIZE:
1131         case VGT_TF_RING_SIZE:
1132                 /* get value to populate the IB don't remove */
1133                 /*tmp =radeon_get_ib_value(p, idx);
1134                   ib[idx] = 0;*/
1135                 break;
1136         case SQ_ESGS_RING_BASE:
1137         case SQ_GSVS_RING_BASE:
1138         case SQ_ESTMP_RING_BASE:
1139         case SQ_GSTMP_RING_BASE:
1140         case SQ_HSTMP_RING_BASE:
1141         case SQ_LSTMP_RING_BASE:
1142         case SQ_PSTMP_RING_BASE:
1143         case SQ_VSTMP_RING_BASE:
1144                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1145                 if (r) {
1146                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1147                                         "0x%04X\n", reg);
1148                         return -EINVAL;
1149                 }
1150                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1151                 break;
1152         case DB_DEPTH_CONTROL:
1153                 track->db_depth_control = radeon_get_ib_value(p, idx);
1154                 track->db_dirty = true;
1155                 break;
1156         case CAYMAN_DB_EQAA:
1157                 if (p->rdev->family < CHIP_CAYMAN) {
1158                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1159                                  "0x%04X\n", reg);
1160                         return -EINVAL;
1161                 }
1162                 break;
1163         case CAYMAN_DB_DEPTH_INFO:
1164                 if (p->rdev->family < CHIP_CAYMAN) {
1165                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1166                                  "0x%04X\n", reg);
1167                         return -EINVAL;
1168                 }
1169                 break;
1170         case DB_Z_INFO:
1171                 track->db_z_info = radeon_get_ib_value(p, idx);
1172                 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1173                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1174                         if (r) {
1175                                 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1176                                                 "0x%04X\n", reg);
1177                                 return -EINVAL;
1178                         }
1179                         ib[idx] &= ~Z_ARRAY_MODE(0xf);
1180                         track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1181                         ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1182                         track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1183                         if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1184                                 unsigned bankw, bankh, mtaspect, tile_split;
1185
1186                                 evergreen_tiling_fields(reloc->tiling_flags,
1187                                                         &bankw, &bankh, &mtaspect,
1188                                                         &tile_split);
1189                                 ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1190                                 ib[idx] |= DB_TILE_SPLIT(tile_split) |
1191                                                 DB_BANK_WIDTH(bankw) |
1192                                                 DB_BANK_HEIGHT(bankh) |
1193                                                 DB_MACRO_TILE_ASPECT(mtaspect);
1194                         }
1195                 }
1196                 track->db_dirty = true;
1197                 break;
1198         case DB_STENCIL_INFO:
1199                 track->db_s_info = radeon_get_ib_value(p, idx);
1200                 track->db_dirty = true;
1201                 break;
1202         case DB_DEPTH_VIEW:
1203                 track->db_depth_view = radeon_get_ib_value(p, idx);
1204                 track->db_dirty = true;
1205                 break;
1206         case DB_DEPTH_SIZE:
1207                 track->db_depth_size = radeon_get_ib_value(p, idx);
1208                 track->db_dirty = true;
1209                 break;
1210         case R_02805C_DB_DEPTH_SLICE:
1211                 track->db_depth_slice = radeon_get_ib_value(p, idx);
1212                 track->db_dirty = true;
1213                 break;
1214         case DB_Z_READ_BASE:
1215                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1216                 if (r) {
1217                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1218                                         "0x%04X\n", reg);
1219                         return -EINVAL;
1220                 }
1221                 track->db_z_read_offset = radeon_get_ib_value(p, idx);
1222                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1223                 track->db_z_read_bo = reloc->robj;
1224                 track->db_dirty = true;
1225                 break;
1226         case DB_Z_WRITE_BASE:
1227                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1228                 if (r) {
1229                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1230                                         "0x%04X\n", reg);
1231                         return -EINVAL;
1232                 }
1233                 track->db_z_write_offset = radeon_get_ib_value(p, idx);
1234                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1235                 track->db_z_write_bo = reloc->robj;
1236                 track->db_dirty = true;
1237                 break;
1238         case DB_STENCIL_READ_BASE:
1239                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1240                 if (r) {
1241                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1242                                         "0x%04X\n", reg);
1243                         return -EINVAL;
1244                 }
1245                 track->db_s_read_offset = radeon_get_ib_value(p, idx);
1246                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1247                 track->db_s_read_bo = reloc->robj;
1248                 track->db_dirty = true;
1249                 break;
1250         case DB_STENCIL_WRITE_BASE:
1251                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1252                 if (r) {
1253                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1254                                         "0x%04X\n", reg);
1255                         return -EINVAL;
1256                 }
1257                 track->db_s_write_offset = radeon_get_ib_value(p, idx);
1258                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1259                 track->db_s_write_bo = reloc->robj;
1260                 track->db_dirty = true;
1261                 break;
1262         case VGT_STRMOUT_CONFIG:
1263                 track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1264                 track->streamout_dirty = true;
1265                 break;
1266         case VGT_STRMOUT_BUFFER_CONFIG:
1267                 track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1268                 track->streamout_dirty = true;
1269                 break;
1270         case VGT_STRMOUT_BUFFER_BASE_0:
1271         case VGT_STRMOUT_BUFFER_BASE_1:
1272         case VGT_STRMOUT_BUFFER_BASE_2:
1273         case VGT_STRMOUT_BUFFER_BASE_3:
1274                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1275                 if (r) {
1276                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1277                                         "0x%04X\n", reg);
1278                         return -EINVAL;
1279                 }
1280                 tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1281                 track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1282                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1283                 track->vgt_strmout_bo[tmp] = reloc->robj;
1284                 track->streamout_dirty = true;
1285                 break;
1286         case VGT_STRMOUT_BUFFER_SIZE_0:
1287         case VGT_STRMOUT_BUFFER_SIZE_1:
1288         case VGT_STRMOUT_BUFFER_SIZE_2:
1289         case VGT_STRMOUT_BUFFER_SIZE_3:
1290                 tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1291                 /* size in register is DWs, convert to bytes */
1292                 track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1293                 track->streamout_dirty = true;
1294                 break;
1295         case CP_COHER_BASE:
1296                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1297                 if (r) {
1298                         dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1299                                         "0x%04X\n", reg);
1300                         return -EINVAL;
1301                 }
1302                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1303                 break;
1304         case CB_TARGET_MASK:
1305                 track->cb_target_mask = radeon_get_ib_value(p, idx);
1306                 track->cb_dirty = true;
1307                 break;
1308         case CB_SHADER_MASK:
1309                 track->cb_shader_mask = radeon_get_ib_value(p, idx);
1310                 track->cb_dirty = true;
1311                 break;
1312         case PA_SC_AA_CONFIG:
1313                 if (p->rdev->family >= CHIP_CAYMAN) {
1314                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1315                                  "0x%04X\n", reg);
1316                         return -EINVAL;
1317                 }
1318                 tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1319                 track->nsamples = 1 << tmp;
1320                 break;
1321         case CAYMAN_PA_SC_AA_CONFIG:
1322                 if (p->rdev->family < CHIP_CAYMAN) {
1323                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1324                                  "0x%04X\n", reg);
1325                         return -EINVAL;
1326                 }
1327                 tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1328                 track->nsamples = 1 << tmp;
1329                 break;
1330         case CB_COLOR0_VIEW:
1331         case CB_COLOR1_VIEW:
1332         case CB_COLOR2_VIEW:
1333         case CB_COLOR3_VIEW:
1334         case CB_COLOR4_VIEW:
1335         case CB_COLOR5_VIEW:
1336         case CB_COLOR6_VIEW:
1337         case CB_COLOR7_VIEW:
1338                 tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1339                 track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1340                 track->cb_dirty = true;
1341                 break;
1342         case CB_COLOR8_VIEW:
1343         case CB_COLOR9_VIEW:
1344         case CB_COLOR10_VIEW:
1345         case CB_COLOR11_VIEW:
1346                 tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1347                 track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1348                 track->cb_dirty = true;
1349                 break;
1350         case CB_COLOR0_INFO:
1351         case CB_COLOR1_INFO:
1352         case CB_COLOR2_INFO:
1353         case CB_COLOR3_INFO:
1354         case CB_COLOR4_INFO:
1355         case CB_COLOR5_INFO:
1356         case CB_COLOR6_INFO:
1357         case CB_COLOR7_INFO:
1358                 tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1359                 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1360                 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1361                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1362                         if (r) {
1363                                 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1364                                                 "0x%04X\n", reg);
1365                                 return -EINVAL;
1366                         }
1367                         ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1368                         track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1369                 }
1370                 track->cb_dirty = true;
1371                 break;
1372         case CB_COLOR8_INFO:
1373         case CB_COLOR9_INFO:
1374         case CB_COLOR10_INFO:
1375         case CB_COLOR11_INFO:
1376                 tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1377                 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1378                 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1379                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1380                         if (r) {
1381                                 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1382                                                 "0x%04X\n", reg);
1383                                 return -EINVAL;
1384                         }
1385                         ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1386                         track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1387                 }
1388                 track->cb_dirty = true;
1389                 break;
1390         case CB_COLOR0_PITCH:
1391         case CB_COLOR1_PITCH:
1392         case CB_COLOR2_PITCH:
1393         case CB_COLOR3_PITCH:
1394         case CB_COLOR4_PITCH:
1395         case CB_COLOR5_PITCH:
1396         case CB_COLOR6_PITCH:
1397         case CB_COLOR7_PITCH:
1398                 tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1399                 track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1400                 track->cb_dirty = true;
1401                 break;
1402         case CB_COLOR8_PITCH:
1403         case CB_COLOR9_PITCH:
1404         case CB_COLOR10_PITCH:
1405         case CB_COLOR11_PITCH:
1406                 tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1407                 track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1408                 track->cb_dirty = true;
1409                 break;
1410         case CB_COLOR0_SLICE:
1411         case CB_COLOR1_SLICE:
1412         case CB_COLOR2_SLICE:
1413         case CB_COLOR3_SLICE:
1414         case CB_COLOR4_SLICE:
1415         case CB_COLOR5_SLICE:
1416         case CB_COLOR6_SLICE:
1417         case CB_COLOR7_SLICE:
1418                 tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1419                 track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1420                 track->cb_color_slice_idx[tmp] = idx;
1421                 track->cb_dirty = true;
1422                 break;
1423         case CB_COLOR8_SLICE:
1424         case CB_COLOR9_SLICE:
1425         case CB_COLOR10_SLICE:
1426         case CB_COLOR11_SLICE:
1427                 tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1428                 track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1429                 track->cb_color_slice_idx[tmp] = idx;
1430                 track->cb_dirty = true;
1431                 break;
1432         case CB_COLOR0_ATTRIB:
1433         case CB_COLOR1_ATTRIB:
1434         case CB_COLOR2_ATTRIB:
1435         case CB_COLOR3_ATTRIB:
1436         case CB_COLOR4_ATTRIB:
1437         case CB_COLOR5_ATTRIB:
1438         case CB_COLOR6_ATTRIB:
1439         case CB_COLOR7_ATTRIB:
1440                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1441                 if (r) {
1442                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1443                                         "0x%04X\n", reg);
1444                         return -EINVAL;
1445                 }
1446                 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1447                         if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1448                                 unsigned bankw, bankh, mtaspect, tile_split;
1449
1450                                 evergreen_tiling_fields(reloc->tiling_flags,
1451                                                         &bankw, &bankh, &mtaspect,
1452                                                         &tile_split);
1453                                 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1454                                 ib[idx] |= CB_TILE_SPLIT(tile_split) |
1455                                            CB_BANK_WIDTH(bankw) |
1456                                            CB_BANK_HEIGHT(bankh) |
1457                                            CB_MACRO_TILE_ASPECT(mtaspect);
1458                         }
1459                 }
1460                 tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1461                 track->cb_color_attrib[tmp] = ib[idx];
1462                 track->cb_dirty = true;
1463                 break;
1464         case CB_COLOR8_ATTRIB:
1465         case CB_COLOR9_ATTRIB:
1466         case CB_COLOR10_ATTRIB:
1467         case CB_COLOR11_ATTRIB:
1468                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1469                 if (r) {
1470                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1471                                         "0x%04X\n", reg);
1472                         return -EINVAL;
1473                 }
1474                 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1475                         if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1476                                 unsigned bankw, bankh, mtaspect, tile_split;
1477
1478                                 evergreen_tiling_fields(reloc->tiling_flags,
1479                                                         &bankw, &bankh, &mtaspect,
1480                                                         &tile_split);
1481                                 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1482                                 ib[idx] |= CB_TILE_SPLIT(tile_split) |
1483                                            CB_BANK_WIDTH(bankw) |
1484                                            CB_BANK_HEIGHT(bankh) |
1485                                            CB_MACRO_TILE_ASPECT(mtaspect);
1486                         }
1487                 }
1488                 tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1489                 track->cb_color_attrib[tmp] = ib[idx];
1490                 track->cb_dirty = true;
1491                 break;
1492         case CB_COLOR0_FMASK:
1493         case CB_COLOR1_FMASK:
1494         case CB_COLOR2_FMASK:
1495         case CB_COLOR3_FMASK:
1496         case CB_COLOR4_FMASK:
1497         case CB_COLOR5_FMASK:
1498         case CB_COLOR6_FMASK:
1499         case CB_COLOR7_FMASK:
1500                 tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1501                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1502                 if (r) {
1503                         dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1504                         return -EINVAL;
1505                 }
1506                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1507                 track->cb_color_fmask_bo[tmp] = reloc->robj;
1508                 break;
1509         case CB_COLOR0_CMASK:
1510         case CB_COLOR1_CMASK:
1511         case CB_COLOR2_CMASK:
1512         case CB_COLOR3_CMASK:
1513         case CB_COLOR4_CMASK:
1514         case CB_COLOR5_CMASK:
1515         case CB_COLOR6_CMASK:
1516         case CB_COLOR7_CMASK:
1517                 tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1518                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1519                 if (r) {
1520                         dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1521                         return -EINVAL;
1522                 }
1523                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1524                 track->cb_color_cmask_bo[tmp] = reloc->robj;
1525                 break;
1526         case CB_COLOR0_FMASK_SLICE:
1527         case CB_COLOR1_FMASK_SLICE:
1528         case CB_COLOR2_FMASK_SLICE:
1529         case CB_COLOR3_FMASK_SLICE:
1530         case CB_COLOR4_FMASK_SLICE:
1531         case CB_COLOR5_FMASK_SLICE:
1532         case CB_COLOR6_FMASK_SLICE:
1533         case CB_COLOR7_FMASK_SLICE:
1534                 tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1535                 track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1536                 break;
1537         case CB_COLOR0_CMASK_SLICE:
1538         case CB_COLOR1_CMASK_SLICE:
1539         case CB_COLOR2_CMASK_SLICE:
1540         case CB_COLOR3_CMASK_SLICE:
1541         case CB_COLOR4_CMASK_SLICE:
1542         case CB_COLOR5_CMASK_SLICE:
1543         case CB_COLOR6_CMASK_SLICE:
1544         case CB_COLOR7_CMASK_SLICE:
1545                 tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1546                 track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1547                 break;
1548         case CB_COLOR0_BASE:
1549         case CB_COLOR1_BASE:
1550         case CB_COLOR2_BASE:
1551         case CB_COLOR3_BASE:
1552         case CB_COLOR4_BASE:
1553         case CB_COLOR5_BASE:
1554         case CB_COLOR6_BASE:
1555         case CB_COLOR7_BASE:
1556                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1557                 if (r) {
1558                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1559                                         "0x%04X\n", reg);
1560                         return -EINVAL;
1561                 }
1562                 tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1563                 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1564                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1565                 track->cb_color_bo[tmp] = reloc->robj;
1566                 track->cb_dirty = true;
1567                 break;
1568         case CB_COLOR8_BASE:
1569         case CB_COLOR9_BASE:
1570         case CB_COLOR10_BASE:
1571         case CB_COLOR11_BASE:
1572                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1573                 if (r) {
1574                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1575                                         "0x%04X\n", reg);
1576                         return -EINVAL;
1577                 }
1578                 tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1579                 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1580                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1581                 track->cb_color_bo[tmp] = reloc->robj;
1582                 track->cb_dirty = true;
1583                 break;
1584         case DB_HTILE_DATA_BASE:
1585                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1586                 if (r) {
1587                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1588                                         "0x%04X\n", reg);
1589                         return -EINVAL;
1590                 }
1591                 track->htile_offset = radeon_get_ib_value(p, idx);
1592                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1593                 track->htile_bo = reloc->robj;
1594                 track->db_dirty = true;
1595                 break;
1596         case DB_HTILE_SURFACE:
1597                 /* 8x8 only */
1598                 track->htile_surface = radeon_get_ib_value(p, idx);
1599                 /* force 8x8 htile width and height */
1600                 ib[idx] |= 3;
1601                 track->db_dirty = true;
1602                 break;
1603         case CB_IMMED0_BASE:
1604         case CB_IMMED1_BASE:
1605         case CB_IMMED2_BASE:
1606         case CB_IMMED3_BASE:
1607         case CB_IMMED4_BASE:
1608         case CB_IMMED5_BASE:
1609         case CB_IMMED6_BASE:
1610         case CB_IMMED7_BASE:
1611         case CB_IMMED8_BASE:
1612         case CB_IMMED9_BASE:
1613         case CB_IMMED10_BASE:
1614         case CB_IMMED11_BASE:
1615         case SQ_PGM_START_FS:
1616         case SQ_PGM_START_ES:
1617         case SQ_PGM_START_VS:
1618         case SQ_PGM_START_GS:
1619         case SQ_PGM_START_PS:
1620         case SQ_PGM_START_HS:
1621         case SQ_PGM_START_LS:
1622         case SQ_CONST_MEM_BASE:
1623         case SQ_ALU_CONST_CACHE_GS_0:
1624         case SQ_ALU_CONST_CACHE_GS_1:
1625         case SQ_ALU_CONST_CACHE_GS_2:
1626         case SQ_ALU_CONST_CACHE_GS_3:
1627         case SQ_ALU_CONST_CACHE_GS_4:
1628         case SQ_ALU_CONST_CACHE_GS_5:
1629         case SQ_ALU_CONST_CACHE_GS_6:
1630         case SQ_ALU_CONST_CACHE_GS_7:
1631         case SQ_ALU_CONST_CACHE_GS_8:
1632         case SQ_ALU_CONST_CACHE_GS_9:
1633         case SQ_ALU_CONST_CACHE_GS_10:
1634         case SQ_ALU_CONST_CACHE_GS_11:
1635         case SQ_ALU_CONST_CACHE_GS_12:
1636         case SQ_ALU_CONST_CACHE_GS_13:
1637         case SQ_ALU_CONST_CACHE_GS_14:
1638         case SQ_ALU_CONST_CACHE_GS_15:
1639         case SQ_ALU_CONST_CACHE_PS_0:
1640         case SQ_ALU_CONST_CACHE_PS_1:
1641         case SQ_ALU_CONST_CACHE_PS_2:
1642         case SQ_ALU_CONST_CACHE_PS_3:
1643         case SQ_ALU_CONST_CACHE_PS_4:
1644         case SQ_ALU_CONST_CACHE_PS_5:
1645         case SQ_ALU_CONST_CACHE_PS_6:
1646         case SQ_ALU_CONST_CACHE_PS_7:
1647         case SQ_ALU_CONST_CACHE_PS_8:
1648         case SQ_ALU_CONST_CACHE_PS_9:
1649         case SQ_ALU_CONST_CACHE_PS_10:
1650         case SQ_ALU_CONST_CACHE_PS_11:
1651         case SQ_ALU_CONST_CACHE_PS_12:
1652         case SQ_ALU_CONST_CACHE_PS_13:
1653         case SQ_ALU_CONST_CACHE_PS_14:
1654         case SQ_ALU_CONST_CACHE_PS_15:
1655         case SQ_ALU_CONST_CACHE_VS_0:
1656         case SQ_ALU_CONST_CACHE_VS_1:
1657         case SQ_ALU_CONST_CACHE_VS_2:
1658         case SQ_ALU_CONST_CACHE_VS_3:
1659         case SQ_ALU_CONST_CACHE_VS_4:
1660         case SQ_ALU_CONST_CACHE_VS_5:
1661         case SQ_ALU_CONST_CACHE_VS_6:
1662         case SQ_ALU_CONST_CACHE_VS_7:
1663         case SQ_ALU_CONST_CACHE_VS_8:
1664         case SQ_ALU_CONST_CACHE_VS_9:
1665         case SQ_ALU_CONST_CACHE_VS_10:
1666         case SQ_ALU_CONST_CACHE_VS_11:
1667         case SQ_ALU_CONST_CACHE_VS_12:
1668         case SQ_ALU_CONST_CACHE_VS_13:
1669         case SQ_ALU_CONST_CACHE_VS_14:
1670         case SQ_ALU_CONST_CACHE_VS_15:
1671         case SQ_ALU_CONST_CACHE_HS_0:
1672         case SQ_ALU_CONST_CACHE_HS_1:
1673         case SQ_ALU_CONST_CACHE_HS_2:
1674         case SQ_ALU_CONST_CACHE_HS_3:
1675         case SQ_ALU_CONST_CACHE_HS_4:
1676         case SQ_ALU_CONST_CACHE_HS_5:
1677         case SQ_ALU_CONST_CACHE_HS_6:
1678         case SQ_ALU_CONST_CACHE_HS_7:
1679         case SQ_ALU_CONST_CACHE_HS_8:
1680         case SQ_ALU_CONST_CACHE_HS_9:
1681         case SQ_ALU_CONST_CACHE_HS_10:
1682         case SQ_ALU_CONST_CACHE_HS_11:
1683         case SQ_ALU_CONST_CACHE_HS_12:
1684         case SQ_ALU_CONST_CACHE_HS_13:
1685         case SQ_ALU_CONST_CACHE_HS_14:
1686         case SQ_ALU_CONST_CACHE_HS_15:
1687         case SQ_ALU_CONST_CACHE_LS_0:
1688         case SQ_ALU_CONST_CACHE_LS_1:
1689         case SQ_ALU_CONST_CACHE_LS_2:
1690         case SQ_ALU_CONST_CACHE_LS_3:
1691         case SQ_ALU_CONST_CACHE_LS_4:
1692         case SQ_ALU_CONST_CACHE_LS_5:
1693         case SQ_ALU_CONST_CACHE_LS_6:
1694         case SQ_ALU_CONST_CACHE_LS_7:
1695         case SQ_ALU_CONST_CACHE_LS_8:
1696         case SQ_ALU_CONST_CACHE_LS_9:
1697         case SQ_ALU_CONST_CACHE_LS_10:
1698         case SQ_ALU_CONST_CACHE_LS_11:
1699         case SQ_ALU_CONST_CACHE_LS_12:
1700         case SQ_ALU_CONST_CACHE_LS_13:
1701         case SQ_ALU_CONST_CACHE_LS_14:
1702         case SQ_ALU_CONST_CACHE_LS_15:
1703                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1704                 if (r) {
1705                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1706                                         "0x%04X\n", reg);
1707                         return -EINVAL;
1708                 }
1709                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1710                 break;
1711         case SX_MEMORY_EXPORT_BASE:
1712                 if (p->rdev->family >= CHIP_CAYMAN) {
1713                         dev_warn(p->dev, "bad SET_CONFIG_REG "
1714                                  "0x%04X\n", reg);
1715                         return -EINVAL;
1716                 }
1717                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1718                 if (r) {
1719                         dev_warn(p->dev, "bad SET_CONFIG_REG "
1720                                         "0x%04X\n", reg);
1721                         return -EINVAL;
1722                 }
1723                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1724                 break;
1725         case CAYMAN_SX_SCATTER_EXPORT_BASE:
1726                 if (p->rdev->family < CHIP_CAYMAN) {
1727                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1728                                  "0x%04X\n", reg);
1729                         return -EINVAL;
1730                 }
1731                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1732                 if (r) {
1733                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1734                                         "0x%04X\n", reg);
1735                         return -EINVAL;
1736                 }
1737                 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1738                 break;
1739         case SX_MISC:
1740                 track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1741                 break;
1742         default:
1743                 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1744                 return -EINVAL;
1745         }
1746         return 0;
1747 }
1748
1749 /**
1750  * evergreen_is_safe_reg() - check if register is authorized or not
1751  * @p: parser structure holding parsing context
1752  * @reg: register we are testing
1753  *
1754  * This function will test against reg_safe_bm and return true
1755  * if register is safe or false otherwise.
1756  */
1757 static inline bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg)
1758 {
1759         struct evergreen_cs_track *track = p->track;
1760         u32 m, i;
1761
1762         i = (reg >> 7);
1763         if (unlikely(i >= REG_SAFE_BM_SIZE)) {
1764                 return false;
1765         }
1766         m = 1 << ((reg >> 2) & 31);
1767         if (!(track->reg_safe_bm[i] & m))
1768                 return true;
1769
1770         return false;
1771 }
1772
1773 static int evergreen_packet3_check(struct radeon_cs_parser *p,
1774                                    struct radeon_cs_packet *pkt)
1775 {
1776         struct radeon_bo_list *reloc;
1777         struct evergreen_cs_track *track;
1778         uint32_t *ib;
1779         unsigned idx;
1780         unsigned i;
1781         unsigned start_reg, end_reg, reg;
1782         int r;
1783         u32 idx_value;
1784
1785         track = (struct evergreen_cs_track *)p->track;
1786         ib = p->ib.ptr;
1787         idx = pkt->idx + 1;
1788         idx_value = radeon_get_ib_value(p, idx);
1789
1790         switch (pkt->opcode) {
1791         case PACKET3_SET_PREDICATION:
1792         {
1793                 int pred_op;
1794                 int tmp;
1795                 uint64_t offset;
1796
1797                 if (pkt->count != 1) {
1798                         DRM_ERROR("bad SET PREDICATION\n");
1799                         return -EINVAL;
1800                 }
1801
1802                 tmp = radeon_get_ib_value(p, idx + 1);
1803                 pred_op = (tmp >> 16) & 0x7;
1804
1805                 /* for the clear predicate operation */
1806                 if (pred_op == 0)
1807                         return 0;
1808
1809                 if (pred_op > 2) {
1810                         DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1811                         return -EINVAL;
1812                 }
1813
1814                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1815                 if (r) {
1816                         DRM_ERROR("bad SET PREDICATION\n");
1817                         return -EINVAL;
1818                 }
1819
1820                 offset = reloc->gpu_offset +
1821                          (idx_value & 0xfffffff0) +
1822                          ((u64)(tmp & 0xff) << 32);
1823
1824                 ib[idx + 0] = offset;
1825                 ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1826         }
1827         break;
1828         case PACKET3_CONTEXT_CONTROL:
1829                 if (pkt->count != 1) {
1830                         DRM_ERROR("bad CONTEXT_CONTROL\n");
1831                         return -EINVAL;
1832                 }
1833                 break;
1834         case PACKET3_INDEX_TYPE:
1835         case PACKET3_NUM_INSTANCES:
1836         case PACKET3_CLEAR_STATE:
1837                 if (pkt->count) {
1838                         DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1839                         return -EINVAL;
1840                 }
1841                 break;
1842         case CAYMAN_PACKET3_DEALLOC_STATE:
1843                 if (p->rdev->family < CHIP_CAYMAN) {
1844                         DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1845                         return -EINVAL;
1846                 }
1847                 if (pkt->count) {
1848                         DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1849                         return -EINVAL;
1850                 }
1851                 break;
1852         case PACKET3_INDEX_BASE:
1853         {
1854                 uint64_t offset;
1855
1856                 if (pkt->count != 1) {
1857                         DRM_ERROR("bad INDEX_BASE\n");
1858                         return -EINVAL;
1859                 }
1860                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1861                 if (r) {
1862                         DRM_ERROR("bad INDEX_BASE\n");
1863                         return -EINVAL;
1864                 }
1865
1866                 offset = reloc->gpu_offset +
1867                          idx_value +
1868                          ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1869
1870                 ib[idx+0] = offset;
1871                 ib[idx+1] = upper_32_bits(offset) & 0xff;
1872
1873                 r = evergreen_cs_track_check(p);
1874                 if (r) {
1875                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1876                         return r;
1877                 }
1878                 break;
1879         }
1880         case PACKET3_INDEX_BUFFER_SIZE:
1881         {
1882                 if (pkt->count != 0) {
1883                         DRM_ERROR("bad INDEX_BUFFER_SIZE\n");
1884                         return -EINVAL;
1885                 }
1886                 break;
1887         }
1888         case PACKET3_DRAW_INDEX:
1889         {
1890                 uint64_t offset;
1891                 if (pkt->count != 3) {
1892                         DRM_ERROR("bad DRAW_INDEX\n");
1893                         return -EINVAL;
1894                 }
1895                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1896                 if (r) {
1897                         DRM_ERROR("bad DRAW_INDEX\n");
1898                         return -EINVAL;
1899                 }
1900
1901                 offset = reloc->gpu_offset +
1902                          idx_value +
1903                          ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1904
1905                 ib[idx+0] = offset;
1906                 ib[idx+1] = upper_32_bits(offset) & 0xff;
1907
1908                 r = evergreen_cs_track_check(p);
1909                 if (r) {
1910                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1911                         return r;
1912                 }
1913                 break;
1914         }
1915         case PACKET3_DRAW_INDEX_2:
1916         {
1917                 uint64_t offset;
1918
1919                 if (pkt->count != 4) {
1920                         DRM_ERROR("bad DRAW_INDEX_2\n");
1921                         return -EINVAL;
1922                 }
1923                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1924                 if (r) {
1925                         DRM_ERROR("bad DRAW_INDEX_2\n");
1926                         return -EINVAL;
1927                 }
1928
1929                 offset = reloc->gpu_offset +
1930                          radeon_get_ib_value(p, idx+1) +
1931                          ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1932
1933                 ib[idx+1] = offset;
1934                 ib[idx+2] = upper_32_bits(offset) & 0xff;
1935
1936                 r = evergreen_cs_track_check(p);
1937                 if (r) {
1938                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1939                         return r;
1940                 }
1941                 break;
1942         }
1943         case PACKET3_DRAW_INDEX_AUTO:
1944                 if (pkt->count != 1) {
1945                         DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1946                         return -EINVAL;
1947                 }
1948                 r = evergreen_cs_track_check(p);
1949                 if (r) {
1950                         dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1951                         return r;
1952                 }
1953                 break;
1954         case PACKET3_DRAW_INDEX_MULTI_AUTO:
1955                 if (pkt->count != 2) {
1956                         DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1957                         return -EINVAL;
1958                 }
1959                 r = evergreen_cs_track_check(p);
1960                 if (r) {
1961                         dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1962                         return r;
1963                 }
1964                 break;
1965         case PACKET3_DRAW_INDEX_IMMD:
1966                 if (pkt->count < 2) {
1967                         DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1968                         return -EINVAL;
1969                 }
1970                 r = evergreen_cs_track_check(p);
1971                 if (r) {
1972                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1973                         return r;
1974                 }
1975                 break;
1976         case PACKET3_DRAW_INDEX_OFFSET:
1977                 if (pkt->count != 2) {
1978                         DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
1979                         return -EINVAL;
1980                 }
1981                 r = evergreen_cs_track_check(p);
1982                 if (r) {
1983                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1984                         return r;
1985                 }
1986                 break;
1987         case PACKET3_DRAW_INDEX_OFFSET_2:
1988                 if (pkt->count != 3) {
1989                         DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
1990                         return -EINVAL;
1991                 }
1992                 r = evergreen_cs_track_check(p);
1993                 if (r) {
1994                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1995                         return r;
1996                 }
1997                 break;
1998         case PACKET3_SET_BASE:
1999         {
2000                 /*
2001                 DW 1 HEADER Header of the packet. Shader_Type in bit 1 of the Header will correspond to the shader type of the Load, see Type-3 Packet.
2002                    2 BASE_INDEX Bits [3:0] BASE_INDEX - Base Index specifies which base address is specified in the last two DWs.
2003                      0001: DX11 Draw_Index_Indirect Patch Table Base: Base address for Draw_Index_Indirect data.
2004                    3 ADDRESS_LO Bits [31:3] - Lower bits of QWORD-Aligned Address. Bits [2:0] - Reserved
2005                    4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32]
2006                 */
2007                 if (pkt->count != 2) {
2008                         DRM_ERROR("bad SET_BASE\n");
2009                         return -EINVAL;
2010                 }
2011
2012                 /* currently only supporting setting indirect draw buffer base address */
2013                 if (idx_value != 1) {
2014                         DRM_ERROR("bad SET_BASE\n");
2015                         return -EINVAL;
2016                 }
2017
2018                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2019                 if (r) {
2020                         DRM_ERROR("bad SET_BASE\n");
2021                         return -EINVAL;
2022                 }
2023
2024                 track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj);
2025
2026                 ib[idx+1] = reloc->gpu_offset;
2027                 ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff;
2028
2029                 break;
2030         }
2031         case PACKET3_DRAW_INDIRECT:
2032         case PACKET3_DRAW_INDEX_INDIRECT:
2033         {
2034                 u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20;
2035
2036                 /*
2037                 DW 1 HEADER
2038                    2 DATA_OFFSET Bits [31:0] + byte aligned offset where the required data structure starts. Bits 1:0 are zero
2039                    3 DRAW_INITIATOR Draw Initiator Register. Written to the VGT_DRAW_INITIATOR register for the assigned context
2040                 */
2041                 if (pkt->count != 1) {
2042                         DRM_ERROR("bad DRAW_INDIRECT\n");
2043                         return -EINVAL;
2044                 }
2045
2046                 if (idx_value + size > track->indirect_draw_buffer_size) {
2047                         dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n",
2048                                 idx_value, size, track->indirect_draw_buffer_size);
2049                         return -EINVAL;
2050                 }
2051
2052                 r = evergreen_cs_track_check(p);
2053                 if (r) {
2054                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2055                         return r;
2056                 }
2057                 break;
2058         }
2059         case PACKET3_DISPATCH_DIRECT:
2060                 if (pkt->count != 3) {
2061                         DRM_ERROR("bad DISPATCH_DIRECT\n");
2062                         return -EINVAL;
2063                 }
2064                 r = evergreen_cs_track_check(p);
2065                 if (r) {
2066                         dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2067                         return r;
2068                 }
2069                 break;
2070         case PACKET3_DISPATCH_INDIRECT:
2071                 if (pkt->count != 1) {
2072                         DRM_ERROR("bad DISPATCH_INDIRECT\n");
2073                         return -EINVAL;
2074                 }
2075                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2076                 if (r) {
2077                         DRM_ERROR("bad DISPATCH_INDIRECT\n");
2078                         return -EINVAL;
2079                 }
2080                 ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2081                 r = evergreen_cs_track_check(p);
2082                 if (r) {
2083                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2084                         return r;
2085                 }
2086                 break;
2087         case PACKET3_WAIT_REG_MEM:
2088                 if (pkt->count != 5) {
2089                         DRM_ERROR("bad WAIT_REG_MEM\n");
2090                         return -EINVAL;
2091                 }
2092                 /* bit 4 is reg (0) or mem (1) */
2093                 if (idx_value & 0x10) {
2094                         uint64_t offset;
2095
2096                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2097                         if (r) {
2098                                 DRM_ERROR("bad WAIT_REG_MEM\n");
2099                                 return -EINVAL;
2100                         }
2101
2102                         offset = reloc->gpu_offset +
2103                                  (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2104                                  ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2105
2106                         ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2107                         ib[idx+2] = upper_32_bits(offset) & 0xff;
2108                 } else if (idx_value & 0x100) {
2109                         DRM_ERROR("cannot use PFP on REG wait\n");
2110                         return -EINVAL;
2111                 }
2112                 break;
2113         case PACKET3_CP_DMA:
2114         {
2115                 u32 command, size, info;
2116                 u64 offset, tmp;
2117                 if (pkt->count != 4) {
2118                         DRM_ERROR("bad CP DMA\n");
2119                         return -EINVAL;
2120                 }
2121                 command = radeon_get_ib_value(p, idx+4);
2122                 size = command & 0x1fffff;
2123                 info = radeon_get_ib_value(p, idx+1);
2124                 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2125                     (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2126                     ((((info & 0x00300000) >> 20) == 0) &&
2127                      (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2128                     ((((info & 0x60000000) >> 29) == 0) &&
2129                      (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2130                         /* non mem to mem copies requires dw aligned count */
2131                         if (size % 4) {
2132                                 DRM_ERROR("CP DMA command requires dw count alignment\n");
2133                                 return -EINVAL;
2134                         }
2135                 }
2136                 if (command & PACKET3_CP_DMA_CMD_SAS) {
2137                         /* src address space is register */
2138                         /* GDS is ok */
2139                         if (((info & 0x60000000) >> 29) != 1) {
2140                                 DRM_ERROR("CP DMA SAS not supported\n");
2141                                 return -EINVAL;
2142                         }
2143                 } else {
2144                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
2145                                 DRM_ERROR("CP DMA SAIC only supported for registers\n");
2146                                 return -EINVAL;
2147                         }
2148                         /* src address space is memory */
2149                         if (((info & 0x60000000) >> 29) == 0) {
2150                                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2151                                 if (r) {
2152                                         DRM_ERROR("bad CP DMA SRC\n");
2153                                         return -EINVAL;
2154                                 }
2155
2156                                 tmp = radeon_get_ib_value(p, idx) +
2157                                         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2158
2159                                 offset = reloc->gpu_offset + tmp;
2160
2161                                 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2162                                         dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2163                                                  tmp + size, radeon_bo_size(reloc->robj));
2164                                         return -EINVAL;
2165                                 }
2166
2167                                 ib[idx] = offset;
2168                                 ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2169                         } else if (((info & 0x60000000) >> 29) != 2) {
2170                                 DRM_ERROR("bad CP DMA SRC_SEL\n");
2171                                 return -EINVAL;
2172                         }
2173                 }
2174                 if (command & PACKET3_CP_DMA_CMD_DAS) {
2175                         /* dst address space is register */
2176                         /* GDS is ok */
2177                         if (((info & 0x00300000) >> 20) != 1) {
2178                                 DRM_ERROR("CP DMA DAS not supported\n");
2179                                 return -EINVAL;
2180                         }
2181                 } else {
2182                         /* dst address space is memory */
2183                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
2184                                 DRM_ERROR("CP DMA DAIC only supported for registers\n");
2185                                 return -EINVAL;
2186                         }
2187                         if (((info & 0x00300000) >> 20) == 0) {
2188                                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2189                                 if (r) {
2190                                         DRM_ERROR("bad CP DMA DST\n");
2191                                         return -EINVAL;
2192                                 }
2193
2194                                 tmp = radeon_get_ib_value(p, idx+2) +
2195                                         ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2196
2197                                 offset = reloc->gpu_offset + tmp;
2198
2199                                 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2200                                         dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2201                                                  tmp + size, radeon_bo_size(reloc->robj));
2202                                         return -EINVAL;
2203                                 }
2204
2205                                 ib[idx+2] = offset;
2206                                 ib[idx+3] = upper_32_bits(offset) & 0xff;
2207                         } else {
2208                                 DRM_ERROR("bad CP DMA DST_SEL\n");
2209                                 return -EINVAL;
2210                         }
2211                 }
2212                 break;
2213         }
2214         case PACKET3_PFP_SYNC_ME:
2215                 if (pkt->count) {
2216                         DRM_ERROR("bad PFP_SYNC_ME\n");
2217                         return -EINVAL;
2218                 }
2219                 break;
2220         case PACKET3_SURFACE_SYNC:
2221                 if (pkt->count != 3) {
2222                         DRM_ERROR("bad SURFACE_SYNC\n");
2223                         return -EINVAL;
2224                 }
2225                 /* 0xffffffff/0x0 is flush all cache flag */
2226                 if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2227                     radeon_get_ib_value(p, idx + 2) != 0) {
2228                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2229                         if (r) {
2230                                 DRM_ERROR("bad SURFACE_SYNC\n");
2231                                 return -EINVAL;
2232                         }
2233                         ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2234                 }
2235                 break;
2236         case PACKET3_EVENT_WRITE:
2237                 if (pkt->count != 2 && pkt->count != 0) {
2238                         DRM_ERROR("bad EVENT_WRITE\n");
2239                         return -EINVAL;
2240                 }
2241                 if (pkt->count) {
2242                         uint64_t offset;
2243
2244                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2245                         if (r) {
2246                                 DRM_ERROR("bad EVENT_WRITE\n");
2247                                 return -EINVAL;
2248                         }
2249                         offset = reloc->gpu_offset +
2250                                  (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2251                                  ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2252
2253                         ib[idx+1] = offset & 0xfffffff8;
2254                         ib[idx+2] = upper_32_bits(offset) & 0xff;
2255                 }
2256                 break;
2257         case PACKET3_EVENT_WRITE_EOP:
2258         {
2259                 uint64_t offset;
2260
2261                 if (pkt->count != 4) {
2262                         DRM_ERROR("bad EVENT_WRITE_EOP\n");
2263                         return -EINVAL;
2264                 }
2265                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2266                 if (r) {
2267                         DRM_ERROR("bad EVENT_WRITE_EOP\n");
2268                         return -EINVAL;
2269                 }
2270
2271                 offset = reloc->gpu_offset +
2272                          (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2273                          ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2274
2275                 ib[idx+1] = offset & 0xfffffffc;
2276                 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2277                 break;
2278         }
2279         case PACKET3_EVENT_WRITE_EOS:
2280         {
2281                 uint64_t offset;
2282
2283                 if (pkt->count != 3) {
2284                         DRM_ERROR("bad EVENT_WRITE_EOS\n");
2285                         return -EINVAL;
2286                 }
2287                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2288                 if (r) {
2289                         DRM_ERROR("bad EVENT_WRITE_EOS\n");
2290                         return -EINVAL;
2291                 }
2292
2293                 offset = reloc->gpu_offset +
2294                          (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2295                          ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2296
2297                 ib[idx+1] = offset & 0xfffffffc;
2298                 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2299                 break;
2300         }
2301         case PACKET3_SET_CONFIG_REG:
2302                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2303                 end_reg = 4 * pkt->count + start_reg - 4;
2304                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2305                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2306                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2307                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2308                         return -EINVAL;
2309                 }
2310                 for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2311                         if (evergreen_is_safe_reg(p, reg))
2312                                 continue;
2313                         r = evergreen_cs_handle_reg(p, reg, idx);
2314                         if (r)
2315                                 return r;
2316                 }
2317                 break;
2318         case PACKET3_SET_CONTEXT_REG:
2319                 start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2320                 end_reg = 4 * pkt->count + start_reg - 4;
2321                 if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2322                     (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2323                     (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2324                         DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2325                         return -EINVAL;
2326                 }
2327                 for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2328                         if (evergreen_is_safe_reg(p, reg))
2329                                 continue;
2330                         r = evergreen_cs_handle_reg(p, reg, idx);
2331                         if (r)
2332                                 return r;
2333                 }
2334                 break;
2335         case PACKET3_SET_RESOURCE:
2336                 if (pkt->count % 8) {
2337                         DRM_ERROR("bad SET_RESOURCE\n");
2338                         return -EINVAL;
2339                 }
2340                 start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2341                 end_reg = 4 * pkt->count + start_reg - 4;
2342                 if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2343                     (start_reg >= PACKET3_SET_RESOURCE_END) ||
2344                     (end_reg >= PACKET3_SET_RESOURCE_END)) {
2345                         DRM_ERROR("bad SET_RESOURCE\n");
2346                         return -EINVAL;
2347                 }
2348                 for (i = 0; i < (pkt->count / 8); i++) {
2349                         struct radeon_bo *texture, *mipmap;
2350                         u32 toffset, moffset;
2351                         u32 size, offset, mip_address, tex_dim;
2352
2353                         switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2354                         case SQ_TEX_VTX_VALID_TEXTURE:
2355                                 /* tex base */
2356                                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2357                                 if (r) {
2358                                         DRM_ERROR("bad SET_RESOURCE (tex)\n");
2359                                         return -EINVAL;
2360                                 }
2361                                 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2362                                         ib[idx+1+(i*8)+1] |=
2363                                                 TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2364                                         if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2365                                                 unsigned bankw, bankh, mtaspect, tile_split;
2366
2367                                                 evergreen_tiling_fields(reloc->tiling_flags,
2368                                                                         &bankw, &bankh, &mtaspect,
2369                                                                         &tile_split);
2370                                                 ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2371                                                 ib[idx+1+(i*8)+7] |=
2372                                                         TEX_BANK_WIDTH(bankw) |
2373                                                         TEX_BANK_HEIGHT(bankh) |
2374                                                         MACRO_TILE_ASPECT(mtaspect) |
2375                                                         TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2376                                         }
2377                                 }
2378                                 texture = reloc->robj;
2379                                 toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2380
2381                                 /* tex mip base */
2382                                 tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2383                                 mip_address = ib[idx+1+(i*8)+3];
2384
2385                                 if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2386                                     !mip_address &&
2387                                     !radeon_cs_packet_next_is_pkt3_nop(p)) {
2388                                         /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2389                                          * It should be 0 if FMASK is disabled. */
2390                                         moffset = 0;
2391                                         mipmap = NULL;
2392                                 } else {
2393                                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2394                                         if (r) {
2395                                                 DRM_ERROR("bad SET_RESOURCE (tex)\n");
2396                                                 return -EINVAL;
2397                                         }
2398                                         moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2399                                         mipmap = reloc->robj;
2400                                 }
2401
2402                                 r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2403                                 if (r)
2404                                         return r;
2405                                 ib[idx+1+(i*8)+2] += toffset;
2406                                 ib[idx+1+(i*8)+3] += moffset;
2407                                 break;
2408                         case SQ_TEX_VTX_VALID_BUFFER:
2409                         {
2410                                 uint64_t offset64;
2411                                 /* vtx base */
2412                                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2413                                 if (r) {
2414                                         DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2415                                         return -EINVAL;
2416                                 }
2417                                 offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2418                                 size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2419                                 if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2420                                         /* force size to size of the buffer */
2421                                         dev_warn_ratelimited(p->dev, "vbo resource seems too big for the bo\n");
2422                                         ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2423                                 }
2424
2425                                 offset64 = reloc->gpu_offset + offset;
2426                                 ib[idx+1+(i*8)+0] = offset64;
2427                                 ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2428                                                     (upper_32_bits(offset64) & 0xff);
2429                                 break;
2430                         }
2431                         case SQ_TEX_VTX_INVALID_TEXTURE:
2432                         case SQ_TEX_VTX_INVALID_BUFFER:
2433                         default:
2434                                 DRM_ERROR("bad SET_RESOURCE\n");
2435                                 return -EINVAL;
2436                         }
2437                 }
2438                 break;
2439         case PACKET3_SET_ALU_CONST:
2440                 /* XXX fix me ALU const buffers only */
2441                 break;
2442         case PACKET3_SET_BOOL_CONST:
2443                 start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2444                 end_reg = 4 * pkt->count + start_reg - 4;
2445                 if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2446                     (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2447                     (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2448                         DRM_ERROR("bad SET_BOOL_CONST\n");
2449                         return -EINVAL;
2450                 }
2451                 break;
2452         case PACKET3_SET_LOOP_CONST:
2453                 start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2454                 end_reg = 4 * pkt->count + start_reg - 4;
2455                 if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2456                     (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2457                     (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2458                         DRM_ERROR("bad SET_LOOP_CONST\n");
2459                         return -EINVAL;
2460                 }
2461                 break;
2462         case PACKET3_SET_CTL_CONST:
2463                 start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2464                 end_reg = 4 * pkt->count + start_reg - 4;
2465                 if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2466                     (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2467                     (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2468                         DRM_ERROR("bad SET_CTL_CONST\n");
2469                         return -EINVAL;
2470                 }
2471                 break;
2472         case PACKET3_SET_SAMPLER:
2473                 if (pkt->count % 3) {
2474                         DRM_ERROR("bad SET_SAMPLER\n");
2475                         return -EINVAL;
2476                 }
2477                 start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2478                 end_reg = 4 * pkt->count + start_reg - 4;
2479                 if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2480                     (start_reg >= PACKET3_SET_SAMPLER_END) ||
2481                     (end_reg >= PACKET3_SET_SAMPLER_END)) {
2482                         DRM_ERROR("bad SET_SAMPLER\n");
2483                         return -EINVAL;
2484                 }
2485                 break;
2486         case PACKET3_STRMOUT_BUFFER_UPDATE:
2487                 if (pkt->count != 4) {
2488                         DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2489                         return -EINVAL;
2490                 }
2491                 /* Updating memory at DST_ADDRESS. */
2492                 if (idx_value & 0x1) {
2493                         u64 offset;
2494                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2495                         if (r) {
2496                                 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2497                                 return -EINVAL;
2498                         }
2499                         offset = radeon_get_ib_value(p, idx+1);
2500                         offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2501                         if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2502                                 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2503                                           offset + 4, radeon_bo_size(reloc->robj));
2504                                 return -EINVAL;
2505                         }
2506                         offset += reloc->gpu_offset;
2507                         ib[idx+1] = offset;
2508                         ib[idx+2] = upper_32_bits(offset) & 0xff;
2509                 }
2510                 /* Reading data from SRC_ADDRESS. */
2511                 if (((idx_value >> 1) & 0x3) == 2) {
2512                         u64 offset;
2513                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2514                         if (r) {
2515                                 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2516                                 return -EINVAL;
2517                         }
2518                         offset = radeon_get_ib_value(p, idx+3);
2519                         offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2520                         if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2521                                 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2522                                           offset + 4, radeon_bo_size(reloc->robj));
2523                                 return -EINVAL;
2524                         }
2525                         offset += reloc->gpu_offset;
2526                         ib[idx+3] = offset;
2527                         ib[idx+4] = upper_32_bits(offset) & 0xff;
2528                 }
2529                 break;
2530         case PACKET3_MEM_WRITE:
2531         {
2532                 u64 offset;
2533
2534                 if (pkt->count != 3) {
2535                         DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2536                         return -EINVAL;
2537                 }
2538                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2539                 if (r) {
2540                         DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2541                         return -EINVAL;
2542                 }
2543                 offset = radeon_get_ib_value(p, idx+0);
2544                 offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2545                 if (offset & 0x7) {
2546                         DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2547                         return -EINVAL;
2548                 }
2549                 if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2550                         DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2551                                   offset + 8, radeon_bo_size(reloc->robj));
2552                         return -EINVAL;
2553                 }
2554                 offset += reloc->gpu_offset;
2555                 ib[idx+0] = offset;
2556                 ib[idx+1] = upper_32_bits(offset) & 0xff;
2557                 break;
2558         }
2559         case PACKET3_COPY_DW:
2560                 if (pkt->count != 4) {
2561                         DRM_ERROR("bad COPY_DW (invalid count)\n");
2562                         return -EINVAL;
2563                 }
2564                 if (idx_value & 0x1) {
2565                         u64 offset;
2566                         /* SRC is memory. */
2567                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2568                         if (r) {
2569                                 DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2570                                 return -EINVAL;
2571                         }
2572                         offset = radeon_get_ib_value(p, idx+1);
2573                         offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2574                         if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2575                                 DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2576                                           offset + 4, radeon_bo_size(reloc->robj));
2577                                 return -EINVAL;
2578                         }
2579                         offset += reloc->gpu_offset;
2580                         ib[idx+1] = offset;
2581                         ib[idx+2] = upper_32_bits(offset) & 0xff;
2582                 } else {
2583                         /* SRC is a reg. */
2584                         reg = radeon_get_ib_value(p, idx+1) << 2;
2585                         if (!evergreen_is_safe_reg(p, reg)) {
2586                                 dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2587                                          reg, idx + 1);
2588                                 return -EINVAL;
2589                         }
2590                 }
2591                 if (idx_value & 0x2) {
2592                         u64 offset;
2593                         /* DST is memory. */
2594                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2595                         if (r) {
2596                                 DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2597                                 return -EINVAL;
2598                         }
2599                         offset = radeon_get_ib_value(p, idx+3);
2600                         offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2601                         if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2602                                 DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2603                                           offset + 4, radeon_bo_size(reloc->robj));
2604                                 return -EINVAL;
2605                         }
2606                         offset += reloc->gpu_offset;
2607                         ib[idx+3] = offset;
2608                         ib[idx+4] = upper_32_bits(offset) & 0xff;
2609                 } else {
2610                         /* DST is a reg. */
2611                         reg = radeon_get_ib_value(p, idx+3) << 2;
2612                         if (!evergreen_is_safe_reg(p, reg)) {
2613                                 dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2614                                          reg, idx + 3);
2615                                 return -EINVAL;
2616                         }
2617                 }
2618                 break;
2619         case PACKET3_SET_APPEND_CNT:
2620         {
2621                 uint32_t areg;
2622                 uint32_t allowed_reg_base;
2623                 uint32_t source_sel;
2624                 if (pkt->count != 2) {
2625                         DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
2626                         return -EINVAL;
2627                 }
2628
2629                 allowed_reg_base = GDS_APPEND_COUNT_0;
2630                 allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
2631                 allowed_reg_base >>= 2;
2632
2633                 areg = idx_value >> 16;
2634                 if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
2635                         dev_warn(p->dev, "forbidden register for append cnt 0x%08x at %d\n",
2636                                  areg, idx);
2637                         return -EINVAL;
2638                 }
2639
2640                 source_sel = G_PACKET3_SET_APPEND_CNT_SRC_SELECT(idx_value);
2641                 if (source_sel == PACKET3_SAC_SRC_SEL_MEM) {
2642                         uint64_t offset;
2643                         uint32_t swap;
2644                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2645                         if (r) {
2646                                 DRM_ERROR("bad SET_APPEND_CNT (missing reloc)\n");
2647                                 return -EINVAL;
2648                         }
2649                         offset = radeon_get_ib_value(p, idx + 1);
2650                         swap = offset & 0x3;
2651                         offset &= ~0x3;
2652
2653                         offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32;
2654
2655                         offset += reloc->gpu_offset;
2656                         ib[idx+1] = (offset & 0xfffffffc) | swap;
2657                         ib[idx+2] = upper_32_bits(offset) & 0xff;
2658                 } else {
2659                         DRM_ERROR("bad SET_APPEND_CNT (unsupported operation)\n");
2660                         return -EINVAL;
2661                 }
2662                 break;
2663         }
2664         case PACKET3_NOP:
2665                 break;
2666         default:
2667                 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2668                 return -EINVAL;
2669         }
2670         return 0;
2671 }
2672
2673 int evergreen_cs_parse(struct radeon_cs_parser *p)
2674 {
2675         struct radeon_cs_packet pkt;
2676         struct evergreen_cs_track *track;
2677         u32 tmp;
2678         int r;
2679
2680         if (p->track == NULL) {
2681                 /* initialize tracker, we are in kms */
2682                 track = kzalloc(sizeof(*track), GFP_KERNEL);
2683                 if (track == NULL)
2684                         return -ENOMEM;
2685                 evergreen_cs_track_init(track);
2686                 if (p->rdev->family >= CHIP_CAYMAN) {
2687                         tmp = p->rdev->config.cayman.tile_config;
2688                         track->reg_safe_bm = cayman_reg_safe_bm;
2689                 } else {
2690                         tmp = p->rdev->config.evergreen.tile_config;
2691                         track->reg_safe_bm = evergreen_reg_safe_bm;
2692                 }
2693                 BUILD_BUG_ON(ARRAY_SIZE(cayman_reg_safe_bm) != REG_SAFE_BM_SIZE);
2694                 BUILD_BUG_ON(ARRAY_SIZE(evergreen_reg_safe_bm) != REG_SAFE_BM_SIZE);
2695                 switch (tmp & 0xf) {
2696                 case 0:
2697                         track->npipes = 1;
2698                         break;
2699                 case 1:
2700                 default:
2701                         track->npipes = 2;
2702                         break;
2703                 case 2:
2704                         track->npipes = 4;
2705                         break;
2706                 case 3:
2707                         track->npipes = 8;
2708                         break;
2709                 }
2710
2711                 switch ((tmp & 0xf0) >> 4) {
2712                 case 0:
2713                         track->nbanks = 4;
2714                         break;
2715                 case 1:
2716                 default:
2717                         track->nbanks = 8;
2718                         break;
2719                 case 2:
2720                         track->nbanks = 16;
2721                         break;
2722                 }
2723
2724                 switch ((tmp & 0xf00) >> 8) {
2725                 case 0:
2726                         track->group_size = 256;
2727                         break;
2728                 case 1:
2729                 default:
2730                         track->group_size = 512;
2731                         break;
2732                 }
2733
2734                 switch ((tmp & 0xf000) >> 12) {
2735                 case 0:
2736                         track->row_size = 1;
2737                         break;
2738                 case 1:
2739                 default:
2740                         track->row_size = 2;
2741                         break;
2742                 case 2:
2743                         track->row_size = 4;
2744                         break;
2745                 }
2746
2747                 p->track = track;
2748         }
2749         do {
2750                 r = radeon_cs_packet_parse(p, &pkt, p->idx);
2751                 if (r) {
2752                         kfree(p->track);
2753                         p->track = NULL;
2754                         return r;
2755                 }
2756                 p->idx += pkt.count + 2;
2757                 switch (pkt.type) {
2758                 case RADEON_PACKET_TYPE0:
2759                         r = evergreen_cs_parse_packet0(p, &pkt);
2760                         break;
2761                 case RADEON_PACKET_TYPE2:
2762                         break;
2763                 case RADEON_PACKET_TYPE3:
2764                         r = evergreen_packet3_check(p, &pkt);
2765                         break;
2766                 default:
2767                         DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2768                         kfree(p->track);
2769                         p->track = NULL;
2770                         return -EINVAL;
2771                 }
2772                 if (r) {
2773                         kfree(p->track);
2774                         p->track = NULL;
2775                         return r;
2776                 }
2777         } while (p->idx < p->chunk_ib->length_dw);
2778 #if 0
2779         for (r = 0; r < p->ib.length_dw; r++) {
2780                 pr_info("%05d  0x%08X\n", r, p->ib.ptr[r]);
2781                 mdelay(1);
2782         }
2783 #endif
2784         kfree(p->track);
2785         p->track = NULL;
2786         return 0;
2787 }
2788
2789 /**
2790  * evergreen_dma_cs_parse() - parse the DMA IB
2791  * @p:          parser structure holding parsing context.
2792  *
2793  * Parses the DMA IB from the CS ioctl and updates
2794  * the GPU addresses based on the reloc information and
2795  * checks for errors. (Evergreen-Cayman)
2796  * Returns 0 for success and an error on failure.
2797  **/
2798 int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2799 {
2800         struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
2801         struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
2802         u32 header, cmd, count, sub_cmd;
2803         uint32_t *ib = p->ib.ptr;
2804         u32 idx;
2805         u64 src_offset, dst_offset, dst2_offset;
2806         int r;
2807
2808         do {
2809                 if (p->idx >= ib_chunk->length_dw) {
2810                         DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2811                                   p->idx, ib_chunk->length_dw);
2812                         return -EINVAL;
2813                 }
2814                 idx = p->idx;
2815                 header = radeon_get_ib_value(p, idx);
2816                 cmd = GET_DMA_CMD(header);
2817                 count = GET_DMA_COUNT(header);
2818                 sub_cmd = GET_DMA_SUB_CMD(header);
2819
2820                 switch (cmd) {
2821                 case DMA_PACKET_WRITE:
2822                         r = r600_dma_cs_next_reloc(p, &dst_reloc);
2823                         if (r) {
2824                                 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2825                                 return -EINVAL;
2826                         }
2827                         switch (sub_cmd) {
2828                         /* tiled */
2829                         case 8:
2830                                 dst_offset = radeon_get_ib_value(p, idx+1);
2831                                 dst_offset <<= 8;
2832
2833                                 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2834                                 p->idx += count + 7;
2835                                 break;
2836                         /* linear */
2837                         case 0:
2838                                 dst_offset = radeon_get_ib_value(p, idx+1);
2839                                 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2840
2841                                 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2842                                 ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2843                                 p->idx += count + 3;
2844                                 break;
2845                         default:
2846                                 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2847                                 return -EINVAL;
2848                         }
2849                         if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2850                                 dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2851                                          dst_offset, radeon_bo_size(dst_reloc->robj));
2852                                 return -EINVAL;
2853                         }
2854                         break;
2855                 case DMA_PACKET_COPY:
2856                         r = r600_dma_cs_next_reloc(p, &src_reloc);
2857                         if (r) {
2858                                 DRM_ERROR("bad DMA_PACKET_COPY\n");
2859                                 return -EINVAL;
2860                         }
2861                         r = r600_dma_cs_next_reloc(p, &dst_reloc);
2862                         if (r) {
2863                                 DRM_ERROR("bad DMA_PACKET_COPY\n");
2864                                 return -EINVAL;
2865                         }
2866                         switch (sub_cmd) {
2867                         /* Copy L2L, DW aligned */
2868                         case 0x00:
2869                                 /* L2L, dw */
2870                                 src_offset = radeon_get_ib_value(p, idx+2);
2871                                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2872                                 dst_offset = radeon_get_ib_value(p, idx+1);
2873                                 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2874                                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2875                                         dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
2876                                                         src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2877                                         return -EINVAL;
2878                                 }
2879                                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2880                                         dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
2881                                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2882                                         return -EINVAL;
2883                                 }
2884                                 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2885                                 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2886                                 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2887                                 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2888                                 p->idx += 5;
2889                                 break;
2890                         /* Copy L2T/T2L */
2891                         case 0x08:
2892                                 /* detile bit */
2893                                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2894                                         /* tiled src, linear dst */
2895                                         src_offset = radeon_get_ib_value(p, idx+1);
2896                                         src_offset <<= 8;
2897                                         ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2898
2899                                         dst_offset = radeon_get_ib_value(p, idx + 7);
2900                                         dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2901                                         ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2902                                         ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2903                                 } else {
2904                                         /* linear src, tiled dst */
2905                                         src_offset = radeon_get_ib_value(p, idx+7);
2906                                         src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2907                                         ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2908                                         ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2909
2910                                         dst_offset = radeon_get_ib_value(p, idx+1);
2911                                         dst_offset <<= 8;
2912                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2913                                 }
2914                                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2915                                         dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
2916                                                         src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2917                                         return -EINVAL;
2918                                 }
2919                                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2920                                         dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
2921                                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2922                                         return -EINVAL;
2923                                 }
2924                                 p->idx += 9;
2925                                 break;
2926                         /* Copy L2L, byte aligned */
2927                         case 0x40:
2928                                 /* L2L, byte */
2929                                 src_offset = radeon_get_ib_value(p, idx+2);
2930                                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2931                                 dst_offset = radeon_get_ib_value(p, idx+1);
2932                                 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2933                                 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2934                                         dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
2935                                                         src_offset + count, radeon_bo_size(src_reloc->robj));
2936                                         return -EINVAL;
2937                                 }
2938                                 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2939                                         dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
2940                                                         dst_offset + count, radeon_bo_size(dst_reloc->robj));
2941                                         return -EINVAL;
2942                                 }
2943                                 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2944                                 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2945                                 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2946                                 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2947                                 p->idx += 5;
2948                                 break;
2949                         /* Copy L2L, partial */
2950                         case 0x41:
2951                                 /* L2L, partial */
2952                                 if (p->family < CHIP_CAYMAN) {
2953                                         DRM_ERROR("L2L Partial is cayman only !\n");
2954                                         return -EINVAL;
2955                                 }
2956                                 ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2957                                 ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2958                                 ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2959                                 ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2960
2961                                 p->idx += 9;
2962                                 break;
2963                         /* Copy L2L, DW aligned, broadcast */
2964                         case 0x44:
2965                                 /* L2L, dw, broadcast */
2966                                 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2967                                 if (r) {
2968                                         DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2969                                         return -EINVAL;
2970                                 }
2971                                 dst_offset = radeon_get_ib_value(p, idx+1);
2972                                 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2973                                 dst2_offset = radeon_get_ib_value(p, idx+2);
2974                                 dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2975                                 src_offset = radeon_get_ib_value(p, idx+3);
2976                                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2977                                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2978                                         dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
2979                                                         src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2980                                         return -EINVAL;
2981                                 }
2982                                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2983                                         dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
2984                                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2985                                         return -EINVAL;
2986                                 }
2987                                 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2988                                         dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
2989                                                         dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2990                                         return -EINVAL;
2991                                 }
2992                                 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2993                                 ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2994                                 ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2995                                 ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2996                                 ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2997                                 ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2998                                 p->idx += 7;
2999                                 break;
3000                         /* Copy L2T Frame to Field */
3001                         case 0x48:
3002                                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3003                                         DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
3004                                         return -EINVAL;
3005                                 }
3006                                 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3007                                 if (r) {
3008                                         DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
3009                                         return -EINVAL;
3010                                 }
3011                                 dst_offset = radeon_get_ib_value(p, idx+1);
3012                                 dst_offset <<= 8;
3013                                 dst2_offset = radeon_get_ib_value(p, idx+2);
3014                                 dst2_offset <<= 8;
3015                                 src_offset = radeon_get_ib_value(p, idx+8);
3016                                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3017                                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3018                                         dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
3019                                                         src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3020                                         return -EINVAL;
3021                                 }
3022                                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3023                                         dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
3024                                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3025                                         return -EINVAL;
3026                                 }
3027                                 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3028                                         dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
3029                                                         dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3030                                         return -EINVAL;
3031                                 }
3032                                 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3033                                 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3034                                 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3035                                 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3036                                 p->idx += 10;
3037                                 break;
3038                         /* Copy L2T/T2L, partial */
3039                         case 0x49:
3040                                 /* L2T, T2L partial */
3041                                 if (p->family < CHIP_CAYMAN) {
3042                                         DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3043                                         return -EINVAL;
3044                                 }
3045                                 /* detile bit */
3046                                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3047                                         /* tiled src, linear dst */
3048                                         ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3049
3050                                         ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3051                                         ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3052                                 } else {
3053                                         /* linear src, tiled dst */
3054                                         ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3055                                         ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3056
3057                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3058                                 }
3059                                 p->idx += 12;
3060                                 break;
3061                         /* Copy L2T broadcast */
3062                         case 0x4b:
3063                                 /* L2T, broadcast */
3064                                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3065                                         DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3066                                         return -EINVAL;
3067                                 }
3068                                 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3069                                 if (r) {
3070                                         DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3071                                         return -EINVAL;
3072                                 }
3073                                 dst_offset = radeon_get_ib_value(p, idx+1);
3074                                 dst_offset <<= 8;
3075                                 dst2_offset = radeon_get_ib_value(p, idx+2);
3076                                 dst2_offset <<= 8;
3077                                 src_offset = radeon_get_ib_value(p, idx+8);
3078                                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3079                                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3080                                         dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3081                                                         src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3082                                         return -EINVAL;
3083                                 }
3084                                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3085                                         dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3086                                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3087                                         return -EINVAL;
3088                                 }
3089                                 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3090                                         dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3091                                                         dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3092                                         return -EINVAL;
3093                                 }
3094                                 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3095                                 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3096                                 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3097                                 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3098                                 p->idx += 10;
3099                                 break;
3100                         /* Copy L2T/T2L (tile units) */
3101                         case 0x4c:
3102                                 /* L2T, T2L */
3103                                 /* detile bit */
3104                                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3105                                         /* tiled src, linear dst */
3106                                         src_offset = radeon_get_ib_value(p, idx+1);
3107                                         src_offset <<= 8;
3108                                         ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3109
3110                                         dst_offset = radeon_get_ib_value(p, idx+7);
3111                                         dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3112                                         ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3113                                         ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3114                                 } else {
3115                                         /* linear src, tiled dst */
3116                                         src_offset = radeon_get_ib_value(p, idx+7);
3117                                         src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3118                                         ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3119                                         ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3120
3121                                         dst_offset = radeon_get_ib_value(p, idx+1);
3122                                         dst_offset <<= 8;
3123                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3124                                 }
3125                                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3126                                         dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3127                                                         src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3128                                         return -EINVAL;
3129                                 }
3130                                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3131                                         dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3132                                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3133                                         return -EINVAL;
3134                                 }
3135                                 p->idx += 9;
3136                                 break;
3137                         /* Copy T2T, partial (tile units) */
3138                         case 0x4d:
3139                                 /* T2T partial */
3140                                 if (p->family < CHIP_CAYMAN) {
3141                                         DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3142                                         return -EINVAL;
3143                                 }
3144                                 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3145                                 ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3146                                 p->idx += 13;
3147                                 break;
3148                         /* Copy L2T broadcast (tile units) */
3149                         case 0x4f:
3150                                 /* L2T, broadcast */
3151                                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3152                                         DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3153                                         return -EINVAL;
3154                                 }
3155                                 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3156                                 if (r) {
3157                                         DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3158                                         return -EINVAL;
3159                                 }
3160                                 dst_offset = radeon_get_ib_value(p, idx+1);
3161                                 dst_offset <<= 8;
3162                                 dst2_offset = radeon_get_ib_value(p, idx+2);
3163                                 dst2_offset <<= 8;
3164                                 src_offset = radeon_get_ib_value(p, idx+8);
3165                                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3166                                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3167                                         dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3168                                                         src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3169                                         return -EINVAL;
3170                                 }
3171                                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3172                                         dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3173                                                         dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3174                                         return -EINVAL;
3175                                 }
3176                                 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3177                                         dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3178                                                         dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3179                                         return -EINVAL;
3180                                 }
3181                                 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3182                                 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3183                                 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3184                                 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3185                                 p->idx += 10;
3186                                 break;
3187                         default:
3188                                 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3189                                 return -EINVAL;
3190                         }
3191                         break;
3192                 case DMA_PACKET_CONSTANT_FILL:
3193                         r = r600_dma_cs_next_reloc(p, &dst_reloc);
3194                         if (r) {
3195                                 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3196                                 return -EINVAL;
3197                         }
3198                         dst_offset = radeon_get_ib_value(p, idx+1);
3199                         dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3200                         if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3201                                 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3202                                          dst_offset, radeon_bo_size(dst_reloc->robj));
3203                                 return -EINVAL;
3204                         }
3205                         ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3206                         ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3207                         p->idx += 4;
3208                         break;
3209                 case DMA_PACKET_NOP:
3210                         p->idx += 1;
3211                         break;
3212                 default:
3213                         DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3214                         return -EINVAL;
3215                 }
3216         } while (p->idx < p->chunk_ib->length_dw);
3217 #if 0
3218         for (r = 0; r < p->ib->length_dw; r++) {
3219                 pr_info("%05d  0x%08X\n", r, p->ib.ptr[r]);
3220                 mdelay(1);
3221         }
3222 #endif
3223         return 0;
3224 }
3225
3226 /* vm parser */
3227 static bool evergreen_vm_reg_valid(u32 reg)
3228 {
3229         /* context regs are fine */
3230         if (reg >= 0x28000)
3231                 return true;
3232
3233         /* check config regs */
3234         switch (reg) {
3235         case WAIT_UNTIL:
3236         case GRBM_GFX_INDEX:
3237         case CP_STRMOUT_CNTL:
3238         case CP_COHER_CNTL:
3239         case CP_COHER_SIZE:
3240         case VGT_VTX_VECT_EJECT_REG:
3241         case VGT_CACHE_INVALIDATION:
3242         case VGT_GS_VERTEX_REUSE:
3243         case VGT_PRIMITIVE_TYPE:
3244         case VGT_INDEX_TYPE:
3245         case VGT_NUM_INDICES:
3246         case VGT_NUM_INSTANCES:
3247         case VGT_COMPUTE_DIM_X:
3248         case VGT_COMPUTE_DIM_Y:
3249         case VGT_COMPUTE_DIM_Z:
3250         case VGT_COMPUTE_START_X:
3251         case VGT_COMPUTE_START_Y:
3252         case VGT_COMPUTE_START_Z:
3253         case VGT_COMPUTE_INDEX:
3254         case VGT_COMPUTE_THREAD_GROUP_SIZE:
3255         case VGT_HS_OFFCHIP_PARAM:
3256         case PA_CL_ENHANCE:
3257         case PA_SU_LINE_STIPPLE_VALUE:
3258         case PA_SC_LINE_STIPPLE_STATE:
3259         case PA_SC_ENHANCE:
3260         case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3261         case SQ_DYN_GPR_SIMD_LOCK_EN:
3262         case SQ_CONFIG:
3263         case SQ_GPR_RESOURCE_MGMT_1:
3264         case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3265         case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3266         case SQ_CONST_MEM_BASE:
3267         case SQ_STATIC_THREAD_MGMT_1:
3268         case SQ_STATIC_THREAD_MGMT_2:
3269         case SQ_STATIC_THREAD_MGMT_3:
3270         case SPI_CONFIG_CNTL:
3271         case SPI_CONFIG_CNTL_1:
3272         case TA_CNTL_AUX:
3273         case DB_DEBUG:
3274         case DB_DEBUG2:
3275         case DB_DEBUG3:
3276         case DB_DEBUG4:
3277         case DB_WATERMARKS:
3278         case TD_PS_BORDER_COLOR_INDEX:
3279         case TD_PS_BORDER_COLOR_RED:
3280         case TD_PS_BORDER_COLOR_GREEN:
3281         case TD_PS_BORDER_COLOR_BLUE:
3282         case TD_PS_BORDER_COLOR_ALPHA:
3283         case TD_VS_BORDER_COLOR_INDEX:
3284         case TD_VS_BORDER_COLOR_RED:
3285         case TD_VS_BORDER_COLOR_GREEN:
3286         case TD_VS_BORDER_COLOR_BLUE:
3287         case TD_VS_BORDER_COLOR_ALPHA:
3288         case TD_GS_BORDER_COLOR_INDEX:
3289         case TD_GS_BORDER_COLOR_RED:
3290         case TD_GS_BORDER_COLOR_GREEN:
3291         case TD_GS_BORDER_COLOR_BLUE:
3292         case TD_GS_BORDER_COLOR_ALPHA:
3293         case TD_HS_BORDER_COLOR_INDEX:
3294         case TD_HS_BORDER_COLOR_RED:
3295         case TD_HS_BORDER_COLOR_GREEN:
3296         case TD_HS_BORDER_COLOR_BLUE:
3297         case TD_HS_BORDER_COLOR_ALPHA:
3298         case TD_LS_BORDER_COLOR_INDEX:
3299         case TD_LS_BORDER_COLOR_RED:
3300         case TD_LS_BORDER_COLOR_GREEN:
3301         case TD_LS_BORDER_COLOR_BLUE:
3302         case TD_LS_BORDER_COLOR_ALPHA:
3303         case TD_CS_BORDER_COLOR_INDEX:
3304         case TD_CS_BORDER_COLOR_RED:
3305         case TD_CS_BORDER_COLOR_GREEN:
3306         case TD_CS_BORDER_COLOR_BLUE:
3307         case TD_CS_BORDER_COLOR_ALPHA:
3308         case SQ_ESGS_RING_SIZE:
3309         case SQ_GSVS_RING_SIZE:
3310         case SQ_ESTMP_RING_SIZE:
3311         case SQ_GSTMP_RING_SIZE:
3312         case SQ_HSTMP_RING_SIZE:
3313         case SQ_LSTMP_RING_SIZE:
3314         case SQ_PSTMP_RING_SIZE:
3315         case SQ_VSTMP_RING_SIZE:
3316         case SQ_ESGS_RING_ITEMSIZE:
3317         case SQ_ESTMP_RING_ITEMSIZE:
3318         case SQ_GSTMP_RING_ITEMSIZE:
3319         case SQ_GSVS_RING_ITEMSIZE:
3320         case SQ_GS_VERT_ITEMSIZE:
3321         case SQ_GS_VERT_ITEMSIZE_1:
3322         case SQ_GS_VERT_ITEMSIZE_2:
3323         case SQ_GS_VERT_ITEMSIZE_3:
3324         case SQ_GSVS_RING_OFFSET_1:
3325         case SQ_GSVS_RING_OFFSET_2:
3326         case SQ_GSVS_RING_OFFSET_3:
3327         case SQ_HSTMP_RING_ITEMSIZE:
3328         case SQ_LSTMP_RING_ITEMSIZE:
3329         case SQ_PSTMP_RING_ITEMSIZE:
3330         case SQ_VSTMP_RING_ITEMSIZE:
3331         case VGT_TF_RING_SIZE:
3332         case SQ_ESGS_RING_BASE:
3333         case SQ_GSVS_RING_BASE:
3334         case SQ_ESTMP_RING_BASE:
3335         case SQ_GSTMP_RING_BASE:
3336         case SQ_HSTMP_RING_BASE:
3337         case SQ_LSTMP_RING_BASE:
3338         case SQ_PSTMP_RING_BASE:
3339         case SQ_VSTMP_RING_BASE:
3340         case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3341         case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3342                 return true;
3343         default:
3344                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3345                 return false;
3346         }
3347 }
3348
3349 static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3350                                       u32 *ib, struct radeon_cs_packet *pkt)
3351 {
3352         u32 idx = pkt->idx + 1;
3353         u32 idx_value = ib[idx];
3354         u32 start_reg, end_reg, reg, i;
3355         u32 command, info;
3356
3357         switch (pkt->opcode) {
3358         case PACKET3_NOP:
3359                 break;
3360         case PACKET3_SET_BASE:
3361                 if (idx_value != 1) {
3362                         DRM_ERROR("bad SET_BASE");
3363                         return -EINVAL;
3364                 }
3365                 break;
3366         case PACKET3_CLEAR_STATE:
3367         case PACKET3_INDEX_BUFFER_SIZE:
3368         case PACKET3_DISPATCH_DIRECT:
3369         case PACKET3_DISPATCH_INDIRECT:
3370         case PACKET3_MODE_CONTROL:
3371         case PACKET3_SET_PREDICATION:
3372         case PACKET3_COND_EXEC:
3373         case PACKET3_PRED_EXEC:
3374         case PACKET3_DRAW_INDIRECT:
3375         case PACKET3_DRAW_INDEX_INDIRECT:
3376         case PACKET3_INDEX_BASE:
3377         case PACKET3_DRAW_INDEX_2:
3378         case PACKET3_CONTEXT_CONTROL:
3379         case PACKET3_DRAW_INDEX_OFFSET:
3380         case PACKET3_INDEX_TYPE:
3381         case PACKET3_DRAW_INDEX:
3382         case PACKET3_DRAW_INDEX_AUTO:
3383         case PACKET3_DRAW_INDEX_IMMD:
3384         case PACKET3_NUM_INSTANCES:
3385         case PACKET3_DRAW_INDEX_MULTI_AUTO:
3386         case PACKET3_STRMOUT_BUFFER_UPDATE:
3387         case PACKET3_DRAW_INDEX_OFFSET_2:
3388         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3389         case PACKET3_MPEG_INDEX:
3390         case PACKET3_WAIT_REG_MEM:
3391         case PACKET3_MEM_WRITE:
3392         case PACKET3_PFP_SYNC_ME:
3393         case PACKET3_SURFACE_SYNC:
3394         case PACKET3_EVENT_WRITE:
3395         case PACKET3_EVENT_WRITE_EOP:
3396         case PACKET3_EVENT_WRITE_EOS:
3397         case PACKET3_SET_CONTEXT_REG:
3398         case PACKET3_SET_BOOL_CONST:
3399         case PACKET3_SET_LOOP_CONST:
3400         case PACKET3_SET_RESOURCE:
3401         case PACKET3_SET_SAMPLER:
3402         case PACKET3_SET_CTL_CONST:
3403         case PACKET3_SET_RESOURCE_OFFSET:
3404         case PACKET3_SET_CONTEXT_REG_INDIRECT:
3405         case PACKET3_SET_RESOURCE_INDIRECT:
3406         case CAYMAN_PACKET3_DEALLOC_STATE:
3407                 break;
3408         case PACKET3_COND_WRITE:
3409                 if (idx_value & 0x100) {
3410                         reg = ib[idx + 5] * 4;
3411                         if (!evergreen_vm_reg_valid(reg))
3412                                 return -EINVAL;
3413                 }
3414                 break;
3415         case PACKET3_COPY_DW:
3416                 if (idx_value & 0x2) {
3417                         reg = ib[idx + 3] * 4;
3418                         if (!evergreen_vm_reg_valid(reg))
3419                                 return -EINVAL;
3420                 }
3421                 break;
3422         case PACKET3_SET_CONFIG_REG:
3423                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3424                 end_reg = 4 * pkt->count + start_reg - 4;
3425                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3426                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3427                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3428                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3429                         return -EINVAL;
3430                 }
3431                 for (i = 0; i < pkt->count; i++) {
3432                         reg = start_reg + (4 * i);
3433                         if (!evergreen_vm_reg_valid(reg))
3434                                 return -EINVAL;
3435                 }
3436                 break;
3437         case PACKET3_CP_DMA:
3438                 command = ib[idx + 4];
3439                 info = ib[idx + 1];
3440                 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3441                     (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3442                     ((((info & 0x00300000) >> 20) == 0) &&
3443                      (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3444                     ((((info & 0x60000000) >> 29) == 0) &&
3445                      (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3446                         /* non mem to mem copies requires dw aligned count */
3447                         if ((command & 0x1fffff) % 4) {
3448                                 DRM_ERROR("CP DMA command requires dw count alignment\n");
3449                                 return -EINVAL;
3450                         }
3451                 }
3452                 if (command & PACKET3_CP_DMA_CMD_SAS) {
3453                         /* src address space is register */
3454                         if (((info & 0x60000000) >> 29) == 0) {
3455                                 start_reg = idx_value << 2;
3456                                 if (command & PACKET3_CP_DMA_CMD_SAIC) {
3457                                         reg = start_reg;
3458                                         if (!evergreen_vm_reg_valid(reg)) {
3459                                                 DRM_ERROR("CP DMA Bad SRC register\n");
3460                                                 return -EINVAL;
3461                                         }
3462                                 } else {
3463                                         for (i = 0; i < (command & 0x1fffff); i++) {
3464                                                 reg = start_reg + (4 * i);
3465                                                 if (!evergreen_vm_reg_valid(reg)) {
3466                                                         DRM_ERROR("CP DMA Bad SRC register\n");
3467                                                         return -EINVAL;
3468                                                 }
3469                                         }
3470                                 }
3471                         }
3472                 }
3473                 if (command & PACKET3_CP_DMA_CMD_DAS) {
3474                         /* dst address space is register */
3475                         if (((info & 0x00300000) >> 20) == 0) {
3476                                 start_reg = ib[idx + 2];
3477                                 if (command & PACKET3_CP_DMA_CMD_DAIC) {
3478                                         reg = start_reg;
3479                                         if (!evergreen_vm_reg_valid(reg)) {
3480                                                 DRM_ERROR("CP DMA Bad DST register\n");
3481                                                 return -EINVAL;
3482                                         }
3483                                 } else {
3484                                         for (i = 0; i < (command & 0x1fffff); i++) {
3485                                                 reg = start_reg + (4 * i);
3486                                                 if (!evergreen_vm_reg_valid(reg)) {
3487                                                         DRM_ERROR("CP DMA Bad DST register\n");
3488                                                         return -EINVAL;
3489                                                 }
3490                                         }
3491                                 }
3492                         }
3493                 }
3494                 break;
3495         case PACKET3_SET_APPEND_CNT: {
3496                 uint32_t areg;
3497                 uint32_t allowed_reg_base;
3498
3499                 if (pkt->count != 2) {
3500                         DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
3501                         return -EINVAL;
3502                 }
3503
3504                 allowed_reg_base = GDS_APPEND_COUNT_0;
3505                 allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
3506                 allowed_reg_base >>= 2;
3507
3508                 areg = idx_value >> 16;
3509                 if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
3510                         DRM_ERROR("forbidden register for append cnt 0x%08x at %d\n",
3511                                   areg, idx);
3512                         return -EINVAL;
3513                 }
3514                 break;
3515         }
3516         default:
3517                 return -EINVAL;
3518         }
3519         return 0;
3520 }
3521
3522 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3523 {
3524         int ret = 0;
3525         u32 idx = 0;
3526         struct radeon_cs_packet pkt;
3527
3528         do {
3529                 pkt.idx = idx;
3530                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3531                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3532                 pkt.one_reg_wr = 0;
3533                 switch (pkt.type) {
3534                 case RADEON_PACKET_TYPE0:
3535                         dev_err(rdev->dev, "Packet0 not allowed!\n");
3536                         ret = -EINVAL;
3537                         break;
3538                 case RADEON_PACKET_TYPE2:
3539                         idx += 1;
3540                         break;
3541                 case RADEON_PACKET_TYPE3:
3542                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3543                         ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3544                         idx += pkt.count + 2;
3545                         break;
3546                 default:
3547                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3548                         ret = -EINVAL;
3549                         break;
3550                 }
3551                 if (ret)
3552                         break;
3553         } while (idx < ib->length_dw);
3554
3555         return ret;
3556 }
3557
3558 /**
3559  * evergreen_dma_ib_parse() - parse the DMA IB for VM
3560  * @rdev: radeon_device pointer
3561  * @ib: radeon_ib pointer
3562  *
3563  * Parses the DMA IB from the VM CS ioctl
3564  * checks for errors. (Cayman-SI)
3565  * Returns 0 for success and an error on failure.
3566  **/
3567 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3568 {
3569         u32 idx = 0;
3570         u32 header, cmd, count, sub_cmd;
3571
3572         do {
3573                 header = ib->ptr[idx];
3574                 cmd = GET_DMA_CMD(header);
3575                 count = GET_DMA_COUNT(header);
3576                 sub_cmd = GET_DMA_SUB_CMD(header);
3577
3578                 switch (cmd) {
3579                 case DMA_PACKET_WRITE:
3580                         switch (sub_cmd) {
3581                         /* tiled */
3582                         case 8:
3583                                 idx += count + 7;
3584                                 break;
3585                         /* linear */
3586                         case 0:
3587                                 idx += count + 3;
3588                                 break;
3589                         default:
3590                                 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3591                                 return -EINVAL;
3592                         }
3593                         break;
3594                 case DMA_PACKET_COPY:
3595                         switch (sub_cmd) {
3596                         /* Copy L2L, DW aligned */
3597                         case 0x00:
3598                                 idx += 5;
3599                                 break;
3600                         /* Copy L2T/T2L */
3601                         case 0x08:
3602                                 idx += 9;
3603                                 break;
3604                         /* Copy L2L, byte aligned */
3605                         case 0x40:
3606                                 idx += 5;
3607                                 break;
3608                         /* Copy L2L, partial */
3609                         case 0x41:
3610                                 idx += 9;
3611                                 break;
3612                         /* Copy L2L, DW aligned, broadcast */
3613                         case 0x44:
3614                                 idx += 7;
3615                                 break;
3616                         /* Copy L2T Frame to Field */
3617                         case 0x48:
3618                                 idx += 10;
3619                                 break;
3620                         /* Copy L2T/T2L, partial */
3621                         case 0x49:
3622                                 idx += 12;
3623                                 break;
3624                         /* Copy L2T broadcast */
3625                         case 0x4b:
3626                                 idx += 10;
3627                                 break;
3628                         /* Copy L2T/T2L (tile units) */
3629                         case 0x4c:
3630                                 idx += 9;
3631                                 break;
3632                         /* Copy T2T, partial (tile units) */
3633                         case 0x4d:
3634                                 idx += 13;
3635                                 break;
3636                         /* Copy L2T broadcast (tile units) */
3637                         case 0x4f:
3638                                 idx += 10;
3639                                 break;
3640                         default:
3641                                 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
3642                                 return -EINVAL;
3643                         }
3644                         break;
3645                 case DMA_PACKET_CONSTANT_FILL:
3646                         idx += 4;
3647                         break;
3648                 case DMA_PACKET_NOP:
3649                         idx += 1;
3650                         break;
3651                 default:
3652                         DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3653                         return -EINVAL;
3654                 }
3655         } while (idx < ib->length_dw);
3656
3657         return 0;
3658 }
This page took 0.261415 seconds and 4 git commands to generate.