/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef AC_PERFCOUNTER_H
#define AC_PERFCOUNTER_H

#include <stdbool.h>

#include "sid.h"

#include "ac_gpu_info.h"

/* Max counters per HW block */
#define AC_QUERY_MAX_COUNTERS 16

#define AC_PC_SHADERS_WINDOWING (1u << 31)

enum ac_pc_block_flags
{
   /* This block is part of the shader engine */
   AC_PC_BLOCK_SE = (1 << 0),

   /* Expose per-instance groups instead of summing all instances (within
    * an SE). */
   AC_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),

   /* Expose per-SE groups instead of summing instances across SEs. */
   AC_PC_BLOCK_SE_GROUPS = (1 << 2),

   /* Shader block */
   AC_PC_BLOCK_SHADER = (1 << 3),

   /* Non-shader block with perfcounters windowed by shaders. */
   AC_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};

enum ac_pc_gpu_block {
   CPF     = 0x0,
   IA      = 0x1,
   VGT     = 0x2,
   PA_SU   = 0x3,
   PA_SC   = 0x4,
   SPI     = 0x5,
   SQ      = 0x6,
   SX      = 0x7,
   TA      = 0x8,
   TD      = 0x9,
   TCP     = 0xA,
   TCC     = 0xB,
   TCA     = 0xC,
   DB      = 0xD,
   CB      = 0xE,
   GDS     = 0xF,
   SRBM    = 0x10,
   GRBM    = 0x11,
   GRBMSE  = 0x12,
   RLC     = 0x13,
   DMA     = 0x14,
   MC      = 0x15,
   CPG     = 0x16,
   CPC     = 0x17,
   WD      = 0x18,
   TCS     = 0x19,
   ATC     = 0x1A,
   ATCL2   = 0x1B,
   MCVML2  = 0x1C,
   EA      = 0x1D,
   RPB     = 0x1E,
   RMI     = 0x1F,
   UMCCH   = 0x20,
   GE      = 0x21,
   GE1     = GE,
   GL1A    = 0x22,
   GL1C    = 0x23,
   GL1CG   = 0x24,
   GL2A    = 0x25,
   GL2C    = 0x26,
   CHA     = 0x27,
   CHC     = 0x28,
   CHCG    = 0x29,
   GUS     = 0x2A,
   GCR     = 0x2B,
   PA_PH   = 0x2C,
   UTCL1   = 0x2D,
   GEDIST  = 0x2E,
   GESE    = 0x2F,
   DF      = 0x30,
   NUM_GPU_BLOCK,
};

struct ac_pc_block_base {
   enum ac_pc_gpu_block gpu_block;
   const char *name;
   unsigned num_counters;
   unsigned flags;

   unsigned select_or;
   unsigned *select0;
   unsigned counter0_lo;
   unsigned *counters;

   /* SPM */
   unsigned num_spm_counters;
   unsigned num_spm_wires;
   unsigned *select1;
   unsigned spm_block_select;
};

struct ac_pc_block_gfxdescr {
   struct ac_pc_block_base *b;
   unsigned selectors;
   unsigned instances;
};

struct ac_pc_block {
   const struct ac_pc_block_gfxdescr *b;
   unsigned num_instances;

   unsigned num_groups;
   char *group_names;
   unsigned group_name_stride;

   char *selector_names;
   unsigned selector_name_stride;
};

struct ac_perfcounters {
   unsigned num_groups;
   unsigned num_blocks;
   struct ac_pc_block *blocks;

   bool separate_se;
   bool separate_instance;
};

/* The order is chosen to be compatible with GPUPerfStudio's hardcoding of
 * performance counter group IDs.
 */
static const char *const ac_pc_shader_type_suffixes[] = {"",    "_ES", "_GS", "_VS",
                                                         "_PS", "_LS", "_HS", "_CS"};

static const unsigned ac_pc_shader_type_bits[] = {
   0x7f,
   S_036780_ES_EN(1),
   S_036780_GS_EN(1),
   S_036780_VS_EN(1),
   S_036780_PS_EN(1),
   S_036780_LS_EN(1),
   S_036780_HS_EN(1),
   S_036780_CS_EN(1),
};

static inline bool
ac_pc_block_has_per_se_groups(const struct ac_perfcounters *pc,
                              const struct ac_pc_block *block)
{
   return block->b->b->flags & AC_PC_BLOCK_SE_GROUPS ||
          (block->b->b->flags & AC_PC_BLOCK_SE && pc->separate_se);
}

static inline bool
ac_pc_block_has_per_instance_groups(const struct ac_perfcounters *pc,
                                    const struct ac_pc_block *block)
{
   return block->b->b->flags & AC_PC_BLOCK_INSTANCE_GROUPS ||
          (block->num_instances > 1 && pc->separate_instance);
}

struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
                                      unsigned index, unsigned *base_gid,
                                      unsigned *sub_index);
struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
                                    unsigned *index);

bool ac_init_block_names(const struct radeon_info *info,
                         const struct ac_perfcounters *pc,
                         struct ac_pc_block *block);

bool ac_init_perfcounters(const struct radeon_info *info,
                          bool separate_se,
                          bool separate_instance,
                          struct ac_perfcounters *pc);
void ac_destroy_perfcounters(struct ac_perfcounters *pc);

#endif
