// Copyright © 2023-2025 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: MIT

#include "../iface.op_attn_bwd.h"
// #include <aotriton/cpp_tune.h>  // TODO: add op_tune
#include <string_view>
#ifndef NDEBUG
#include <iostream>
#endif

// Name of the public tuning entry point defined by this file. It is used as
// the function name inside the optune namespace below and is #undef'd right
// after that definition.
#define CURRENT_ENTRY_PUBLIC Optune_op_attn_bwd__A0__F124

// Element count of a true array (not a pointer): total byte size divided by
// the byte size of its first element. Not referenced by the code visible in
// this file.
#define ARRAY_SIZE(array)  (sizeof(array) / sizeof(array[0]))

namespace {

using namespace std::literals::string_view_literals;

// Lookup table passed to op_attn_bwd__lut_lambda__0 by the entry point in
// this file. Shape is [GPU mod][10][10]; a single GPU mod (gfx950_mod0) is
// present, and every entry is either 0 or 1.
// NOTE(review): the meaning of the two 10-wide axes and of the stored values
// (presumably a backend index) is defined by the lookup helper, which is not
// visible here -- confirm against its definition.
int8_t lut[1][10][10] = {
    // GPU gfx950_mod0
    {
        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  // row 0
        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  // row 1
        { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  // row 2
        { 1, 1, 1, 0, 1, 1, 1, 1, 1, 1 },  // row 3
        { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 },  // row 4
        { 0, 0, 0, 0, 0, 1, 1, 1, 1, 0 },  // row 5
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },  // row 6
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1 },  // row 7
        { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1 },  // row 8
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  // row 9
    },
    // End of GPU gfx950_mod0
};

}  // namespace

namespace AOTRITON_NS::v3::flash::optune {

// Tuning entry point for this kernel signature.
//
// Queries op_attn_bwd__lut_lambda__0 with the context's parameters, the
// caller-supplied mod_number, and this file's lookup table. A non-negative
// result is stored into context.backend_index (converted to
// OpAttnBwdContext::BackendEnum); a negative result leaves the context
// unmodified.
void CURRENT_ENTRY_PUBLIC(OpAttnBwdContext& context, int mod_number) {
    const auto selected = op_attn_bwd__lut_lambda__0(*context.params, mod_number, lut);
    if (selected >= 0) {
        context.backend_index = static_cast<OpAttnBwdContext::BackendEnum>(selected);
    }
}

#undef CURRENT_ENTRY_PUBLIC
}  // namespace AOTRITON_NS::v3::flash::optune

// Human-readable Signature 
// Q = "*fp16:16"
// sm_scale = "fp32"
// DQ_ACC = "*fp32:16"
// L = "*fp32:16"
// stride_bz = 0
// stride_dbz = 0
// num_head_q = "i32"
// cu_seqlens_q = "*i32:16"
// num_seqlens = "i32"
// dropout_p = "fp32"
// philox_seed_ptr = "*u64"
// philox_offset1 = "*u64"
// philox_offset2 = "u64"
// Window_left = "i32"
// BLOCK_DMODEL = 160
// CAUSAL_TYPE = 3
// ENABLE_DROPOUT = True
// PADDED_HEAD = False
// BIAS_TYPE = 0

// vim: set fileencoding=utf-8:

