// MIT License
//
// Copyright (c) 2023-2025 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#pragma once

#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp"

#include <functional>
#include <mutex>
#include <vector>

namespace rocprofiler
{
namespace pc_sampling
{
/**
 * @brief A class that encapsulates the logic for marking the correlation IDs retired
 * by PC sampling service.
 *
 * To reduce the overhead, SDK's PC sampling service tries to avoid flushing the ROCr's buffer
 * explicitly. Instead, it waits for the ROCr to deliver the PC samples once the buffer's watermark
 * is crossed.
 *
 * There are some subtleties we need to consider when implementing the PC sampling service.
 * Currently, the 2nd level trap handler uses the double-buffering scheme, meaning the following
 * scenario can occur. Assume that one of the buffers (referred to as A) is full and is reported to
 * the PC sampling service via `data_ready_callback`. In the meantime, the 2nd level trap handler is
 * filling the buffer B with samples of currently active kernel K that is about to finish. Let's
 * mark the thread executing the `data_ready_callback` as TA. Before TA accesses the information
 * about all completed correlation IDs, it might be intercepted by another thread TB that receives
 * the kernel completion callback for the kernel K. While executing this callback, the thread TB
 * marks the K's correlation ID as completed. After TB finishes executing the callback, the TA
 * continues executing the `data_ready_callback` and observes that the K's CID has been marked as
 * completed. The TA drains the buffer A and decrements ref counts of all completed CIDs including
 * K's CID. If the count reaches zero, then the K's CID might be reported as retired. However, the
 * buffer B might still contain samples generated by the kernel K. To be sure that PC sampling
 * service drains all samples generated by the kernel K, we require one of the following
 * two scenarios to happen:
 *
 * 1. two implicit buffer flushes happened after the kernel of the correlation ID has completed,
 * 2. one explicit buffer flush initiated via `hsa_ven_amd_pcs_flush` happened after the kernel
 * of the correlation ID has completed. A single explicit flush is enough because
 * `hsa_ven_amd_pcs_flush` guarantees that all samples generated prior to (sequenced-before) the
 * call to `hsa_ven_amd_pcs_flush` will be delivered.
 *
 * This way, we can guarantee that all samples are drained from both buffers filled by the
 * 2nd level trap handler.
 *
 * To know if all samples produced by a kernel are drained from the ROCr's and 2nd level trap
 * handler's buffers and placed in the SDK's buffer, the PC sampling service employs the CID
 * retirement protocol implemented in the PCSCIDManager class. Refer to the comments of the
 * PCSCIDManager's attributes and methods for more details about the CID retirement protocol.
 *
 * PCSCIDManager is a singleton per PCSAgentSession.
 */
class PCSCIDManager
{
    /// A lock that must be held while updating the state of PCSCIDManager.
    /// Note: because of this `std::mutex` member, PCSCIDManager objects are
    /// neither copyable nor movable.
    std::mutex m;
    /// Correlation IDs with the following property: no ROCr's buffer flush happened
    /// since a corresponding kernel completed
    std::vector<context::correlation_id*> q1;
    /// Correlation IDs with the following property: exactly one ROCr's buffer flush occurred
    /// since a corresponding kernel completed
    std::vector<context::correlation_id*> q2;
    /// A pointer to the PC sampling parser to be notified when the CID is retired.
    PCSamplingParserContext* pcs_parser = nullptr;

    /// Prepare the CIDs of q to be retired. Refer to the implementation for more information.
    void retire_cids_of(std::vector<context::correlation_id*>& q);

public:
    /// Creates a manager that reports retired CIDs to @p parser.
    /// The pointer is stored as-is; this class declares no destructor, so it never
    /// releases the parser. NOTE(review): the parser is presumably expected to outlive
    /// the manager -- confirm against the owner of both objects.
    /// The constructor is `explicit` to prevent a raw parser pointer from being
    /// silently converted into a PCSCIDManager.
    /// @param parser the PC sampling parser to be notified when a CID is retired.
    explicit PCSCIDManager(PCSamplingParserContext* parser)
    : pcs_parser(parser)
    {}

    /// Called by the `kernel_completion_callback` to mark the kernel matching @p cid completed.
    void cid_async_activity_completed(context::correlation_id* cid);

    /// A callback function for copying PC samples from ROCr's buffer to the SDK's buffer.
    using pc_samples_copy_fn_t = std::function<void(void)>;

    /// Called by the `data_ready_callback`.
    /// Encapsulates the logic for verifying that two implicit ROCr's buffer flushes
    /// happened after a kernel of the CID is marked completed (scenario 1 from above),
    /// before retiring that CID.
    /// `manage_cids_implicit` calls @p pc_samples_copy_fn to copy samples from
    /// ROCr's buffer to the SDK's buffer.
    void manage_cids_implicit(const pc_samples_copy_fn_t& pc_samples_copy_fn);

    /// Called by the PC sampling service prior to initiating an explicit ROCr's buffer flush.
    /// The explicit flush is initiated by the @p pc_samples_explicit_flush_fn callback.
    /// `manage_cids_explicit` retires all CIDs whose corresponding kernels completed
    /// (sequenced) before the call to `manage_cids_explicit` (scenario 2 from above).
    void manage_cids_explicit(const pc_samples_copy_fn_t& pc_samples_explicit_flush_fn);
};

}  // namespace pc_sampling
}  // namespace rocprofiler
