// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
#define ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_

#include <atomic>
#include <cstddef>
#include <deque>

#include "absl/base/config.h"
#include "absl/crc/crc32c.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {

// CrcCordState is a copy-on-write class that holds the chunked CRC32C data
// that allows CrcCord to perform efficient substring operations. CrcCordState
// is used as a member variable in CrcCord. When a CrcCord is converted to a
// Cord, the CrcCordState is shallow-copied into the root node of the Cord. If
// the converted Cord is modified outside of CrcCord, the CrcCordState is
// discarded from the Cord. If the Cord is converted back to a CrcCord, and the
// Cord is still carrying the CrcCordState in its root node, the CrcCord can
// re-use the CrcCordState, making the construction of the CrcCord cheap.
//
// CrcCordState does not try to encapsulate the CRC32C state (CrcCord requires
// knowledge of how CrcCordState represents the CRC32C state). It does
// encapsulate the copy-on-write nature of the state.
class CrcCordState {
 public:
  // Constructors.
  CrcCordState();
  CrcCordState(const CrcCordState&);
  CrcCordState(CrcCordState&&);

  // Destructor. Atomically unreferences the data.
  ~CrcCordState();

  // Copy and move operators.
  CrcCordState& operator=(const CrcCordState&);
  CrcCordState& operator=(CrcCordState&&);

  // A (length, crc) pair.
  struct PrefixCrc {
    PrefixCrc() = default;
    PrefixCrc(size_t length_arg, absl::crc32c_t crc_arg)
        : length(length_arg), crc(crc_arg) {}

    size_t length = 0;

    // TODO(absl-team): Memory stomping often zeros out memory. If this struct
    // gets overwritten, we could end up with {0, 0}, which is the correct CRC
    // for a string of length 0. Consider storing a scrambled value and
    // unscrambling it before verifying it.
    absl::crc32c_t crc = absl::crc32c_t{0};
  };

  // The representation of the chunked CRC32C data.
  struct Rep {
    // `removed_prefix` is the crc and length of any prefix that has been
    // removed from the Cord (for example, by calling
    // `CrcCord::RemovePrefix()`). To get the checksum of any prefix of the
    // cord, this value must be subtracted from `prefix_crc`. See `Checksum()`
    // for an example.
    //
    // CrcCordState is said to be "normalized" if removed_prefix.length == 0.
    PrefixCrc removed_prefix;

    // A deque of (length, crc) pairs, representing length and crc of a prefix
    // of the Cord, before removed_prefix has been subtracted. The lengths of
    // the prefixes are stored in increasing order. If the Cord is not empty,
    // the last value in deque is the contains the CRC32C of the entire Cord
    // when removed_prefix is subtracted from it.
    std::deque<PrefixCrc> prefix_crc;
  };

  // Returns a reference to the representation of the chunked CRC32C data.
  const Rep& rep() const { return refcounted_rep_->rep; }

  // Returns a mutable reference to the representation of the chunked CRC32C
  // data. Calling this function will copy the data if another instance also
  // holds a reference to the data, so it is important to call rep() instead if
  // the data may not be mutated.
  Rep* mutable_rep() {
    if (refcounted_rep_->count.load(std::memory_order_acquire) != 1) {
      RefcountedRep* copy = new RefcountedRep;
      copy->rep = refcounted_rep_->rep;
      Unref(refcounted_rep_);
      refcounted_rep_ = copy;
    }
    return &refcounted_rep_->rep;
  }

  // Returns the CRC32C of the entire Cord.
  absl::crc32c_t Checksum() const;

  // Returns true if the chunked CRC32C cached is normalized.
  bool IsNormalized() const { return rep().removed_prefix.length == 0; }

  // Normalizes the chunked CRC32C checksum cache by subtracting any removed
  // prefix from the chunks.
  void Normalize();

  // Returns the number of cached chunks.
  size_t NumChunks() const { return rep().prefix_crc.size(); }

  // Helper that returns the (length, crc) of the `n`-th cached chunked.
  PrefixCrc NormalizedPrefixCrcAtNthChunk(size_t n) const;

  // Poisons all chunks to so that Checksum() will likely be incorrect with high
  // probability.
  void Poison();

 private:
  struct RefcountedRep {
    std::atomic<int32_t> count{1};
    Rep rep;
  };

  // Adds a reference to the shared global empty `RefcountedRep`, and returns a
  // pointer to the `RefcountedRep`. This is an optimization to avoid unneeded
  // allocations when the allocation is unlikely to ever be used. The returned
  // pointer can be `Unref()`ed when it is no longer needed.  Since the returned
  // instance will always have a reference counter greater than 1, attempts to
  // modify it (by calling `mutable_rep()`) will create a new unshared copy.
  static RefcountedRep* RefSharedEmptyRep();

  static void Ref(RefcountedRep* r) {
    assert(r != nullptr);
    r->count.fetch_add(1, std::memory_order_relaxed);
  }

  static void Unref(RefcountedRep* r) {
    assert(r != nullptr);
    if (r->count.fetch_sub(1, std::memory_order_acq_rel) == 1) {
      delete r;
    }
  }

  RefcountedRep* refcounted_rep_;
};

}  // namespace crc_internal
ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_