Verified Commit 3d7c19dd authored by Alberto Miranda's avatar Alberto Miranda ♨️
Browse files

Add constexpr functions for chunk arithmetic

Fixes #137.
parent e0a1a1ed
Loading
Loading
Loading
Loading
+139 −63
Original line number Diff line number Diff line
@@ -14,112 +14,188 @@
#ifndef GEKKOFS_CHNK_CALC_UTIL_HPP
#define GEKKOFS_CHNK_CALC_UTIL_HPP

#include <cstdint>
#include <unistd.h>
#include <cassert>

namespace gkfs::util {

/**
 * Check whether integer `n` is a power of 2.
 *
 * Zero is not considered a power of 2.
 *
 * @param [in] n the number to check.
 * @returns `true` if `n` is a power of 2; `false` otherwise.
 */
constexpr bool
is_power_of_2(uint64_t n) {
    // A power of 2 has exactly one bit set, so clearing its lowest set bit
    // with `n & (n - 1)` must leave nothing behind. The explicit test for
    // zero is needed because 0 would otherwise pass the bit check.
    return n != 0 && (n & (n - 1u)) == 0;
}

/**
 * Compute the base2 logarithm for 64 bit integers.
 *
 * The result is the zero-based index of the most significant set bit of
 * `n`, i.e. the logarithm rounded down to the nearest integer.
 *
 * @note The logarithm of 0 is mathematically undefined; this function
 * returns 0 for that input instead of invoking undefined behavior.
 *
 * @param [in] n the number from which to compute the log2.
 * @returns the base 2 logarithm of `n` (rounded down), or 0 if `n` is 0.
 */
constexpr std::size_t
log2(uint64_t n) {
    // __builtin_clzll() yields an undefined result when its argument is 0,
    // so that input must be filtered out before counting leading zeros.
    return n == 0 ? std::size_t{0}
                  : 8u * sizeof(uint64_t) - 1u -
                            static_cast<std::size_t>(__builtin_clzll(n));
}

/**
 * Check whether @n is divisible by @block_size.
 *
 * @note This function assumes that block_size is a power of 2.
 *
 * @param [in] n the number to check.
 * @param [in] block_size the divisor; must be a power of 2.
 * @returns true if @n is divisible by @block_size; false otherwise.
 */
constexpr bool
is_divisible(const uint64_t n, const size_t block_size) {
    // This check is automatically removed in release builds
    assert(is_power_of_2(block_size));
    // For a power of 2, `block_size - 1` is a mask covering exactly the
    // low-order bits that must be zero in any multiple of block_size.
    // The mask is derived from block_size itself at 64-bit width: building
    // it as `1u << log2(block_size)` would shift a 32-bit value, which is
    // undefined for block sizes of 2^32 and above.
    return (n & (static_cast<uint64_t>(block_size) - 1u)) == 0;
}

/**
 * Align an @offset to the closest left side chunk boundary
 * Given a file @offset and a @block_size, align the @offset to its
 * closest left-side block boundary.
 *
 * @note This function assumes that block_size is a power of 2.
 *
 * @param [in] offset the offset to align.
 * @param [in] block_size the block size used to compute boundaries.
 * @returns an offset aligned to the left-side block boundary.
 */
inline off64_t
chnk_lalign(const off64_t offset, const size_t chnk_size) {
    return offset & ~(chnk_size - 1);
constexpr uint64_t
chnk_lalign(const uint64_t offset, const size_t block_size) {
    // This check is automatically removed in release builds
    assert(is_power_of_2(block_size));
    return static_cast<uint64_t>(offset) & ~(block_size - 1u);
}


/**
 * Align an @offset to the closest right side chunk boundary
 * Given a file @offset and a @block_size, align the @offset to its
 * closest right-side block boundary.
 *
 * @note This function assumes that block_size is a power of 2.
 *
 * @param [in] offset the offset to align.
 * @param [in] block_size the block size used to compute boundaries.
 * @returns an offset aligned to the right-side block boundary.
 */
inline off64_t
chnk_ralign(const off64_t offset, const size_t chnk_size) {
    return chnk_lalign(offset + chnk_size, chnk_size);
constexpr uint64_t
chnk_ralign(const uint64_t offset, const size_t block_size) {
    // This check is automatically removed in release builds
    assert(is_power_of_2(block_size));
    return chnk_lalign(offset, block_size) + block_size;
}


/**
 * Return the padding (bytes) that separates the @offset from the closest
 * left side chunk boundary
 * Return the overrun bytes that separate @offset from the closest left side
 * block boundary.
 *
 * If @offset is a boundary the resulting padding will be 0
 * @note This function assumes that block_size is a power of 2.
 *
 * @param [in] offset the offset for which the overrun distance should be
 * computed.
 * @param [in] block_size the block size used to compute boundaries.
 * @returns the distance in bytes between the left-side boundary of @offset
 */
inline size_t
chnk_lpad(const off64_t offset, const size_t chnk_size) {
    return offset % chnk_size;
constexpr size_t
chnk_lpad(const uint64_t offset, const size_t block_size) {
    // This check is automatically removed in release builds
    assert(is_power_of_2(block_size));
    return static_cast<uint64_t>(offset) & (block_size - 1u);
}


/**
 * Return the padding (bytes) that separates the @offset from the closest
 * right side chunk boundary
 * Return the underrun bytes that separate @offset from the closest right side
 * block boundary.
 *
 * @note This function assumes that block_size is a power of 2.
 *
 * If @offset is a boundary the resulting padding will be 0
 * @param [in] offset the offset for which the overrun distance should be
 * computed.
 * @param [in] block_size the block size used to compute boundaries.
 * @returns the distance in bytes between the right-side boundary of @offset
 */
inline size_t
chnk_rpad(const off64_t offset, const size_t chnk_size) {
    return (-offset) % chnk_size;
constexpr size_t
chnk_rpad(const uint64_t offset, const size_t block_size) {
    // This check is automatically removed in release builds
    assert(is_power_of_2(block_size));
    return chnk_ralign(offset, block_size) - offset;
}


/**
 * Given an @offset calculates the chunk number to which the @offset belongs
 * Given an @offset and a @block_size, compute the block index to which @offset
 * belongs.
 *
 * chunk_id(8,4) = 2;
 * chunk_id(7,4) = 1;
 * chunk_id(2,4) = 0;
 * chunk_id(0,4) = 0;
 */
inline uint64_t
chnk_id_for_offset(const off64_t offset, const size_t chnk_size) {
    /*
     * This does not work for offsets that use the 64th bit, i.e.,
     * 9223372036854775808. 9223372036854775808 - 1 uses 63 bits and still
     * works. `offset / chnk_size` works with the 64th bit. With this number we
     * can address more than 19,300,000 exabytes of data though. Hi future me?
 * @note Block indexes are (conceptually) computed by dividing @offset
 * by @block_size, with index 0 referring to block [0, block_size - 1],
 * index 1 to block [block_size, 2 * block_size - 1], and so on up to
 * a maximum index FILE_LENGTH / block_size.
 *
 * @note This function assumes that @block_size is a power of 2.
 *
 * @param [in] offset the offset for which the block index should be computed.
 * @param [in] block_size the block_size that should be used to compute the
 * index.
 * @returns the index of the block containing @offset.
 */
    return static_cast<uint64_t>(chnk_lalign(offset, chnk_size) >>
                                 log2(chnk_size));
constexpr uint64_t
chnk_id_for_offset(const uint64_t offset, const size_t block_size) {

    using gkfs::util::log2;

    // This check is automatically removed in release builds
    assert(is_power_of_2(block_size));
    return static_cast<uint64_t>(chnk_lalign(offset, block_size) >>
                                 log2(block_size));
}


/**
 * Compute the number of blocks involved in an operation affecting the
 * region [@offset, @offset + @count).
 *
 * An empty operation (@count == 0) involves no blocks and yields 0.
 *
 * @note This function assumes that @chnk_size is a power of 2.
 * @note This function assumes that @offset + @count does not
 * overflow.
 *
 * @param [in] offset the operation's initial offset.
 * @param [in] count the number of bytes affected by the operation.
 * @param [in] chnk_size the block size that should be used to compute the
 * number of blocks.
 * @returns the number of blocks affected by the operation.
 */
constexpr std::size_t
chnk_count_for_offset(const uint64_t offset, const size_t count,
                      const size_t chnk_size) {

    using gkfs::util::log2;

    // These checks are automatically removed in release builds
    assert(is_power_of_2(chnk_size));

#if defined(__GNUC__) && !defined(__clang__)
    assert(!__builtin_add_overflow_p(offset, count, static_cast<uint64_t>(0)));
#else
    // `>=` rather than `>`: count == 0 is valid input and leaves the sum
    // equal to offset; only a wrapped-around sum is smaller than offset.
    assert(offset + count >= offset);
#endif

    const uint64_t chnk_start = chnk_lalign(offset, chnk_size);
    const uint64_t chnk_end = chnk_lalign(offset + count, chnk_size);
    const size_t mask = -!!count; // this is either 0 or ~0

    // Index distance between the block holding the (exclusive) end and the
    // block holding the start, plus one more block when the end falls
    // strictly inside a block. The mask forces the result to 0 for empty
    // operations.
    return (((chnk_end >> log2(chnk_size)) - (chnk_start >> log2(chnk_size)) +
             !is_divisible(offset + count, chnk_size))) &
           mask;
}

} // namespace gkfs::util