Verified Commit b2ed5532 authored by Marc Vef's avatar Marc Vef
Browse files

latest fix attempts

parent c09aebf1
Loading
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -39,7 +39,7 @@ DEP_CONFIG=""
VALID_DEP_OPTIONS="mogongpu mogon2 mogon1 ngio direct all ci"

MOGONGPU_DEPS=(
    "zstd" "lz4" "snappy" "capstone" "mercury" "argobots" "json-c" "margo" "rocksdb"
    "zstd" "lz4" "snappy" "capstone" "ofi" "mercury" "argobots" "json-c" "margo" "rocksdb"
    "syscall_intercept" "date" "verbs"
)

+1 −1
Original line number Diff line number Diff line
@@ -39,7 +39,7 @@ VERBOSE=false
VALID_DEP_OPTIONS="mogongpu mogon2 mogon1 ngio direct all ci"

MOGONGPU_DEPS=(
    "bzip2" "zstd" "lz4" "snappy" "capstone" "mercury-experimental" "argobots" "json-c" "margo" "rocksdb-experimental"
    "bzip2" "zstd" "lz4" "snappy" "capstone" "ofi-experimental" "mercury-experimental" "argobots" "json-c" "margo" "rocksdb-experimental"
    "syscall_intercept" "date"
)

+48 −36
Original line number Diff line number Diff line
@@ -334,6 +334,11 @@ forward_read(const string& path, void* buf, const off64_t offset,

        auto endp = CTX->hosts().at(target);

        auto retry_max = 5;

        auto retry_cnt = 0;
        do {
            retry_cnt++;
            try {

                LOG(DEBUG, "Sending RPC ...");
@@ -363,15 +368,22 @@ forward_read(const string& path, void* buf, const off64_t offset,

                LOG(DEBUG, "host: {}, path: {}, chunks: {}, size: {}, offset: {}",
                    target, path, in.chunk_n(), total_chunk_size, in.offset());

                break;
            } catch(const std::exception& ex) {
                LOG(ERROR,
                    "Unable to send non-blocking rpc for path \"{}\" "
                "[peer: {}]",
                path, target);
                    "[peer: {}]. Try: '{}' (max '{}' tries",
                    path, target, retry_cnt, retry_max);
                if (retry_cnt == retry_max) {
                    LOG(ERROR,
                        "Exceeded all retries... fail");
                    return make_pair(EBUSY, 0);
                }
            }
        } while(retry_cnt != retry_max);

    }


    // Wait for RPC responses and then get response and add it to out_size
    // which is the read size. All potential outputs are served to free
+4 −0
Original line number Diff line number Diff line
@@ -216,6 +216,10 @@ ChunkStorage::read_chunk(const string& file_path, gkfs::rpc::chnk_id_t chunk_id,

    FileHandle fh(open(chunk_path.c_str(), O_RDONLY), chunk_path);
    if(!fh.valid()) {
        if (errno == ENOENT && chunk_id > 0) {
            //log_->warn("XXX {}(): Weird corner case. Return 0 read no error? path '{}' chunkid '{}' size '{}' offset '{}' ", __func__, file_path, chunk_id, size, offset);
            return 0;
        }
        auto err_str = fmt::format(
                "{}() Failed to open chunk file for read. File: '{}', Error: '{}'",
                __func__, chunk_path, ::strerror(errno));
+4 −3
Original line number Diff line number Diff line
@@ -534,14 +534,15 @@ rpc_srv_read(hg_handle_t handle) {
    bulk_args.chunk_ids = &chnk_ids_host;
    // wait for all tasklets and push read data back to client
    auto read_result = chunk_read_op.wait_for_tasks_and_push_back(bulk_args);
    out.err = read_result.first;
    out.err = 0;
    out.io_size = read_result.second;

    /*
     * 5. Respond and cleanup
     */
    GKFS_DATA->spdlogger()->debug("{}() Sending output response, err: {}",
                                  __func__, out.err);
    if (out.err != read_result.first)
            GKFS_DATA->spdlogger()->info("{}() Sending output response, err: {}. Real err code we ignore: '{}'",
                                        __func__, out.err, read_result.first);
    return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle);
}