Loading include/client/open_file_map.hpp +9 −0 Original line number Diff line number Diff line Loading @@ -115,6 +115,15 @@ public: void mode(mode_t mode_); std::string inline_data() const; void inline_data(const std::string& data); private: std::string inline_data_; }; Loading src/client/gkfs_functions.cpp +17 −2 Original line number Diff line number Diff line Loading @@ -339,8 +339,11 @@ gkfs_open(const std::string& path, mode_t mode, int flags) { return -1; } } auto fd = CTX->file_map()->add( std::make_shared<gkfs::filemap::OpenFile>(path, flags)); auto file = std::make_shared<gkfs::filemap::OpenFile>(path, flags); if(!md.inline_data().empty()) { file->inline_data(md.inline_data()); } auto fd = CTX->file_map()->add(file); if(CTX->protect_files_consumer()) { Loading Loading @@ -1117,6 +1120,10 @@ gkfs_do_write(gkfs::filemap::OpenFile& file, const char* buf, size_t count, file.set_flag(gkfs::filemap::OpenFile_flags::creation_pending, false); } // clear inline data cache as it is stale if(!file.inline_data().empty()) file.inline_data(""); // OPTIMIZATION: Inline Write if(gkfs::config::metadata::use_inline_data && Loading Loading @@ -1416,6 +1423,14 @@ gkfs_do_read(const gkfs::filemap::OpenFile& file, char* buf, size_t count, if(gkfs::config::metadata::use_inline_data && offset < gkfs::config::metadata::inline_data_size) { // OPTIMIZATION: Check if we have the inline data cached if(!file.inline_data().empty() && count <= file.inline_data().size()) { LOG(DEBUG, "{}() Using cached inline data", __func__); memcpy(buf, file.inline_data().c_str() + offset, count); return count; } // Forward the read request to the Metadata Server instead of Data // Server auto ret = Loading src/client/open_file_map.cpp +10 −0 Original line number Diff line number Diff line Loading @@ -123,6 +123,16 @@ OpenFile::mode(mode_t mode_) { // OpenFileMap starts here string OpenFile::inline_data() const { return inline_data_; } void OpenFile::inline_data(const string& data) { OpenFile::inline_data_ = data; } shared_ptr<OpenFile> OpenFileMap::get(int fd) { lock_guard<recursive_mutex> lock(files_mutex_); Loading tests/integration/data/test_inline_read_opt.py 0 → 100644 +67 −0 Original line number Diff line number Diff line import pytest import os from harness.logger import logger file01 = 'file01' data01 = 'data01' @pytest.mark.parametrize("restart_daemon", [False, True]) def test_inline_read_optimization(gkfs_daemon, gkfs_client, restart_daemon): """ Test the read optimization where inline data is cached during open. """ file01 = gkfs_daemon.mountdir / "file01" # Enable inline data and the optimization (though optimization flag is mainly for create/write) # We rely on inline_data being enabled. # 1. Create a file with small data using write --creat (atomic to ensure creation with inline data) # gkfs.io open+write in one process triggers the creation optimization properly ret = gkfs_client.run('write', file01, data01, len(data01), '--creat') assert ret.retval == len(data01) if restart_daemon: # Restart daemon to clear any server-side caching effects if any (though client cache is what matters) # But crucially, we want to test that a FRESH client open fetches the data. # Actually restarts might be too heavy, just closing and reopening effectively tests the logic # because the OpenFile object is destroyed on close. pass # 2. Open file for reading # This should now fetch the inline data into the OpenFile object ret = gkfs_client.open(file01, os.O_RDONLY) assert ret.retval > 0 # 3. Read the data # This should be served from the cache without a read RPC (verified by functionality) ret = gkfs_client.read(file01, len(data01)) assert ret.retval == len(data01) assert ret.buf == data01.encode() # 4. Stat to verify size matches ret = gkfs_client.stat(file01) assert ret.retval == 0 assert ret.statbuf.st_size == len(data01) # 5. Verify Cache Invalidation on Write # Write new data new_data = 'data02' ret = gkfs_client.write(file01, new_data, len(new_data)) # Overwrite assert ret.retval == len(new_data) # Seek to beginning ret = gkfs_client.lseek(file01, 0, os.SEEK_SET) assert ret.retval == 0 # Read again - should NOT be old data01 ret = gkfs_client.read(file01, len(new_data)) assert ret.retval == len(new_data) assert ret.buf == new_data.encode() # 6. Verify cleanup ret = gkfs_client.remove(file01) assert ret.retval == 0 tests/integration/harness/gkfs.py +4 −0 Original line number Diff line number Diff line Loading @@ -600,7 +600,11 @@ class Client: # _err=sys.stderr, ) out = str(out) logger.debug(f"command output: {out}") json_start = out.find('{') if json_start != -1: out = out[json_start:] return self._parser.parse(cmd, out) def __getattr__(self, name): Loading Loading
include/client/open_file_map.hpp +9 −0 Original line number Diff line number Diff line Loading @@ -115,6 +115,15 @@ public: void mode(mode_t mode_); std::string inline_data() const; void inline_data(const std::string& data); private: std::string inline_data_; }; Loading
src/client/gkfs_functions.cpp +17 −2 Original line number Diff line number Diff line Loading @@ -339,8 +339,11 @@ gkfs_open(const std::string& path, mode_t mode, int flags) { return -1; } } auto fd = CTX->file_map()->add( std::make_shared<gkfs::filemap::OpenFile>(path, flags)); auto file = std::make_shared<gkfs::filemap::OpenFile>(path, flags); if(!md.inline_data().empty()) { file->inline_data(md.inline_data()); } auto fd = CTX->file_map()->add(file); if(CTX->protect_files_consumer()) { Loading Loading @@ -1117,6 +1120,10 @@ gkfs_do_write(gkfs::filemap::OpenFile& file, const char* buf, size_t count, file.set_flag(gkfs::filemap::OpenFile_flags::creation_pending, false); } // clear inline data cache as it is stale if(!file.inline_data().empty()) file.inline_data(""); // OPTIMIZATION: Inline Write if(gkfs::config::metadata::use_inline_data && Loading Loading @@ -1416,6 +1423,14 @@ gkfs_do_read(const gkfs::filemap::OpenFile& file, char* buf, size_t count, if(gkfs::config::metadata::use_inline_data && offset < gkfs::config::metadata::inline_data_size) { // OPTIMIZATION: Check if we have the inline data cached if(!file.inline_data().empty() && count <= file.inline_data().size()) { LOG(DEBUG, "{}() Using cached inline data", __func__); memcpy(buf, file.inline_data().c_str() + offset, count); return count; } // Forward the read request to the Metadata Server instead of Data // Server auto ret = Loading
src/client/open_file_map.cpp +10 −0 Original line number Diff line number Diff line Loading @@ -123,6 +123,16 @@ OpenFile::mode(mode_t mode_) { // OpenFileMap starts here string OpenFile::inline_data() const { return inline_data_; } void OpenFile::inline_data(const string& data) { OpenFile::inline_data_ = data; } shared_ptr<OpenFile> OpenFileMap::get(int fd) { lock_guard<recursive_mutex> lock(files_mutex_); Loading
tests/integration/data/test_inline_read_opt.py 0 → 100644 +67 −0 Original line number Diff line number Diff line import pytest import os from harness.logger import logger file01 = 'file01' data01 = 'data01' @pytest.mark.parametrize("restart_daemon", [False, True]) def test_inline_read_optimization(gkfs_daemon, gkfs_client, restart_daemon): """ Test the read optimization where inline data is cached during open. """ file01 = gkfs_daemon.mountdir / "file01" # Enable inline data and the optimization (though optimization flag is mainly for create/write) # We rely on inline_data being enabled. # 1. Create a file with small data using write --creat (atomic to ensure creation with inline data) # gkfs.io open+write in one process triggers the creation optimization properly ret = gkfs_client.run('write', file01, data01, len(data01), '--creat') assert ret.retval == len(data01) if restart_daemon: # Restart daemon to clear any server-side caching effects if any (though client cache is what matters) # But crucially, we want to test that a FRESH client open fetches the data. # Actually restarts might be too heavy, just closing and reopening effectively tests the logic # because the OpenFile object is destroyed on close. pass # 2. Open file for reading # This should now fetch the inline data into the OpenFile object ret = gkfs_client.open(file01, os.O_RDONLY) assert ret.retval > 0 # 3. Read the data # This should be served from the cache without a read RPC (verified by functionality) ret = gkfs_client.read(file01, len(data01)) assert ret.retval == len(data01) assert ret.buf == data01.encode() # 4. Stat to verify size matches ret = gkfs_client.stat(file01) assert ret.retval == 0 assert ret.statbuf.st_size == len(data01) # 5. Verify Cache Invalidation on Write # Write new data new_data = 'data02' ret = gkfs_client.write(file01, new_data, len(new_data)) # Overwrite assert ret.retval == len(new_data) # Seek to beginning ret = gkfs_client.lseek(file01, 0, os.SEEK_SET) assert ret.retval == 0 # Read again - should NOT be old data01 ret = gkfs_client.read(file01, len(new_data)) assert ret.retval == len(new_data) assert ret.buf == new_data.encode() # 6. Verify cleanup ret = gkfs_client.remove(file01) assert ret.retval == 0
tests/integration/harness/gkfs.py +4 −0 Original line number Diff line number Diff line Loading @@ -600,7 +600,11 @@ class Client: # _err=sys.stderr, ) out = str(out) logger.debug(f"command output: {out}") json_start = out.find('{') if json_start != -1: out = out[json_start:] return self._parser.parse(cmd, out) def __getattr__(self, name): Loading