1 RPC online metadata read (84a689c2) · Commits · hpc / gekkofs

include/client/open_file_map.hpp

+9 −0

Original line number	Diff line number	Diff line
		@@ -115,6 +115,15 @@ public:

		void
		mode(mode_t mode_);

		/**
		 * @brief Returns the inline data cached on this open file.
		 *
		 * The value is returned by copy. Callers treat an empty string as
		 * "no inline data cached".
		 *
		 * @return copy of the cached inline data
		 */
		std::string
		inline_data() const;

		/**
		 * @brief Replaces the inline data cached on this open file.
		 *
		 * Passing an empty string clears the cache (callers use this to drop
		 * stale data after a write).
		 *
		 * @param data new contents of the cache; copied into the object
		 */
		void
		inline_data(const std::string& data);

		private:
		// Inline data cached for this open file; empty when nothing is
		// cached.
		std::string inline_data_;
		};

src/client/gkfs_functions.cpp

+17 −2

Original line number	Diff line number	Diff line
		@@ -339,8 +339,11 @@ gkfs_open(const std::string& path, mode_t mode, int flags) {
		return -1;
		}
		}
		auto fd = CTX->file_map()->add(
		std::make_shared<gkfs::filemap::OpenFile>(path, flags));
		auto file = std::make_shared<gkfs::filemap::OpenFile>(path, flags);
		if(!md.inline_data().empty()) {
		file->inline_data(md.inline_data());
		}
		auto fd = CTX->file_map()->add(file);


		if(CTX->protect_files_consumer()) {
		@@ -1117,6 +1120,10 @@ gkfs_do_write(gkfs::filemap::OpenFile& file, const char* buf, size_t count,
		file.set_flag(gkfs::filemap::OpenFile_flags::creation_pending, false);
		}

		// clear inline data cache as it is stale
		if(!file.inline_data().empty())
		file.inline_data("");


		// OPTIMIZATION: Inline Write
		if(gkfs::config::metadata::use_inline_data &&
		@@ -1416,6 +1423,14 @@ gkfs_do_read(const gkfs::filemap::OpenFile& file, char* buf, size_t count,
		if(gkfs::config::metadata::use_inline_data &&
		offset < gkfs::config::metadata::inline_data_size) {

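		// OPTIMIZATION: Check if we have the inline data cached.
		// NOTE(review): the guard below only compares `count` with the cached
		// size, but the copy starts at `offset`. For offset > 0,
		// offset + count can exceed the cached size and the memcpy reads past
		// the end of the string. Bound the copy by (size - offset) and return
		// the number of bytes actually copied.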
		if(!file.inline_data().empty() &&
		count <= file.inline_data().size()) {
		LOG(DEBUG, "{}() Using cached inline data", __func__);
		memcpy(buf, file.inline_data().c_str() + offset, count);
		return count;
		}

		// Forward the read request to the Metadata Server instead of Data
		// Server
		auto ret =

src/client/open_file_map.cpp

+10 −0

Original line number	Diff line number	Diff line
		@@ -123,6 +123,16 @@ OpenFile::mode(mode_t mode_) {

		// OpenFile inline-data accessors; the OpenFileMap implementation
		// starts right after them

		// Hands back a copy of the inline data cached on this open file.
		// An empty string means nothing is cached.
		string
		OpenFile::inline_data() const {
		    return this->inline_data_;
		}

		void
		OpenFile::inline_data(const string& data) {
		OpenFile::inline_data_ = data;
		}

		shared_ptr<OpenFile>
		OpenFileMap::get(int fd) {
		lock_guard<recursive_mutex> lock(files_mutex_);

tests/integration/data/test_inline_read_opt.py

0 → 100644

+67 −0

Original line number	Diff line number	Diff line
		import pytest
		import os
		from harness.logger import logger

		file01 = 'file01'
		data01 = 'data01'

		@pytest.mark.parametrize("restart_daemon", [False, True])
		def test_inline_read_optimization(gkfs_daemon, gkfs_client, restart_daemon):
		    """
		    Exercise the inline-data read path end to end.

		    A small file is created with a single ``write --creat`` command, then
		    opened, read back, stat-ed, overwritten, read again and removed. Only
		    the values returned by each command are checked; the test cannot
		    observe whether a read was answered from the client-side cache.

		    :param gkfs_daemon: daemon fixture; provides ``mountdir``
		    :param gkfs_client: client fixture used to issue the I/O commands
		    :param restart_daemon: when True the variant is skipped, because
		        restarting the daemon is not implemented here and running it
		        would only repeat the ``False`` case under a misleading test id
		    """
		    if restart_daemon:
		        # TODO: restart the daemon between the write and the open once the
		        # harness call for it is wired in, then drop this skip.
		        pytest.skip("daemon restart is not implemented for this test yet")

		    # Use the module-level file name instead of shadowing it with a local.
		    target = gkfs_daemon.mountdir / file01

		    # Nothing is toggled here: the test relies on inline data already
		    # being enabled in the configuration under test.

		    # 1. Create the file and write the small payload with one command
		    #    (write --creat), so creation and write happen together.
		    ret = gkfs_client.run('write', target, data01, len(data01), '--creat')
		    assert ret.retval == len(data01)

		    # 2. Open the file read-only. This is the point where the client is
		    #    expected to pick up the inline data.
		    ret = gkfs_client.open(target, os.O_RDONLY)
		    assert ret.retval > 0

		    # 3. Read the payload back and compare the bytes.
		    ret = gkfs_client.read(target, len(data01))
		    assert ret.retval == len(data01)
		    assert ret.buf == data01.encode()

		    # 4. The reported size must match the payload length.
		    ret = gkfs_client.stat(target)
		    assert ret.retval == 0
		    assert ret.statbuf.st_size == len(data01)

		    # 5. Overwrite the payload; a later read must not return the old
		    #    bytes (i.e. any cached inline data has to be invalidated).
		    overwrite = 'data02'
		    ret = gkfs_client.write(target, overwrite, len(overwrite))
		    assert ret.retval == len(overwrite)

		    # Seek to beginning
		    ret = gkfs_client.lseek(target, 0, os.SEEK_SET)
		    assert ret.retval == 0

		    # Read again - should NOT be old data01
		    ret = gkfs_client.read(target, len(overwrite))
		    assert ret.retval == len(overwrite)
		    assert ret.buf == overwrite.encode()

		    # 6. Remove the file so the mount is left clean.
		    ret = gkfs_client.remove(target)
		    assert ret.retval == 0

tests/integration/harness/gkfs.py

+4 −0

Original line number	Diff line number	Diff line
		@@ -600,7 +600,11 @@ class Client:
		# _err=sys.stderr,
		)

		out = str(out)
		logger.debug(f"command output: {out}")
		json_start = out.find('{')
		if json_start != -1:
		out = out[json_start:]
		return self._parser.parse(cmd, out)

		def __getattr__(self, name):