Commit dae425ec authored by Marc Vef
Browse files

Merge branch '36-separate-metadata-and-data-directories' into 'master'

Resolve "Separate metadata and data directories"

Closes #36

See merge request zdvresearch_bsc/adafs!15
parents 2df9e3ed aa478412
Loading
Loading
Loading
Loading
+4 −16
Original line number Diff line number Diff line
@@ -26,10 +26,8 @@ private:
    // paths
    std::string rootdir_;
    std::string mountdir_;
    std::string inode_path_;
    std::string dentry_path_;
    std::string metadir_;
    std::string chunk_path_;
    std::string mgmt_path_;

    // hosts_
    std::string hosts_raw_; // raw hosts string, given when daemon is started. Used to give it to fs client
@@ -78,8 +76,6 @@ public:

    bool is_local_op(size_t recipient);

    size_t hash_path(const std::string& path);

    // getter/setter

    const std::unordered_map<std::string, std::string>& hashmap() const;
@@ -108,24 +104,16 @@ public:

    const std::string& mountdir() const;

    void mountdir(const std::string& mountdir);

    const std::string& inode_path() const;

    void inode_path(const std::string& inode_path_);
    void mountdir(const std::string& mountdir_);

    const std::string& dentry_path() const;
    const std::string& metadir() const;

    void dentry_path(const std::string& dentry_path_);
    void metadir(const std::string& metadir_);

    const std::string& chunk_path() const;

    void chunk_path(const std::string& chunk_path_);

    const std::string& mgmt_path() const;

    void mgmt_path(const std::string& mgmt_path_);

    const std::shared_ptr<rocksdb::DB>& rdb() const;

    void rdb(const std::shared_ptr<rocksdb::DB>& rdb);
+26 −16
Original line number Diff line number Diff line
@@ -2,9 +2,8 @@
# -*- coding: utf-8 -*-

import argparse
import time

import os
import time

from util import util

@@ -32,14 +31,17 @@ def check_dependencies():
    exit(1)


def init_system(daemon_path, rootdir, mountdir, nodelist, cleanroot, numactl):
def init_system(daemon_path, rootdir, metadir, mountdir, nodelist, cleanroot, numactl):
    """Initializes ADAFS on specified nodes.

    Args:
        daemon_path (str): Path to daemon executable
        rootdir (str): Path to root directory for fs data
        metadir (str): Path to metadata directory where metadata is stored
        mountdir (str): Path to mount directory where adafs is used in
        nodelist (str): Comma-separated list of nodes where adafs is launched on
        cleanroot (bool): if True, the root and metadata directories are cleaned before daemon init
        numactl (str): numactl arguments for daemon init
    """
    global PSSH_PATH
    global PRETEND
@@ -48,6 +50,11 @@ def init_system(daemon_path, rootdir, mountdir, nodelist, cleanroot, numactl):
    daemon_path = os.path.realpath(os.path.expanduser(daemon_path))
    mountdir = os.path.realpath(os.path.expanduser(mountdir))
    rootdir = os.path.realpath(os.path.expanduser(rootdir))
    # Replace metadir with rootdir if only rootdir is given
    if len(metadir) == 0:
        metadir = rootdir
    else:
        metadir = os.path.realpath(os.path.expanduser(metadir))
    pssh_nodelist = ''
    nodefile = False
    if os.path.exists(nodelist):
@@ -62,9 +69,9 @@ def init_system(daemon_path, rootdir, mountdir, nodelist, cleanroot, numactl):
    else:
        pssh = '%s -O StrictHostKeyChecking=no -i -H "%s"' % (PSSH_PATH, nodelist.replace(',', ' '))

    # clean root dir if needed
    # clean root and metadata dir if needed
    if cleanroot:
        cmd_rm_str = '%s "rm -rf %s/* && truncate -s 0 /tmp/adafs_daemon.log /tmp/adafs_preload.log"' % (pssh, rootdir)
        cmd_rm_str = '%s "rm -rf %s/* %s/* && truncate -s 0 /tmp/adafs_daemon.log /tmp/adafs_preload.log"' % (pssh, rootdir, metadir)
        if PRETEND:
            print 'Pretending: %s' % cmd_rm_str
        else:
@@ -78,7 +85,7 @@ def init_system(daemon_path, rootdir, mountdir, nodelist, cleanroot, numactl):
                          (line[line.find('FAILURE'):].strip().split(' ')[1])
                    print line
            if not err:
                print 'pssh daemon launch successfully executed. Root dir is cleaned.\n'
                print 'pssh daemon launch successfully executed. Root and Metadata dir are cleaned.\n'
            else:
                print '[ERR] with pssh. Aborting!'
                exit(1)
@@ -86,19 +93,19 @@ def init_system(daemon_path, rootdir, mountdir, nodelist, cleanroot, numactl):
    # Start daemons
    # The daemon registers its metadata directory option as "metadir,i"
    # (i.e. --metadir / -i), so the launcher has to pass -i here; -d is not an
    # option the daemon knows. nodelist is either a hostfile path (--hostfile)
    # or a comma-separated host list (--hosts); numactl arguments, when given,
    # are placed in front of the daemon executable.
    if nodefile:
        if len(numactl) == 0:
            cmd_str = '%s "nohup %s -r %s -i %s -m %s --hostfile %s > /tmp/adafs_daemon.log 2>&1 &"' \
                      % (pssh, daemon_path, rootdir, metadir, mountdir, nodelist)
        else:
            cmd_str = '%s "nohup numactl %s %s -r %s -i %s -m %s --hostfile %s > /tmp/adafs_daemon.log 2>&1 &"' \
                      % (pssh, numactl, daemon_path, rootdir, metadir, mountdir, nodelist)
    else:
        if len(numactl) == 0:
            cmd_str = '%s "nohup %s -r %s -i %s -m %s --hosts %s > /tmp/adafs_daemon.log 2>&1 &"' \
                      % (pssh, daemon_path, rootdir, metadir, mountdir, nodelist)
        else:
            cmd_str = '%s "nohup numactl %s %s -r %s -i %s -m %s --hosts %s > /tmp/adafs_daemon.log 2>&1 &"' \
                      % (pssh, numactl, daemon_path, rootdir, metadir, mountdir, nodelist)

    if PRETEND:
        print 'Pretending: %s' % cmd_str
@@ -171,6 +178,9 @@ if __name__ == "__main__":
or a path to a nodefile (one node per line)''')

    # optional arguments
    parser.add_argument('-i', '--metadir', metavar='<METADIR_PATH>', type=str, default='',
                        help='''Path to separate metadir directory where metadata is stored. 
If not set, rootdir will be used instead.''')
    parser.add_argument('-p', '--pretend', action='store_true',
                        help='Output adafs launch command and do not actually execute it')
    parser.add_argument('-P', '--pssh', metavar='<PSSH_PATH>', type=str, default='',
@@ -178,7 +188,7 @@ or a path to a nodefile (one node per line)''')
    parser.add_argument('-J', '--jobid', metavar='<JOBID>', type=str, default='',
                        help='Jobid for cluster batch system. Used for a unique hostfile used for pssh.')
    parser.add_argument('-c', '--cleanroot', action='store_true',
                        help='Removes contents of root directory before starting ADA-FS Daemon. Be careful!')
                        help='Removes contents of root and metadata directory before starting ADA-FS Daemon. Be careful!')
    parser.add_argument('-n', '--numactl', metavar='<numactl_args>', type=str, default='',
                        help='If adafs daemon should be pinned to certain cores, set numactl arguments here.')
    args = parser.parse_args()
@@ -192,6 +202,6 @@ or a path to a nodefile (one node per line)''')
        PSSH_HOSTFILE_PATH = '/tmp/hostfile_pssh_%s' % args.jobid
    PSSH_PATH = args.pssh
    WAITTIME = 5
    init_system(args.daemonpath, args.rootdir, args.mountdir, args.nodelist, args.cleanroot, args.numactl)
    init_system(args.daemonpath, args.rootdir, args.metadir, args.mountdir, args.nodelist, args.cleanroot, args.numactl)

    print '\nNothing left to do; exiting. :)'
+10 −12
Original line number Diff line number Diff line
@@ -308,8 +308,9 @@ int main(int argc, const char* argv[]) {
    po::options_description desc("Allowed options");
    desc.add_options()
            ("help,h", "Help message")
            ("mountdir,m", po::value<string>()->required(), "User Fuse mountdir.")
            ("mountdir,m", po::value<string>()->required(), "User Fuse mountdir")
            ("rootdir,r", po::value<string>()->required(), "ADA-FS data directory")
            ("metadir,i", po::value<string>(), "ADA-FS metadata directory, if not set rootdir is used for metadata ")
            ("hostfile", po::value<string>(), "Path to the hosts_file for all fs participants")
            ("hosts,h", po::value<string>(), "Comma separated list of hosts_ for all fs participants");
    po::variables_map vm;
@@ -332,6 +333,12 @@ int main(int argc, const char* argv[]) {
    if (vm.count("rootdir")) {
        ADAFS_DATA->rootdir(vm["rootdir"].as<string>());
    }
    if (vm.count("metadir")) {
        ADAFS_DATA->metadir(vm["metadir"].as<string>());
    } else if (vm.count("rootdir")) {
        ADAFS_DATA->metadir(vm["rootdir"].as<string>());
    }

    // parse host parameters
    vector<string> hosts{};
    if (vm.count("hostfile")) {
@@ -396,22 +403,13 @@ int main(int argc, const char* argv[]) {
    ADAFS_DATA->host_size(hostmap.size());
    ADAFS_DATA->rpc_port(fmt::FormatInt(RPC_PORT).str());
    ADAFS_DATA->hosts_raw(hosts_raw);



    //set all paths
    ADAFS_DATA->inode_path(ADAFS_DATA->rootdir() + "/meta/inodes"s); // XXX prob not needed anymore
    ADAFS_DATA->dentry_path(ADAFS_DATA->rootdir() + "/meta/dentries"s); // XXX prob not needed anymore
    ADAFS_DATA->chunk_path(ADAFS_DATA->rootdir() + "/data/chunks"s);
    ADAFS_DATA->mgmt_path(ADAFS_DATA->rootdir() + "/mgmt"s);

    ADAFS_DATA->spdlogger()->info("{}() Initializing environment. Hold on ...", __func__);

    // Make sure directory structure exists
    bfs::create_directories(ADAFS_DATA->dentry_path());
    bfs::create_directories(ADAFS_DATA->inode_path());
    bfs::create_directories(ADAFS_DATA->chunk_path());
    bfs::create_directories(ADAFS_DATA->mgmt_path());
    bfs::create_directories(ADAFS_DATA->metadir());
    // Create mountdir. We use this dir to get some information on the underlying fs with statfs in adafs_statfs
    bfs::create_directories(ADAFS_DATA->mountdir());

+4 −20
Original line number Diff line number Diff line
@@ -62,20 +62,12 @@ void FsData::mountdir(const std::string& mountdir) {
    FsData::mountdir_ = mountdir;
}

const std::string& FsData::inode_path() const {
    return inode_path_;
const std::string& FsData::metadir() const {
    return metadir_;
}

void FsData::inode_path(const std::string& inode_path_) {
    FsData::inode_path_ = inode_path_;
}

const std::string& FsData::dentry_path() const {
    return dentry_path_;
}

void FsData::dentry_path(const std::string& dentry_path_) {
    FsData::dentry_path_ = dentry_path_;
void FsData::metadir(const std::string& metadir) {
    FsData::metadir_ = metadir;
}

const std::string& FsData::chunk_path() const {
@@ -86,14 +78,6 @@ void FsData::chunk_path(const std::string& chunk_path_) {
    FsData::chunk_path_ = chunk_path_;
}

const std::string& FsData::mgmt_path() const {
    return mgmt_path_;
}

void FsData::mgmt_path(const std::string& mgmt_path_) {
    FsData::mgmt_path_ = mgmt_path_;
}

// Read-only accessor: hands back a const reference to the rdb_options_ member.
const rocksdb::Options& FsData::rdb_options() const {
    return this->rdb_options_;
}
+2 −2
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ using namespace std;

bool init_rocksdb() {
    rocksdb::DB* db;
    ADAFS_DATA->rdb_path(ADAFS_DATA->rootdir() + "/meta/rocksdb"s);
    ADAFS_DATA->rdb_path(ADAFS_DATA->metadir() + "/rocksdb"s);
    rocksdb::Options options;
    // Optimize RocksDB. This is the easiest way to get RocksDB to perform well
    options.IncreaseParallelism();