Skip to content
Snippets Groups Projects

Resolve "Segmentation fault in sfind"

Closed Ramon Nou requested to merge 138-segmentation-fault-in-sfind into master
1 unresolved thread
Compare and Show latest version
2 files
+ 37
17
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 290
303
@@ -20,11 +20,9 @@
#include <regex.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <cstdio>
#include <iostream>
#include <limits>
#include <queue>
#include <string>
#include <vector>
using namespace std;
@@ -32,238 +30,214 @@ using namespace std;
/* Minimal struct needed for io500 find */
/* We could also do the filtering on the server */
struct dirent_extended {
    // Entry size; dirProcess() compares it with the -size filter.
    size_t size;
    // Change time; dirProcess() compares it with the -newer threshold.
    time_t ctime;
    // Length in bytes of this record; dirProcess() adds it to its cursor to
    // reach the next record in the buffer.
    unsigned short d_reclen;
    // Entry kind; dirProcess() treats the value 1 as "directory".
    unsigned char d_type;
    // First byte of the NUL-terminated entry name.
    // NOTE(review): presumably the name continues past the end of the struct
    // (records are variable length, see d_reclen) -- confirm against the
    // GekkoFS client that fills these records.
    char d_name[1];
};
/* Function exported from GekkoFS LD_PRELOAD, code needs to be compiled with
* -fPIC.
*
* Declared weak so the program still links when the symbol is absent;
* main() compares the function's address with nullptr before using it.
*
* As used by dirProcess(): `path` is the directory to list, `dirp` the buffer
* that receives dirent_extended records, `count` the size of that buffer in
* bytes and `server` the index of the server to query. dirProcess() treats
* the return value as the number of bytes of records to walk. */
extern "C" int gkfs_getsingleserverdir(const char *path,
struct dirent_extended *dirp,
unsigned int count, int server)
__attribute__((weak));
/* Same declaration repeated with different formatting (redeclaration). */
extern "C" int
gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp,
unsigned int count, int server) __attribute__((weak));
/* pfind options, extended with the GekkoFS mount directory and the number of
 * servers.
 *
 * Every member has a default member initializer, so a default-constructed
 * object is fully initialized (including name_regex) and can be copied or
 * returned by value. Because the class holds a std::string it must be
 * constructed normally; never allocate it with malloc or clear it with memset.
 *
 * The char* members are filled with strdup() by pfind_parse_args() and are
 * not released by this class. */
class pfind_options_t {
public:
    // Directory to search; taken from the first positional argument.
    std::string workdir{};
    // Value of -s. Not read by any code visible in this file.
    int stonewall_timer{0};
    // Value of -newer, or nullptr when the option was not given.
    char* timestamp_file{nullptr};
    // Regex source built from -name or -regex, or nullptr for "no name filter".
    char* name_pattern{nullptr};
    // Compiled form of name_pattern; only meaningful when name_pattern is set.
    regex_t name_regex{};
    // Value of -size. pfind_parse_args() stores numeric_limits<uint64_t>::max()
    // to mean "no size filter".
    uint64_t size{0};
    // Value of -S; dirProcess() queries servers 0 .. num_servers-1.
    int num_servers{0};
    // Value of -M; process() strips this prefix from workdir.
    char* mountdir{nullptr};
    // Incremented once per -v.
    int verbosity{0};
};
/* State derived at run time rather than taken from the command line.
 * Kept as a plain aggregate because process() clears it with memset. */
typedef struct {
    // Change-time threshold; process() sets it from the st_ctime of the
    // -newer file and dirProcess() compares entries against it.
    uint64_t ctime_min;
    // Not referenced by any code visible in this file.
    double stonewall_endtime;
    // Not referenced by any code visible in this file.
    FILE* logfile;
    // Not referenced by any code visible in this file.
    int needs_stat;
} pfind_runtime_options_t;
// Run-time state shared by process() and dirProcess(); process() zeroes it
// with memset before filling it in.
static pfind_runtime_options_t runtime;
// Rank of this process. The visible code only ever assigns 0 to it and uses
// it to restrict some work and output to rank 0.
int pfind_rank;
// NOTE(review): dirProcess() and process() receive their options through a
// parameter named `opt`, which hides this file-scope pointer -- confirm
// whether the global is still needed.
static pfind_options_t *opt;
/* Print `str` verbatim on standard output (no newline is appended) and
 * terminate the process with exit status 1. Never returns. */
void
pfind_abort(const std::string& str) {
    printf("%s", str.c_str());
    exit(1);
}
/* Print the usage text together with the option values currently stored in
 * `res`.
 *
 * Options that were not given are null pointers; handing a null pointer to
 * "%s" is undefined behaviour, so they are printed as "(null)" instead. */
static void
pfind_print_help(pfind_options_t& res) {
    const auto printable = [](const char* value) {
        return value != nullptr ? value : "(null)";
    };
    printf("pfind \nSynopsis:\n"
           "pfind <workdir> [-newer <timestamp file>] [-size <size>c] [-name "
           "<substr>] [-regex <regex>] [-S <numserver>] [-M <mountdir>]\n"
           "\tworkdir = \"%s\"\n"
           "\t-newer = \"%s\"\n"
           "\t-name|-regex = \"%s\"\n"
           "\t-S: num servers = \"%d\"\n"
           "\t-M: mountdir = \"%s\"\n"
           "Optional flags\n"
           "\t-h: prints the help\n"
           "\t--help: prints the help without initializing MPI\n",
           res.workdir.c_str(), printable(res.timestamp_file),
           printable(res.name_pattern), res.num_servers,
           printable(res.mountdir));
}
// NOTE(review): not read by any code visible in this file -- confirm whether
// this global is still needed.
int pfind_size;
pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){
pfind_rank = 0;
pfind_size = 1;
pfind_options_t *res = (pfind_options_t *)malloc(sizeof(pfind_options_t));
memset(res, 0, sizeof(pfind_options_t));
auto print_help = force_print_help;
res->workdir = "./";
res->results_dir = nullptr;
res->verbosity = 0;
res->timestamp_file = nullptr;
res->name_pattern = nullptr;
res->size = std::numeric_limits<uint64_t>::max();
res->queue_length = 100000;
res->max_entries_per_iter = 1000;
char *firstarg = nullptr;
// when we find special args, we process them
// but we need to replace them with 0 so that getopt will ignore them
// and getopt will continue to process beyond them
for (auto i = 1; i < argc - 1; i++) {
if (strcmp(argv[i], "-newer") == 0) {
res->timestamp_file = strdup(argv[i + 1]);
argv[i][0] = 0;
argv[++i][0] = 0;
} else if (strcmp(argv[i], "-size") == 0) {
char *str = argv[i + 1];
char extension = str[strlen(str) - 1];
str[strlen(str) - 1] = 0;
res->size = atoll(str);
switch (extension) {
case 'c':
break;
default:
pfind_abort("Unsupported exension for -size\n");
}
argv[i][0] = 0;
argv[++i][0] = 0;
} else if (strcmp(argv[i], "-name") == 0) {
res->name_pattern = (char *)malloc(strlen(argv[i + 1]) * 4 + 100);
// transform a traditional name pattern to a regex:
char *str = argv[i + 1];
char *out = res->name_pattern;
auto pos = 0;
for (auto i = 0; i < strlen(str); i++) {
if (str[i] == '*') {
pos += sprintf(out + pos, ".*");
} else if (str[i] == '.') {
pos += sprintf(out + pos, "[.]");
} else if (str[i] == '"' || str[i] == '\"') {
// erase the "
} else {
out[pos] = str[i];
pos++;
pfind_options_t
pfind_parse_args(int argc, char** argv, int force_print_help) {
pfind_rank = 0;
auto print_help = force_print_help;
pfind_options_t res;
res.workdir = "./";
res.verbosity = 0;
res.timestamp_file = nullptr;
res.name_pattern = nullptr;
res.size = std::numeric_limits<uint64_t>::max();
char* firstarg = nullptr;
// when we find special args, we process them
// but we need to replace them with 0 so that getopt will ignore them
// and getopt will continue to process beyond them
for(auto i = 1; i < argc - 1; i++) {
if(strcmp(argv[i], "-newer") == 0) {
res.timestamp_file = strdup(argv[i + 1]);
argv[i][0] = 0;
argv[++i][0] = 0;
} else if(strcmp(argv[i], "-size") == 0) {
char* str = argv[i + 1];
char extension = str[strlen(str) - 1];
str[strlen(str) - 1] = 0;
res.size = atoll(str);
if(extension != 'c') {
pfind_abort("Unsupported extension for -size\n");
}
argv[i][0] = 0;
argv[++i][0] = 0;
} else if(strcmp(argv[i], "-name") == 0) {
res.name_pattern = (char*) malloc(strlen(argv[i + 1]) * 4 + 100);
// transform a traditional name pattern to a regex:
char* str = argv[i + 1];
char* out = res.name_pattern;
auto pos = 0;
for(size_t k = 0; k < strlen(str); k++) {
if(str[k] == '*') {
pos += sprintf(out + pos, ".*");
} else if(str[k] == '.') {
pos += sprintf(out + pos, "[.]");
} else if(str[k] == '"' || str[k] == '\"') {
// erase the "
} else {
out[pos] = str[k];
pos++;
}
}
out[pos] = 0;
int ret = regcomp(&res.name_regex, res.name_pattern, 0);
if(ret) {
pfind_abort("Invalid regex for name given\n");
}
argv[i][0] = 0;
argv[++i][0] = 0;
} else if(strcmp(argv[i], "-regex") == 0) {
res.name_pattern = strdup(argv[i + 1]);
int ret = regcomp(&res.name_regex, res.name_pattern, 0);
if(ret) {
pfind_abort("Invalid regex for name given\n");
}
argv[i][0] = 0;
argv[++i][0] = 0;
} else if(!firstarg) {
firstarg = strdup(argv[i]);
argv[i][0] = 0;
}
}
out[pos] = 0;
int ret = regcomp(&res->name_regex, res->name_pattern, 0);
if (ret) {
pfind_abort("Invalid regex for name given\n");
}
argv[i][0] = 0;
argv[++i][0] = 0;
} else if (strcmp(argv[i], "-regex") == 0) {
res->name_pattern = strdup(argv[i + 1]);
int ret = regcomp(&res->name_regex, res->name_pattern, 0);
if (ret) {
pfind_abort("Invalid regex for name given\n");
}
argv[i][0] = 0;
argv[++i][0] = 0;
} else if (!firstarg) {
firstarg = strdup(argv[i]);
argv[i][0] = 0;
}
}
if (argc == 2) {
firstarg = strdup(argv[1]);
}
int c;
while ((c = getopt(argc, argv, "CPs:r:vhD:xq:H:NM:S:")) != -1) {
if (c == -1) {
break;
if(argc == 2) {
firstarg = strdup(argv[1]);
}
switch (c) {
case 'H':
res->parallel_single_dir_access = atoi(optarg);
break;
case 'N':
res->steal_from_next = 1;
break;
case 'x':
/* ignore fake arg that we added when we processed the extra args */
break;
case 'P':
res->print_by_process = 1;
break;
case 'C':
res->just_count = 1;
break;
case 'D':
if (strcmp(optarg, "rates") == 0) {
res->print_rates = 1;
} else {
pfind_abort("Unsupported debug flag\n");
}
break;
case 'h':
print_help = 1;
break;
case 'r':
res->results_dir = strdup(optarg);
break;
case 'q':
res->queue_length = atoi(optarg);
break;
if (res->queue_length < 10) {
pfind_abort("Queue must be at least 10 elements!\n");
}
break;
case 's':
res->stonewall_timer = atol(optarg);
break;
case 'S':
res->num_servers = atoi(optarg);
break;
case 'M':
res->mountdir = strdup(optarg);
break;
case 'v':
res->verbosity++;
break;
case 0:
break;
int c;
while((c = getopt(argc, argv, "CPs:r:vhD:xq:H:NM:S:")) != -1) {
if(c == -1) {
break;
}
switch(c) {
case 'x':
/* ignore fake arg that we added when we processed the extra
* args */
break;
case 'h':
print_help = 1;
break;
case 's':
res.stonewall_timer = atoi(optarg);
break;
case 'S':
res.num_servers = atoi(optarg);
break;
case 'M':
res.mountdir = strdup(optarg);
break;
case 'v':
res.verbosity++;
break;
case 0:
break;
default:
cerr << "Unknown parameter" << endl;
break;
}
}
if(res.verbosity > 2 && pfind_rank == 0) {
printf("Regex: %s\n", res.name_pattern);
}
}
if (res->verbosity > 2 && pfind_rank == 0) {
printf("Regex: %s\n", res->name_pattern);
}
if (print_help) {
if (pfind_rank == 0)
pfind_print_help(res);
exit(0);
}
if (!firstarg) {
pfind_abort("Error: pfind <directory>\n");
}
res->workdir = firstarg;
return res;
if(print_help) {
if(pfind_rank == 0)
pfind_print_help(res);
exit(0);
}
if(!firstarg) {
pfind_abort("Error: pfind <directory>\n");
}
res.workdir = firstarg;
return res;
}
/* Client Processing a path.
@@ -273,109 +247,122 @@ pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){
* server, which is enough for most cases
*
*/
void dirProcess(const string path, unsigned long long &checked,
unsigned long long &found, queue<string> &dirs,
unsigned int world_rank, unsigned int world_size,
pfind_options_t *opt) {
struct dirent_extended *getdir = (struct dirent_extended *)malloc(
(sizeof(struct dirent_extended) + 255) * 1024 * 100);
memset(getdir, 0, (sizeof(struct dirent_extended) + 255) * 1024 * 100);
// cout << "PROCESSING " << world_rank << "/"<< world_size << " = " << path <<
// endl;
for (auto server = 0; server < opt->num_servers; server++) {
unsigned long long total_size = 0;
auto n = gkfs_getsingleserverdir(
path.c_str(), getdir,
(sizeof(struct dirent_extended) + 255) * 1024 * 100, server);
struct dirent_extended *temp = getdir;
while (total_size < n) {
if (strlen(temp->d_name) == 0)
break;
total_size += temp->d_reclen;
/* Queue directory to process */
if (temp->d_type == 1) {
string slash;
if (path[path.size() - 1] != '/')
slash = "/";
checked++;
dirs.push(path + slash + temp->d_name);
temp =
reinterpret_cast<dirent_extended *>(reinterpret_cast<char *>(temp) + temp->d_reclen);
continue;
}
/* Find filtering */
auto timeOK = true;
if (opt->timestamp_file) {
if ((uint64_t)temp->ctime < runtime.ctime_min)
timeOK = false;
}
if (timeOK and (temp->size == opt->size or opt->size == std::numeric_limits<uint64_t>::max()))
if (!(opt->name_pattern &&
regexec(&opt->name_regex, temp->d_name, 0, nullptr, 0)))
found++;
checked++;
temp = reinterpret_cast<dirent_extended *>(reinterpret_cast<char *>(temp) + temp->d_reclen);
void
dirProcess(const string& path, unsigned long long& checked,
unsigned long long& found, queue<string>& dirs,
pfind_options_t& opt) {
auto* getdir = (struct dirent_extended*) malloc(
(sizeof(struct dirent_extended) + 255) * 1024 * 100);
memset(getdir, 0, (sizeof(struct dirent_extended) + 255) * 1024 * 100);
for(auto server = 0; server < opt.num_servers; server++) {
unsigned long long total_size = 0;
unsigned long long n = gkfs_getsingleserverdir(
path.c_str(), getdir,
(sizeof(struct dirent_extended) + 255) * 1024 * 100, server);
struct dirent_extended* temp = getdir;
if(opt.verbosity)
cerr << "[" << n << "] " << path.c_str() << endl;
while(total_size < n) {
if(strlen(temp->d_name) == 0)
break;
total_size += temp->d_reclen;
/* Queue directory to process */
if(temp->d_type == 1) {
string slash;
if(path[path.size() - 1] != '/')
slash = "/";
checked++;
dirs.push(path + slash + temp->d_name);
temp = reinterpret_cast<dirent_extended*>(
reinterpret_cast<char*>(temp) + temp->d_reclen);
continue;
}
/* Find filtering */
auto timeOK = true;
if(opt.timestamp_file) {
if((uint64_t) temp->ctime < runtime.ctime_min)
timeOK = false;
}
if(timeOK and (temp->size == opt.size or
opt.size == std::numeric_limits<uint64_t>::max()))
if(!(opt.name_pattern &&
regexec(&opt.name_regex, temp->d_name, 0, nullptr, 0)))
found++;
checked++;
if(opt.verbosity)
cerr << temp->d_name << endl;
temp = reinterpret_cast<dirent_extended*>(
reinterpret_cast<char*>(temp) + temp->d_reclen);
}
}
}
}
/* Run the search described by `opt` and print "MATCHED <found>/<checked>"
 * on standard output.
 *
 * The start path is opt.workdir with the first strlen(opt.mountdir)
 * characters removed; when nothing remains (or no mount directory was given
 * and workdir is empty) the search starts at "/". Directories are then
 * processed from a queue until it is empty.
 *
 * Aborts through pfind_abort() when the -newer file cannot be stat'ed.
 * Always returns 0. */
int
process(pfind_options_t& opt) {
    unsigned long long found = 0;
    unsigned long long checked = 0;

    // INIT PFIND
    memset(&runtime, 0, sizeof(pfind_runtime_options_t));

    /* Get timestamp file */
    if(opt.timestamp_file && pfind_rank == 0) {
        struct stat timer_file {};
        if(lstat(opt.timestamp_file, &timer_file) != 0) {
            printf("Could not open: \"%s\", error: %s", opt.timestamp_file,
                   strerror(errno));
            pfind_abort("\n");
        }
        runtime.ctime_min = timer_file.st_ctime;
    }

    // mountdir is null when -M was not given: strip nothing in that case.
    // A prefix at least as long as workdir leaves nothing, i.e. the root.
    const size_t mount_len =
            opt.mountdir != nullptr ? strlen(opt.mountdir) : 0;
    std::string workdir = opt.workdir.size() > mount_len
                                  ? opt.workdir.substr(mount_len)
                                  : std::string("/");
    if(opt.verbosity)
        std::cerr << "STARTING PATH " << workdir << std::endl;

    std::queue<std::string> dirs;
    dirs.push(workdir);
    while(!dirs.empty()) {
        std::string processpath = dirs.front();
        dirs.pop();
        if(opt.verbosity)
            std::cerr << "PROCESSING " << processpath << std::endl;
        // may append newly discovered sub-directories to `dirs`
        dirProcess(processpath, checked, found, dirs, opt);
    }

    std::cout << "MATCHED " << found << "/" << checked << std::endl;
    return 0;
}
int main(int argc, char **argv) {
for (auto i = 0; i < argc; i++) {
if (strcmp(argv[i], "--help") == 0) {
argv[i][0] = 0;
pfind_rank = 0;
pfind_parse_args(argc, argv, 1);
exit(0);
int
main(int argc, char** argv) {
if(gkfs_getsingleserverdir == nullptr) {
cerr << "LD_PRELOAD not correctly defined or incorrect gekkofs version."
<< endl;
exit(1);
}
}
opt = pfind_parse_args(argc, argv, 0);
process(opt);
for(auto i = 0; i < argc; i++) {
if(strcmp(argv[i], "--help") == 0) {
argv[i][0] = 0;
pfind_rank = 0;
pfind_parse_args(argc, argv, 1);
exit(0);
}
}
pfind_options_t opt;
opt = pfind_parse_args(argc, argv, 0);
process(opt);
}
Loading