Skip to content
Snippets Groups Projects

Resolve "Segmentation fault in sfind"

Closed Ramon Nou requested to merge 138-segmentation-fault-in-sfind into master
1 unresolved thread
Compare and Show latest version
2 files
+ 108
121
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 99
121
@@ -20,11 +20,9 @@
#include <iostream>
#include <queue>
#include <regex.h>
#include <stdio.h>
#include <cstdio>
#include <string>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <limits>
using namespace std;
@@ -48,30 +46,36 @@ extern "C" int gkfs_getsingleserverdir(const char *path,
/* Extended pfind options: in addition to the regular pfind settings we need
 * the GekkoFS mount directory and the number of servers. */
// Option record for the GekkoFS pfind variant; pfind_parse_args() fills it in
// from the command line.
// NOTE(review): the record contains a std::string member, so it is not a
// plain C struct. Storage obtained with malloc() and cleared with memset()
// never runs the std::string constructor for workdir.
typedef struct {
// Directory to search; pfind_parse_args() sets it to "./" and later to firstarg.
std::string workdir{};
// Set to 1 by the -C option.
int just_count;
// Set to 1 by the -P option.
int print_by_process;
// strdup'ed argument of the -r option, nullptr otherwise.
char *results_dir;
// Numeric argument of the -s option.
int stonewall_timer;
// Set to 1 by "-D rates".
int print_rates;
// strdup'ed argument of -newer; when set, entries whose ctime is below
// runtime.ctime_min are filtered out.
char *timestamp_file;
// Regex source text: either the -regex argument or the -name pattern after
// translation to a regex; nullptr when no name filter was given.
char *name_pattern;
// Compiled form of name_pattern (regcomp); only consulted when name_pattern
// is non-null.
regex_t name_regex;
// Exact size to match (-size <n>c); the uint64_t maximum means "no size filter".
uint64_t size;
// Argument of -S; dirProcess() queries servers 0 .. num_servers-1.
int num_servers;
// strdup'ed argument of -M; process() strips strlen(mountdir) characters from
// the front of workdir.
char *mountdir;
// optimizing parameters NOT USED
// Set by -q; pfind_parse_args() defaults it to 100000.
int queue_length;
// pfind_parse_args() defaults it to 1000.
int max_entries_per_iter;
int steal_from_next; // if true, then steal from the next process
int parallel_single_dir_access; // if 1, use hashing to parallelize single
// directory access, if 2 sequential increment
// Incremented once per -v option.
int verbosity;
} pfind_options_t;
/**
 * Options of the GekkoFS-aware pfind, filled in by pfind_parse_args().
 *
 * Every member carries a default member initializer, so a default-constructed
 * object is completely initialized -- including name_regex, which is only
 * compiled when a -name/-regex filter is given but is still copied whenever
 * the object is returned or assigned by value.
 *
 * The char* members point to buffers created with strdup()/malloc() during
 * argument parsing. The class has no destructor, so copies are shallow (they
 * share the same buffers) and nothing here frees them.
 */
class pfind_options_t {
public:
    /// Directory to search; pfind_parse_args() overrides the empty default.
    std::string workdir{};
    /// Numeric argument of the -s option.
    int stonewall_timer{0};
    /// Argument of -newer; when set, entries with a ctime below
    /// runtime.ctime_min are filtered out. nullptr disables the filter.
    char *timestamp_file{nullptr};
    /// Regex source text (from -regex, or -name after translation to a regex);
    /// nullptr means no name filter.
    char *name_pattern{nullptr};
    /// Compiled form of name_pattern; only consulted when name_pattern is set.
    /// Zero-initialized so that copying an unused regex reads defined values.
    regex_t name_regex{};
    /// Exact size to match (-size <n>c). pfind_parse_args() replaces this
    /// default with the uint64_t maximum, which dirProcess() treats as
    /// "no size filter".
    uint64_t size{0};
    /// Argument of -S; dirProcess() queries servers 0 .. num_servers-1.
    int num_servers{0};
    /// Argument of -M; process() strips strlen(mountdir) characters from the
    /// front of workdir.
    char *mountdir{nullptr};
    /// Incremented once per -v option.
    int verbosity{0};

    pfind_options_t() = default;
};
typedef struct {
uint64_t ctime_min;
@@ -84,14 +88,13 @@ static pfind_runtime_options_t runtime;
int pfind_rank;
static pfind_options_t *opt;
void pfind_abort(const std::string str) {
void pfind_abort(const std::string & str) {
printf("%s", str.c_str());
exit(1);
}
static void pfind_print_help(pfind_options_t *res) {
static void pfind_print_help(pfind_options_t &res) {
printf("pfind \nSynopsis:\n"
"pfind <workdir> [-newer <timestamp file>] [-size <size>c] [-name "
"<substr>] [-regex <regex>] [-S <numserver>] [-M <mountdir>]\n"
@@ -102,28 +105,24 @@ static void pfind_print_help(pfind_options_t *res) {
"\t-M: mountdir = \"%s\"\n"
"Optional flags\n"
"\t-h: prints the help\n"
"\t--help: prints the help without initializing MPI\n",res->workdir.c_str(),
res->timestamp_file, res->name_pattern, res->num_servers,
res->mountdir );
"\t--help: prints the help without initializing MPI\n",res.workdir.c_str(),
res.timestamp_file, res.name_pattern, res.num_servers,
res.mountdir );
}
int pfind_size;
pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){
pfind_options_t pfind_parse_args(int argc, char **argv, int force_print_help){
pfind_rank = 0;
pfind_size = 1;
pfind_options_t *res = (pfind_options_t *)malloc(sizeof(pfind_options_t));
memset(res, 0, sizeof(pfind_options_t));
auto print_help = force_print_help;
res->workdir = "./";
res->results_dir = nullptr;
res->verbosity = 0;
res->timestamp_file = nullptr;
res->name_pattern = nullptr;
res->size = std::numeric_limits<uint64_t>::max();
res->queue_length = 100000;
res->max_entries_per_iter = 1000;
pfind_options_t res;
res.workdir = "./";
res.verbosity = 0;
res.timestamp_file = nullptr;
res.name_pattern = nullptr;
res.size = std::numeric_limits<uint64_t>::max();
char *firstarg = nullptr;
// when we find special args, we process them
@@ -131,51 +130,50 @@ pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){
// and getopt will continue to process beyond them
for (auto i = 1; i < argc - 1; i++) {
if (strcmp(argv[i], "-newer") == 0) {
res->timestamp_file = strdup(argv[i + 1]);
res.timestamp_file = strdup(argv[i + 1]);
argv[i][0] = 0;
argv[++i][0] = 0;
} else if (strcmp(argv[i], "-size") == 0) {
char *str = argv[i + 1];
char extension = str[strlen(str) - 1];
str[strlen(str) - 1] = 0;
res->size = atoll(str);
switch (extension) {
case 'c':
break;
default:
pfind_abort("Unsupported exension for -size\n");
res.size = atoll(str);
if (extension != 'c') {
pfind_abort("Unsupported extension for -size\n");
}
argv[i][0] = 0;
argv[++i][0] = 0;
} else if (strcmp(argv[i], "-name") == 0) {
res->name_pattern = (char *)malloc(strlen(argv[i + 1]) * 4 + 100);
res.name_pattern = (char *)malloc(strlen(argv[i + 1]) * 4 + 100);
// transform a traditional name pattern to a regex:
char *str = argv[i + 1];
char *out = res->name_pattern;
char * out = res.name_pattern;
auto pos = 0;
for (size_t i = 0; i < strlen(str); i++) {
if (str[i] == '*') {
for (size_t k = 0; k < strlen(str); k++) {
if (str[k] == '*') {
pos += sprintf(out + pos, ".*");
} else if (str[i] == '.') {
} else if (str[k] == '.') {
pos += sprintf(out + pos, "[.]");
} else if (str[i] == '"' || str[i] == '\"') {
} else if (str[k] == '"' || str[k] == '\"') {
// erase the "
} else {
out[pos] = str[i];
out[pos] = str[k];
pos++;
}
}
out[pos] = 0;
int ret = regcomp(&res->name_regex, res->name_pattern, 0);
int ret = regcomp(&res.name_regex, res.name_pattern, 0);
if (ret) {
pfind_abort("Invalid regex for name given\n");
}
argv[i][0] = 0;
argv[++i][0] = 0;
} else if (strcmp(argv[i], "-regex") == 0) {
res->name_pattern = strdup(argv[i + 1]);
int ret = regcomp(&res->name_regex, res->name_pattern, 0);
res.name_pattern = strdup(argv[i + 1]);
int ret = regcomp(&res.name_regex, res.name_pattern, 0);
if (ret) {
pfind_abort("Invalid regex for name given\n");
}
@@ -197,59 +195,33 @@ pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){
}
switch (c) {
case 'H':
res->parallel_single_dir_access = atoi(optarg);
break;
case 'N':
res->steal_from_next = 1;
break;
case 'x':
/* ignore fake arg that we added when we processed the extra args */
break;
case 'P':
res->print_by_process = 1;
break;
case 'C':
res->just_count = 1;
break;
case 'D':
if (strcmp(optarg, "rates") == 0) {
res->print_rates = 1;
} else {
pfind_abort("Unsupported debug flag\n");
}
break;
case 'h':
print_help = 1;
break;
case 'r':
res->results_dir = strdup(optarg);
break;
case 'q':
res->queue_length = atoi(optarg);
break;
if (res->queue_length < 10) {
pfind_abort("Queue must be at least 10 elements!\n");
}
break;
case 's':
res->stonewall_timer = atol(optarg);
res.stonewall_timer = atoi(optarg);
break;
case 'S':
res->num_servers = atoi(optarg);
res.num_servers = atoi(optarg);
break;
case 'M':
res->mountdir = strdup(optarg);
res.mountdir = strdup(optarg);
break;
case 'v':
res->verbosity++;
res.verbosity++;
break;
case 0:
break;
default:
cerr << "Unknown parameter" << endl;
break;
}
}
if (res->verbosity > 2 && pfind_rank == 0) {
printf("Regex: %s\n", res->name_pattern);
if (res.verbosity > 2 && pfind_rank == 0) {
printf("Regex: %s\n", res.name_pattern);
}
if (print_help) {
@@ -261,7 +233,7 @@ pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){
if (!firstarg) {
pfind_abort("Error: pfind <directory>\n");
}
res->workdir = firstarg;
res.workdir = firstarg;
return res;
}
@@ -273,23 +245,21 @@ pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){
* server, which is enough for most cases
*
*/
void dirProcess(const string path, unsigned long long &checked,
void dirProcess(const string & path, unsigned long long &checked,
unsigned long long &found, queue<string> &dirs,
unsigned int world_rank, unsigned int world_size,
pfind_options_t *opt) {
struct dirent_extended *getdir = (struct dirent_extended *)malloc(
pfind_options_t &opt) {
auto *getdir = (struct dirent_extended *)malloc(
(sizeof(struct dirent_extended) + 255) * 1024 * 100);
memset(getdir, 0, (sizeof(struct dirent_extended) + 255) * 1024 * 100);
// cout << "PROCESSING " << world_rank << "/"<< world_size << " = " << path <<
// endl;
for (auto server = 0; server < opt->num_servers; server++) {
for (auto server = 0; server < opt.num_servers; server++) {
unsigned long long total_size = 0;
unsigned long long n = gkfs_getsingleserverdir(
path.c_str(), getdir,
(sizeof(struct dirent_extended) + 255) * 1024 * 100, server);
struct dirent_extended *temp = getdir;
if (opt.verbosity) cerr << "[" << n << "] " << path.c_str() << endl;
while (total_size < n) {
if (strlen(temp->d_name) == 0)
break;
@@ -307,21 +277,24 @@ void dirProcess(const string path, unsigned long long &checked,
}
/* Find filtering */
auto timeOK = true;
if (opt->timestamp_file) {
if (opt.timestamp_file) {
if ((uint64_t)temp->ctime < runtime.ctime_min)
timeOK = false;
}
if (timeOK and (temp->size == opt->size or opt->size == std::numeric_limits<uint64_t>::max()))
if (!(opt->name_pattern &&
regexec(&opt->name_regex, temp->d_name, 0, nullptr, 0)))
if (timeOK and (temp->size == opt.size or opt.size == std::numeric_limits<uint64_t>::max()))
if (!(opt.name_pattern &&
regexec(&opt.name_regex, temp->d_name, 0, nullptr, 0)))
found++;
checked++;
if (opt.verbosity) cerr << temp->d_name << endl;
temp = reinterpret_cast<dirent_extended *>(reinterpret_cast<char *>(temp) + temp->d_reclen);
}
}
}
int process(pfind_options_t *opt) {
int process(pfind_options_t &opt) {
// Print off a hello world message
unsigned long long found,checked;
// INIT PFIND
@@ -329,11 +302,11 @@ int process(pfind_options_t *opt) {
checked = 0;
memset(&runtime, 0, sizeof(pfind_runtime_options_t));
/* Get timestamp file */
if (opt->timestamp_file) {
if (opt.timestamp_file) {
if (pfind_rank == 0) {
static struct stat timer_file{};
if (lstat(opt->timestamp_file, &timer_file) != 0) {
printf("Could not open: \"%s\", error: %s", opt->timestamp_file,
if (lstat(opt.timestamp_file, &timer_file) != 0) {
printf("Could not open: \"%s\", error: %s", opt.timestamp_file,
strerror(errno));
pfind_abort("\n");
}
@@ -342,17 +315,18 @@ int process(pfind_options_t *opt) {
}
queue<string> dirs;
string workdir = opt->workdir;
workdir = workdir.substr(strlen(opt->mountdir), workdir.size());
if (workdir.size() == 0)
string workdir = opt.workdir;
workdir = workdir.substr(strlen(opt.mountdir), workdir.size());
if (workdir.empty())
workdir = "/";
if (opt.verbosity) cerr << "STARTING PATH " << workdir << endl;
dirs.push(workdir);
do {
string processpath = dirs.front();
dirs.pop();
dirProcess(processpath, checked, found, dirs, 0, 1, opt);
if (opt.verbosity) cerr << "PROCESSING " << processpath << endl;
dirProcess(processpath, checked, found, dirs, opt);
// cout << "NO more paths " << dirs.size() << endl;
} while (!dirs.empty());
@@ -362,6 +336,10 @@ int process(pfind_options_t *opt) {
}
int main(int argc, char **argv) {
if (gkfs_getsingleserverdir == nullptr) {
cerr << "LD_PRELOAD not correctly defined or incorrect gekkofs version." << endl;
exit(1);
}
for (auto i = 0; i < argc; i++) {
if (strcmp(argv[i], "--help") == 0) {
@@ -371,7 +349,7 @@ int main(int argc, char **argv) {
exit(0);
}
}
pfind_options_t opt;
opt = pfind_parse_args(argc, argv, 0);
process(opt);
Loading