LCOV - code coverage report
Current view: top level - examples/gfind - sfind.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 117 194 60.3 %
Date: 2024-04-30 13:21:35 Functions: 4 6 66.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :   Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain
       3             :   Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany
       4             : 
       5             :   This software was partially supported by the
       6             :   EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
       7             : 
       8             :   This software was partially supported by the
       9             :   ADA-FS project under the SPPEXA project funded by the DFG.
      10             : 
      11             :   This file is part of GekkoFS.
      12             : 
      13             :   GekkoFS is free software: you can redistribute it and/or modify
      14             :   it under the terms of the GNU General Public License as published by
      15             :   the Free Software Foundation, either version 3 of the License, or
      16             :   (at your option) any later version.
      17             : 
      18             :   GekkoFS is distributed in the hope that it will be useful,
      19             :   but WITHOUT ANY WARRANTY; without even the implied warranty of
      20             :   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      21             :   GNU General Public License for more details.
      22             : 
      23             :   You should have received a copy of the GNU General Public License
      24             :   along with GekkoFS.  If not, see <https://www.gnu.org/licenses/>.
      25             : 
      26             :   SPDX-License-Identifier: GPL-3.0-or-later
      27             : */
      28             : 
      29             : /* Based on pfind from ior500 */
      30             : /* https://github.com/VI4IO/pfind/ */
      31             : 
#include <getopt.h>
#include <regex.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <cerrno>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <queue>
#include <string>
#include <vector>
      45             : 
      46             : using namespace std;
      47             : 
      48             : /* Minimal struct needed for io500 find */
      49             : /* We could also do the filtering on the server */
      50             : struct dirent_extended {
      51             :   size_t size;
      52             :   time_t ctime;
      53             :   unsigned short d_reclen;
      54             :   unsigned char d_type;
      55             :   char d_name[1];
      56             : };
      57             : 
      58             : /* Function exported from GekkoFS LD_PRELOAD, code needs to be compiled with
      59             :  * -fPIC */
      60             : extern "C" int gkfs_getsingleserverdir(const char *path,
      61             :                                        struct dirent_extended *dirp,
      62             :                                        unsigned int count, int server)
      63             :     __attribute__((weak));
      64             : 
      65             : /* PFIND OPTIONS EXTENDED We need to add the GekkoFS mount dir and the number of
      66             :  * servers */
      67             : typedef struct {
      68             :   char* workdir;
      69             :   int just_count;
      70             :   int print_by_process;
      71             :   char *results_dir;
      72             :   int stonewall_timer;
      73             :   int print_rates;
      74             : 
      75             :   char *timestamp_file;
      76             :   char *name_pattern;
      77             :   regex_t name_regex;
      78             :   uint64_t size;
      79             : 
      80             :   int num_servers;
      81             :   char *mountdir;
      82             :   // optimizing parameters NOT USED
      83             :   int queue_length;
      84             :   int max_entries_per_iter;
      85             :   int steal_from_next;            // if true, then steal from the next process
      86             :   int parallel_single_dir_access; // if 1, use hashing to parallelize single
      87             :                                   // directory access, if 2 sequential increment
      88             : 
      89             :   int verbosity;
      90             : } pfind_options_t;
      91             : 
      92             : typedef struct {
      93             :   uint64_t ctime_min;
      94             :   double stonewall_endtime;
      95             :   FILE *logfile;
      96             :   int needs_stat;
      97             : } pfind_runtime_options_t;
      98             : 
      99             : static pfind_runtime_options_t runtime;
     100             : 
     101             : int pfind_rank;
     102             : 
     103             : static pfind_options_t *opt;
     104             : 
     105           0 : void pfind_abort(const std::string str) {
     106           0 :   printf("%s", str.c_str());
     107           0 :   exit(1);
     108             : }
     109             : 
     110           0 : static void pfind_print_help(pfind_options_t *res) {
     111           0 :   printf("pfind \nSynopsis:\n"
     112             :          "pfind <workdir> [-newer <timestamp file>] [-size <size>c] [-name "
     113             :          "<substr>] [-regex <regex>] [-S <numserver>] [-M <mountdir>]\n"
     114             :          "\tworkdir = \"%s\"\n"
     115             :          "\t-newer = \"%s\"\n"
     116             :          "\t-name|-regex = \"%s\"\n"
     117             :          "\t-S: num servers = \"%d\"\n"
     118             :          "\t-M: mountdir = \"%s\"\n"
     119             :          "Optional flags\n"
     120             :          "\t-h: prints the help\n"
     121             :          "\t--help: prints the help without initializing MPI\n",res->workdir,
     122             :          res->timestamp_file, res->name_pattern, res->num_servers,
     123             :          res->mountdir );
     124           0 : }
     125             : int pfind_size;
     126           1 : pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){
     127             : 
     128           1 :   pfind_rank = 0;
     129           1 :   pfind_size = 1;
     130             : 
     131           1 :   pfind_options_t *res = (pfind_options_t *)malloc(sizeof(pfind_options_t));
     132             :   // Init Values
     133           1 :   res->just_count = 0;
     134           1 :   res->print_by_process = 0;
     135           1 :   res->stonewall_timer = 0;
     136           1 :   res->print_rates = 0;
     137           1 :   res->name_regex = {};
     138           1 :   res->num_servers = 0;
     139           1 :   res->mountdir = nullptr;
     140           1 :   res->queue_length = 0;
     141           1 :   res->max_entries_per_iter = 0;
     142           1 :   res->steal_from_next = 0;
     143           1 :   res->parallel_single_dir_access = 0;
     144             : 
     145           1 :   auto print_help = force_print_help;
     146           1 :   res->workdir = nullptr;
     147           1 :   res->results_dir = nullptr;
     148           1 :   res->verbosity = 0;
     149           1 :   res->timestamp_file = nullptr;
     150           1 :   res->name_pattern = nullptr;
     151             :  
     152           1 :   res->size = std::numeric_limits<uint64_t>::max();
     153           1 :   res->queue_length = 100000;
     154           1 :   res->max_entries_per_iter = 1000;
     155           1 :   char *firstarg = nullptr;
     156             :   
     157             :   // when we find special args, we process them
     158             :   // but we need to replace them with 0 so that getopt will ignore them
     159             :   // and getopt will continue to process beyond them
     160           7 :   for (auto i = 1; i < argc - 1; i++) {
     161           6 :     if (strcmp(argv[i], "-newer") == 0) {
     162           0 :       res->timestamp_file = strdup(argv[i + 1]);
     163           0 :       argv[i][0] = 0;
     164           0 :       argv[++i][0] = 0;
     165           6 :     } else if (strcmp(argv[i], "-size") == 0) {
     166           0 :       char *str = argv[i + 1];
     167           0 :       char extension = str[strlen(str) - 1];
     168           0 :       str[strlen(str) - 1] = 0;
     169           0 :       res->size = atoll(str);
     170           0 :       switch (extension) {
     171             :       case 'c':
     172             :         break;
     173           0 :       default:
     174           0 :         pfind_abort("Unsupported exension for -size\n");
     175             :       }
     176           0 :       argv[i][0] = 0;
     177           0 :       argv[++i][0] = 0;
     178           6 :     } else if (strcmp(argv[i], "-name") == 0) {
     179           1 :       res->name_pattern = (char *)malloc(strlen(argv[i + 1]) * 4 + 100);
     180             :       // transform a traditional name pattern to a regex:
     181           1 :       char *str = argv[i + 1];
     182           1 :       char *out = res->name_pattern;
     183           1 :       auto pos = 0;
     184           5 :       for (long unsigned int i = 0; i < strlen(str); i++) {
     185           4 :         if (str[i] == '*') {
     186           2 :           pos += sprintf(out + pos, ".*");
     187           2 :         } else if (str[i] == '.') {
     188           0 :           pos += sprintf(out + pos, "[.]");
     189           2 :         } else if (str[i] == '"' || str[i] == '\"') {
     190             :           // erase the "
     191             :         } else {
     192           2 :           out[pos] = str[i];
     193           2 :           pos++;
     194             :         }
     195             :       }
     196           1 :       out[pos] = 0;
     197             : 
     198           1 :       int ret = regcomp(&res->name_regex, res->name_pattern, 0);
     199           1 :       if (ret) {
     200           0 :         pfind_abort("Invalid regex for name given\n");
     201             :       }
     202           1 :       argv[i][0] = 0;
     203           1 :       argv[++i][0] = 0;
     204           5 :     } else if (strcmp(argv[i], "-regex") == 0) {
     205           0 :       res->name_pattern = strdup(argv[i + 1]);
     206           0 :       int ret = regcomp(&res->name_regex, res->name_pattern, 0);
     207           0 :       if (ret) {
     208           0 :         pfind_abort("Invalid regex for name given\n");
     209             :       }
     210           0 :       argv[i][0] = 0;
     211           0 :       argv[++i][0] = 0;
     212           5 :     } else if (!firstarg) {
     213           1 :       firstarg = strdup(argv[i]);
     214           1 :       argv[i][0] = 0;
     215             :     }
     216             :   }
     217           1 :   if (argc == 2) {
     218           0 :     firstarg = strdup(argv[1]);
     219             :   }
     220             : 
     221           3 :   int c;
     222           3 :   while ((c = getopt(argc, argv, "CPs:r:vhD:xq:H:NM:S:")) != -1) {
     223           2 :     if (c == -1) {
     224             :       break;
     225             :     }
     226             : 
     227           2 :     switch (c) {
     228           0 :     case 'H':
     229           0 :       res->parallel_single_dir_access = atoi(optarg);
     230           0 :       break;
     231           0 :     case 'N':
     232           0 :       res->steal_from_next = 1;
     233           0 :       break;
     234             :     case 'x':
     235             :       /* ignore fake arg that we added when we processed the extra args */
     236             :       break;
     237           0 :     case 'P':
     238           0 :       res->print_by_process = 1;
     239           0 :       break;
     240           0 :     case 'C':
     241           0 :       res->just_count = 1;
     242           0 :       break;
     243           0 :     case 'D':
     244           0 :       if (strcmp(optarg, "rates") == 0) {
     245           0 :         res->print_rates = 1;
     246             :       } else {
     247           0 :         pfind_abort("Unsupported debug flag\n");
     248             :       }
     249             :       break;
     250           0 :     case 'h':
     251           0 :       print_help = 1;
     252           0 :       break;
     253           0 :     case 'r':
     254           0 :       res->results_dir = strdup(optarg);
     255           0 :       break;
     256           0 :     case 'q':
     257           0 :       res->queue_length = atoi(optarg);
     258           0 :       break;
     259             :       if (res->queue_length < 10) {
     260             :         pfind_abort("Queue must be at least 10 elements!\n");
     261             :       }
     262             :       break;
     263           0 :     case 's':
     264           0 :       res->stonewall_timer = atol(optarg);
     265           0 :       break;
     266           1 :     case 'S':
     267           1 :       res->num_servers = atoi(optarg);
     268           1 :       break;
     269           1 :     case 'M':
     270           1 :       res->mountdir = strdup(optarg);
     271           1 :       break;
     272           0 :     case 'v':
     273           0 :       res->verbosity++;
     274           0 :       break;
     275             :     case 0:
     276             :       break;
     277             :     }
     278             :   }
     279           1 :   if (res->verbosity > 2 && pfind_rank == 0) {
     280           0 :     printf("Regex: %s\n", res->name_pattern);
     281             :   }
     282             : 
     283           1 :   if (print_help) {
     284           0 :     if (pfind_rank == 0)
     285           0 :       pfind_print_help(res);
     286           0 :     exit(0);
     287             :   }
     288             : 
     289           1 :   if (!firstarg) {
     290           0 :     pfind_abort("Error: pfind <directory>\n");
     291             :   }
     292           1 :   res->workdir = firstarg;
     293             : 
     294           1 :   return res;
     295             : }
     296             : 
     297             : /* Client Processing a path.
     298             :  * We increment local checked/found based on the filters
     299             :  * Each client sends the request to a subset of GekkoFS servers.
     300             :  * We use 102400 (plus space from 255 chars paths) so it is nearly 1M files per
     301             :  * server, which is enough for most cases
     302             :  *
     303             :  */
     304           4 : void dirProcess(const string path, unsigned long long &checked,
     305             :                 unsigned long long &found, queue<string> &dirs,
     306             :                 unsigned int world_rank, unsigned int world_size,
     307             :                 pfind_options_t *opt) {
     308           4 :   struct dirent_extended *getdir = (struct dirent_extended *)malloc(
     309             :       (sizeof(struct dirent_extended) + 255) * 1024 * 100);
     310           4 :   memset(getdir, 0, (sizeof(struct dirent_extended) + 255) * 1024 * 100);
     311             :   // cout << "PROCESSING " << world_rank << "/"<< world_size << " = " << path <<
     312             :   // endl;
     313             : 
     314           8 :   for (auto server = 0; server < opt->num_servers; server++) {
     315           4 :     unsigned long long total_size = 0;
     316           4 :     long unsigned int n = gkfs_getsingleserverdir(
     317             :         path.c_str(), getdir,
     318           4 :         (sizeof(struct dirent_extended) + 255) * 1024 * 100, server);
     319           4 :     struct dirent_extended *temp = getdir;
     320             : 
     321           8 :     while (total_size < n) {
     322           6 :       if (strlen(temp->d_name) == 0)
     323             :         break;
     324           4 :       total_size += temp->d_reclen;
     325             :       /* Queue directory to process */
     326           4 :       if (temp->d_type == 1) {
     327           6 :         string slash;
     328           3 :         if (path[path.size() - 1] != '/')
     329           3 :           slash = "/";
     330           3 :         checked++;
     331           6 :         dirs.push(path + slash + temp->d_name);
     332           3 :         temp =
     333           3 :             reinterpret_cast<dirent_extended *>(reinterpret_cast<char *>(temp) + temp->d_reclen);
     334           3 :         continue;
     335             :       }
     336             :       /* Find filtering */
     337           1 :       auto timeOK = true;
     338           1 :       if (opt->timestamp_file) {
     339           0 :         if ((uint64_t)temp->ctime < runtime.ctime_min)
     340             :           timeOK = false;
     341             :       }
     342           1 :       if (timeOK and (temp->size == opt->size or opt->size == std::numeric_limits<uint64_t>::max()))
     343           2 :         if (!(opt->name_pattern &&
     344           1 :               regexec(&opt->name_regex, temp->d_name, 0, nullptr, 0)))
     345           0 :           found++;
     346           1 :       checked++;
     347           1 :       temp = reinterpret_cast<dirent_extended *>(reinterpret_cast<char *>(temp) + temp->d_reclen);
     348             :     }
     349             :   }
     350           4 : }
     351             : 
     352           1 : int process(pfind_options_t *opt) {
     353             :   // Print off a hello world message
     354           1 :   unsigned long long found,checked;
     355             :   // INIT PFIND
     356           1 :   found = 0;
     357           1 :   checked = 0;
     358           1 :   memset(&runtime, 0, sizeof(pfind_runtime_options_t));
     359             :   
     360             :   /* Get timestamp file */
     361           1 :   if (opt->timestamp_file) {
     362           0 :     if (pfind_rank == 0) {
     363           0 :       static struct stat timer_file{};
     364           0 :       if (lstat(opt->timestamp_file, &timer_file) != 0) {
     365           0 :         printf("Could not open: \"%s\", error: %s", opt->timestamp_file,
     366           0 :                strerror(errno));
     367           0 :         pfind_abort("\n");
     368             :       }
     369           0 :       runtime.ctime_min = timer_file.st_ctime;
     370             :     }
     371             :   }
     372             : 
     373           1 :   queue<string> dirs;
     374           2 :   string workdir = opt->workdir;
     375           1 :   workdir = workdir.substr(strlen(opt->mountdir), workdir.size());
     376           1 :   if (workdir.size() == 0)
     377           0 :       workdir = "/";
     378           1 :   dirs.push(workdir);
     379             : 
     380           4 :   do {
     381           4 :       string processpath = dirs.front();
     382           4 :       dirs.pop();
     383             : 
     384           8 :       dirProcess(processpath, checked, found, dirs, 0, 1, opt);
     385             :       // cout << "NO more paths " << dirs.size() << endl;
     386           4 :     } while (!dirs.empty());
     387             : 
     388           1 :     cout << "MATCHED " << found << "/" << checked << endl;
     389             : 
     390           2 :   return 0;
     391             : }
     392             : 
     393           1 : int main(int argc, char **argv) {
     394             : 
     395           9 :   for (auto i = 0; i < argc; i++) {
     396           8 :     if (strcmp(argv[i], "--help") == 0) {
     397           0 :       argv[i][0] = 0;
     398           0 :       pfind_rank = 0;
     399           0 :       pfind_parse_args(argc, argv, 1);
     400           0 :       exit(0);
     401             :     }
     402             :   }
     403             : 
     404           1 :   opt = pfind_parse_args(argc, argv, 0);
     405             : 
     406           1 :   process(opt);
     407             : 
     408           1 : }

Generated by: LCOV version 1.16