Line data Source code
1 : /*
2 : Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain
3 : Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany
4 :
5 : This software was partially supported by the
6 : EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
7 :
8 : This software was partially supported by the
9 : ADA-FS project under the SPPEXA project funded by the DFG.
10 :
11 : This file is part of GekkoFS' POSIX interface.
12 :
13 : GekkoFS' POSIX interface is free software: you can redistribute it and/or
14 : modify it under the terms of the GNU Lesser General Public License as
15 : published by the Free Software Foundation, either version 3 of the License,
16 : or (at your option) any later version.
17 :
18 : GekkoFS' POSIX interface is distributed in the hope that it will be useful,
19 : but WITHOUT ANY WARRANTY; without even the implied warranty of
20 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 : GNU Lesser General Public License for more details.
22 :
23 : You should have received a copy of the GNU Lesser General Public License
24 : along with GekkoFS' POSIX interface. If not, see
25 : <https://www.gnu.org/licenses/>.
26 :
27 : SPDX-License-Identifier: LGPL-3.0-or-later
28 : */
29 :
30 : #include <client/logging.hpp>
31 : #include <client/env.hpp>
32 : #include <client/make_array.hpp>
33 : #include <regex>
34 : #include <filesystem>
35 :
36 : extern "C" {
37 : #include <date/tz.h>
38 : #include <fmt/ostream.h>
39 : }
40 :
41 : #ifdef GKFS_ENABLE_LOGGING
42 :
43 : #include <hermes/logging.hpp>
44 :
45 : #endif
46 :
47 : namespace fs = std::filesystem;
48 :
49 : namespace {
50 : enum class split_str_mode {
51 : is_any_of,
52 : is_exactly_of // not used at the moment
53 : }; ///> Mode for string splitter
54 :
55 : /**
56 : * @brief Splits a string by a given delimiter for a given split_mode
57 : * @param input String to be splitted
58 : * @param delimiter split token
59 : * @param split_mode <split_str_node> how the string is split
60 : * @return vector<string> entries for splitted string
61 : */
62 : std::vector<std::string>
63 495 : split_str(const std::string& input, const std::string_view delimiter,
64 : const split_str_mode split_mode) {
65 495 : std::vector<std::string> out{};
66 495 : std::string regex_builder{};
67 495 : if(split_mode == split_str_mode::is_any_of) {
68 495 : regex_builder = fmt::format("[{}]+", delimiter);
69 : } else {
70 0 : regex_builder = delimiter;
71 : }
72 495 : const std::regex line_re(regex_builder,
73 495 : std::regex::ECMAScript | std::regex::optimize);
74 495 : std::copy(
75 990 : std::sregex_token_iterator(input.begin(), input.end(), line_re, -1),
76 495 : std::sregex_token_iterator(), std::back_inserter(out));
77 990 : return out;
78 : }
79 : } // namespace
80 :
81 : namespace gkfs::log {
82 :
83 : struct opt_info {
84 : const char name_[32];
85 : const std::size_t length_;
86 : const char help_text_[8][64];
87 : const log_level mask_;
88 : };
89 :
90 : #define STR_AND_LEN(strbuf) strbuf, sizeof(strbuf) - 1
91 :
92 : static const auto constexpr debug_opts = utils::make_array(
93 :
94 : opt_info{STR_AND_LEN("none"), {"don't print any messages"}, log::none},
95 :
96 : #ifdef GKFS_DEBUG_BUILD
97 :
98 : opt_info{STR_AND_LEN("syscalls"),
99 : {"Trace system calls: print the name of each system call,",
100 : "its arguments, and its return value. All system calls are",
101 : "printed after being executed save for those that may not",
102 : "return, such as execve() and execve_at()",
103 : "[ default: off ]"},
104 : log::syscall},
105 :
106 : opt_info{STR_AND_LEN("syscalls_at_entry"),
107 : {"Trace system calls: print the name of each system call",
108 : "and its arguments. All system calls are printed before ",
109 : "being executed and therefore their return values are not",
110 : "available in the log", "[ default: off ]"},
111 : log::syscall_at_entry},
112 :
113 : #endif // !GKFS_DEBUG_BUILD
114 :
115 : opt_info{STR_AND_LEN("info"),
116 : {"Print information messages", "[ default: on ]"},
117 : log::info},
118 :
119 : opt_info{STR_AND_LEN("critical"),
120 : {"Print critical errors", "[ default: on ]"},
121 : log::critical},
122 :
123 : opt_info{STR_AND_LEN("errors"),
124 : {"Print errors", "[ default: on ]"},
125 : log::error},
126 :
127 : opt_info{STR_AND_LEN("warnings"),
128 : {"Print warnings", "[ default: on ]"},
129 : log::warning},
130 :
131 : opt_info{STR_AND_LEN("hermes"),
132 : {"Print messages from Hermes (GekkoFS high-level RPC library)",
133 : "[ default: on ]"},
134 : log::hermes},
135 :
136 : opt_info{STR_AND_LEN("mercury"),
137 : {"Print messages from Mercury (GekkoFS low-level RPC library)",
138 : "[ default: on ]"},
139 : log::mercury},
140 :
141 : opt_info{STR_AND_LEN("trace_reads"),
142 : {"Print extended read information", "[ default: off ]"},
143 : log::trace_reads},
144 :
145 : #ifdef GKFS_DEBUG_BUILD
146 :
147 : opt_info{STR_AND_LEN("debug"),
148 : {"Print debug messages", "[ default: off ]"},
149 : log::debug},
150 :
151 : opt_info{STR_AND_LEN("most"),
152 : {"All previous options except 'syscalls_at_entry' combined."},
153 : log::most},
154 :
155 : #endif // !GKFS_DEBUG_BUILD
156 :
157 : opt_info{STR_AND_LEN("all"),
158 : {"All previous options combined."},
159 : log::all},
160 :
161 : opt_info{STR_AND_LEN("help"),
162 : {"Print this help message and exit."},
163 : log::help});
164 :
165 : static const auto constexpr max_debug_opt_length =
166 : sizeof("syscalls_at_entry") - 1;
167 :
168 : static const auto constexpr max_help_text_rows =
169 : sizeof(debug_opts[0].help_text_) / sizeof(debug_opts[0].help_text_[0]);
170 :
171 : /**
172 : * process_log_options -- process the string given as parameter to determine
173 : * which debugging options are enabled and return a
174 : * log_level describing them
175 : */
176 : log_level
177 248 : process_log_options(const std::string gkfs_debug) {
178 :
179 : #ifndef GKFS_ENABLE_LOGGING
180 :
181 : (void) gkfs_debug;
182 : logger::log_message(stdout, "warning: logging options ignored: "
183 : "logging support was disabled in this build");
184 : return log::none;
185 :
186 : #endif // ! GKFS_ENABLE_LOGGING
187 248 : log_level dm = log::none;
188 :
189 : // skip separating white spaces and commas
190 496 : auto tokens = ::split_str(gkfs_debug, " ,", ::split_str_mode::is_any_of);
191 :
192 496 : for(const auto& t : tokens) {
193 :
194 3224 : bool is_known = false;
195 :
196 3224 : for(const auto& opt : debug_opts) {
197 : // none disables any future and previous flags observed
198 3224 : if(t == "none") {
199 0 : return log::none;
200 : }
201 :
202 3224 : if(t == opt.name_) {
203 248 : dm |= opt.mask_;
204 248 : is_known = true;
205 248 : break;
206 : }
207 : }
208 :
209 248 : if(!is_known) {
210 0 : logger::log_message(stdout,
211 : "warning: logging option '{}' unknown; "
212 : "try {}=help",
213 : t, gkfs::env::LOG);
214 : }
215 : }
216 :
217 248 : if(!!(dm & log::help)) {
218 0 : logger::log_message(stdout,
219 : "Valid options for the {} "
220 : "environment variable are:\n",
221 : gkfs::env::LOG);
222 :
223 :
224 0 : for(const auto& opt : debug_opts) {
225 0 : const auto padding = max_debug_opt_length - opt.length_ + 2;
226 :
227 0 : logger::log_message(stdout, " {}{:>{}}{}", opt.name_, "", padding,
228 0 : opt.help_text_[0]);
229 :
230 0 : for(auto i = 1lu; i < max_help_text_rows; ++i) {
231 0 : if(opt.help_text_[i][0] != 0) {
232 0 : logger::log_message(stdout, " {:>{}}{}", "",
233 0 : max_debug_opt_length + 2,
234 0 : opt.help_text_[i]);
235 : }
236 : }
237 :
238 0 : logger::log_message(stdout, "");
239 : }
240 :
241 0 : logger::log_message(stdout,
242 : "\n"
243 : "To direct the logging output into a file "
244 : "instead of standard output\n"
245 : "a filename can be specified using the "
246 : "{} environment variable.",
247 : gkfs::env::LOG_OUTPUT);
248 0 : ::_exit(0);
249 : }
250 :
251 : return dm;
252 : }
253 :
254 : #ifdef GKFS_DEBUG_BUILD
255 :
256 : std::bitset<512>
257 248 : process_log_filter(const std::string& log_filter) {
258 248 : std::bitset<512> filtered_syscalls;
259 :
260 248 : if(log_filter.empty()) {
261 : return filtered_syscalls;
262 : }
263 :
264 : // skip separating white spaces and commas
265 247 : auto tokens = ::split_str(log_filter, " ,", ::split_str_mode::is_any_of);
266 :
267 741 : for(const auto& t : tokens) {
268 494 : const auto sc = syscall::lookup_by_name(t);
269 :
270 494 : if(std::strcmp(sc.name(), "unknown_syscall") == 0) {
271 0 : logger::log_message(stdout,
272 : "warning: system call '{}' unknown; "
273 : "will not filter",
274 : t);
275 0 : continue;
276 : }
277 :
278 494 : filtered_syscalls.set(sc.number());
279 : }
280 :
281 247 : return filtered_syscalls;
282 : }
283 :
284 : #endif // GKFS_DEBUG_BUILD
285 :
286 248 : logger::logger(const std::string& opts, const std::string& path,
287 : bool log_per_process, bool trunc
288 : #ifdef GKFS_DEBUG_BUILD
289 : ,
290 : const std::string& filter, int verbosity
291 : #endif
292 248 : )
293 248 : : timezone_(nullptr) {
294 :
295 : /* use stderr by default */
296 248 : log_fd_ = 2;
297 248 : log_process_id_ = ::syscall_no_intercept(SYS_gettid);
298 248 : log_mask_ = process_log_options(opts);
299 :
300 : #ifdef GKFS_DEBUG_BUILD
301 248 : filtered_syscalls_ = process_log_filter(filter);
302 248 : debug_verbosity_ = verbosity;
303 : #endif
304 :
305 248 : if(!path.empty()) {
306 248 : int flags = O_CREAT | O_RDWR | O_APPEND | O_TRUNC;
307 :
308 248 : if(!trunc) {
309 248 : flags &= ~O_TRUNC;
310 : }
311 :
312 496 : std::string file_path = path;
313 248 : if(log_per_process) {
314 0 : if(fs::is_regular_file(file_path) && fs::exists(file_path)) {
315 0 : fs::remove(file_path);
316 : }
317 0 : fs::create_directories(path);
318 0 : file_path = fmt::format("{}/{}", path, log_process_id_);
319 : }
320 :
321 : // we use ::open() here rather than ::syscall_no_intercept(SYS_open)
322 : // because we want the call to be intercepted by our hooks, which
323 : // allows us to categorize the resulting fd as 'internal' and
324 : // relocate it to our private range
325 248 : int fd = ::open(file_path.c_str(), flags, 0600);
326 :
327 248 : if(fd == -1) {
328 0 : log(gkfs::log::error, __func__, __LINE__,
329 : "Failed to open log "
330 : "file '{}'. Logging will fall back to stderr",
331 : path);
332 0 : return;
333 : }
334 :
335 248 : log_fd_ = fd;
336 : }
337 :
338 : // Finding the current timezone implies accessing OS files (i.e. syscalls),
339 : // but current_zone() doesn't actually retrieve the time zone but rather
340 : // provides a descriptor to it that is **atomically initialized** upon its
341 : // first use. Thus, if we don't force the initialization here, logging the
342 : // first intercepted syscall will produce a call to date::time_zone::init()
343 : // (under std::call_once) which internally ends up calling fopen(). Since
344 : // fopen() ends up calling sys_open(), we will need to generate another
345 : // timestamp for a system call log entry, which will attempt to call
346 : // date::time_zone::init() since the prior initialization (under the same
347 : // std::call_once) has not yet completed.
348 : //
349 : // Unfortunately, date::time_zone doesn't provide a function to prevent
350 : // this lazy initialization, therefore we force it by requesting
351 : // information from an arbitrary timepoint (January 1st 1970) which forces
352 : // the initialization. This doesn't do any actual work and could safely be
353 : // removed if the date API ends up providing this functionality.
354 248 : try {
355 248 : timezone_ = date::current_zone();
356 : #ifdef GKFS_DEBUG_BUILD
357 248 : using namespace date;
358 496 : timezone_->get_info(date::sys_days{January / 1 / 1970});
359 : #endif // GKFS_DEBUG_BUILD
360 0 : } catch(const std::exception& ex) {
361 : // if timezone initialization fails, setting timezone_ to nullptr
362 : // makes format_timestamp_to() default to producing epoch timestamps
363 0 : timezone_ = nullptr;
364 : }
365 :
366 : #ifdef GKFS_ENABLE_LOGGING
367 248 : const auto log_hermes_message =
368 73 : [](const std::string& msg, hermes::log::level l, int severity,
369 : const std::string& file, const std::string& func, int lineno) {
370 146 : const auto name = [](hermes::log::level l, int severity) {
371 73 : using namespace std::string_literals;
372 :
373 73 : switch(l) {
374 0 : case hermes::log::info:
375 0 : return "info"s;
376 0 : case hermes::log::warning:
377 0 : return "warning"s;
378 73 : case hermes::log::error:
379 73 : return "error"s;
380 0 : case hermes::log::fatal:
381 0 : return "fatal"s;
382 0 : case hermes::log::mercury:
383 0 : return "mercury"s;
384 0 : default:
385 0 : return "unknown"s;
386 : }
387 : };
388 :
389 146 : LOG(HERMES, "[{}] {}", name(l, severity), msg);
390 73 : };
391 :
392 : #ifdef GKFS_DEBUG_BUILD
393 248 : const auto log_hermes_debug_message =
394 68753 : [this](const std::string& msg, hermes::log::level l, int severity,
395 : const std::string& file, const std::string& func,
396 68753 : int lineno) {
397 68753 : if(severity > debug_verbosity_) {
398 : return;
399 : }
400 :
401 36816 : LOG(HERMES, "[debug{}] <{}():{}> {}",
402 : (severity == 0 ? "" : std::to_string(severity + 1)), func,
403 68753 : lineno, msg);
404 248 : };
405 : #endif // GKFS_DEBUG_BUILD
406 :
407 248 : const auto log_hg_message = [](const std::string& msg, hermes::log::level l,
408 : int severity, const std::string& file,
409 : const std::string& func, int lineno) {
410 0 : (void) l;
411 :
412 : // mercury message might contain one or more sub-messages
413 : // separated by '\n'
414 0 : auto sub_msgs = ::split_str(msg, "\n", ::split_str_mode::is_any_of);
415 :
416 0 : for(const auto& m : sub_msgs) {
417 0 : if(!m.empty()) {
418 0 : LOG(MERCURY, "{}", m);
419 : }
420 : }
421 0 : };
422 :
423 : // register log callbacks into hermes so that we can manage
424 : // both its and mercury's log messages
425 248 : hermes::log::logger::register_callback(hermes::log::info,
426 : log_hermes_message);
427 248 : hermes::log::logger::register_callback(hermes::log::warning,
428 : log_hermes_message);
429 248 : hermes::log::logger::register_callback(hermes::log::error,
430 : log_hermes_message);
431 248 : hermes::log::logger::register_callback(hermes::log::fatal,
432 : log_hermes_message);
433 : #ifdef GKFS_DEBUG_BUILD
434 248 : hermes::log::logger::register_callback(hermes::log::debug,
435 : log_hermes_debug_message);
436 : #endif
437 496 : hermes::log::logger::register_callback(hermes::log::mercury,
438 : log_hg_message);
439 :
440 : #endif // GKFS_ENABLE_LOGGING
441 : }
442 :
443 496 : logger::~logger() {
444 248 : log_fd_ = ::syscall_no_intercept(SYS_close, log_fd_);
445 248 : }
446 :
447 : void
448 22345811 : logger::log_syscall(syscall::info info, const long syscall_number,
449 : const long args[6], std::optional<long> result) {
450 :
451 :
452 22345811 : const bool log_syscall_entry = !!(log::syscall_at_entry & log_mask_);
453 22345811 : const bool log_syscall_result = !!(log::syscall & log_mask_);
454 :
455 : // log the syscall if and only if logging for syscalls is enabled
456 22345811 : if(!log_syscall_entry && !log_syscall_result) {
457 21173732 : return;
458 : }
459 :
460 : #ifdef GKFS_DEBUG_BUILD
461 22345617 : if(filtered_syscalls_[syscall_number]) {
462 : return;
463 : }
464 : #endif
465 :
466 : // log the syscall even if we don't have information on it, since it may
467 : // be important to the user (we assume that the syscall has completed
468 : // though)
469 1172079 : if(info == syscall::no_info) {
470 0 : goto print_syscall;
471 : }
472 :
473 : // log the syscall entry if the syscall may not return (e.g. execve) or
474 : // if we are sure that it won't ever return (e.g. exit), even if
475 : // log::syscall_at_entry is disabled
476 2344151 : if(syscall::may_not_return(syscall_number) ||
477 1172079 : syscall::never_returns(syscall_number)) {
478 773 : goto print_syscall;
479 : }
480 :
481 1171299 : if(log_syscall_entry && syscall::execution_is_pending(info)) {
482 585130 : goto print_syscall;
483 : }
484 :
485 586169 : if(log_syscall_result && !syscall::execution_is_pending(info)) {
486 586169 : goto print_syscall;
487 : }
488 :
489 : return;
490 :
491 1172072 : print_syscall:
492 :
493 2344208 : static_buffer buffer;
494 :
495 1172072 : detail::format_timestamp_to(buffer, timezone_);
496 1172008 : detail::format_syscall_info_to(buffer, info);
497 :
498 1172101 : if(result) {
499 586160 : syscall::decode(buffer, syscall_number, args, *result);
500 : } else {
501 585941 : syscall::decode(buffer, syscall_number, args);
502 : }
503 :
504 1172109 : fmt::format_to(buffer, "\n");
505 :
506 1172096 : ::syscall_no_intercept(SYS_write, log_fd_, buffer.data(), buffer.size());
507 : }
508 :
509 : } // namespace gkfs::log
|