Verified Commit 1a44c795 authored by Tommaso Tocci's avatar Tommaso Tocci
Browse files

Remove mercury patch

This patch resolved the 100% CPU bug [1].

However, it also introduced invalid reads inside the mercury library, which
could occasionally lead to segmentation faults.

[1]: https://github.com/mercury-hpc/mercury/issues/229
parent 2ca389cb
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -286,8 +286,6 @@ if [ "$NA_LAYER" == "cci" ] || [ "$NA_LAYER" == "all" ]; then
    echo "########## Applying cci addr lookup error handling patch"
    git apply ${PATCH_DIR}/mercury_cci_verbs_lookup.patch
fi
echo "########## Applying mercury deregister socket patch"
git apply ${PATCH_DIR}/mercury_deregister_sock.patch
cd ${CURR}/build
$CMAKE -DMERCURY_USE_SELF_FORWARD:BOOL=ON -DMERCURY_USE_CHECKSUMS:BOOL=OFF -DBUILD_TESTING:BOOL=ON \
-DMERCURY_USE_BOOST_PP:BOOL=ON -DBUILD_SHARED_LIBS:BOOL=ON -DCMAKE_INSTALL_PREFIX=${INSTALL} \
+0 −63
Original line number Diff line number Diff line
From 7fbc56996cb5cdba7f0a4429b17c0db110346916 Mon Sep 17 00:00:00 2001
From: Tommaso Tocci <tommaso@tocci.pro>
Date: Mon, 2 Jul 2018 11:54:10 +0200
Subject: [PATCH] sm: clear client disconnection event on socket

---
 src/na/na_sm.c          | 13 +++++++++++++
 src/util/mercury_poll.c |  2 ++
 2 files changed, 15 insertions(+)

diff --git a/src/na/na_sm.c b/src/na/na_sm.c
index d96f1b6..4c728b1 100644
--- a/src/na/na_sm.c
+++ b/src/na/na_sm.c
@@ -2134,6 +2134,16 @@ na_sm_progress_sock(na_class_t *na_class, struct na_sm_addr *poll_addr,
             *progressed = NA_TRUE;
         }
         break;
+        case NA_SM_SOCK_DONE: {
+            *progressed = NA_FALSE;
+            ret = na_sm_poll_deregister(na_class, NA_SM_SOCK, poll_addr);
+            if (ret != NA_SUCCESS) {
+                NA_LOG_ERROR("Could not deregister socket from poll set");
+                ret = NA_PROTOCOL_ERROR;
+                goto done;
+            }
+        }
+        break;
         default:
             /* TODO Silently ignore, no progress */
             *progressed = NA_FALSE;
@@ -2858,10 +2868,13 @@ na_sm_addr_free(na_class_t *na_class, na_addr_t addr)
 
         /* Deregister sock file descriptor */
         ret = na_sm_poll_deregister(na_class, NA_SM_SOCK, na_sm_addr);
+
+        /*
         if (ret != NA_SUCCESS) {
             NA_LOG_ERROR("Could not delete sock from poll set");
             goto done;
         }
+        */
 
         /* Remove addr from poll addr queue */
         hg_thread_spin_lock(
diff --git a/src/util/mercury_poll.c b/src/util/mercury_poll.c
index afc7f8e..c2a38f5 100644
--- a/src/util/mercury_poll.c
+++ b/src/util/mercury_poll.c
@@ -399,8 +399,10 @@ hg_poll_remove(hg_poll_set_t *poll_set, int fd)
     hg_thread_spin_unlock(&poll_set->poll_data_list_lock);
 #endif
     if (!found) {
+        /*
         HG_UTIL_LOG_ERROR("Could not find fd in poll_set");
         ret = HG_UTIL_FAIL;
+        */
         goto done;
     }
     hg_atomic_decr32(&poll_set->nfds);
-- 
2.19.0