From 9e108f474a8e79a720c1eecef0e11ef5b7699e7b Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Tue, 4 Jan 2022 15:24:03 +1100 Subject: [PATCH 01/31] First stab - doesn't seem work properly with the old WinOFED code --- hdRDMA.cc | 30 +++++-- hdRDMA.h | 5 +- hdRDMAThread.cc | 107 ++++++++++++++++++------ hdRDMAThread.h | 27 ++++-- vc/hdrdmacp.sln | 31 +++++++ vc/hdrdmacp.vcxproj | 161 ++++++++++++++++++++++++++++++++++++ vc/hdrdmacp.vcxproj.filters | 36 ++++++++ vc/hdrdmacp.vcxproj.user | 19 +++++ 8 files changed, 372 insertions(+), 44 deletions(-) create mode 100644 vc/hdrdmacp.sln create mode 100644 vc/hdrdmacp.vcxproj create mode 100644 vc/hdrdmacp.vcxproj.filters create mode 100644 vc/hdrdmacp.vcxproj.user diff --git a/hdRDMA.cc b/hdRDMA.cc index 0bc3fdd..e124261 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -3,7 +3,6 @@ #include #include -#include #include #include @@ -48,9 +47,11 @@ hdRDMA::hdRDMA() case IBV_TRANSPORT_IWARP: transport_type = "IWARP"; break; +#ifdef __GNUC__ case IBV_EXP_TRANSPORT_SCIF: transport_type = "SCIF"; break; +#endif default: transport_type = "UNKNOWN"; break; @@ -87,7 +88,12 @@ hdRDMA::hdRDMA() cout << " device " << i << " : " << devs[i]->name +#ifdef __GNUC__ << " : " << devs[i]->dev_name +#endif +#ifdef _MSC_VER + << " : " << devs[i]->name +#endif << " : " << transport_type << " : " << ibv_node_type_str(devs[i]->node_type) << " : Num. ports=" << Nports @@ -113,7 +119,9 @@ hdRDMA::hdRDMA() ibv_query_gid(ctx, port_num, index, &gid); cout << "Device " << dev->name << " opened." +#ifdef __GNUC__ << " num_comp_vectors=" << ctx->num_comp_vectors +#endif << endl; // Print some of the port attributes @@ -126,7 +134,9 @@ hdRDMA::hdRDMA() cout << " active_width: " << (uint64_t)port_attr.active_width << endl; cout << " active_speed: " << (uint64_t)port_attr.active_speed << endl; cout << " phys_state: " << (uint64_t)port_attr.phys_state << endl; +#ifdef __GNUC__ cout << " link_layer: " << (uint64_t)port_attr.link_layer << endl; +#endif // Allocate protection domain pd = ibv_alloc_pd(ctx); @@ -195,6 +205,10 @@ hdRDMA::~hdRDMA() if( buff!=nullptr ) delete[] buff; if( pd!=nullptr ) ibv_dealloc_pd( pd ); if( ctx!=nullptr ) ibv_close_device( ctx ); + +#ifdef _MSC_VER +# define SHUT_RDWR SD_BOTH +#endif if( server_sockfd ) shutdown( server_sockfd, SHUT_RDWR ); } @@ -209,7 +223,7 @@ void hdRDMA::Listen(int port) { // Create socket, bind it and put it into the listening state. struct sockaddr_in addr; - bzero( &addr, sizeof(addr) ); + memset( &addr, 0, sizeof(addr) ); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); addr.sin_port = htons( port ); @@ -231,7 +245,7 @@ void hdRDMA::Listen(int port) thread_started = true; while( !done ){ - int peer_sockfd = 0; + SOCKET peer_sockfd = 0; struct sockaddr_in peer_addr; socklen_t peer_addr_len = sizeof(struct sockaddr_in); peer_sockfd = accept(server_sockfd, (struct sockaddr *)&peer_addr, &peer_addr_len); @@ -272,7 +286,7 @@ void hdRDMA::StopListening(void) server_thread->join(); delete server_thread; server_thread = nullptr; - if( server_sockfd ) close( server_sockfd ); + if( server_sockfd ) closesocket( server_sockfd ); server_sockfd = 0; }else{ cout << "Server not running." < &buffers, int Nre //cout << "buffer_pool.size()="< x(NULL, [&](int*){ close(sockfd); stopped=true;}); + std::shared_ptr x(NULL, [&](int*){ closesocket(sockfd); stopped=true;}); // Get pool buffers (up to 4). If none are available then tell // remote client we have too many RDMA connections. @@ -112,7 +169,7 @@ void hdRDMAThread::ThreadRun(int sockfd) // No buffers in MR available. Notify remote peer and exit thread std::string mess("BD: RDMA server has no more MR buffers (too many connections)"); cerr << mess << endl; - write(sockfd, mess.c_str(), mess.length()+1); + send64(sockfd, mess.c_str(), mess.length()+1, 0); return; } @@ -126,20 +183,20 @@ void hdRDMAThread::ThreadRun(int sockfd) // errors in a separate thread if we wanted to guarantee that we were // processing the data as fast as it is coming in. That adds some // significant complication so we skip it for now. - int cq_size = buffers.size(); + size_t cq_size = buffers.size(); comp_channel = ibv_create_comp_channel( hdrdma->ctx ); cq = ibv_create_cq( hdrdma->ctx, cq_size, NULL, comp_channel, 0); if( !cq ){ std::stringstream ss; ss << "BD: ERROR: Unable to create Completion Queue! errno=" << errno; cerr << ss.str() << endl; - write(sockfd, ss.str().c_str(), ss.str().length()+1); + send64(sockfd, ss.str().c_str(), ss.str().length()+1, 0); return; } // Tell remote peer we are ready to exchange QPInfo std::string mess("OK:"); - write(sockfd, mess.c_str(), mess.length()); + send64(sockfd, mess.c_str(), mess.length(), 0); // Exchange QP info over TCP socket so we can transmit via RDMA try{ @@ -226,8 +283,8 @@ void hdRDMAThread::PostWR( int id ) struct ibv_recv_wr wr; struct ibv_sge sge; - bzero( &wr, sizeof(wr)); - bzero( &sge, sizeof(sge)); + memset( &wr, 0, sizeof(wr)); + memset( &sge, 0, sizeof(sge)); wr.wr_id = id; wr.sg_list = &sge; wr.num_sge = 1; @@ -249,9 +306,9 @@ void hdRDMAThread::PostWR( int id ) // QP to the RTS (Ready To Send) state and RTR (Ready to Receive) // state. //------------------------------------------------------------- -void hdRDMAThread::ExchangeQPInfo( int sockfd ) +void hdRDMAThread::ExchangeQPInfo( SOCKET sockfd ) { - int n; + size_t n; struct QPInfo tmp_qp_info; // Create a new QP to use with the remote peer. @@ -267,7 +324,7 @@ void hdRDMAThread::ExchangeQPInfo( int sockfd ) // the same. This will be true if we're using the same executable. //------ Send QPInfo --------- - n = write(sockfd, (char *)&tmp_qp_info, sizeof(struct QPInfo)); + n = send64(sockfd, (char *)&tmp_qp_info, sizeof(struct QPInfo), 0); if( n!= sizeof(struct QPInfo) ){ std::stringstream ss; ss << "ERROR: Sending QPInfo! Tried sending " << sizeof(struct QPInfo) << " bytes but only " << n << " were sent!"; @@ -275,7 +332,7 @@ void hdRDMAThread::ExchangeQPInfo( int sockfd ) } //------ Receive QPInfo --------- - n = read(sockfd, (char *)&tmp_qp_info, sizeof(struct QPInfo)); + n = recv64(sockfd, (char *)&tmp_qp_info, sizeof(struct QPInfo), MSG_WAITALL); if( n!= sizeof(struct QPInfo) ){ std::stringstream ss; ss << "ERROR: Sending QPInfo! Tried reading " << sizeof(struct QPInfo) << " bytes but only " << n << " were read!!"; @@ -306,7 +363,7 @@ void hdRDMAThread::CreateQP(void) // Set up attributes for creating a QP. struct ibv_qp_init_attr qp_init_attr; - bzero( &qp_init_attr, sizeof(qp_init_attr) ); + memset( &qp_init_attr, 0, sizeof(qp_init_attr) ); qp_init_attr.send_cq = cq; qp_init_attr.recv_cq = cq; qp_init_attr.cap.max_send_wr = 1; @@ -341,7 +398,7 @@ int hdRDMAThread::SetToRTS(void) /* change QP state to INIT */ { struct ibv_qp_attr qp_attr; - bzero( &qp_attr, sizeof(qp_attr) ); + memset( &qp_attr, 0, sizeof(qp_attr) ); qp_attr.qp_state = IBV_QPS_INIT, qp_attr.pkey_index = 0, qp_attr.port_num = hdrdma->port_num, @@ -362,7 +419,7 @@ int hdRDMAThread::SetToRTS(void) /* Change QP state to RTR */ { struct ibv_qp_attr qp_attr; - bzero( &qp_attr, sizeof(qp_attr) ); + memset( &qp_attr, 0, sizeof(qp_attr) ); qp_attr.qp_state = IBV_QPS_RTR, qp_attr.path_mtu = IB_MTU, qp_attr.dest_qp_num = remote_qpinfo.qp_num, @@ -389,7 +446,7 @@ int hdRDMAThread::SetToRTS(void) /* Change QP state to RTS */ { struct ibv_qp_attr qp_attr; - bzero( &qp_attr, sizeof(qp_attr) ); + memset( &qp_attr, 0, sizeof(qp_attr) ); qp_attr.qp_state = IBV_QPS_RTS, qp_attr.timeout = 14, qp_attr.retry_cnt = 7, @@ -525,11 +582,11 @@ void hdRDMAThread::ReceiveBuffer(uint8_t *buff, uint32_t buff_len) // via TCP to the server, but nothing will have been read/written // yet. //------------------------------------------------------------- -void hdRDMAThread::ClientConnect( int sockfd ) +void hdRDMAThread::ClientConnect( SOCKET sockfd ) { // This bit of magic ensures that the sockfd is closed and our "stopped" // flag is set before leaving this method, even if early due to error. - std::shared_ptr x(NULL, [&](int*){ close(sockfd); stopped=true;}); + std::shared_ptr x(NULL, [&](int*){ closesocket(sockfd); stopped=true;}); // Get pool buffers (all of them). If none are available then throw exception hdrdma->GetBuffers( buffers ); @@ -561,12 +618,12 @@ void hdRDMAThread::ClientConnect( int sockfd ) // Read first 3 bytes from TCP socket to make sure the server is able to // send us QPInfo. char str[256]; - bzero(str, 256); // status code does not include terminating null - auto n = read(sockfd, str, 3); + memset(str, 0, 256); // status code does not include terminating null + auto n = recv64(sockfd, str, 3, MSG_WAITALL); if( n!= 3 ) throw Exception("ERROR: Unable to read 3 byte status code from TCP socket!" ); if( std::string(str) != "OK:" ){ - auto n = read(sockfd, str, 256); + auto n = recv64(sockfd, str, 256, MSG_WAITALL); if( n<=0 ) sprintf(str, "Unknown error status from server"); throw Exception( str ); } @@ -599,8 +656,8 @@ void hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bo struct ibv_send_wr wr, *bad_wr = nullptr; struct ibv_sge sge; - bzero( &wr, sizeof(wr) ); - bzero( &sge, sizeof(sge) ); + memset( &wr, 0, sizeof(wr) ); + memset( &sge, 0, sizeof(sge) ); wr.opcode = IBV_WR_SEND; wr.sg_list = &sge; diff --git a/hdRDMAThread.h b/hdRDMAThread.h index d9b96c4..7471374 100644 --- a/hdRDMAThread.h +++ b/hdRDMAThread.h @@ -19,6 +19,19 @@ class hdRDMA; +#ifdef __GNUC__ +#define PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__)) +#endif + +#ifdef _MSC_VER +#define PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop)) +#endif + +#ifdef __GNUC__ +typedef int SOCKET; +#define closesocket close +#endif + class hdRDMAThread{ public: @@ -32,18 +45,18 @@ class hdRDMAThread{ }HeaderInfoFlag_t; // Header info sent as first bytes of data packed - struct HeaderInfo { + PACK(struct HeaderInfo { uint32_t header_len; uint16_t buff_type; uint16_t flags; // bit 0=first, 1=last uint32_t payload; - }__attribute__ ((packed)); + }); // Hold info of queue pair on one side of connection - struct QPInfo { + PACK(struct QPInfo { uint16_t lid; uint32_t qp_num; - }__attribute__ ((packed)); + }); class Exception:public std::exception{ public: @@ -58,13 +71,13 @@ class hdRDMAThread{ hdRDMAThread(hdRDMA *hdrdma); ~hdRDMAThread(); - void ThreadRun(int sockfd); + void ThreadRun(SOCKET sockfd); void PostWR( int id ); // id= index to buffers - void ExchangeQPInfo( int sockfd ); + void ExchangeQPInfo( SOCKET sockfd ); void CreateQP(void); int SetToRTS(void); void ReceiveBuffer(uint8_t *buff, uint32_t buff_len); - void ClientConnect( int sockfd ); + void ClientConnect( SOCKET sockfd ); void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false); void PollCQ(void); bool makePath( const std::string &path ); diff --git a/vc/hdrdmacp.sln b/vc/hdrdmacp.sln new file mode 100644 index 0000000..c09750b --- /dev/null +++ b/vc/hdrdmacp.sln @@ -0,0 +1,31 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31912.275 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hdrdmacp", "hdrdmacp.vcxproj", "{06121512-5986-437C-AD1F-79FCF2224F15}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {06121512-5986-437C-AD1F-79FCF2224F15}.Debug|x64.ActiveCfg = Debug|x64 + {06121512-5986-437C-AD1F-79FCF2224F15}.Debug|x64.Build.0 = Debug|x64 + {06121512-5986-437C-AD1F-79FCF2224F15}.Debug|x86.ActiveCfg = Debug|Win32 + {06121512-5986-437C-AD1F-79FCF2224F15}.Debug|x86.Build.0 = Debug|Win32 + {06121512-5986-437C-AD1F-79FCF2224F15}.Release|x64.ActiveCfg = Release|x64 + {06121512-5986-437C-AD1F-79FCF2224F15}.Release|x64.Build.0 = Release|x64 + {06121512-5986-437C-AD1F-79FCF2224F15}.Release|x86.ActiveCfg = Release|Win32 + {06121512-5986-437C-AD1F-79FCF2224F15}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {01A3DA07-F0C6-480B-B543-D04A90883D52} + EndGlobalSection +EndGlobal diff --git a/vc/hdrdmacp.vcxproj b/vc/hdrdmacp.vcxproj new file mode 100644 index 0000000..570f89e --- /dev/null +++ b/vc/hdrdmacp.vcxproj @@ -0,0 +1,161 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + 16.0 + Win32Proj + {06121512-5986-437c-ad1f-79fcf2224f15} + ConsoleApplication1 + 10.0 + + + + Application + true + v143 + Unicode + + + Application + false + v143 + true + Unicode + + + Application + true + v143 + Unicode + + + Application + false + v143 + true + Unicode + + + + + + + + + + + + + + + + + + + + + true + + + false + + + true + + + false + + + + Level3 + true + _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + $(OFED_INCLUDE);$(OFED_INCLUDE)\linux;$(OFED_INCLUDE)\etc;..\;$(ZLIB_INCLUDE);%(AdditionalIncludeDirectories) + + + Console + true + ws2_32.lib;$(ZLIB_LIBRARY);$(OFED_LIBRARY_DIR)\libibverbs.lib;%(AdditionalDependencies) + + + + + Level3 + true + true + true + _CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + $(OFED_INCLUDE);$(OFED_INCLUDE)\linux;$(OFED_INCLUDE)\etc;..\;$(ZLIB_INCLUDE);%(AdditionalIncludeDirectories) + + + Console + true + true + true + ws2_32.lib;$(ZLIB_LIBRARY);$(OFED_LIBRARY_DIR)\libibverbs.lib;%(AdditionalDependencies) + + + + + Level3 + true + _CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;_WINSOCK_DEPRECATED_NO_WARNINGS;%(PreprocessorDefinitions) + true + $(OFED_INCLUDE);$(OFED_INCLUDE)\linux;$(OFED_INCLUDE)\etc;..\;$(ZLIB_INCLUDE);%(AdditionalIncludeDirectories) + + + Console + true + ws2_32.lib;$(ZLIB_LIBRARY);$(OFED_LIBRARY_DIR)\libibverbs.lib;%(AdditionalDependencies) + + + + + Level3 + true + true + true + _CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + $(OFED_INCLUDE);$(OFED_INCLUDE)\linux;$(OFED_INCLUDE)\etc;..\;$(ZLIB_INCLUDE);%(AdditionalIncludeDirectories) + + + Console + true + true + true + ws2_32.lib;$(ZLIB_LIBRARY);$(OFED_LIBRARY_DIR)\libibverbs.lib;%(AdditionalDependencies) + + + + + + \ No newline at end of file diff --git a/vc/hdrdmacp.vcxproj.filters b/vc/hdrdmacp.vcxproj.filters new file mode 100644 index 0000000..8f1ed50 --- /dev/null +++ b/vc/hdrdmacp.vcxproj.filters @@ -0,0 +1,36 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/vc/hdrdmacp.vcxproj.user b/vc/hdrdmacp.vcxproj.user new file mode 100644 index 0000000..ad407ec --- /dev/null +++ b/vc/hdrdmacp.vcxproj.user @@ -0,0 +1,19 @@ + + + + -s + WindowsLocalDebugger + + + -s + WindowsLocalDebugger + + + -s + WindowsLocalDebugger + + + -s + WindowsLocalDebugger + + \ No newline at end of file From 6680216b2685d505222c84671643355e0e437204 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Wed, 5 Jan 2022 11:03:34 +1100 Subject: [PATCH 02/31] Moving to CMake --- .gitmodules | 3 + CMakeLists.txt | 31 +++++++ hdRDMA.cc | 5 +- hdRDMA.h | 11 ++- hdrdmacp.cc | 4 +- modules/libwinibverbs | 1 + vc/hdrdmacp.sln | 31 ------- vc/hdrdmacp.vcxproj | 161 ------------------------------------ vc/hdrdmacp.vcxproj.filters | 36 -------- vc/hdrdmacp.vcxproj.user | 19 ----- 10 files changed, 52 insertions(+), 250 deletions(-) create mode 100644 .gitmodules create mode 100644 CMakeLists.txt create mode 160000 modules/libwinibverbs delete mode 100644 vc/hdrdmacp.sln delete mode 100644 vc/hdrdmacp.vcxproj delete mode 100644 vc/hdrdmacp.vcxproj.filters delete mode 100644 vc/hdrdmacp.vcxproj.user diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..fdc27b7 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "modules/libwinibverbs"] + path = modules/libwinibverbs + url = https://github.com/lucasz93/libwinibverbs diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..8b11ba0 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required(VERSION 3.0) +project (hdrdmacp) + +add_executable(hdrdmacp + hdRDMA.cc + hdRDMA.h + hdrdmacp.cc + hdRDMAThread.cc + hdRDMAThread.h +) + +find_package(Git QUIET) +if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git") +# Update submodules as needed + option(GIT_SUBMODULE "Check submodules during build" ON) + if(GIT_SUBMODULE) + message(STATUS "Submodule update") + execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + RESULT_VARIABLE GIT_SUBMOD_RESULT) + if(NOT GIT_SUBMOD_RESULT EQUAL "0") + message(FATAL_ERROR "git submodule update --init --recursive failed with ${GIT_SUBMOD_RESULT}, please checkout submodules") + endif() + endif() +endif() + +if(NOT EXISTS "${PROJECT_SOURCE_DIR}/modules/libwinibverbs/CMakeLists.txt") + message(FATAL_ERROR "The submodules were not downloaded! GIT_SUBMODULE was turned off or failed. Please update submodules and try again.") +endif() + +add_subdirectory(modules/libwinibverbs) \ No newline at end of file diff --git a/hdRDMA.cc b/hdRDMA.cc index e124261..7b900af 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -1,7 +1,10 @@ -#include +#include "hdRDMA.h" +#ifdef __GNUC__ #include +#endif + #include #include diff --git a/hdRDMA.h b/hdRDMA.h index bc5270a..501beca 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -8,13 +8,22 @@ #include #include +#if __GNUC__ #include #include #include +#endif + +#ifdef _MSC_VER +#ifndef WIN32 +#define WIN32 +#endif +#include +#endif #include -#include +#include "hdRDMAThread.h" class hdRDMA{ public: diff --git a/hdrdmacp.cc b/hdrdmacp.cc index c0e6ff0..8e19cf9 100644 --- a/hdrdmacp.cc +++ b/hdrdmacp.cc @@ -1,8 +1,10 @@ +#ifdef __GNUC__ #include +#endif //#include -#include +#include "hdRDMA.h" #include #include diff --git a/modules/libwinibverbs b/modules/libwinibverbs new file mode 160000 index 0000000..ec1d0ee --- /dev/null +++ b/modules/libwinibverbs @@ -0,0 +1 @@ +Subproject commit ec1d0ee53b8ff1faf8f9cdf40019ac8637548e11 diff --git a/vc/hdrdmacp.sln b/vc/hdrdmacp.sln deleted file mode 100644 index c09750b..0000000 --- a/vc/hdrdmacp.sln +++ /dev/null @@ -1,31 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.0.31912.275 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hdrdmacp", "hdrdmacp.vcxproj", "{06121512-5986-437C-AD1F-79FCF2224F15}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {06121512-5986-437C-AD1F-79FCF2224F15}.Debug|x64.ActiveCfg = Debug|x64 - {06121512-5986-437C-AD1F-79FCF2224F15}.Debug|x64.Build.0 = Debug|x64 - {06121512-5986-437C-AD1F-79FCF2224F15}.Debug|x86.ActiveCfg = Debug|Win32 - {06121512-5986-437C-AD1F-79FCF2224F15}.Debug|x86.Build.0 = Debug|Win32 - {06121512-5986-437C-AD1F-79FCF2224F15}.Release|x64.ActiveCfg = Release|x64 - {06121512-5986-437C-AD1F-79FCF2224F15}.Release|x64.Build.0 = Release|x64 - {06121512-5986-437C-AD1F-79FCF2224F15}.Release|x86.ActiveCfg = Release|Win32 - {06121512-5986-437C-AD1F-79FCF2224F15}.Release|x86.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {01A3DA07-F0C6-480B-B543-D04A90883D52} - EndGlobalSection -EndGlobal diff --git a/vc/hdrdmacp.vcxproj b/vc/hdrdmacp.vcxproj deleted file mode 100644 index 570f89e..0000000 --- a/vc/hdrdmacp.vcxproj +++ /dev/null @@ -1,161 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - - - - - - - - - - 16.0 - Win32Proj - {06121512-5986-437c-ad1f-79fcf2224f15} - ConsoleApplication1 - 10.0 - - - - Application - true - v143 - Unicode - - - Application - false - v143 - true - Unicode - - - Application - true - v143 - Unicode - - - Application - false - v143 - true - Unicode - - - - - - - - - - - - - - - - - - - - - true - - - false - - - true - - - false - - - - Level3 - true - _CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - $(OFED_INCLUDE);$(OFED_INCLUDE)\linux;$(OFED_INCLUDE)\etc;..\;$(ZLIB_INCLUDE);%(AdditionalIncludeDirectories) - - - Console - true - ws2_32.lib;$(ZLIB_LIBRARY);$(OFED_LIBRARY_DIR)\libibverbs.lib;%(AdditionalDependencies) - - - - - Level3 - true - true - true - _CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - $(OFED_INCLUDE);$(OFED_INCLUDE)\linux;$(OFED_INCLUDE)\etc;..\;$(ZLIB_INCLUDE);%(AdditionalIncludeDirectories) - - - Console - true - true - true - ws2_32.lib;$(ZLIB_LIBRARY);$(OFED_LIBRARY_DIR)\libibverbs.lib;%(AdditionalDependencies) - - - - - Level3 - true - _CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;_WINSOCK_DEPRECATED_NO_WARNINGS;%(PreprocessorDefinitions) - true - $(OFED_INCLUDE);$(OFED_INCLUDE)\linux;$(OFED_INCLUDE)\etc;..\;$(ZLIB_INCLUDE);%(AdditionalIncludeDirectories) - - - Console - true - ws2_32.lib;$(ZLIB_LIBRARY);$(OFED_LIBRARY_DIR)\libibverbs.lib;%(AdditionalDependencies) - - - - - Level3 - true - true - true - _CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - true - $(OFED_INCLUDE);$(OFED_INCLUDE)\linux;$(OFED_INCLUDE)\etc;..\;$(ZLIB_INCLUDE);%(AdditionalIncludeDirectories) - - - Console - true - true - true - ws2_32.lib;$(ZLIB_LIBRARY);$(OFED_LIBRARY_DIR)\libibverbs.lib;%(AdditionalDependencies) - - - - - - \ No newline at end of file diff --git a/vc/hdrdmacp.vcxproj.filters b/vc/hdrdmacp.vcxproj.filters deleted file mode 100644 index 8f1ed50..0000000 --- a/vc/hdrdmacp.vcxproj.filters +++ /dev/null @@ -1,36 +0,0 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms - - - - - Source Files - - - Source Files - - - Source Files - - - - - Source Files - - - Source Files - - - \ No newline at end of file diff --git a/vc/hdrdmacp.vcxproj.user b/vc/hdrdmacp.vcxproj.user deleted file mode 100644 index ad407ec..0000000 --- a/vc/hdrdmacp.vcxproj.user +++ /dev/null @@ -1,19 +0,0 @@ - - - - -s - WindowsLocalDebugger - - - -s - WindowsLocalDebugger - - - -s - WindowsLocalDebugger - - - -s - WindowsLocalDebugger - - \ No newline at end of file From d0c7308f6345e709b5ce09c576adbb805a85acdd Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Wed, 5 Jan 2022 12:29:15 +1100 Subject: [PATCH 03/31] Small fix --- CMakeLists.txt | 45 +++++++++++++++++++++++++++++++-------------- hdRDMA.cc | 6 +++++- hdRDMAThread.cc | 2 +- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b11ba0..adc2d8d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,17 +1,11 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.21) project (hdrdmacp) -add_executable(hdrdmacp - hdRDMA.cc - hdRDMA.h - hdrdmacp.cc - hdRDMAThread.cc - hdRDMAThread.h -) - +# +# Pull submodules. +# find_package(Git QUIET) if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git") -# Update submodules as needed option(GIT_SUBMODULE "Check submodules during build" ON) if(GIT_SUBMODULE) message(STATUS "Submodule update") @@ -24,8 +18,31 @@ if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git") endif() endif() -if(NOT EXISTS "${PROJECT_SOURCE_DIR}/modules/libwinibverbs/CMakeLists.txt") - message(FATAL_ERROR "The submodules were not downloaded! GIT_SUBMODULE was turned off or failed. Please update submodules and try again.") -endif() +# +# Add submodules +# +add_subdirectory(modules/libwinibverbs) + +# +# Thanks to a bug in CMake, we just manually pass ZLIB_LIBRARY and ZLIB_INCLUDE_DIRS +# +#find_library(ZLIB REQUIRED) +message("ZLIB_LIBRARY: ${ZLIB_LIBRARY}") +message("ZLIB_INCLUDE_DIRS: ${ZLIB_INCLUDE_DIRS}") + +add_executable(hdrdmacp + hdRDMA.cc + hdRDMA.h + hdrdmacp.cc + hdRDMAThread.cc + hdRDMAThread.h +) +add_dependencies(hdrdmacp winibverbs) +target_compile_definitions(hdrdmacp PRIVATE _CRT_SECURE_NO_WARNINGS) +target_link_libraries(hdrdmacp winibverbs ${ZLIB_LIBRARY}) +target_include_directories(hdrdmacp PRIVATE ${ZLIB_INCLUDE_DIRS}) -add_subdirectory(modules/libwinibverbs) \ No newline at end of file +add_custom_command(TARGET hdrdmacp POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ $ + COMMAND_EXPAND_LISTS +) \ No newline at end of file diff --git a/hdRDMA.cc b/hdRDMA.cc index 7b900af..df02f10 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -428,13 +428,17 @@ void hdRDMA::Poll(void) // Look for stopped threads and free their resources std::lock_guard lck( threads_mtx ); + std::vector stopped; for( auto t : threads ){ if( t.second->stopped ){ t.first->join(); delete t.second; - threads.erase( t.first ); + stopped.push_back( t.first ); } } + for (auto s : stopped) { + threads.erase(s); + } } diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index 439d42a..809c33b 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -75,7 +75,7 @@ size_t recv64(SOCKET s, void *buffer, size_t sz, int flags) { bytes_to_read = (int)std::min(sz, INT_MAX); - bytes_read = recv64(s, (char *)buffer, bytes_to_read, flags); + bytes_read = recv(s, (char *)buffer, bytes_to_read, flags); if (bytes_read < 0) { break; From 6915176a4fe1bedaf25b2c35813c317ded683ff2 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Wed, 5 Jan 2022 16:06:10 +1100 Subject: [PATCH 04/31] Fixes --- CMakeLists.txt | 2 +- hdRDMAThread.cc | 6 +++--- modules/libwinibverbs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index adc2d8d..76ff385 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,7 @@ add_executable(hdrdmacp add_dependencies(hdrdmacp winibverbs) target_compile_definitions(hdrdmacp PRIVATE _CRT_SECURE_NO_WARNINGS) target_link_libraries(hdrdmacp winibverbs ${ZLIB_LIBRARY}) -target_include_directories(hdrdmacp PRIVATE ${ZLIB_INCLUDE_DIRS}) +target_include_directories(hdrdmacp PRIVATE ${ZLIB_INCLUDE_DIRS} modules/libwinibverbs/include) add_custom_command(TARGET hdrdmacp POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $ $ diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index 809c33b..af2b089 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -313,9 +313,6 @@ void hdRDMAThread::ExchangeQPInfo( SOCKET sockfd ) // Create a new QP to use with the remote peer. CreateQP(); - - // Create a work receive request for each MR buffer we have - for( uint32_t id=0; id Date: Wed, 5 Jan 2022 19:08:53 +1100 Subject: [PATCH 05/31] Converted hdrdmacp into a static library and application --- CMakeLists.txt | 31 +++++++++++++++++++++++++------ hdRDMA.cc | 20 ++++++++++++++------ hdRDMA.h | 25 +++++++++++++++---------- hdRDMAThread.cc | 5 ++--- hdrdmacp.cc | 21 +++++++++++++-------- 5 files changed, 69 insertions(+), 33 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 76ff385..18a22d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,17 +30,36 @@ add_subdirectory(modules/libwinibverbs) message("ZLIB_LIBRARY: ${ZLIB_LIBRARY}") message("ZLIB_INCLUDE_DIRS: ${ZLIB_INCLUDE_DIRS}") -add_executable(hdrdmacp +#=============================================================================== +# LIBRARY +#=============================================================================== +add_library(libhdrdmacp SHARED + IhdRDMA.h hdRDMA.cc hdRDMA.h - hdrdmacp.cc hdRDMAThread.cc hdRDMAThread.h ) -add_dependencies(hdrdmacp winibverbs) -target_compile_definitions(hdrdmacp PRIVATE _CRT_SECURE_NO_WARNINGS) -target_link_libraries(hdrdmacp winibverbs ${ZLIB_LIBRARY}) -target_include_directories(hdrdmacp PRIVATE ${ZLIB_INCLUDE_DIRS} modules/libwinibverbs/include) +add_dependencies(libhdrdmacp winibverbs) +target_compile_definitions(libhdrdmacp PRIVATE _CRT_SECURE_NO_WARNINGS BUILDING_HDRDMA) +target_link_libraries(libhdrdmacp winibverbs ${ZLIB_LIBRARY}) +target_include_directories(libhdrdmacp PRIVATE ${ZLIB_INCLUDE_DIRS} modules/libwinibverbs/include) + +set_target_properties(libhdrdmacp PROPERTIES OUTPUT_NAME hdrdmacp) + +install(TARGETS libhdrdmacp DESTINATION ${CMAKE_INSTALL_PREFIX}) +install(FILES + IhdRDMA.h + DESTINATION include) + +#=============================================================================== +# APPLICATION +#=============================================================================== +add_executable(hdrdmacp + hdrdmacp.cc +) +add_dependencies(hdrdmacp libhdrdmacp) +target_link_libraries(hdrdmacp libhdrdmacp) add_custom_command(TARGET hdrdmacp POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $ $ diff --git a/hdRDMA.cc b/hdRDMA.cc index df02f10..ba9119a 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -20,9 +20,18 @@ using std::chrono::duration; using std::chrono::duration_cast; using std::chrono::high_resolution_clock; -extern uint64_t HDRDMA_BUFF_LEN_GB; -extern uint64_t HDRDMA_NUM_BUFF_SECTIONS; +extern "C" +{ + HDRDMA_DLL hdrdma::IhdRDMA* hdrdma_allocate(const hdrdma::config& config) + { + return new hdRDMA(config); + } + HDRDMA_DLL void hdrdma_free(hdrdma::IhdRDMA* hdrdma) + { + delete hdrdma; + } +} //------------------------------------------------------------- // hdRDMA @@ -30,7 +39,7 @@ extern uint64_t HDRDMA_NUM_BUFF_SECTIONS; // hdRDMA constructor. This will look for IB devices and set up // for RDMA communications on the first one it finds. //------------------------------------------------------------- -hdRDMA::hdRDMA() +hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) { cout << "Looking for IB devices ..." << endl; int num_devices = 0; @@ -151,9 +160,8 @@ hdRDMA::hdRDMA() // Allocate a large buffer and create a memory region pointing to it. // We will split this one memory region among multiple receive requests // n.b. initial tests failed on transfer for buffers larger than 1GB - uint64_t buff_len_GB = HDRDMA_BUFF_LEN_GB; - num_buff_sections = HDRDMA_NUM_BUFF_SECTIONS; - buff_section_len = (buff_len_GB*1000000000)/(uint64_t)num_buff_sections; + num_buff_sections = config.num_buffer_sections; + buff_section_len = (config.buffer_len_gb *1000000000)/(uint64_t)num_buff_sections; buff_len = num_buff_sections*buff_section_len; buff = new uint8_t[buff_len]; if( !buff ){ diff --git a/hdRDMA.h b/hdRDMA.h index 501beca..6832ff8 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -25,22 +25,25 @@ #include "hdRDMAThread.h" -class hdRDMA{ +#include "IhdRDMA.h" + +class hdRDMA : public hdrdma::IhdRDMA { public: - hdRDMA(); - ~hdRDMA(); - void CreateQP(void); - void Listen(int port); - void StopListening(void); - void Connect(std::string host, int port); + hdRDMA(const hdrdma::config &config); + ~hdRDMA() override; + virtual void Listen(int port) override; + virtual void StopListening(void) override; + virtual void Connect(std::string host, int port) override; uint32_t GetNpeers(void); void GetBuffers( std::vector &buffers, int Nrequested=4 ); void ReturnBuffers( std::vector &buffers ); - void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false); - void Poll(void); + virtual void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false) override; + virtual void Poll(void) override; + + virtual uint64_t TotalBytesReceived() const override { return total_bytes_received; } + std::atomic_ullong total_bytes_received = 0; - struct ibv_device *dev = nullptr; struct ibv_context *ctx = nullptr; int port_num = 1; @@ -67,5 +70,7 @@ class hdRDMA{ std::atomic Ntransferred; uint64_t Ntransferred_last = 0; std::chrono::high_resolution_clock::time_point t_last; + + std::string remote_addr; }; diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index af2b089..25b96e4 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -19,7 +19,6 @@ using std::chrono::duration; using std::chrono::duration_cast; using std::chrono::high_resolution_clock; -extern atomic BYTES_RECEIVED_TOT; extern std::string HDRDMA_REMOTE_ADDR; // @@ -256,7 +255,7 @@ void hdRDMAThread::ThreadRun(SOCKET sockfd) auto &buffer = buffers[id]; auto buff = std::get<0>(buffer); //auto buff_len = std::get<1>(buffer); - BYTES_RECEIVED_TOT += wc.byte_len; + hdrdma->total_bytes_received += wc.byte_len; ReceiveBuffer( buff, wc.byte_len ); //n.b. do NOT use buff_len here! t_last_received = high_resolution_clock::now(); @@ -652,7 +651,7 @@ void hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bo double filesize_GB = (double)filesize*1.0E-9; std::string mess = delete_after_send ? " - will be deleted after send":""; - cout << "Sending file: " << srcfilename << "-> (" << HDRDMA_REMOTE_ADDR << ":)" << dstfilename << " (" << filesize_GB << " GB)" << mess << endl; + cout << "Sending file: " << srcfilename << "-> (" << hdrdma->remote_addr << ":)" << dstfilename << " (" << filesize_GB << " GB)" << mess << endl; struct ibv_send_wr wr, *bad_wr = nullptr; struct ibv_sge sge; diff --git a/hdrdmacp.cc b/hdrdmacp.cc index 8e19cf9..65a3138 100644 --- a/hdrdmacp.cc +++ b/hdrdmacp.cc @@ -3,11 +3,12 @@ #include #endif -//#include -#include "hdRDMA.h" +#include "IhdRDMA.h" #include #include +#include +#include using namespace std; using std::chrono::steady_clock; using std::chrono::duration; @@ -42,7 +43,11 @@ int main(int narg, char *argv[]) ParseCommandLineArguments( narg, argv ); // Create an hdRDMA object - hdRDMA hdrdma; + hdrdma::config hdrdma_config; + hdrdma_config.remote_addr = HDRDMA_REMOTE_ADDR; + hdrdma_config.buffer_len_gb = HDRDMA_BUFF_LEN_GB; + hdrdma_config.num_buffer_sections = HDRDMA_NUM_BUFF_SECTIONS; + auto hdrdma = hdrdma::Create(hdrdma_config); // Listen for remote peers if we are in server mode // This will launch a thread and listen for any remote connections. @@ -51,7 +56,7 @@ int main(int narg, char *argv[]) // This will return right away so one must check the GetNpeers() method // to see when a connection is made. if( HDRDMA_IS_SERVER ){ - hdrdma.Listen( HDRDMA_LOCAL_PORT ); + hdrdma->Listen( HDRDMA_LOCAL_PORT ); // We want to report 10sec, 1min, and 5min averages auto t_last_10sec = steady_clock::now(); @@ -63,7 +68,7 @@ int main(int narg, char *argv[]) while( true ){ - hdrdma.Poll(); + hdrdma->Poll(); auto now = steady_clock::now(); auto duration_10sec = duration_cast(now - t_last_10sec); @@ -101,7 +106,7 @@ int main(int narg, char *argv[]) } // Stop server from listening (if one is) - hdrdma.StopListening(); + hdrdma->StopListening(); } // Connect to remote peer if we are in client mode. @@ -111,8 +116,8 @@ int main(int narg, char *argv[]) // be made available for transfers. If the connection cannot be made // then it will exit the program with an error message. if( HDRDMA_IS_CLIENT ){ - hdrdma.Connect( HDRDMA_REMOTE_ADDR, HDRDMA_REMOTE_PORT ); - hdrdma.SendFile( HDRDMA_SRCFILENAME, HDRDMA_DSTFILENAME, HDRDMA_DELETE_AFTER_SEND, HDRDMA_CALCULATE_CHECKSUM, HDRDMA_MAKE_PARENT_DIRS ); + hdrdma->Connect( HDRDMA_REMOTE_ADDR, HDRDMA_REMOTE_PORT ); + hdrdma->SendFile( HDRDMA_SRCFILENAME, HDRDMA_DSTFILENAME, HDRDMA_DELETE_AFTER_SEND, HDRDMA_CALCULATE_CHECKSUM, HDRDMA_MAKE_PARENT_DIRS ); } return 0; From 2950f4b1360a588df0b69f21eec12a55511f860f Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Wed, 5 Jan 2022 19:09:23 +1100 Subject: [PATCH 06/31] Missed the header... --- IhdRDMA.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 IhdRDMA.h diff --git a/IhdRDMA.h b/IhdRDMA.h new file mode 100644 index 0000000..babeb97 --- /dev/null +++ b/IhdRDMA.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include + +#ifdef _MSC_VER +# define HDRMDA_DLL_EXPORT __declspec(dllexport) +# define HDRMDA_DLL_IMPORT __declspec(dllimport) +#else +# define HDRMDA_DLL_EXPORT __attribute__((__visibility__("default"))) +# define HDRMDA_DLL_IMPORT +#endif + +#ifdef BUILDING_HDRDMA +# define HDRDMA_DLL HDRMDA_DLL_EXPORT +#else +# define HDRDMA_DLL HDRMDA_DLL_IMPORT +#endif + +namespace hdrdma +{ + class IhdRDMA; + + struct config + { + size_t buffer_len_gb; + size_t num_buffer_sections; + std::string remote_addr; + }; +} + +extern "C" +{ + // Raw pointers. You probably don't want to use these. + HDRDMA_DLL hdrdma::IhdRDMA* hdrdma_allocate(const hdrdma::config& config); + HDRDMA_DLL void hdrdma_free(hdrdma::IhdRDMA*); +} + +namespace hdrdma +{ + class IhdRDMA { + public: + virtual ~IhdRDMA() {} + + virtual void Listen(int port) = 0; + virtual void StopListening(void) = 0; + virtual void Connect(std::string host, int port) = 0; + virtual void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send = false, bool calculate_checksum = false, bool makeparentdirs = false) = 0; + virtual void Poll(void) = 0; + + virtual uint64_t TotalBytesReceived() const = 0; + }; + + // Wrappers. You probably want to use these. + static std::shared_ptr Create(const hdrdma::config& config) + { + return std::shared_ptr(hdrdma_allocate(config), hdrdma_free); + } +} From e590ba4b0b1fc9efe23b28c95f703f803970c8b3 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Thu, 6 Jan 2022 10:51:36 +1100 Subject: [PATCH 07/31] Fixes for Linux --- CMakeLists.txt | 27 +++++++++++++++++++-------- hdRDMAThread.cc | 2 +- hdrdmacp.cc | 1 + 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 18a22d7..006d70d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,6 @@ cmake_minimum_required(VERSION 3.21) project (hdrdmacp) +set(CMAKE_CXX_STANDARD 17) # # Pull submodules. @@ -21,7 +22,13 @@ endif() # # Add submodules # -add_subdirectory(modules/libwinibverbs) +if(WIN32) + add_subdirectory(modules/libwinibverbs) + set(winibverbs_include "modules/libwinibverbs/include") + set(iverbs_lib winibverbs) +else() + set(iverbs_lib ibverbs) +endif() # # Thanks to a bug in CMake, we just manually pass ZLIB_LIBRARY and ZLIB_INCLUDE_DIRS @@ -40,10 +47,12 @@ add_library(libhdrdmacp SHARED hdRDMAThread.cc hdRDMAThread.h ) -add_dependencies(libhdrdmacp winibverbs) +if(WIN32) + add_dependencies(libhdrdmacp winibverbs) +endif() target_compile_definitions(libhdrdmacp PRIVATE _CRT_SECURE_NO_WARNINGS BUILDING_HDRDMA) -target_link_libraries(libhdrdmacp winibverbs ${ZLIB_LIBRARY}) -target_include_directories(libhdrdmacp PRIVATE ${ZLIB_INCLUDE_DIRS} modules/libwinibverbs/include) +target_link_libraries(libhdrdmacp ${iverbs_lib} ${ZLIB_LIBRARY}) +target_include_directories(libhdrdmacp PRIVATE ${ZLIB_INCLUDE_DIRS} ${winibverbs_include}) set_target_properties(libhdrdmacp PROPERTIES OUTPUT_NAME hdrdmacp) @@ -61,7 +70,9 @@ add_executable(hdrdmacp add_dependencies(hdrdmacp libhdrdmacp) target_link_libraries(hdrdmacp libhdrdmacp) -add_custom_command(TARGET hdrdmacp POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy $ $ - COMMAND_EXPAND_LISTS -) \ No newline at end of file +if (WIN32) + add_custom_command(TARGET hdrdmacp POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy $ $ + COMMAND_EXPAND_LISTS + ) +endif() diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index 25b96e4..16e69d4 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -96,7 +96,7 @@ typedef int mode_t; #ifdef __GNUC__ #define send64 send -#define recv recv +#define recv64 recv #endif //----------------------------------------- diff --git a/hdrdmacp.cc b/hdrdmacp.cc index 65a3138..9f4c2e5 100644 --- a/hdrdmacp.cc +++ b/hdrdmacp.cc @@ -9,6 +9,7 @@ #include #include #include +#include using namespace std; using std::chrono::steady_clock; using std::chrono::duration; From 5786089b69a60180d2a75539249cc2ecb06f283c Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Thu, 6 Jan 2022 20:31:45 +1100 Subject: [PATCH 08/31] Propagating errors --- IhdRDMA.h | 2 +- hdRDMA.cc | 4 ++-- hdRDMA.h | 2 +- hdRDMAThread.cc | 4 ++-- hdRDMAThread.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/IhdRDMA.h b/IhdRDMA.h index babeb97..44e7989 100644 --- a/IhdRDMA.h +++ b/IhdRDMA.h @@ -45,7 +45,7 @@ namespace hdrdma virtual void Listen(int port) = 0; virtual void StopListening(void) = 0; virtual void Connect(std::string host, int port) = 0; - virtual void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send = false, bool calculate_checksum = false, bool makeparentdirs = false) = 0; + virtual int SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send = false, bool calculate_checksum = false, bool makeparentdirs = false) = 0; virtual void Poll(void) = 0; virtual uint64_t TotalBytesReceived() const = 0; diff --git a/hdRDMA.cc b/hdRDMA.cc index ba9119a..ec02291 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -400,7 +400,7 @@ void hdRDMA::ReturnBuffers( std::vector &buffers ) //------------------------------------------------------------- // SendFile //------------------------------------------------------------- -void hdRDMA::SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) +int hdRDMA::SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) { // This just calls the SendFile method of the client hdRDMAThread @@ -409,7 +409,7 @@ void hdRDMA::SendFile(std::string srcfilename, std::string dstfilename, bool del return; } - hdthr_client->SendFile( srcfilename, dstfilename, delete_after_send, calculate_checksum, makeparentdirs); + return hdthr_client->SendFile( srcfilename, dstfilename, delete_after_send, calculate_checksum, makeparentdirs); } diff --git a/hdRDMA.h b/hdRDMA.h index 6832ff8..7e47f26 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -38,7 +38,7 @@ class hdRDMA : public hdrdma::IhdRDMA { uint32_t GetNpeers(void); void GetBuffers( std::vector &buffers, int Nrequested=4 ); void ReturnBuffers( std::vector &buffers ); - virtual void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false) override; + virtual int SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false) override; virtual void Poll(void) override; virtual uint64_t TotalBytesReceived() const override { return total_bytes_received; } diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index 16e69d4..0f585bb 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -635,13 +635,13 @@ void hdRDMAThread::ClientConnect( SOCKET sockfd ) //------------------------------------------------------------- // SendFile //------------------------------------------------------------- -void hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) +int hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) { // Open local file std::ifstream ifs(srcfilename.c_str()); if( !ifs.is_open() ){ cerr <<"ERROR: Unable to open file \"" << srcfilename << "\"!" << endl; - exit(-40); + return -40; } // Get filesize diff --git a/hdRDMAThread.h b/hdRDMAThread.h index 7471374..9eb4054 100644 --- a/hdRDMAThread.h +++ b/hdRDMAThread.h @@ -78,7 +78,7 @@ class hdRDMAThread{ int SetToRTS(void); void ReceiveBuffer(uint8_t *buff, uint32_t buff_len); void ClientConnect( SOCKET sockfd ); - void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false); + int SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false); void PollCQ(void); bool makePath( const std::string &path ); From 6fec0507d097349b323cbde2de1e08348a33b93c Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Thu, 6 Jan 2022 21:02:13 +1100 Subject: [PATCH 09/31] Oopsie --- hdRDMA.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hdRDMA.cc b/hdRDMA.cc index ec02291..05f2c33 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -406,7 +406,7 @@ int hdRDMA::SendFile(std::string srcfilename, std::string dstfilename, bool dele if( hdthr_client == nullptr ){ cerr << "ERROR: hdRDMA::SendFile called before hdthr_client instantiated." << endl; - return; + return -1; } return hdthr_client->SendFile( srcfilename, dstfilename, delete_after_send, calculate_checksum, makeparentdirs); From 5bed6a161abc0830bd03801d4cb89264f3023296 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Thu, 6 Jan 2022 23:55:18 +1100 Subject: [PATCH 10/31] Default config --- IhdRDMA.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/IhdRDMA.h b/IhdRDMA.h index 44e7989..3fc17f8 100644 --- a/IhdRDMA.h +++ b/IhdRDMA.h @@ -23,6 +23,8 @@ namespace hdrdma struct config { + config(const std::string_view& remote) : remote_addr(remote), buffer_len_gb(1), num_buffer_sections(4) {} + size_t buffer_len_gb; size_t num_buffer_sections; std::string remote_addr; From 612a0ea2ca514048962d313829cf7219119452d1 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Fri, 7 Jan 2022 21:55:03 +1100 Subject: [PATCH 11/31] Moving to exceptions because having exit() kill your server isn't fun :(( --- IhdRDMA.h | 4 +++- hdRDMA.cc | 39 +++++++++++++++++++++++++++------------ hdRDMA.h | 3 ++- hdRDMAThread.cc | 4 ++-- hdRDMAThread.h | 3 ++- 5 files changed, 36 insertions(+), 17 deletions(-) diff --git a/IhdRDMA.h b/IhdRDMA.h index 3fc17f8..139b3b4 100644 --- a/IhdRDMA.h +++ b/IhdRDMA.h @@ -24,6 +24,7 @@ namespace hdrdma struct config { config(const std::string_view& remote) : remote_addr(remote), buffer_len_gb(1), num_buffer_sections(4) {} + config() {} size_t buffer_len_gb; size_t num_buffer_sections; @@ -47,8 +48,9 @@ namespace hdrdma virtual void Listen(int port) = 0; virtual void StopListening(void) = 0; virtual void Connect(std::string host, int port) = 0; - virtual int SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send = false, bool calculate_checksum = false, bool makeparentdirs = false) = 0; + virtual void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send = false, bool calculate_checksum = false, bool makeparentdirs = false) = 0; virtual void Poll(void) = 0; + virtual void Join(void) = 0; virtual uint64_t TotalBytesReceived() const = 0; }; diff --git a/hdRDMA.cc b/hdRDMA.cc index 05f2c33..7b83e17 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -120,7 +120,7 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) ctx = ibv_open_device(dev); if( !ctx ){ cout << "Error opening IB device context!" << endl; - exit(-11); + throw std::runtime_error("ibv_open_device failed"); } // Get device and port attributes @@ -154,7 +154,7 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) pd = ibv_alloc_pd(ctx); if( !pd ){ cout << "ERROR allocation protection domain!" << endl; - exit(-12); + throw std::runtime_error("ibv_alloc_pd failed"); } // Allocate a large buffer and create a memory region pointing to it. @@ -166,7 +166,7 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) buff = new uint8_t[buff_len]; if( !buff ){ cout << "ERROR: Unable to allocate buffer!" << endl; - exit(-13); + throw std::runtime_error("buff allocation failed"); } errno = 0; auto access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; @@ -174,7 +174,7 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) if( !mr ){ cout << "ERROR: Unable to register memory region! errno=" << errno << endl; cout << " (Please see usage statement for a possible work around)" << endl; - exit( -14 ); + throw std::runtime_error("ibv_reg_mr failed"); } // Fill in buffers @@ -243,7 +243,7 @@ void hdRDMA::Listen(int port) auto ret = bind( server_sockfd, (struct sockaddr*)&addr, sizeof(addr) ); if( ret != 0 ){ cout << "ERROR: binding server socket!" << endl; - exit(-2); + throw std::runtime_error("bind failed"); } listen(server_sockfd, 5); @@ -260,7 +260,7 @@ void hdRDMA::Listen(int port) struct sockaddr_in peer_addr; socklen_t peer_addr_len = sizeof(struct sockaddr_in); peer_sockfd = accept(server_sockfd, (struct sockaddr *)&peer_addr, &peer_addr_len); - if( peer_sockfd > 0 ){ + if( peer_sockfd != INVALID_SOCKET ){ // cout << "Connection from " << inet_ntoa(peer_addr.sin_addr) << endl; // Create a new thread to handle this connection @@ -294,11 +294,11 @@ void hdRDMA::StopListening(void) if( server_thread ){ cout << "Waiting for server to finish ..." << endl; done = true; + if (server_sockfd) closesocket(server_sockfd); + server_sockfd = 0; server_thread->join(); delete server_thread; server_thread = nullptr; - if( server_sockfd ) closesocket( server_sockfd ); - server_sockfd = 0; }else{ cout << "Server not running." < &buffers ) //------------------------------------------------------------- // SendFile //------------------------------------------------------------- -int hdRDMA::SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) +void hdRDMA::SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) { // This just calls the SendFile method of the client hdRDMAThread if( hdthr_client == nullptr ){ cerr << "ERROR: hdRDMA::SendFile called before hdthr_client instantiated." << endl; - return -1; + throw std::runtime_error("Need to Connect() first"); } - return hdthr_client->SendFile( srcfilename, dstfilename, delete_after_send, calculate_checksum, makeparentdirs); + hdthr_client->SendFile( srcfilename, dstfilename, delete_after_send, calculate_checksum, makeparentdirs); } @@ -450,3 +450,18 @@ void hdRDMA::Poll(void) } +//------------------------------------------------------------- +// Join +// +// Waits for client threads to finish. +//------------------------------------------------------------- +void hdRDMA::Join(void) +{ + cout << "Waiting for clients to finish ..." << endl; + for (auto t : threads) { + t.first->join(); + delete t.second; + } + threads.clear(); +} + diff --git a/hdRDMA.h b/hdRDMA.h index 7e47f26..510aef9 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -38,8 +38,9 @@ class hdRDMA : public hdrdma::IhdRDMA { uint32_t GetNpeers(void); void GetBuffers( std::vector &buffers, int Nrequested=4 ); void ReturnBuffers( std::vector &buffers ); - virtual int SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false) override; + virtual void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false) override; virtual void Poll(void) override; + virtual void Join(void) override; virtual uint64_t TotalBytesReceived() const override { return total_bytes_received; } std::atomic_ullong total_bytes_received = 0; diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index 0f585bb..cb8f971 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -635,13 +635,13 @@ void hdRDMAThread::ClientConnect( SOCKET sockfd ) //------------------------------------------------------------- // SendFile //------------------------------------------------------------- -int hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) +void hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) { // Open local file std::ifstream ifs(srcfilename.c_str()); if( !ifs.is_open() ){ cerr <<"ERROR: Unable to open file \"" << srcfilename << "\"!" << endl; - return -40; + throw std::runtime_error("Couldn't open output file"); } // Get filesize diff --git a/hdRDMAThread.h b/hdRDMAThread.h index 9eb4054..882d4b5 100644 --- a/hdRDMAThread.h +++ b/hdRDMAThread.h @@ -30,6 +30,7 @@ class hdRDMA; #ifdef __GNUC__ typedef int SOCKET; #define closesocket close +#define INVALID_SOCKET -1 #endif class hdRDMAThread{ @@ -78,7 +79,7 @@ class hdRDMAThread{ int SetToRTS(void); void ReceiveBuffer(uint8_t *buff, uint32_t buff_len); void ClientConnect( SOCKET sockfd ); - int SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false); + void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false); void PollCQ(void); bool makePath( const std::string &path ); From 5f236ef81b40887d8d3ed4f3a019cbed47dc7439 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Fri, 7 Jan 2022 22:43:58 +1100 Subject: [PATCH 12/31] Fixed async ack thread causing problems in Windows - needed to patch libibverbs --- hdRDMA.cc | 14 +++++++++++--- hdRDMA.h | 1 + modules/libwinibverbs | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/hdRDMA.cc b/hdRDMA.cc index 7b83e17..d416168 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -186,12 +186,15 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) cout << "Created " << buffer_pool.size() << " buffers of " << buff_section_len/1000000 << "MB (" << buff_len/1000000000 << "GB total)" << endl; // Create thread to listen for async ibv events - new std::thread( [&](){ + ack_thread = new std::thread( [&](){ while( !done ){ struct ibv_async_event async_event; auto ret = ibv_get_async_event( ctx, &async_event); - cout << "+++ RDMA async event: type=" << async_event.event_type << " ret=" << ret << endl; - ibv_ack_async_event( &async_event ); + if (ret != -1) + { + cout << "+++ RDMA async event: type=" << async_event.event_type << " ret=" << ret << endl; + ibv_ack_async_event(&async_event); + } } }); @@ -205,6 +208,7 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) hdRDMA::~hdRDMA() { // Stop all connection threads + done = true; for( auto t : threads ){ t.second->stop = true; t.first->join(); @@ -216,6 +220,10 @@ hdRDMA::~hdRDMA() if( buff!=nullptr ) delete[] buff; if( pd!=nullptr ) ibv_dealloc_pd( pd ); if( ctx!=nullptr ) ibv_close_device( ctx ); + + ack_thread->join(); + delete ack_thread; + ack_thread = nullptr; #ifdef _MSC_VER # define SHUT_RDWR SD_BOTH diff --git a/hdRDMA.h b/hdRDMA.h index 510aef9..35bb7eb 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -62,6 +62,7 @@ class hdRDMA : public hdrdma::IhdRDMA { bool done = false; SOCKET server_sockfd = 0; std::thread *server_thread = nullptr; + std::thread *ack_thread = nullptr; uint32_t Nconnections = 0; hdRDMAThread *hdthr_client = nullptr; diff --git a/modules/libwinibverbs b/modules/libwinibverbs index 610d156..6591fb7 160000 --- a/modules/libwinibverbs +++ b/modules/libwinibverbs @@ -1 +1 @@ -Subproject commit 610d1561a2aea7b99fed1e77ba9cef6bd643fea4 +Subproject commit 6591fb78aab09ac0eb2f392af6d3ac01f5d4300b From 8af8dc32f3cb2634482373f1507855121796161e Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Mon, 11 Jul 2022 15:03:24 +1000 Subject: [PATCH 13/31] Fixed ZLIB include on Windows. --- CMakeLists.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 006d70d..6e5bbe0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,12 +30,7 @@ else() set(iverbs_lib ibverbs) endif() -# -# Thanks to a bug in CMake, we just manually pass ZLIB_LIBRARY and ZLIB_INCLUDE_DIRS -# -#find_library(ZLIB REQUIRED) -message("ZLIB_LIBRARY: ${ZLIB_LIBRARY}") -message("ZLIB_INCLUDE_DIRS: ${ZLIB_INCLUDE_DIRS}") +find_package(ZLIB REQUIRED) #=============================================================================== # LIBRARY From 69f38e7bc8a6ae0b7c6dc000ffb154276b4d45e1 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Wed, 13 Jul 2022 21:38:13 +1000 Subject: [PATCH 14/31] Downgrade to CMake 3.16 to match the default Ubuntu 20.04 LTS version --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e5bbe0..0bf901e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.21) +cmake_minimum_required(VERSION 3.16) project (hdrdmacp) set(CMAKE_CXX_STANDARD 17) From dc0ddc7cfe5aa2829ba3972a3dec363548616dbb Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Wed, 28 Sep 2022 01:15:41 +1000 Subject: [PATCH 15/31] Fixed compiler warnings --- CMakeLists.txt | 17 ----------------- hdRDMA.cc | 2 +- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bf901e..e4d1918 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,23 +2,6 @@ cmake_minimum_required(VERSION 3.16) project (hdrdmacp) set(CMAKE_CXX_STANDARD 17) -# -# Pull submodules. -# -find_package(Git QUIET) -if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git") - option(GIT_SUBMODULE "Check submodules during build" ON) - if(GIT_SUBMODULE) - message(STATUS "Submodule update") - execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - RESULT_VARIABLE GIT_SUBMOD_RESULT) - if(NOT GIT_SUBMOD_RESULT EQUAL "0") - message(FATAL_ERROR "git submodule update --init --recursive failed with ${GIT_SUBMOD_RESULT}, please checkout submodules") - endif() - endif() -endif() - # # Add submodules # diff --git a/hdRDMA.cc b/hdRDMA.cc index d416168..9f136ba 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -180,7 +180,7 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) // Fill in buffers for( uint32_t i=0; i Date: Thu, 17 Nov 2022 17:46:32 +1100 Subject: [PATCH 16/31] Ubuntu 20.04 was refusing to deregister memory windows larger than 2GB. Now allocating multiple MRs. --- hdRDMA.cc | 127 +++++++++++++++++++++++++++++++++++++----------- hdRDMA.h | 15 ++++-- hdRDMAThread.cc | 15 +++--- hdRDMAThread.h | 10 +++- 4 files changed, 123 insertions(+), 44 deletions(-) diff --git a/hdRDMA.cc b/hdRDMA.cc index 9f136ba..33c7303 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -20,6 +20,56 @@ using std::chrono::duration; using std::chrono::duration_cast; using std::chrono::high_resolution_clock; +#include // mmap, munmap +#include + +template +struct thp_allocator +{ + constexpr static std::size_t huge_page_size = 1 << 30; // 1 GiB + using value_type = T; + +#ifdef WIN32 + static T *allocate(std::size_t n) + { + if (n > std::numeric_limits::max() / sizeof(T)) + { + throw std::bad_alloc(); + } + + void *p = _aligned_malloc(, huge_page_size); + if (p == nullptr) + { + throw std::bad_alloc(); + } + + return static_cast(p); + } + + static void deallocate(T *p) { _aligned_free(p); } +#else + static T *allocate(std::size_t n) + { + if (n > std::numeric_limits::max() / sizeof(T)) + { + throw std::bad_alloc(); + } + + void *p = nullptr; + posix_memalign(&p, huge_page_size, n * sizeof(T)); + madvise(p, n * sizeof(T), MADV_HUGEPAGE); + if (p == nullptr) + { + throw std::bad_alloc(); + } + + return static_cast(p); + } + + static void deallocate(T *p) { std::free(p); } +#endif +}; + extern "C" { HDRDMA_DLL hdrdma::IhdRDMA* hdrdma_allocate(const hdrdma::config& config) @@ -156,34 +206,50 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) cout << "ERROR allocation protection domain!" << endl; throw std::runtime_error("ibv_alloc_pd failed"); } - - // Allocate a large buffer and create a memory region pointing to it. - // We will split this one memory region among multiple receive requests - // n.b. initial tests failed on transfer for buffers larger than 1GB - num_buff_sections = config.num_buffer_sections; - buff_section_len = (config.buffer_len_gb *1000000000)/(uint64_t)num_buff_sections; - buff_len = num_buff_sections*buff_section_len; - buff = new uint8_t[buff_len]; - if( !buff ){ - cout << "ERROR: Unable to allocate buffer!" << endl; - throw std::runtime_error("buff allocation failed"); - } - errno = 0; - auto access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; - mr = ibv_reg_mr( pd, buff, buff_len, access); - if( !mr ){ - cout << "ERROR: Unable to register memory region! errno=" << errno << endl; - cout << " (Please see usage statement for a possible work around)" << endl; - throw std::runtime_error("ibv_reg_mr failed"); - } - - // Fill in buffers - for( uint32_t i=0; i 2GB fail to deregister. + // The failure causes the entire system to freeze and become completely unusable! + // https://forums.developer.nvidia.com/t/cannot-deregister-memory-region-ibv-dereg-mr-if-mr-size-reach-2gb/217980 + // + // So instead of creating a single large memory region, create a number of smaller memory regions. + // This can hurt performance, but at least the system remains stable! + auto remaining_buffer_len_gb = config.buffer_len_gb; + while (remaining_buffer_len_gb) + { + auto buffer_len_gb = std::min(2, remaining_buffer_len_gb); + hdBuffer buff; + + // Allocate a large buffer and create a memory region pointing to it. + // We will split this one memory region among multiple receive requests + // n.b. initial tests failed on transfer for buffers larger than 1GB + buff.num_buff_sections = config.num_buffer_sections; + buff.buff_section_len = (buffer_len_gb *1000000000)/(uint64_t)buff.num_buff_sections; + buff.buff_len = buff.num_buff_sections*buff.buff_section_len; + buff.buff = thp_allocator::allocate(buff.buff_len); + if( !buff.buff ){ + cout << "ERROR: Unable to allocate buffer!" << endl; + throw std::runtime_error("buff allocation failed"); + } + errno = 0; + auto access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; + buff.mr = ibv_reg_mr( pd, buff.buff, buff.buff_len, access); + if( !buff.mr ){ + cout << "ERROR: Unable to register memory region! errno=" << errno << endl; + cout << " (Please see usage statement for a possible work around)" << endl; + throw std::runtime_error("ibv_reg_mr failed"); + } + + // Fill in buffers + for( uint32_t i=0; i::deallocate(b.buff); + } if( pd!=nullptr ) ibv_dealloc_pd( pd ); if( ctx!=nullptr ) ibv_close_device( ctx ); diff --git a/hdRDMA.h b/hdRDMA.h index 35bb7eb..df94a33 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -27,6 +27,15 @@ #include "IhdRDMA.h" +struct hdBuffer +{ + uint64_t buff_len = 0; + uint8_t *buff = nullptr; + uint64_t num_buff_sections = 0; + uint64_t buff_section_len = 0; + struct ibv_mr *mr = nullptr; +}; + class hdRDMA : public hdrdma::IhdRDMA { public: @@ -51,11 +60,7 @@ class hdRDMA : public hdrdma::IhdRDMA { struct ibv_device_attr attr; struct ibv_port_attr port_attr; ibv_pd *pd = nullptr; - uint64_t buff_len = 0; - uint8_t *buff = nullptr; - uint64_t num_buff_sections = 0; - uint64_t buff_section_len = 0; - struct ibv_mr *mr = nullptr; + std::vector buffers; std::deque buffer_pool; std::mutex buffer_pool_mutex; diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index cb8f971..0f3e6ee 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -253,7 +253,7 @@ void hdRDMAThread::ThreadRun(SOCKET sockfd) break; // exit thread } auto &buffer = buffers[id]; - auto buff = std::get<0>(buffer); + auto buff = buffer.Buffer; //auto buff_len = std::get<1>(buffer); hdrdma->total_bytes_received += wc.byte_len; ReceiveBuffer( buff, wc.byte_len ); //n.b. do NOT use buff_len here! @@ -277,8 +277,8 @@ void hdRDMAThread::PostWR( int id ) //cout << "Posting WR for id: " << id << endl; auto &buffer = buffers[id]; - auto buff = std::get<0>(buffer); - auto buff_len = std::get<1>(buffer); + auto buff = buffer.Buffer; + auto buff_len = buffer.BufferLen; struct ibv_recv_wr wr; struct ibv_sge sge; @@ -289,7 +289,7 @@ void hdRDMAThread::PostWR( int id ) wr.num_sge = 1; sge.addr = (uint64_t)buff; sge.length = buff_len; - sge.lkey = hdrdma->mr->lkey; + sge.lkey = buffer.MR->lkey; auto ret = ibv_post_recv( qp, &wr, &bad_wr); if( ret != 0 ){ cout << "ERROR: ibv_post_recv returned non zero value (" << ret << ")" << endl; @@ -663,8 +663,6 @@ void hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bo wr.num_sge = 1; wr.send_flags = IBV_SEND_SIGNALED, - sge.lkey = hdrdma->mr->lkey; - // Send buffers crcsum = adler32( 0L, Z_NULL, 0 ); t1 = high_resolution_clock::now(); @@ -676,9 +674,10 @@ void hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bo for(int i=0; i<1000; i++){ // if sending more than 1000 buffers something is wrong! auto id = i%buffers.size(); auto &buffer = buffers[id]; - auto buff = std::get<0>(buffer); - auto buff_len = std::get<1>(buffer); + auto buff = buffer.Buffer; + auto buff_len = buffer.BufferLen; sge.addr = (uint64_t)buff; + sge.lkey = buffer.MR->lkey; HeaderInfo *hi = (HeaderInfo*)sge.addr; hi->buff_type = 1; // buffer holds data for file transfer hi->flags = 0x0; diff --git a/hdRDMAThread.h b/hdRDMAThread.h index 882d4b5..7f092b4 100644 --- a/hdRDMAThread.h +++ b/hdRDMAThread.h @@ -65,9 +65,15 @@ class hdRDMAThread{ const char* what(void) const noexcept { return mess.c_str(); } std::string mess; }; - - typedef std::tuple bufferinfo; + struct bufferinfo + { + bufferinfo(uint8_t* buff, uint32_t buff_len, struct ibv_mr* mr) : Buffer(buff), BufferLen(buff_len), MR(mr) {} + uint8_t* Buffer = nullptr; + uint32_t BufferLen = 0; + struct ibv_mr* MR = nullptr; + }; + hdRDMAThread(hdRDMA *hdrdma); ~hdRDMAThread(); From 8f5e0aba12265fab987c3600882b313be377878c Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Thu, 17 Nov 2022 18:05:54 +1100 Subject: [PATCH 17/31] More truthful logging --- hdRDMA.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hdRDMA.cc b/hdRDMA.cc index 33c7303..9700e5b 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -246,7 +246,7 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) } remaining_buffer_len_gb -= buffer_len_gb; - cout << "Created " << buffer_pool.size() << " buffers of " << buff.buff_section_len/1000000 << "MB (" << buff.buff_len/1000000000 << "GB total)" << endl; + cout << "Created " << buff.num_buff_sections << " buffers of " << buff.buff_section_len/1000000 << "MB (" << buff.buff_len/1000000000 << "GB total)" << endl; buffers.push_back(buff); } From faf37d4f79c67b44161d9335a64fa82152c2b8c7 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Thu, 17 Nov 2022 22:33:13 +1100 Subject: [PATCH 18/31] Config buffer size and buffer section counts are now respected when going over the 2GB limit. --- IhdRDMA.h | 9 ++++----- hdRDMA.cc | 20 +++++++++++--------- hdRDMA.h | 2 +- hdrdmacp.cc | 5 +---- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/IhdRDMA.h b/IhdRDMA.h index 139b3b4..c0f0d5e 100644 --- a/IhdRDMA.h +++ b/IhdRDMA.h @@ -23,12 +23,11 @@ namespace hdrdma struct config { - config(const std::string_view& remote) : remote_addr(remote), buffer_len_gb(1), num_buffer_sections(4) {} - config() {} + config(const std::string_view& remote, size_t buffer_section_sz, int buffer_section_count) : RemoteAddr(remote), BufferSectionSize(buffer_section_sz), BufferSectionCount(buffer_section_count) {} - size_t buffer_len_gb; - size_t num_buffer_sections; - std::string remote_addr; + const size_t BufferSectionSize; + const size_t BufferSectionCount; + const std::string RemoteAddr; }; } diff --git a/hdRDMA.cc b/hdRDMA.cc index 9700e5b..d2e1b53 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -89,7 +89,7 @@ extern "C" // hdRDMA constructor. This will look for IB devices and set up // for RDMA communications on the first one it finds. //------------------------------------------------------------- -hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) +hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.RemoteAddr) { cout << "Looking for IB devices ..." << endl; int num_devices = 0; @@ -213,18 +213,20 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) // // So instead of creating a single large memory region, create a number of smaller memory regions. // This can hurt performance, but at least the system remains stable! - auto remaining_buffer_len_gb = config.buffer_len_gb; - while (remaining_buffer_len_gb) + constexpr size_t PAGE_SIZE = 2'000'000'000; + const auto BUFFER_SECTIONS_PER_PAGE = PAGE_SIZE / config.BufferSectionSize; + auto remaining_buffer_size = config.BufferSectionSize * config.BufferSectionCount; + while (remaining_buffer_size) { - auto buffer_len_gb = std::min(2, remaining_buffer_len_gb); + auto page_sz = std::min(BUFFER_SECTIONS_PER_PAGE * config.BufferSectionSize, remaining_buffer_size); hdBuffer buff; // Allocate a large buffer and create a memory region pointing to it. // We will split this one memory region among multiple receive requests // n.b. initial tests failed on transfer for buffers larger than 1GB - buff.num_buff_sections = config.num_buffer_sections; - buff.buff_section_len = (buffer_len_gb *1000000000)/(uint64_t)buff.num_buff_sections; - buff.buff_len = buff.num_buff_sections*buff.buff_section_len; + buff.num_buff_sections = page_sz / config.BufferSectionSize; + buff.buff_section_len = config.BufferSectionSize; + buff.buff_len = page_sz; buff.buff = thp_allocator::allocate(buff.buff_len); if( !buff.buff ){ cout << "ERROR: Unable to allocate buffer!" << endl; @@ -245,7 +247,7 @@ hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.remote_addr) buffer_pool.emplace_back( b, (uint32_t)buff.buff_section_len, buff.mr ); } - remaining_buffer_len_gb -= buffer_len_gb; + remaining_buffer_size -= page_sz; cout << "Created " << buff.num_buff_sections << " buffers of " << buff.buff_section_len/1000000 << "MB (" << buff.buff_len/1000000000 << "GB total)" << endl; buffers.push_back(buff); @@ -434,7 +436,7 @@ void hdRDMA::Connect(std::string host, int port) // Create an hdRDMAThread object to handle the RDMA connection details. // (we won't actually run it in a separate thread.) - hdthr_client = new hdRDMAThread( this ); + hdthr_client.reset(new hdRDMAThread( this )); hdthr_client->ClientConnect( sockfd ); } diff --git a/hdRDMA.h b/hdRDMA.h index df94a33..8655f41 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -70,7 +70,7 @@ class hdRDMA : public hdrdma::IhdRDMA { std::thread *ack_thread = nullptr; uint32_t Nconnections = 0; - hdRDMAThread *hdthr_client = nullptr; + std::unique_ptr hdthr_client = nullptr; std::map threads; std::mutex threads_mtx; diff --git a/hdrdmacp.cc b/hdrdmacp.cc index 9f4c2e5..1ce0c77 100644 --- a/hdrdmacp.cc +++ b/hdrdmacp.cc @@ -44,10 +44,7 @@ int main(int narg, char *argv[]) ParseCommandLineArguments( narg, argv ); // Create an hdRDMA object - hdrdma::config hdrdma_config; - hdrdma_config.remote_addr = HDRDMA_REMOTE_ADDR; - hdrdma_config.buffer_len_gb = HDRDMA_BUFF_LEN_GB; - hdrdma_config.num_buffer_sections = HDRDMA_NUM_BUFF_SECTIONS; + const hdrdma::config hdrdma_config(HDRDMA_REMOTE_ADDR, HDRDMA_BUFF_LEN_GB * 1000'000'000 / HDRDMA_NUM_BUFF_SECTIONS, HDRDMA_NUM_BUFF_SECTIONS); auto hdrdma = hdrdma::Create(hdrdma_config); // Listen for remote peers if we are in server mode From 4699e91bb476f85c9ecc0ecb9cfef1f20729b9e3 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Thu, 17 Nov 2022 23:56:35 +1100 Subject: [PATCH 19/31] Fix warning --- hdRDMAThread.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index 0f3e6ee..e43c3c3 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -689,7 +689,7 @@ void hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bo hi->flags |= HI_FIRST_BUFFER; // first buffer of file if( calculate_checksum ) hi->flags |= HI_CALCULATE_CHECKSUM; // tell remote server to calculate checksum if( makeparentdirs ) hi->flags |= HI_MAKE_PARENT_DIRS; // tell remote server to make directory path if needed - sprintf( (char*)&hi->payload, dstfilename.c_str() ); + sprintf( (char*)&hi->payload, "%s", dstfilename.c_str() ); }else{ hi->header_len = sizeof(*hi) - sizeof(hi->payload); } From f0a573447da8152df9c98a0465b9316cd3493be8 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Fri, 18 Nov 2022 01:51:46 +1100 Subject: [PATCH 20/31] Clients will now wait for buffers, instead of failing --- CMakeLists.txt | 2 +- hdRDMA.cc | 39 ++++++++++++++++++++++++++++++++------- hdRDMA.h | 2 ++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e4d1918..a19be80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.16) project (hdrdmacp) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) # # Add submodules diff --git a/hdRDMA.cc b/hdRDMA.cc index d2e1b53..d8515e5 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -300,7 +300,11 @@ hdRDMA::~hdRDMA() # define SHUT_RDWR SD_BOTH #endif - if( server_sockfd ) shutdown( server_sockfd, SHUT_RDWR ); + if( server_sockfd ) + { + shutdown(server_sockfd, SHUT_RD); + closesocket(server_sockfd); + } } //------------------------------------------------------------- @@ -345,7 +349,7 @@ void hdRDMA::Listen(int port) // Create a new thread to handle this connection auto hdthr = new hdRDMAThread( this ); auto thr = new std::thread( &hdRDMAThread::ThreadRun, hdthr, peer_sockfd ); - std::lock_guard lck( threads_mtx ); + std::scoped_lock lck( threads_mtx ); threads[ thr ] = hdthr; Nconnections++; @@ -373,7 +377,11 @@ void hdRDMA::StopListening(void) if( server_thread ){ cout << "Waiting for server to finish ..." << endl; done = true; - if (server_sockfd) closesocket(server_sockfd); + if (server_sockfd) + { + shutdown(server_sockfd, SHUT_RD); + closesocket(server_sockfd); + } server_sockfd = 0; server_thread->join(); delete server_thread; @@ -455,7 +463,17 @@ uint32_t hdRDMA::GetNpeers(void) //------------------------------------------------------------- void hdRDMA::GetBuffers( std::vector &buffers, int Nrequested ) { - std::lock_guard grd( buffer_pool_mutex ); + std::unique_lock grd( buffer_pool_mutex ); + + while (!done && !buffers.size()) + { + buffer_pool_cond.wait(grd); + } + + if (done) + { + return; + } //cout << "buffer_pool.size()="< Date: Tue, 22 Nov 2022 00:00:02 +1100 Subject: [PATCH 23/31] Removed remote_addr from Config --- IhdRDMA.h | 3 +-- hdRDMA.cc | 5 +++-- hdRDMA.h | 2 +- hdrdmacp.cc | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/IhdRDMA.h b/IhdRDMA.h index c0f0d5e..279e2ac 100644 --- a/IhdRDMA.h +++ b/IhdRDMA.h @@ -23,11 +23,10 @@ namespace hdrdma struct config { - config(const std::string_view& remote, size_t buffer_section_sz, int buffer_section_count) : RemoteAddr(remote), BufferSectionSize(buffer_section_sz), BufferSectionCount(buffer_section_count) {} + config(size_t buffer_section_sz, int buffer_section_count) : BufferSectionSize(buffer_section_sz), BufferSectionCount(buffer_section_count) {} const size_t BufferSectionSize; const size_t BufferSectionCount; - const std::string RemoteAddr; }; } diff --git a/hdRDMA.cc b/hdRDMA.cc index 631d86c..5aadb19 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -90,7 +90,7 @@ extern "C" // hdRDMA constructor. This will look for IB devices and set up // for RDMA communications on the first one it finds. //------------------------------------------------------------- -hdRDMA::hdRDMA(const hdrdma::config& config) : remote_addr(config.RemoteAddr) +hdRDMA::hdRDMA(const hdrdma::config& config) { cout << "Looking for IB devices ..." << endl; int num_devices = 0; @@ -457,7 +457,8 @@ void hdRDMA::Connect(std::string host, int port) cout << "ERROR: connecting to server: " << host << " (" << inet_ntoa(addr.sin_addr) << ")" << endl; throw std::runtime_error("connect failed"); }else{ - cout << "Connected to " << host << ":" << port << endl; + remote_addr = host + ':' + std::to_string(port); + cout << "Connected to " << remote_addr << endl; } // Create an hdRDMAThread object to handle the RDMA connection details. diff --git a/hdRDMA.h b/hdRDMA.h index 4bd2f1b..ef80bb1 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -37,7 +37,7 @@ struct hdBuffer struct ibv_mr *mr = nullptr; }; -class hdRDMA : public hdrdma::IhdRDMA { +class hdRDMA final : public hdrdma::IhdRDMA { public: hdRDMA(const hdrdma::config &config); diff --git a/hdrdmacp.cc b/hdrdmacp.cc index 1ce0c77..46eb880 100644 --- a/hdrdmacp.cc +++ b/hdrdmacp.cc @@ -44,7 +44,7 @@ int main(int narg, char *argv[]) ParseCommandLineArguments( narg, argv ); // Create an hdRDMA object - const hdrdma::config hdrdma_config(HDRDMA_REMOTE_ADDR, HDRDMA_BUFF_LEN_GB * 1000'000'000 / HDRDMA_NUM_BUFF_SECTIONS, HDRDMA_NUM_BUFF_SECTIONS); + const hdrdma::config hdrdma_config(HDRDMA_BUFF_LEN_GB * 1000'000'000 / HDRDMA_NUM_BUFF_SECTIONS, HDRDMA_NUM_BUFF_SECTIONS); auto hdrdma = hdrdma::Create(hdrdma_config); // Listen for remote peers if we are in server mode From 3db5628e59617cca407f327baa705fe606310568 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Mon, 21 Nov 2022 14:52:50 +1100 Subject: [PATCH 24/31] Windows fixes --- hdRDMA.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hdRDMA.cc b/hdRDMA.cc index 5aadb19..b07e6c3 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -31,6 +31,7 @@ struct thp_allocator using value_type = T; #ifdef WIN32 +#undef max static T *allocate(std::size_t n) { if (n > std::numeric_limits::max() / sizeof(T)) @@ -38,7 +39,7 @@ struct thp_allocator throw std::bad_alloc(); } - void *p = _aligned_malloc(, huge_page_size); + void *p = _aligned_malloc(n, huge_page_size); if (p == nullptr) { throw std::bad_alloc(); @@ -305,6 +306,7 @@ hdRDMA::~hdRDMA() #ifdef _MSC_VER # define SHUT_RDWR SD_BOTH +# define SHUT_RD SD_RECEIVE #endif if( server_sockfd ) From 064400db1b6a26930ae349a0abb1fa1261c4ada8 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Tue, 22 Nov 2022 02:09:15 +1100 Subject: [PATCH 25/31] Added Good() method to determine if the interface is good for use. --- IhdRDMA.h | 3 ++- hdRDMA.cc | 7 +++++++ hdRDMA.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/IhdRDMA.h b/IhdRDMA.h index 279e2ac..1621d06 100644 --- a/IhdRDMA.h +++ b/IhdRDMA.h @@ -42,7 +42,8 @@ namespace hdrdma class IhdRDMA { public: virtual ~IhdRDMA() {} - + + virtual bool Good() const = 0; virtual void Listen(int port) = 0; virtual void StopListening(void) = 0; virtual void Connect(std::string host, int port) = 0; diff --git a/hdRDMA.cc b/hdRDMA.cc index b07e6c3..a9757a7 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -166,6 +166,13 @@ hdRDMA::hdRDMA(const hdrdma::config& config) << " : lid=" << lid << endl; } + + if (!dev) + { + cout << "### No Infiniband adapters found. Is opensm running?" << endl; + return; + } + cout << "=============================================" << endl << endl; // Open device diff --git a/hdRDMA.h b/hdRDMA.h index ef80bb1..47175ad 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -42,6 +42,7 @@ class hdRDMA final : public hdrdma::IhdRDMA { hdRDMA(const hdrdma::config &config); ~hdRDMA() override; + virtual bool Good() const override { return dev != nullptr; } virtual void Listen(int port) override; virtual void StopListening(void) override; virtual void Connect(std::string host, int port) override; From 10fb3e68f78b31d66661a07c9b2f30c00a801cc7 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Tue, 22 Nov 2022 02:26:41 +1100 Subject: [PATCH 26/31] Fix dangling pointer --- hdRDMA.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/hdRDMA.cc b/hdRDMA.cc index a9757a7..efb4fb2 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -147,6 +147,7 @@ hdRDMA::hdRDMA(const hdrdma::config& config) } } ibv_close_device( ctx ); + ctx = nullptr; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From b7ad058aefd5dd558be1eb312a9ed9b14f248f3f Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Tue, 22 Nov 2022 23:22:26 +1100 Subject: [PATCH 27/31] Fixed resource leak --- hdRDMAThread.cc | 26 ++++++++++++++++++++++++-- hdRDMAThread.h | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index e43c3c3..889c883 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -116,6 +116,28 @@ hdRDMAThread::hdRDMAThread(hdRDMA *hdrdma) //----------------------------------------- hdRDMAThread::~hdRDMAThread() { +} + +//---------------------------------------------------------------------- +// ThreadRun +// +// This is run in a dedicated thread in server mode as soon as a +// TCP connection is established. It will exchange RDMA connection +// information over the given socket and then loop continously until +// the client signals it is done or the "stop" flag is set by the +// hdRDMA object. +//---------------------------------------------------------------------- +void hdRDMAThread::ThreadRun(SOCKET sockfd) +{ + try + { + TryThreadRun(sockfd); + } + catch(const std::exception& e) + { + std::cerr << e.what() << '\n'; + } + // Put QP insto RESET state so it releases all outstanding work requests if( qp!=nullptr ){ struct ibv_qp_attr qp_attr; @@ -136,7 +158,7 @@ hdRDMAThread::~hdRDMAThread() // Return MR buffers to pool hdrdma->ReturnBuffers( buffers ); } - + //---------------------------------------------------------------------- // ThreadRun // @@ -146,7 +168,7 @@ hdRDMAThread::~hdRDMAThread() // the client signals it is done or the "stop" flag is set by the // hdRDMA object. //---------------------------------------------------------------------- -void hdRDMAThread::ThreadRun(SOCKET sockfd) +void hdRDMAThread::TryThreadRun(SOCKET sockfd) { // The first thing we send via TCP is a 3 byte message indicating // success or failure. This really just allows us to inform the client diff --git a/hdRDMAThread.h b/hdRDMAThread.h index 7f092b4..0ede555 100644 --- a/hdRDMAThread.h +++ b/hdRDMAThread.h @@ -79,6 +79,7 @@ class hdRDMAThread{ ~hdRDMAThread(); void ThreadRun(SOCKET sockfd); + void TryThreadRun(SOCKET sockfd); void PostWR( int id ); // id= index to buffers void ExchangeQPInfo( SOCKET sockfd ); void CreateQP(void); From 47f2a3e71ff55c4abe265316e22324524e1e25c4 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Tue, 22 Nov 2022 23:24:41 +1100 Subject: [PATCH 28/31] shared_ptr -> unique_ptr --- IhdRDMA.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/IhdRDMA.h b/IhdRDMA.h index 1621d06..da253ea 100644 --- a/IhdRDMA.h +++ b/IhdRDMA.h @@ -54,9 +54,11 @@ namespace hdrdma virtual uint64_t TotalBytesReceived() const = 0; }; + using Ptr = std::unique_ptr; + // Wrappers. You probably want to use these. - static std::shared_ptr Create(const hdrdma::config& config) + static auto Create(const hdrdma::config& config) { - return std::shared_ptr(hdrdma_allocate(config), hdrdma_free); + return Ptr(hdrdma_allocate(config), hdrdma_free); } } From e4312fd85c8521dd0d2e91c18e939b3e11502560 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Wed, 23 Nov 2022 00:04:28 +1100 Subject: [PATCH 29/31] Fixed resource leak introduced by last --- hdRDMA.cc | 2 ++ hdRDMAThread.cc | 68 +++++++++++++++++++++++++++++++++++-------------- hdRDMAThread.h | 3 +++ 3 files changed, 54 insertions(+), 19 deletions(-) diff --git a/hdRDMA.cc b/hdRDMA.cc index efb4fb2..0090d2c 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -525,6 +525,8 @@ void hdRDMA::ReturnBuffers( std::vector &buffers ) { buffer_pool.push_back( b ); } + + buffers.clear(); } buffer_pool_cond.notify_all(); diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index 889c883..fb63636 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -116,6 +116,7 @@ hdRDMAThread::hdRDMAThread(hdRDMA *hdrdma) //----------------------------------------- hdRDMAThread::~hdRDMAThread() { + Dispose(); } //---------------------------------------------------------------------- @@ -137,26 +138,8 @@ void hdRDMAThread::ThreadRun(SOCKET sockfd) { std::cerr << e.what() << '\n'; } - - // Put QP insto RESET state so it releases all outstanding work requests - if( qp!=nullptr ){ - struct ibv_qp_attr qp_attr; - memset( &qp_attr, 0, sizeof(qp_attr) ); - qp_attr.qp_state = IBV_QPS_RESET; - ibv_modify_qp (qp, &qp_attr, IBV_QP_STATE); - } - - // Delete all of our allocated objects - // n.b. order here matters! If the qp is destroyed after the - // comp_channel it will leave open a file descriptor pointing - // to [infinibandevent] that we have no way of closing! - if( qp!=nullptr ) ibv_destroy_qp( qp ); - if( cq!=nullptr ) ibv_destroy_cq ( cq ); - if( comp_channel!=nullptr ) ibv_destroy_comp_channel( comp_channel ); - if( ofs!=nullptr ) delete ofs; - // Return MR buffers to pool - hdrdma->ReturnBuffers( buffers ); + Dispose(); } //---------------------------------------------------------------------- @@ -658,6 +641,23 @@ void hdRDMAThread::ClientConnect( SOCKET sockfd ) // SendFile //------------------------------------------------------------- void hdRDMAThread::SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) +{ + try + { + TrySendFile(srcfilename, dstfilename, delete_after_send, calculate_checksum, makeparentdirs); + } + catch(const std::exception& e) + { + std::cerr << e.what() << '\n'; + } + + Dispose(); +} + +//------------------------------------------------------------- +// TrySendFile +//------------------------------------------------------------- +void hdRDMAThread::TrySendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send, bool calculate_checksum, bool makeparentdirs) { // Open local file std::ifstream ifs(srcfilename.c_str()); @@ -875,3 +875,33 @@ bool hdRDMAThread::makePath( const std::string &path ) } } +//------------------------------------------------------------- +// Dispose +//------------------------------------------------------------- +void hdRDMAThread::Dispose() +{ + // Put QP insto RESET state so it releases all outstanding work requests + if( qp!=nullptr ){ + struct ibv_qp_attr qp_attr; + memset( &qp_attr, 0, sizeof(qp_attr) ); + qp_attr.qp_state = IBV_QPS_RESET; + ibv_modify_qp (qp, &qp_attr, IBV_QP_STATE); + } + + // Delete all of our allocated objects + // n.b. order here matters! If the qp is destroyed after the + // comp_channel it will leave open a file descriptor pointing + // to [infinibandevent] that we have no way of closing! + if( qp!=nullptr ) ibv_destroy_qp( qp ); + if( cq!=nullptr ) ibv_destroy_cq ( cq ); + if( comp_channel!=nullptr ) ibv_destroy_comp_channel( comp_channel ); + if( ofs!=nullptr ) delete ofs; + + qp = nullptr; + cq = nullptr; + comp_channel = nullptr; + ofs = nullptr; + + // Return MR buffers to pool + hdrdma->ReturnBuffers( buffers ); +} diff --git a/hdRDMAThread.h b/hdRDMAThread.h index 0ede555..856b65c 100644 --- a/hdRDMAThread.h +++ b/hdRDMAThread.h @@ -87,8 +87,11 @@ class hdRDMAThread{ void ReceiveBuffer(uint8_t *buff, uint32_t buff_len); void ClientConnect( SOCKET sockfd ); void SendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false); + void TrySendFile(std::string srcfilename, std::string dstfilename, bool delete_after_send=false, bool calculate_checksum=false, bool makeparentdirs=false); void PollCQ(void); bool makePath( const std::string &path ); + + void Dispose(); bool stop = false; // Flag so thread can be told to stop bool stopped = false; // Flag so thread can declare it has stopped From d5c4e75c8fda745aa9db70efe037067fa9bf3bd4 Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Tue, 29 Nov 2022 01:45:10 +1100 Subject: [PATCH 30/31] Added support for custom file path decoding --- IhdRDMA.h | 12 +++++++++++- hdRDMA.cc | 12 ++++++++++++ hdRDMA.h | 3 +++ hdRDMAThread.cc | 7 ++----- hdRDMAThread.h | 2 +- 5 files changed, 29 insertions(+), 7 deletions(-) diff --git a/IhdRDMA.h b/IhdRDMA.h index da253ea..39fab4c 100644 --- a/IhdRDMA.h +++ b/IhdRDMA.h @@ -20,13 +20,16 @@ namespace hdrdma { class IhdRDMA; + class IPathDecoder; struct config { - config(size_t buffer_section_sz, int buffer_section_count) : BufferSectionSize(buffer_section_sz), BufferSectionCount(buffer_section_count) {} + config(size_t buffer_section_sz, int buffer_section_count, const std::shared_ptr& path_decoder = nullptr) : BufferSectionSize(buffer_section_sz), BufferSectionCount(buffer_section_count), PathDecoder(path_decoder) {} const size_t BufferSectionSize; const size_t BufferSectionCount; + + std::shared_ptr PathDecoder; }; } @@ -39,6 +42,13 @@ extern "C" namespace hdrdma { + class IPathDecoder { + public: + virtual ~IPathDecoder() {} + + virtual std::string Decode(const std::string_view& path) const = 0; + }; + class IhdRDMA { public: virtual ~IhdRDMA() {} diff --git a/hdRDMA.cc b/hdRDMA.cc index 0090d2c..47203fc 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -600,3 +600,15 @@ void hdRDMA::Join(void) threads.clear(); } +//------------------------------------------------------------- +// DecodePath +// +// Handles and user filesystem translations. +//------------------------------------------------------------- +std::string hdRDMA::DecodePath(const std::string_view& p) const +{ + return PathDecoder + ? PathDecoder->Decode(p) + : std::string(p); +} + diff --git a/hdRDMA.h b/hdRDMA.h index 47175ad..63bc8e5 100644 --- a/hdRDMA.h +++ b/hdRDMA.h @@ -56,6 +56,9 @@ class hdRDMA final : public hdrdma::IhdRDMA { virtual uint64_t TotalBytesReceived() const override { return total_bytes_received; } std::atomic_ullong total_bytes_received = 0; + std::string DecodePath(const std::string_view& p) const; + std::shared_ptr PathDecoder; + struct ibv_device *dev = nullptr; struct ibv_context *ctx = nullptr; int port_num = 1; diff --git a/hdRDMAThread.cc b/hdRDMAThread.cc index fb63636..276fe94 100644 --- a/hdRDMAThread.cc +++ b/hdRDMAThread.cc @@ -485,10 +485,9 @@ void hdRDMAThread::ReceiveBuffer(uint8_t *buff, uint32_t buff_len) if( ofs != nullptr ) { cout << "ERROR: Received new file buffer while file " << ofilename << " already open!" << endl; ofs->close(); - delete ofs; ofs = nullptr; } - ofilename = (char*)&hi->payload; + ofilename = hdrdma->DecodePath((const char*)&hi->payload); cout << "Receiving file: " << ofilename << endl; // Create parent directory path if specified by remote sender @@ -498,7 +497,7 @@ void hdRDMAThread::ReceiveBuffer(uint8_t *buff, uint32_t buff_len) if( pos != std::string::npos ) makePath( ofilename.substr(0, pos) ); } - ofs = new std::ofstream( ofilename.c_str() ); + ofs = std::make_unique(ofilename); ofilesize = 0; crcsum = adler32( 0L, Z_NULL, 0 ); calculate_checksum = (hi->flags & HI_CALCULATE_CHECKSUM); // optionally calculate checksum @@ -536,7 +535,6 @@ void hdRDMAThread::ReceiveBuffer(uint8_t *buff, uint32_t buff_len) duration duration_io = duration_cast>(t_io_end-t_io_start); delta_t_io += duration_io.count(); ofs->close(); - delete ofs; ofs = nullptr; } // auto t2 = high_resolution_clock::now(); @@ -895,7 +893,6 @@ void hdRDMAThread::Dispose() if( qp!=nullptr ) ibv_destroy_qp( qp ); if( cq!=nullptr ) ibv_destroy_cq ( cq ); if( comp_channel!=nullptr ) ibv_destroy_comp_channel( comp_channel ); - if( ofs!=nullptr ) delete ofs; qp = nullptr; cq = nullptr; diff --git a/hdRDMAThread.h b/hdRDMAThread.h index 856b65c..98abab4 100644 --- a/hdRDMAThread.h +++ b/hdRDMAThread.h @@ -107,7 +107,7 @@ class hdRDMAThread{ QPInfo qpinfo; QPInfo remote_qpinfo; - std::ofstream *ofs = nullptr; + std::unique_ptr ofs; std::string ofilename; uint64_t ofilesize = 0; uint32_t crcsum; From 10207d6c88ad6c59afda597197772e0e291d5e3b Mon Sep 17 00:00:00 2001 From: Lucas Zadrozny Date: Wed, 30 Nov 2022 13:41:21 +1100 Subject: [PATCH 31/31] Fixed crash on quit --- hdRDMA.cc | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/hdRDMA.cc b/hdRDMA.cc index 47203fc..ec81770 100644 --- a/hdRDMA.cc +++ b/hdRDMA.cc @@ -302,16 +302,19 @@ hdRDMA::~hdRDMA() if( pd!=nullptr ) ibv_dealloc_pd( pd ); if( ctx!=nullptr ) ibv_close_device( ctx ); + if (ack_thread) + { #ifdef __GNUC__ - // 'linux_rdma' wakes up their async event file descriptor with a SIGINT. Here: https://github.com/linux-rdma/rdma-core/blob/3b28e0e4784cce3aceedab39e1ae102538e212e2/srp_daemon/srp_daemon.c#L2017 - // I tried using async file descriptors with polling, but that didn't seem to help. Oh well, this works I guess. - pthread_kill(ack_thread->native_handle(), SIGINT); + // 'linux_rdma' wakes up their async event file descriptor with a SIGINT. Here: https://github.com/linux-rdma/rdma-core/blob/3b28e0e4784cce3aceedab39e1ae102538e212e2/srp_daemon/srp_daemon.c#L2017 + // I tried using async file descriptors with polling, but that didn't seem to help. Oh well, this works I guess. + pthread_kill(ack_thread->native_handle(), SIGINT); #endif - ack_thread->join(); - delete ack_thread; - ack_thread = nullptr; - + ack_thread->join(); + delete ack_thread; + ack_thread = nullptr; + } + #ifdef _MSC_VER # define SHUT_RDWR SD_BOTH # define SHUT_RD SD_RECEIVE