From 08967d103413ba8d13730188ba3df7617732a7ca Mon Sep 17 00:00:00 2001 From: yukai Date: Tue, 12 Apr 2016 15:36:05 +0800 Subject: [PATCH 01/58] ADD: master loader and slave loader as well as some config variables OPTIMIZE: some flags in Makefile.am --- Config.cpp | 10 + Config.h | 3 + Environment.cpp | 67 +++-- Environment.h | 16 +- Executor/AdaptiveEndPoint.cpp | 303 +++++++++++--------- Executor/Coordinator.cpp | 505 ++++++++++++++++++---------------- Executor/Coordinator.h | 56 ++-- Makefile.am | 9 +- conf/config | 6 + loader/Makefile.am | 20 +- loader/loader_message.h | 37 +++ loader/master_loader.cpp | 181 ++++++++++++ loader/master_loader.h | 86 ++++++ loader/slave_loader.cpp | 224 +++++++++++++++ loader/slave_loader.h | 83 ++++++ 15 files changed, 1173 insertions(+), 433 deletions(-) create mode 100644 loader/loader_message.h create mode 100644 loader/master_loader.cpp create mode 100644 loader/master_loader.h create mode 100644 loader/slave_loader.cpp create mode 100644 loader/slave_loader.h diff --git a/Config.cpp b/Config.cpp index 9d862da8a..c8dab029e 100644 --- a/Config.cpp +++ b/Config.cpp @@ -90,6 +90,10 @@ int Config::thread_pool_init_thread_num; int Config::load_thread_num; +bool Config::is_master_loader; +std::string Config::master_loader_ip; +int Config::master_loader_port; + Config *Config::getInstance() { if (instance_ == 0) { instance_ = new Config(); @@ -151,6 +155,12 @@ void Config::initialize() { load_thread_num = getInt("load_thread_num", sysconf(_SC_NPROCESSORS_CONF)); + is_master_loader = getBoolean("is_master_loader", true); + + master_loader_ip = getString("master_loader_ip", "10.11.1.193"); + + master_loader_port = getInt("master_loader_port", 9001); + #ifdef DEBUG_Config print_configure(); #endif diff --git a/Config.h b/Config.h index 460ff7139..18cb26935 100644 --- a/Config.h +++ b/Config.h @@ -79,6 +79,9 @@ class Config { static int thread_pool_init_thread_num; static int load_thread_num; + static bool is_master_loader; + static 
std::string master_loader_ip; + static int master_loader_port; private: static Config* instance_; diff --git a/Environment.cpp b/Environment.cpp index 293cb24cb..23bdd252b 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -6,6 +6,8 @@ */ #include "Environment.h" + +#include "caf/all.hpp" #define GLOG_NO_ABBREVIATED_SEVERITIES #include #undef GLOG_NO_ABBREVIATED_SEVERITIES @@ -13,6 +15,9 @@ #include #include #include +#include // NOLINT +#include "loader/master_loader.h" +#include "loader/slave_loader.h" #include "./Debug.h" #include "./Config.h" #include "common/Logging.h" @@ -23,14 +28,16 @@ #include "common/expression/data_type_oper.h" #include "common/expression/expr_type_cast.h" #include "common/expression/type_conversion_matrix.h" +// #define DEBUG_MODE +#include "catalog/catalog.h" using claims::common::InitAggAvgDivide; using claims::common::InitOperatorFunc; using claims::common::InitTypeCastFunc; using claims::common::InitTypeConversionMatrix; -//#define DEBUG_MODE -#include "catalog/catalog.h" using claims::common::rSuccess; +using claims::loader::MasterLoader; +using claims::loader::SlaveLoader; Environment* Environment::_instance = 0; @@ -47,12 +54,12 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { logging_->log("Initializing the Coordinator..."); initializeCoordinator(); logging_->log("Initializing the catalog ..."); - catalog_ = claims::catalog::Catalog::getInstance(); - logging_->log("restore the catalog ..."); - if (rSuccess != catalog_->restoreCatalog()) { - LOG(ERROR) << "failed to restore catalog" << std::endl; - cerr << "ERROR: restore catalog failed" << endl; - } + } + catalog_ = claims::catalog::Catalog::getInstance(); + logging_->log("restore the catalog ..."); + if (rSuccess != catalog_->restoreCatalog()) { + LOG(ERROR) << "failed to restore catalog" << std::endl; + cerr << "ERROR: restore catalog failed" << endl; } if (true == g_thread_pool_used) { @@ -76,12 +83,20 @@ Environment::Environment(bool ismaster) : 
ismaster_(ismaster) { /*Before initializing Resource Manager, the instance ip and port should be * decided.*/ + logging_->log("Initializing the ResourceManager..."); initializeResourceManager(); + logging_->log("Initializing the Storage..."); initializeStorage(); + logging_->log("Initializing the BufferManager..."); initializeBufferManager(); + logging_->log("Initializing the loader..."); + if (!InitLoader()) { + LOG(ERROR) << "failed to initialize loader"; + } + logging_->log("Initializing the ExecutorMaster..."); iteratorExecutorMaster = new IteratorExecutorMaster(); @@ -136,10 +151,10 @@ void Environment::readConfigFile() { ip = (const char*)cfg.lookup("ip"); } void Environment::initializeEndPoint() { - // libconfig::Config cfg; - // cfg.readFile("/home/claims/config/wangli/config"); - // std::string endpoint_ip=(const char*)cfg.lookup("ip"); - // std::string endpoint_port=(const char*)cfg.lookup("port"); + // libconfig::Config cfg; + // cfg.readFile("/home/claims/config/wangli/config"); + // std::string endpoint_ip=(const char*)cfg.lookup("ip"); + // std::string endpoint_port=(const char*)cfg.lookup("port"); std::string endpoint_ip = ip; int endpoint_port; if ((endpoint_port = portManager->applyPort()) == -1) { @@ -162,11 +177,11 @@ void Environment::initializeStorage() { blockManagerMaster_->initialize(); } /*both master and slave node run the BlockManager.*/ - // BlockManagerId *bmid=new BlockManagerId(); - // string + // BlockManagerId *bmid=new BlockManagerId(); + // string // actorname="blockManagerWorkerActor_"+bmid->blockManagerId; - // cout<Register(); } + +bool Environment::InitLoader() { + if (Config::is_master_loader) { + LOG(INFO) << "I'm master loader. 
Oyeah"; + master_loader_ = new MasterLoader(); + std::thread master_thread(&MasterLoader::StartMasterLoader, nullptr); + master_thread.detach(); + DLOG(INFO) << "started thread as master loader"; + } + + usleep(10000); + DLOG(INFO) << "starting create thread as slave loader"; + slave_loader_ = new SlaveLoader(); + std::thread slave_thread(&SlaveLoader::StartSlaveLoader, nullptr); + slave_thread.detach(); + + // caf::await_all_actors_done(); + return true; +} + void Environment::initializeBufferManager() { bufferManager_ = BufferManager::getInstance(); } diff --git a/Environment.h b/Environment.h index 01ca21379..e9339986c 100755 --- a/Environment.h +++ b/Environment.h @@ -24,8 +24,15 @@ #include "Executor/expander_tracker.h" #include "Resource/BufferManager.h" +namespace claims { +namespace loader { +class SlaveLoader; +class MasterLoader; +} +} using claims::catalog::Catalog; -// class Catalog; +using claims::loader::SlaveLoader; +using claims::loader::MasterLoader; class Environment { public: @@ -43,6 +50,9 @@ class Environment { IteratorExecutorSlave* getIteratorExecutorSlave() const; explicit Environment(bool ismaster = false); + MasterLoader* get_master_loader() const { return master_loader_; } + SlaveLoader* get_slave_loader() const { return slave_loader_; } + private: void readConfigFile(); void initializeEndPoint(); @@ -56,6 +66,8 @@ class Environment { void destoryClientListener(); bool initializeThreadPool(); + bool InitLoader(); + private: static Environment* _instance; PortManager* portManager; @@ -81,6 +93,8 @@ class Environment { ClientListener* listener_; ThreadPool* thread_pool_; + MasterLoader* master_loader_; + SlaveLoader* slave_loader_; /** * TODO: the master and slave pair, such as ResouceManagerMaster and diff --git a/Executor/AdaptiveEndPoint.cpp b/Executor/AdaptiveEndPoint.cpp index 11ef6cdba..14208b01f 100755 --- a/Executor/AdaptiveEndPoint.cpp +++ b/Executor/AdaptiveEndPoint.cpp @@ -8,157 +8,188 @@ #include #include #include -#include 
#include -#include "AdaptiveEndPoint.h" +#include +#include +#include +#include "./AdaptiveEndPoint.h" #include "../Debug.h" #include "../common/Logging.h" #include "../utility/ThreadSafe.h" #include "../Config.h" - -#include -#include -AdaptiveEndPoint::AdaptiveEndPoint(const char* name, std::string ip, std::string port) -:Theron::EndPoint(name, ("tcp://"+ip+":"+port).c_str()){ - logging_=new AdaptiveEndPointLogging(); - - logging_->log("The AdaptiveEndPoint is created as %s:%s",ip.c_str(),port.c_str()); - framework=new Theron::Framework(*(EndPoint*)this); - connectionActor=new AdaptiveEndPoint::ConnectionActor(this,("ConnectionActor://"+ip+":"+port).c_str()); - - if(SayHelloToCoordinator(ip,port)==false){ - logging_->elog("Error occurs when saying hello to the coordinator!"); - } - - logging_->log("Get Coordinator EndPoint Port..."); - - int coordinator_endpoint_port; - if((coordinator_endpoint_port=GetCoordinatorEndPointPort())==-1){ - logging_->elog("Error occurs when getting the coordinator EndPoint port"); - } - - logging_->log("Connect to Coordinator EndPoint..."); - if(ConnectToCoordinateEndPoint(coordinator_endpoint_port)==false){ - logging_->elog("Error occurs when connecting to the coordinator EndPoint"); - } - - logging_->log("Waiting for the Ready signal from the Coordinator."); - if(WaitForReadySignalFromCoordinator()==false){ - logging_->elog("Error occurs when waiting for the coordinator EndPoint"); - } - FileClose(socket_coor); +#include "../loader/master_loader.h" + +AdaptiveEndPoint::AdaptiveEndPoint(const char* name, std::string ip, + std::string port) + : Theron::EndPoint(name, ("tcp://" + ip + ":" + port).c_str()) { + logging_ = new AdaptiveEndPointLogging(); + + logging_->log("The AdaptiveEndPoint is created as %s:%s", ip.c_str(), + port.c_str()); + framework = new Theron::Framework(*(EndPoint*)this); + connectionActor = new AdaptiveEndPoint::ConnectionActor( + this, ("ConnectionActor://" + ip + ":" + port).c_str()); + + if 
(SayHelloToCoordinator(ip, port) == false) { + logging_->elog("Error occurs when saying hello to the coordinator!"); + } + + logging_->log("Get Coordinator EndPoint Port..."); + + int coordinator_endpoint_port; + if ((coordinator_endpoint_port = GetCoordinatorEndPointPort()) == -1) { + logging_->elog("Error occurs when getting the coordinator EndPoint port"); + } + + logging_->log("Connect to Coordinator EndPoint..."); + if (ConnectToCoordinateEndPoint(coordinator_endpoint_port) == false) { + logging_->elog("Error occurs when connecting to the coordinator EndPoint"); + } + + logging_->log("Waiting for the Ready signal from the Coordinator."); + if (WaitForReadySignalFromCoordinator() == false) { + logging_->elog("Error occurs when waiting for the coordinator EndPoint"); + } + FileClose(socket_coor); } AdaptiveEndPoint::~AdaptiveEndPoint() { - // TODO Auto-generated destructor stub -// return; - delete connectionActor; - delete framework; -// this->~EndPoint(); + // TODO Auto-generated destructor stub + // return; + delete connectionActor; + delete framework; + // this->~EndPoint(); } -bool AdaptiveEndPoint::SayHelloToCoordinator(std::string ip,std::string port){ - libconfig::Config cfg; - cfg.readFile(Config::config_file.c_str()); - ip_coor=(const char *)cfg.lookup("coordinator.ip"); - - std::string coord_port=(const char*)cfg.lookup("coordinator.port"); - int recvbytes; - - struct hostent host; - struct sockaddr_in serv_addr; - - if((ThreadSafe::gethostbyname_ts(host,ip_coor.c_str()))==0) - { - logging_->elog("gethostbyname errors!\n"); - assert(false); - return false; - } - if((socket_coor = socket(AF_INET, SOCK_STREAM,0))==-1) - { - logging_->elog("socket create errors!\n"); - assert(false); - return false; - } - - serv_addr.sin_family=AF_INET; - serv_addr.sin_port=htons(atoi(coord_port.c_str())); - serv_addr.sin_addr=*((struct in_addr*)host.h_addr); - bzero(&(serv_addr.sin_zero),8); - int attemps_budget=10; - while(connect(socket_coor,(struct sockaddr 
*)&serv_addr, sizeof(struct sockaddr))==-1) - { - logging_->elog("Cannot connect to the master! To retry in one second!"); - sleep(1); - attemps_budget--; - if(attemps_budget==0){ - logging_->elog("connection errors when connecting to %s:%s! Reason:%s",inet_ntoa(serv_addr.sin_addr),coord_port.c_str(),strerror(errno)); - return false; - } - } - - int port_send=atoi(port.c_str()); - if(send(socket_coor,&port_send,sizeof(int),0)==-1) - { - logging_->elog("Error occurs when sending the hello message to the coordinator!\n"); - assert(false); - return false; - } - return true; +bool AdaptiveEndPoint::SayHelloToCoordinator(std::string ip, std::string port) { + libconfig::Config cfg; + cfg.readFile(Config::config_file.c_str()); + ip_coor = (const char*)cfg.lookup("coordinator.ip"); + + std::string coord_port = (const char*)cfg.lookup("coordinator.port"); + int recvbytes; + + struct hostent host; + struct sockaddr_in serv_addr; + + if ((ThreadSafe::gethostbyname_ts(host, ip_coor.c_str())) == 0) { + logging_->elog("gethostbyname errors!\n"); + assert(false); + return false; + } + if ((socket_coor = socket(AF_INET, SOCK_STREAM, 0)) == -1) { + logging_->elog("socket create errors!\n"); + assert(false); + return false; + } + + serv_addr.sin_family = AF_INET; + serv_addr.sin_port = htons(atoi(coord_port.c_str())); + serv_addr.sin_addr = *((struct in_addr*)host.h_addr); + bzero(&(serv_addr.sin_zero), 8); + int attemps_budget = 10; + while (connect(socket_coor, (struct sockaddr*)&serv_addr, + sizeof(struct sockaddr)) == -1) { + logging_->elog( + "Cannot connect to the master %s:%d! To retry in one second!", + ip_coor.c_str(), atoi(coord_port.c_str())); + perror("connect to coordinator "); + sleep(1); + attemps_budget--; + if (attemps_budget == 0) { + logging_->elog("connection errors when connecting to %s:%s! 
Reason:%s", + inet_ntoa(serv_addr.sin_addr), coord_port.c_str(), + strerror(errno)); + return false; + } + } + + int port_send = atoi(port.c_str()); + if (send(socket_coor, &port_send, sizeof(int), 0) == -1) { + logging_->elog( + "Error occurs when sending the hello message to the coordinator!\n"); + assert(false); + return false; + } + return true; } -int AdaptiveEndPoint::GetCoordinatorEndPointPort(){ - int recvbytes; - int port; - if((recvbytes=recv(socket_coor,&port,sizeof(int),0))==-1){ - logging_->elog("recv error!\n"); - return -1; - } - logging_->log("Get CoordinatorEndPoint is successfully! The Coordinator Theron EndPoint is %s:%d",ip_coor.c_str(),port); - return port; - +int AdaptiveEndPoint::GetCoordinatorEndPointPort() { + int recvbytes; + int port; + if ((recvbytes = recv(socket_coor, &port, sizeof(int), 0)) == -1) { + logging_->elog("recv error!\n"); + return -1; + } + logging_->log( + "Get CoordinatorEndPoint is successfully! The Coordinator Theron " + "EndPoint is %s:%d", + ip_coor.c_str(), port); + return port; } -bool AdaptiveEndPoint::WaitForReadySignalFromCoordinator(){ - int recvbytes; - char signal; - if((recvbytes=recv(socket_coor,&signal,sizeof(char),0))==-1){ - logging_->elog("recv error!\n"); - return false; - } - logging_->log("Join to the EndPoint network successfully!"); - return true; +bool AdaptiveEndPoint::WaitForReadySignalFromCoordinator() { + int recvbytes; + char signal; + if ((recvbytes = recv(socket_coor, &signal, sizeof(char), 0)) == -1) { + logging_->elog("recv error!\n"); + return false; + } + logging_->log("Join to the EndPoint network successfully!"); + + /* + * below code should be keep in case of dynamically selecting master loader + * + if (signal == 'M') { + + * this node is selected to be loader master,then start a new thread acting + * master loader + + pthread_t load_master; + int ret = + pthread_create(&load_master, NULL, + claims::loader::MasterLoader::StartMasterLoader, NULL); + if (0 == ret) { + LOG(INFO) << 
"start master loader......"; + } else { + PLOG(ERROR) << "can't start master loader!!"; + } + }*/ + return true; } -bool AdaptiveEndPoint::ConnectToCoordinateEndPoint(int port){ - std::ostringstream os; - os<<"tcp://"<elog("Check whether network is enabled! can't connect ot %s",os.str().c_str()); -// assert(false); - return false; - } - - logging_->log("ConnectToCoordiateEndPoint is successful!"); - return true; +bool AdaptiveEndPoint::ConnectToCoordinateEndPoint(int port) { + std::ostringstream os; + os << "tcp://" << ip_coor << ":" << port; + // sleep(1); + if (!ConnectToRemoteEndPoint(os.str().c_str())) { + logging_->elog("Check whether network is enabled! can't connect ot %s", + os.str().c_str()); + // assert(false); + return false; + } + + logging_->log("ConnectToCoordiateEndPoint is successful!"); + return true; } -bool AdaptiveEndPoint::ConnectToRemoteEndPoint(std::string location){ - return this->Connect(location.c_str()); +bool AdaptiveEndPoint::ConnectToRemoteEndPoint(std::string location) { + return this->Connect(location.c_str()); } -AdaptiveEndPoint::ConnectionActor::ConnectionActor(AdaptiveEndPoint* AEP, const char* Name) -:Actor(*(AEP->framework),Name),AEP(AEP){ - RegisterHandler(this,&AdaptiveEndPoint::ConnectionActor::ReceiveNodeStatus256); - +AdaptiveEndPoint::ConnectionActor::ConnectionActor(AdaptiveEndPoint* AEP, + const char* Name) + : Actor(*(AEP->framework), Name), AEP(AEP) { + RegisterHandler(this, + &AdaptiveEndPoint::ConnectionActor::ReceiveNodeStatus256); } -void AdaptiveEndPoint::ConnectionActor::ReceiveNodeStatus256(const Message256 &message, const Theron::Address from){ - NodeConnectionMessage NCM=NodeConnectionMessage::deserialize(message); - std::ostringstream str; - str<<"tcp://"+NCM.ip<<":"<ConnectToRemoteEndPoint(str.str().c_str())){ - AEP->logging_->elog("Check whether the Network is enabled!"); - } - Send(0,from); - - - AEP->logging_->log("Successfully connected to the EndPoint of new node through %s",str.str().c_str()); +void 
AdaptiveEndPoint::ConnectionActor::ReceiveNodeStatus256( + const Message256& message, const Theron::Address from) { + NodeConnectionMessage NCM = NodeConnectionMessage::deserialize(message); + std::ostringstream str; + str << "tcp://" + NCM.ip << ":" << NCM.port; + if (!AEP->ConnectToRemoteEndPoint(str.str().c_str())) { + AEP->logging_->elog("Check whether the Network is enabled!"); + } + Send(0, from); + + AEP->logging_->log( + "Successfully connected to the EndPoint of new node through %s", + str.str().c_str()); } diff --git a/Executor/Coordinator.cpp b/Executor/Coordinator.cpp index ff9e7a542..8c8b14a41 100755 --- a/Executor/Coordinator.cpp +++ b/Executor/Coordinator.cpp @@ -6,287 +6,306 @@ */ #include "Coordinator.h" +#include +#include #include #include #include #include #include #include -#include -#include #include "../Debug.h" #include "../common/Message.h" #include "../Environment.h" #include "../common/TimeOutReceiver.h" #include "../Config.h" Coordinator::Coordinator() { - logging = new CoordinatorLogging(); - /** swap the order of SetupTheTheron and PreparetheSocket to provide more time - * to Theron::EndPoint initialization before other EndPoints connect to it. - * - */ - - if (SetupTheTheron() == false) { - logging->elog("Error occurs during the Theron setup!"); - return; - } - if (PrepareTheSocket() == false) { - logging->elog("Error occurs during the socket prepare!"); - return; - } - - if (CreateListeningThread() == false) { - logging->elog("Error occurs during creating listening thread!"); - return; - } - + logging = new CoordinatorLogging(); + /** swap the order of SetupTheTheron and PreparetheSocket to provide more time + * to Theron::EndPoint initialization before other EndPoints connect to it. 
+ * + */ + + if (SetupTheTheron() == false) { + logging->elog("Error occurs during the Theron setup!"); + return; + } + if (PrepareTheSocket() == false) { + logging->elog("Error occurs during the socket prepare!"); + return; + } + + if (CreateListeningThread() == false) { + logging->elog("Error occurs during creating listening thread!"); + return; + } } Coordinator::~Coordinator() { - pthread_cancel(prochaseId); - void *res=0; - while(res!=PTHREAD_CANCELED){ - pthread_join(prochaseId,&res); - } - - FileClose(socket_fd); -// logging->elog("-----for debug: fd %d is closed", socket_fd); -// std::cout<<"in "<<__FILE__<<":"<<__LINE__;printf("-----for debug: fd %d is closed\n", socket_fd); - delete framework; - delete endpoint; + pthread_cancel(prochaseId); + void *res = 0; + while (res != PTHREAD_CANCELED) { + pthread_join(prochaseId, &res); + } + + FileClose(socket_fd); + // logging->elog("-----for debug: fd %d is closed", socket_fd); + // std::cout<<"in "<<__FILE__<<":"<<__LINE__;printf("-----for debug: fd %d + // is closed\n", socket_fd); + delete framework; + delete endpoint; } bool Coordinator::PrepareTheSocket() { - libconfig::Config cfg; - cfg.readFile(Config::config_file.c_str()); - std::string master_ip = (const char *) cfg.lookup("coordinator.ip"); - std::string master_port = (const char*) cfg.lookup("coordinator.port"); - - struct hostent* host; - struct sockaddr_in my_addr; - - if ((socket_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) { - logging->elog("socket create errors!\n"); - return false; - } - my_addr.sin_family = AF_INET; - my_addr.sin_port = htons(atoi(master_port.c_str())); - my_addr.sin_addr.s_addr = INADDR_ANY; - - bzero(&(my_addr.sin_zero), 8); - - /* Enable address reuse */ - int on = 1; - setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); - - if (bind(socket_fd, (struct sockaddr *) &my_addr, sizeof(struct sockaddr)) - == -1) { - logging->elog("bind errors!\n"); - return false; - } - - if (listen(socket_fd, 100) == -1) { - 
logging->elog("listen errors!\n"); - return false; - } - - logging->log("Coordinator: PrepareTheSocket() is successful."); - - return true; + libconfig::Config cfg; + cfg.readFile(Config::config_file.c_str()); + std::string master_ip = (const char *)cfg.lookup("coordinator.ip"); + std::string master_port = (const char *)cfg.lookup("coordinator.port"); + + struct hostent *host; + struct sockaddr_in my_addr; + + if ((socket_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) { + logging->elog("socket create errors!\n"); + return false; + } + my_addr.sin_family = AF_INET; + my_addr.sin_port = htons(atoi(master_port.c_str())); + my_addr.sin_addr.s_addr = INADDR_ANY; + + bzero(&(my_addr.sin_zero), 8); + + /* Enable address reuse */ + int on = 1; + setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); + + if (bind(socket_fd, (struct sockaddr *)&my_addr, sizeof(struct sockaddr)) == + -1) { + logging->elog("bind errors!\n"); + return false; + } + + if (listen(socket_fd, 100) == -1) { + logging->elog("listen errors!\n"); + return false; + } + + logging->log("Coordinator(%s:%d): PrepareTheSocket() is successful.", + master_ip.c_str(), atoi(master_port.c_str())); + + return true; } bool Coordinator::SetupTheTheron() { + std::string ip_port; + std::string ip = Environment::getInstance()->getIp(); + std::ostringstream port; + port << PortManager::getInstance()->applyPort(); + EndPointPort = port.str(); + ip_port = std::string("tcp://") + ip + ":" + port.str(); - std::string ip_port; - std::string ip = Environment::getInstance()->getIp(); - std::ostringstream port; - port << PortManager::getInstance()->applyPort(); - EndPointPort = port.str(); - ip_port = std::string("tcp://") + ip + ":" + port.str(); - - logging->log("[Coordinator]: Now is initializing the Theron EndPoint as %s",ip_port.c_str()); + logging->log("[Coordinator]: Now is initializing the Theron EndPoint as %s", + ip_port.c_str()); - endpoint = new Theron::EndPoint((ip + ":" + port.str()).c_str(), - 
ip_port.c_str()); + endpoint = + new Theron::EndPoint((ip + ":" + port.str()).c_str(), ip_port.c_str()); - framework = new Theron::Framework(*endpoint); - framework->SetMaxThreads(1); + framework = new Theron::Framework(*endpoint); + framework->SetMaxThreads(1); - logging->log("[Coordinator]: Now is initializing The CoordinatorActor"); - - return true; + logging->log("[Coordinator]: Now is initializing The CoordinatorActor"); + return true; } bool Coordinator::CreateListeningThread() { + const int error = pthread_create(&prochaseId, NULL, ListeningNewNode, this); - const int error = pthread_create(&prochaseId, NULL, ListeningNewNode, this); - - logging->log("[Coordinator]: The listening thread is created!"); + logging->log("[Coordinator]: The listening thread is created!"); - return error == 0; + return error == 0; } -void* Coordinator::ListeningNewNode(void *arg) { - - Coordinator* Cthis = (Coordinator*) arg; - while (true) { - socklen_t sin_size = sizeof(struct sockaddr_in); - struct sockaddr_in remote_addr; - int socket_fd_new; - - Cthis->logging->log( - "[Coordinator]: I'm waiting for the new node's connection!"); - - if ((socket_fd_new = accept(Cthis->socket_fd, - (struct sockaddr*) &remote_addr, &sin_size)) == -1) { - perror("accept errors!\n"); - return 0; - } - - Cthis->logging->log( - "[Coordinator]: New Node is connected! Waiting for the port infor.."); - - /*Set the timeout value, which is essential to guarantee the correctness when - * there are more nodes trying to connect the coordinate at the same time. */ - struct timeval timeout = { 1, 0 }; - setsockopt(socket_fd_new, SOL_SOCKET, SO_RCVTIMEO, (char*) &timeout, - sizeof(struct timeval)); - - int recvbytes; - int port; - - if ((recvbytes = recv(socket_fd_new, &port, sizeof(port), MSG_WAITALL)) - == -1) { - std::cout << "New node " << inet_ntoa(remote_addr.sin_addr) - << " has connected, but the receiving the information times out!" 
- << std::endl; - FileClose(socket_fd_new); -// logging->elog("-----for debug: fd %d is closed", socket_fd_new); - continue; - //return false; - } - if (recvbytes != sizeof(int)) { - std::cout << "Information received, but the length is not right!" - << std::endl; - FileClose(socket_fd_new); -// logging->elog("-----for debug: fd %d is closed", socket_fd_new); - continue; - } - - Cthis->logging->log( - "[Coordinator]: The EndPoint of the new node is %s:%d", - inet_ntoa(remote_addr.sin_addr), port); - - if (!Cthis->SendCoordinatorEndPointPort(socket_fd_new)) { - continue; - } - - std::ostringstream ost; - ost << port; - std::string new_node_ip = inet_ntoa(remote_addr.sin_addr); - std::string new_node_port = ost.str(); - - if (!Cthis->endpoint->Connect(("tcp://" + new_node_ip + ":" - + new_node_port).c_str())) { - Cthis->logging->elog( - "Error occurs when the Coordinator EndPoint is connecting to the EndPoint of the new node: " - "tcp://%s:%s", new_node_ip.c_str(), new_node_port.c_str()); - Cthis->logging->log(" tcp://%s:%s", new_node_ip.c_str(), - new_node_port.c_str()); - } - - else - { - Cthis->logging->log( - "[Coordinator]: The Coordinator EndPoint has successfully connected to the EndPoint of the new node!"); - } - - TimeOutReceiver *receiver = new TimeOutReceiver(Cthis->endpoint); - Theron::Catcher resultCatcher; - receiver->RegisterHandler(&resultCatcher, &Theron::Catcher::Push); - const int TimeOut = 1000;//ms - - /** - * TODO: In the current version, the Coordinator will repeatedly send - * message to each NodeConnectionActor until the feedback is received - * which means the target node has conducted new connection based on - * message received. - * However, if the target node is dead, the message will be sent repeatedly - * and infinitely. Additional code is needed to handle the dead node. 
- */ - for (unsigned i = 0; i < Cthis->PeersIpPort.size(); i++) { - NodeConnectionMessage new_NCM(new_node_ip, new_node_port); - NodeConnectionMessage old_NCM(Cthis->PeersIpPort[i].first, - Cthis->PeersIpPort[i].second); - receiver->Reset(); - Cthis->framework->Send(NodeConnectionMessage::serialize(new_NCM), - receiver->GetAddress(), Theron::Address( - ("ConnectionActor://" + old_NCM.ip + ":" - + old_NCM.port).c_str())); - while (receiver->TimeOutWait(1, TimeOut) != 1) { - Cthis->framework->Send( - NodeConnectionMessage::serialize(new_NCM), - receiver->GetAddress(), Theron::Address( - ("ConnectionActor://" + old_NCM.ip + ":" - + old_NCM.port).c_str())); - } - - receiver->Reset(); - - Cthis->framework->Send(NodeConnectionMessage::serialize(old_NCM), - receiver->GetAddress(), Theron::Address( - ("ConnectionActor://" + new_NCM.ip + ":" - + new_NCM.port).c_str())); - while (receiver->TimeOutWait(1, TimeOut) != 1) { - Cthis->framework->Send( - NodeConnectionMessage::serialize(old_NCM), - receiver->GetAddress(), Theron::Address( - ("ConnectionActor://" + new_NCM.ip + ":" - + new_NCM.port).c_str())); - } - } - - Cthis->PeersIpPort.push_back(std::pair( - new_node_ip, new_node_port)); - - Cthis->logging->log( - "[Coordinator]: New node %s;%s is successfully added to the Theron communication network!", - new_node_ip.c_str(), new_node_port.c_str()); - - Cthis->SendReadyNotificationToNewNode(socket_fd_new); - - FileClose(socket_fd_new); -// logging->elog("-----for debug: fd %d is closed", socket_fd_new); - receiver->~TimeOutReceiver(); - } +void *Coordinator::ListeningNewNode(void *arg) { + Coordinator *Cthis = (Coordinator *)arg; + while (true) { + socklen_t sin_size = sizeof(struct sockaddr_in); + struct sockaddr_in remote_addr; + int socket_fd_new; + + Cthis->logging->log( + "[Coordinator]: I'm waiting for the new node's connection!"); + + if ((socket_fd_new = accept(Cthis->socket_fd, + (struct sockaddr *)&remote_addr, &sin_size)) == + -1) { + perror("accept errors!\n"); + 
return 0; + } + + Cthis->logging->log( + "[Coordinator]: New Node is connected! Waiting for the port infor.."); + + /*Set the timeout value, which is essential to guarantee the correctness + * when + * there are more nodes trying to connect the coordinate at the same time. + */ + struct timeval timeout = {1, 0}; + setsockopt(socket_fd_new, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(struct timeval)); + + int recvbytes; + int port; + + if ((recvbytes = recv(socket_fd_new, &port, sizeof(port), MSG_WAITALL)) == + -1) { + std::cout + << "New node " << inet_ntoa(remote_addr.sin_addr) + << " has connected, but the receiving the information times out!" + << std::endl; + FileClose(socket_fd_new); + // logging->elog("-----for debug: fd %d is closed", + // socket_fd_new); + continue; + // return false; + } + if (recvbytes != sizeof(int)) { + std::cout << "Information received, but the length is not right!" + << std::endl; + FileClose(socket_fd_new); + // logging->elog("-----for debug: fd %d is closed", + // socket_fd_new); + continue; + } + + Cthis->logging->log("[Coordinator]: The EndPoint of the new node is %s:%d", + inet_ntoa(remote_addr.sin_addr), port); + + if (!Cthis->SendCoordinatorEndPointPort(socket_fd_new)) { + continue; + } + + std::ostringstream ost; + ost << port; + std::string new_node_ip = inet_ntoa(remote_addr.sin_addr); + std::string new_node_port = ost.str(); + + if (!Cthis->endpoint->Connect( + ("tcp://" + new_node_ip + ":" + new_node_port).c_str())) { + Cthis->logging->elog( + "Error occurs when the Coordinator EndPoint is connecting to the " + "EndPoint of the new node: " + "tcp://%s:%s", + new_node_ip.c_str(), new_node_port.c_str()); + Cthis->logging->log(" tcp://%s:%s", new_node_ip.c_str(), + new_node_port.c_str()); + } else { + Cthis->logging->log( + "[Coordinator]: The Coordinator EndPoint has successfully connected " + "to the EndPoint of the new node!"); + } + + TimeOutReceiver *receiver = new TimeOutReceiver(Cthis->endpoint); + 
Theron::Catcher resultCatcher; + receiver->RegisterHandler(&resultCatcher, &Theron::Catcher::Push); + const int TimeOut = 1000; // ms + + /** + * TODO: In the current version, the Coordinator will repeatedly send + * message to each NodeConnectionActor until the feedback is received + * which means the target node has conducted new connection based on + * message received. + * However, if the target node is dead, the message will be sent repeatedly + * and infinitely. Additional code is needed to handle the dead node. + */ + for (unsigned i = 0; i < Cthis->PeersIpPort.size(); i++) { + NodeConnectionMessage new_NCM(new_node_ip, new_node_port); + NodeConnectionMessage old_NCM(Cthis->PeersIpPort[i].first, + Cthis->PeersIpPort[i].second); + receiver->Reset(); + Cthis->framework->Send( + NodeConnectionMessage::serialize(new_NCM), receiver->GetAddress(), + Theron::Address(("ConnectionActor://" + old_NCM.ip + ":" + + old_NCM.port).c_str())); + while (receiver->TimeOutWait(1, TimeOut) != 1) { + Cthis->framework->Send( + NodeConnectionMessage::serialize(new_NCM), receiver->GetAddress(), + Theron::Address(("ConnectionActor://" + old_NCM.ip + ":" + + old_NCM.port).c_str())); + } + + receiver->Reset(); + + Cthis->framework->Send( + NodeConnectionMessage::serialize(old_NCM), receiver->GetAddress(), + Theron::Address(("ConnectionActor://" + new_NCM.ip + ":" + + new_NCM.port).c_str())); + while (receiver->TimeOutWait(1, TimeOut) != 1) { + Cthis->framework->Send( + NodeConnectionMessage::serialize(old_NCM), receiver->GetAddress(), + Theron::Address(("ConnectionActor://" + new_NCM.ip + ":" + + new_NCM.port).c_str())); + } + } + + Cthis->PeersIpPort.push_back( + std::pair(new_node_ip, new_node_port)); + + Cthis->logging->log( + "[Coordinator]: New node %s;%s is successfully added to the Theron " + "communication network!", + new_node_ip.c_str(), new_node_port.c_str()); + + Cthis->SendReadyNotificationToNewNode(socket_fd_new, 'R'); + + // below code should be keep in case of 
dynamically selecting master loader + /*if (1 == Cthis->PeersIpPort.size()) { + // select the first new node as loader master + LOG(INFO) << "Congratulations! (" << new_node_ip << ", " << new_node_port + << ") is selected to be master loader"; + if (Cthis->SendReadyNotificationToNewNode(socket_fd_new, 'M')) + LOG(INFO) << "succeed to send M notify this node"; + else + LOG(INFO) << "failed to send M notify this node"; + + } else { + if (Cthis->SendReadyNotificationToNewNode(socket_fd_new, 'S')) { + LOG(INFO) << "succeed to send S notify this node"; + } else { + LOG(INFO) << "failed to send S notify this node"; + } + }*/ + FileClose(socket_fd_new); + // logging->elog("-----for debug: fd %d is closed", socket_fd_new); + receiver->~TimeOutReceiver(); + } } -bool Coordinator::SendReadyNotificationToNewNode(int socket_new_node) { - char signal = 'R'; - if (!send(socket_new_node, &signal, sizeof(char), 0)) { - logging->log( - "Error occurs when sending the Coordinate EndPoint port to the new node!"); - } +bool Coordinator::SendReadyNotificationToNewNode(int socket_new_node, + char signal) { + if (!send(socket_new_node, &signal, sizeof(char), 0)) { + logging->log( + "Error occurs when sending the Coordinate EndPoint port to the new " + "node!"); + } - logging->log("[Coordinator]: The 'Ready' signal is sent to the new node!"); + logging->log("[Coordinator]: The 'Ready' signal is sent to the new node!"); - return true; + return true; } bool Coordinator::SendCoordinatorEndPointPort(int socket_new_node) { - int port = atoi(EndPointPort.c_str()); + int port = atoi(EndPointPort.c_str()); - if (!send(socket_new_node, &port, sizeof(int), 0)) { - logging->elog( - "Error occurs when sending the Coordinate EndPoint port to the new node!"); - } + if (!send(socket_new_node, &port, sizeof(int), 0)) { + logging->elog( + "Error occurs when sending the Coordinate EndPoint port to the new " + "node!"); + } - logging->log( - "[Coordinator]: The Coordinator EndPoint port [%d] is sent to the 
new node!", - port); + logging->log( + "[Coordinator]: The Coordinator EndPoint port [%d] is sent to the new " + "node!", + port); - return true; + return true; } Coordinator::CoordinateActor::CoordinateActor(Theron::Framework *framework, - const char* Name) : - Actor(*framework, Name) { - -} - + const char *Name) + : Actor(*framework, Name) {} diff --git a/Executor/Coordinator.h b/Executor/Coordinator.h index 930752667..6f0b3d198 100755 --- a/Executor/Coordinator.h +++ b/Executor/Coordinator.h @@ -22,34 +22,36 @@ #include "../common/Logging.h" class Coordinator { -public: - Coordinator(); - virtual ~Coordinator(); -private: - bool PrepareTheSocket(); - bool SetupTheTheron(); - bool CreateListeningThread(); - bool SendReadyNotificationToNewNode(int socket_new_node); + public: + Coordinator(); + virtual ~Coordinator(); - bool SendCoordinatorEndPointPort(int socket_new_node); - static void* ListeningNewNode(void * arg); -private: - int socket_fd; - Theron::EndPoint* endpoint; - Theron::Framework* framework; - std::string EndPointPort; - std::vector > PeersIpPort; - Logging* logging; - pthread_t prochaseId; -/** - * Actor - */ -/////////////////////////////////////////////////////////////////////// -public: - class CoordinateActor:public Theron::Actor{ - public: - CoordinateActor(Theron::Framework *framework, const char* Name); - }; + private: + bool PrepareTheSocket(); + bool SetupTheTheron(); + bool CreateListeningThread(); + bool SendReadyNotificationToNewNode(int socket_new_node, char signal); + + bool SendCoordinatorEndPointPort(int socket_new_node); + static void* ListeningNewNode(void* arg); + + private: + int socket_fd; + Theron::EndPoint* endpoint; + Theron::Framework* framework; + std::string EndPointPort; + std::vector > PeersIpPort; + Logging* logging; + pthread_t prochaseId; + /** + * Actor + */ + /////////////////////////////////////////////////////////////////////// + public: + class CoordinateActor : public Theron::Actor { + public: + 
CoordinateActor(Theron::Framework* framework, const char* Name); + }; }; #endif /* COORDINATOR_H_ */ diff --git a/Makefile.am b/Makefile.am index 7f43f872b..ee665c98f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,11 +5,13 @@ AM_CPPFLAGS=-fPIC -DTHERON_XS\ -I${THERON_HOME}/Include \ -I${BOOST_HOME} \ -I${BOOST_HOME}/boost/serialization \ --I${GTEST_HOME}/include +-I${GTEST_HOME}/include \ +-I${CAF_HOME}/libcaf_io \ +-I${CAF_HOME}/libcaf_core #-L/usr/local/lib \ #-I/usr/local/include -AM_LDFLAGS= -lc -lm -lrt -lpthread -lboost_serialization -lboost_date_time -lboost_system \ +AM_LDFLAGS= -lc -lm -lrt -lpthread \ -lconfig++ -lxs -lnuma -lreadline -lhistory -lz -ltinfo -Wl,--no-as-needed -ldl -rdynamic -lglog if OPT_TCMALLOC @@ -55,9 +57,12 @@ LDADD = \ common/types/Test/libtest.a \ common/types/ttmath/libttmath.a \ utility/libutility.a \ + ${CAF_HOME}/build/lib/libcaf_core.so \ + ${CAF_HOME}/build/lib/libcaf_io.so \ ${THERON_HOME}/Lib/libtherond.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ ${BOOST_HOME}/stage/lib/libboost_system.so \ + ${BOOST_HOME}/stage/lib/libboost_date_time.so \ ${HADOOP_HOME}/lib/native/libhdfs.so\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${GTEST_HOME}/libgtest.a \ diff --git a/conf/config b/conf/config index 665548de3..ea732ef6f 100755 --- a/conf/config +++ b/conf/config @@ -61,3 +61,9 @@ local_disk_mode=1 scan_batch=100 +is_master_loader=1 + +master_loader_ip="10.11.1.192" + +master_loader_port=9001 + diff --git a/loader/Makefile.am b/loader/Makefile.am index 097fe6a85..40bbcfa09 100644 --- a/loader/Makefile.am +++ b/loader/Makefile.am @@ -5,9 +5,11 @@ AM_CPPFLAGS= -fPIC -fpermissive \ -I${HADOOP_HOME}/include\ -I${JAVA_HOME}/include\ -I${JAVA_HOME}/include/linux \ --I${THERON_HOME}/Include +-I${THERON_HOME}/Include \ +-I${CAF_HOME}/libcaf_io \ +-I${CAF_HOME}/libcaf_core -AM_LDFLAGS=-lc -lm -lrt -lxs -lboost_serialization +AM_LDFLAGS=-lc -lm -lrt -lcaf_core -lcaf_io -lxs -lboost_serialization if OPT_TCMALLOC 
AM_CPPFLAGS+=-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free @@ -27,12 +29,14 @@ LDADD = ../catalog/libcatalog.a \ noinst_LIBRARIES=libloader.a libloader_a_SOURCES = \ - data_injector.cpp data_injector.h \ - single_file_connector.h single_file_connector.cpp \ - single_thread_single_file_connector.h single_thread_single_file_connector.cpp \ - table_file_connector.cpp table_file_connector.h \ - validity.h validity.cpp - + data_injector.cpp data_injector.h \ + file_connector.h loader_message.h \ + master_loader.cpp master_loader.h \ + single_file_connector.cpp single_file_connector.h \ + single_thread_single_file_connector.cpp single_thread_single_file_connector.h \ + slave_loader.cpp slave_loader.h \ + table_file_connector.cpp table_file_connector.h \ + validity.cpp validity.h SUBDIRS = test DIST_SUBDIRS = test diff --git a/loader/loader_message.h b/loader/loader_message.h new file mode 100644 index 000000000..83ee56ace --- /dev/null +++ b/loader/loader_message.h @@ -0,0 +1,37 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /Claims/loader/loader_message.h + * + * Created on: Apr 9, 2016 + * Author: yukai + * Email: yukai2014@gmail.com + * + * Description: + * + */ + +#ifndef LOADER_LOADER_MESSAGE_H_ +#define LOADER_LOADER_MESSAGE_H_ + +#include "caf/all.hpp" +#include "caf/io/all.hpp" + +using IpPortAtom = caf::atom_constant; + +#endif // LOADER_LOADER_MESSAGE_H_ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp new file mode 100644 index 000000000..28ed75420 --- /dev/null +++ b/loader/master_loader.cpp @@ -0,0 +1,181 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /Claims/loader/master_loader.cpp + * + * Created on: Apr 7, 2016 + * Author: yukai + * Email: yukai2014@gmail.com + * + * Description: + * + */ + +#include "./master_loader.h" +#include +#include + +#include "caf/all.hpp" +#include "caf/io/all.hpp" +#include +#include + +#include "../catalog/catalog.h" +#include "../Config.h" +#include "../Environment.h" +#include "./loader_message.h" +using caf::aout; +using caf::behavior; +using caf::event_based_actor; +using caf::io::publish; +using caf::io::remote_actor; +using caf::mixin::sync_sender_impl; +using caf::spawn; +using std::endl; +using claims::catalog::Catalog; +using claims::common::rSuccess; +using claims::common::rFailure; + +namespace claims { +namespace loader { + +MasterLoader::MasterLoader() + : master_loader_ip(Config::master_loader_ip), + master_loader_port(Config::master_loader_port) { + // TODO Auto-generated constructor stub +} + +MasterLoader::~MasterLoader() { + // TODO Auto-generated destructor stub +} + +static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, + MasterLoader* mloader) { + return { + [=](IpPortAtom, std::string ip, int port) { // NOLINT + LOG(INFO) << "receive slave network address(" << ip << ":" << port + << ")" << endl; + // slave_addrs_.push_back(NetAddr(ip, port)); + int new_slave_fd = -1; + if (rSuccess != + mloader->GetSocketFdConnectedWithSlave(ip, port, &new_slave_fd)) { + LOG(ERROR) << "failed to get connected fd with slave"; + } else { + LOG(INFO) << "succeed to get connected fd with slave"; + } + assert(new_slave_fd > 3); + mloader->slave_sockets_.push_back(new_slave_fd); + DLOG(INFO) << "start to send test message to slave"; + + // test whether socket works well + ostringstream oss; + oss << "hello, i'm master, whose address is " + << mloader->master_loader_ip << ":" + << to_string(mloader->master_loader_port) << ". 
\0"; + + int message_length = oss.str().length(); + DLOG(INFO) << "message length is " << message_length; + + if (-1 == + write(new_slave_fd, reinterpret_cast(&message_length), 4)) { + PLOG(ERROR) << "failed to send message length to slave(" << ip << ":" + << port << ")"; + } else { + DLOG(INFO) << "message length is sent"; + } + if (-1 == write(new_slave_fd, oss.str().c_str(), message_length)) { + PLOG(ERROR) << "failed to send message to slave(" << ip << ":" << port + << ")"; + } else { + DLOG(INFO) << "message buffer is sent"; + } + }, + caf::others >> [] { LOG(ERROR) << "nothing matched!!!"; }}; +} + +RetCode MasterLoader::ConnectWithSlaves() { + int ret = rSuccess; + try { + auto listening_actor = spawn(&MasterLoader::ReceiveSlaveReg, this); + publish(listening_actor, master_loader_port, master_loader_ip.c_str(), + true); + DLOG(INFO) << "published in " << master_loader_ip << ":" + << master_loader_port; + + // auto test_actor = remote_actor(master_loader_ip, master_loader_port); + // caf::scoped_actor test1; + // test1->sync_send(test_actor, IpPortAtom::value, "123.123.13.123", + // 123); + + // while (int temp = getchar() != 'm') { + // } + } catch (exception& e) { + LOG(ERROR) << e.what(); + return rFailure; + } + return ret; +} + +RetCode MasterLoader::Inject() {} + +string MasterLoader::GetMessage() {} + +bool MasterLoader::CheckValidity() {} + +void MasterLoader::DistributeSubInjection() {} + +RetCode MasterLoader::GetSocketFdConnectedWithSlave(string ip, int port, + int* connected_fd) { + int fd = socket(AF_INET, SOCK_STREAM, 0); + + // port = 23667; + + struct sockaddr_in slave_addr; + slave_addr.sin_family = AF_INET; + slave_addr.sin_port = htons(port); + slave_addr.sin_addr.s_addr = inet_addr(ip.c_str()); + + if (-1 == connect(fd, (struct sockaddr*)(&slave_addr), sizeof(sockaddr_in))) { + PLOG(ERROR) << "failed to connect socket(" << ip << ":" << port << ")"; + return rFailure; + } + *connected_fd = fd; + return rSuccess; +} + +void* 
MasterLoader::StartMasterLoader(void* arg) { + Config::getInstance(); + // if (rSuccess != Catalog::getInstance()->restoreCatalog()) { + // LOG(ERROR) << "failed to restore catalog" << std::endl; + // cerr << "ERROR: restore catalog failed" << endl; + // } + LOG(INFO) << "start master loader..."; + + int ret = rSuccess; + MasterLoader* master_loader = Environment::getInstance()->get_master_loader(); + EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->ConnectWithSlaves(), + "failed to connect all slaves"); + + EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Inject(), + "failed to inject data"); + + return NULL; +} + +} /* namespace loader */ +} /* namespace claims */ diff --git a/loader/master_loader.h b/loader/master_loader.h new file mode 100644 index 000000000..d62911400 --- /dev/null +++ b/loader/master_loader.h @@ -0,0 +1,86 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /Claims/loader/master_loader.h + * + * Created on: Apr 7, 2016 + * Author: yukai + * Email: yukai2014@gmail.com + * + * Description: + * + */ + +#ifndef LOADER_MASTER_LOADER_H_ +#define LOADER_MASTER_LOADER_H_ +#include +#include +#include "../common/error_define.h" +#include "caf/all.hpp" + +using caf::behavior; +using caf::event_based_actor; + +namespace claims { +namespace loader { + +using std::string; +using std::vector; + +class MasterLoader { + public: + struct NetAddr { + NetAddr(string ip, int port) : ip_(ip), port_(port) {} + string ip_; + int port_; + }; + + public: + MasterLoader(); + ~MasterLoader(); + + RetCode ConnectWithSlaves(); + + RetCode Inject(); + + private: + string GetMessage(); + + RetCode GetSlaveNetAddr(); + RetCode SetSocketWithSlaves(); + RetCode GetSocketFdConnectedWithSlave(string ip, int port, int* connected_fd); + bool CheckValidity(); + void DistributeSubInjection(); + + static behavior ReceiveSlaveReg(event_based_actor* self, + MasterLoader* mloader); + + public: + static void* StartMasterLoader(void* arg); + + private: + string master_loader_ip; + int master_loader_port; + vector slave_addrs_; + vector slave_sockets_; +}; + +} /* namespace loader */ +} /* namespace claims */ + +#endif // LOADER_MASTER_LOADER_H_ diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp new file mode 100644 index 000000000..edd01da28 --- /dev/null +++ b/loader/slave_loader.cpp @@ -0,0 +1,224 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /Claims/loader/slave_loader.cpp + * + * Created on: Apr 8, 2016 + * Author: yukai + * Email: yukai2014@gmail.com + * + * Description: + * + */ + +#include "./slave_loader.h" + +#include +#include +#include +#include "caf/all.hpp" +#include "caf/io/all.hpp" + +#include "./loader_message.h" +#include "../Config.h" +#include "../Environment.h" +#include "../common/error_define.h" +using caf::event_based_actor; +using caf::io::remote_actor; +using caf::mixin::sync_sender_impl; +using caf::spawn; +using claims::common::rSuccess; +using claims::common::rFailure; + +namespace claims { +namespace loader { + +SlaveLoader::SlaveLoader() { + // TODO Auto-generated constructor stub +} + +SlaveLoader::~SlaveLoader() { + // TODO Auto-generated destructor stub +} + +RetCode SlaveLoader::ConnectWithMaster() { + int ret = rSuccess; + int retry_time = 10; + for (int i = 0; Clean(), i < retry_time; ++i) { // if failed, call Clean() + EXEC_AND_LOG(ret, EstablishListeningSocket(), + "established listening socket", + "failed to establish listening socket in " << i << " times"); + if (rSuccess == ret) break; + } + if (rSuccess != ret) { + Clean(); + return ret; + } + + for (int i = 0; i < retry_time; ++i) { + EXEC_AND_LOG(ret, SendSelfAddrToMaster(), "sent self ip/port to master", + "failed to send self ip/port to master in " << i << " times"); + if (rSuccess == ret) break; + sleep(1); + } + if (rSuccess != ret) { + Clean(); + return ret; + } + + for (int i = 0; i < retry_time; ++i) { + EXEC_AND_LOG(ret, GetConnectedSocket(), "got connected socket with master", + 
"failed to get connected socket with master in " << i + << " times"); + if (rSuccess == ret) break; + } + if (rSuccess != ret) Clean(); + return ret; +} + +RetCode SlaveLoader::EstablishListeningSocket() { + int ret = rSuccess; + listening_fd_ = socket(AF_INET, SOCK_STREAM, 0); + if (-1 == listening_fd_) { + PLOG(ERROR) << "failed to create socket"; + } + + struct sockaddr_in sock_addr; + sock_addr.sin_family = AF_INET; + sock_addr.sin_port = 0; + sock_addr.sin_addr.s_addr = + inet_addr(Environment::getInstance()->getIp().c_str()); + + if (-1 == + bind(listening_fd_, (struct sockaddr*)(&sock_addr), sizeof(sock_addr))) { + PLOG(ERROR) << "failed to bind socket"; + return rFailure; + } + + OutputFdIpPort(listening_fd_); + + if (-1 == listen(listening_fd_, 5)) { + PLOG(ERROR) << "failed to listen socket"; + return rFailure; + } + + OutputFdIpPort(listening_fd_); + + struct sockaddr_in temp_addr; + socklen_t addr_len = sizeof(sockaddr_in); + if (-1 == + getsockname(listening_fd_, (struct sockaddr*)(&temp_addr), &addr_len)) { + PLOG(ERROR) << "failed to get socket name "; + return rFailure; + } + + self_port = ntohs(temp_addr.sin_port); + self_ip = inet_ntoa(temp_addr.sin_addr); + + LOG(INFO) << "slave socket IP:" << self_ip << ", port:" << self_port; + return ret; +} + +RetCode SlaveLoader::SendSelfAddrToMaster() { + // auto send_actor = spawn([&](event_based_actor* self) { + // auto master_actor = + // remote_actor(Config::master_loader_ip, Config::master_loader_port); + // self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port); + // }); + DLOG(INFO) << "going to send self (" << self_ip << ":" << self_port << ")" + << "to (" << Config::master_loader_ip << ":" + << Config::master_loader_port << ")"; + try { + auto master_actor = + remote_actor(Config::master_loader_ip, Config::master_loader_port); + caf::scoped_actor self; + self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port); + } catch (exception& e) { + LOG(ERROR) << e.what(); + return 
rFailure; + } + return rSuccess; +} + +RetCode SlaveLoader::GetConnectedSocket() { + assert(listening_fd_ > 3); + OutputFdIpPort(listening_fd_); + DLOG(INFO) << "fd is accepting..."; + + struct sockaddr_in master_addr; + socklen_t len = sizeof(sockaddr_in); + int master_fd = accept(listening_fd_, (struct sockaddr*)(&master_addr), &len); + if (-1 == master_fd) { + PLOG(ERROR) << "failed to accept socket"; + return rFailure; + } + master_fd_ = master_fd; + return rSuccess; +} +void SlaveLoader::OutputFdIpPort(int fd) { + struct sockaddr_in temp_addr; + socklen_t addr_len = sizeof(sockaddr_in); + if (-1 == getsockname(fd, (struct sockaddr*)(&temp_addr), &addr_len)) { + PLOG(ERROR) << "failed to get socket name "; + } + DLOG(INFO) << "fd ----> (" << inet_ntoa(temp_addr.sin_addr) << ":" + << ntohs(temp_addr.sin_port) << ")"; +} + +void SlaveLoader::ReceiveAndWorkLoop() { + assert(master_fd_ > 3); + const int length = 1000; + char* buffer = new char[length]; + DLOG(INFO) << "slave is recving ..."; + while (1) { + if (-1 == recv(master_fd_, buffer, 4, MSG_WAITALL)) { + PLOG(ERROR) << "failed to receive message length from master"; + } + LOG(INFO) << "length is " << *reinterpret_cast(buffer); + if (-1 == recv(master_fd_, buffer, *reinterpret_cast(buffer), + MSG_WAITALL)) { + PLOG(ERROR) << "failed to receive message from master"; + } + LOG(INFO) << "receive message from master:" << buffer << endl; + } +} + +void* SlaveLoader::StartSlaveLoader(void* arg) { + Config::getInstance(); + // if (rSuccess != Catalog::getInstance()->restoreCatalog()) { + // LOG(ERROR) << "failed to restore catalog" << std::endl; + // cerr << "ERROR: restore catalog failed" << endl; + // } + LOG(INFO) << "start slave loader..."; + + SlaveLoader* slave_loader = Environment::getInstance()->get_slave_loader(); + int ret = rSuccess; + EXEC_AND_LOG(ret, slave_loader->ConnectWithMaster(), + "succeed to connect with master", + "failed to connect with master "); + + assert(rSuccess == ret && "can't connect 
with master"); + + cout << "connected with master loader" << endl; + // TODO(YK): error handle + slave_loader->ReceiveAndWorkLoop(); + return NULL; +} + +} /* namespace loader */ +} /* namespace claims */ diff --git a/loader/slave_loader.h b/loader/slave_loader.h new file mode 100644 index 000000000..22ff201a3 --- /dev/null +++ b/loader/slave_loader.h @@ -0,0 +1,83 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /Claims/loader/slave_loader.h + * + * Created on: Apr 8, 2016 + * Author: yukai + * Email: yukai2014@gmail.com + * + * Description: + * + */ + +#ifndef LOADER_SLAVE_LOADER_H_ +#define LOADER_SLAVE_LOADER_H_ +#include +#include +#include +#include + +#include "../catalog/catalog.h" +#include "../storage/BlockManager.h" + +namespace claims { +namespace loader { +using std::string; +using claims::catalog::Catalog; + +class SlaveLoader { + public: + SlaveLoader(); + virtual ~SlaveLoader(); + + public: + RetCode ConnectWithMaster(); + void ReceiveAndWorkLoop(); + void Clean() { + if (-1 != listening_fd_) FileClose(listening_fd_); + listening_fd_ = -1; + + if (-1 != master_fd_) FileClose(master_fd_); + master_fd_ = -1; + } + + private: + RetCode EstablishListeningSocket(); + RetCode SendSelfAddrToMaster(); + RetCode GetConnectedSocket(); + + void OutputFdIpPort(int fd); + + private: + int master_socket_fd_; + string self_ip; + int self_port; + + public: + static void* StartSlaveLoader(void* arg); + + private: + int listening_fd_ = -1; + int master_fd_ = -1; +}; + +} /* namespace loader */ +} /* namespace claims */ + +#endif // LOADER_SLAVE_LOADER_H_ From a78cbadd152af71ba48bb34c786a70db205e8cc1 Mon Sep 17 00:00:00 2001 From: yukai Date: Wed, 13 Apr 2016 17:14:43 +0800 Subject: [PATCH 02/58] FIX: name from 'inject' to 'ingest' --- Test/gtest_main.cpp | 2 +- catalog/projection_binding.cpp | 2 +- catalog/table.h | 4 +- common/Schema/SchemaFix.cpp | 12 +- loader/Makefile.am | 2 +- .../{data_injector.cpp => data_ingestion.cpp} | 191 +++++++++--------- loader/{data_injector.h => data_ingestion.h} | 20 +- loader/master_loader.cpp | 38 ++-- loader/master_loader.h | 13 +- loader/single_file_connector.cpp | 2 +- loader/test/Makefile.am | 2 +- ...ector_test.cpp => data_ingestion_test.cpp} | 11 +- ..._injector_test.h => data_ingestion_test.h} | 32 +-- stmt_handler/delete_stmt_exec.cpp | 8 +- stmt_handler/drop_table_exec.cpp | 3 +- stmt_handler/insert_exec.cpp | 10 +- 
stmt_handler/load_exec.cpp | 12 +- 17 files changed, 188 insertions(+), 176 deletions(-) rename loader/{data_injector.cpp => data_ingestion.cpp} (88%) rename loader/{data_injector.h => data_ingestion.h} (95%) rename loader/test/{data_injector_test.cpp => data_ingestion_test.cpp} (88%) rename loader/test/{data_injector_test.h => data_ingestion_test.h} (78%) diff --git a/Test/gtest_main.cpp b/Test/gtest_main.cpp index 9aa5894bd..1212db322 100644 --- a/Test/gtest_main.cpp +++ b/Test/gtest_main.cpp @@ -28,12 +28,12 @@ #include "./common/errno_test.h" #include "../common/test/operate_test.h" #include "../common/types/Test/data_type_test.h" -#include "../loader/test/data_injector_test.h" #include "../common/file_handle/test/disk_file_handle_imp_test.h" #include "../common/file_handle/test/hdfs_file_handle_imp_test.h" #include "../loader/test/single_thread_single_file_connector_test.h" #include "../loader/test/single_file_connector_test.h" #include "./iterator/elastic_iterator_model_test.h" +#include "../loader/test/data_ingestion_test.h" #include "../loader/test/table_file_connector_test.h" #define GLOG_NO_ABBREVIATED_SEVERITIES diff --git a/catalog/projection_binding.cpp b/catalog/projection_binding.cpp index cc86f400b..43b4be895 100644 --- a/catalog/projection_binding.cpp +++ b/catalog/projection_binding.cpp @@ -124,7 +124,7 @@ bool ProjectionBinding::BindingEntireProjection( // // BlockManagerMaster::getInstance()->SendBindingMessage(partition_id,number_of_chunks,MEMORY,target); } - /* conduct the binding according to the bingding information list*/ + /* conduct the binding according to the binding information list*/ for (unsigned i = 0; i < partition_id_to_nodeid_list.size(); i++) { const unsigned partition_off = partition_id_to_nodeid_list[i].first; const NodeID node_id = partition_id_to_nodeid_list[i].second; diff --git a/catalog/table.h b/catalog/table.h index cca88c8ef..72990923c 100755 --- a/catalog/table.h +++ b/catalog/table.h @@ -51,7 +51,7 @@ using 
claims::utility::LockGuard; namespace claims { namespace loader { -class DataInjector; +class DataIngestion; class TableFileConnector; }; @@ -59,7 +59,7 @@ namespace catalog { using claims::loader::TableFileConnector; class TableDescriptor { public: - friend class claims::loader::DataInjector; + friend class claims::loader::DataIngestion; friend class claims::loader::TableFileConnector; public: diff --git a/common/Schema/SchemaFix.cpp b/common/Schema/SchemaFix.cpp index eba9e8385..905474765 100755 --- a/common/Schema/SchemaFix.cpp +++ b/common/Schema/SchemaFix.cpp @@ -15,10 +15,10 @@ #include #include -#include "../../loader/data_injector.h" +#include "../../loader/data_ingestion.h" #include "../../utility/Timer.h" #include "../common/error_define.h" -using claims::loader::DataInjector; +using claims::loader::DataIngestion; using claims::common::rTooFewColumn; using claims::common::rSuccess; using claims::common::rIncorrectData; @@ -180,11 +180,11 @@ RetCode SchemaFix::CheckAndToValue(std::string text_tuple, void* binary_tuple, columns[i].operate->SetDefault(text_column); ret = rSuccess; } - // ATOMIC_ADD_SF(DataInjector::total_check_string_time_, + // ATOMIC_ADD_SF(DataIngestion::total_check_string_time_, // GetElapsedTimeInUs(check_string_time)); } } - // ATOMIC_ADD_SF(DataInjector::total_get_substr_time_, + // ATOMIC_ADD_SF(DataIngestion::total_get_substr_time_, // GetElapsedTimeInUs(get_substr_time)); // PLOG_SF("get_substr time:" << GetElapsedTimeInUs(get_substr_time)); @@ -198,7 +198,7 @@ RetCode SchemaFix::CheckAndToValue(std::string text_tuple, void* binary_tuple, << "\t Transfer: " << columns[i].operate->toString(binary_tuple + accum_offsets[i])); - // ATOMIC_ADD_SF(DataInjector::total_to_value_time_, + // ATOMIC_ADD_SF(DataIngestion::total_to_value_time_, // GetElapsedTimeInUs(to_value_time)); // PLOG_SF("just to_value time:" << GetElapsedTimeInUs(to_value_time)); // PLOG_SF("inner loop time:" << GetElapsedTimeInUs(get_substr_time)); @@ -206,7 +206,7 @@ 
RetCode SchemaFix::CheckAndToValue(std::string text_tuple, void* binary_tuple, // PLOG_SF("while loop time:" << GetElapsedTimeInUs(to_value_func_time)); - ATOMIC_ADD_SF(DataInjector::total_check_and_to_value_func_time_, + ATOMIC_ADD_SF(DataIngestion::total_check_and_to_value_func_time_, GetElapsedTimeInUs(to_value_func_time)); DLOG_SF("after all tovalue, prev_pos :" << (prev_pos == string::npos) << "prev_pos+1 :" diff --git a/loader/Makefile.am b/loader/Makefile.am index 40bbcfa09..9169d2c31 100644 --- a/loader/Makefile.am +++ b/loader/Makefile.am @@ -29,7 +29,7 @@ LDADD = ../catalog/libcatalog.a \ noinst_LIBRARIES=libloader.a libloader_a_SOURCES = \ - data_injector.cpp data_injector.h \ + data_ingestion.cpp data_ingestion.h \ file_connector.h loader_message.h \ master_loader.cpp master_loader.h \ single_file_connector.cpp single_file_connector.h \ diff --git a/loader/data_injector.cpp b/loader/data_ingestion.cpp similarity index 88% rename from loader/data_injector.cpp rename to loader/data_ingestion.cpp index 2bb600096..96a5f363d 100644 --- a/loader/data_injector.cpp +++ b/loader/data_ingestion.cpp @@ -16,7 +16,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
* - * /Claims/loader/data_injector.cpp + * /Claims/loader/data_injestion.cpp * * Created on: Oct 22, 2015 * Author: yukai @@ -26,8 +26,6 @@ * */ -#include "./data_injector.h" - #include #include #include @@ -67,6 +65,7 @@ #include "../utility/thread_pool.h" #include "../utility/Timer.h" #include "./table_file_connector.h" +#include "data_ingestion.h" using claims::common::FileOpenFlag; using claims::common::FilePlatform; @@ -101,16 +100,16 @@ using namespace claims::common; // NOLINT #define DATA_DO_LOAD /* switch to open debug log ouput */ -// #define DATA_INJECTOR_DEBUG -// #define DATA_INJECTOR_PREF +// #define DATA_INGESTION_DEBUG +// #define DATA_INGESTION_PREF #ifdef CLAIMS_DEBUG_LOG -#ifdef DATA_INJECTOR_DEBUG +#ifdef DATA_INGESTION_DEBUG #define DLOG_DI(info) DLOG(INFO) << info << std::endl; #else #define DLOG_DI(info) #endif -#ifdef DATA_INJECTOR_PREF +#ifdef DATA_INGESTION_PREF #define PLOG_DI(info) DLOG(INFO) << info << endl; #else #define PLOG_DI(info) @@ -119,7 +118,7 @@ using namespace claims::common; // NOLINT #define DLOG_DI(info) #endif -#ifdef DATA_INJECTOR_PREF +#ifdef DATA_INGESTION_PREF #define ATOMIC_ADD(var, value) __sync_add_and_fetch(&var, value); #define GET_TIME_DI(var) GETCURRENTTIME(var); #else @@ -130,25 +129,25 @@ using namespace claims::common; // NOLINT namespace claims { namespace loader { -uint64_t DataInjector::total_get_substr_time_ = 0; -uint64_t DataInjector::total_check_string_time_ = 0; -uint64_t DataInjector::total_to_value_time_ = 0; -uint64_t DataInjector::total_check_and_to_value_func_time_ = 0; -uint64_t DataInjector::total_check_and_to_value_time_ = 0; -uint64_t DataInjector::total_insert_time_ = 0; -uint64_t DataInjector::total_add_time_ = 0; -uint64_t DataInjector::total_lock_tuple_buffer_time_ = 0; -uint64_t DataInjector::total_lock_pj_buffer_time_ = 0; -uint64_t DataInjector::total_get_task_time_ = 0; - -uint64_t DataInjector::total_read_sem_time_ = 0; -uint64_t DataInjector::total_unread_sem_time_ = 0; - -uint64_t 
DataInjector::total_read_sem_fail_count_ = 0; -uint64_t DataInjector::total_unread_sem_fail_count_ = 0; -uint64_t DataInjector::total_append_warning_time_ = 0; - -DataInjector::DataInjector(TableDescriptor* table, const string col_separator, +uint64_t DataIngestion::total_get_substr_time_ = 0; +uint64_t DataIngestion::total_check_string_time_ = 0; +uint64_t DataIngestion::total_to_value_time_ = 0; +uint64_t DataIngestion::total_check_and_to_value_func_time_ = 0; +uint64_t DataIngestion::total_check_and_to_value_time_ = 0; +uint64_t DataIngestion::total_insert_time_ = 0; +uint64_t DataIngestion::total_add_time_ = 0; +uint64_t DataIngestion::total_lock_tuple_buffer_time_ = 0; +uint64_t DataIngestion::total_lock_pj_buffer_time_ = 0; +uint64_t DataIngestion::total_get_task_time_ = 0; + +uint64_t DataIngestion::total_read_sem_time_ = 0; +uint64_t DataIngestion::total_unread_sem_time_ = 0; + +uint64_t DataIngestion::total_read_sem_fail_count_ = 0; +uint64_t DataIngestion::total_unread_sem_fail_count_ = 0; +uint64_t DataIngestion::total_append_warning_time_ = 0; + +DataIngestion::DataIngestion(TableDescriptor* table, const string col_separator, const string row_separator) : table_(table), col_separator_(col_separator), @@ -199,7 +198,7 @@ DataInjector::DataInjector(TableDescriptor* table, const string col_separator, // #endif } -DataInjector::~DataInjector() { +DataIngestion::~DataIngestion() { DELETE_PTR(table_schema_); // DELETE_PTR(connector_); DELETE_PTR(sblock_); @@ -217,7 +216,7 @@ DataInjector::~DataInjector() { file_list_.clear(); } -RetCode DataInjector::PrepareInitInfo(FileOpenFlag open_flag) { +RetCode DataIngestion::PrepareInitInfo(FileOpenFlag open_flag) { int ret = rSuccess; for (int i = 0; i < table_->getNumberOfProjection(); i++) { vector temp_v; @@ -250,7 +249,7 @@ RetCode DataInjector::PrepareInitInfo(FileOpenFlag open_flag) { return ret; } -RetCode DataInjector::LoadFromFileSingleThread(vector input_file_names, +RetCode 
DataIngestion::LoadFromFileSingleThread(vector input_file_names, FileOpenFlag open_flag, ExecutedResult* result, double sample_rate) { @@ -364,7 +363,7 @@ RetCode DataInjector::LoadFromFileSingleThread(vector input_file_names, return ret; } -RetCode DataInjector::SetTableState(FileOpenFlag open_flag, +RetCode DataIngestion::SetTableState(FileOpenFlag open_flag, ExecutedResult* result) { int ret = rSuccess; if (FileOpenFlag::kCreateFile == open_flag) { @@ -397,7 +396,7 @@ RetCode DataInjector::SetTableState(FileOpenFlag open_flag, return ret; } -RetCode DataInjector::CheckFiles(vector input_file_names, +RetCode DataIngestion::CheckFiles(vector input_file_names, ExecutedResult* result) { int ret = rSuccess; for (auto file_name : input_file_names) { @@ -413,7 +412,7 @@ RetCode DataInjector::CheckFiles(vector input_file_names, return ret; } -RetCode DataInjector::PrepareEverythingForLoading( +RetCode DataIngestion::PrepareEverythingForLoading( vector input_file_names, FileOpenFlag open_flag, ExecutedResult* result) { int ret = rSuccess; @@ -449,7 +448,7 @@ RetCode DataInjector::PrepareEverythingForLoading( return ret; } -RetCode DataInjector::FinishJobAfterLoading(FileOpenFlag open_flag) { +RetCode DataIngestion::FinishJobAfterLoading(FileOpenFlag open_flag) { int ret = rSuccess; #ifdef DATA_DO_LOAD @@ -468,7 +467,7 @@ RetCode DataInjector::FinishJobAfterLoading(FileOpenFlag open_flag) { return ret; } -RetCode DataInjector::LoadFromFileMultiThread(vector input_file_names, +RetCode DataIngestion::LoadFromFileMultiThread(vector input_file_names, FileOpenFlag open_flag, ExecutedResult* result, double sample_rate) { @@ -595,7 +594,7 @@ RetCode DataInjector::LoadFromFileMultiThread(vector input_file_names, * Phase 2: after handing all raw data file, flush all block that are not full * into HDFS/disk */ -RetCode DataInjector::LoadFromFile(vector input_file_names, +RetCode DataIngestion::LoadFromFile(vector input_file_names, FileOpenFlag open_flag, ExecutedResult* result, 
double sample_rate) { total_get_substr_time_ = 0; @@ -625,7 +624,7 @@ RetCode DataInjector::LoadFromFile(vector input_file_names, #endif } -RetCode DataInjector::PrepareLocalPJBuffer( +RetCode DataIngestion::PrepareLocalPJBuffer( vector>& pj_buffer) { int ret = rSuccess; for (int i = 0; i < table_->getNumberOfProjection(); i++) { @@ -643,7 +642,7 @@ RetCode DataInjector::PrepareLocalPJBuffer( return ret; } -RetCode DataInjector::DestroyLocalPJBuffer( +RetCode DataIngestion::DestroyLocalPJBuffer( vector>& pj_buffer) { int ret = rSuccess; for (auto it : pj_buffer) { @@ -656,19 +655,19 @@ RetCode DataInjector::DestroyLocalPJBuffer( return ret; } -void* DataInjector::HandleTuple(void* ptr) { - DataInjector* injector = static_cast(ptr); +void* DataIngestion::HandleTuple(void* ptr) { + DataIngestion* injestion = static_cast(ptr); string tuple_to_handle = ""; string file_name = ""; uint64_t row_id_in_file = 0; - DataInjector::LoadTask task; + DataIngestion::LoadTask task; RetCode ret = rSuccess; - int self_thread_index = __sync_fetch_and_add(&injector->thread_index_, 1); + int self_thread_index = __sync_fetch_and_add(&injestion->thread_index_, 1); LOG(INFO) << "my thread_index is " << self_thread_index << endl; LOG(INFO) << "before handling tuple, thread " << self_thread_index << " tuple count sem value is :" - << injector->tuple_count_sem_in_lists_[self_thread_index].get_value() + << injestion->tuple_count_sem_in_lists_[self_thread_index].get_value() << endl; /* * store the validity of every column data, @@ -676,72 +675,72 @@ void* DataInjector::HandleTuple(void* ptr) { * is used for too many columns and too few columns */ vector columns_validities; - void* tuple_buffer = Malloc(injector->table_schema_->getTupleMaxSize()); + void* tuple_buffer = Malloc(injestion->table_schema_->getTupleMaxSize()); if (NULL == tuple_buffer) { ret = rNoMemory; - injector->multi_thread_status_ = ret; - injector->finished_thread_sem_.post(); + injestion->multi_thread_status_ = ret; + 
injestion->finished_thread_sem_.post(); return NULL; } Block* block_to_write = new Block(BLOCK_SIZE); vector> local_pj_buffer; - injector->PrepareLocalPJBuffer(local_pj_buffer); + injestion->PrepareLocalPJBuffer(local_pj_buffer); while (true) { - if (injector->multi_thread_status_ != rSuccess) break; + if (injestion->multi_thread_status_ != rSuccess) break; GET_TIME_DI(start_get_task_time); - if (injector->all_tuple_read_ == 1) { + if (injestion->all_tuple_read_ == 1) { GET_TIME_DI(start_read_sem); - if (!injector->tuple_count_sem_in_lists_[self_thread_index] + if (!injestion->tuple_count_sem_in_lists_[self_thread_index] .try_wait()) { ///// lock/sem - ATOMIC_ADD(injector->total_read_sem_fail_count_, 1); - ATOMIC_ADD(injector->total_read_sem_time_, + ATOMIC_ADD(injestion->total_read_sem_fail_count_, 1); + ATOMIC_ADD(injestion->total_read_sem_time_, GetElapsedTimeInUs(start_read_sem)); DLOG_DI("all tuple in pool is handled "); EXEC_AND_LOG( - ret, injector->FlushNotFullBlock(block_to_write, local_pj_buffer), + ret, injestion->FlushNotFullBlock(block_to_write, local_pj_buffer), "flush all last block that are not full", "failed to flush all last block."); if (ret != rSuccess) // it is not need to use lock - injector->multi_thread_status_ = ret; + injestion->multi_thread_status_ = ret; DELETE_PTR(tuple_buffer); - injector->DestroyLocalPJBuffer(local_pj_buffer); - injector->finished_thread_sem_.post(); + injestion->DestroyLocalPJBuffer(local_pj_buffer); + injestion->finished_thread_sem_.post(); return NULL; // success. 
all tuple is handled } DLOG_DI("all tuple is read ,tuple count sem is:" - << injector->tuple_count_sem_in_lists_[self_thread_index] + << injestion->tuple_count_sem_in_lists_[self_thread_index] .get_value()); // get tuple from list without lock, as // producer thread is over, there are only consumer threads - task = std::move(injector->task_lists_[self_thread_index].front()); - injector->task_lists_[self_thread_index].pop_front(); + task = std::move(injestion->task_lists_[self_thread_index].front()); + injestion->task_lists_[self_thread_index].pop_front(); } else { DLOG_DI("tuple count sem is:" - << injector->tuple_count_sem_in_lists_[self_thread_index] + << injestion->tuple_count_sem_in_lists_[self_thread_index] .get_value()); // waiting for new tuple read from file GET_TIME_DI(start_unread_sem); - if (!injector + if (!injestion ->tuple_count_sem_in_lists_[self_thread_index] ///// lock/sem .try_wait()) { ///// lock/sem - ATOMIC_ADD(injector->total_unread_sem_fail_count_, 1); + ATOMIC_ADD(injestion->total_unread_sem_fail_count_, 1); continue; } - ATOMIC_ADD(injector->total_unread_sem_time_, + ATOMIC_ADD(injestion->total_unread_sem_time_, GetElapsedTimeInUs(start_unread_sem)); // get tuple from pool with lock GET_TIME_DI(start_tuple_buffer_lock_time); LockGuard guard( - injector->task_list_access_lock_[self_thread_index]); ///// lock/sem + injestion->task_list_access_lock_[self_thread_index]); ///// lock/sem ATOMIC_ADD( - injector->total_lock_tuple_buffer_time_, ///// lock/sem + injestion->total_lock_tuple_buffer_time_, ///// lock/sem GetElapsedTimeInUs(start_tuple_buffer_lock_time)); ///// lock/sem task = std::move( - injector->task_lists_[self_thread_index].front()); ///// lock/sem - injector->task_lists_[self_thread_index].pop_front(); ///// lock/sem + injestion->task_lists_[self_thread_index].front()); ///// lock/sem + injestion->task_lists_[self_thread_index].pop_front(); ///// lock/sem } tuple_to_handle = task.tuple_; @@ -750,24 +749,24 @@ void* 
DataInjector::HandleTuple(void* ptr) { DLOG_DI("thread " << self_thread_index << " get task whose row_id_in_file is " << row_id_in_file); - ATOMIC_ADD(injector->total_get_task_time_, + ATOMIC_ADD(injestion->total_get_task_time_, GetElapsedTimeInUs(start_get_task_time)); - if (0 == row_id_in_file % 50000) injector->AnnounceIAmLoading(); + if (0 == row_id_in_file % 50000) injestion->AnnounceIAmLoading(); GET_TIME_DI(add_time); - EXEC_AND_ONLY_LOG_ERROR(ret, injector->AddRowIdColumn(tuple_to_handle), + EXEC_AND_ONLY_LOG_ERROR(ret, injestion->AddRowIdColumn(tuple_to_handle), "failed to add row_id column for tuple."); if (ret != rSuccess) { // it is not need to use lock - injector->multi_thread_status_ = ret; + injestion->multi_thread_status_ = ret; break; } - ATOMIC_ADD(injector->total_add_time_, GetElapsedTimeInUs(add_time)); + ATOMIC_ADD(injestion->total_add_time_, GetElapsedTimeInUs(add_time)); DLOG_DI("after adding row id, tuple is:" << tuple_to_handle); GET_TIME_DI(start_check_time); columns_validities.clear(); - memset(tuple_buffer, 0, injector->table_schema_->getTupleMaxSize()); - if (rSuccess != (ret = injector->CheckAndToValue( + memset(tuple_buffer, 0, injestion->table_schema_->getTupleMaxSize()); + if (rSuccess != (ret = injestion->CheckAndToValue( tuple_to_handle, tuple_buffer, RawDataSource::kFile, columns_validities))) { /** @@ -776,42 +775,42 @@ void* DataInjector::HandleTuple(void* ptr) { */ ELOG(ret, "Data is in file name: " << file_name << " Line: " << row_id_in_file); - injector->multi_thread_status_ = ret; + injestion->multi_thread_status_ = ret; break; } // only handle data warnings, because of no data error - if (!injector->result_->HasEnoughWarning()) { + if (!injestion->result_->HasEnoughWarning()) { for (auto it : columns_validities) { - string validity_info = injector->GenerateDataValidityInfo( - it, injector->table_, row_id_in_file, file_name); + string validity_info = injestion->GenerateDataValidityInfo( + it, injestion->table_, 
row_id_in_file, file_name); DLOG_DI("append warning info:" << validity_info); GET_TIME_DI(start_append_warning_time); - injector->result_->AtomicAppendWarning(validity_info); ///// lock/sem - ATOMIC_ADD(injector->total_append_warning_time_, + injestion->result_->AtomicAppendWarning(validity_info); ///// lock/sem + ATOMIC_ADD(injestion->total_append_warning_time_, GetElapsedTimeInUs(start_append_warning_time)); } } - ATOMIC_ADD(injector->total_check_and_to_value_time_, + ATOMIC_ADD(injestion->total_check_and_to_value_time_, GetElapsedTimeInUs(start_check_time)); GET_TIME_DI(start_insert_time); EXEC_AND_ONLY_LOG_ERROR( - ret, injector->InsertSingleTuple(tuple_buffer, block_to_write, + ret, injestion->InsertSingleTuple(tuple_buffer, block_to_write, local_pj_buffer), ///// lock/sem "failed to insert tuple in " << file_name << " at line " << row_id_in_file << "."); if (ret != rSuccess) { // it is not need to use lock - injector->multi_thread_status_ = ret; + injestion->multi_thread_status_ = ret; break; } - ATOMIC_ADD(injector->total_insert_time_, + ATOMIC_ADD(injestion->total_insert_time_, GetElapsedTimeInUs(start_insert_time)); } DELETE_PTR(block_to_write); DELETE_PTR(tuple_buffer); - injector->DestroyLocalPJBuffer(local_pj_buffer); - injector->finished_thread_sem_.post(); + injestion->DestroyLocalPJBuffer(local_pj_buffer); + injestion->finished_thread_sem_.post(); } /** @@ -819,7 +818,7 @@ void* DataInjector::HandleTuple(void* ptr) { * if all OK, then insert into file and update catalog; * else return error to client without inserting any data */ -RetCode DataInjector::InsertFromString(const string tuples, +RetCode DataIngestion::InsertFromString(const string tuples, ExecutedResult* result) { int ret = rSuccess; LOG(INFO) << "tuples is: " << tuples << endl; @@ -910,7 +909,7 @@ RetCode DataInjector::InsertFromString(const string tuples, } // flush the last block which is not full of 64*1024Byte -RetCode DataInjector::FlushNotFullBlock( +RetCode 
DataIngestion::FlushNotFullBlock( Block* block_to_write, vector>& pj_buffer) { TableDescriptor* table = table_; int ret = rSuccess; @@ -938,7 +937,7 @@ RetCode DataInjector::FlushNotFullBlock( return ret; } -RetCode DataInjector::UpdateCatalog(FileOpenFlag open_flag) { +RetCode DataIngestion::UpdateCatalog(FileOpenFlag open_flag) { int ret = rSuccess; // register the partition information to catalog for (int i = 0; i < table_->getNumberOfProjection(); i++) { @@ -962,7 +961,7 @@ RetCode DataInjector::UpdateCatalog(FileOpenFlag open_flag) { return ret; } -inline RetCode DataInjector::AddRowIdColumn(string& tuple_string) { +inline RetCode DataIngestion::AddRowIdColumn(string& tuple_string) { uint64_t row_id_value = __sync_fetch_and_add(&row_id_in_table_, 1L); // make sure tuple string in a uniform format(always has a column // separator before row separator) with format of what is get from INSERT @@ -971,7 +970,7 @@ inline RetCode DataInjector::AddRowIdColumn(string& tuple_string) { return rSuccess; } -RetCode DataInjector::InsertTupleIntoProjection( +RetCode DataIngestion::InsertTupleIntoProjection( int proj_index, void* tuple_buffer, Block* block_to_write, vector>& local_pj_buffer) { int ret = rSuccess; @@ -1039,7 +1038,7 @@ RetCode DataInjector::InsertTupleIntoProjection( * partition key * if the block is full, write to real data file in HDFS/disk. 
*/ -RetCode DataInjector::InsertSingleTuple( +RetCode DataIngestion::InsertSingleTuple( void* tuple_buffer, Block* block_to_write, vector>& local_pj_buffer) { int ret = rSuccess; @@ -1050,7 +1049,7 @@ RetCode DataInjector::InsertSingleTuple( return ret; } -inline RetCode DataInjector::CheckAndToValue( +inline RetCode DataIngestion::CheckAndToValue( string tuple_string, void* tuple_buffer, RawDataSource raw_data_source, vector& columns_validities) { // RetCode success = @@ -1059,7 +1058,7 @@ inline RetCode DataInjector::CheckAndToValue( columns_validities); } -istream& DataInjector::GetTupleTerminatedBy(ifstream& ifs, string& res, +istream& DataIngestion::GetTupleTerminatedBy(ifstream& ifs, string& res, const string& terminator) { res.clear(); if (1 == terminator.length()) { @@ -1087,7 +1086,7 @@ istream& DataInjector::GetTupleTerminatedBy(ifstream& ifs, string& res, return ifs; } -void DataInjector::AnnounceIAmLoading() { +void DataIngestion::AnnounceIAmLoading() { static char* load_output_info[7] = { "Loading \r", "Loading.\r", "Loading..\r", "Loading...\r", "Loading....\r", "Loading.....\r", "Loading......\r"}; @@ -1108,7 +1107,7 @@ const char* validity_info[9][2] = { {}, {}}; */ -string DataInjector::GenerateDataValidityInfo(const Validity& vali, +string DataIngestion::GenerateDataValidityInfo(const Validity& vali, TableDescriptor* table, int line, const string& file) { ostringstream oss; diff --git a/loader/data_injector.h b/loader/data_ingestion.h similarity index 95% rename from loader/data_injector.h rename to loader/data_ingestion.h index c9fea08df..c01fb0407 100644 --- a/loader/data_injector.h +++ b/loader/data_ingestion.h @@ -16,18 +16,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
* - * /Claims/loader/data_injector.h + * /Claims/loader/data_injestion.h * * Created on: Oct 22, 2015 * Author: yukai * Email: yukai2014@gmail.com * - * Description: class for injecting data from files or string + * Description: class for ingesting data from files or string * */ -#ifndef LOADER_DATA_INJECTOR_H_ -#define LOADER_DATA_INJECTOR_H_ +#ifndef LOADER_DATA_INGESTION_H_ +#define LOADER_DATA_INGESTION_H_ #include #include #include @@ -52,7 +52,7 @@ class ExecutedResult; namespace claims { namespace loader { class TableFileConnector; -class DataInjector { +class DataIngestion { public: struct LoadTask { std::string tuple_; @@ -64,7 +64,7 @@ class DataInjector { }; public: - // DataInjector() {} + // DataIngestion() {} /** * @brief Method description: get necessary info from table and init * connector_ @@ -72,10 +72,10 @@ class DataInjector { * @param col_separator: column separator * @param row_separator: row separator */ - DataInjector(TableDescriptor* table, const string col_separator = "|", - const string row_separator = "\n"); + DataIngestion(TableDescriptor* table, const string col_separator = "|", + const string row_separator = "\n"); - virtual ~DataInjector(); + virtual ~DataIngestion(); /** * @brief Method description: load/append data from multiple files into table @@ -239,4 +239,4 @@ class DataInjector { } /* namespace loader */ } /* namespace claims */ -#endif // LOADER_DATA_INJECTOR_H_ +#endif // LOADER_DATA_INGESTION_H_ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 28ed75420..97539a035 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -70,7 +70,6 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, [=](IpPortAtom, std::string ip, int port) { // NOLINT LOG(INFO) << "receive slave network address(" << ip << ":" << port << ")" << endl; - // slave_addrs_.push_back(NetAddr(ip, port)); int new_slave_fd = -1; if (rSuccess != mloader->GetSocketFdConnectedWithSlave(ip, port, 
&new_slave_fd)) { @@ -79,7 +78,9 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, LOG(INFO) << "succeed to get connected fd with slave"; } assert(new_slave_fd > 3); + mloader->slave_addrs_.push_back(NetAddr(ip, port)); mloader->slave_sockets_.push_back(new_slave_fd); + assert(mloader->slave_sockets_.size() == mloader->slave_addrs_.size()); DLOG(INFO) << "start to send test message to slave"; // test whether socket works well @@ -116,14 +117,6 @@ RetCode MasterLoader::ConnectWithSlaves() { true); DLOG(INFO) << "published in " << master_loader_ip << ":" << master_loader_port; - - // auto test_actor = remote_actor(master_loader_ip, master_loader_port); - // caf::scoped_actor test1; - // test1->sync_send(test_actor, IpPortAtom::value, "123.123.13.123", - // 123); - - // while (int temp = getchar() != 'm') { - // } } catch (exception& e) { LOG(ERROR) << e.what(); return rFailure; @@ -131,13 +124,24 @@ RetCode MasterLoader::ConnectWithSlaves() { return ret; } -RetCode MasterLoader::Inject() {} +RetCode MasterLoader::Ingest() { + RetCode ret = rSuccess; + string message = GetMessage(); + + IngestionRequest req; + EXEC_AND_LOG(ret, GetRequestFromMessage(message, &req), "got request!", + "failed to get request"); + + // CheckAndToValue(); + + return ret; +} string MasterLoader::GetMessage() {} bool MasterLoader::CheckValidity() {} -void MasterLoader::DistributeSubInjection() {} +void MasterLoader::DistributeSubIngestion() {} RetCode MasterLoader::GetSocketFdConnectedWithSlave(string ip, int port, int* connected_fd) { @@ -158,12 +162,12 @@ RetCode MasterLoader::GetSocketFdConnectedWithSlave(string ip, int port, return rSuccess; } +RetCode MasterLoader::GetRequestFromMessage(const string& message, + IngestionRequest* req) { +} + void* MasterLoader::StartMasterLoader(void* arg) { Config::getInstance(); - // if (rSuccess != Catalog::getInstance()->restoreCatalog()) { - // LOG(ERROR) << "failed to restore catalog" << std::endl; - // cerr << "ERROR: 
restore catalog failed" << endl; - // } LOG(INFO) << "start master loader..."; int ret = rSuccess; @@ -171,8 +175,8 @@ void* MasterLoader::StartMasterLoader(void* arg) { EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->ConnectWithSlaves(), "failed to connect all slaves"); - EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Inject(), - "failed to inject data"); + EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), + "failed to ingest data"); return NULL; } diff --git a/loader/master_loader.h b/loader/master_loader.h index d62911400..0e66a2f4a 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -50,22 +50,31 @@ class MasterLoader { int port_; }; + struct IngestionRequest { + string table_name_; + string col_sep_; + string row_sep_; + string tuples_; + }; + public: MasterLoader(); ~MasterLoader(); RetCode ConnectWithSlaves(); - RetCode Inject(); + RetCode Ingest(); private: string GetMessage(); + RetCode GetRequestFromMessage(const string& message, IngestionRequest* req); + RetCode GetSlaveNetAddr(); RetCode SetSocketWithSlaves(); RetCode GetSocketFdConnectedWithSlave(string ip, int port, int* connected_fd); bool CheckValidity(); - void DistributeSubInjection(); + void DistributeSubIngestion(); static behavior ReceiveSlaveReg(event_based_actor* self, MasterLoader* mloader); diff --git a/loader/single_file_connector.cpp b/loader/single_file_connector.cpp index cb3702769..672cb3e79 100644 --- a/loader/single_file_connector.cpp +++ b/loader/single_file_connector.cpp @@ -47,7 +47,7 @@ #define DLOG_FC(info) #endif -#ifdef DATA_INJECTOR_PREF +#ifdef DATA_INGESTION_PREF #define ATOMIC_ADD(var, value) __sync_add_and_fetch(&var, value); #define GET_TIME_DI(var) GETCURRENTTIME(var); #else diff --git a/loader/test/Makefile.am b/loader/test/Makefile.am index 1c808107b..bfbde05c9 100644 --- a/loader/test/Makefile.am +++ b/loader/test/Makefile.am @@ -27,4 +27,4 @@ libtest_a_SOURCES = \ single_file_connector_test.h single_file_connector_test.cpp \ 
single_thread_single_file_connector_test.h\ table_file_connector_test.h table_file_connector_test.cpp \ - data_injector_test.cpp data_injector_test.h + data_ingestion_test.cpp data_ingestion_test.h diff --git a/loader/test/data_injector_test.cpp b/loader/test/data_ingestion_test.cpp similarity index 88% rename from loader/test/data_injector_test.cpp rename to loader/test/data_ingestion_test.cpp index 1208f6b3b..9b1f54de9 100644 --- a/loader/test/data_injector_test.cpp +++ b/loader/test/data_ingestion_test.cpp @@ -16,7 +16,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * /Claims/loader/test/data_injector_test.cpp + * /Claims/loader/test/data_injestion_test.cpp * * Created on: Oct 30, 2015 * Author: yukai @@ -26,14 +26,13 @@ * */ -#include "./data_injector_test.h" - #include #include #include "../../catalog/table.h" #include "../../catalog/attribute.h" #include "../../common/Block/ResultSet.h" +#include "data_ingestion_test.h" using claims::catalog::TableDescriptor; using claims::catalog::Attribute; using std::string; @@ -42,16 +41,16 @@ using std::vector; namespace claims { namespace loader { -TEST_F(DataInjectorTest, Load) { +TEST_F(DataIngestionTest, Load) { // EXPECT_TRUE(client_.connected()); ResultSet rs; string command = ""; std::cout << "it is load" << std::endl; } -TEST_F(DataInjectorTest, Append) {} +TEST_F(DataIngestionTest, Append) {} -TEST_F(DataInjectorTest, Insert) {} +TEST_F(DataIngestionTest, Insert) {} } /* namespace loader */ } /* namespace claims */ diff --git a/loader/test/data_injector_test.h b/loader/test/data_ingestion_test.h similarity index 78% rename from loader/test/data_injector_test.h rename to loader/test/data_ingestion_test.h index 0b1e60dc6..9a89e6b14 100644 --- a/loader/test/data_injector_test.h +++ b/loader/test/data_ingestion_test.h @@ -16,7 +16,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
* - * /Claims/loader/test/data_injector_test.h + * /Claims/loader/test/data_injestion_test.h * * Created on: Oct 30, 2015 * Author: yukai @@ -26,14 +26,14 @@ * */ -#ifndef LOADER_TEST_DATA_INJECTOR_TEST_H_ -#define LOADER_TEST_DATA_INJECTOR_TEST_H_ +#ifndef LOADER_TEST_DATA_INGESTION_TEST_H_ +#define LOADER_TEST_DATA_INGESTION_TEST_H_ #include #include #include -#include "../../loader/data_injector.h" #include "../../catalog/catalog.h" +#include "../data_ingestion.h" using std::ofstream; using claims::catalog::Catalog; @@ -41,10 +41,10 @@ using claims::catalog::Catalog; namespace claims { namespace loader { -class DataInjectorTest : public ::testing::Test { +class DataIngestionTest : public ::testing::Test { public: - DataInjectorTest() {} - virtual ~DataInjectorTest() {} + DataIngestionTest() {} + virtual ~DataIngestionTest() {} public: // Client client_; @@ -52,7 +52,7 @@ class DataInjectorTest : public ::testing::Test { int port_; }; -TEST_F(DataInjectorTest, GetTuple1) { +TEST_F(DataIngestionTest, GetTuple1) { char* data[2] = {"sf|sfs||sfssfs|\n", "sfsf\fw|fsfegbf,s,nn|fwnelweh\n"}; ofstream ofs("test_for_DI_GetTuple"); @@ -62,13 +62,13 @@ TEST_F(DataInjectorTest, GetTuple1) { string res; string ter("\n"); int i = 0; - while (DataInjector::GetTupleTerminatedBy(ifs, res, ter)) { + while (DataIngestion::GetTupleTerminatedBy(ifs, res, ter)) { EXPECT_STREQ(data[i++], res.c_str()); res.clear(); } } -TEST_F(DataInjectorTest, GetTuple2) { +TEST_F(DataIngestionTest, GetTuple2) { char* data[2] = {"sf|sfs||sfssfs|\n", "sfsf\fw|fsfegbf,s,nn|fwnelweh\n|\n"}; ofstream ofs("test_for_DI_GetTuple"); @@ -78,12 +78,12 @@ TEST_F(DataInjectorTest, GetTuple2) { string res; string ter = "|\n"; int i = 0; - while (DataInjector::GetTupleTerminatedBy(ifs, res, ter)) { + while (DataIngestion::GetTupleTerminatedBy(ifs, res, ter)) { EXPECT_STREQ(data[i++], res.c_str()); res.clear(); } } -TEST_F(DataInjectorTest, GetTuple3) { +TEST_F(DataIngestionTest, GetTuple3) { char* data[3] = 
{"sf|sfs||sfssfs|\n", "sfsf\fw|fsfegbf,s,nn|fwnelweh\n|\n", "|\n"}; @@ -94,13 +94,13 @@ TEST_F(DataInjectorTest, GetTuple3) { string res; string ter = "|\n"; int i = 0; - while (DataInjector::GetTupleTerminatedBy(ifs, res, ter)) { + while (DataIngestion::GetTupleTerminatedBy(ifs, res, ter)) { EXPECT_STREQ(data[i++], res.c_str()); res.clear(); } } -TEST_F(DataInjectorTest, GetTuple4) { +TEST_F(DataIngestionTest, GetTuple4) { char* data[4] = {"sf|sfs||sfssfs||||", "sfsf\fw|fsfe|||gbf,s,nn|fwnelweh\n||||", "|\n||||", "sfsf"}; @@ -111,7 +111,7 @@ TEST_F(DataInjectorTest, GetTuple4) { string res; string ter = "||||"; int i = 0; - while (DataInjector::GetTupleTerminatedBy(ifs, res, ter)) { + while (DataIngestion::GetTupleTerminatedBy(ifs, res, ter)) { EXPECT_STREQ(data[i++], res.c_str()); res.clear(); } @@ -119,4 +119,4 @@ TEST_F(DataInjectorTest, GetTuple4) { } /* namespace loader */ } /* namespace claims */ -#endif // LOADER_TEST_DATA_INJECTOR_TEST_H_ +#endif // LOADER_TEST_DATA_INGESTION_TEST_H_ diff --git a/stmt_handler/delete_stmt_exec.cpp b/stmt_handler/delete_stmt_exec.cpp index c6fa2a969..522363aa4 100644 --- a/stmt_handler/delete_stmt_exec.cpp +++ b/stmt_handler/delete_stmt_exec.cpp @@ -34,14 +34,14 @@ #include "../common/Block/BlockStream.h" #include "../common/Block/ResultSet.h" -#include "../loader/data_injector.h" #include "../catalog/table.h" #include "../catalog/projection.h" #include "../Daemon/Daemon.h" #include "../sql_parser/ast_node/ast_select_stmt.h" #include "../stmt_handler/select_exec.h" #include "../common/error_define.h" -using claims::loader::DataInjector; +#include "../loader/data_ingestion.h" +using claims::loader::DataIngestion; using std::endl; using std::string; using std::vector; @@ -262,8 +262,8 @@ void DeleteStmtExec::InsertDeletedDataIntoTableDEL( delete tuple_it; } - DataInjector* injector = new DataInjector(table_del); - injector->InsertFromString(ostr.str(), exec_result); + DataIngestion* injestion = new DataIngestion(table_del); + 
injestion->InsertFromString(ostr.str(), exec_result); // HdfsLoader* Hl = new HdfsLoader(tabledel); // string tmp = ostr.str(); // Hl->append(ostr.str()); diff --git a/stmt_handler/drop_table_exec.cpp b/stmt_handler/drop_table_exec.cpp index a7b853013..4ca0c4400 100644 --- a/stmt_handler/drop_table_exec.cpp +++ b/stmt_handler/drop_table_exec.cpp @@ -36,9 +36,10 @@ #include "../common/error_define.h" #include "../Config.h" #include "../Environment.h" -#include "../loader/data_injector.h" #include "../loader/table_file_connector.h" #include "../stmt_handler/drop_table_exec.h" + +#include "../loader/data_ingestion.h" using claims::common::FilePlatform; using claims::loader::TableFileConnector; using claims::catalog::Catalog; diff --git a/stmt_handler/insert_exec.cpp b/stmt_handler/insert_exec.cpp index 302d46c8f..931ffd33c 100644 --- a/stmt_handler/insert_exec.cpp +++ b/stmt_handler/insert_exec.cpp @@ -33,13 +33,13 @@ #include "../catalog/table.h" #include "../catalog/catalog.h" #include "../Environment.h" -#include "../loader/data_injector.h" #include "../common/error_define.h" +#include "../loader/data_ingestion.h" using claims::catalog::Catalog; using claims::common::rSuccess; using claims::common::FileOpenFlag; -using claims::loader::DataInjector; +using claims::loader::DataIngestion; using claims::common::rNotSupport; using claims::catalog::TableDescriptor; using claims::common::rTableNotExisted; @@ -315,9 +315,9 @@ RetCode InsertExec::Execute(ExecutedResult *exec_result) { if (!is_correct_) return claims::common::rFailure; - DataInjector *injector = new DataInjector(table); + DataIngestion *injestion = new DataIngestion(table); // str() will copy string buffer without the last '\n' - ret = injector->InsertFromString(ostr.str() + "\n", exec_result); + ret = injestion->InsertFromString(ostr.str() + "\n", exec_result); if (rSuccess == ret) { ostr.clear(); ostr.str(""); @@ -329,7 +329,7 @@ RetCode InsertExec::Execute(ExecutedResult *exec_result) { << 
table->getTableName() << endl; exec_result->SetError("failed to insert tuples into table "); } - DELETE_PTR(injector); + DELETE_PTR(injestion); Environment::getInstance()->getCatalog()->saveCatalog(); } diff --git a/stmt_handler/load_exec.cpp b/stmt_handler/load_exec.cpp index ea1058fbc..3986d2fc0 100644 --- a/stmt_handler/load_exec.cpp +++ b/stmt_handler/load_exec.cpp @@ -31,9 +31,9 @@ #include #include "../stmt_handler/load_exec.h" #include "../Environment.h" -#include "../loader/data_injector.h" +#include "../loader/data_ingestion.h" using std::vector; -using claims::loader::DataInjector; +using claims::loader::DataIngestion; namespace claims { namespace stmt_handler { #define NEWRESULT @@ -119,9 +119,9 @@ RetCode LoadExec::Execute(ExecutedResult *exec_result) { GETCURRENTTIME(start_time); // LOG(INFO) << buf << std::endl; #ifdef NEW_LOADER - DataInjector *injector = - new DataInjector(table, column_separator, tuple_separator); - ret = injector->LoadFromFile(path_names, + DataIngestion *injestion = + new DataIngestion(table, column_separator, tuple_separator); + ret = injestion->LoadFromFile(path_names, static_cast(load_ast_->mode_), exec_result, load_ast_->sample_); double load_time_ms = GetElapsedTime(start_time); @@ -140,7 +140,7 @@ RetCode LoadExec::Execute(ExecutedResult *exec_result) { oss << "load data successfully (" << load_time_ms / 1000.0 << " sec) "; exec_result->SetResult(oss.str(), NULL); } - DELETE_PTR(injector); + DELETE_PTR(injestion); #else Hdfsloader *loader = new Hdfsloader(column_separator[0], tuple_separator[0], path_names, table, From 00b42f0afa1b986b0a1b9feb012f1459470d05d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Sat, 16 Apr 2016 12:41:50 +0800 Subject: [PATCH 03/58] txn_manager first commit --- Makefile.am | 16 +- configure.ac | 1 + txn_client_test.cpp | 118 +++++++++++++ txn_manager/Makefile.am | 31 ++++ txn_manager/txn.cpp | 167 ++++++++++++++++++ txn_manager/txn.hpp | 280 
++++++++++++++++++++++++++++++ txn_manager/txn_client.cpp | 164 ++++++++++++++++++ txn_manager/txn_client.hpp | 84 +++++++++ txn_manager/txn_server.cpp | 344 +++++++++++++++++++++++++++++++++++++ txn_manager/txn_server.hpp | 141 +++++++++++++++ txn_server_test.cpp | 217 +++++++++++++++++++++++ 11 files changed, 1559 insertions(+), 4 deletions(-) create mode 100644 txn_client_test.cpp create mode 100644 txn_manager/Makefile.am create mode 100644 txn_manager/txn.cpp create mode 100644 txn_manager/txn.hpp create mode 100644 txn_manager/txn_client.cpp create mode 100644 txn_manager/txn_client.hpp create mode 100644 txn_manager/txn_server.cpp create mode 100644 txn_manager/txn_server.hpp create mode 100644 txn_server_test.cpp diff --git a/Makefile.am b/Makefile.am index ee665c98f..42e086343 100644 --- a/Makefile.am +++ b/Makefile.am @@ -57,6 +57,7 @@ LDADD = \ common/types/Test/libtest.a \ common/types/ttmath/libttmath.a \ utility/libutility.a \ + txn_manager/libtxnmanager.a \ ${CAF_HOME}/build/lib/libcaf_core.so \ ${CAF_HOME}/build/lib/libcaf_io.so \ ${THERON_HOME}/Lib/libtherond.a \ @@ -77,11 +78,16 @@ include_HEADERS = Config.h \ Debug.h \ Environment.h \ IDsGenerator.h \ - configure.h + configure.h \ + txn_manager/txn.hpp \ + txn_manager/txn_server.hpp \ + txn_manager/txn_client.hpp bin_PROGRAMS = claimsserver \ client \ - test + test \ + txnclient \ + txnserver client_SOURCES = Client.cpp \ Environment.cpp \ @@ -98,13 +104,15 @@ test_SOURCES = Test/gtest_main.cpp\ IDsGenerator.cpp \ Config.cpp +txnclient_SOURCES = txn_client_test.cpp +txnserver_SOURCES = txn_server_test.cpp SUBDIRS= catalog Client common Daemon Executor IndexManager\ loader physical_operator logical_operator Resource \ -storage Test utility codegen sql_parser stmt_handler +storage Test utility codegen sql_parser stmt_handler txn_manager DIST_SUBDIRS = catalog Client common Daemon Executor IndexManager\ loader physical_operator logical_operator Resource \ -storage Test utility codegen sql_parser 
stmt_handler +storage Test utility codegen sql_parser stmt_handler txn_manager diff --git a/configure.ac b/configure.ac index 4222ea980..fea71cbcc 100644 --- a/configure.ac +++ b/configure.ac @@ -46,6 +46,7 @@ AC_CONFIG_FILES([ Test/Makefile Test/common/Makefile Test/utility/Makefile + txn_manager/Makefile utility/Makefile codegen/Makefile stmt_handler/Makefile diff --git a/txn_client_test.cpp b/txn_client_test.cpp new file mode 100644 index 000000000..8f20343b5 --- /dev/null +++ b/txn_client_test.cpp @@ -0,0 +1,118 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /txn/client.cpp + * + * Created on: 2016年4月7日 + * Author: imdb + * Email: + * + * Description: + * + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "caf/all.hpp" +#include "caf/io/all.hpp" +#include "txn_manager/txn.hpp" +#include "unistd.h" +#include "txn_manager/txn_client.hpp" +using std::cin; +using std::cout; +using std::endl; +using std::vector; +using std::string; +using std::map; +using std::pair; +using std::unordered_map; +using std::to_string; +using std::function; +using std::sort; +using std::tuple; +using std::make_tuple; +using std::make_pair; +using std::get; +using UInt64 = unsigned long long; +using UInt32 = unsigned int; +using UInt16 = unsigned short; +using UInt8 = char; +using RetCode = int; +using OkAtom = caf::atom_constant; +using FailAtom = caf::atom_constant; + + +using namespace claims::txn; + +class Foo { + public: + vector request1; + unordered_map> request2; + vector> request3; + void set_request1(const vector & req) { request1 = req;} + void set_request2(const unordered_map> & req) { + request2 = req; + } + void set_request3(const vector> &req) { request3 = req;} + vector get_request1() const {return request1;} + unordered_map> get_request2() const {return request2;} + vector> get_request3() const { return request3;} +}; + + +inline bool operator == (const Foo & a, const Foo & b) { + return a.request1 == b.request1 && a.request2 == b.request2; +} + + +int main(){ + TxnClient::Init(); + FixTupleIngestReq request1; + Ingest ingest; + + struct timeval tv1, tv2; + gettimeofday(&tv1,NULL); +// request1.Content = {{0, {45, 10}}, {1, {54, 10}}}; +// TxnClient::BeginIngest(request1, ingest); + Checkpoint cp; + cp.Part = 0; + TxnClient::BeginCheckpoint(cp); + cout << cp.ToString() << endl; + cp.LogicCP = 10000; + cp.PhyCP = 10000; + TxnClient::CommitCheckpoint(cp); + TxnClient::BeginCheckpoint(cp); + cout << cp.ToString() << endl; + 
gettimeofday(&tv2,NULL); + cout << tv2.tv_sec - tv1.tv_sec << "-" << (tv2.tv_usec - tv1.tv_usec)/1000 < & input, map> & output) { + output.clear(); + for (auto & strip:input) { + if (output.find(strip.Part) != output.end()) + output[strip.Part].push_back(strip); + else + output[strip.Part] = vector(); + } +} + +void Strip::Sort(vector & input) { + sort(input.begin(), input.end(), + [](const Strip & a, const Strip &b){ return a.Pos < b.Pos;}); +} + +void Strip::Sort(vector & input) { + + sort(input.begin(), input.end(), + [](const PStrip & a, const PStrip & b) + { return a.first < b.first;}); +} + +void Strip::Merge(vector & input){ + vector buffer(input); + input.clear(); + if (buffer.size() == 0) return; + auto pid = buffer[0].Part; + auto begin = buffer[0].Pos; + auto end = buffer[0].Pos + buffer[0].Offset; + for (auto i = 1; i < buffer.size(); i ++) { + if (end == buffer[i].Pos) + end = buffer[i].Pos + buffer[i].Offset; + else { + input.emplace_back(pid, begin, end - begin); + begin = buffer[i].Pos; + end = begin + buffer[i].Offset; + } + } + input.emplace_back(pid, begin, end - begin); +} + +void Strip::Merge(vector & input) { + if (input.size() == 0) return; + vector buffer; + auto begin = input[0].first; + auto end = input[0].first + input[0].second; + for (auto i = 1; i < input.size(); i++) { + if (end == input[i].first) + end = input[i].first + input[i].second; + else { + buffer.emplace_back(begin, end - begin); + begin = input[i].first; + end = input[i].first + input[i].second; + } + } + buffer.emplace_back(begin, end - begin); + input = buffer; +} + + +void Strip::Filter(vector & input, function predicate) { + vector buffer(input); + input.clear(); + for (auto & strip : buffer) + if (predicate(strip)) + input.push_back(strip); +} + +string Strip::ToString() { + string str = "*******Strip******\n"; + str += "part:" + to_string(Part) + + ",pos:" + to_string(Pos) + + ",Offset:" + to_string(Offset) + "\n"; + return str; +} + +string 
FixTupleIngestReq::ToString() { + string str = "*******FixTupleIngestReq********\n"; + for (auto & item : Content) + str += "part:" + to_string(item.first) + + ",tuple_size:" + to_string(item.second.first) + + ",tuple_count:"+ to_string(item.second.second)+"\n"; + return str; +} +string Ingest::ToString() { + UInt64 core_id = Id % 1000; + core_id << 54; + core_id >> 54; + string str = "*******Ingest*********\n"; + str += "id:" + to_string(Id) + ",core:" + to_string(core_id)+ "\n"; + for (auto & item : StripList) + str += "part:" + to_string(item.first) + + ",pos:" + to_string(item.second.first) + + ",offset:"+ to_string(item.second.second)+"\n"; + return str; +} +string QueryReq::ToString() { + string str = "*******QueryReq********\n"; + for (auto & part : PartList) + str += "part:" + to_string(part) +"\n"; + return str; +} + +string Query::ToString() { + string str = "******Query*******\n"; + for (auto & part : Snapshot){ + str += "part:" + to_string(part.first)+"\n"; + for (auto & strip : part.second) + str += "Pos:" + to_string(strip.first) + + ",Offset:" + to_string(strip.second) + "\n"; + } + return str; +} + +string Checkpoint::ToString() { + string str = "******checkpoint******\n"; + + str += "part:" + to_string(Part) +"\n"; + str += "commit strip\n"; + for (auto & strip : CommitStripList) + str += "Pos:" + to_string(strip.first) + + ",Offset:" + to_string(strip.second) + "\n"; + + str += "abort strip\n"; + for (auto & strip : AbortStripList) + str += "Pos:" + to_string(strip.first) + + ",Offset:" + to_string(strip.second) + "\n"; + str += "logic cp:" + to_string(LogicCP) + "\n"; + str += "phy cp:" + to_string(PhyCP) + "\n"; + return str; +} + +} +} + diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp new file mode 100644 index 000000000..d56c532a8 --- /dev/null +++ b/txn_manager/txn.hpp @@ -0,0 +1,280 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /txn/txn_utility.hpp + * + * Created on: 2016年3月28日 + * Author: imdb + * Email: + * + * Description: + * + */ + +#ifndef TXN_HPP_ +#define TXN_HPP_ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "caf/all.hpp" +#include "caf/io/all.hpp" +using std::cin; +using std::cout; +using std::endl; +using std::vector; +using std::string; +using std::map; +using std::pair; +using std::unordered_map; +using std::to_string; +using std::function; +using std::sort; +using std::make_pair;; +namespace claims { +namespace txn{ +using UInt64 = unsigned long long; +using UInt32 = unsigned int; +using UInt16 = unsigned short; +using UInt8 = char; +using RetCode = int; +using OkAtom = caf::atom_constant; +using FailAtom = caf::atom_constant; +using IngestAtom = caf::atom_constant; +using QueryAtom = caf::atom_constant; +using CheckpointAtom = caf::atom_constant; +using GCAtom = caf::atom_constant; +using CommitIngestAtom = caf::atom_constant; +using AbortIngestAtom = caf::atom_constant; +using CommitCPAtom = caf::atom_constant; +using AbortCPAtom = caf::atom_constant; +using QuitAtom = caf::atom_constant; +using LinkAtom = caf::atom_constant; + +static const int kTxnPort = 8089; +static const string kTxnIp = "127.0.0.1"; 
+static const int kConcurrency = 4; +static const int kTxnBufferSize = 1024 * 10000; +static const int kTxnLowLen = 10; +static const int kTxnHighLen = 54; +static const int kGCTime = 5; +static const int kTimeout = 1; +static const int kBlockSize = 64 * 1024; +static const int kTailSize = sizeof(unsigned); + +/********Strip******/ +using PStrip = pair; +class Strip{ + public: + UInt64 Part; + UInt64 Pos; + UInt64 Offset; + Strip() {} + Strip(UInt64 pId, UInt64 pos, UInt32 offset): + Part(pId), Pos(pos), Offset(offset) {} + UInt64 get_Part() const { return Part;} + UInt64 get_Pos() const { return Pos;} + UInt64 get_Offset() const { return Offset;} + void set_Part(UInt64 part) { Part = part;} + void set_Pos(UInt64 pos) { Pos = pos;} + void set_Offset(UInt64 offset) { Offset = offset;} + string ToString(); + static void Map(vector & input, map> & output); + static void Sort(vector & input); + static void Sort(vector & input); + static void Merge(vector & input); + static void Merge(vector & input); + static void Filter(vector & input, function predicate); +}; +inline bool operator == (const Strip & a, const Strip & b) { + return a.Part == b.Part && a.Pos == b.Pos && a.Offset == b.Offset; +} + + +/***********FixTupleIngestReq************/ + +class FixTupleIngestReq{ + public: + /*fix tuple part -> */ + map Content; + void Insert(UInt64 part, UInt64 tuple_size, UInt64 tuple_count) { + Content[part] = make_pair(tuple_size, tuple_count); + } + map get_Content() const{ + return Content; + } + void set_Content(const map & content) { + Content = content; + } + string ToString (); +}; +inline bool operator == (const FixTupleIngestReq & a, const FixTupleIngestReq & b) { + return a.Content == b.Content; +} + + +/****************Ingest***************/ +class Ingest { + public: + UInt64 Id; + map StripList; + RetCode Ret = 0; + void InsertStrip (UInt64 part, UInt64 pos, UInt64 offset) { + StripList[part] = make_pair(pos, offset); + } + void InsertStrip (const Strip & strip) { + 
StripList[strip.Part] = make_pair(strip.Pos, strip.Offset); + } + UInt64 get_Id() const { return Id;} + map get_StripList() const { return StripList;} + RetCode get_Ret() const {} + void set_Id(const UInt64 & id){ Id = id;} + void set_StripList(const map & stripList) { + StripList = stripList; + } + void set_Ret(RetCode ret) { Ret = ret;} + string ToString(); +}; +inline bool operator == (const Ingest & a, const Ingest & b) { + return a.Id == b.Id; +} + +/************QueryReq************/ +class QueryReq{ + public: + vector PartList; + void InsertPart(UInt64 part) { PartList.push_back(part);} + vector get_PartList() const { return PartList;} + void set_PartList(const vector & partList) { PartList = partList;} + string ToString(); +}; +inline bool operator == (const QueryReq & a, const QueryReq & b) { + return a.PartList == b.PartList; +} + +/***********Snapshot***********/ +class Query{ + public: + map> Snapshot; + map CPList; + RetCode Ret = 0; + void InsertStrip (UInt64 part, UInt64 pos, UInt64 offset){ + // if (Snapshot.find(part) == Snapshot.end()) + // Snapshot[part] = vector>(); + // else + Snapshot[part].push_back(make_pair(pos, offset)); + } + void InsertCP(UInt64 part, UInt64 cp) { + CPList[part] = cp; + } + map> get_Snapshot() const { + return Snapshot; + } + map get_CPList() const { return CPList;} + RetCode get_Ret() const { return Ret;} + void set_Snapshot(const map> & sp){ + Snapshot = sp; + } + void set_CPList(const map & cplist) { + CPList = cplist; + } + void set_Ret(RetCode ret) {Ret = ret;} + string ToString(); +}; +inline bool operator == (const Query & a, const Query & b) { + return a.Snapshot == b.Snapshot; +} + +/*********Checkpoint***********/ +class Checkpoint{ + public: + UInt64 Id; + UInt64 Part; + UInt64 LogicCP; + UInt64 PhyCP; + RetCode Ret = 0; + vector CommitStripList; + vector AbortStripList; + Checkpoint() {} + Checkpoint(UInt64 part, UInt64 newLogicCP, UInt64 oldPhyCP): + Part(part), LogicCP(newLogicCP),PhyCP(oldPhyCP) {} + UInt64 
get_Id() const { return Id;} + UInt64 get_Part() const { return Part;} + UInt64 get_LogicCP() const { return LogicCP;} + UInt64 get_PhyCP() const { return PhyCP;} + UInt64 get_Ret() const { return Ret;} + vector get_CommitStripList() const { return CommitStripList;}; + vector get_AbortStripList() const { return AbortStripList;}; + void set_Part(UInt64 part) { Part = part;} + void set_LogicCP(UInt64 logicCP) { LogicCP = logicCP;} + void set_PhyCP(UInt64 phyCP) { PhyCP = phyCP;} + void set_Ret(RetCode ret) { Ret = ret;} + void set_CommitStripList(const vector & commitstripList) { + CommitStripList = commitstripList; + } + void set_AbortStripList(const vector & abortstripList) { + AbortStripList = abortstripList; + } + string ToString(); +}; +inline bool operator == (const Checkpoint & a, const Checkpoint & b) { + return a.Id == b.Id; +} + +inline void SerializeConfig() { + caf::announce("FixTupleIngestReq", + make_pair(&FixTupleIngestReq::get_Content, &FixTupleIngestReq::set_Content)); + caf::announce("Ingest", + make_pair(&Ingest::get_Id,&Ingest::set_Id), + make_pair(&Ingest::get_StripList,&Ingest::set_StripList), + make_pair(&Ingest::get_Ret,&Ingest::set_Ret)); + caf::announce("QueryReq", + make_pair(&QueryReq::get_PartList, &QueryReq::set_PartList)); + caf::announce("Query", + make_pair(&Query::get_Snapshot,&Query::set_Snapshot), + make_pair(&Query::get_CPList, &Query::set_CPList), + make_pair(&Query::get_Ret, &Query::set_Ret)); + caf::announce("Checkpoint", + make_pair(&Checkpoint::get_Part, &Checkpoint::set_Part), + make_pair(&Checkpoint::get_LogicCP, &Checkpoint::set_LogicCP), + make_pair(&Checkpoint::get_PhyCP, &Checkpoint::set_PhyCP), + make_pair(&Checkpoint::get_Ret, &Checkpoint::set_Ret), + make_pair(&Checkpoint::get_CommitStripList, + &Checkpoint::set_CommitStripList), + make_pair(&Checkpoint::get_AbortStripList, + &Checkpoint::set_AbortStripList)); +} + + + + + + + + + +} +} +#endif // TXN_HPP_ diff --git a/txn_manager/txn_client.cpp 
b/txn_manager/txn_client.cpp new file mode 100644 index 000000000..fe27f5090 --- /dev/null +++ b/txn_manager/txn_client.cpp @@ -0,0 +1,164 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /txn/txn_client.cpp + * + * Created on: 2016年4月10日 + * Author: imdb + * Email: + * + * Description: + * + */ + +#include "txn_client.hpp" +namespace claims{ +namespace txn{ + +//using claims::txn::TxnClient; +//using claims::txn::RetCode; +//using claims::txn::FixTupleIngestReq; +//using claims::txn::Ingest; + +string TxnClient::Ip = kTxnIp; +int TxnClient::Port = kTxnPort; + +RetCode TxnClient::Init(string ip, int port){ + Ip = ip; + Port = port; + SerializeConfig(); +} + +RetCode TxnClient::BeginIngest(const FixTupleIngestReq & request, Ingest & ingest){ + RetCode ret = -1; + if (TxnServer::Active) + return TxnServer::BeginIngest(request, ingest); + else { + try{ + auto router = caf::io::remote_actor(Ip, Port); + caf::scoped_actor self; + self->sync_send(router, IngestAtom::value, request). 
+ await([&](Ingest & reply, RetCode r) { ingest = reply; ret = r;}, + caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); + } catch (...){ + cout << "link fail" << endl; + return -1; + } + } + return ret; +} + +RetCode TxnClient::CommitIngest(const Ingest & ingest) { + RetCode ret = -1; + if (TxnServer::Active) + return TxnServer::CommitIngest(ingest); + else { + try { + auto router = caf::io::remote_actor(Ip, Port); + caf::scoped_actor self; + self->sync_send(router, CommitIngestAtom::value, ingest). + await([&](RetCode r) { ret = r;}, + caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); + } catch (...) { + cout << "link fail" << endl; + return -1; + } + } + return ret; +} + +RetCode TxnClient::AbortIngest(const Ingest & ingest) { + RetCode ret = -1; + if (TxnServer::Active) + return TxnServer::AbortIngest(ingest); + else { + try { + auto router = caf::io::remote_actor(Ip, Port); + caf::scoped_actor self; + self->sync_send(router, AbortIngestAtom::value, ingest). + await([&](RetCode r) { ret = r;}, + caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); + } catch (...) { + cout << "link fail" << endl; + return -1; + } + } + return ret; +} + +RetCode TxnClient::BeginQuery(const QueryReq & request, Query & query) { + RetCode ret = -1; + if (TxnServer::Active) + return TxnServer::BeginQuery(request, query); + else { + try { + auto router = caf::io::remote_actor(Ip, Port); + caf::scoped_actor self; + self->sync_send(router, QueryAtom::value, request). + await([&](const QueryReq & request, RetCode r) { ret = r;}, + caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); + } catch (...) 
{ + cout << "link fail" << endl; + return -1; + } + } + return ret; +} + +RetCode TxnClient::BeginCheckpoint(Checkpoint & cp) { + RetCode ret = -1; + if (TxnServer::Active) + return TxnServer::BeginCheckpoint(cp); + else { + try { + auto router = caf::io::remote_actor(Ip, Port); + caf::scoped_actor self; + self->sync_send(router, CheckpointAtom::value, cp.Part). + await([&](const Checkpoint & checkpoint, RetCode r) {cp = checkpoint; ret = r;}, + caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); + } catch (...) { + cout << "link fail" << endl; + return -1; + } + } + return ret; +} + +RetCode TxnClient::CommitCheckpoint(const Checkpoint & cp) { + RetCode ret = -1; + if (TxnServer::Active) + return TxnServer::CommitCheckpoint(cp); + else { + try { + auto router = caf::io::remote_actor(Ip, Port); + caf::scoped_actor self; + self->sync_send(router, CommitCPAtom::value, cp). + await([&](RetCode r) { ret = r;}, + caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); + } catch (...) { + cout << "link fail" << endl; + return -1; + } + } + return ret; +} + +} +} + + diff --git a/txn_manager/txn_client.hpp b/txn_manager/txn_client.hpp new file mode 100644 index 000000000..77949c6d2 --- /dev/null +++ b/txn_manager/txn_client.hpp @@ -0,0 +1,84 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /txn/txn_client.hpp + * + * Created on: 2016年4月10日 + * Author: imdb + * Email: + * + * Description: + * + */ + +#ifndef TXN_CLIENT_HPP_ +#define TXN_CLIENT_HPP_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "unistd.h" +#include "stdlib.h" +#include "caf/all.hpp" +#include "caf/io/all.hpp" +#include "txn.hpp" +#include "txn_server.hpp" +#include +using std::cin; +using std::cout; +using std::endl; +using std::vector; +using std::string; +using std::map; +using std::pair; +using std::unordered_map; +using std::to_string; +using std::function; +using std::sort; +using std::atomic; +using std::chrono::seconds; +using std::chrono::milliseconds; + +namespace claims{ +namespace txn{ + +class TxnClient{ + public: + static string Ip; + static int Port; + static RetCode Init(string ip = kTxnIp, int port = kTxnPort); + static RetCode BeginIngest(const FixTupleIngestReq & request, Ingest & ingest); + static RetCode CommitIngest(const Ingest & ingest); + static RetCode AbortIngest(const Ingest & ingest); + static RetCode BeginQuery(const QueryReq & request, Query & query); + static RetCode BeginCheckpoint(Checkpoint & cp); + static RetCode CommitCheckpoint(const Checkpoint & cp); +}; + +} +} + +#endif // TXN_CLIENT_HPP_ diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp new file mode 100644 index 000000000..094c2e701 --- /dev/null +++ b/txn_manager/txn_server.cpp @@ -0,0 +1,344 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /txn/txn_server.cpp + * + * Created on: 2016年4月10日 + * Author: imdb + * Email: + * + * Description: + * + */ +#include "txn_server.hpp" + +namespace claims{ +namespace txn{ +int TxnCore::BufferSize = kTxnBufferSize; + + +int TxnServer::Port = kTxnPort; +int TxnServer::Concurrency = kConcurrency; +caf::actor TxnServer::Router; +vector TxnServer::Cores; +bool TxnServer::Active = false; + +unordered_map> TxnServer::PosList; +unordered_map TxnServer::LogicCPList; +unordered_map TxnServer::PhyCPList; +unordered_map> TxnServer::CountList; + +RetCode TxnCore::ReMalloc() { + Size = 0; + TxnIndex.clear(); + try { + delete [] Commit; + delete [] Abort; + delete [] StripList; + Commit = new bool[BufferSize]; + Abort = new bool[BufferSize]; + StripList = new vector[BufferSize]; + } catch (...) 
{ + cout << "core:"<delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); + return { + [=](IngestAtom, const FixTupleIngestReq * request, Ingest * ingest)->int { + struct timeval tv1; + if (Size >= BufferSize) + return -1; + auto id = ingest->Id = GetId(); + TxnIndex[id] = Size; + Commit[Size] = Abort[Size] = false; + for (auto & item : request->Content) { + auto part = item.first; + auto tupleSize = item.second.first; + auto tupleCount = item.second.second; + auto strip = TxnServer::AtomicMalloc(part, tupleSize, tupleCount); + StripList[Size].push_back(strip); + //cout << strip.ToString() << endl; + ingest->InsertStrip(strip); + } + Size ++; + + return 0; + }, + [=](CommitIngestAtom, const Ingest * ingest)->int{ + if (TxnIndex.find(ingest->Id) == TxnIndex.end()) + return -1; + Commit[TxnIndex[ingest->Id]] = true; + return 0; + }, + [=](AbortIngestAtom, const Ingest * ingest)->int { + if (TxnIndex.find(ingest->Id) == TxnIndex.end()) + return -1; + Commit[TxnIndex[ingest->Id]] = true; + return 0; + }, + [=](QueryAtom, const QueryReq * request, Query * query)->int { + for (auto i = 0; i < Size; i++) + if (Commit[i]) + for (auto & strip : StripList[i]) { + if (query->CPList.find(strip.Part) != query->CPList.end() && + strip.Pos >= query->CPList[strip.Part]) + query->InsertStrip(strip.Part, strip.Pos, strip.Offset); + } + return 1; + }, + [=] (CheckpointAtom, Checkpoint * cp)->int { + + for (auto i = 0; i < Size; i++) + if (Commit[i]) { + for (auto & strip : StripList[i]) + if ( strip.Part == cp->Part && strip.Pos >= cp->LogicCP ) + cp->CommitStripList.push_back(PStrip(strip.Pos, strip.Offset)); + } + else if (Abort[i]) { + for (auto & strip : StripList[i]) + if (strip.Part == cp->Part && strip.Pos >= cp->LogicCP) + cp->AbortStripList.push_back(PStrip(strip.Pos, strip.Offset)); + } + + }, + [=](GCAtom) { + auto size_old = Size; + auto pos = 0; + for (auto i = 0; i < Size; i++) + if (!TxnServer::IsStripListGarbage(StripList[i])) { + TxnIndex[TxnIndex[i]] = pos; 
+ Commit[pos] = Commit[i]; + Abort[pos] = Abort[i]; + StripList[pos] = StripList[i]; + ++ pos; + } + Size = pos; + cout <<"core:"<"<< pos << endl; + this->delayed_send(this, seconds(kGCTime), GCAtom::value); + }, + caf::others >> [] () { cout<<"core unkown message"<caf::message { + Ingest ingest; + auto ret = TxnServer::BeginIngest(request, ingest); + quit(); + return caf::make_message(ingest, ret); + }, + [=](CommitIngestAtom, const Ingest & ingest)->RetCode { + quit(); + return TxnServer::CommitIngest(ingest); + }, + [=](AbortIngestAtom, const Ingest & ingest)->RetCode { + quit(); + return TxnServer::AbortIngest(ingest); + }, + [=](QueryAtom, const QueryReq & request)->caf::message { + Query query; + auto ret = TxnServer::BeginQuery(request, query); + quit(); + return caf::make_message(query, ret); + }, + [=](CheckpointAtom, const UInt64 part)->caf::message{ + Checkpoint cp; + cp.Part = part; + auto ret = TxnServer::BeginCheckpoint(cp); + quit(); + return caf::make_message(cp, ret); + }, + [=](CommitCPAtom, const Checkpoint & cp)->RetCode { + quit(); + return TxnServer::CommitCheckpoint(cp); + }, + caf::others >> [] () { cout<<"work unkown message"<forward_to(caf::spawn()); + }, + [=](CommitIngestAtom, const Ingest & ingest) { + this->forward_to(caf::spawn()); + }, + [=](AbortIngestAtom, const Ingest & ingest) { + this->forward_to(caf::spawn()); + }, + [=](QueryAtom, const QueryReq & request) { + this->forward_to(caf::spawn()); + }, + [=](CheckpointAtom, const UInt64 part){ + this->forward_to(caf::spawn()); + }, + [=](CommitCPAtom, const Checkpoint & cp) { + this->forward_to(caf::spawn()); + }, + caf::others >> [] () { cout<<"unkown message"<(); + for (auto i = 0; i < Concurrency; i++) + Cores.push_back(caf::spawn(i)); + SerializeConfig(); + RecoveryFromCatalog(); + RecoveryFromTxnLog(); + srand((unsigned) time(NULL)); + + return 0; +} + +RetCode TxnServer::BeginIngest(const FixTupleIngestReq & request, Ingest & ingest) { + RetCode ret; + UInt64 core_id = 
SelectCore(); + caf::scoped_actor self; + self->sync_send(Cores[core_id], IngestAtom::value, & request, & ingest). + await([&](int r) {ret = r;}); + return 0; +} +RetCode TxnServer::CommitIngest(const Ingest & ingest) { + RetCode ret; + UInt64 core_id = GetCoreId(ingest.Id); + caf::scoped_actor self; + self->sync_send(Cores[core_id], CommitIngestAtom::value, &ingest). + await([&](int r) { ret = r;}); + return 0; +} +RetCode TxnServer::AbortIngest(const Ingest & ingest) { + RetCode ret; + UInt64 core_id = GetCoreId(ingest.Id); + caf::scoped_actor self; + self->sync_send(Cores[core_id], AbortIngestAtom::value, &ingest). + await([&](int r) { ret = r;}); + return 0; +} +RetCode TxnServer::BeginQuery(const QueryReq & request, Query & query) { + RetCode ret; + caf::scoped_actor self; + for (auto & part : request.PartList) + query.CPList[part] = TxnServer::LogicCPList[part]; + for (auto & core : Cores) + self->sync_send(core, QueryAtom::value, & request, & query). + await([&](int r) {r = ret;}); + for (auto & part : query.Snapshot) { + Strip::Sort(part.second); + Strip::Merge(part.second); + } + return ret; +} +RetCode TxnServer::BeginCheckpoint(Checkpoint & cp) { + RetCode ret; + if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) + return -1; + cp.LogicCP = TxnServer::LogicCPList[cp.Part]; + cp.PhyCP = TxnServer::PhyCPList[cp.Part]; + + caf::scoped_actor self; + for (auto & core : Cores) + self->sync_send(core,CheckpointAtom::value, &cp). 
+ await([&]( int r) { r = ret;}); + Strip::Sort(cp.CommitStripList); + Strip::Merge(cp.CommitStripList); + Strip::Sort(cp.AbortStripList); + Strip::Merge(cp.AbortStripList); + return 0; +} +RetCode TxnServer::CommitCheckpoint(const Checkpoint & cp) { + if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) + return -1; + TxnServer::LogicCPList[cp.Part] = cp.LogicCP; + TxnServer::PhyCPList[cp.Part] = cp.PhyCP; + return 0; +} + + +Strip TxnServer::AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount) { + Strip strip; + strip.Part = part; + if (TupleSize * TupleCount == 0) + return strip; + do { + strip.Pos = PosList[part].load(); + strip.Offset = 0; + UInt64 block_pos = strip.Pos % kBlockSize; + UInt64 remain_count = TupleCount; + int count = 0; + while(remain_count > 0) { + // 求出一个块内可以存放的最多元组数 + UInt64 use_count = (kBlockSize - block_pos - kTailSize) / TupleSize; + if (use_count > remain_count) + use_count = remain_count; + + //使用块内可用区域 + remain_count -= use_count; + strip.Offset += use_count * TupleSize; + block_pos += use_count * TupleSize; + //将不可利用的空间也分配 + if (kBlockSize - block_pos - kTailSize < TupleSize) { + strip.Offset += kBlockSize - block_pos; + block_pos = 0; + } + } + + } while(!PosList[part].compare_exchange_weak(strip.Pos, strip.Pos + strip.Offset)); + + return strip; +} + +RetCode TxnServer::RecoveryFromCatalog() { + for (auto i = 0; i < 10; i++ ) { + PosList[i] = 0; + CountList[i] = 0; + LogicCPList[i] = 0; + } +} + +RetCode TxnServer::RecoveryFromTxnLog() { + +} + +} +} + + + diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp new file mode 100644 index 000000000..0caf79208 --- /dev/null +++ b/txn_manager/txn_server.hpp @@ -0,0 +1,141 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /txn/txn_server.hpp + * + * Created on: 2016年4月10日 + * Author: imdb + * Email: + * + * Description: + * + */ + +#ifndef TXN_SERVER_HPP_ +#define TXN_SERVER_HPP_ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "unistd.h" +#include "stdlib.h" +#include "caf/all.hpp" +#include "caf/io/all.hpp" +#include "txn.hpp" +#include +using std::cin; +using std::cout; +using std::endl; +using std::vector; +using std::string; +using std::map; +using std::pair; +using std::unordered_map; +using std::to_string; +using std::function; +using std::sort; +using std::atomic; +using std::chrono::seconds; +using std::chrono::milliseconds; +namespace claims{ +namespace txn{ + +class TxnCore: public caf::event_based_actor { + public: + static int BufferSize; + + UInt64 CoreId; + UInt64 LocalId = 0; + + UInt64 Size; + map TxnIndex; + bool * Commit = nullptr; + bool * Abort = nullptr; + vector * StripList; + + caf::behavior make_behavior() override; + RetCode ReMalloc(); + TxnCore(int coreId):CoreId(coreId) {} + UInt64 GetId(){ + UInt64 id = ((++LocalId) *1000) + CoreId; + + } +}; + +class TxnWorker:public caf::event_based_actor { + public: + caf::behavior make_behavior() override; +}; + +class TxnServer: public caf::event_based_actor{ + public: + static bool Active; + static int Port; + static int Concurrency; + static 
caf::actor Router; + static vector Cores; + static unordered_map> PosList; + static unordered_map LogicCPList; + static unordered_map PhyCPList; + static unordered_map> CountList; + /**************** User APIs ***************/ + static RetCode Init(int concurrency = kConcurrency , int port = kTxnPort); + + /**************** System APIs ***************/ + static RetCode BeginIngest(const FixTupleIngestReq & request, Ingest & ingest); + static RetCode CommitIngest(const Ingest & ingest); + static RetCode AbortIngest(const Ingest & ingest); + static RetCode BeginQuery(const QueryReq & request, Query & snapshot); + static RetCode BeginCheckpoint(Checkpoint & cp); + static RetCode CommitCheckpoint(const Checkpoint & cp); + static UInt64 GetCoreId(UInt64 id) { + return id % 1000; + } + static inline UInt64 SelectCore() { + return rand() % Concurrency; + } + caf::behavior make_behavior() override; + + static RetCode RecoveryFromCatalog(); + static RetCode RecoveryFromTxnLog(); + static inline Strip AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount); + static inline bool IsStripListGarbage(const vector & striplist) { + for (auto & strip : striplist) { + if (strip.Pos >= TxnServer::LogicCPList[strip.Part]) + return false; + } + return true; + } + +}; + + +} +} + + + +#endif // TXN_SERVER_HPP_ diff --git a/txn_server_test.cpp b/txn_server_test.cpp new file mode 100644 index 000000000..ff6a0bce3 --- /dev/null +++ b/txn_server_test.cpp @@ -0,0 +1,217 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /txn/client.cpp + * + * Created on: 2016年4月7日 + * Author: imdb + * Email: + * + * Description: + * + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "unistd.h" +#include +#include "caf/all.hpp" +#include "caf/io/all.hpp" +#include "txn_manager/txn.hpp" +#include "txn_manager/txn_server.hpp" +using std::cin; +using std::cout; +using std::endl; +using std::vector; +using std::string; +using std::map; +using std::pair; +using std::unordered_map; +using std::to_string; +using std::function; +using std::sort; +using std::tuple; +using std::make_pair; +using std::make_tuple; +using std::get; + +using namespace claims::txn; + +using UInt64 = unsigned long long; +using UInt32 = unsigned int; +using UInt16 = unsigned short; +using UInt8 = char; +using RetCode = int; +using OkAtom = caf::atom_constant; +using FailAtom = caf::atom_constant; +using QuitAtom = caf::atom_constant; + +class Foo { + public: + vector request1; + unordered_map> request2; + vector> request3; + void set_request1(const vector & req) { request1 = req;} + void set_request2(const unordered_map> & req) { + request2 = req; + } + void set_request3(const vector> &req) { request3 = req;} + vector get_request1() const {return request1;} + unordered_map> get_request2() const {return request2;} + vector> get_request3() const { return request3;} +}; + +inline bool operator == (const Foo & a, const Foo & b) { + return a.request1 == b.request1 && a.request2 == b.request2; +} + + +class AA:public 
caf::event_based_actor { + caf::behavior make_behavior() override { + return{ + + [] (FixTupleIngestReq & request){ + cout << request.ToString() << endl; + }, + [] (int a) {cout << a << endl;}, + caf::others >> []() { + cout << "no matched" << endl; + } + }; + } +}; +class C:public caf::event_based_actor { + caf::behavior make_behavior() override { + return { + [=] (int a)->int { quit(); aout(this)<< a*1000 << endl;}, + caf::others >> []() {cout << "no matched" << endl;} + }; + } +}; + +class Foo2 { + public: + int a = 0; + int b = 0; +}; +class Foo3{ + public: + int c = 0; +}; +using Foo2Atom = caf::atom_constant; +using Foo3Atom = caf::atom_constant; + +class B:public caf::event_based_actor { + public: + caf::actor Router; + B() {} + B(caf::actor router):Router(router) {} + caf::behavior make_behavior() override { + return { + [=](int a) { + forward_to(caf::spawn()); + }, + [=](Foo2Atom, Foo2 * foo2)->int { + foo2->a = 97; + foo2->b = 98; + cout << "foo2" << endl; + return 101; + }, + [=](Foo3Atom, Foo3 * foo3)->int { + foo3->c = 99; + cout << "foo3" << endl; + return 102; + }, + caf::others >> []() { cout << "unkown" << endl;} + }; + } +}; + + + +void task(int a){ + for (auto i = 0; i< 10; i++) { +// if (i % 10 > 10) { +// QueryReq request2; +// request2.PartList = {0,1}; +// Query query; +// TxnServer::BeginQuery(request2, query); +// } else { + FixTupleIngestReq request1; + Ingest ingest; + request1.Content = {{0, {45, 10}}}; + TxnServer::BeginIngest(request1, ingest); + TxnServer::CommitIngest(ingest); +// } + } +} + +using claims::txn::TxnServer; +using claims::txn::FixTupleIngestReq; +using claims::txn::Ingest; +int main(){ +// auto server = caf::spawn(); +// SerializeConfig(); +// caf::announce("foo", +// make_pair(&Foo::get_request1, &Foo::set_request1), +// make_pair(&Foo::get_request2, &Foo::set_request2), +// make_pair(&Foo::get_request3, &Foo::set_request3)); +// +// try { +// caf::io::publish(server, 8088); +// } catch (...) 
{ +// cout << "bind fail" << endl; +// } + + + TxnServer::Init(); +// for (auto j = 0;j < 100 ;j++) { +//// request1.Content[0] = {45, 10}; +//// request1.Content[1] = {54, 10}; +// FixTupleIngestReq request1; +// Ingest ingest; +// request1.Content = {{0, {45, 10}}, {1, {54, 10}}}; +// TxnServer::BeginIngest(request1, ingest); +// TxnServer::CommitIngest(ingest); +// } + sleep(1); + struct timeval tv1, tv2; + gettimeofday(&tv1,NULL); + cout <<"a:" < v; + int n = 1; + for (auto i=0;i Date: Sat, 16 Apr 2016 16:03:56 +0800 Subject: [PATCH 04/58] ADD: stl_guard to implement RAII for STL container; ADD: function implementation in MasterLoader --- common/Schema/SchemaFix.cpp | 4 + loader/data_ingestion.cpp | 80 +++++++------- loader/data_ingestion.h | 7 +- loader/master_loader.cpp | 202 ++++++++++++++++++++++++++++++++++-- loader/master_loader.h | 47 ++++++++- loader/validity.cpp | 91 +++++++++++++++- loader/validity.h | 12 +++ utility/Makefile.am | 2 +- utility/stl_guard.h | 103 ++++++++++++++++++ 9 files changed, 495 insertions(+), 53 deletions(-) create mode 100644 utility/stl_guard.h diff --git a/common/Schema/SchemaFix.cpp b/common/Schema/SchemaFix.cpp index 905474765..48e1d9e07 100755 --- a/common/Schema/SchemaFix.cpp +++ b/common/Schema/SchemaFix.cpp @@ -107,6 +107,10 @@ int SchemaFix::getColumnOffset(unsigned index) const { * 若只有warning,放入columns_validities, * 同时处理warning(字符串过长,截断;数字类型不在合法范围内设为默认值); * 如果来自kFile, 出现error将值设为默认值, 视为warning对待. + * + * TODO(ANYONE): check from the second column, because the first column is + * always row_id, which is generated by claims and is always valid. 
So + * AddRowId() should be called after this method---YuKai */ RetCode SchemaFix::CheckAndToValue(std::string text_tuple, void* binary_tuple, const string attr_separator, diff --git a/loader/data_ingestion.cpp b/loader/data_ingestion.cpp index 96a5f363d..582e2efb7 100644 --- a/loader/data_ingestion.cpp +++ b/loader/data_ingestion.cpp @@ -148,7 +148,7 @@ uint64_t DataIngestion::total_unread_sem_fail_count_ = 0; uint64_t DataIngestion::total_append_warning_time_ = 0; DataIngestion::DataIngestion(TableDescriptor* table, const string col_separator, - const string row_separator) + const string& row_separator) : table_(table), col_separator_(col_separator), row_separator_(row_separator), @@ -250,9 +250,9 @@ RetCode DataIngestion::PrepareInitInfo(FileOpenFlag open_flag) { } RetCode DataIngestion::LoadFromFileSingleThread(vector input_file_names, - FileOpenFlag open_flag, - ExecutedResult* result, - double sample_rate) { + FileOpenFlag open_flag, + ExecutedResult* result, + double sample_rate) { int ret = rSuccess; int file_count = 0; uint64_t row_id_in_file = 0; @@ -317,8 +317,8 @@ RetCode DataIngestion::LoadFromFileSingleThread(vector input_file_names, } // only handle data warnings, because of no data error for (auto it : columns_validities) { - string validity_info = - GenerateDataValidityInfo(it, table_, row_id_in_file, file_name); + string validity_info = Validity::GenerateDataValidityInfo( + it, table_, row_id_in_file, file_name); DLOG_DI("append warning info:" << validity_info); result->AppendWarning(validity_info); } @@ -364,7 +364,7 @@ RetCode DataIngestion::LoadFromFileSingleThread(vector input_file_names, } RetCode DataIngestion::SetTableState(FileOpenFlag open_flag, - ExecutedResult* result) { + ExecutedResult* result) { int ret = rSuccess; if (FileOpenFlag::kCreateFile == open_flag) { /* @@ -397,7 +397,7 @@ RetCode DataIngestion::SetTableState(FileOpenFlag open_flag, } RetCode DataIngestion::CheckFiles(vector input_file_names, - ExecutedResult* result) { + 
ExecutedResult* result) { int ret = rSuccess; for (auto file_name : input_file_names) { ifstream input_file(file_name.c_str()); @@ -468,9 +468,9 @@ RetCode DataIngestion::FinishJobAfterLoading(FileOpenFlag open_flag) { } RetCode DataIngestion::LoadFromFileMultiThread(vector input_file_names, - FileOpenFlag open_flag, - ExecutedResult* result, - double sample_rate) { + FileOpenFlag open_flag, + ExecutedResult* result, + double sample_rate) { int ret = rSuccess; int file_count = 0; uint64_t row_id_in_file = 0; @@ -595,8 +595,9 @@ RetCode DataIngestion::LoadFromFileMultiThread(vector input_file_names, * into HDFS/disk */ RetCode DataIngestion::LoadFromFile(vector input_file_names, - FileOpenFlag open_flag, - ExecutedResult* result, double sample_rate) { + FileOpenFlag open_flag, + ExecutedResult* result, + double sample_rate) { total_get_substr_time_ = 0; total_check_string_time_ = 0; total_to_value_time_ = 0; @@ -736,7 +737,7 @@ void* DataIngestion::HandleTuple(void* ptr) { LockGuard guard( injestion->task_list_access_lock_[self_thread_index]); ///// lock/sem ATOMIC_ADD( - injestion->total_lock_tuple_buffer_time_, ///// lock/sem + injestion->total_lock_tuple_buffer_time_, ///// lock/sem GetElapsedTimeInUs(start_tuple_buffer_lock_time)); ///// lock/sem task = std::move( injestion->task_lists_[self_thread_index].front()); ///// lock/sem @@ -781,7 +782,7 @@ void* DataIngestion::HandleTuple(void* ptr) { // only handle data warnings, because of no data error if (!injestion->result_->HasEnoughWarning()) { for (auto it : columns_validities) { - string validity_info = injestion->GenerateDataValidityInfo( + string validity_info = Validity::GenerateDataValidityInfo( it, injestion->table_, row_id_in_file, file_name); DLOG_DI("append warning info:" << validity_info); GET_TIME_DI(start_append_warning_time); @@ -796,7 +797,7 @@ void* DataIngestion::HandleTuple(void* ptr) { GET_TIME_DI(start_insert_time); EXEC_AND_ONLY_LOG_ERROR( ret, injestion->InsertSingleTuple(tuple_buffer, 
block_to_write, - local_pj_buffer), ///// lock/sem + local_pj_buffer), ///// lock/sem "failed to insert tuple in " << file_name << " at line " << row_id_in_file << "."); if (ret != rSuccess) { // it is not need to use lock @@ -819,7 +820,7 @@ void* DataIngestion::HandleTuple(void* ptr) { * else return error to client without inserting any data */ RetCode DataIngestion::InsertFromString(const string tuples, - ExecutedResult* result) { + ExecutedResult* result) { int ret = rSuccess; LOG(INFO) << "tuples is: " << tuples << endl; @@ -853,33 +854,40 @@ RetCode DataIngestion::InsertFromString(const string tuples, vector columns_validities; void* tuple_buffer = Malloc(table_schema_->getTupleMaxSize()); - if (tuple_buffer == NULL) return rNoMemory; + if (tuple_buffer == NULL) { + for (auto it : correct_tuple_buffer) DELETE_PTR(it); + correct_tuple_buffer.clear(); + return rNoMemory; + } ostringstream oss; if (rSuccess != (ret = CheckAndToValue(tuple_record, tuple_buffer, RawDataSource::kSQL, columns_validities))) { // contain data error, which is stored in the end of columns_validities - for (auto it : correct_tuple_buffer) DELETE_PTR(it); - correct_tuple_buffer.clear(); - // handle error which stored in the end Validity err = columns_validities.back(); columns_validities.pop_back(); - string validity_info = GenerateDataValidityInfo(err, table_, line, ""); + string validity_info = + Validity::GenerateDataValidityInfo(err, table_, line, ""); LOG(ERROR) << validity_info; result->SetError(validity_info); } // handle all warnings for (auto it : columns_validities) { - string validity_info = GenerateDataValidityInfo(it, table_, line, ""); + string validity_info = + Validity::GenerateDataValidityInfo(it, table_, line, ""); DLOG_DI("append warning info:" << validity_info); result->AppendWarning(validity_info); } // if check failed, return ret - if (rSuccess != ret) return ret; + if (rSuccess != ret) { + for (auto it : correct_tuple_buffer) DELETE_PTR(it); + 
correct_tuple_buffer.clear(); + return ret; + } correct_tuple_buffer.push_back(tuple_buffer); ++line; @@ -1059,7 +1067,7 @@ inline RetCode DataIngestion::CheckAndToValue( } istream& DataIngestion::GetTupleTerminatedBy(ifstream& ifs, string& res, - const string& terminator) { + const string& terminator) { res.clear(); if (1 == terminator.length()) { return getline(ifs, res, static_cast(terminator[0])); @@ -1107,15 +1115,17 @@ const char* validity_info[9][2] = { {}, {}}; */ + +/* string DataIngestion::GenerateDataValidityInfo(const Validity& vali, - TableDescriptor* table, int line, - const string& file) { + TableDescriptor* table, int line, + const string& file) { ostringstream oss; oss.clear(); switch (vali.check_res_) { case rTooLargeData: { oss << "Data larger than range value for column '" - << table_->getAttribute(vali.column_index_).attrName + << table->getAttribute(vali.column_index_).attrName << "' at line: " << line; if ("" != file) oss << " in file: " << file; oss << "\n"; @@ -1123,7 +1133,7 @@ string DataIngestion::GenerateDataValidityInfo(const Validity& vali, } case rTooSmallData: { oss << "Data smaller than range value for column '" - << table_->getAttribute(vali.column_index_).attrName + << table->getAttribute(vali.column_index_).attrName << "' at line: " << line; if ("" != file) oss << " in file: " << file; oss << "\n"; @@ -1131,7 +1141,7 @@ string DataIngestion::GenerateDataValidityInfo(const Validity& vali, } case rInterruptedData: { oss << "Data truncated from non-digit for column '" - << table_->getAttribute(vali.column_index_).attrName + << table->getAttribute(vali.column_index_).attrName << "' at line: " << line; if ("" != file) oss << " in file: " << file; oss << "\n"; @@ -1139,7 +1149,7 @@ string DataIngestion::GenerateDataValidityInfo(const Validity& vali, } case rTooLongData: { oss << "Data truncated for column '" - << table_->getAttribute(vali.column_index_).attrName + << table->getAttribute(vali.column_index_).attrName << "' at line: " << 
line; if ("" != file) oss << " in file: " << file; oss << "\n"; @@ -1147,7 +1157,7 @@ string DataIngestion::GenerateDataValidityInfo(const Validity& vali, } case rIncorrectData: { oss << "Incorrect format value for column '" - << table_->getAttribute(vali.column_index_).attrName + << table->getAttribute(vali.column_index_).attrName << "' at line: " << line; if ("" != file) oss << " in file: " << file; oss << "\n"; @@ -1155,7 +1165,7 @@ string DataIngestion::GenerateDataValidityInfo(const Validity& vali, } case rInvalidNullData: { oss << "Null Data value is invalid for column '" - << table_->getAttribute(vali.column_index_).attrName + << table->getAttribute(vali.column_index_).attrName << "' at line: " << line; if ("" != file) oss << " in file: " << file; oss << "\n"; @@ -1176,7 +1186,7 @@ string DataIngestion::GenerateDataValidityInfo(const Validity& vali, } case rInvalidInsertData: { oss << "Data value is invalid for column '" - << table_->getAttribute(vali.column_index_).attrName + << table->getAttribute(vali.column_index_).attrName << "' at line: " << line; if ("" != file) oss << " in file: " << file; oss << "\n"; @@ -1189,6 +1199,6 @@ string DataIngestion::GenerateDataValidityInfo(const Validity& vali, } return oss.str(); } - +*/ } /* namespace loader */ } /* namespace claims */ diff --git a/loader/data_ingestion.h b/loader/data_ingestion.h index c01fb0407..ed156a4d2 100644 --- a/loader/data_ingestion.h +++ b/loader/data_ingestion.h @@ -73,7 +73,7 @@ class DataIngestion { * @param row_separator: row separator */ DataIngestion(TableDescriptor* table, const string col_separator = "|", - const string row_separator = "\n"); + const string& row_separator = "\n"); virtual ~DataIngestion(); @@ -157,8 +157,9 @@ class DataIngestion { string data_source, uint64_t row_id_in_raw_data, ExecutedResult* result); - string GenerateDataValidityInfo(const Validity& vali, TableDescriptor* table, - int line, const string& file); + // string GenerateDataValidityInfo(const Validity& 
vali, TableDescriptor* + // table, + // int line, const string& file); void AnnounceIAmLoading(); static void* HandleTuple(void* ptr); diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 97539a035..8f5302a67 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -29,16 +29,21 @@ #include "./master_loader.h" #include #include - -#include "caf/all.hpp" -#include "caf/io/all.hpp" #include #include +#include "caf/all.hpp" +#include "caf/io/all.hpp" +#include "./loader_message.h" +#include "./validity.h" #include "../catalog/catalog.h" +#include "../catalog/table.h" +#include "../common/data_type.h" +#include "../common/memory_handle.h" +#include "../common/Schema/TupleConvertor.h" #include "../Config.h" #include "../Environment.h" -#include "./loader_message.h" +#include "../utility/stl_guard.h" using caf::aout; using caf::behavior; using caf::event_based_actor; @@ -48,6 +53,8 @@ using caf::mixin::sync_sender_impl; using caf::spawn; using std::endl; using claims::catalog::Catalog; +using claims::catalog::TableDescriptor; +using claims::common::Malloc; using claims::common::rSuccess; using claims::common::rFailure; @@ -128,16 +135,61 @@ RetCode MasterLoader::Ingest() { RetCode ret = rSuccess; string message = GetMessage(); + // get message from MQ IngestionRequest req; EXEC_AND_LOG(ret, GetRequestFromMessage(message, &req), "got request!", "failed to get request"); - // CheckAndToValue(); + // parse message and get all tuples of all partitions, then + // check the validity of all tuple in message + TableDescriptor* table = + Environment::getInstance()->getCatalog()->getTable(req.table_name_); + assert(table != NULL && "table is not exist!"); + vector>> tuple_buffers_per_part( + table->getNumberOfProjection()); + for (auto proj : (*(table->GetProjectionList()))) { + tuple_buffers_per_part.push_back(vector>( + proj->getPartitioner()->getNumberOfPartitions(), vector())); + } + vector columns_validities; + EXEC_AND_LOG(ret, 
GetPartitionTuples(req, table, tuple_buffers_per_part, + columns_validities), + "got all tuples of every partition", + "failed to get all tuples of every partition"); + if (ret != rSuccess && ret != claims::common::rNoMemory) { + // TODO(YUKAI): error handle, like sending error message to client + LOG(ERROR) << "the tuple is not valid"; + return rFailure; + } + + // merge all tuple buffers of partition into one partition buffer + vector> partition_buffers( + table->getNumberOfProjection()); + EXEC_AND_LOG(ret, MergePartitionTupleIntoOneBuffer( + table, tuple_buffers_per_part, partition_buffers), + "merged all tuple of same partition into one buffer", + "failed to merge tuples buffers into one buffer"); + + // start transaction from here + ApplyTransaction(req, table, partition_buffers); + + // write data log + EXEC_AND_LOG(ret, WriteLog(req, table, partition_buffers), "written log ", + "failed to write log"); + + EXEC_AND_LOG(ret, ReplyToMQ(req), "replied to MQ", "failed to reply to MQ"); + + EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers), + "sent every partition data to its slave", + "failed to send every partition data to its slave"); return ret; } -string MasterLoader::GetMessage() {} +string MasterLoader::GetMessage() { + string ret; + return ret; +} bool MasterLoader::CheckValidity() {} @@ -162,8 +214,141 @@ RetCode MasterLoader::GetSocketFdConnectedWithSlave(string ip, int port, return rSuccess; } +// get every tuples and add row id for it RetCode MasterLoader::GetRequestFromMessage(const string& message, IngestionRequest* req) { + // AddRowIdColumn() + RetCode ret = rSuccess; + return ret; +} + +RetCode MasterLoader::CheckAndToValue(const IngestionRequest& req, + void* tuple_buffer, + vector& column_validities) {} + +// map every tuple into associate part +RetCode MasterLoader::GetPartitionTuples( + const IngestionRequest& req, const TableDescriptor* table, + vector>>& tuple_buffer_per_part, + vector& columns_validities) { + RetCode 
ret = rSuccess; + vector correct_tuple_buffer; + STLGuardWithRetCode> guard(correct_tuple_buffer, + ret); // attention! + // must set RetCode 'ret' before returning error code!!!! + ThreeLayerSTLGuardWithRetCode>>> + return_tuple_buffer_guard(tuple_buffer_per_part, ret); // attention! + + // check all tuples to be inserted + int line = 0; + for (auto tuple_string : req.tuples_) { + void* tuple_buffer = Malloc(table->getSchema()->getTupleMaxSize()); + if (tuple_buffer == NULL) return claims::common::rNoMemory; + if (rSuccess != (ret = table->getSchema()->CheckAndToValue( + tuple_string, tuple_buffer, req.col_sep_, + RawDataSource::kSQL, columns_validities))) { + // handle error which stored in the end + Validity err = columns_validities.back(); + columns_validities.pop_back(); + string validity_info = + Validity::GenerateDataValidityInfo(err, table, line, ""); + LOG(ERROR) << validity_info; + } + // handle all warnings + for (auto it : columns_validities) { + string validity_info = + Validity::GenerateDataValidityInfo(it, table, line, ""); + LOG(WARNING) << "append warning info:" << validity_info; + } + if (rSuccess != ret) { + // clean work is done by guard + return ret; + } + ++line; + correct_tuple_buffer.push_back(tuple_buffer); + } + + // map every tuple in different partition + for (int i = 0; i < table->getNumberOfProjection(); i++) { + ProjectionDescriptor* prj = table->getProjectoin(i); + Schema* prj_schema = prj->getSchema(); + vector prj_attrs = prj->getAttributeList(); + vector prj_index; + for (int j = 0; j < prj_attrs.size(); j++) { + prj_index.push_back(prj_attrs[j].index); + } + SubTuple sub_tuple(table->getSchema(), prj_schema, prj_index); + + const int partition_key_local_index = + prj->getAttributeIndex(prj->getPartitioner()->getPartitionKey()); + unsigned tuple_max_length = prj_schema->getTupleMaxSize(); + + for (auto tuple_buffer : correct_tuple_buffer) { + // extract the sub tuple according to the projection schema + void* target = 
Malloc(prj_schema->getTupleMaxSize()); // newmalloc + if (target == NULL) { + return (ret = claims::common::rNoMemory); + } + sub_tuple.getSubTuple(tuple_buffer, target); + + // determine the partition to write the tuple "target" + void* partition_key_addr = + prj_schema->getColumnAddess(partition_key_local_index, target); + int part = prj_schema->getcolumn(partition_key_local_index) + .operate->getPartitionValue( + partition_key_addr, + prj->getPartitioner()->getPartitionFunction()); + + tuple_buffer_per_part[i][part].push_back(target); + } + } + return ret; +} + +RetCode MasterLoader::ApplyTransaction( + const IngestionRequest& req, const TableDescriptor* table, + const vector>& partition_buffers) { + RetCode ret = rSuccess; + + return ret; +} + +RetCode MasterLoader::WriteLog( + const IngestionRequest& req, const TableDescriptor* table, + const vector>& partition_buffers) {} + +RetCode MasterLoader::ReplyToMQ(const IngestionRequest& req) {} + +RetCode MasterLoader::SendPartitionTupleToSlave( + const TableDescriptor* table, + const vector>& partition_buffers) {} + +RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( + const TableDescriptor* table, + vector>>& tuple_buffer_per_part, + vector>& partition_buffers) { + RetCode ret = rSuccess; + for (int i = 0; i < tuple_buffer_per_part.size(); ++i) { + for (int j = 0; j < tuple_buffer_per_part[i].size(); ++j) { + int tuple_count = tuple_buffer_per_part[i][j].size(); + int tuple_len = table->getProjectoin(i)->getSchema()->getTupleMaxSize(); + int buffer_len = tuple_count * tuple_len; + + void* new_buffer = Malloc(buffer_len); + for (int k = 0; k < tuple_count; ++k) { + memcpy(new_buffer + k * tuple_len, tuple_buffer_per_part[i][j][k], + tuple_len); + // release old memory stored tuple buffer + DELETE_PTR(tuple_buffer_per_part[i][j][k]); + } + // push new partition buffer + partition_buffers[i].push_back(PartitionBuffer(new_buffer, buffer_len)); + tuple_buffer_per_part[i][j].clear(); + } + 
tuple_buffer_per_part[i].clear(); + } + tuple_buffer_per_part.clear(); + return ret; } void* MasterLoader::StartMasterLoader(void* arg) { @@ -175,8 +360,9 @@ void* MasterLoader::StartMasterLoader(void* arg) { EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->ConnectWithSlaves(), "failed to connect all slaves"); - EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), - "failed to ingest data"); + while (true) + EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), + "failed to ingest data"); return NULL; } diff --git a/loader/master_loader.h b/loader/master_loader.h index 0e66a2f4a..db9cd604f 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -33,14 +33,19 @@ #include "../common/error_define.h" #include "caf/all.hpp" -using caf::behavior; -using caf::event_based_actor; +#include "./validity.h" namespace claims { +namespace catalog { +class TableDescriptor; +} namespace loader { using std::string; using std::vector; +using caf::behavior; +using caf::event_based_actor; +using claims::catalog::TableDescriptor; class MasterLoader { public: @@ -54,7 +59,13 @@ class MasterLoader { string table_name_; string col_sep_; string row_sep_; - string tuples_; + vector tuples_; + }; + + struct PartitionBuffer { + PartitionBuffer(void* buf, uint64_t len) : buffer_(buf), length_(len) {} + void* buffer_; + uint64_t length_; }; public: @@ -70,6 +81,36 @@ class MasterLoader { RetCode GetRequestFromMessage(const string& message, IngestionRequest* req); + RetCode CheckAndToValue(const IngestionRequest& req, void* tuple_buffer, + vector& column_validities); + + RetCode GetPartitionTuples( + const IngestionRequest& req, const TableDescriptor* table, + vector>>& tuple_buffer_per_part, + vector& columns_validities); + + /** + * copy and merge all tuples buffer of the same partition into one buffer, + * and release all memory in tuple_buffer_per_part + */ + RetCode MergePartitionTupleIntoOneBuffer( + const TableDescriptor* table, + vector>>& tuple_buffer_per_part, + vector>& 
partition_buffers); + + RetCode ApplyTransaction( + const IngestionRequest& req, const TableDescriptor* table, + const vector>& partition_buffers); + + RetCode WriteLog(const IngestionRequest& req, const TableDescriptor* table, + const vector>& partition_buffers); + + RetCode ReplyToMQ(const IngestionRequest& req); + + RetCode SendPartitionTupleToSlave( + const TableDescriptor* table, + const vector>& partition_buffers); + RetCode GetSlaveNetAddr(); RetCode SetSocketWithSlaves(); RetCode GetSocketFdConnectedWithSlave(string ip, int port, int* connected_fd); diff --git a/loader/validity.cpp b/loader/validity.cpp index faca10302..ae179f4c4 100644 --- a/loader/validity.cpp +++ b/loader/validity.cpp @@ -26,13 +26,98 @@ * */ -#include "validity.h" +#include "./validity.h" +#include +#include "../catalog/table.h" +using claims::catalog::TableDescriptor; +using namespace claims::common; // NOLINT namespace claims { namespace loader { -Validity::~Validity() { - // TODO Auto-generated destructor stub +Validity::~Validity() {} + +string Validity::GenerateDataValidityInfo(const Validity& vali, + const TableDescriptor* table, + int line, const string& file) { + ostringstream oss; + oss.clear(); + switch (vali.check_res_) { + case rTooLargeData: { + oss << "Data larger than range value for column '" + << table->getAttribute(vali.column_index_).attrName + << "' at line: " << line; + if ("" != file) oss << " in file: " << file; + oss << "\n"; + break; + } + case rTooSmallData: { + oss << "Data smaller than range value for column '" + << table->getAttribute(vali.column_index_).attrName + << "' at line: " << line; + if ("" != file) oss << " in file: " << file; + oss << "\n"; + break; + } + case rInterruptedData: { + oss << "Data truncated from non-digit for column '" + << table->getAttribute(vali.column_index_).attrName + << "' at line: " << line; + if ("" != file) oss << " in file: " << file; + oss << "\n"; + break; + } + case rTooLongData: { + oss << "Data truncated for column '" 
+ << table->getAttribute(vali.column_index_).attrName + << "' at line: " << line; + if ("" != file) oss << " in file: " << file; + oss << "\n"; + break; + } + case rIncorrectData: { + oss << "Incorrect format value for column '" + << table->getAttribute(vali.column_index_).attrName + << "' at line: " << line; + if ("" != file) oss << " in file: " << file; + oss << "\n"; + break; + } + case rInvalidNullData: { + oss << "Null Data value is invalid for column '" + << table->getAttribute(vali.column_index_).attrName + << "' at line: " << line; + if ("" != file) oss << " in file: " << file; + oss << "\n"; + break; + } + case rTooFewColumn: { + oss << "Line: " << line; + if ("" != file) oss << " in file: " << file; + oss << " doesn't contain data for all columns\n"; + break; + } + case rTooManyColumn: { + oss << "Line: " << line; + if ("" != file) oss << " in file: " << file; + oss << " was truncated; it contained more data than there were " + "input columns\n"; + break; + } + case rInvalidInsertData: { + oss << "Data value is invalid for column '" + << table->getAttribute(vali.column_index_).attrName + << "' at line: " << line; + if ("" != file) oss << " in file: " << file; + oss << "\n"; + break; + } + default: + LOG(ERROR) << "Unknown ERROR" << endl; + oss << "Unknown ERROR\n"; + break; + } + return oss.str(); } } /* namespace loader */ diff --git a/loader/validity.h b/loader/validity.h index 9b8742576..b9436c376 100644 --- a/loader/validity.h +++ b/loader/validity.h @@ -28,10 +28,18 @@ #ifndef LOADER_VALIDITY_H_ #define LOADER_VALIDITY_H_ +#include + #include "../common/error_define.h" namespace claims { +namespace catalog { +class TableDescriptor; +} + namespace loader { +using claims::catalog::TableDescriptor; +using std::string; struct Validity { public: @@ -39,6 +47,10 @@ struct Validity { : column_index_(column_index), check_res_(check_res) {} virtual ~Validity(); + static string GenerateDataValidityInfo(const Validity& vali, + const TableDescriptor* table, int 
line, + const string& file); + public: int column_index_; RetCode check_res_; diff --git a/utility/Makefile.am b/utility/Makefile.am index 2f75a28e0..f82838e3a 100644 --- a/utility/Makefile.am +++ b/utility/Makefile.am @@ -24,5 +24,5 @@ libutility_a_SOURCES = \ string_process.h task.cpp \ task.h test_tool.h \ thread_pool.cpp thread_pool.h \ - warmup.h + warmup.h stl_guard.h \ No newline at end of file diff --git a/utility/stl_guard.h b/utility/stl_guard.h new file mode 100644 index 000000000..7583300b1 --- /dev/null +++ b/utility/stl_guard.h @@ -0,0 +1,103 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /Claims/utility/stl_guard.h + * + * Created on: Apr 14, 2016 + * Author: yukai + * Email: yukai2014@gmail.com + * + * Description: + * + */ + +#ifndef UTILITY_STL_GUARD_H_ +#define UTILITY_STL_GUARD_H_ +#include "../common/error_define.h" +#include "../common/memory_handle.h" +#include "../common/rename.h" + +// a guard of STL container which stores pointer, like vector +template +class STLGuardWithRetCode { + public: + STLGuardWithRetCode(T& t, RetCode& ret) : t_(t), ret_(ret) {} + ~STLGuardWithRetCode() { + if (claims::common::rSuccess != ret_) { + for (auto it : t_) DELETE_PTR(it); + t_.clear(); + } + } + + NO_COPY_AND_ASSIGN(STLGuardWithRetCode); + + private: + T& t_; + RetCode& ret_; +}; + +template +class TwoLayerSTLGuardWithRetCode { + public: + TwoLayerSTLGuardWithRetCode(T& t, RetCode& ret) : t_(t), ret_(ret) {} + ~TwoLayerSTLGuardWithRetCode() { + if (claims::common::rSuccess != ret_) { + for (auto it1 : t_) { + for (auto it2 : it1) { + DELETE_PTR(it2); + } + it1.clear(); + } + t_.clear(); + } + } + + NO_COPY_AND_ASSIGN(TwoLayerSTLGuardWithRetCode); + + private: + T& t_; + RetCode& ret_; +}; + +template +class ThreeLayerSTLGuardWithRetCode { + public: + ThreeLayerSTLGuardWithRetCode(T& t, RetCode& ret) : t_(t), ret_(ret) {} + ~ThreeLayerSTLGuardWithRetCode() { + if (claims::common::rSuccess != ret_) { + for (auto it1 : t_) { + for (auto it2 : it1) { + for (auto it3 : it2) { + DELETE_PTR(it3); + } + it2.clear(); + } + it1.clear(); + } + t_.clear(); + } + } + + NO_COPY_AND_ASSIGN(ThreeLayerSTLGuardWithRetCode); + + private: + T& t_; + RetCode& ret_; +}; + +#endif // UTILITY_STL_GUARD_H_ From 9c2a15082a4c5c24384d484a09479abbd5b75507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Tue, 19 Apr 2016 09:41:55 +0800 Subject: [PATCH 05/58] add log to txn_manager --- Makefile.am | 6 +++--- txn_manager/Makefile.am | 4 +++- txn_manager/txn.hpp | 28 ++++++++++++--------------- txn_manager/txn_server.cpp | 39 
+++++++++++++++++++++++++++++--------- txn_manager/txn_server.hpp | 3 ++- txn_server_test.cpp | 31 +++++++++++++++++------------- 6 files changed, 68 insertions(+), 43 deletions(-) diff --git a/Makefile.am b/Makefile.am index 42e086343..9995239fa 100644 --- a/Makefile.am +++ b/Makefile.am @@ -83,7 +83,7 @@ include_HEADERS = Config.h \ txn_manager/txn_server.hpp \ txn_manager/txn_client.hpp -bin_PROGRAMS = claimsserver \ +bin_PROGRAMS = claimsserver \ client \ test \ txnclient \ @@ -104,9 +104,9 @@ test_SOURCES = Test/gtest_main.cpp\ IDsGenerator.cpp \ Config.cpp -txnclient_SOURCES = txn_client_test.cpp +txnclient_SOURCES = txn_manager/txn_client_test.cpp -txnserver_SOURCES = txn_server_test.cpp +txnserver_SOURCES = txn_manager/txn_server_test.cpp SUBDIRS= catalog Client common Daemon Executor IndexManager\ loader physical_operator logical_operator Resource \ diff --git a/txn_manager/Makefile.am b/txn_manager/Makefile.am index 6df18ed88..d0af4badd 100644 --- a/txn_manager/Makefile.am +++ b/txn_manager/Makefile.am @@ -25,7 +25,9 @@ noinst_LIBRARIES=libtxnmanager.a libtxnmanager_a_SOURCES = \ txn.hpp txn.cpp \ txn_client.hpp txn_client.cpp \ - txn_server.hpp txn_server.cpp + txn_server.hpp txn_server.cpp \ + txn_log.hpp txn_log.cpp + diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index d56c532a8..efbeadf36 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -60,9 +60,16 @@ using UInt32 = unsigned int; using UInt16 = unsigned short; using UInt8 = char; using RetCode = int; +using BeginAtom = caf::atom_constant; +using CommitAtom = caf::atom_constant; +using AbortAtom = caf::atom_constant; +using DataAtom = caf::atom_constant; + using OkAtom = caf::atom_constant; using FailAtom = caf::atom_constant; using IngestAtom = caf::atom_constant; +using WriteAtom = caf::atom_constant; + using QueryAtom = caf::atom_constant; using CheckpointAtom = caf::atom_constant; using GCAtom = caf::atom_constant; @@ -70,8 +77,9 @@ using CommitIngestAtom = 
caf::atom_constant; using AbortIngestAtom = caf::atom_constant; using CommitCPAtom = caf::atom_constant; using AbortCPAtom = caf::atom_constant; -using QuitAtom = caf::atom_constant; -using LinkAtom = caf::atom_constant; +using QuitAtom = caf::atom_constant; +using LinkAtom = caf::atom_constant; +using RefreshAtom = caf::atom_constant; static const int kTxnPort = 8089; static const string kTxnIp = "127.0.0.1"; @@ -140,7 +148,6 @@ class Ingest { public: UInt64 Id; map StripList; - RetCode Ret = 0; void InsertStrip (UInt64 part, UInt64 pos, UInt64 offset) { StripList[part] = make_pair(pos, offset); } @@ -149,12 +156,10 @@ class Ingest { } UInt64 get_Id() const { return Id;} map get_StripList() const { return StripList;} - RetCode get_Ret() const {} void set_Id(const UInt64 & id){ Id = id;} void set_StripList(const map & stripList) { StripList = stripList; } - void set_Ret(RetCode ret) { Ret = ret;} string ToString(); }; inline bool operator == (const Ingest & a, const Ingest & b) { @@ -179,7 +184,6 @@ class Query{ public: map> Snapshot; map CPList; - RetCode Ret = 0; void InsertStrip (UInt64 part, UInt64 pos, UInt64 offset){ // if (Snapshot.find(part) == Snapshot.end()) // Snapshot[part] = vector>(); @@ -193,14 +197,12 @@ class Query{ return Snapshot; } map get_CPList() const { return CPList;} - RetCode get_Ret() const { return Ret;} void set_Snapshot(const map> & sp){ Snapshot = sp; } void set_CPList(const map & cplist) { CPList = cplist; } - void set_Ret(RetCode ret) {Ret = ret;} string ToString(); }; inline bool operator == (const Query & a, const Query & b) { @@ -214,7 +216,6 @@ class Checkpoint{ UInt64 Part; UInt64 LogicCP; UInt64 PhyCP; - RetCode Ret = 0; vector CommitStripList; vector AbortStripList; Checkpoint() {} @@ -224,13 +225,11 @@ class Checkpoint{ UInt64 get_Part() const { return Part;} UInt64 get_LogicCP() const { return LogicCP;} UInt64 get_PhyCP() const { return PhyCP;} - UInt64 get_Ret() const { return Ret;} vector get_CommitStripList() const { 
return CommitStripList;}; vector get_AbortStripList() const { return AbortStripList;}; void set_Part(UInt64 part) { Part = part;} void set_LogicCP(UInt64 logicCP) { LogicCP = logicCP;} void set_PhyCP(UInt64 phyCP) { PhyCP = phyCP;} - void set_Ret(RetCode ret) { Ret = ret;} void set_CommitStripList(const vector & commitstripList) { CommitStripList = commitstripList; } @@ -248,19 +247,16 @@ inline void SerializeConfig() { make_pair(&FixTupleIngestReq::get_Content, &FixTupleIngestReq::set_Content)); caf::announce("Ingest", make_pair(&Ingest::get_Id,&Ingest::set_Id), - make_pair(&Ingest::get_StripList,&Ingest::set_StripList), - make_pair(&Ingest::get_Ret,&Ingest::set_Ret)); + make_pair(&Ingest::get_StripList,&Ingest::set_StripList)); caf::announce("QueryReq", make_pair(&QueryReq::get_PartList, &QueryReq::set_PartList)); caf::announce("Query", make_pair(&Query::get_Snapshot,&Query::set_Snapshot), - make_pair(&Query::get_CPList, &Query::set_CPList), - make_pair(&Query::get_Ret, &Query::set_Ret)); + make_pair(&Query::get_CPList, &Query::set_CPList)); caf::announce("Checkpoint", make_pair(&Checkpoint::get_Part, &Checkpoint::set_Part), make_pair(&Checkpoint::get_LogicCP, &Checkpoint::set_LogicCP), make_pair(&Checkpoint::get_PhyCP, &Checkpoint::set_PhyCP), - make_pair(&Checkpoint::get_Ret, &Checkpoint::set_Ret), make_pair(&Checkpoint::get_CommitStripList, &Checkpoint::set_CommitStripList), make_pair(&Checkpoint::get_AbortStripList, diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index 094c2e701..fb7dec7fb 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -63,7 +63,7 @@ RetCode TxnCore::ReMalloc() { caf::behavior TxnCore::make_behavior() { ReMalloc(); - this->delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); + //this->delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); return { [=](IngestAtom, const FixTupleIngestReq * request, Ingest * ingest)->int { struct timeval tv1; @@ -224,25 +224,37 @@ RetCode 
TxnServer::Init(int concurrency, int port) { RecoveryFromCatalog(); RecoveryFromTxnLog(); srand((unsigned) time(NULL)); + if (!LogServer::is_active) + LogServer::init("txn-log"); return 0; } RetCode TxnServer::BeginIngest(const FixTupleIngestReq & request, Ingest & ingest) { - RetCode ret; + RetCode ret = 0; UInt64 core_id = SelectCore(); caf::scoped_actor self; self->sync_send(Cores[core_id], IngestAtom::value, & request, & ingest). await([&](int r) {ret = r;}); - return 0; + if (ret == 0) { + LogClient::Begin(ingest.Id); + for (auto & strip : ingest.StripList) + LogClient::Write(ingest.Id, strip.first, strip.second.first, strip.second.second); + LogClient::PushToDisk(); + } + return ret; } RetCode TxnServer::CommitIngest(const Ingest & ingest) { - RetCode ret; + RetCode ret = 0; UInt64 core_id = GetCoreId(ingest.Id); caf::scoped_actor self; self->sync_send(Cores[core_id], CommitIngestAtom::value, &ingest). await([&](int r) { ret = r;}); - return 0; + if (ret == 0) { + LogClient::Commit(ingest.Id); + LogClient::PushToDisk(); + } + return ret; } RetCode TxnServer::AbortIngest(const Ingest & ingest) { RetCode ret; @@ -250,7 +262,11 @@ RetCode TxnServer::AbortIngest(const Ingest & ingest) { caf::scoped_actor self; self->sync_send(Cores[core_id], AbortIngestAtom::value, &ingest). 
await([&](int r) { ret = r;}); - return 0; + if (ret == 0) { + LogClient::Abort(ingest.Id); + LogClient::PushToDisk(); + } + return ret; } RetCode TxnServer::BeginQuery(const QueryReq & request, Query & query) { RetCode ret; @@ -267,7 +283,7 @@ RetCode TxnServer::BeginQuery(const QueryReq & request, Query & query) { return ret; } RetCode TxnServer::BeginCheckpoint(Checkpoint & cp) { - RetCode ret; + RetCode ret = 0; if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) return -1; cp.LogicCP = TxnServer::LogicCPList[cp.Part]; @@ -281,14 +297,19 @@ RetCode TxnServer::BeginCheckpoint(Checkpoint & cp) { Strip::Merge(cp.CommitStripList); Strip::Sort(cp.AbortStripList); Strip::Merge(cp.AbortStripList); - return 0; + return ret; } RetCode TxnServer::CommitCheckpoint(const Checkpoint & cp) { + RetCode ret = 0; if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) return -1; TxnServer::LogicCPList[cp.Part] = cp.LogicCP; TxnServer::PhyCPList[cp.Part] = cp.PhyCP; - return 0; + if (ret == 0) { + LogClient::Checkpoint(cp.Part, cp.LogicCP, cp.PhyCP); + LogClient::PushToDisk(); + } + return ret; } diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp index 0caf79208..ee11266e1 100644 --- a/txn_manager/txn_server.hpp +++ b/txn_manager/txn_server.hpp @@ -45,6 +45,7 @@ #include "caf/all.hpp" #include "caf/io/all.hpp" #include "txn.hpp" +#include "txn_log.hpp" #include using std::cin; using std::cout; @@ -81,7 +82,7 @@ class TxnCore: public caf::event_based_actor { TxnCore(int coreId):CoreId(coreId) {} UInt64 GetId(){ UInt64 id = ((++LocalId) *1000) + CoreId; - + return id; } }; diff --git a/txn_server_test.cpp b/txn_server_test.cpp index ff6a0bce3..d38945048 100644 --- a/txn_server_test.cpp +++ b/txn_server_test.cpp @@ -190,7 +190,7 @@ int main(){ // } - TxnServer::Init(); +// TxnServer::Init(); // for (auto j = 0;j < 100 ;j++) { //// request1.Content[0] = {45, 10}; //// request1.Content[1] = {54, 10}; @@ -200,18 +200,23 @@ int main(){ // 
TxnServer::BeginIngest(request1, ingest); // TxnServer::CommitIngest(ingest); // } +// sleep(1); +// struct timeval tv1, tv2; +// gettimeofday(&tv1,NULL); +// cout <<"a:" < v; +// int n = 1; +// for (auto i=0;i v; - int n = 1; - for (auto i=0;i Date: Tue, 19 Apr 2016 09:47:11 +0800 Subject: [PATCH 06/58] change path of txn_server/client_test --- .../txn_client_test.cpp | 4 +- .../txn_server_test.cpp | 77 +++++++++++-------- 2 files changed, 48 insertions(+), 33 deletions(-) rename txn_client_test.cpp => txn_manager/txn_client_test.cpp (97%) rename txn_server_test.cpp => txn_manager/txn_server_test.cpp (81%) diff --git a/txn_client_test.cpp b/txn_manager/txn_client_test.cpp similarity index 97% rename from txn_client_test.cpp rename to txn_manager/txn_client_test.cpp index 8f20343b5..ffe7ecaa5 100644 --- a/txn_client_test.cpp +++ b/txn_manager/txn_client_test.cpp @@ -41,9 +41,9 @@ #include #include "caf/all.hpp" #include "caf/io/all.hpp" -#include "txn_manager/txn.hpp" +#include "txn.hpp" #include "unistd.h" -#include "txn_manager/txn_client.hpp" +#include "txn_client.hpp" using std::cin; using std::cout; using std::endl; diff --git a/txn_server_test.cpp b/txn_manager/txn_server_test.cpp similarity index 81% rename from txn_server_test.cpp rename to txn_manager/txn_server_test.cpp index d38945048..5d0efcea1 100644 --- a/txn_server_test.cpp +++ b/txn_manager/txn_server_test.cpp @@ -43,8 +43,10 @@ #include #include "caf/all.hpp" #include "caf/io/all.hpp" -#include "txn_manager/txn.hpp" -#include "txn_manager/txn_server.hpp" +#include "txn.hpp" +#include "txn_server.hpp" +#include "txn_client.hpp" +#include "txn_log.cpp" using std::cin; using std::cout; using std::endl; @@ -155,19 +157,16 @@ class B:public caf::event_based_actor { -void task(int a){ - for (auto i = 0; i< 10; i++) { -// if (i % 10 > 10) { -// QueryReq request2; -// request2.PartList = {0,1}; -// Query query; -// TxnServer::BeginQuery(request2, query); -// } else { +void task(int time){ + for (auto i = 
0; i< time; i++) { + FixTupleIngestReq request1; Ingest ingest; - request1.Content = {{0, {45, 10}}}; - TxnServer::BeginIngest(request1, ingest); - TxnServer::CommitIngest(ingest); + request1.Content = {{0, {45, 10}}, + {1, {35, 20}}, + {2,{15,100}}}; + TxnClient::BeginIngest(request1, ingest); + TxnClient::CommitIngest(ingest); // } } } @@ -175,6 +174,20 @@ void task(int a){ using claims::txn::TxnServer; using claims::txn::FixTupleIngestReq; using claims::txn::Ingest; +char v[1024+10]; + +void task2(int time) { + for (auto i = 0; i< time; i++) { + //LogClient::Data(1, 1, 1111,(void*)v, 1024); + LogClient::Begin(i); + LogClient::Write(i, 1, 0, 100 ); + LogClient::Write(i, 2, 0, 100 ); + LogClient::Write(i, 3, 0, 100 ); + //LogClient::PushToDisk() ; + } + +} + int main(){ // auto server = caf::spawn(); // SerializeConfig(); @@ -189,8 +202,9 @@ int main(){ // cout << "bind fail" << endl; // } - +// // TxnServer::Init(); + // for (auto j = 0;j < 100 ;j++) { //// request1.Content[0] = {45, 10}; //// request1.Content[1] = {54, 10}; @@ -201,22 +215,23 @@ int main(){ // TxnServer::CommitIngest(ingest); // } // sleep(1); -// struct timeval tv1, tv2; -// gettimeofday(&tv1,NULL); -// cout <<"a:" < v; -// int n = 1; -// for (auto i=0;i v; + int n = 1, time =1; + cin >> n >> time; + gettimeofday(&tv1,NULL); + for (auto i=0;i Date: Tue, 19 Apr 2016 11:15:22 +0800 Subject: [PATCH 07/58] rm cout --- txn_manager/txn_server_test.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/txn_manager/txn_server_test.cpp b/txn_manager/txn_server_test.cpp index 5d0efcea1..97c12db8b 100644 --- a/txn_manager/txn_server_test.cpp +++ b/txn_manager/txn_server_test.cpp @@ -154,6 +154,10 @@ class B:public caf::event_based_actor { }; } }; +using claims::txn::TxnServer; +using claims::txn::FixTupleIngestReq; +using claims::txn::Ingest; +char v[1024+10]; @@ -166,23 +170,24 @@ void task(int time){ {1, {35, 20}}, {2,{15,100}}}; TxnClient::BeginIngest(request1, ingest); 
+ LogClient::Data(1, 1, 1111,(void*)v, 1024); + LogClient::Data(1, 1, 1111,(void*)v, 1024); + LogClient::Data(1, 1, 1111,(void*)v, 1024); + LogClient::PushToDisk(); TxnClient::CommitIngest(ingest); // } } } -using claims::txn::TxnServer; -using claims::txn::FixTupleIngestReq; -using claims::txn::Ingest; -char v[1024+10]; void task2(int time) { for (auto i = 0; i< time; i++) { - //LogClient::Data(1, 1, 1111,(void*)v, 1024); + LogClient::Begin(i); LogClient::Write(i, 1, 0, 100 ); LogClient::Write(i, 2, 0, 100 ); LogClient::Write(i, 3, 0, 100 ); + //LogClient::PushToDisk() ; } From 95605d84feb1eff30d2b01795909c7d6444c9608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Tue, 19 Apr 2016 14:26:41 +0800 Subject: [PATCH 08/58] for cluster test --- txn_manager/txn_server.cpp | 9 +++------ txn_manager/txn_server_test.cpp | 8 ++++++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index fb7dec7fb..b7c631384 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -224,8 +224,6 @@ RetCode TxnServer::Init(int concurrency, int port) { RecoveryFromCatalog(); RecoveryFromTxnLog(); srand((unsigned) time(NULL)); - if (!LogServer::is_active) - LogServer::init("txn-log"); return 0; } @@ -240,7 +238,6 @@ RetCode TxnServer::BeginIngest(const FixTupleIngestReq & request, Ingest & inges LogClient::Begin(ingest.Id); for (auto & strip : ingest.StripList) LogClient::Write(ingest.Id, strip.first, strip.second.first, strip.second.second); - LogClient::PushToDisk(); } return ret; } @@ -252,7 +249,7 @@ RetCode TxnServer::CommitIngest(const Ingest & ingest) { await([&](int r) { ret = r;}); if (ret == 0) { LogClient::Commit(ingest.Id); - LogClient::PushToDisk(); + LogClient::Refresh(); } return ret; } @@ -264,7 +261,7 @@ RetCode TxnServer::AbortIngest(const Ingest & ingest) { await([&](int r) { ret = r;}); if (ret == 0) { LogClient::Abort(ingest.Id); - 
LogClient::PushToDisk(); + LogClient::Refresh(); } return ret; } @@ -307,7 +304,7 @@ RetCode TxnServer::CommitCheckpoint(const Checkpoint & cp) { TxnServer::PhyCPList[cp.Part] = cp.PhyCP; if (ret == 0) { LogClient::Checkpoint(cp.Part, cp.LogicCP, cp.PhyCP); - LogClient::PushToDisk(); + LogClient::Refresh(); } return ret; } diff --git a/txn_manager/txn_server_test.cpp b/txn_manager/txn_server_test.cpp index 97c12db8b..93ceb9d2c 100644 --- a/txn_manager/txn_server_test.cpp +++ b/txn_manager/txn_server_test.cpp @@ -173,7 +173,7 @@ void task(int time){ LogClient::Data(1, 1, 1111,(void*)v, 1024); LogClient::Data(1, 1, 1111,(void*)v, 1024); LogClient::Data(1, 1, 1111,(void*)v, 1024); - LogClient::PushToDisk(); + TxnClient::CommitIngest(ingest); // } } @@ -221,11 +221,15 @@ int main(){ // } // sleep(1); memset(v, 1024, '*'); + string path; + cout << "input path" << endl; + cin >> path; TxnServer::Init(); + LogServer::init(path); struct timeval tv1, tv2; - vector v; int n = 1, time =1; + cout << "input #thread, #time" << endl; cin >> n >> time; gettimeofday(&tv1,NULL); for (auto i=0;i Date: Tue, 19 Apr 2016 22:17:51 +0800 Subject: [PATCH 09/58] add txn manager and log to config and enviroment --- Config.cpp | 32 ++++ Config.h | 9 + Environment.cpp | 26 +++ Environment.h | 13 ++ conf/config | 25 ++- loader/master_loader.cpp | 8 +- physical_operator/Makefile.am | 3 +- physical_operator/physical_txn_scan.cpp | 213 ++++++++++++++++++++++++ physical_operator/physical_txn_scan.hpp | 161 ++++++++++++++++++ txn_manager/txn.hpp | 8 +- txn_manager/txn_client.hpp | 5 +- txn_manager/txn_log.cpp | 200 ++++++++++++++++++++++ txn_manager/txn_log.hpp | 134 +++++++++++++++ txn_manager/txn_server.cpp | 13 +- txn_manager/txn_server.hpp | 15 +- 15 files changed, 839 insertions(+), 26 deletions(-) create mode 100644 physical_operator/physical_txn_scan.cpp create mode 100644 physical_operator/physical_txn_scan.hpp create mode 100644 txn_manager/txn_log.cpp create mode 100644 
txn_manager/txn_log.hpp diff --git a/Config.cpp b/Config.cpp index c8dab029e..b0935a63d 100644 --- a/Config.cpp +++ b/Config.cpp @@ -94,6 +94,16 @@ bool Config::is_master_loader; std::string Config::master_loader_ip; int Config::master_loader_port; + +bool Config::enable_txn_server; +int Config::txn_server_cores; +std::string Config::txn_server_ip; +int Config::txn_server_port; + +bool Config::enable_txn_log; +std::string Config::txn_log_path; + + Config *Config::getInstance() { if (instance_ == 0) { instance_ = new Config(); @@ -161,6 +171,20 @@ void Config::initialize() { master_loader_port = getInt("master_loader_port", 9001); + // txn manager + enable_txn_server = getBoolean("txn_server", true); + + txn_server_cores = getInt("txn_server_cores", 4); + + txn_server_ip = getString("txn_server_ip", "127.0.0.1"); + + txn_server_port = getInt("txn_server_port", 9100); + + // txn log + enable_txn_log = getBoolean("txn_log", true); + + txn_log_path = getString("txn_log_path", "."); + #ifdef DEBUG_Config print_configure(); #endif @@ -219,6 +243,14 @@ void Config::print_configure() const { std::cout << "catalog_file:" << catalog_file << std::endl; std::cout << "codegen:" << enable_codegen << std::endl; std::cout << "load_thread_num:" << load_thread_num << std::endl; + + std::cout << "enable_txn_serverr:" << enable_txn_server << std::endl; + std::cout << "txn_server_cores:" << txn_server_cores << std::endl; + std::cout << "txn_server_ip:" << txn_server_ip << std::endl; + std::cout << "txn_server_port:" << txn_server_port << std::endl; + + std::cout << "enable_txn_log:" << enable_txn_log << std::endl; + std::cout << "txn_log_path:" << txn_log_path << std::endl; } void Config::setConfigFile(std::string file_name) { config_file = file_name; } diff --git a/Config.h b/Config.h index 18cb26935..9f8321f0c 100644 --- a/Config.h +++ b/Config.h @@ -83,6 +83,15 @@ class Config { static std::string master_loader_ip; static int master_loader_port; + static bool enable_txn_server; + 
static int txn_server_cores; + static std::string txn_server_ip; + static int txn_server_port; + + static bool enable_txn_log; + static std::string txn_log_path; + + private: static Config* instance_; libconfig::Config cfg; diff --git a/Environment.cpp b/Environment.cpp index 23bdd252b..4006c0174 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -97,6 +97,15 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { LOG(ERROR) << "failed to initialize loader"; } + logging_->log("Initializing txn manager"); + if (!InitTxnManager()) + LOG(ERROR) << "failed to initialize txn manager"; + + logging_->log("Initializing txn log server"); + if (!InitTxnLog()) + LOG(ERROR) << "failed to initialize txn log"; + + logging_->log("Initializing the ExecutorMaster..."); iteratorExecutorMaster = new IteratorExecutorMaster(); @@ -216,6 +225,23 @@ bool Environment::InitLoader() { return true; } +bool Environment::InitTxnManager() { + if (Config::enable_txn_server) { + LOG(INFO) << "I'm txn manager server" ; + TxnServer::Init(Config::txn_server_cores, Config::txn_server_port); + } + TxnClient::Init(Config::txn_server_ip, Config::txn_server_port); + return true; +} + +bool Environment::InitTxnLog() { + if (Config::enable_txn_log) { + LOG(INFO) << "I'm txn log server"; + LogServer::init(Config::txn_log_path); + } + return true; +} + void Environment::initializeBufferManager() { bufferManager_ = BufferManager::getInstance(); } diff --git a/Environment.h b/Environment.h index e9339986c..6fb858216 100755 --- a/Environment.h +++ b/Environment.h @@ -23,6 +23,10 @@ #include "Executor/exchange_tracker.h" #include "Executor/expander_tracker.h" #include "Resource/BufferManager.h" +#include "txn_manager/txn_server.hpp" +#include "txn_manager/txn_client.hpp" +#include "txn_manager/txn_log.hpp" + namespace claims { namespace loader { @@ -33,6 +37,11 @@ class MasterLoader; using claims::catalog::Catalog; using claims::loader::SlaveLoader; using claims::loader::MasterLoader; +using 
claims::txn::TxnServer; +using claims::txn::TxnClient; +using claims::txn::LogServer; +using claims::txn::LogClient; + class Environment { public: @@ -68,6 +77,10 @@ class Environment { bool InitLoader(); + bool InitTxnManager(); + + bool InitTxnLog(); + private: static Environment* _instance; PortManager* portManager; diff --git a/conf/config b/conf/config index ea732ef6f..224d5ae02 100755 --- a/conf/config +++ b/conf/config @@ -27,7 +27,7 @@ client_listener_port = 10000 #data="/home/imdb/data/wangli/" #data="/home/imdb/data/POC/sample/" #data="/home/minqi/git/Data/data/tpc-h/1-partition/sf-1/" -data="/home/minqi/git/Data/data/tpc-h/18-partition/sf-1/" +data="/home/imdb/config/tpc-h/1-partition/sf-1/" #data="/home/imdb/data/SF-1/" #data="/home/imdb/data/SF-1/" #data="/home/imdb/data/stock/" @@ -63,7 +63,26 @@ scan_batch=100 is_master_loader=1 -master_loader_ip="10.11.1.192" +master_loader_ip="127.0.0.1" + +master_loader_port=9002 + +#事务服务器 +txn_server=1 + +txn_server_cores=4 + +txn_server_ip="127.0.0.1" + +txn_server_port=9100 + +#事务日志 +txn_log=1 + +txn_log_path="txn-log" + + + + -master_loader_port=9001 diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 97539a035..c1445f48d 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -29,7 +29,7 @@ #include "./master_loader.h" #include #include - +#include "unistd.h" #include "caf/all.hpp" #include "caf/io/all.hpp" #include @@ -175,8 +175,10 @@ void* MasterLoader::StartMasterLoader(void* arg) { EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->ConnectWithSlaves(), "failed to connect all slaves"); - EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), - "failed to ingest data"); + while(true) + sleep(10); +// EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), +// "failed to ingest data"); return NULL; } diff --git a/physical_operator/Makefile.am b/physical_operator/Makefile.am index ddc6bfd42..74056c961 100644 --- a/physical_operator/Makefile.am +++ b/physical_operator/Makefile.am @@ 
-48,7 +48,8 @@ libphysicalqueryplan_a_SOURCES = \ physical_filter.cpp physical_filter.h \ physical_projection_scan.cpp physical_projection_scan.h \ physical_nest_loop_join.h physical_nest_loop_join.cpp \ - physical_delete_filter.h physical_delete_filter.cpp + physical_delete_filter.h physical_delete_filter.cpp \ + physical_txn_scan.hpp physical_txn_scan.cpp SUBDIRS = DIST_SUBDIRS = diff --git a/physical_operator/physical_txn_scan.cpp b/physical_operator/physical_txn_scan.cpp new file mode 100644 index 000000000..5c189a5c2 --- /dev/null +++ b/physical_operator/physical_txn_scan.cpp @@ -0,0 +1,213 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /CLAIMS/physical_operator/physical_txn_scan.cpp + * + * Created on: 2016年4月19日 + * Author: imdb + * Email: + * + * Description: + * + */ + +#include "../physical_operator/physical_txn_scan.hpp" + +#include +#include +#include +#include +#include +#include +#include "../common/rename.h" +#include "../storage/BlockManager.h" +#include "../Config.h" +#include "../utility/warmup.h" +#include "../storage/ChunkStorage.h" +#include "../Executor/expander_tracker.h" +#include "../storage/PartitionStorage.h" +using claims::common::rNoPartitionIdScan; +using claims::common::rSuccess; +using claims::common::rCodegenFailed; + +namespace claims { +namespace physical_operator { +PhysicalTxnScan::PhysicalTxnScan(State state) + : state_(state), partition_reader_iterator_(NULL), perf_info_(NULL) { + InitExpandedStatus(); +} + +PhysicalTxnScan::PhysicalTxnScan() + : partition_reader_iterator_(NULL), perf_info_(NULL) { + InitExpandedStatus(); +} + +PhysicalTxnScan::~PhysicalTxnScan() { + if (NULL != state_.schema_) { + delete state_.schema_; + state_.schema_ = NULL; + } + if (NULL != perf_info_) { + delete perf_info_; + perf_info_ = NULL; + } +} + +PhysicalTxnScan::State::State(ProjectionID projection_id, Schema* schema, + unsigned block_size, float sample_rate) + : schema_(schema), + projection_id_(projection_id), + block_size_(block_size), + sample_rate_(sample_rate) {} + +/** + * Initialize the operator to get the initial position. Scan is the start point + * of stage, get instance of ExpanderTracker to add this point. Different policy + * decide if it generates a buffer. 
+ */ + +bool PhysicalTxnScan::Open(const PartitionOffset& kPartitionOffset) { + RegisterExpandedThreadToAllBarriers(); + + if (TryEntryIntoSerializedSection()) { + /* this is the first expanded thread*/ + PartitionStorage* partition_handle_; + if (NULL == + (partition_handle_ = BlockManager::getInstance()->getPartitionHandle( + PartitionID(state_.projection_id_, kPartitionOffset)))) { + LOG(ERROR) << PartitionID(state_.projection_id_, kPartitionOffset) + .getName() + .c_str() << CStrError(rNoPartitionIdScan) << std::endl; + SetReturnStatus(false); + } else { + partition_reader_iterator_ = + partition_handle_->createAtomicReaderIterator(); + SetReturnStatus(true); + } + +#ifdef AVOID_CONTENTION_IN_SCAN + unsigned long long start = curtick(); + + ChunkReaderIterator* chunk_reader_it; + ChunkReaderIterator::block_accessor* ba; + while (chunk_reader_it = partition_reader_iterator_->nextChunk()) { + while (chunk_reader_it->getNextBlockAccessor(ba)) { + ba->getBlockSize(); + input_dataset_.input_data_blocks_.push_back(ba); + } + } +#endif + ExpanderTracker::getInstance()->addNewStageEndpoint( + pthread_self(), LocalStageEndPoint(stage_src, "Scan", 0)); + perf_info_ = + ExpanderTracker::getInstance()->getPerformanceInfo(pthread_self()); + perf_info_->initialize(); + } + BarrierArrive(); + return GetReturnStatus(); +} + +/** + * There are two method of strategy to scan data. + * 1) make a buffer(input_data). wait for quantitative block and return it. + * because destorySelfContext() is not work, we don't use this method(code has + * commented). + * 2) get a block and return it immediately. + */ + +// TODO(Hanzhang): According to AVOID_CONTENTION_IN_SCAN, we choose the +// strategy. We need finish case(1). 
+bool PhysicalTxnScan::Next(BlockStreamBase* block) { + unsigned long long total_start = curtick(); +#ifdef AVOID_CONTENTION_IN_SCAN + ScanThreadContext* stc = reinterpret_cast(GetContext()); + if (NULL == stc) { + stc = new ScanThreadContext(); + InitContext(stc); + } + if (ExpanderTracker::getInstance()->isExpandedThreadCallBack( + pthread_self())) { + input_dataset_.AtomicPut(stc->assigned_data_); + delete stc; + destorySelfContext(); + kPerfInfo->report_instance_performance_in_millibytes(); + return false; + } + + if (!stc->assigned_data_.empty()) { + ChunkReaderIterator::block_accessor* ba = stc->assigned_data_.front(); + stc->assigned_data_.pop_front(); + + ba->getBlock(block); + + // whether delete InMemeryBlockAccessor::target_block_start_address + // is depend on whether use copy in ba->getBlock(block); + delete ba; + kPerfInfo->processed_one_block(); + return true; + } else { + if (input_dataset_.AtomicGet(stc->assigned_data_, Config::scan_batch)) { + // case(1) + return Next(block); + } else { + delete stc; + destorySelfContext(); + return false; + } + } + +#else + + if (ExpanderTracker::getInstance()->isExpandedThreadCallBack( + pthread_self())) { + return false; + } + perf_info_->processed_one_block(); + // case(2) + return partition_reader_iterator_->nextBlock(block); + +#endif +} + +bool PhysicalTxnScan::Close() { + if (NULL != partition_reader_iterator_) { + delete partition_reader_iterator_; + partition_reader_iterator_ = NULL; + } + DestoryAllContext(); + + /* reset the expanded status in that open and next will be re-invoked.*/ + InitExpandedStatus(); + return true; +} + +void PhysicalTxnScan::Print() { + printf("Scan (ID=%d)\n", state_.projection_id_.table_id); +} + +bool PhysicalTxnScan::PassSample() const { + if ((rand() / (float)RAND_MAX) < state_.sample_rate_) return true; + return false; +} + +} // namespace physical_operator +} // namespace claims + + + + diff --git a/physical_operator/physical_txn_scan.hpp 
b/physical_operator/physical_txn_scan.hpp new file mode 100644 index 000000000..889bb1c19 --- /dev/null +++ b/physical_operator/physical_txn_scan.hpp @@ -0,0 +1,161 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /CLAIMS/physical_operator/physical_txn_scan.hpp + * + * Created on: 2016年4月19日 + * Author: imdb + * Email: + * + * Description: + * + */ + +#ifndef PHYSICAL_OPERATOR_PHYSICAL_TXN_SCAN_HPP_ +#define PHYSICAL_OPERATOR_PHYSICAL_TXN_SCAN_HPP_ + + +#define GLOG_NO_ABBREVIATED_SEVERITIES +#include +#include +#include +#include +#include +#include "../common/error_no.h" +#include "../physical_operator/physical_operator_base.h" +#include "../common/Schema/Schema.h" +#include "../storage/ChunkStorage.h" +#include "../storage/PartitionStorage.h" +#include "../physical_operator/physical_operator.h" +#include "../common/ExpandedThreadTracker.h" + +namespace claims { +namespace physical_operator { + +typedef std::list assigned_data; +/** + * @brief Method description: As a buffer for input. 
+ */
+
+struct input_dataset {
+  assigned_data input_data_blocks_;
+  SpineLock lock;
+  bool AtomicGet(assigned_data& target, unsigned number_of_block) {
+    lock.acquire();  // guards input_data_blocks_
+    while (number_of_block-- && (!input_data_blocks_.empty())) {
+      target.push_back(input_data_blocks_.front());
+      input_data_blocks_.pop_front();
+    }
+    const bool not_empty = !target.empty();  // evaluated after the transfer
+    lock.release();
+    return not_empty;
+  }
+  void AtomicPut(assigned_data blocks) {
+    lock.acquire();
+    for (assigned_data::iterator it = blocks.begin(); it != blocks.end(); it++)
+      input_data_blocks_.push_front(*it);
+    lock.release();
+  }
+};
+
+/**
+ * Implementation of Scan operator in physical layer. Get blocks for Storage
+ * medium. In the current implementation, for simplicity, the underlying
+ * storage is arranged in blocks, each of which is the same as the size of the
+ * block in the parameter of the next function. Actually, read chunks from
+ * partition, read blocks from chunk.
+ */
+class PhysicalTxnScan : public PhysicalOperator {
+ public:
+  class ScanThreadContext : public ThreadContext {
+   public:
+    ~ScanThreadContext(){};
+    assigned_data assigned_data_;
+  };
+
+  // struct allocated_block {
+  // char* start;
+  // unsigned length;
+  // };
+  class State {
+    friend class PhysicalTxnScan;
+
+   public:
+    State(ProjectionID projection_id, Schema* schema, unsigned block_size,
+          float sample_rate = 1);
+    State(){};
+
+   public:
+    Schema* schema_;
+    ProjectionID projection_id_;
+    unsigned block_size_;
+    float sample_rate_;
+    friend class boost::serialization::access;
+    template <class Archive>
+    void serialize(Archive& ar, const unsigned int version) {
+      ar& schema_& projection_id_& block_size_& sample_rate_;
+    }
+  };
+  PhysicalTxnScan(State state);
+  PhysicalTxnScan();
+  virtual ~PhysicalTxnScan();
+  /**
+   * @brief Method description: Initialize the operator and get the initial
+   * position of chunk read iterator.
+ */ + bool Open(const PartitionOffset& partition_offset = 0); + + /** + * @brief: fetch block from child operator. + */ + + bool Next(BlockStreamBase* block); + /** + * @brief: revoke resource. + */ + bool Close(); + void Print(); + + private: + bool PassSample() const; + + private: + State state_; + PartitionStorage::PartitionReaderItetaor* partition_reader_iterator_; + std::list remaining_chunk_iterator_list_; + Lock chunk_reader_container_lock_; + // like a buffer + input_dataset input_dataset_; + + PerformanceInfo* perf_info_; + + // The following code is for boost serialization. + private: + friend class boost::serialization::access; + template + void serialize(Archive& ar, const unsigned int version) { + ar& boost::serialization::base_object(*this) & state_; + } +}; + +} // namespace physical_operator +} // namespace claims + + + +#endif // PHYSICAL_OPERATOR_PHYSICAL_TXN_SCAN_HPP_ diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index efbeadf36..463d82787 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -41,6 +41,10 @@ #include #include "caf/all.hpp" #include "caf/io/all.hpp" + +namespace claims { +namespace txn{ + using std::cin; using std::cout; using std::endl; @@ -52,9 +56,7 @@ using std::unordered_map; using std::to_string; using std::function; using std::sort; -using std::make_pair;; -namespace claims { -namespace txn{ +using std::make_pair; using UInt64 = unsigned long long; using UInt32 = unsigned int; using UInt16 = unsigned short; diff --git a/txn_manager/txn_client.hpp b/txn_manager/txn_client.hpp index 77949c6d2..b47b9c36a 100644 --- a/txn_manager/txn_client.hpp +++ b/txn_manager/txn_client.hpp @@ -28,7 +28,6 @@ #ifndef TXN_CLIENT_HPP_ #define TXN_CLIENT_HPP_ - #include #include #include @@ -47,6 +46,7 @@ #include "txn.hpp" #include "txn_server.hpp" #include + using std::cin; using std::cout; using std::endl; @@ -54,7 +54,6 @@ using std::vector; using std::string; using std::map; using std::pair; -using std::unordered_map; 
using std::to_string; using std::function; using std::sort; @@ -65,6 +64,8 @@ using std::chrono::milliseconds; namespace claims{ namespace txn{ + + class TxnClient{ public: static string Ip; diff --git a/txn_manager/txn_log.cpp b/txn_manager/txn_log.cpp new file mode 100644 index 000000000..7f3e2f7e6 --- /dev/null +++ b/txn_manager/txn_log.cpp @@ -0,0 +1,200 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ * /log/log.cpp
+ *
+ * Created on: Feb 24, 2016
+ * Author: imdb
+ * Email:
+ *
+ * Description:
+ *
+ */
+#include "txn_log.hpp"
+
+namespace claims{
+namespace txn{
+
+
+string LogServer::log_path = ".";
+FILE * LogServer::log_handler = nullptr;
+UInt64 LogServer::log_size = 0;
+UInt64 LogServer::max_log_size = kMaxLogSize;
+char * LogServer::buffer = nullptr;
+UInt64 LogServer::buffer_size = 0;
+UInt64 LogServer::max_buffer_size = kMaxLogSize * 10;
+caf::actor LogServer::log_server;
+bool LogServer::is_active = false;
+
+RetCode LogServer::init(const string path) {
+  cout << "log server init" << endl;
+  buffer = static_cast<char*>(malloc(max_buffer_size));
+  if (buffer == nullptr) return -1;  // fail before any actor is spawned
+  log_path = path;
+  log_server = caf::spawn<LogServer>();
+  is_active = true;
+  return 0;
+}
+
+caf::behavior LogServer::make_behavior() {
+
+  return {
+    [=](BeginAtom, UInt64 id)->RetCode {
+      return Append(BeginLog(id));
+    },
+    [=](WriteAtom,UInt64 id, UInt64 part, UInt64 pos,
+        UInt64 offset)->RetCode {
+      return Append(WriteLog(id, part, pos, offset));
+    },
+    [=](CommitAtom, UInt64 id)->RetCode {
+      return Append(CommitLog(id));
+    },
+    [=](AbortAtom, UInt64 id)->RetCode {
+      return Append(AbortLog(id));
+    },
+    [=](CheckpointAtom, UInt64 part, UInt64 logic_cp, UInt64 phy_cp)
+        ->RetCode {
+      return Append(CheckpointLog(part, logic_cp, phy_cp));
+    },
+    [=](DataAtom,UInt64 part, UInt64 pos, UInt64 offset,
+        void * buffer, UInt64 size)->RetCode {
+      RetCode ret = Append(DataLogPrefix(part, pos, offset, size));
+      if (ret == 0) ret = Append(buffer, size);  // payload only after prefix
+      return ret;  // report a full buffer instead of always claiming success
+    },
+    [=](RefreshAtom)->RetCode {
+      return Refresh();
+    },
+    caf::others >> [=] () { cout << "unkown log message" << endl; }
+  };
+}
+
+RetCode LogServer::Append (const string & log) {
+  if (buffer_size + log.length() >= max_buffer_size) {
+    cout << "append fail" << endl;
+    return -1;
+  }
+  memcpy(buffer + buffer_size, log.c_str(), log.length());
+  buffer_size += log.length();
+  log_size += log.length();
+  return 0;
+}
+
+RetCode LogServer::Append(void * data, UInt64
size){ + if (buffer_size + size >= max_buffer_size) + return -1; + + memcpy(buffer + buffer_size, data, size); + buffer_size += size; + buffer[buffer_size++] = '\n'; + log_size += size + 1; + + return 0; +} + +RetCode LogServer::Refresh() { + if (log_handler == nullptr) { + struct timeval ts; + gettimeofday (&ts, NULL); + string file = log_path + "/" + kTxnLogFileName + to_string(ts.tv_sec); + log_handler = fopen (file.c_str(),"a"); + if (log_handler == nullptr) return -1; + } + + if (buffer_size == 0) + return 0; + //cout << buffer_size << endl; + fwrite(buffer, sizeof(char), buffer_size, log_handler); + fflush(log_handler); + buffer_size = 0; + + + /* 日志文件已满 */ + if(log_size >= max_log_size) { + if (log_handler == nullptr) return -1; + fclose(log_handler); + log_handler = nullptr; + log_size = 0; + } + return 0; +} + +RetCode LogClient::Begin(UInt64 id) { + RetCode ret = 0; + caf::scoped_actor self; + self->sync_send( LogServer::log_server,BeginAtom::value, id). + await( [&](RetCode ret_code) { ret = ret_code;}); + return ret; +} +RetCode LogClient::Write(UInt64 id, UInt64 part, UInt64 pos, UInt64 offset) { + RetCode ret = 0; + caf::scoped_actor self; + self->sync_send(LogServer::log_server, + WriteAtom::value, id, part, pos, offset).await( + [&](RetCode ret_code) { ret = ret_code;} + ); + return ret; +} +RetCode LogClient::Commit(UInt64 id) { + RetCode ret = 0; + caf::scoped_actor self; + self->sync_send( LogServer::log_server, + CommitAtom::value,id).await( + [&](RetCode ret_code) { ret = ret_code;} + ); + return ret; +} +RetCode LogClient::Abort(UInt64 id) { + RetCode ret = 0; + caf::scoped_actor self; + self->sync_send( LogServer::log_server, + AbortAtom::value, id).await( + [&](RetCode ret_code) { ret = ret_code;} + ); + return ret; +} +RetCode LogClient::Data(UInt64 part, UInt64 pos, UInt64 offset, void * buffer, UInt64 size) { + RetCode ret = 0; + caf::scoped_actor self; + self->sync_send( LogServer::log_server, + DataAtom::value, part, pos, offset, 
buffer, size).await( + [&](RetCode ret_code) { ret = ret_code;} + ); + return ret; +} +RetCode LogClient::Checkpoint(UInt64 part, UInt64 logic_cp, UInt64 phy_cp) { + RetCode ret = 0; + caf::scoped_actor self; + self->sync_send(LogServer::log_server, + CheckpointAtom::value, part, logic_cp, phy_cp).await( + [&](RetCode ret_code) { ret = ret_code;} + ); + return ret; +} + +RetCode LogClient::Refresh() { + RetCode ret = 0; + caf::scoped_actor self; + self->sync_send(LogServer::log_server, RefreshAtom::value). + await( [&](RetCode ret_code) { ret = ret_code;}); + return ret; +} + + +} +} diff --git a/txn_manager/txn_log.hpp b/txn_manager/txn_log.hpp new file mode 100644 index 000000000..30814083e --- /dev/null +++ b/txn_manager/txn_log.hpp @@ -0,0 +1,134 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /log/log.hpp + * + * Created on: 2016年2月24日 + * Author: imdb + * Email: + * + * Description: + * + */ + +#ifndef LOG_MANAGER_HPP_ +#define LOG_MANAGER_HPP_ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "unistd.h" +#include "dirent.h" +#include "caf/all.hpp" +#include "caf/io/all.hpp" +#include "txn.hpp" + +using std::string; +using std::map; +using std::thread; +using std::cin; +using std::cout; +using std::endl; +using std::ifstream; +using std::to_string; + +namespace claims{ +namespace txn{ + + + + +using UInt64 = unsigned long long; +using UInt32 = unsigned int; +const UInt64 kMaxLogSize = 16 * 1024 * 1024; +const string kTxnLogFileName = "txn_log_"; +const string kDataLogFileName = "data_log_"; +const int kTypeBegin = 1; +const int kTypeWrite = 2; +const int kTypeCommit = 3; +const int kTypeAbort = 4; +const int kTypeCP = 5; +const int kTypeData = 6; + + +class LogServer:public caf::event_based_actor { + public: + static RetCode init(const string path = "."); + static RetCode Append (const string & log); + static RetCode Append (void * buffer, UInt64 size); + static RetCode Refresh (); + inline static string BeginLog(UInt64 Tid){ + return "begin<"+to_string(Tid) +">\n"; + } + + inline static string WriteLog(UInt64 id, UInt64 part, UInt64 pos,UInt64 offset) { + return "write<"+to_string(id)+","+to_string(part)+","+ + to_string(pos)+","+to_string(offset)+">\n"; + } + + inline static string CommitLog(UInt64 id) { + return "commit<"+to_string(id)+">\n"; + } + inline static string AbortLog(UInt64 id) { + return "abort<"+to_string(id)+">\n"; + } + inline static string CheckpointLog(UInt64 part,UInt64 logic_cp, UInt64 phy_cp){ + return "checkpoint<"+to_string(part) + +","+to_string(logic_cp)+","+to_string(phy_cp)+">\n"; + } + inline static string DataLogPrefix(UInt64 part, UInt64 pos, UInt64 offset, UInt64 size ) { + return "data<"+to_string(part)+","+to_string(pos)+ + 
","+to_string(offset)+","+to_string(size)+">\n"; + } + caf::behavior make_behavior(); + static caf::actor log_server; + static bool is_active; + private: + static string log_path; + static FILE * log_handler; + static UInt64 log_size; + static UInt64 max_log_size; + static char * buffer; + static UInt64 buffer_size; + static UInt64 max_buffer_size; + +}; + + +class LogClient{ + public: + static RetCode Begin(UInt64 id); + static RetCode Write(UInt64 id, UInt64 part, UInt64 pos, UInt64 offset); + static RetCode Commit(UInt64 id); + static RetCode Abort(UInt64 id); + static RetCode Data(UInt64 part, UInt64 pos, UInt64 offset, void * buffer, UInt64 size); + static RetCode Checkpoint(UInt64 part, UInt64 logic_cp, UInt64 phy_cp); + static RetCode Refresh(); + +}; + + +} +} +#endif // LOG_MANAGER_HPP_ diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index b7c631384..e369b7095 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -29,6 +29,7 @@ namespace claims{ namespace txn{ + int TxnCore::BufferSize = kTxnBufferSize; @@ -38,10 +39,10 @@ caf::actor TxnServer::Router; vector TxnServer::Cores; bool TxnServer::Active = false; -unordered_map> TxnServer::PosList; -unordered_map TxnServer::LogicCPList; -unordered_map TxnServer::PhyCPList; -unordered_map> TxnServer::CountList; +std::unordered_map> TxnServer::PosList; +std::unordered_map TxnServer::LogicCPList; +std::unordered_map TxnServer::PhyCPList; +std::unordered_map> TxnServer::CountList; RetCode TxnCore::ReMalloc() { Size = 0; @@ -183,9 +184,9 @@ caf::behavior TxnWorker::make_behavior( ){ caf::behavior TxnServer::make_behavior() { try { caf::io::publish(Router, Port); - cout << "publish to port:"<< Port<< " success" << endl; + cout << "txn server bind to port:"<< Port<< " success" << endl; } catch (...) 
{ - cout << "publish to port:"<< Port<< " fail" << endl; + cout << "txn server bind to port:"<< Port<< " fail" << endl; } return { [=](IngestAtom, const FixTupleIngestReq & request) { diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp index ee11266e1..323202c19 100644 --- a/txn_manager/txn_server.hpp +++ b/txn_manager/txn_server.hpp @@ -47,6 +47,9 @@ #include "txn.hpp" #include "txn_log.hpp" #include + +namespace claims{ +namespace txn{ using std::cin; using std::cout; using std::endl; @@ -54,16 +57,12 @@ using std::vector; using std::string; using std::map; using std::pair; -using std::unordered_map; using std::to_string; using std::function; using std::sort; using std::atomic; using std::chrono::seconds; using std::chrono::milliseconds; -namespace claims{ -namespace txn{ - class TxnCore: public caf::event_based_actor { public: static int BufferSize; @@ -98,10 +97,10 @@ class TxnServer: public caf::event_based_actor{ static int Concurrency; static caf::actor Router; static vector Cores; - static unordered_map> PosList; - static unordered_map LogicCPList; - static unordered_map PhyCPList; - static unordered_map> CountList; + static std::unordered_map> PosList; + static std::unordered_map LogicCPList; + static std::unordered_map PhyCPList; + static std::unordered_map> CountList; /**************** User APIs ***************/ static RetCode Init(int concurrency = kConcurrency , int port = kTxnPort); From 941616e36a8ee66ac78a61fc72a287deeb863b13 Mon Sep 17 00:00:00 2001 From: yukai Date: Thu, 21 Apr 2016 10:08:55 +0800 Subject: [PATCH 10/58] complete master loader but GetMessage(); ADD: load packet; ADD: resource guard to auto-release memory after function returning --- Environment.cpp | 5 + Resource/NodeTracker.cpp | 85 ++++--- Resource/NodeTracker.h | 33 +-- common/error_define.h | 2 + common/error_no.cpp | 2 + common/file_handle/hdfs_connector.h | 5 +- loader/Makefile.am | 3 +- loader/data_ingestion.cpp | 2 +- loader/load_packet.cpp | 78 ++++++ 
loader/load_packet.h | 65 +++++ loader/master_loader.cpp | 126 ++++++++-- loader/master_loader.h | 32 ++- txn_manager/txn.hpp | 223 ++++++++--------- txn_manager/txn_server.cpp | 372 +++++++++++++--------------- utility/Makefile.am | 2 +- utility/resource_guard.h | 138 +++++++++++ 16 files changed, 781 insertions(+), 392 deletions(-) create mode 100644 loader/load_packet.cpp create mode 100644 loader/load_packet.h create mode 100644 utility/resource_guard.h diff --git a/Environment.cpp b/Environment.cpp index 23bdd252b..b2fdf303d 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -8,6 +8,8 @@ #include "Environment.h" #include "caf/all.hpp" + +#include "txn_manager/txn_server.hpp" #define GLOG_NO_ABBREVIATED_SEVERITIES #include #undef GLOG_NO_ABBREVIATED_SEVERITIES @@ -38,6 +40,7 @@ using claims::common::InitTypeConversionMatrix; using claims::common::rSuccess; using claims::loader::MasterLoader; using claims::loader::SlaveLoader; +using claims::txn::TxnServer; Environment* Environment::_instance = 0; @@ -204,6 +207,8 @@ bool Environment::InitLoader() { std::thread master_thread(&MasterLoader::StartMasterLoader, nullptr); master_thread.detach(); DLOG(INFO) << "started thread as master loader"; + + TxnServer::Init(6); } usleep(10000); diff --git a/Resource/NodeTracker.cpp b/Resource/NodeTracker.cpp index 7a1e44de9..069a1738b 100755 --- a/Resource/NodeTracker.cpp +++ b/Resource/NodeTracker.cpp @@ -6,46 +6,63 @@ */ #include "NodeTracker.h" -NodeTracker* NodeTracker::instance_=0; -NodeTracker::NodeTracker():allocate_cur_(0) { -} -NodeTracker* NodeTracker::GetInstance(){ - if(instance_==0){ - instance_=new NodeTracker(); - } - return instance_; +#include +NodeTracker* NodeTracker::instance_ = 0; +NodeTracker::NodeTracker() : allocate_cur_(0) {} +NodeTracker* NodeTracker::GetInstance() { + if (instance_ == 0) { + instance_ = new NodeTracker(); + } + return instance_; } NodeTracker::~NodeTracker() { - // TODO Auto-generated destructor stub + // TODO Auto-generated 
destructor stub } -int NodeTracker::RegisterNode(NodeAddress new_node_address){ - if(address_to_id_.find(new_node_address)!=address_to_id_.end()){ - /*node_name already exists.*/ - return -1; - } - const int allocated_id=allocate_cur_++; - address_to_id_[new_node_address]=allocated_id; - return allocated_id; +int NodeTracker::RegisterNode(NodeAddress new_node_address) { + if (address_to_id_.find(new_node_address) != address_to_id_.end()) { + /*node_name already exists.*/ + return -1; + } + const int allocated_id = allocate_cur_++; + address_to_id_[new_node_address] = allocated_id; + return allocated_id; } -std::string NodeTracker::GetNodeIP(const NodeID& target)const{ - boost::unordered_map::const_iterator it=address_to_id_.cbegin(); - while(it!=address_to_id_.cend()){ - if(it->second==target) - return it->first.ip; - it++; - } - return NULL;//TODO avoid return NULL in case of no matching target by changing the return type to be boolean.*/ -// return NULL; +std::string NodeTracker::GetNodeIP(const NodeID& target) const { + boost::unordered_map::const_iterator it = + address_to_id_.cbegin(); + while (it != address_to_id_.cend()) { + if (it->second == target) return it->first.ip; + it++; + } + return NULL; // TODO avoid return NULL in case of no matching target by + // changing the return type to be boolean.*/ + // return NULL; +} +std::vector NodeTracker::GetNodeIDList() const { + std::vector ret; + boost::unordered_map::const_iterator it = + address_to_id_.cbegin(); + while (it != address_to_id_.cend()) { + ret.push_back(it->second); + it++; + } + return ret; } -std::vector NodeTracker::GetNodeIDList()const{ - std::vector ret; - boost::unordered_map::const_iterator it=address_to_id_.cbegin(); - while(it!=address_to_id_.cend()){ - ret.push_back(it->second); - it++; - } - return ret; + +RetCode NodeTracker::GetNodeAddr(const NodeID& target, + NodeAddress& node_addr) const { + boost::unordered_map::const_iterator it = + address_to_id_.cbegin(); + while (it != 
address_to_id_.cend()) { + if (it->second == target) { + node_addr = it->first; + return claims::common::rSuccess; + } + it++; + } + assert(false && "can't find node address according node ID"); + return claims::common::rFailure; } diff --git a/Resource/NodeTracker.h b/Resource/NodeTracker.h index 6c108a567..7ab733c2c 100755 --- a/Resource/NodeTracker.h +++ b/Resource/NodeTracker.h @@ -9,29 +9,34 @@ #ifndef NODETRACKER_H_ #define NODETRACKER_H_ +#include #include #include -#include + +#include "../common/error_define.h" #include "../common/ids.h" #ifdef DMALLOC -#include "dmalloc.h" +#include "./dmalloc.h" #endif -typedef std::string NodeIP;//TODO: may use ip + port to support multiple instances on a single node. +typedef std::string NodeIP; // TODO: may use ip + port to support multiple + // instances on a single node. typedef int NodeID; class NodeTracker { -public: + public: + static NodeTracker* GetInstance(); + virtual ~NodeTracker(); + int RegisterNode(NodeAddress); + std::string GetNodeIP(const NodeID&) const; + RetCode GetNodeAddr(const NodeID&, NodeAddress& node_addr) const; + + std::vector GetNodeIDList() const; - static NodeTracker* GetInstance(); - virtual ~NodeTracker(); - int RegisterNode(NodeAddress); - std::string GetNodeIP(const NodeID&)const; - std::vector GetNodeIDList()const; -private: - NodeTracker(); - boost::unordered_map address_to_id_; - unsigned allocate_cur_; - static NodeTracker* instance_; + private: + NodeTracker(); + boost::unordered_map address_to_id_; + unsigned allocate_cur_; + static NodeTracker* instance_; }; #endif /* NODETRACKER_H_ */ diff --git a/common/error_define.h b/common/error_define.h index 77139cdc3..435b346c6 100644 --- a/common/error_define.h +++ b/common/error_define.h @@ -203,6 +203,8 @@ const int rFileInUsing = -102; const int rResourceIsLocked = -103; +const int rSentMessageError = -104; + // schema associated const int rEmptyAttributeName = -501; const int rEmptyTableAlias = -502; diff --git 
a/common/error_no.cpp b/common/error_no.cpp index 7042d3ac5..2b79ea77a 100644 --- a/common/error_no.cpp +++ b/common/error_no.cpp @@ -159,6 +159,8 @@ ErrorInit::ErrorInit() { DefineErrorAndMessage(rFileInUsing, "Someone is still using this file"); DefineErrorAndMessage(rResourceIsLocked, "other hold the lock of resource"); + DefineErrorAndMessage(rSentMessageError, "failed to send network message"); + // schema assocated DefineErrorAndMessage(rEmptyAttributeName, "the given attribute associated with empty name"); diff --git a/common/file_handle/hdfs_connector.h b/common/file_handle/hdfs_connector.h index fca1304d4..c85e3dfc5 100644 --- a/common/file_handle/hdfs_connector.h +++ b/common/file_handle/hdfs_connector.h @@ -50,8 +50,9 @@ class HdfsConnector { fs_ = hdfsConnect(Config::hdfs_master_ip.c_str(), Config::hdfs_master_port); if (NULL == fs_) { - LOG(ERROR) << "failed to connect to HDFS(ip:" << Config::hdfs_master_ip - << ", port:" << Config::hdfs_master_port << ")" << std::endl; + PLOG(ERROR) << "failed to connect to HDFS(ip:" << Config::hdfs_master_ip + << ", port:" << Config::hdfs_master_port << ")" + << std::endl; assert(false); } LOG(INFO) << "connected to HDFS(ip:" << Config::hdfs_master_ip diff --git a/loader/Makefile.am b/loader/Makefile.am index 9169d2c31..97a2b9855 100644 --- a/loader/Makefile.am +++ b/loader/Makefile.am @@ -36,7 +36,8 @@ libloader_a_SOURCES = \ single_thread_single_file_connector.cpp single_thread_single_file_connector.h \ slave_loader.cpp slave_loader.h \ table_file_connector.cpp table_file_connector.h \ - validity.cpp validity.h + validity.cpp validity.h \ + load_packet.h load_packet.cpp SUBDIRS = test DIST_SUBDIRS = test diff --git a/loader/data_ingestion.cpp b/loader/data_ingestion.cpp index 582e2efb7..8294ba28c 100644 --- a/loader/data_ingestion.cpp +++ b/loader/data_ingestion.cpp @@ -26,6 +26,7 @@ * */ +#include "./data_ingestion.h" #include #include #include @@ -65,7 +66,6 @@ #include "../utility/thread_pool.h" #include 
"../utility/Timer.h" #include "./table_file_connector.h" -#include "data_ingestion.h" using claims::common::FileOpenFlag; using claims::common::FilePlatform; diff --git a/loader/load_packet.cpp b/loader/load_packet.cpp new file mode 100644 index 000000000..8aee98d9e --- /dev/null +++ b/loader/load_packet.cpp @@ -0,0 +1,78 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ * /Claims/loader/load_packet.cpp
+ *
+ * Created on: Apr 17, 2016
+ * Author: yukai
+ * Email: yukai2014@gmail.com
+ *
+ * Description:
+ *
+ */
+
+#include "./load_packet.h"
+#include "../common/memory_handle.h"
+
+using namespace claims::common;  // NOLINT
+
+namespace claims {
+namespace loader {
+
+LoadPacket::~LoadPacket() {}
+
+RetCode LoadPacket::Serialize(void*& packet_buffer,
+                              uint64_t& packet_length) const {
+  packet_length = sizeof(uint64_t) * 4 + data_length_;  // 4-word header + data
+  packet_buffer = Malloc(packet_length);
+  if (NULL == packet_buffer) {  // test the allocation, not the length
+    ELOG(rNoMemory, "no memory for packet buffer");
+    return rNoMemory;
+  }
+
+  *reinterpret_cast<uint64_t*>(packet_buffer) = global_part_id_;
+  *reinterpret_cast<uint64_t*>(packet_buffer + sizeof(uint64_t)) = pos_;
+  *reinterpret_cast<uint64_t*>(packet_buffer + 2 * sizeof(uint64_t)) = offset_;
+  *reinterpret_cast<uint64_t*>(packet_buffer + 3 * sizeof(uint64_t)) =
+      data_length_;
+
+  memcpy(packet_buffer + 4 * sizeof(uint64_t), data_buffer_, data_length_);
+  return rSuccess;
+}
+
+RetCode LoadPacket::Deserialize(const void* const packet_buffer,
+                                const uint64_t packet_length) {
+  global_part_id_ = *reinterpret_cast<const uint64_t*>(packet_buffer);
+  pos_ = *reinterpret_cast<const uint64_t*>(packet_buffer + sizeof(uint64_t));
+  offset_ =
+      *reinterpret_cast<const uint64_t*>(packet_buffer + 2 * sizeof(uint64_t));
+  data_length_ =
+      *reinterpret_cast<const uint64_t*>(packet_buffer + 3 * sizeof(uint64_t));
+
+  data_buffer_ = Malloc(data_length_);
+  if (NULL == data_buffer_) {
+    ELOG(rNoMemory, "no memory for data buffer");
+    return rNoMemory;
+  }
+
+  memcpy(data_buffer_, packet_buffer + 4 * sizeof(uint64_t), data_length_);
+  return rSuccess;
+}
+
+} /* namespace loader */
+} /* namespace claims */
diff --git a/loader/load_packet.h b/loader/load_packet.h
new file mode 100644
index 000000000..8758783e0
--- /dev/null
+++ b/loader/load_packet.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright [2012-2015] DaSE@ECNU
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * /Claims/loader/load_packet.h
+ *
+ * Created on: Apr 17, 2016
+ * Author: yukai
+ * Email: yukai2014@gmail.com
+ *
+ * Description:
+ *
+ */
+
+#ifndef LOADER_LOAD_PACKET_H_
+#define LOADER_LOAD_PACKET_H_
+#include "../common/error_define.h"
+#include "../txn_manager/txn.hpp"
+
+namespace claims {
+namespace loader {
+
+class LoadPacket {
+ public:
+  LoadPacket(const uint64_t g_part_id, uint64_t pos, uint64_t offset,
+             uint64_t data_length, const void* data_buffer)
+      : global_part_id_(g_part_id),
+        pos_(pos),
+        offset_(offset),
+        data_length_(data_length),  // initialisers follow declaration order
+        data_buffer_(const_cast<void*>(data_buffer)) {}
+  ~LoadPacket();
+  RetCode Serialize(void*& packet_buffer, uint64_t& packet_length) const;
+
+  RetCode Deserialize(const void* const packet_buffer,
+                      const uint64_t packet_length);
+
+ private:
+  // uint64_t packet_length_;
+  uint64_t global_part_id_;
+  // uint64_t txn_id_;
+  uint64_t pos_;
+  uint64_t offset_;
+  uint64_t data_length_;
+  void* data_buffer_;
+};
+
+} /* namespace loader */
+} /* namespace claims */
+
+#endif  // LOADER_LOAD_PACKET_H_
diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp
index 8f5302a67..b1e6f35cc 100644
--- a/loader/master_loader.cpp
+++ b/loader/master_loader.cpp
@@ -31,19 +31,27 @@
 #include
 #include
 #include
+#include
+#include
 #include
"caf/all.hpp" #include "caf/io/all.hpp" + +#include "./load_packet.h" #include "./loader_message.h" #include "./validity.h" #include "../catalog/catalog.h" +#include "../catalog/partitioner.h" #include "../catalog/table.h" #include "../common/data_type.h" +#include "../common/ids.h" #include "../common/memory_handle.h" #include "../common/Schema/TupleConvertor.h" #include "../Config.h" #include "../Environment.h" -#include "../utility/stl_guard.h" +#include "../txn_manager/txn.hpp" +#include "../txn_manager/txn_client.hpp" +#include "../utility/resource_guard.h" using caf::aout; using caf::behavior; using caf::event_based_actor; @@ -53,23 +61,21 @@ using caf::mixin::sync_sender_impl; using caf::spawn; using std::endl; using claims::catalog::Catalog; +using claims::catalog::Partitioner; using claims::catalog::TableDescriptor; using claims::common::Malloc; using claims::common::rSuccess; using claims::common::rFailure; +using namespace claims::txn; // NOLINT namespace claims { namespace loader { MasterLoader::MasterLoader() : master_loader_ip(Config::master_loader_ip), - master_loader_port(Config::master_loader_port) { - // TODO Auto-generated constructor stub -} + master_loader_port(Config::master_loader_port) {} -MasterLoader::~MasterLoader() { - // TODO Auto-generated destructor stub -} +MasterLoader::~MasterLoader() {} static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, MasterLoader* mloader) { @@ -85,9 +91,13 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, LOG(INFO) << "succeed to get connected fd with slave"; } assert(new_slave_fd > 3); - mloader->slave_addrs_.push_back(NetAddr(ip, port)); - mloader->slave_sockets_.push_back(new_slave_fd); - assert(mloader->slave_sockets_.size() == mloader->slave_addrs_.size()); + // mloader->slave_addrs_.push_back(NetAddr(ip, port)); + // mloader->slave_sockets_.push_back(new_slave_fd); + // assert(mloader->slave_sockets_.size() == + // mloader->slave_addrs_.size()); + + 
mloader->slave_addr_to_socket.insert( + pair(NodeAddress(ip, port), new_slave_fd)); DLOG(INFO) << "start to send test message to slave"; // test whether socket works well @@ -171,15 +181,19 @@ RetCode MasterLoader::Ingest() { "failed to merge tuples buffers into one buffer"); // start transaction from here - ApplyTransaction(req, table, partition_buffers); + claims::txn::Ingest ingest; + EXEC_AND_LOG(ret, ApplyTransaction(table, partition_buffers, ingest), + "applied transaction", "failed to apply transaction"); // write data log EXEC_AND_LOG(ret, WriteLog(req, table, partition_buffers), "written log ", "failed to write log"); + // reply ACK to MQ EXEC_AND_LOG(ret, ReplyToMQ(req), "replied to MQ", "failed to reply to MQ"); - EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers), + // distribute partition load task + EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), "sent every partition data to its slave", "failed to send every partition data to its slave"); @@ -244,6 +258,7 @@ RetCode MasterLoader::GetPartitionTuples( for (auto tuple_string : req.tuples_) { void* tuple_buffer = Malloc(table->getSchema()->getTupleMaxSize()); if (tuple_buffer == NULL) return claims::common::rNoMemory; + MemoryGuardWithRetCode guard(tuple_buffer, ret); if (rSuccess != (ret = table->getSchema()->CheckAndToValue( tuple_string, tuple_buffer, req.col_sep_, RawDataSource::kSQL, columns_validities))) { @@ -306,9 +321,22 @@ RetCode MasterLoader::GetPartitionTuples( } RetCode MasterLoader::ApplyTransaction( - const IngestionRequest& req, const TableDescriptor* table, - const vector>& partition_buffers) { + const TableDescriptor* table, + const vector>& partition_buffers, + claims::txn::Ingest& ingest) { RetCode ret = rSuccess; + uint64_t table_id = table->get_table_id(); + + FixTupleIngestReq req; + for (int i = 0; i < table->getNumberOfProjection(); ++i) { + ProjectionDescriptor* prj = table->getProjectoin(i); + uint64_t tuple_length = 
prj->getSchema()->getTupleMaxSize(); + for (int j = 0; j < prj->getPartitioner()->getNumberOfPartitions(); ++j) { + req.Insert(GetGlobalPartId(table_id, i, j), tuple_length, + partition_buffers[i][j].length_ / tuple_length); + } + } + TxnClient::BeginIngest(req, ingest); return ret; } @@ -321,7 +349,39 @@ RetCode MasterLoader::ReplyToMQ(const IngestionRequest& req) {} RetCode MasterLoader::SendPartitionTupleToSlave( const TableDescriptor* table, - const vector>& partition_buffers) {} + const vector>& partition_buffers, + claims::txn::Ingest& ingest) { + RetCode ret = rSuccess; + uint64_t table_id = table->get_table_id(); + + for (int prj_id = 0; prj_id < partition_buffers.size(); ++prj_id) { + for (int part_id = 0; part_id < partition_buffers[prj_id].size(); + ++part_id) { + uint64_t global_part_id = GetGlobalPartId(table_id, prj_id, part_id); + LoadPacket packet(global_part_id, ingest.StripList[global_part_id].first, + ingest.StripList[global_part_id].second, + partition_buffers[prj_id][part_id].length_, + partition_buffers[prj_id][part_id].buffer_); + void* packet_buffer; + MemoryGuard guard(packet_buffer); // auto release by guard + uint64_t packet_length; + EXEC_AND_LOG_RETURN(ret, packet.Serialize(packet_buffer, packet_length), + "serialized packet into buffer", + "failed to serialize packet"); + + int socket_fd = -1; + EXEC_AND_LOG_RETURN(ret, SelectSocket(table, prj_id, part_id, socket_fd), + "selected the socket", "failed to select the socket"); + assert(socket_fd > 3); + + EXEC_AND_LOG_RETURN(ret, + SendPacket(socket_fd, packet_buffer, packet_length), + "sent message to slave :" << socket_fd, + "failed to sent message to slave :" << socket_fd); + } + } + return ret; +} RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( const TableDescriptor* table, @@ -335,6 +395,8 @@ RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( int buffer_len = tuple_count * tuple_len; void* new_buffer = Malloc(buffer_len); + if (NULL == new_buffer) return ret = 
claims::common::rNoMemory; + for (int k = 0; k < tuple_count; ++k) { memcpy(new_buffer + k * tuple_len, tuple_buffer_per_part[i][j][k], tuple_len); @@ -351,10 +413,44 @@ RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( return ret; } +RetCode MasterLoader::SelectSocket(const TableDescriptor* table, + const uint64_t prj_id, + const uint64_t part_id, int& socket_fd) { + RetCode ret = rSuccess; + NodeID node_id_in_rmm = + table->getProjectoin(prj_id)->getPartitioner()->getPartitionLocation( + part_id); + NodeAddress addr; + EXEC_AND_LOG_RETURN( + ret, NodeTracker::GetInstance()->GetNodeAddr(node_id_in_rmm, addr), + "got node address", "failed to get node address"); + socket_fd = slave_addr_to_socket[addr]; + return ret; +} + +RetCode MasterLoader::SendPacket(const int socket_fd, + const void* const packet_buffer, + const uint64_t packet_length) { + size_t total_write_num = 0; + while (total_write_num < packet_length) { + ssize_t write_num = write( + socket_fd, static_cast(packet_buffer) + total_write_num, + packet_length - total_write_num); + if (-1 == write_num) { + PLOG(ERROR) << "failed to send buffer to slave(" << socket_fd << "): "; + return claims::common::rSentMessageError; + } + total_write_num += write_num; + } + return rSuccess; +} + void* MasterLoader::StartMasterLoader(void* arg) { Config::getInstance(); LOG(INFO) << "start master loader..."; + TxnClient::Init(); + int ret = rSuccess; MasterLoader* master_loader = Environment::getInstance()->get_master_loader(); EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->ConnectWithSlaves(), diff --git a/loader/master_loader.h b/loader/master_loader.h index db9cd604f..fa88c0483 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -28,12 +28,16 @@ #ifndef LOADER_MASTER_LOADER_H_ #define LOADER_MASTER_LOADER_H_ + +#include #include #include -#include "../common/error_define.h" #include "caf/all.hpp" #include "./validity.h" +#include "../common/error_define.h" +#include "../common/ids.h" +#include 
"../txn_manager/txn.hpp" namespace claims { namespace catalog { @@ -41,6 +45,7 @@ class TableDescriptor; } namespace loader { +using std::map; using std::string; using std::vector; using caf::behavior; @@ -49,12 +54,6 @@ using claims::catalog::TableDescriptor; class MasterLoader { public: - struct NetAddr { - NetAddr(string ip, int port) : ip_(ip), port_(port) {} - string ip_; - int port_; - }; - struct IngestionRequest { string table_name_; string col_sep_; @@ -99,8 +98,9 @@ class MasterLoader { vector>& partition_buffers); RetCode ApplyTransaction( - const IngestionRequest& req, const TableDescriptor* table, - const vector>& partition_buffers); + const TableDescriptor* table, + const vector>& partition_buffers, + claims::txn::Ingest& ingest); RetCode WriteLog(const IngestionRequest& req, const TableDescriptor* table, const vector>& partition_buffers); @@ -109,7 +109,14 @@ class MasterLoader { RetCode SendPartitionTupleToSlave( const TableDescriptor* table, - const vector>& partition_buffers); + const vector>& partition_buffers, + claims::txn::Ingest& ingest); + + RetCode SelectSocket(const TableDescriptor* table, const uint64_t prj_id, + const uint64_t part_id, int& socket_fd); + + RetCode SendPacket(const int socket_fd, const void* const packet_buffer, + const uint64_t packet_length); RetCode GetSlaveNetAddr(); RetCode SetSocketWithSlaves(); @@ -126,8 +133,9 @@ class MasterLoader { private: string master_loader_ip; int master_loader_port; - vector slave_addrs_; - vector slave_sockets_; + // vector slave_addrs_; + // vector slave_sockets_; + boost::unordered_map slave_addr_to_socket; }; } /* namespace loader */ diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index d56c532a8..f3117388e 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -20,8 +20,8 @@ * * Created on: 2016年3月28日 * Author: imdb - * Email: - * + * Email: + * * Description: * */ @@ -52,9 +52,10 @@ using std::unordered_map; using std::to_string; using std::function; using std::sort; 
-using std::make_pair;; +using std::make_pair; +; namespace claims { -namespace txn{ +namespace txn { using UInt64 = unsigned long long; using UInt32 = unsigned int; using UInt16 = unsigned short; @@ -84,131 +85,133 @@ static const int kTimeout = 1; static const int kBlockSize = 64 * 1024; static const int kTailSize = sizeof(unsigned); +inline UInt64 GetGlobalPartId(UInt64 table_id, UInt64 projeciton_id, + UInt64 partition_id) { + return table_id + 1000 * (projeciton_id + 1000 * partition_id); +} + +inline UInt64 GetTableIdFromGlobalPartId(UInt64 global_partition_id) { + return global_partition_id / (1000 * 1000); +} + +inline UInt64 GetProjectionIdFromGlobalPartId(UInt64 global_partition_id) { + return (global_partition_id % (1000 * 1000)) / 1000; +} +inline UInt64 GetPartitionIdFromGlobalPartId(UInt64 global_partition_id) { + return global_partition_id % (1000); +} /********Strip******/ using PStrip = pair; -class Strip{ +class Strip { public: UInt64 Part; UInt64 Pos; UInt64 Offset; Strip() {} - Strip(UInt64 pId, UInt64 pos, UInt32 offset): - Part(pId), Pos(pos), Offset(offset) {} - UInt64 get_Part() const { return Part;} - UInt64 get_Pos() const { return Pos;} - UInt64 get_Offset() const { return Offset;} - void set_Part(UInt64 part) { Part = part;} - void set_Pos(UInt64 pos) { Pos = pos;} - void set_Offset(UInt64 offset) { Offset = offset;} + Strip(UInt64 pId, UInt64 pos, UInt32 offset) + : Part(pId), Pos(pos), Offset(offset) {} + UInt64 get_Part() const { return Part; } + UInt64 get_Pos() const { return Pos; } + UInt64 get_Offset() const { return Offset; } + void set_Part(UInt64 part) { Part = part; } + void set_Pos(UInt64 pos) { Pos = pos; } + void set_Offset(UInt64 offset) { Offset = offset; } string ToString(); - static void Map(vector & input, map> & output); - static void Sort(vector & input); - static void Sort(vector & input); - static void Merge(vector & input); - static void Merge(vector & input); - static void Filter(vector & input, function 
predicate); + static void Map(vector &input, map> &output); + static void Sort(vector &input); + static void Sort(vector &input); + static void Merge(vector &input); + static void Merge(vector &input); + static void Filter(vector &input, + function predicate); }; -inline bool operator == (const Strip & a, const Strip & b) { +inline bool operator==(const Strip &a, const Strip &b) { return a.Part == b.Part && a.Pos == b.Pos && a.Offset == b.Offset; } - /***********FixTupleIngestReq************/ -class FixTupleIngestReq{ +class FixTupleIngestReq { public: /*fix tuple part -> */ map Content; void Insert(UInt64 part, UInt64 tuple_size, UInt64 tuple_count) { Content[part] = make_pair(tuple_size, tuple_count); } - map get_Content() const{ - return Content; - } - void set_Content(const map & content) { - Content = content; - } - string ToString (); + map get_Content() const { return Content; } + void set_Content(const map &content) { Content = content; } + string ToString(); }; -inline bool operator == (const FixTupleIngestReq & a, const FixTupleIngestReq & b) { +inline bool operator==(const FixTupleIngestReq &a, const FixTupleIngestReq &b) { return a.Content == b.Content; } - /****************Ingest***************/ class Ingest { public: UInt64 Id; map StripList; RetCode Ret = 0; - void InsertStrip (UInt64 part, UInt64 pos, UInt64 offset) { + void InsertStrip(UInt64 part, UInt64 pos, UInt64 offset) { StripList[part] = make_pair(pos, offset); } - void InsertStrip (const Strip & strip) { + void InsertStrip(const Strip &strip) { StripList[strip.Part] = make_pair(strip.Pos, strip.Offset); } - UInt64 get_Id() const { return Id;} - map get_StripList() const { return StripList;} + UInt64 get_Id() const { return Id; } + map get_StripList() const { return StripList; } RetCode get_Ret() const {} - void set_Id(const UInt64 & id){ Id = id;} - void set_StripList(const map & stripList) { + void set_Id(const UInt64 &id) { Id = id; } + void set_StripList(const map &stripList) { StripList 
= stripList; } - void set_Ret(RetCode ret) { Ret = ret;} + void set_Ret(RetCode ret) { Ret = ret; } string ToString(); }; -inline bool operator == (const Ingest & a, const Ingest & b) { +inline bool operator==(const Ingest &a, const Ingest &b) { return a.Id == b.Id; } /************QueryReq************/ -class QueryReq{ +class QueryReq { public: vector PartList; - void InsertPart(UInt64 part) { PartList.push_back(part);} - vector get_PartList() const { return PartList;} - void set_PartList(const vector & partList) { PartList = partList;} + void InsertPart(UInt64 part) { PartList.push_back(part); } + vector get_PartList() const { return PartList; } + void set_PartList(const vector &partList) { PartList = partList; } string ToString(); }; -inline bool operator == (const QueryReq & a, const QueryReq & b) { +inline bool operator==(const QueryReq &a, const QueryReq &b) { return a.PartList == b.PartList; } /***********Snapshot***********/ -class Query{ +class Query { public: - map> Snapshot; - map CPList; - RetCode Ret = 0; - void InsertStrip (UInt64 part, UInt64 pos, UInt64 offset){ + map> Snapshot; + map CPList; + RetCode Ret = 0; + void InsertStrip(UInt64 part, UInt64 pos, UInt64 offset) { // if (Snapshot.find(part) == Snapshot.end()) // Snapshot[part] = vector>(); // else - Snapshot[part].push_back(make_pair(pos, offset)); - } - void InsertCP(UInt64 part, UInt64 cp) { - CPList[part] = cp; - } - map> get_Snapshot() const { - return Snapshot; - } - map get_CPList() const { return CPList;} - RetCode get_Ret() const { return Ret;} - void set_Snapshot(const map> & sp){ - Snapshot = sp; - } - void set_CPList(const map & cplist) { - CPList = cplist; - } - void set_Ret(RetCode ret) {Ret = ret;} - string ToString(); + Snapshot[part].push_back(make_pair(pos, offset)); + } + void InsertCP(UInt64 part, UInt64 cp) { CPList[part] = cp; } + map> get_Snapshot() const { return Snapshot; } + map get_CPList() const { return CPList; } + RetCode get_Ret() const { return Ret; } + void 
set_Snapshot(const map> &sp) { Snapshot = sp; } + void set_CPList(const map &cplist) { CPList = cplist; } + void set_Ret(RetCode ret) { Ret = ret; } + string ToString(); }; -inline bool operator == (const Query & a, const Query & b) { +inline bool operator==(const Query &a, const Query &b) { return a.Snapshot == b.Snapshot; } /*********Checkpoint***********/ -class Checkpoint{ +class Checkpoint { public: UInt64 Id; UInt64 Part; @@ -218,63 +221,55 @@ class Checkpoint{ vector CommitStripList; vector AbortStripList; Checkpoint() {} - Checkpoint(UInt64 part, UInt64 newLogicCP, UInt64 oldPhyCP): - Part(part), LogicCP(newLogicCP),PhyCP(oldPhyCP) {} - UInt64 get_Id() const { return Id;} - UInt64 get_Part() const { return Part;} - UInt64 get_LogicCP() const { return LogicCP;} - UInt64 get_PhyCP() const { return PhyCP;} - UInt64 get_Ret() const { return Ret;} - vector get_CommitStripList() const { return CommitStripList;}; - vector get_AbortStripList() const { return AbortStripList;}; - void set_Part(UInt64 part) { Part = part;} - void set_LogicCP(UInt64 logicCP) { LogicCP = logicCP;} - void set_PhyCP(UInt64 phyCP) { PhyCP = phyCP;} - void set_Ret(RetCode ret) { Ret = ret;} - void set_CommitStripList(const vector & commitstripList) { + Checkpoint(UInt64 part, UInt64 newLogicCP, UInt64 oldPhyCP) + : Part(part), LogicCP(newLogicCP), PhyCP(oldPhyCP) {} + UInt64 get_Id() const { return Id; } + UInt64 get_Part() const { return Part; } + UInt64 get_LogicCP() const { return LogicCP; } + UInt64 get_PhyCP() const { return PhyCP; } + UInt64 get_Ret() const { return Ret; } + vector get_CommitStripList() const { return CommitStripList; }; + vector get_AbortStripList() const { return AbortStripList; }; + void set_Part(UInt64 part) { Part = part; } + void set_LogicCP(UInt64 logicCP) { LogicCP = logicCP; } + void set_PhyCP(UInt64 phyCP) { PhyCP = phyCP; } + void set_Ret(RetCode ret) { Ret = ret; } + void set_CommitStripList(const vector &commitstripList) { CommitStripList = 
commitstripList; } - void set_AbortStripList(const vector & abortstripList) { + void set_AbortStripList(const vector &abortstripList) { AbortStripList = abortstripList; } string ToString(); }; -inline bool operator == (const Checkpoint & a, const Checkpoint & b) { +inline bool operator==(const Checkpoint &a, const Checkpoint &b) { return a.Id == b.Id; } inline void SerializeConfig() { caf::announce("FixTupleIngestReq", - make_pair(&FixTupleIngestReq::get_Content, &FixTupleIngestReq::set_Content)); - caf::announce("Ingest", - make_pair(&Ingest::get_Id,&Ingest::set_Id), - make_pair(&Ingest::get_StripList,&Ingest::set_StripList), - make_pair(&Ingest::get_Ret,&Ingest::set_Ret)); - caf::announce("QueryReq", - make_pair(&QueryReq::get_PartList, &QueryReq::set_PartList)); + make_pair(&FixTupleIngestReq::get_Content, + &FixTupleIngestReq::set_Content)); + caf::announce( + "Ingest", make_pair(&Ingest::get_Id, &Ingest::set_Id), + make_pair(&Ingest::get_StripList, &Ingest::set_StripList), + make_pair(&Ingest::get_Ret, &Ingest::set_Ret)); + caf::announce( + "QueryReq", make_pair(&QueryReq::get_PartList, &QueryReq::set_PartList)); caf::announce("Query", - make_pair(&Query::get_Snapshot,&Query::set_Snapshot), - make_pair(&Query::get_CPList, &Query::set_CPList), - make_pair(&Query::get_Ret, &Query::set_Ret)); - caf::announce("Checkpoint", - make_pair(&Checkpoint::get_Part, &Checkpoint::set_Part), - make_pair(&Checkpoint::get_LogicCP, &Checkpoint::set_LogicCP), - make_pair(&Checkpoint::get_PhyCP, &Checkpoint::set_PhyCP), - make_pair(&Checkpoint::get_Ret, &Checkpoint::set_Ret), - make_pair(&Checkpoint::get_CommitStripList, - &Checkpoint::set_CommitStripList), - make_pair(&Checkpoint::get_AbortStripList, - &Checkpoint::set_AbortStripList)); + make_pair(&Query::get_Snapshot, &Query::set_Snapshot), + make_pair(&Query::get_CPList, &Query::set_CPList), + make_pair(&Query::get_Ret, &Query::set_Ret)); + caf::announce( + "Checkpoint", make_pair(&Checkpoint::get_Part, 
&Checkpoint::set_Part), + make_pair(&Checkpoint::get_LogicCP, &Checkpoint::set_LogicCP), + make_pair(&Checkpoint::get_PhyCP, &Checkpoint::set_PhyCP), + make_pair(&Checkpoint::get_Ret, &Checkpoint::set_Ret), + make_pair(&Checkpoint::get_CommitStripList, + &Checkpoint::set_CommitStripList), + make_pair(&Checkpoint::get_AbortStripList, + &Checkpoint::set_AbortStripList)); } - - - - - - - - - } } -#endif // TXN_HPP_ +#endif // TXN_HPP_ diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index 094c2e701..fcb9ee481 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -20,18 +20,17 @@ * * Created on: 2016年4月10日 * Author: imdb - * Email: - * + * Email: + * * Description: * */ #include "txn_server.hpp" -namespace claims{ -namespace txn{ +namespace claims { +namespace txn { int TxnCore::BufferSize = kTxnBufferSize; - int TxnServer::Port = kTxnPort; int TxnServer::Concurrency = kConcurrency; caf::actor TxnServer::Router; @@ -47,173 +46,158 @@ RetCode TxnCore::ReMalloc() { Size = 0; TxnIndex.clear(); try { - delete [] Commit; - delete [] Abort; - delete [] StripList; + delete[] Commit; + delete[] Abort; + delete[] StripList; Commit = new bool[BufferSize]; Abort = new bool[BufferSize]; StripList = new vector[BufferSize]; } catch (...) 
{ - cout << "core:"<delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); + ReMalloc(); + this->delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); return { - [=](IngestAtom, const FixTupleIngestReq * request, Ingest * ingest)->int { - struct timeval tv1; - if (Size >= BufferSize) - return -1; - auto id = ingest->Id = GetId(); - TxnIndex[id] = Size; - Commit[Size] = Abort[Size] = false; - for (auto & item : request->Content) { - auto part = item.first; - auto tupleSize = item.second.first; - auto tupleCount = item.second.second; - auto strip = TxnServer::AtomicMalloc(part, tupleSize, tupleCount); - StripList[Size].push_back(strip); - //cout << strip.ToString() << endl; - ingest->InsertStrip(strip); - } - Size ++; - - return 0; - }, - [=](CommitIngestAtom, const Ingest * ingest)->int{ - if (TxnIndex.find(ingest->Id) == TxnIndex.end()) - return -1; - Commit[TxnIndex[ingest->Id]] = true; - return 0; - }, - [=](AbortIngestAtom, const Ingest * ingest)->int { - if (TxnIndex.find(ingest->Id) == TxnIndex.end()) - return -1; - Commit[TxnIndex[ingest->Id]] = true; - return 0; - }, - [=](QueryAtom, const QueryReq * request, Query * query)->int { - for (auto i = 0; i < Size; i++) - if (Commit[i]) - for (auto & strip : StripList[i]) { - if (query->CPList.find(strip.Part) != query->CPList.end() && - strip.Pos >= query->CPList[strip.Part]) - query->InsertStrip(strip.Part, strip.Pos, strip.Offset); - } - return 1; - }, - [=] (CheckpointAtom, Checkpoint * cp)->int { - - for (auto i = 0; i < Size; i++) - if (Commit[i]) { - for (auto & strip : StripList[i]) - if ( strip.Part == cp->Part && strip.Pos >= cp->LogicCP ) - cp->CommitStripList.push_back(PStrip(strip.Pos, strip.Offset)); - } - else if (Abort[i]) { - for (auto & strip : StripList[i]) - if (strip.Part == cp->Part && strip.Pos >= cp->LogicCP) - cp->AbortStripList.push_back(PStrip(strip.Pos, strip.Offset)); - } - - }, - [=](GCAtom) { - auto size_old = Size; - auto pos = 0; - for (auto i = 0; i < Size; i++) - 
if (!TxnServer::IsStripListGarbage(StripList[i])) { - TxnIndex[TxnIndex[i]] = pos; - Commit[pos] = Commit[i]; - Abort[pos] = Abort[i]; - StripList[pos] = StripList[i]; - ++ pos; - } - Size = pos; - cout <<"core:"<"<< pos << endl; - this->delayed_send(this, seconds(kGCTime), GCAtom::value); - }, - caf::others >> [] () { cout<<"core unkown message"< int { + struct timeval tv1; + if (Size >= BufferSize) return -1; + auto id = ingest->Id = GetId(); + TxnIndex[id] = Size; + Commit[Size] = Abort[Size] = false; + for (auto& item : request->Content) { + auto part = item.first; + auto tupleSize = item.second.first; + auto tupleCount = item.second.second; + auto strip = TxnServer::AtomicMalloc(part, tupleSize, tupleCount); + StripList[Size].push_back(strip); + // cout << strip.ToString() << endl; + ingest->InsertStrip(strip); + } + Size++; + + return 0; + }, + [=](CommitIngestAtom, const Ingest* ingest) -> int { + if (TxnIndex.find(ingest->Id) == TxnIndex.end()) return -1; + Commit[TxnIndex[ingest->Id]] = true; + return 0; + }, + [=](AbortIngestAtom, const Ingest* ingest) -> int { + if (TxnIndex.find(ingest->Id) == TxnIndex.end()) return -1; + Commit[TxnIndex[ingest->Id]] = true; + return 0; + }, + [=](QueryAtom, const QueryReq* request, Query* query) -> int { + for (auto i = 0; i < Size; i++) + if (Commit[i]) + for (auto& strip : StripList[i]) { + if (query->CPList.find(strip.Part) != query->CPList.end() && + strip.Pos >= query->CPList[strip.Part]) + query->InsertStrip(strip.Part, strip.Pos, strip.Offset); + } + return 1; + }, + [=](CheckpointAtom, Checkpoint* cp) -> int { + + for (auto i = 0; i < Size; i++) + if (Commit[i]) { + for (auto& strip : StripList[i]) + if (strip.Part == cp->Part && strip.Pos >= cp->LogicCP) + cp->CommitStripList.push_back(PStrip(strip.Pos, strip.Offset)); + } else if (Abort[i]) { + for (auto& strip : StripList[i]) + if (strip.Part == cp->Part && strip.Pos >= cp->LogicCP) + cp->AbortStripList.push_back(PStrip(strip.Pos, strip.Offset)); + } + + }, 
+ [=](GCAtom) { + auto size_old = Size; + auto pos = 0; + for (auto i = 0; i < Size; i++) + if (!TxnServer::IsStripListGarbage(StripList[i])) { + TxnIndex[TxnIndex[i]] = pos; + Commit[pos] = Commit[i]; + Abort[pos] = Abort[i]; + StripList[pos] = StripList[i]; + ++pos; + } + Size = pos; + cout << "core:" << CoreId << ",gc:" << size_old << "=>" << pos << endl; + this->delayed_send(this, seconds(kGCTime), GCAtom::value); + }, + caf::others >> []() { cout << "core unkown message" << endl; }}; } -caf::behavior TxnWorker::make_behavior( ){ - return { - [=](IngestAtom, const FixTupleIngestReq & request)->caf::message { - Ingest ingest; - auto ret = TxnServer::BeginIngest(request, ingest); - quit(); - return caf::make_message(ingest, ret); - }, - [=](CommitIngestAtom, const Ingest & ingest)->RetCode { - quit(); - return TxnServer::CommitIngest(ingest); - }, - [=](AbortIngestAtom, const Ingest & ingest)->RetCode { - quit(); - return TxnServer::AbortIngest(ingest); - }, - [=](QueryAtom, const QueryReq & request)->caf::message { - Query query; - auto ret = TxnServer::BeginQuery(request, query); - quit(); - return caf::make_message(query, ret); - }, - [=](CheckpointAtom, const UInt64 part)->caf::message{ - Checkpoint cp; - cp.Part = part; - auto ret = TxnServer::BeginCheckpoint(cp); - quit(); - return caf::make_message(cp, ret); - }, - [=](CommitCPAtom, const Checkpoint & cp)->RetCode { - quit(); - return TxnServer::CommitCheckpoint(cp); - }, - caf::others >> [] () { cout<<"work unkown message"< caf::message { + Ingest ingest; + auto ret = TxnServer::BeginIngest(request, ingest); + quit(); + return caf::make_message(ingest, ret); + }, + [=](CommitIngestAtom, const Ingest& ingest) -> RetCode { + quit(); + return TxnServer::CommitIngest(ingest); + }, + [=](AbortIngestAtom, const Ingest& ingest) -> RetCode { + quit(); + return TxnServer::AbortIngest(ingest); + }, + [=](QueryAtom, const QueryReq& request) -> caf::message { + Query query; + auto ret = TxnServer::BeginQuery(request, 
query); + quit(); + return caf::make_message(query, ret); + }, + [=](CheckpointAtom, const UInt64 part) -> caf::message { + Checkpoint cp; + cp.Part = part; + auto ret = TxnServer::BeginCheckpoint(cp); + quit(); + return caf::make_message(cp, ret); + }, + [=](CommitCPAtom, const Checkpoint& cp) -> RetCode { + quit(); + return TxnServer::CommitCheckpoint(cp); + }, + caf::others >> []() { cout << "work unknown message" << endl; }}; } - caf::behavior TxnServer::make_behavior() { try { caf::io::publish(Router, Port); - cout << "publish to port:"<< Port<< " success" << endl; + cout << "publish to port:" << Port << " success" << endl; } catch (...) { - cout << "publish to port:"<< Port<< " fail" << endl; + cout << "publish to port:" << Port << " fail" << endl; } - return { - [=](IngestAtom, const FixTupleIngestReq & request) { - this->forward_to(caf::spawn()); - }, - [=](CommitIngestAtom, const Ingest & ingest) { - this->forward_to(caf::spawn()); - }, - [=](AbortIngestAtom, const Ingest & ingest) { - this->forward_to(caf::spawn()); - }, - [=](QueryAtom, const QueryReq & request) { - this->forward_to(caf::spawn()); - }, - [=](CheckpointAtom, const UInt64 part){ - this->forward_to(caf::spawn()); - }, - [=](CommitCPAtom, const Checkpoint & cp) { - this->forward_to(caf::spawn()); - }, - caf::others >> [] () { cout<<"unkown message"<forward_to(caf::spawn()); + }, + [=](CommitIngestAtom, const Ingest& ingest) { + this->forward_to(caf::spawn()); + }, + [=](AbortIngestAtom, const Ingest& ingest) { + this->forward_to(caf::spawn()); + }, + [=](QueryAtom, const QueryReq& request) { + this->forward_to(caf::spawn()); + }, + [=](CheckpointAtom, + const UInt64 part) { this->forward_to(caf::spawn()); }, + [=](CommitCPAtom, const Checkpoint& cp) { + this->forward_to(caf::spawn()); + }, + caf::others >> []() { cout << "unkown message" << endl; }}; } - - RetCode TxnServer::Init(int concurrency, int port) { - Active = true; Concurrency = concurrency; Port = port; @@ -223,122 +207,114 @@ 
RetCode TxnServer::Init(int concurrency, int port) { SerializeConfig(); RecoveryFromCatalog(); RecoveryFromTxnLog(); - srand((unsigned) time(NULL)); + srand((unsigned)time(NULL)); return 0; } -RetCode TxnServer::BeginIngest(const FixTupleIngestReq & request, Ingest & ingest) { +RetCode TxnServer::BeginIngest(const FixTupleIngestReq& request, + Ingest& ingest) { RetCode ret; UInt64 core_id = SelectCore(); caf::scoped_actor self; - self->sync_send(Cores[core_id], IngestAtom::value, & request, & ingest). - await([&](int r) {ret = r;}); + self->sync_send(Cores[core_id], IngestAtom::value, &request, &ingest) + .await([&](int r) { ret = r; }); return 0; } -RetCode TxnServer::CommitIngest(const Ingest & ingest) { +RetCode TxnServer::CommitIngest(const Ingest& ingest) { RetCode ret; UInt64 core_id = GetCoreId(ingest.Id); caf::scoped_actor self; - self->sync_send(Cores[core_id], CommitIngestAtom::value, &ingest). - await([&](int r) { ret = r;}); + self->sync_send(Cores[core_id], CommitIngestAtom::value, &ingest) + .await([&](int r) { ret = r; }); return 0; } -RetCode TxnServer::AbortIngest(const Ingest & ingest) { +RetCode TxnServer::AbortIngest(const Ingest& ingest) { RetCode ret; UInt64 core_id = GetCoreId(ingest.Id); caf::scoped_actor self; - self->sync_send(Cores[core_id], AbortIngestAtom::value, &ingest). - await([&](int r) { ret = r;}); + self->sync_send(Cores[core_id], AbortIngestAtom::value, &ingest) + .await([&](int r) { ret = r; }); return 0; } -RetCode TxnServer::BeginQuery(const QueryReq & request, Query & query) { +RetCode TxnServer::BeginQuery(const QueryReq& request, Query& query) { RetCode ret; caf::scoped_actor self; - for (auto & part : request.PartList) + for (auto& part : request.PartList) query.CPList[part] = TxnServer::LogicCPList[part]; - for (auto & core : Cores) - self->sync_send(core, QueryAtom::value, & request, & query). 
- await([&](int r) {r = ret;}); - for (auto & part : query.Snapshot) { + for (auto& core : Cores) + self->sync_send(core, QueryAtom::value, &request, &query) + .await([&](int r) { r = ret; }); + for (auto& part : query.Snapshot) { Strip::Sort(part.second); Strip::Merge(part.second); } return ret; } -RetCode TxnServer::BeginCheckpoint(Checkpoint & cp) { +RetCode TxnServer::BeginCheckpoint(Checkpoint& cp) { RetCode ret; - if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) - return -1; + if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) return -1; cp.LogicCP = TxnServer::LogicCPList[cp.Part]; cp.PhyCP = TxnServer::PhyCPList[cp.Part]; caf::scoped_actor self; - for (auto & core : Cores) - self->sync_send(core,CheckpointAtom::value, &cp). - await([&]( int r) { r = ret;}); + for (auto& core : Cores) + self->sync_send(core, CheckpointAtom::value, &cp) + .await([&](int r) { r = ret; }); Strip::Sort(cp.CommitStripList); Strip::Merge(cp.CommitStripList); Strip::Sort(cp.AbortStripList); Strip::Merge(cp.AbortStripList); return 0; } -RetCode TxnServer::CommitCheckpoint(const Checkpoint & cp) { - if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) - return -1; - TxnServer::LogicCPList[cp.Part] = cp.LogicCP; - TxnServer::PhyCPList[cp.Part] = cp.PhyCP; +RetCode TxnServer::CommitCheckpoint(const Checkpoint& cp) { + if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) return -1; + TxnServer::LogicCPList[cp.Part] = cp.LogicCP; + TxnServer::PhyCPList[cp.Part] = cp.PhyCP; return 0; } - -Strip TxnServer::AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount) { +Strip TxnServer::AtomicMalloc(UInt64 part, UInt64 TupleSize, + UInt64 TupleCount) { Strip strip; strip.Part = part; - if (TupleSize * TupleCount == 0) - return strip; + if (TupleSize * TupleCount == 0) return strip; do { strip.Pos = PosList[part].load(); strip.Offset = 0; UInt64 block_pos = strip.Pos % kBlockSize; UInt64 remain_count = TupleCount; int count = 0; - 
while(remain_count > 0) { - // 求出一个块内可以存放的最多元组数 + while (remain_count > 0) { + // 求出一个块内可以存放的最多元组数 UInt64 use_count = (kBlockSize - block_pos - kTailSize) / TupleSize; - if (use_count > remain_count) - use_count = remain_count; + if (use_count > remain_count) use_count = remain_count; - //使用块内可用区域 + //使用块内可用区域 remain_count -= use_count; strip.Offset += use_count * TupleSize; block_pos += use_count * TupleSize; - //将不可利用的空间也分配 + //将不可利用的空间也分配 if (kBlockSize - block_pos - kTailSize < TupleSize) { - strip.Offset += kBlockSize - block_pos; - block_pos = 0; - } + strip.Offset += kBlockSize - block_pos; + block_pos = 0; + } } - } while(!PosList[part].compare_exchange_weak(strip.Pos, strip.Pos + strip.Offset)); + } while (!PosList[part].compare_exchange_weak(strip.Pos, + strip.Pos + strip.Offset)); return strip; } RetCode TxnServer::RecoveryFromCatalog() { - for (auto i = 0; i < 10; i++ ) { + for (auto i = 0; i < 10; i++) { PosList[i] = 0; CountList[i] = 0; LogicCPList[i] = 0; } } -RetCode TxnServer::RecoveryFromTxnLog() { - -} - +RetCode TxnServer::RecoveryFromTxnLog() {} } } - - - diff --git a/utility/Makefile.am b/utility/Makefile.am index f82838e3a..4b8bf7948 100644 --- a/utility/Makefile.am +++ b/utility/Makefile.am @@ -24,5 +24,5 @@ libutility_a_SOURCES = \ string_process.h task.cpp \ task.h test_tool.h \ thread_pool.cpp thread_pool.h \ - warmup.h stl_guard.h + warmup.h resource_guard.h \ No newline at end of file diff --git a/utility/resource_guard.h b/utility/resource_guard.h new file mode 100644 index 000000000..3a7b5070b --- /dev/null +++ b/utility/resource_guard.h @@ -0,0 +1,138 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /Claims/utility/resource_guard.h + * + * Created on: Apr 19, 2016 + * Author: yukai + * Email: yukai2014@gmail.com + * + * Description: + * + */ + +#ifndef UTILITY_RESOURCE_GUARD_H_ +#define UTILITY_RESOURCE_GUARD_H_ + +#include "../common/error_define.h" +#include "../common/memory_handle.h" +#include "../common/rename.h" + +// auto release memory stored in t_. +// Attention: if 't_' is void*, the typename T is void !!! +template +class MemoryGuard { + public: + explicit MemoryGuard(T*& t) : t_(t) {} + ~MemoryGuard() { DELETE_PTR(t_); } + + NO_COPY_AND_ASSIGN(MemoryGuard); + + private: + T*& t_; +}; + +// if the memory is still in use after function return, please use +// MemoryGuardWithRetCode +// only some error occurs in function, the memory will be auto-free +// Attention: if some error occurs, the 'ret' must be set !!! 
+template +class MemoryGuardWithRetCode { + public: + MemoryGuardWithRetCode(T*& t, RetCode& ret) : t_(t), ret_(ret) {} + ~MemoryGuardWithRetCode() { + if (claims::common::rSuccess != ret_) DELETE_PTR(t_); + } + + NO_COPY_AND_ASSIGN(MemoryGuardWithRetCode); + + private: + T*& t_; + RetCode& ret_; +}; + +// a guard of STL container which stores pointer, like vector +// if ret != rSuccess, the memory will be released +template +class STLGuardWithRetCode { + public: + STLGuardWithRetCode(T& t, RetCode& ret) : t_(t), ret_(ret) {} + ~STLGuardWithRetCode() { + if (claims::common::rSuccess != ret_) { + for (auto it : t_) DELETE_PTR(it); + t_.clear(); + } + } + + NO_COPY_AND_ASSIGN(STLGuardWithRetCode); + + private: + T& t_; + RetCode& ret_; +}; + +template +class TwoLayerSTLGuardWithRetCode { + public: + TwoLayerSTLGuardWithRetCode(T& t, RetCode& ret) : t_(t), ret_(ret) {} + ~TwoLayerSTLGuardWithRetCode() { + if (claims::common::rSuccess != ret_) { + for (auto it1 : t_) { + for (auto it2 : it1) { + DELETE_PTR(it2); + } + it1.clear(); + } + t_.clear(); + } + } + + NO_COPY_AND_ASSIGN(TwoLayerSTLGuardWithRetCode); + + private: + T& t_; + RetCode& ret_; +}; + +template +class ThreeLayerSTLGuardWithRetCode { + public: + ThreeLayerSTLGuardWithRetCode(T& t, RetCode& ret) : t_(t), ret_(ret) {} + ~ThreeLayerSTLGuardWithRetCode() { + if (claims::common::rSuccess != ret_) { + for (auto it1 : t_) { + for (auto it2 : it1) { + for (auto it3 : it2) { + DELETE_PTR(it3); + } + it2.clear(); + } + it1.clear(); + } + t_.clear(); + } + } + + NO_COPY_AND_ASSIGN(ThreeLayerSTLGuardWithRetCode); + + private: + T& t_; + RetCode& ret_; +}; + +#endif // UTILITY_RESOURCE_GUARD_H_ From 2801b6db0a4c655e09a7b320ab77dcc865374b5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Thu, 21 Apr 2016 21:08:54 +0800 Subject: [PATCH 11/58] add txnPartitionReadIterator --- Test/gtest_main.cpp | 19 ++++----- .../physical_projection_scan.cpp | 2 +- 
storage/ChunkStorage.h | 2 +- storage/PartitionStorage.cpp | 40 +++++++++++++++++++ storage/PartitionStorage.h | 11 ++++- 5 files changed, 62 insertions(+), 12 deletions(-) diff --git a/Test/gtest_main.cpp b/Test/gtest_main.cpp index 1212db322..bf5d6d63b 100644 --- a/Test/gtest_main.cpp +++ b/Test/gtest_main.cpp @@ -25,16 +25,17 @@ #include #include // #include "../codegen/codegen_test.h" -#include "./common/errno_test.h" -#include "../common/test/operate_test.h" -#include "../common/types/Test/data_type_test.h" -#include "../common/file_handle/test/disk_file_handle_imp_test.h" -#include "../common/file_handle/test/hdfs_file_handle_imp_test.h" -#include "../loader/test/single_thread_single_file_connector_test.h" -#include "../loader/test/single_file_connector_test.h" +//#include "./common/errno_test.h" +//#include "../common/test/operate_test.h" +//#include "../common/types/Test/data_type_test.h" +//#include "../common/file_handle/test/disk_file_handle_imp_test.h" +//#include "../common/file_handle/test/hdfs_file_handle_imp_test.h" +//#include "../loader/test/single_thread_single_file_connector_test.h" +//#include "../loader/test/single_file_connector_test.h" #include "./iterator/elastic_iterator_model_test.h" -#include "../loader/test/data_ingestion_test.h" -#include "../loader/test/table_file_connector_test.h" + +//#include "../loader/test/data_ingestion_test.h" +//#include "../loader/test/table_file_connector_test.h" #define GLOG_NO_ABBREVIATED_SEVERITIES #include "../common/log/logging.h" diff --git a/physical_operator/physical_projection_scan.cpp b/physical_operator/physical_projection_scan.cpp index 3803d5435..6ff46bd4b 100644 --- a/physical_operator/physical_projection_scan.cpp +++ b/physical_operator/physical_projection_scan.cpp @@ -96,7 +96,7 @@ bool PhysicalProjectionScan::Open(const PartitionOffset& kPartitionOffset) { SetReturnStatus(false); } else { partition_reader_iterator_ = - partition_handle_->createAtomicReaderIterator(); + 
partition_handle_->createTxnReaderIterator(); SetReturnStatus(true); } diff --git a/storage/ChunkStorage.h b/storage/ChunkStorage.h index 7549256c3..738aa8880 100755 --- a/storage/ChunkStorage.h +++ b/storage/ChunkStorage.h @@ -130,7 +130,7 @@ class InMemoryChunkReaderItetaor : public ChunkReaderIterator { virtual ~InMemoryChunkReaderItetaor(); bool nextBlock(BlockStreamBase*& block); bool getNextBlockAccessor(block_accessor*& ba); - + void * getChunk() { return start_;} private: void* start_; }; diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 6a57178b8..dc41cb499 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -166,3 +166,43 @@ bool PartitionStorage::AtomicPartitionReaderIterator::nextBlock( } } } +PartitionStorage::TxnPartitionReaderIterator::~TxnPartitionReaderIterator() { + +} +PartitionStorage::PartitionReaderItetaor* PartitionStorage::createTxnReaderIterator() { + return new TxnPartitionReaderIterator(this); +} + +bool PartitionStorage::TxnPartitionReaderIterator::nextBlock( + BlockStreamBase*& block) { + lock_.acquire(); + ChunkReaderIterator::block_accessor* ba; + if (chunk_it_ != 0 && chunk_it_->getNextBlockAccessor(ba)) { + lock_.release(); + ba->getBlock(block); + auto block_addr = (char*)block->getBlock(); + auto chunk_addr = (char*)((InMemoryChunkReaderItetaor*)chunk_it_)->getChunk(); + //cout << (block_addr - chunk_addr) / (64 * 1024) << endl; + return true; + } + else { + if ((chunk_it_ = PartitionReaderItetaor::nextChunk()) > 0) { + lock_.release(); + return nextBlock(block); + } + else { + lock_.release(); + return false; + } + } +} +ChunkReaderIterator* PartitionStorage::TxnPartitionReaderIterator::nextChunk() { +// lock_.acquire(); + ChunkReaderIterator* ret; + if (chunk_cur_ < ps->number_of_chunks_) + ret = ps->chunk_list_[chunk_cur_++]->createChunkReaderIterator(); + else + ret = 0; +// lock_.release(); + return ret; +} diff --git a/storage/PartitionStorage.h 
b/storage/PartitionStorage.h index c228a7bf9..5cb8d1af0 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -40,7 +40,15 @@ class PartitionStorage { private: Lock lock_; }; - + class TxnPartitionReaderIterator:public PartitionReaderItetaor{ + public: + TxnPartitionReaderIterator(PartitionStorage* partition_storage):PartitionReaderItetaor(partition_storage){}; + virtual ~TxnPartitionReaderIterator(); + ChunkReaderIterator* nextChunk(); + virtual bool nextBlock(BlockStreamBase* &block); + private: + Lock lock_; + }; friend class PartitionReaderItetaor; PartitionStorage(const PartitionID &partition_id,const unsigned &number_of_chunks,const StorageLevel&); virtual ~PartitionStorage(); @@ -49,6 +57,7 @@ class PartitionStorage { void removeAllChunks(const PartitionID &partition_id); PartitionReaderItetaor* createReaderIterator(); PartitionReaderItetaor* createAtomicReaderIterator(); + PartitionReaderItetaor* createTxnReaderIterator(); protected: PartitionID partition_id_; unsigned number_of_chunks_; From 706393bee3de9e5674e27e0cef9d52193c23bbe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Thu, 21 Apr 2016 22:31:50 +0800 Subject: [PATCH 12/58] find a caf actor bug --- txn_manager/txn_client.cpp | 28 ++++++++++----------- txn_manager/txn_client.hpp | 1 + txn_manager/txn_client_test.cpp | 44 ++++++++++++++++++++------------- txn_manager/txn_log.cpp | 27 +++++++++++++------- txn_manager/txn_server.cpp | 8 +++--- txn_manager/txn_server_test.cpp | 31 +++++++++-------------- 6 files changed, 76 insertions(+), 63 deletions(-) diff --git a/txn_manager/txn_client.cpp b/txn_manager/txn_client.cpp index fe27f5090..30538230d 100644 --- a/txn_manager/txn_client.cpp +++ b/txn_manager/txn_client.cpp @@ -37,11 +37,17 @@ namespace txn{ string TxnClient::Ip = kTxnIp; int TxnClient::Port = kTxnPort; - +caf::actor TxnClient::Proxy; RetCode TxnClient::Init(string ip, int port){ Ip = ip; Port = port; 
SerializeConfig(); + try { + Proxy = caf::io::remote_actor(Ip, port); + } catch (...) { + return -1; + } + return 0; } RetCode TxnClient::BeginIngest(const FixTupleIngestReq & request, Ingest & ingest){ @@ -50,9 +56,8 @@ RetCode TxnClient::BeginIngest(const FixTupleIngestReq & request, Ingest & inges return TxnServer::BeginIngest(request, ingest); else { try{ - auto router = caf::io::remote_actor(Ip, Port); - caf::scoped_actor self; - self->sync_send(router, IngestAtom::value, request). + caf::scoped_actor self;; + self->sync_send(Proxy, IngestAtom::value, request). await([&](Ingest & reply, RetCode r) { ingest = reply; ret = r;}, caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); } catch (...){ @@ -69,9 +74,8 @@ RetCode TxnClient::CommitIngest(const Ingest & ingest) { return TxnServer::CommitIngest(ingest); else { try { - auto router = caf::io::remote_actor(Ip, Port); caf::scoped_actor self; - self->sync_send(router, CommitIngestAtom::value, ingest). + self->sync_send(Proxy, CommitIngestAtom::value, ingest). await([&](RetCode r) { ret = r;}, caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); } catch (...) { @@ -88,9 +92,8 @@ RetCode TxnClient::AbortIngest(const Ingest & ingest) { return TxnServer::AbortIngest(ingest); else { try { - auto router = caf::io::remote_actor(Ip, Port); caf::scoped_actor self; - self->sync_send(router, AbortIngestAtom::value, ingest). + self->sync_send(Proxy, AbortIngestAtom::value, ingest). await([&](RetCode r) { ret = r;}, caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); } catch (...) { @@ -107,9 +110,8 @@ RetCode TxnClient::BeginQuery(const QueryReq & request, Query & query) { return TxnServer::BeginQuery(request, query); else { try { - auto router = caf::io::remote_actor(Ip, Port); caf::scoped_actor self; - self->sync_send(router, QueryAtom::value, request). + self->sync_send(Proxy, QueryAtom::value, request). 
await([&](const QueryReq & request, RetCode r) { ret = r;}, caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); } catch (...) { @@ -126,9 +128,8 @@ RetCode TxnClient::BeginCheckpoint(Checkpoint & cp) { return TxnServer::BeginCheckpoint(cp); else { try { - auto router = caf::io::remote_actor(Ip, Port); caf::scoped_actor self; - self->sync_send(router, CheckpointAtom::value, cp.Part). + self->sync_send(Proxy, CheckpointAtom::value, cp.Part). await([&](const Checkpoint & checkpoint, RetCode r) {cp = checkpoint; ret = r;}, caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); } catch (...) { @@ -145,9 +146,8 @@ RetCode TxnClient::CommitCheckpoint(const Checkpoint & cp) { return TxnServer::CommitCheckpoint(cp); else { try { - auto router = caf::io::remote_actor(Ip, Port); caf::scoped_actor self; - self->sync_send(router, CommitCPAtom::value, cp). + self->sync_send(Proxy, CommitCPAtom::value, cp). await([&](RetCode r) { ret = r;}, caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); } catch (...) 
{ diff --git a/txn_manager/txn_client.hpp b/txn_manager/txn_client.hpp index b47b9c36a..bf8a0e6ea 100644 --- a/txn_manager/txn_client.hpp +++ b/txn_manager/txn_client.hpp @@ -70,6 +70,7 @@ class TxnClient{ public: static string Ip; static int Port; + static caf::actor Proxy; static RetCode Init(string ip = kTxnIp, int port = kTxnPort); static RetCode BeginIngest(const FixTupleIngestReq & request, Ingest & ingest); static RetCode CommitIngest(const Ingest & ingest); diff --git a/txn_manager/txn_client_test.cpp b/txn_manager/txn_client_test.cpp index ffe7ecaa5..41ff67319 100644 --- a/txn_manager/txn_client_test.cpp +++ b/txn_manager/txn_client_test.cpp @@ -89,30 +89,40 @@ class Foo { inline bool operator == (const Foo & a, const Foo & b) { return a.request1 == b.request1 && a.request2 == b.request2; } +char v[1024+10]; +void task(int time){ + for (auto i = 0; i< time; i++) { + + FixTupleIngestReq request1; + Ingest ingest; + request1.Content = {{0, {45, 10}}, + {1, {35, 20}}, + {2,{15,100}}}; + TxnClient::BeginIngest(request1, ingest); +// LogClient::Data(1, 1, 1111,(void*)v, 1024); +// LogClient::Data(1, 1, 1111,(void*)v, 1024); +// LogClient::Data(1, 1, 1111,(void*)v, 1024); + + //TxnClient::CommitIngest(ingest); +// } + } +} + int main(){ TxnClient::Init(); - FixTupleIngestReq request1; - Ingest ingest; - struct timeval tv1, tv2; gettimeofday(&tv1,NULL); -// request1.Content = {{0, {45, 10}}, {1, {54, 10}}}; -// TxnClient::BeginIngest(request1, ingest); - Checkpoint cp; - cp.Part = 0; - TxnClient::BeginCheckpoint(cp); - cout << cp.ToString() << endl; - cp.LogicCP = 10000; - cp.PhyCP = 10000; - TxnClient::CommitCheckpoint(cp); - TxnClient::BeginCheckpoint(cp); - cout << cp.ToString() << endl; + vector threads; + int n,times; + cin >> n >> times; + for (auto i=0;i(); + log_s = caf::spawn(); log_path = path; buffer = (char*)malloc(max_buffer_size); if (buffer == nullptr) return -1; @@ -55,14 +55,20 @@ caf::behavior LogServer::make_behavior() { return { [=](BeginAtom, 
UInt64 id)->RetCode { - return Append(BeginLog(id)); + // return Append(BeginLog(id)); + cout << "begin" << endl; + return 0; }, [=](WriteAtom,UInt64 id, UInt64 part, UInt64 pos, UInt64 offset)->RetCode { - return Append(WriteLog(id, part, pos, offset)); + // return Append(WriteLog(id, part, pos, offset)); + cout << "write" << endl; + return 0; }, [=](CommitAtom, UInt64 id)->RetCode { - return Append(CommitLog(id)); + //return Append(CommitLog(id)); + cout << "commit" << endl; + return 0; }, [=](AbortAtom, UInt64 id)->RetCode { return Append(AbortLog(id)); @@ -78,9 +84,11 @@ caf::behavior LogServer::make_behavior() { return 0; }, [=](RefreshAtom)->RetCode { - return Refresh(); + //return Refresh(); + cout << "refresh" << endl; + return 0; }, - caf::others >> [=] () { cout << "unkown log message" << endl; } + caf::others >> [=] () { cout << "unknown log message" << endl; } }; } @@ -137,8 +145,9 @@ RetCode LogServer::Refresh() { RetCode LogClient::Begin(UInt64 id) { RetCode ret = 0; caf::scoped_actor self; - self->sync_send( LogServer::log_server,BeginAtom::value, id). - await( [&](RetCode ret_code) { ret = ret_code;}); + cout<<"going to send begin atom to log server :"<sync_send( log_s,BeginAtom::value, id). + await( [&](RetCode ret_code) { cout<<"log:Begin, ret"<InsertStrip(strip); } Size ++; @@ -183,7 +183,7 @@ caf::behavior TxnWorker::make_behavior( ){ caf::behavior TxnServer::make_behavior() { try { - caf::io::publish(Router, Port); + caf::io::publish(Router, Port, nullptr, true); cout << "txn server bind to port:"<< Port<< " success" << endl; } catch (...) 
{ cout << "txn server bind to port:"<< Port<< " fail" << endl; @@ -237,8 +237,8 @@ RetCode TxnServer::BeginIngest(const FixTupleIngestReq & request, Ingest & inges await([&](int r) {ret = r;}); if (ret == 0) { LogClient::Begin(ingest.Id); - for (auto & strip : ingest.StripList) - LogClient::Write(ingest.Id, strip.first, strip.second.first, strip.second.second); +// for (auto & strip : ingest.StripList) +// LogClient::Write(ingest.Id, strip.first, strip.second.first, strip.second.second); } return ret; } diff --git a/txn_manager/txn_server_test.cpp b/txn_manager/txn_server_test.cpp index 93ceb9d2c..3418b121a 100644 --- a/txn_manager/txn_server_test.cpp +++ b/txn_manager/txn_server_test.cpp @@ -220,26 +220,19 @@ int main(){ // TxnServer::CommitIngest(ingest); // } // sleep(1); - memset(v, 1024, '*'); - string path; - cout << "input path" << endl; - cin >> path; - TxnServer::Init(); - LogServer::init(path); - struct timeval tv1, tv2; - vector v; - int n = 1, time =1; - cout << "input #thread, #time" << endl; - cin >> n >> time; - gettimeofday(&tv1,NULL); - for (auto i=0;i Date: Fri, 22 Apr 2016 00:05:15 +0800 Subject: [PATCH 13/58] refactor txn_manager --- txn_manager/Makefile.am | 4 +- txn_manager/log_client.cpp | 31 +++++++ txn_manager/log_client.hpp | 36 ++++++++ txn_manager/log_server.cpp | 31 +++++++ txn_manager/log_server.hpp | 36 ++++++++ txn_manager/txn_client_test.cpp | 3 +- txn_manager/txn_server.cpp | 156 ++++++++++++++++++++++---------- txn_manager/txn_server.hpp | 35 ++++++- txn_manager/txn_server_test.cpp | 2 +- 9 files changed, 282 insertions(+), 52 deletions(-) create mode 100644 txn_manager/log_client.cpp create mode 100644 txn_manager/log_client.hpp create mode 100644 txn_manager/log_server.cpp create mode 100644 txn_manager/log_server.hpp diff --git a/txn_manager/Makefile.am b/txn_manager/Makefile.am index d0af4badd..2445f4e37 100644 --- a/txn_manager/Makefile.am +++ b/txn_manager/Makefile.am @@ -26,7 +26,9 @@ libtxnmanager_a_SOURCES = \ txn.hpp 
txn.cpp \ txn_client.hpp txn_client.cpp \ txn_server.hpp txn_server.cpp \ - txn_log.hpp txn_log.cpp + txn_log.hpp txn_log.cpp \ + log_client.hpp log_client.cpp \ + log_server.hpp log_server.cpp diff --git a/txn_manager/log_client.cpp b/txn_manager/log_client.cpp new file mode 100644 index 000000000..b7d6f9bb5 --- /dev/null +++ b/txn_manager/log_client.cpp @@ -0,0 +1,31 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /CLAIMS/txn_manager/log_client.cpp + * + * Created on: 2016年4月22日 + * Author: imdb + * Email: + * + * Description: + * + */ + + + + diff --git a/txn_manager/log_client.hpp b/txn_manager/log_client.hpp new file mode 100644 index 000000000..fd7b65b7f --- /dev/null +++ b/txn_manager/log_client.hpp @@ -0,0 +1,36 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /CLAIMS/txn_manager/log_client.hpp + * + * Created on: 2016年4月22日 + * Author: imdb + * Email: + * + * Description: + * + */ + +#ifndef TXN_MANAGER_LOG_CLIENT_HPP_ +#define TXN_MANAGER_LOG_CLIENT_HPP_ + + + + + +#endif // TXN_MANAGER_LOG_CLIENT_HPP_ diff --git a/txn_manager/log_server.cpp b/txn_manager/log_server.cpp new file mode 100644 index 000000000..bb00877d7 --- /dev/null +++ b/txn_manager/log_server.cpp @@ -0,0 +1,31 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /CLAIMS/txn_manager/log_server.cpp + * + * Created on: 2016年4月22日 + * Author: imdb + * Email: + * + * Description: + * + */ + + + + diff --git a/txn_manager/log_server.hpp b/txn_manager/log_server.hpp new file mode 100644 index 000000000..d49a8f10b --- /dev/null +++ b/txn_manager/log_server.hpp @@ -0,0 +1,36 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /CLAIMS/txn_manager/log_server.hpp + * + * Created on: 2016年4月21日 + * Author: imdb + * Email: + * + * Description: + * + */ + +#ifndef TXN_MANAGER_LOG_SERVER_HPP_ +#define TXN_MANAGER_LOG_SERVER_HPP_ + + + + + +#endif // TXN_MANAGER_LOG_SERVER_HPP_ diff --git a/txn_manager/txn_client_test.cpp b/txn_manager/txn_client_test.cpp index 41ff67319..adba0c43b 100644 --- a/txn_manager/txn_client_test.cpp +++ b/txn_manager/txn_client_test.cpp @@ -105,7 +105,7 @@ void task(int time){ // LogClient::Data(1, 1, 1111,(void*)v, 1024); // LogClient::Data(1, 1, 1111,(void*)v, 1024); - //TxnClient::CommitIngest(ingest); + TxnClient::CommitIngest(ingest); // } } } @@ -123,6 +123,5 @@ int main(){ threads[i].join(); gettimeofday(&tv2,NULL); cout << tv2.tv_sec - tv1.tv_sec << "-" << (tv2.tv_usec - tv1.tv_usec)/1000 <int{ - if (TxnIndex.find(ingest->Id) == TxnIndex.end()) + [=](CommitIngestAtom, const UInt64 id)->int{ + if (TxnIndex.find(id) == TxnIndex.end()) return -1; - Commit[TxnIndex[ingest->Id]] = true; + Commit[TxnIndex[id]] = true; return 0; }, - [=](AbortIngestAtom, const Ingest * ingest)->int { - if (TxnIndex.find(ingest->Id) == TxnIndex.end()) + [=](AbortIngestAtom, const UInt64 id)->int { + if (TxnIndex.find(id) == TxnIndex.end()) return -1; - Commit[TxnIndex[ingest->Id]] = true; + Commit[TxnIndex[id]] = true; return 0; }, [=](QueryAtom, const QueryReq * request, Query * query)->int { @@ -143,44 +143,108 @@ caf::behavior TxnCore::make_behavior() { } -caf::behavior TxnWorker::make_behavior( ){ +// +// +//caf::behavior TxnWorker::make_behavior( ){ +// return { +// [=](IngestAtom, const FixTupleIngestReq & request)->caf::message { +// Ingest ingest; +// auto ret = TxnServer::BeginIngest(request, ingest); +// quit(); +// return caf::make_message(ingest, ret); +// }, +// [=](CommitIngestAtom, const Ingest & ingest)->RetCode { +// quit(); +// return TxnServer::CommitIngest(ingest); +// }, +// [=](AbortIngestAtom, const Ingest & ingest)->RetCode { +// quit(); +// return 
TxnServer::AbortIngest(ingest); +// }, +// [=](QueryAtom, const QueryReq & request)->caf::message { +// Query query; +// auto ret = TxnServer::BeginQuery(request, query); +// quit(); +// return caf::make_message(query, ret); +// }, +// [=](CheckpointAtom, const UInt64 part)->caf::message{ +// Checkpoint cp; +// cp.Part = part; +// auto ret = TxnServer::BeginCheckpoint(cp); +// quit(); +// return caf::make_message(cp, ret); +// }, +// [=](CommitCPAtom, const Checkpoint & cp)->RetCode { +// quit(); +// return TxnServer::CommitCheckpoint(cp); +// }, +// caf::others >> [] () { cout<<"work unkown message"<caf::message { + Ingest ingest; + auto ret = TxnServer::BeginIngest(request, ingest); + quit(); + return caf::make_message(ingest, ret); + }, + caf::others >> [] () { cout<<"core unkown message"<caf::message { - Ingest ingest; - auto ret = TxnServer::BeginIngest(request, ingest); - quit(); - return caf::make_message(ingest, ret); - }, [=](CommitIngestAtom, const Ingest & ingest)->RetCode { - quit(); - return TxnServer::CommitIngest(ingest); - }, + quit(); + return TxnServer::CommitIngest(ingest); + }, + caf::others >> [] () { cout<<"core unkown message"<RetCode { - quit(); - return TxnServer::AbortIngest(ingest); - }, + quit(); + return TxnServer::AbortIngest(ingest); + }, + caf::others >> [] () { cout<<"core unkown message"<caf::message { - Query query; - auto ret = TxnServer::BeginQuery(request, query); - quit(); - return caf::make_message(query, ret); - }, + Query query; + auto ret = TxnServer::BeginQuery(request, query); + quit(); + return caf::make_message(query, ret); + }, + caf::others >> [] () { cout<<"core unkown message"<caf::message{ - Checkpoint cp; - cp.Part = part; - auto ret = TxnServer::BeginCheckpoint(cp); - quit(); - return caf::make_message(cp, ret); - }, + Checkpoint cp; + cp.Part = part; + auto ret = TxnServer::BeginCheckpoint(cp); + quit(); + return caf::make_message(cp, ret); + }, + caf::others >> [] () { cout<<"core unkown message"<RetCode { 
quit(); return TxnServer::CommitCheckpoint(cp); }, - caf::others >> [] () { cout<<"work unkown message"<> [] () { cout<<"core unkown message"<forward_to(caf::spawn()); + this->forward_to(caf::spawn()); }, [=](CommitIngestAtom, const Ingest & ingest) { - this->forward_to(caf::spawn()); + this->forward_to(caf::spawn()); }, [=](AbortIngestAtom, const Ingest & ingest) { - this->forward_to(caf::spawn()); + this->forward_to(caf::spawn()); }, [=](QueryAtom, const QueryReq & request) { - this->forward_to(caf::spawn()); + this->forward_to(caf::spawn()); }, [=](CheckpointAtom, const UInt64 part){ - this->forward_to(caf::spawn()); + this->forward_to(caf::spawn()); }, [=](CommitCPAtom, const Checkpoint & cp) { - this->forward_to(caf::spawn()); + this->forward_to(caf::spawn()); }, caf::others >> [] () { cout<<"unkown message"<sync_send(Cores[core_id], IngestAtom::value, & request, & ingest). await([&](int r) {ret = r;}); if (ret == 0) { - LogClient::Begin(ingest.Id); + // LogClient::Begin(ingest.Id); // for (auto & strip : ingest.StripList) // LogClient::Write(ingest.Id, strip.first, strip.second.first, strip.second.second); } @@ -249,8 +313,8 @@ RetCode TxnServer::CommitIngest(const Ingest & ingest) { self->sync_send(Cores[core_id], CommitIngestAtom::value, &ingest). await([&](int r) { ret = r;}); if (ret == 0) { - LogClient::Commit(ingest.Id); - LogClient::Refresh(); +// LogClient::Commit(ingest.Id); +// LogClient::Refresh(); } return ret; } @@ -261,8 +325,8 @@ RetCode TxnServer::AbortIngest(const Ingest & ingest) { self->sync_send(Cores[core_id], AbortIngestAtom::value, &ingest). 
await([&](int r) { ret = r;}); if (ret == 0) { - LogClient::Abort(ingest.Id); - LogClient::Refresh(); +// LogClient::Abort(ingest.Id); +// LogClient::Refresh(); } return ret; } @@ -304,8 +368,8 @@ RetCode TxnServer::CommitCheckpoint(const Checkpoint & cp) { TxnServer::LogicCPList[cp.Part] = cp.LogicCP; TxnServer::PhyCPList[cp.Part] = cp.PhyCP; if (ret == 0) { - LogClient::Checkpoint(cp.Part, cp.LogicCP, cp.PhyCP); - LogClient::Refresh(); +// LogClient::Checkpoint(cp.Part, cp.LogicCP, cp.PhyCP); +// LogClient::Refresh(); } return ret; } diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp index 323202c19..faf9538e6 100644 --- a/txn_manager/txn_server.hpp +++ b/txn_manager/txn_server.hpp @@ -45,7 +45,7 @@ #include "caf/all.hpp" #include "caf/io/all.hpp" #include "txn.hpp" -#include "txn_log.hpp" +//#include "txn_log.hpp" #include namespace claims{ @@ -85,11 +85,42 @@ class TxnCore: public caf::event_based_actor { } }; -class TxnWorker:public caf::event_based_actor { +//class TxnWorker:public caf::event_based_actor { +// public: +// caf::behavior make_behavior() override; +//}; + +class IngestWorker:public caf::event_based_actor { + public: + caf::behavior make_behavior() override; +}; + +class IngestCommitWorker:public caf::event_based_actor { + public: + caf::behavior make_behavior() override; +}; + +class AbortWorker:public caf::event_based_actor { + public: + caf::behavior make_behavior() override; +}; + +class QueryWorker:public caf::event_based_actor { + public: + caf::behavior make_behavior() override; +}; + +class CheckpointWorker:public caf::event_based_actor { public: caf::behavior make_behavior() override; }; +class CommitCPWorker:public caf::event_based_actor { + public: + caf::behavior make_behavior() override; +}; + + class TxnServer: public caf::event_based_actor{ public: static bool Active; diff --git a/txn_manager/txn_server_test.cpp b/txn_manager/txn_server_test.cpp index 3418b121a..dd8b3c8b3 100644 --- 
a/txn_manager/txn_server_test.cpp +++ b/txn_manager/txn_server_test.cpp @@ -222,7 +222,7 @@ int main(){ // sleep(1); TxnServer::Init(); - LogServer::init("txn-log"); + //LogServer::init("txn-log"); // gettimeofday(&tv1,NULL); //// for (auto i=0;i Date: Fri, 22 Apr 2016 22:18:36 +0800 Subject: [PATCH 14/58] slave loader is to be finished --- common/error_define.h | 1 + common/error_no.cpp | 2 + loader/Makefile.am | 2 +- loader/load_packet.cpp | 34 +++++----- loader/load_packet.h | 33 +++++++--- loader/loader_message.h | 2 - loader/master_loader.cpp | 91 +++++++++++++++++++------- loader/master_loader.h | 26 ++++++-- loader/slave_loader.cpp | 102 +++++++++++++++++++++++------ loader/slave_loader.h | 18 +++-- storage/ChunkStorage.cpp | 30 ++++++++- storage/ChunkStorage.h | 4 ++ storage/PartitionStorage.cpp | 123 +++++++++++++++++++---------------- storage/PartitionStorage.h | 88 ++++++++++++++----------- 14 files changed, 378 insertions(+), 178 deletions(-) diff --git a/common/error_define.h b/common/error_define.h index 435b346c6..724f8ecc0 100644 --- a/common/error_define.h +++ b/common/error_define.h @@ -204,6 +204,7 @@ const int rFileInUsing = -102; const int rResourceIsLocked = -103; const int rSentMessageError = -104; +const int rReceiveMessageError = -105; // schema associated const int rEmptyAttributeName = -501; diff --git a/common/error_no.cpp b/common/error_no.cpp index 2b79ea77a..35f2cea21 100644 --- a/common/error_no.cpp +++ b/common/error_no.cpp @@ -160,6 +160,8 @@ ErrorInit::ErrorInit() { DefineErrorAndMessage(rResourceIsLocked, "other hold the lock of resource"); DefineErrorAndMessage(rSentMessageError, "failed to send network message"); + DefineErrorAndMessage(rReceiveMessageError, + "failed to receive network message"); // schema assocated DefineErrorAndMessage(rEmptyAttributeName, diff --git a/loader/Makefile.am b/loader/Makefile.am index 97a2b9855..3cd362bd3 100644 --- a/loader/Makefile.am +++ b/loader/Makefile.am @@ -30,7 +30,7 @@ LDADD = 
../catalog/libcatalog.a \ noinst_LIBRARIES=libloader.a libloader_a_SOURCES = \ data_ingestion.cpp data_ingestion.h \ - file_connector.h loader_message.h \ + file_connector.h \ master_loader.cpp master_loader.h \ single_file_connector.cpp single_file_connector.h \ single_thread_single_file_connector.cpp single_thread_single_file_connector.h \ diff --git a/loader/load_packet.cpp b/loader/load_packet.cpp index 8aee98d9e..b71bb8f48 100644 --- a/loader/load_packet.cpp +++ b/loader/load_packet.cpp @@ -38,39 +38,37 @@ LoadPacket::~LoadPacket() {} RetCode LoadPacket::Serialize(void*& packet_buffer, uint64_t& packet_length) const { - packet_length = sizeof(uint64_t) * 4 + data_length_; + packet_length = kHeadLength + data_length_; packet_buffer = Malloc(packet_length); if (NULL == packet_length) { ELOG(rNoMemory, "no memory for packet buffer"); return rNoMemory; } - *reinterpret_cast(packet_buffer) = global_part_id_; - *reinterpret_cast(packet_buffer + sizeof(uint64_t)) = pos_; - *reinterpret_cast(packet_buffer + 2 * sizeof(uint64_t)) = offset_; - *reinterpret_cast(packet_buffer + 3 * sizeof(uint64_t)) = + *reinterpret_cast(packet_buffer) = txn_id_; + *reinterpret_cast(packet_buffer + sizeof(uint64_t)) = + global_part_id_; + *reinterpret_cast(packet_buffer + 2 * sizeof(uint64_t)) = pos_; + *reinterpret_cast(packet_buffer + 3 * sizeof(uint64_t)) = offset_; + *reinterpret_cast(packet_buffer + 4 * sizeof(uint64_t)) = data_length_; memcpy(packet_buffer + 4 * sizeof(uint64_t), data_buffer_, data_length_); return rSuccess; } -RetCode LoadPacket::Deserialize(const void* const packet_buffer, - const uint64_t packet_length) { - global_part_id_ = *reinterpret_cast(packet_buffer); - pos_ = *reinterpret_cast(packet_buffer + sizeof(uint64_t)); +RetCode LoadPacket::Deserialize(const void* const head_buffer, + void* data_buffer) { + txn_id_ = *reinterpret_cast(head_buffer); + global_part_id_ = + *reinterpret_cast(head_buffer + sizeof(uint64_t)); + pos_ = *reinterpret_cast(head_buffer + 2 
* sizeof(uint64_t)); offset_ = - *reinterpret_cast(packet_buffer + 2 * sizeof(uint64_t)); + *reinterpret_cast(head_buffer + 3 * sizeof(uint64_t)); data_length_ = - *reinterpret_cast(packet_buffer + 3 * sizeof(uint64_t)); + *reinterpret_cast(head_buffer + 4 * sizeof(uint64_t)); - data_buffer_ = Malloc(data_length_); - if (NULL == data_buffer_) { - ELOG(rNoMemory, "no memory for data buffer"); - return rNoMemory; - } - - memcpy(data_buffer_, packet_buffer + 4 * sizeof(uint64_t), data_length_); + data_buffer_ = data_buffer; return rSuccess; } diff --git a/loader/load_packet.h b/loader/load_packet.h index 8758783e0..fc6cee23f 100644 --- a/loader/load_packet.h +++ b/loader/load_packet.h @@ -34,11 +34,25 @@ namespace claims { namespace loader { -class LoadPacket { +using IpPortAtom = caf::atom_constant; +using LoadAckAtom = caf::atom_constant; + +/************** LoadPacket format *****************/ +/** field type length **********/ +/****************************************************/ +/** transaction_id uint64_t 4 **/ +/** global_part_id uint64_t 4 **/ +/** position uint64_t 4 **/ +/** offset uint64_t 4 **/ +/** date_length uint64_t 4 **/ +/** data void* data_length **/ +/****************************************************/ +struct LoadPacket { public: - LoadPacket(const uint64_t g_part_id, uint64_t pos, uint64_t offset, - uint64_t data_length, const void* data_buffer) - : global_part_id_(g_part_id), + LoadPacket(const uint64_t txn_id, const uint64_t g_part_id, uint64_t pos, + uint64_t offset, uint64_t data_length, const void* data_buffer) + : txn_id_(txn_id), + global_part_id_(g_part_id), pos_(pos), offset_(offset), data_buffer_(data_buffer), @@ -46,13 +60,14 @@ class LoadPacket { ~LoadPacket(); RetCode Serialize(void*& packet_buffer, uint64_t& packet_length) const; - RetCode Deserialize(const void* const packet_buffer, - const uint64_t packet_length); + RetCode Deserialize(const void* const head_buffer, void* data_buffer); + + public: + static const int 
kHeadLength = 5 * 4; - private: - // uint64_t packet_length_; + public: + uint64_t txn_id_; uint64_t global_part_id_; - // uint64_t txn_id_; uint64_t pos_; uint64_t offset_; uint64_t data_length_; diff --git a/loader/loader_message.h b/loader/loader_message.h index 83ee56ace..420625e23 100644 --- a/loader/loader_message.h +++ b/loader/loader_message.h @@ -32,6 +32,4 @@ #include "caf/all.hpp" #include "caf/io/all.hpp" -using IpPortAtom = caf::atom_constant; - #endif // LOADER_LOADER_MESSAGE_H_ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 2784b7744..36c674016 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -72,8 +72,8 @@ namespace claims { namespace loader { MasterLoader::MasterLoader() - : master_loader_ip(Config::master_loader_ip), - master_loader_port(Config::master_loader_port) {} + : master_loader_ip_(Config::master_loader_ip), + master_loader_port_(Config::master_loader_port) {} MasterLoader::~MasterLoader() {} @@ -91,12 +91,8 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, LOG(INFO) << "succeed to get connected fd with slave"; } assert(new_slave_fd > 3); - // mloader->slave_addrs_.push_back(NetAddr(ip, port)); - // mloader->slave_sockets_.push_back(new_slave_fd); - // assert(mloader->slave_sockets_.size() == - // mloader->slave_addrs_.size()); - mloader->slave_addr_to_socket.insert( + mloader->slave_addr_to_socket_.insert( pair(NodeAddress(ip, port), new_slave_fd)); DLOG(INFO) << "start to send test message to slave"; @@ -127,6 +123,27 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, // DLOG(INFO) << "message buffer is sent"; // } }, + [=](LoadAckAtom, int txn_id, bool is_commited) { // NOLINT + // TODO(ANYONE): there should be a thread checking whether transaction + // overtime periodically and abort these transaction and delete from + // map. 
+ // Consider that: if this function access the item in map just deleted + // by above thread, unexpected thing happens. + claims::txn::Ingest ingest; + ingest.Id = txn_id; + if (is_commited) { + if (++(mloader->txn_commint_info_[txn_id].commited_part_num_) >= + mloader->txn_commint_info_[txn_id].total_part_num_) { + // TODO(lizhifang): optimize the interface of TxnClient + TxnClient::CommitIngest(ingest); + mloader->txn_commint_info_.erase(txn_id); + } + } else { + // TODO(lizhifang): optimize the interface of TxnClient + TxnClient::AbortIngest(ingest); + mloader->txn_commint_info_.erase(txn_id); + } + }, caf::others >> [] { LOG(ERROR) << "nothing matched!!!"; }}; } @@ -134,10 +151,10 @@ RetCode MasterLoader::ConnectWithSlaves() { int ret = rSuccess; try { auto listening_actor = spawn(&MasterLoader::ReceiveSlaveReg, this); - publish(listening_actor, master_loader_port, master_loader_ip.c_str(), + publish(listening_actor, master_loader_port_, master_loader_ip_.c_str(), true); - DLOG(INFO) << "published in " << master_loader_ip << ":" - << master_loader_port; + DLOG(INFO) << "published in " << master_loader_ip_ << ":" + << master_loader_port_; } catch (exception& e) { LOG(ERROR) << e.what(); return rFailure; @@ -189,8 +206,11 @@ RetCode MasterLoader::Ingest() { EXEC_AND_LOG(ret, ApplyTransaction(table, partition_buffers, ingest), "applied transaction", "failed to apply transaction"); + txn_commint_info_.insert(pair( + ingest.Id, CommitInfo(ingest.StripList.size()))); + // write data log - EXEC_AND_LOG(ret, WriteLog(req, table, partition_buffers), "written log ", + EXEC_AND_LOG(ret, WriteLog(table, partition_buffers, ingest), "written log ", "failed to write log"); // reply ACK to MQ @@ -209,9 +229,9 @@ string MasterLoader::GetMessage() { return ret; } -bool MasterLoader::CheckValidity() {} - -void MasterLoader::DistributeSubIngestion() {} +// bool MasterLoader::CheckValidity() {} +// +// void MasterLoader::DistributeSubIngestion() {} RetCode 
MasterLoader::GetSocketFdConnectedWithSlave(string ip, int port, int* connected_fd) { @@ -240,9 +260,9 @@ RetCode MasterLoader::GetRequestFromMessage(const string& message, return ret; } -RetCode MasterLoader::CheckAndToValue(const IngestionRequest& req, - void* tuple_buffer, - vector& column_validities) {} +// RetCode MasterLoader::CheckAndToValue(const IngestionRequest& req, +// void* tuple_buffer, +// vector& column_validities) {} // map every tuple into associate part RetCode MasterLoader::GetPartitionTuples( @@ -346,10 +366,36 @@ RetCode MasterLoader::ApplyTransaction( } RetCode MasterLoader::WriteLog( - const IngestionRequest& req, const TableDescriptor* table, - const vector>& partition_buffers) {} + const TableDescriptor* table, + const vector>& partition_buffers, + claims::txn::Ingest& ingest) { + RetCode ret = rSuccess; + uint64_t table_id = table->get_table_id(); -RetCode MasterLoader::ReplyToMQ(const IngestionRequest& req) {} + for (int prj_id = 0; prj_id < partition_buffers.size(); ++prj_id) { + for (int part_id = 0; part_id < partition_buffers[prj_id].size(); + ++part_id) { + uint64_t global_part_id = GetGlobalPartId(table_id, prj_id, part_id); + + EXEC_AND_LOG(ret, + LogClient::Data(global_part_id, + ingest.StripList[global_part_id].first, + ingest.StripList[global_part_id].second, + partition_buffers[prj_id][part_id].buffer_, + partition_buffers[prj_id][part_id].length_), + "written data log for partition:" << global_part_id, + "failed to write data log for partition:" << global_part_id); + } + } + + EXEC_AND_LOG(ret, LogClient::Refresh(), "flushed data log into disk", + "failed to flush data log"); + return ret; +} + +RetCode MasterLoader::ReplyToMQ(const IngestionRequest& req) { + // TODO(YUKAI) +} RetCode MasterLoader::SendPartitionTupleToSlave( const TableDescriptor* table, @@ -362,7 +408,8 @@ RetCode MasterLoader::SendPartitionTupleToSlave( for (int part_id = 0; part_id < partition_buffers[prj_id].size(); ++part_id) { uint64_t global_part_id = 
GetGlobalPartId(table_id, prj_id, part_id); - LoadPacket packet(global_part_id, ingest.StripList[global_part_id].first, + LoadPacket packet(ingest.Id, global_part_id, + ingest.StripList[global_part_id].first, ingest.StripList[global_part_id].second, partition_buffers[prj_id][part_id].length_, partition_buffers[prj_id][part_id].buffer_); @@ -428,7 +475,7 @@ RetCode MasterLoader::SelectSocket(const TableDescriptor* table, EXEC_AND_LOG_RETURN( ret, NodeTracker::GetInstance()->GetNodeAddr(node_id_in_rmm, addr), "got node address", "failed to get node address"); - socket_fd = slave_addr_to_socket[addr]; + socket_fd = slave_addr_to_socket_[addr]; return ret; } diff --git a/loader/master_loader.h b/loader/master_loader.h index fa88c0483..8a87b8673 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -61,6 +61,18 @@ class MasterLoader { vector tuples_; }; + struct CommitInfo { + explicit CommitInfo(uint64_t total_part_num) + : total_part_num_(total_part_num), + commited_part_num_(0), + wait_period_(0) {} + uint64_t total_part_num_; + uint64_t commited_part_num_; + // initial value is 0, add by 1 every time check thread traverses + // if wait period exceeds the specified value, this transaction fails + uint64_t wait_period_; + }; + struct PartitionBuffer { PartitionBuffer(void* buf, uint64_t len) : buffer_(buf), length_(len) {} void* buffer_; @@ -102,8 +114,9 @@ class MasterLoader { const vector>& partition_buffers, claims::txn::Ingest& ingest); - RetCode WriteLog(const IngestionRequest& req, const TableDescriptor* table, - const vector>& partition_buffers); + RetCode WriteLog(const TableDescriptor* table, + const vector>& partition_buffers, + claims::txn::Ingest& ingest); RetCode ReplyToMQ(const IngestionRequest& req); @@ -131,11 +144,14 @@ class MasterLoader { static void* StartMasterLoader(void* arg); private: - string master_loader_ip; - int master_loader_port; + string master_loader_ip_; + int master_loader_port_; // vector slave_addrs_; // vector 
slave_sockets_; - boost::unordered_map slave_addr_to_socket; + boost::unordered_map slave_addr_to_socket_; + + // store id of transactions which are not finished + boost::unordered_map txn_commint_info_; }; } /* namespace loader */ diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index edd01da28..a8bcc3915 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -38,23 +38,29 @@ #include "../Config.h" #include "../Environment.h" #include "../common/error_define.h" +#include "../common/memory_handle.h" +#include "../storage/PartitionStorage.h" +#include "../txn_manager/txn.hpp" using caf::event_based_actor; using caf::io::remote_actor; using caf::mixin::sync_sender_impl; using caf::spawn; +using claims::common::Malloc; using claims::common::rSuccess; using claims::common::rFailure; +using claims::txn::GetPartitionIdFromGlobalPartId; +using claims::txn::GetProjectionIdFromGlobalPartId; +using claims::txn::GetTableIdFromGlobalPartId; namespace claims { namespace loader { SlaveLoader::SlaveLoader() { - // TODO Auto-generated constructor stub + master_actor_ = + remote_actor(Config::master_loader_ip, Config::master_loader_port); } -SlaveLoader::~SlaveLoader() { - // TODO Auto-generated destructor stub -} +SlaveLoader::~SlaveLoader() {} RetCode SlaveLoader::ConnectWithMaster() { int ret = rSuccess; @@ -144,10 +150,8 @@ RetCode SlaveLoader::SendSelfAddrToMaster() { << "to (" << Config::master_loader_ip << ":" << Config::master_loader_port << ")"; try { - auto master_actor = - remote_actor(Config::master_loader_ip, Config::master_loader_port); caf::scoped_actor self; - self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port); + self->sync_send(master_actor_, IpPortAtom::value, self_ip, self_port); } catch (exception& e) { LOG(ERROR) << e.what(); return rFailure; @@ -180,30 +184,52 @@ void SlaveLoader::OutputFdIpPort(int fd) { << ntohs(temp_addr.sin_port) << ")"; } -void SlaveLoader::ReceiveAndWorkLoop() { +RetCode 
SlaveLoader::ReceiveAndWorkLoop() { assert(master_fd_ > 3); - const int length = 1000; - char* buffer = new char[length]; - DLOG(INFO) << "slave is recving ..."; + char head_buffer[LoadPacket::kHeadLength]; + DLOG(INFO) << "slave is receiving ..."; while (1) { - if (-1 == recv(master_fd_, buffer, 4, MSG_WAITALL)) { + RetCode ret = rSuccess; + + // get load packet + if (-1 == + recv(master_fd_, head_buffer, LoadPacket::kHeadLength, MSG_WAITALL)) { PLOG(ERROR) << "failed to receive message length from master"; } - LOG(INFO) << "length is " << *reinterpret_cast(buffer); - if (-1 == recv(master_fd_, buffer, *reinterpret_cast(buffer), - MSG_WAITALL)) { + uint64_t data_length = *reinterpret_cast(head_buffer + 3 * 4); + uint64_t real_packet_length = data_length + LoadPacket::kHeadLength; + assert(data_length >= 4); + LOG(INFO) << "real packet length is :" << real_packet_length + << ". date length is " << data_length; + + char* data_buffer = Malloc(data_length); + MemoryGuard guard(data_buffer); // auto-release + if (NULL == data_buffer) { + ELOG((ret = claims::common::rNoMemory), + "no memory to hold data of message from master"); + return ret; + } + + if (-1 == recv(master_fd_, data_buffer, data_length, MSG_WAITALL)) { PLOG(ERROR) << "failed to receive message from master"; + return claims::common::rReceiveMessageError; } - LOG(INFO) << "receive message from master:" << buffer << endl; + // LOG(INFO) << "data of message from master is:" << buffer; + + // deserialization of packet + LoadPacket packet; + packet.Deserialize(head_buffer, data_buffer); + + EXEC_AND_LOG(ret, StoreDataInMemory(packet), "stored data", + "failed to store"); + + // return result to master loader + SendAckToMasterLoader(packet.txn_id_, rSuccess == ret); } } void* SlaveLoader::StartSlaveLoader(void* arg) { Config::getInstance(); - // if (rSuccess != Catalog::getInstance()->restoreCatalog()) { - // LOG(ERROR) << "failed to restore catalog" << std::endl; - // cerr << "ERROR: restore catalog failed" << 
endl; - // } LOG(INFO) << "start slave loader..."; SlaveLoader* slave_loader = Environment::getInstance()->get_slave_loader(); @@ -217,8 +243,44 @@ void* SlaveLoader::StartSlaveLoader(void* arg) { cout << "connected with master loader" << endl; // TODO(YK): error handle slave_loader->ReceiveAndWorkLoop(); + assert(false); return NULL; } +RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { + RetCode ret = rSuccess; + uint64_t table_id = GetTableIdFromGlobalPartId(packet.global_part_id_); + uint64_t prj_id = GetProjectionIdFromGlobalPartId(packet.global_part_id_); + uint64_t part_id = GetPartitionIdFromGlobalPartId(packet.global_part_id_); + + uint64_t chunk_id = packet.pos_ / CHUNK_SIZE; + PartitionStorage* part_storage = + BlockManager::getInstance()->getPartitionHandle( + PartitionID(ProjectionID(table_id, prj_id), part_id)); + + // set HDFS because the memory is not applied actually + // it will be set to MEMORY in function + EXEC_AND_LOG_RETURN(ret, + part_storage->AddChunkWithMemoryToNum(chunk_id, HDFS), + "added chunk to " << chunk_id, "failed to add chunk"); + + uint64_t pos_in_chunk = packet.pos_ % CHUNK_SIZE; + // TODO(YUKAI): copy the value to chunk + + return ret; +} + +RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, + bool is_commited) { + try { + caf::scoped_actor self; + self->sync_send(master_actor_, LoadAckAtom::value, txn_id, is_commited); + } catch (exception& e) { + LOG(ERROR) << e.what(); + return rFailure; + } + return rSuccess; +} + } /* namespace loader */ } /* namespace claims */ diff --git a/loader/slave_loader.h b/loader/slave_loader.h index 22ff201a3..535c8639c 100644 --- a/loader/slave_loader.h +++ b/loader/slave_loader.h @@ -32,23 +32,29 @@ #include #include #include - #include "../catalog/catalog.h" #include "../storage/BlockManager.h" +#include "caf/all.hpp" namespace claims { namespace loader { + using std::string; using claims::catalog::Catalog; +class LoadPacket; + class SlaveLoader { public: 
SlaveLoader(); virtual ~SlaveLoader(); + public: + static void* StartSlaveLoader(void* arg); + public: RetCode ConnectWithMaster(); - void ReceiveAndWorkLoop(); + RetCode ReceiveAndWorkLoop(); void Clean() { if (-1 != listening_fd_) FileClose(listening_fd_); listening_fd_ = -1; @@ -64,15 +70,15 @@ class SlaveLoader { void OutputFdIpPort(int fd); + RetCode StoreDataInMemory(const LoadPacket& packet); + RetCode SendAckToMasterLoader(const uint64_t& txn_id, bool is_commited); + private: int master_socket_fd_; string self_ip; int self_port; + caf::actor master_actor_; - public: - static void* StartSlaveLoader(void* arg); - - private: int listening_fd_ = -1; int master_fd_ = -1; }; diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 2ffcab697..5db033c43 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -41,6 +41,33 @@ ChunkStorage::~ChunkStorage() { // TODO Auto-generated destructor stub } +// apply memory for chunk size for writing later by slave loader +RetCode ChunkStorage::ApplyMemory() { + RetCode ret = claims::common::rSuccess; + HdfsInMemoryChunk chunk_info; + chunk_info.length = CHUNK_SIZE; + if (BlockManager::getInstance()->getMemoryChunkStore()->applyChunk( + chunk_id_, chunk_info.hook)) { + /* there is enough memory storage space, so the storage level can be + * shifted.*/ + current_storage_level_ = MEMORY; + + /* update the chunk info in the Chunk store in case that the + * chunk_info is updated.*/ + BlockManager::getInstance()->getMemoryChunkStore()->updateChunkInfo( + chunk_id_, chunk_info); + } else { + /* + * The storage memory is full, some swap algorithm is needed here. + * TODO: swap algorithm. 
+ */ + printf("Failed to get memory chunk budget!\n"); + ret = claims::common::rNoMemory; + assert(false); + } + return ret; +} + ChunkReaderIterator* ChunkStorage::createChunkReaderIterator() { // printf("level value:%d\n",current_storage_level_); ChunkReaderIterator* ret; @@ -98,8 +125,7 @@ ChunkReaderIterator* ChunkStorage::createChunkReaderIterator() { * chunk_info is updated.*/ BlockManager::getInstance()->getMemoryChunkStore()->updateChunkInfo( chunk_id_, chunk_info); - // printf("%lx current is set to - // memory!\n"); + // printf("%lx current is set to memory!\n"); ret = new InMemoryChunkReaderItetaor( chunk_info.hook, chunk_info.length, chunk_info.length / block_size_, block_size_, chunk_id_); diff --git a/storage/ChunkStorage.h b/storage/ChunkStorage.h index 7549256c3..48174f341 100755 --- a/storage/ChunkStorage.h +++ b/storage/ChunkStorage.h @@ -9,6 +9,8 @@ #define CHUNKSTORAGE_H_ #include #include + +#include "../common/error_define.h" #ifdef DMALLOC #include "dmalloc.h" #endif @@ -193,6 +195,8 @@ class ChunkStorage { current_storage_level_ = current_level; } + RetCode ApplyMemory(); + private: unsigned block_size_; unsigned chunk_size_; diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 6a57178b8..0028b3a5d 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -6,19 +6,23 @@ */ #include "PartitionStorage.h" + +#include "../common/error_define.h" #include "../Debug.h" #include "MemoryStore.h" #include "../Config.h" -PartitionStorage::PartitionStorage(const PartitionID &partition_id, - const unsigned &number_of_chunks, + +using claims::common::rSuccess; + +PartitionStorage::PartitionStorage(const PartitionID& partition_id, + const unsigned& number_of_chunks, const StorageLevel& storage_level) : partition_id_(partition_id), number_of_chunks_(number_of_chunks), desirable_storage_level_(storage_level) { for (unsigned i = 0; i < number_of_chunks_; i++) { - chunk_list_.push_back( - new 
ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, - desirable_storage_level_)); + chunk_list_.push_back(new ChunkStorage( + ChunkID(partition_id_, i), BLOCK_SIZE, desirable_storage_level_)); } } @@ -29,12 +33,10 @@ PartitionStorage::~PartitionStorage() { chunk_list_.clear(); } -void PartitionStorage::addNewChunk() { - number_of_chunks_++; -} +void PartitionStorage::addNewChunk() { number_of_chunks_++; } void PartitionStorage::updateChunksWithInsertOrAppend( - const PartitionID &partition_id, const unsigned &number_of_chunks, + const PartitionID& partition_id, const unsigned& number_of_chunks, const StorageLevel& storage_level) { if (!chunk_list_.empty()) { /* @@ -44,11 +46,11 @@ void PartitionStorage::updateChunksWithInsertOrAppend( */ MemoryChunkStore::getInstance()->returnChunk( chunk_list_.back()->getChunkID()); -// if (Config::local_disk_mode == 0) + // if (Config::local_disk_mode == 0) // actually, DISK is not used, only HDFS and MEMORY is used chunk_list_.back()->setCurrentStorageLevel(HDFS); -// else -// chunk_list_.back()->setCurrentStorageLevel(DISK); + // else + // chunk_list_.back()->setCurrentStorageLevel(DISK); } for (unsigned i = number_of_chunks_; i < number_of_chunks; i++) chunk_list_.push_back( @@ -56,7 +58,26 @@ void PartitionStorage::updateChunksWithInsertOrAppend( number_of_chunks_ = number_of_chunks; } -void PartitionStorage::removeAllChunks(const PartitionID &partition_id) { +RetCode PartitionStorage::AddChunkWithMemoryToNum( + const unsigned& number_of_chunks, const StorageLevel& storage_level) { + RetCode ret = rSuccess; + if (number_of_chunks_ >= number_of_chunks - 1) return ret; + for (unsigned i = number_of_chunks_; i < number_of_chunks; i++) { + ChunkStorage* chunk = + new ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, storage_level); + EXEC_AND_LOG( + ret, chunk->ApplyMemory(), + "applied memory for chunk(" << partition_id_ << "," << i << ")", + "failed to apply memory for chunk(" << partition_id_ << "," << i + << ")"); + 
chunk_list_.push_back(chunk); + } + number_of_chunks_ = number_of_chunks; + + return ret; +} + +void PartitionStorage::removeAllChunks(const PartitionID& partition_id) { if (!chunk_list_.empty()) { vector::iterator iter = chunk_list_.begin(); MemoryChunkStore* mcs = MemoryChunkStore::getInstance(); @@ -68,47 +89,43 @@ void PartitionStorage::removeAllChunks(const PartitionID &partition_id) { } } -PartitionStorage::PartitionReaderItetaor* PartitionStorage::createReaderIterator() { +PartitionStorage::PartitionReaderItetaor* +PartitionStorage::createReaderIterator() { return new PartitionReaderItetaor(this); } -PartitionStorage::PartitionReaderItetaor* PartitionStorage::createAtomicReaderIterator() { +PartitionStorage::PartitionReaderItetaor* +PartitionStorage::createAtomicReaderIterator() { return new AtomicPartitionReaderIterator(this); } PartitionStorage::PartitionReaderItetaor::PartitionReaderItetaor( PartitionStorage* partition_storage) - : ps(partition_storage), - chunk_cur_(0), - chunk_it_(0) { + : ps(partition_storage), chunk_cur_(0), chunk_it_(0) {} -} - -//PartitionStorage::PartitionReaderItetaor::PartitionReaderItetaor():chunk_cur_(0){ +// PartitionStorage::PartitionReaderItetaor::PartitionReaderItetaor():chunk_cur_(0){ // //} -PartitionStorage::PartitionReaderItetaor::~PartitionReaderItetaor() { - -} +PartitionStorage::PartitionReaderItetaor::~PartitionReaderItetaor() {} ChunkReaderIterator* PartitionStorage::PartitionReaderItetaor::nextChunk() { if (chunk_cur_ < ps->number_of_chunks_) return ps->chunk_list_[chunk_cur_++]->createChunkReaderIterator(); else return 0; } -//PartitionStorage::AtomicPartitionReaderIterator::AtomicPartitionReaderIterator():PartitionReaderItetaor(){ +// PartitionStorage::AtomicPartitionReaderIterator::AtomicPartitionReaderIterator():PartitionReaderItetaor(){ // //} -PartitionStorage::AtomicPartitionReaderIterator::~AtomicPartitionReaderIterator() { - -} -ChunkReaderIterator* 
PartitionStorage::AtomicPartitionReaderIterator::nextChunk() { -// lock_.acquire(); +PartitionStorage::AtomicPartitionReaderIterator:: + ~AtomicPartitionReaderIterator() {} +ChunkReaderIterator* +PartitionStorage::AtomicPartitionReaderIterator::nextChunk() { + // lock_.acquire(); ChunkReaderIterator* ret; if (chunk_cur_ < ps->number_of_chunks_) ret = ps->chunk_list_[chunk_cur_++]->createChunkReaderIterator(); else ret = 0; -// lock_.release(); + // lock_.release(); return ret; } @@ -117,12 +134,10 @@ bool PartitionStorage::PartitionReaderItetaor::nextBlock( assert(false); if (chunk_it_ > 0 && chunk_it_->nextBlock(block)) { return true; - } - else { + } else { if ((chunk_it_ = nextChunk()) > 0) { return nextBlock(block); - } - else { + } else { return false; } } @@ -130,22 +145,22 @@ bool PartitionStorage::PartitionReaderItetaor::nextBlock( bool PartitionStorage::AtomicPartitionReaderIterator::nextBlock( BlockStreamBase*& block) { -//// lock_.acquire(); -// if(chunk_it_>0&&chunk_it_->nextBlock(block)){ -//// lock_.release(); -// return true; -// } -// else{ -// lock_.acquire(); -// if((chunk_it_=nextChunk())>0){ -// lock_.release(); -// return nextBlock(block); -// } -// else{ -// lock_.release(); -// return false; -// } -// } + //// lock_.acquire(); + // if(chunk_it_>0&&chunk_it_->nextBlock(block)){ + //// lock_.release(); + // return true; + // } + // else{ + // lock_.acquire(); + // if((chunk_it_=nextChunk())>0){ + // lock_.release(); + // return nextBlock(block); + // } + // else{ + // lock_.release(); + // return false; + // } + // } // lock_.acquire(); lock_.acquire(); @@ -154,13 +169,11 @@ bool PartitionStorage::AtomicPartitionReaderIterator::nextBlock( lock_.release(); ba->getBlock(block); return true; - } - else { + } else { if ((chunk_it_ = PartitionReaderItetaor::nextChunk()) > 0) { lock_.release(); return nextBlock(block); - } - else { + } else { lock_.release(); return false; } diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index 
c228a7bf9..0cd5ae0f4 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -7,6 +7,8 @@ #ifndef PARTITIONSTORAGE_H_ #define PARTITIONSTORAGE_H_ +#include "../common/error_define.h" + #ifdef DMALLOC #include "dmalloc.h" #endif @@ -15,45 +17,55 @@ #include "PartitionReaderIterator.h" #include "../utility/lock.h" - class PartitionStorage { -public: - class PartitionReaderItetaor{ - public: -// PartitionReaderItetaor(); - PartitionReaderItetaor(PartitionStorage* partition_storage); - virtual ~PartitionReaderItetaor(); - virtual ChunkReaderIterator* nextChunk(); - virtual bool nextBlock(BlockStreamBase* &block); - protected: - PartitionStorage* ps; - unsigned chunk_cur_; - ChunkReaderIterator* chunk_it_; - }; - class AtomicPartitionReaderIterator:public PartitionReaderItetaor{ - public: -// AtomicPartitionReaderIterator(); - AtomicPartitionReaderIterator(PartitionStorage* partition_storage):PartitionReaderItetaor(partition_storage){}; - virtual ~AtomicPartitionReaderIterator(); - ChunkReaderIterator* nextChunk(); - virtual bool nextBlock(BlockStreamBase* &block); - private: - Lock lock_; - }; - - friend class PartitionReaderItetaor; - PartitionStorage(const PartitionID &partition_id,const unsigned &number_of_chunks,const StorageLevel&); - virtual ~PartitionStorage(); - void addNewChunk(); - void updateChunksWithInsertOrAppend(const PartitionID &partition_id, const unsigned &number_of_chunks, const StorageLevel& storage_level); - void removeAllChunks(const PartitionID &partition_id); - PartitionReaderItetaor* createReaderIterator(); - PartitionReaderItetaor* createAtomicReaderIterator(); -protected: - PartitionID partition_id_; - unsigned number_of_chunks_; - std::vector chunk_list_; - StorageLevel desirable_storage_level_; + public: + class PartitionReaderItetaor { + public: + // PartitionReaderItetaor(); + PartitionReaderItetaor(PartitionStorage* partition_storage); + virtual ~PartitionReaderItetaor(); + virtual ChunkReaderIterator* nextChunk(); + 
virtual bool nextBlock(BlockStreamBase*& block); + + protected: + PartitionStorage* ps; + unsigned chunk_cur_; + ChunkReaderIterator* chunk_it_; + }; + class AtomicPartitionReaderIterator : public PartitionReaderItetaor { + public: + // AtomicPartitionReaderIterator(); + AtomicPartitionReaderIterator(PartitionStorage* partition_storage) + : PartitionReaderItetaor(partition_storage){}; + virtual ~AtomicPartitionReaderIterator(); + ChunkReaderIterator* nextChunk(); + virtual bool nextBlock(BlockStreamBase*& block); + + private: + Lock lock_; + }; + + public: + friend class PartitionReaderItetaor; + PartitionStorage(const PartitionID& partition_id, + const unsigned& number_of_chunks, const StorageLevel&); + virtual ~PartitionStorage(); + void addNewChunk(); + void updateChunksWithInsertOrAppend(const PartitionID& partition_id, + const unsigned& number_of_chunks, + const StorageLevel& storage_level); + + RetCode AddChunkWithMemoryToNum(const unsigned& number_of_chunks, + const StorageLevel& storage_level); + void removeAllChunks(const PartitionID& partition_id); + PartitionReaderItetaor* createReaderIterator(); + PartitionReaderItetaor* createAtomicReaderIterator(); + + protected: + PartitionID partition_id_; + unsigned number_of_chunks_; + std::vector chunk_list_; + StorageLevel desirable_storage_level_; }; #endif /* PARTITIONSTORAGE_H_ */ From 78fec975d98e08151bfa20dd626b0b2d710db995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Sun, 24 Apr 2016 16:22:13 +0800 Subject: [PATCH 15/58] basiclly implement pipeline of txn manager --- Environment.cpp | 2 +- common/error_define.h | 11 + common/error_no.cpp | 10 + loader/master_loader.cpp | 2 +- txn_manager/txn.cpp | 50 ++-- txn_manager/txn.hpp | 172 +++++++------- txn_manager/txn_client.cpp | 203 +++++++++------- txn_manager/txn_client.hpp | 13 +- txn_manager/txn_client_test.cpp | 94 ++++++-- txn_manager/txn_log.cpp | 135 ++++++----- txn_manager/txn_log.hpp | 23 +- 
txn_manager/txn_server.cpp | 410 +++++++++++++++++--------------- txn_manager/txn_server.hpp | 68 +++--- txn_manager/txn_server_test.cpp | 241 +++++++++---------- 14 files changed, 766 insertions(+), 668 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index 4006c0174..172bcb98a 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -237,7 +237,7 @@ bool Environment::InitTxnManager() { bool Environment::InitTxnLog() { if (Config::enable_txn_log) { LOG(INFO) << "I'm txn log server"; - LogServer::init(Config::txn_log_path); + LogServer::Init(Config::txn_log_path); } return true; } diff --git a/common/error_define.h b/common/error_define.h index 77139cdc3..1313ccac0 100644 --- a/common/error_define.h +++ b/common/error_define.h @@ -252,6 +252,17 @@ const int rInvalidNullData = -2008; const int rTooFewColumn = -2009; const int rTooManyColumn = -2010; +const int rLinkTmTimeout = -2499; +const int rLinkTmFail = -2500; +const int rBeginIngestTxnFail = -2501; +const int rCommitIngestTxnFail = -2502; +const int rAbortIngestTxnFail = -2503; +const int rBeginQueryFail = -2504; +const int rBeginCheckpointFail = -2505; +const int rCommitCheckpointFail = -2506; + + + /* errorno for codegen -3001 ~ -4000 */ const int rTestError = -3001; diff --git a/common/error_no.cpp b/common/error_no.cpp index 7042d3ac5..ec7f04a9f 100644 --- a/common/error_no.cpp +++ b/common/error_no.cpp @@ -212,9 +212,19 @@ ErrorInit::ErrorInit() { DefineErrorAndMessage(rTooFewColumn, "too few column data"); DefineErrorAndMessage(rTooManyColumn, "too many column data"); +// DefineErrorAndMessage(rLinkTmTimeout, "link to txn manager time out"); +// DefineErrorAndMessage(rLinkTmFail,"link to txn manager failed"); +// DefineErrorAndMessage(rBeginIngestTxnFail, "Begin ingest txn failed "); +// DefineErrorAndMessage(rCommitIngestTxnFail, "Commit ingest txn failed"); +// DefineErrorAndMessage(rAbortIngestTxnFail, "Abort ingest txn failed"); +// DefineErrorAndMessage(rBeginQueryFail, "Begin query 
failed"); +// DefineErrorAndMessage(rBeginCheckpointFail, "Begin checkpoint failed"); +// DefineErrorAndMessage(rCommitCheckpointFail, "Commit checkpoint failed"); + /* errorno for codegen -3001 ~ -4000 */ DefineErrorAndMessage(rTestError, "test it is error "); + /* errorno for logical_operator -4001~ -5000 */ DefineErrorAndMessage(rUninitializedJoinPolicy, "the dedicated join police is not initialized, e.g., " diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index c1445f48d..e18e4e203 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -34,7 +34,7 @@ #include "caf/io/all.hpp" #include #include - +#include "../common/error_define.h" #include "../catalog/catalog.h" #include "../Config.h" #include "../Environment.h" diff --git a/txn_manager/txn.cpp b/txn_manager/txn.cpp index 24001b3b1..cce2e3f79 100644 --- a/txn_manager/txn.cpp +++ b/txn_manager/txn.cpp @@ -33,16 +33,16 @@ using claims::txn::Strip; void Strip::Map(vector & input, map> & output) { output.clear(); for (auto & strip:input) { - if (output.find(strip.Part) != output.end()) - output[strip.Part].push_back(strip); + if (output.find(strip.part_) != output.end()) + output[strip.part_].push_back(strip); else - output[strip.Part] = vector(); + output[strip.part_] = vector(); } } void Strip::Sort(vector & input) { sort(input.begin(), input.end(), - [](const Strip & a, const Strip &b){ return a.Pos < b.Pos;}); + [](const Strip & a, const Strip &b){ return a.pos_ < b.pos_;}); } void Strip::Sort(vector & input) { @@ -56,16 +56,16 @@ void Strip::Merge(vector & input){ vector buffer(input); input.clear(); if (buffer.size() == 0) return; - auto pid = buffer[0].Part; - auto begin = buffer[0].Pos; - auto end = buffer[0].Pos + buffer[0].Offset; + auto pid = buffer[0].part_; + auto begin = buffer[0].pos_; + auto end = buffer[0].pos_ + buffer[0].offset_; for (auto i = 1; i < buffer.size(); i ++) { - if (end == buffer[i].Pos) - end = buffer[i].Pos + buffer[i].Offset; + if (end == 
buffer[i].pos_) + end = buffer[i].pos_ + buffer[i].offset_; else { input.emplace_back(pid, begin, end - begin); - begin = buffer[i].Pos; - end = begin + buffer[i].Offset; + begin = buffer[i].pos_; + end = begin + buffer[i].offset_; } } input.emplace_back(pid, begin, end - begin); @@ -100,27 +100,27 @@ void Strip::Filter(vector & input, function predicat string Strip::ToString() { string str = "*******Strip******\n"; - str += "part:" + to_string(Part) + - ",pos:" + to_string(Pos) + - ",Offset:" + to_string(Offset) + "\n"; + str += "part:" + to_string(part_) + + ",pos:" + to_string(pos_) + + ",Offset:" + to_string(offset_) + "\n"; return str; } string FixTupleIngestReq::ToString() { string str = "*******FixTupleIngestReq********\n"; - for (auto & item : Content) + for (auto & item : content_) str += "part:" + to_string(item.first) + ",tuple_size:" + to_string(item.second.first) + ",tuple_count:"+ to_string(item.second.second)+"\n"; return str; } string Ingest::ToString() { - UInt64 core_id = Id % 1000; + UInt64 core_id = id_ % 1000; core_id << 54; core_id >> 54; string str = "*******Ingest*********\n"; - str += "id:" + to_string(Id) + ",core:" + to_string(core_id)+ "\n"; - for (auto & item : StripList) + str += "id:" + to_string(id_) + ",core:" + to_string(core_id)+ "\n"; + for (auto & item : strip_list_) str += "part:" + to_string(item.first) + ",pos:" + to_string(item.second.first) + ",offset:"+ to_string(item.second.second)+"\n"; @@ -128,14 +128,14 @@ string Ingest::ToString() { } string QueryReq::ToString() { string str = "*******QueryReq********\n"; - for (auto & part : PartList) + for (auto & part : part_list_) str += "part:" + to_string(part) +"\n"; return str; } string Query::ToString() { string str = "******Query*******\n"; - for (auto & part : Snapshot){ + for (auto & part : snapshot_){ str += "part:" + to_string(part.first)+"\n"; for (auto & strip : part.second) str += "Pos:" + to_string(strip.first) + @@ -147,18 +147,18 @@ string Query::ToString() { 
string Checkpoint::ToString() { string str = "******checkpoint******\n"; - str += "part:" + to_string(Part) +"\n"; + str += "part:" + to_string(part_) +"\n"; str += "commit strip\n"; - for (auto & strip : CommitStripList) + for (auto & strip : commit_strip_list_) str += "Pos:" + to_string(strip.first) + ",Offset:" + to_string(strip.second) + "\n"; str += "abort strip\n"; - for (auto & strip : AbortStripList) + for (auto & strip : abort_strip_list_) str += "Pos:" + to_string(strip.first) + ",Offset:" + to_string(strip.second) + "\n"; - str += "logic cp:" + to_string(LogicCP) + "\n"; - str += "phy cp:" + to_string(PhyCP) + "\n"; + str += "logic cp:" + to_string(logic_cp_) + "\n"; + str += "phy cp:" + to_string(phy_cp_) + "\n"; return str; } diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index 463d82787..3c6bbac6b 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -82,6 +82,8 @@ using AbortCPAtom = caf::atom_constant; using QuitAtom = caf::atom_constant; using LinkAtom = caf::atom_constant; using RefreshAtom = caf::atom_constant; +using MergeAtom = caf::atom_constant; + static const int kTxnPort = 8089; static const string kTxnIp = "127.0.0.1"; @@ -90,7 +92,7 @@ static const int kTxnBufferSize = 1024 * 10000; static const int kTxnLowLen = 10; static const int kTxnHighLen = 54; static const int kGCTime = 5; -static const int kTimeout = 1; +static const int kTimeout = 3; static const int kBlockSize = 64 * 1024; static const int kTailSize = sizeof(unsigned); @@ -98,18 +100,18 @@ static const int kTailSize = sizeof(unsigned); using PStrip = pair; class Strip{ public: - UInt64 Part; - UInt64 Pos; - UInt64 Offset; + UInt64 part_; + UInt64 pos_; + UInt64 offset_; Strip() {} Strip(UInt64 pId, UInt64 pos, UInt32 offset): - Part(pId), Pos(pos), Offset(offset) {} - UInt64 get_Part() const { return Part;} - UInt64 get_Pos() const { return Pos;} - UInt64 get_Offset() const { return Offset;} - void set_Part(UInt64 part) { Part = part;} - void set_Pos(UInt64 pos) 
{ Pos = pos;} - void set_Offset(UInt64 offset) { Offset = offset;} + part_(pId), pos_(pos), offset_(offset) {} + UInt64 get_part() const { return part_;} + UInt64 get_pos() const { return pos_;} + UInt64 get_offset() const { return offset_;} + void set_part(UInt64 part) { part_ = part;} + void set_pos(UInt64 pos) { pos_ = pos;} + void set_offset(UInt64 offset) { offset_ = offset;} string ToString(); static void Map(vector & input, map> & output); static void Sort(vector & input); @@ -119,7 +121,7 @@ class Strip{ static void Filter(vector & input, function predicate); }; inline bool operator == (const Strip & a, const Strip & b) { - return a.Part == b.Part && a.Pos == b.Pos && a.Offset == b.Offset; + return a.part_ == b.part_ && a.pos_ == b.pos_ && a.offset_ == b.offset_; } @@ -128,151 +130,145 @@ inline bool operator == (const Strip & a, const Strip & b) { class FixTupleIngestReq{ public: /*fix tuple part -> */ - map Content; - void Insert(UInt64 part, UInt64 tuple_size, UInt64 tuple_count) { - Content[part] = make_pair(tuple_size, tuple_count); + map content_; + void InsertStrip(UInt64 part, UInt64 tuple_size, UInt64 tuple_count) { + content_[part] = make_pair(tuple_size, tuple_count); } - map get_Content() const{ - return Content; + map get_content() const{ + return content_; } - void set_Content(const map & content) { - Content = content; + void set_content(const map & content) { + content_ = content; } string ToString (); }; inline bool operator == (const FixTupleIngestReq & a, const FixTupleIngestReq & b) { - return a.Content == b.Content; + return a.content_ == b.content_; } /****************Ingest***************/ class Ingest { public: - UInt64 Id; - map StripList; + UInt64 id_; + map strip_list_; void InsertStrip (UInt64 part, UInt64 pos, UInt64 offset) { - StripList[part] = make_pair(pos, offset); + strip_list_[part] = make_pair(pos, offset); } void InsertStrip (const Strip & strip) { - StripList[strip.Part] = make_pair(strip.Pos, strip.Offset); + 
strip_list_[strip.part_] = make_pair(strip.pos_, strip.offset_); } - UInt64 get_Id() const { return Id;} - map get_StripList() const { return StripList;} - void set_Id(const UInt64 & id){ Id = id;} - void set_StripList(const map & stripList) { - StripList = stripList; + UInt64 get_id() const { return id_;} + map get_strip_list() const { return strip_list_;} + void set_id(const UInt64 & id){ id_ = id;} + void set_strip_list(const map & stripList) { + strip_list_ = stripList; } string ToString(); }; inline bool operator == (const Ingest & a, const Ingest & b) { - return a.Id == b.Id; + return a.id_ == b.id_; } /************QueryReq************/ class QueryReq{ public: - vector PartList; - void InsertPart(UInt64 part) { PartList.push_back(part);} - vector get_PartList() const { return PartList;} - void set_PartList(const vector & partList) { PartList = partList;} + vector part_list_; + void InsertPart(UInt64 part) { part_list_.push_back(part);} + vector get_part_list() const { return part_list_;} + void set_part_list(const vector & partList) { part_list_ = partList;} string ToString(); }; inline bool operator == (const QueryReq & a, const QueryReq & b) { - return a.PartList == b.PartList; + return a.part_list_ == b.part_list_; } /***********Snapshot***********/ class Query{ public: - map> Snapshot; - map CPList; + map> snapshot_; + map cp_list_; void InsertStrip (UInt64 part, UInt64 pos, UInt64 offset){ // if (Snapshot.find(part) == Snapshot.end()) // Snapshot[part] = vector>(); // else - Snapshot[part].push_back(make_pair(pos, offset)); + snapshot_[part].push_back(make_pair(pos, offset)); } void InsertCP(UInt64 part, UInt64 cp) { - CPList[part] = cp; + cp_list_[part] = cp; } - map> get_Snapshot() const { - return Snapshot; + map> get_snapshot() const { + return snapshot_; } - map get_CPList() const { return CPList;} - void set_Snapshot(const map> & sp){ - Snapshot = sp; + map get_cp_list() const { return cp_list_;} + void set_snapshot(const map> & sp){ + snapshot_ = 
sp; } - void set_CPList(const map & cplist) { - CPList = cplist; + void set_cp_list(const map & cplist) { + cp_list_ = cplist; } string ToString(); }; inline bool operator == (const Query & a, const Query & b) { - return a.Snapshot == b.Snapshot; + return a.snapshot_ == b.snapshot_; } /*********Checkpoint***********/ class Checkpoint{ public: - UInt64 Id; - UInt64 Part; - UInt64 LogicCP; - UInt64 PhyCP; - vector CommitStripList; - vector AbortStripList; + UInt64 id_; + UInt64 part_; + UInt64 logic_cp_; + UInt64 phy_cp_; + vector commit_strip_list_; + vector abort_strip_list_; Checkpoint() {} Checkpoint(UInt64 part, UInt64 newLogicCP, UInt64 oldPhyCP): - Part(part), LogicCP(newLogicCP),PhyCP(oldPhyCP) {} - UInt64 get_Id() const { return Id;} - UInt64 get_Part() const { return Part;} - UInt64 get_LogicCP() const { return LogicCP;} - UInt64 get_PhyCP() const { return PhyCP;} - vector get_CommitStripList() const { return CommitStripList;}; - vector get_AbortStripList() const { return AbortStripList;}; - void set_Part(UInt64 part) { Part = part;} - void set_LogicCP(UInt64 logicCP) { LogicCP = logicCP;} - void set_PhyCP(UInt64 phyCP) { PhyCP = phyCP;} - void set_CommitStripList(const vector & commitstripList) { - CommitStripList = commitstripList; + part_(part), logic_cp_(newLogicCP),phy_cp_(oldPhyCP) {} + UInt64 get_id() const { return id_;} + UInt64 get_part() const { return part_;} + UInt64 get_logic_cp() const { return logic_cp_;} + UInt64 get_phy_cp() const { return phy_cp_;} + vector get_commit_strip_list() const { return commit_strip_list_;}; + vector get_abort_strip_list() const { return abort_strip_list_;}; + void set_part(UInt64 part) { part_ = part;} + void set_Logic_cp(UInt64 logicCP) { logic_cp_ = logicCP;} + void set_Phy_cp(UInt64 phyCP) { phy_cp_ = phyCP;} + void set_commit_strip_list(const vector & commitstripList) { + commit_strip_list_ = commitstripList; } - void set_AbortStripList(const vector & abortstripList) { - AbortStripList = abortstripList; + 
void set_abort_strip_list(const vector & abortstripList) { + abort_strip_list_ = abortstripList; } string ToString(); }; inline bool operator == (const Checkpoint & a, const Checkpoint & b) { - return a.Id == b.Id; + return a.id_ == b.id_; } -inline void SerializeConfig() { +inline void SerConfig() { caf::announce("FixTupleIngestReq", - make_pair(&FixTupleIngestReq::get_Content, &FixTupleIngestReq::set_Content)); + make_pair(&FixTupleIngestReq::get_content, &FixTupleIngestReq::set_content)); caf::announce("Ingest", - make_pair(&Ingest::get_Id,&Ingest::set_Id), - make_pair(&Ingest::get_StripList,&Ingest::set_StripList)); + make_pair(&Ingest::get_id,&Ingest::set_id), + make_pair(&Ingest::get_strip_list,&Ingest::set_strip_list)); caf::announce("QueryReq", - make_pair(&QueryReq::get_PartList, &QueryReq::set_PartList)); + make_pair(&QueryReq::get_part_list, &QueryReq::set_part_list)); caf::announce("Query", - make_pair(&Query::get_Snapshot,&Query::set_Snapshot), - make_pair(&Query::get_CPList, &Query::set_CPList)); + make_pair(&Query::get_snapshot,&Query::set_snapshot), + make_pair(&Query::get_cp_list, &Query::set_cp_list)); caf::announce("Checkpoint", - make_pair(&Checkpoint::get_Part, &Checkpoint::set_Part), - make_pair(&Checkpoint::get_LogicCP, &Checkpoint::set_LogicCP), - make_pair(&Checkpoint::get_PhyCP, &Checkpoint::set_PhyCP), - make_pair(&Checkpoint::get_CommitStripList, - &Checkpoint::set_CommitStripList), - make_pair(&Checkpoint::get_AbortStripList, - &Checkpoint::set_AbortStripList)); + make_pair(&Checkpoint::get_part, &Checkpoint::set_part), + make_pair(&Checkpoint::get_logic_cp, &Checkpoint::set_Logic_cp), + make_pair(&Checkpoint::get_phy_cp, &Checkpoint::set_Phy_cp), + make_pair(&Checkpoint::get_commit_strip_list, + &Checkpoint::set_commit_strip_list), + make_pair(&Checkpoint::get_abort_strip_list, + &Checkpoint::set_abort_strip_list)); } - - - - - - } } #endif // TXN_HPP_ diff --git a/txn_manager/txn_client.cpp b/txn_manager/txn_client.cpp index 
30538230d..bf52269d5 100644 --- a/txn_manager/txn_client.cpp +++ b/txn_manager/txn_client.cpp @@ -27,6 +27,7 @@ */ #include "txn_client.hpp" +//#include "../common/error_define.h" namespace claims{ namespace txn{ @@ -34,126 +35,152 @@ namespace txn{ //using claims::txn::RetCode; //using claims::txn::FixTupleIngestReq; //using claims::txn::Ingest; +//using claims::common::rSuccess; +//using claims::common::rLinkTmTimeout; +//using claims::common::rLinkTmFail; +//using claims::common::rBeginIngestTxnFail; +//using claims::common::rBeginQueryFail; +//using claims::common::rBeginCheckpointFail; +//using claims::common::rCommitIngestTxnFail; +//using claims::common::rCommitCheckpointFail; -string TxnClient::Ip = kTxnIp; -int TxnClient::Port = kTxnPort; -caf::actor TxnClient::Proxy; + +string TxnClient::ip_ = kTxnIp; +int TxnClient::port_ = kTxnPort; +caf::actor TxnClient::proxy_; RetCode TxnClient::Init(string ip, int port){ - Ip = ip; - Port = port; - SerializeConfig(); + ip_ = ip; + port_ = port; + SerConfig(); try { - Proxy = caf::io::remote_actor(Ip, port); + proxy_ = caf::io::remote_actor(ip_, port); } catch (...) { +// return rLinkTmFail; return -1; } +// return rSuccess; return 0; } RetCode TxnClient::BeginIngest(const FixTupleIngestReq & request, Ingest & ingest){ - RetCode ret = -1; - if (TxnServer::Active) - return TxnServer::BeginIngest(request, ingest); - else { - try{ - caf::scoped_actor self;; - self->sync_send(Proxy, IngestAtom::value, request). - await([&](Ingest & reply, RetCode r) { ingest = reply; ret = r;}, - caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); - } catch (...){ - cout << "link fail" << endl; - return -1; - } +// RetCode ret = rSuccess; + RetCode ret = 0; + try{ + caf::scoped_actor self; + self->sync_send(TxnServer::active_ ? 
TxnServer::proxy_ : proxy_, + IngestAtom::value, request).await( + [&](RetCode r, const Ingest & reply) {ret = r; ingest = reply;}, + [&](RetCode r) { ret = r;}, + caf::others >> [](){ cout << " unkown message" << endl;}, + caf::after(seconds(kTimeout)) >> [&]{ +// ret = rLinkTmTimeout; + ret = -1; + cout <<"time out" << endl; }); + } catch (...){ + cout << "link fail" << endl; +// return rLinkTmFail; + return -1; } return ret; } -RetCode TxnClient::CommitIngest(const Ingest & ingest) { - RetCode ret = -1; - if (TxnServer::Active) - return TxnServer::CommitIngest(ingest); - else { - try { - caf::scoped_actor self; - self->sync_send(Proxy, CommitIngestAtom::value, ingest). - await([&](RetCode r) { ret = r;}, - caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); - } catch (...) { - cout << "link fail" << endl; - return -1; - } +RetCode TxnClient::CommitIngest(const UInt64 id) { +// RetCode ret = rSuccess; + RetCode ret = 0; + try { + caf::scoped_actor self; + self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, + CommitIngestAtom::value, id).await( + [&](RetCode r) { ret = r;}, + caf::after(seconds(kTimeout)) >> [&]{ +// ret = rLinkTmTimeout; + ret = -1; + cout << "time out" << endl; }); + } catch (...) { + cout << "link fail" << endl; +// return rLinkTmFail; + return -1; } return ret; } -RetCode TxnClient::AbortIngest(const Ingest & ingest) { - RetCode ret = -1; - if (TxnServer::Active) - return TxnServer::AbortIngest(ingest); - else { - try { - caf::scoped_actor self; - self->sync_send(Proxy, AbortIngestAtom::value, ingest). - await([&](RetCode r) { ret = r;}, - caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); - } catch (...) { - cout << "link fail" << endl; - return -1; - } +RetCode TxnClient::AbortIngest(const UInt64 id) { +// RetCode ret = rSuccess; + RetCode ret = 0; + try { + caf::scoped_actor self; + self->sync_send(TxnServer::active_ ? 
TxnServer::proxy_ : proxy_, + AbortIngestAtom::value, id).await( + [&](RetCode r) { ret = r;}, + caf::after(seconds(kTimeout)) >> [&]{ +// ret = rLinkTmTimeout; + ret = -1; + cout << "time out" << endl; }); + } catch (...) { + cout << "link fail" << endl; +// return rLinkTmFail; + return -1; } return ret; } RetCode TxnClient::BeginQuery(const QueryReq & request, Query & query) { - RetCode ret = -1; - if (TxnServer::Active) - return TxnServer::BeginQuery(request, query); - else { - try { - caf::scoped_actor self; - self->sync_send(Proxy, QueryAtom::value, request). - await([&](const QueryReq & request, RetCode r) { ret = r;}, - caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); - } catch (...) { - cout << "link fail" << endl; - return -1; - } +// RetCode ret = rSuccess; + RetCode ret = 0; + try { + caf::scoped_actor self; + self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, + QueryAtom::value, request).await( + [&](const Query & q) { query = q;}, + caf::after(seconds(kTimeout)) >> [&]{ +// ret = rLinkTmTimeout; + ret = -1; + cout << "time out" << endl;}); + } catch (...) { + cout << "link fail" << endl; +// return rLinkTmFail; + return -1; } return ret; } RetCode TxnClient::BeginCheckpoint(Checkpoint & cp) { - RetCode ret = -1; - if (TxnServer::Active) - return TxnServer::BeginCheckpoint(cp); - else { - try { - caf::scoped_actor self; - self->sync_send(Proxy, CheckpointAtom::value, cp.Part). - await([&](const Checkpoint & checkpoint, RetCode r) {cp = checkpoint; ret = r;}, - caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); - } catch (...) { - cout << "link fail" << endl; - return -1; - } +// RetCode ret = rSuccess; + RetCode ret = 0; + try { + caf::scoped_actor self; + self->sync_send(TxnServer::active_ ? 
TxnServer::proxy_ : proxy_, + CheckpointAtom::value, cp.part_).await( + [&](const Checkpoint & checkpoint, RetCode r) { + cp = checkpoint; ret = r;}, + caf::after(seconds(kTimeout)) >> [&]{ +// ret = rLinkTmTimeout; + ret = -1; + cout << "time out" << endl;}); + } catch (...) { + cout << "link fail" << endl; +// return rLinkTmFail; + return -1; } return ret; } -RetCode TxnClient::CommitCheckpoint(const Checkpoint & cp) { - RetCode ret = -1; - if (TxnServer::Active) - return TxnServer::CommitCheckpoint(cp); - else { - try { - caf::scoped_actor self; - self->sync_send(Proxy, CommitCPAtom::value, cp). - await([&](RetCode r) { ret = r;}, - caf::after(seconds(kTimeout)) >> []{ cout << "time out" << endl;}); - } catch (...) { - cout << "link fail" << endl; - return -1; - } +RetCode TxnClient::CommitCheckpoint(const UInt64 logic_cp, const UInt64 phy_cp) { +// RetCode ret = rSuccess; + RetCode ret = 0; + try { + caf::scoped_actor self; + self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, + CommitCPAtom::value, logic_cp, phy_cp).await( + [&](RetCode r) { ret = r;}, + caf::after(seconds(kTimeout)) >> [&]{ +// ret = rLinkTmTimeout; + ret = -1; + cout << "time out" << endl;}); + } catch (...) 
{ + cout << "link fail" << endl; +// return rLinkTmFail; + return -1; } return ret; } diff --git a/txn_manager/txn_client.hpp b/txn_manager/txn_client.hpp index bf8a0e6ea..1923e904d 100644 --- a/txn_manager/txn_client.hpp +++ b/txn_manager/txn_client.hpp @@ -45,6 +45,7 @@ #include "caf/io/all.hpp" #include "txn.hpp" #include "txn_server.hpp" + #include using std::cin; @@ -68,16 +69,16 @@ namespace txn{ class TxnClient{ public: - static string Ip; - static int Port; - static caf::actor Proxy; + static string ip_; + static int port_; + static caf::actor proxy_; static RetCode Init(string ip = kTxnIp, int port = kTxnPort); static RetCode BeginIngest(const FixTupleIngestReq & request, Ingest & ingest); - static RetCode CommitIngest(const Ingest & ingest); - static RetCode AbortIngest(const Ingest & ingest); + static RetCode CommitIngest(const UInt64 id); + static RetCode AbortIngest(const UInt64 id); static RetCode BeginQuery(const QueryReq & request, Query & query); static RetCode BeginCheckpoint(Checkpoint & cp); - static RetCode CommitCheckpoint(const Checkpoint & cp); + static RetCode CommitCheckpoint(const UInt64 logic_cp, const UInt64 phy_cp); }; } diff --git a/txn_manager/txn_client_test.cpp b/txn_manager/txn_client_test.cpp index adba0c43b..7842896ae 100644 --- a/txn_manager/txn_client_test.cpp +++ b/txn_manager/txn_client_test.cpp @@ -26,7 +26,6 @@ * */ - #include #include #include @@ -44,6 +43,7 @@ #include "txn.hpp" #include "unistd.h" #include "txn_client.hpp" +#include "txn_log.hpp" using std::cin; using std::cout; using std::endl; @@ -59,16 +59,17 @@ using std::tuple; using std::make_tuple; using std::make_pair; using std::get; +using std::string; using UInt64 = unsigned long long; using UInt32 = unsigned int; using UInt16 = unsigned short; using UInt8 = char; using RetCode = int; using OkAtom = caf::atom_constant; +using IngestAtom = caf::atom_constant; +using QueryAtom = caf::atom_constant; using FailAtom = caf::atom_constant; - - -using namespace 
claims::txn; +using QuitAtom = caf::atom_constant; class Foo { public: @@ -91,37 +92,82 @@ inline bool operator == (const Foo & a, const Foo & b) { } char v[1024+10]; +caf::actor proxy; +class A{ + public: + vector list_ ; + int c = 0; + void set_list_(const vector list) { list_ = list;} + vector get_list_() const { return list_;} +}; +inline bool operator == (const A & a1, const A & a2) { + return a1.list_ == a2.list_; +} -void task(int time){ - for (auto i = 0; i< time; i++) { - - FixTupleIngestReq request1; - Ingest ingest; - request1.Content = {{0, {45, 10}}, - {1, {35, 20}}, - {2,{15,100}}}; - TxnClient::BeginIngest(request1, ingest); -// LogClient::Data(1, 1, 1111,(void*)v, 1024); -// LogClient::Data(1, 1, 1111,(void*)v, 1024); -// LogClient::Data(1, 1, 1111,(void*)v, 1024); +void ConfigA(){ + caf::announce("A", make_pair(&A::get_list_, &A::set_list_)); +} +void task(int index){ +for (auto i=0;isync_send(proxy, IngestAtom::value, i).await( + [=](int ret) { /*cout <<"receive:" << ret << endl;*/}, + caf::after(std::chrono::seconds(2)) >> [] { + cout << "ingest time out" << endl; + } + ); +// self->sync_send(proxy, QueryAtom::value).await( +// [=](int t) { +// cout << t<< endl; +// }, +// [=](A a) { +// cout << "success" << endl; +// for (auto &it : a.list_){ +// cout << it << endl; +// } +// }, +// caf::after(std::chrono::seconds(2)) >> [] { +// cout << "query time out" << endl; +// } +// ); +} +} - TxnClient::CommitIngest(ingest); -// } +using claims::txn::FixTupleIngestReq; +using claims::txn::Ingest; +using claims::txn::QueryReq; +using claims::txn::Query; +using claims::txn::TxnServer; +using claims::txn::TxnClient; +using claims::txn::LogServer; +using claims::txn::LogClient; +void task2(int times){ +for (auto i=0; i threads; - int n,times; - cin >> n >> times; for (auto i=0;i(); - log_path = path; - buffer = (char*)malloc(max_buffer_size); - if (buffer == nullptr) return -1; - is_active = true; + proxy_ = caf::spawn(); + file_path_ = path; + buffer_ = 
(char*)malloc(buffer_capacity_); + if (buffer_ == nullptr) return -1; + active_ = true; return 0; } caf::behavior LogServer::make_behavior() { return { - [=](BeginAtom, UInt64 id)->RetCode { - // return Append(BeginLog(id)); - cout << "begin" << endl; - return 0; + [=](IngestAtom, shared_ptr ingest)->caf::message { + Append(BeginLog(ingest->id_)); + for (auto & strip : ingest->strip_list_) + Append(WriteLog(ingest->id_, strip.first, strip.second.first,strip.second.second)); + //cout << "begin" << endl; + return caf::make_message(0, *ingest); }, - [=](WriteAtom,UInt64 id, UInt64 part, UInt64 pos, - UInt64 offset)->RetCode { - // return Append(WriteLog(id, part, pos, offset)); - cout << "write" << endl; - return 0; - }, - [=](CommitAtom, UInt64 id)->RetCode { - //return Append(CommitLog(id)); - cout << "commit" << endl; - return 0; + [=](CommitIngestAtom, const UInt64 id)->caf::message { + Append(CommitLog(id)); + //cout << "commit" << endl; + Refresh(); + return caf::make_message(0); }, - [=](AbortAtom, UInt64 id)->RetCode { - return Append(AbortLog(id)); + [=](AbortIngestAtom, UInt64 id)->caf::message { + Append(AbortLog(id)); + //cout << "abort" << endl; + Refresh(); + return caf::make_message(0); }, [=](CheckpointAtom, UInt64 part, UInt64 logic_cp, UInt64 phy_cp) ->RetCode { @@ -84,8 +83,8 @@ caf::behavior LogServer::make_behavior() { return 0; }, [=](RefreshAtom)->RetCode { - //return Refresh(); - cout << "refresh" << endl; + return Refresh(); + // cout << "refresh" << endl; return 0; }, caf::others >> [=] () { cout << "unknown log message" << endl; } @@ -93,67 +92,67 @@ caf::behavior LogServer::make_behavior() { } RetCode LogServer::Append (const string & log) { - if (buffer_size + log.length() >= max_buffer_size) { + if (buffer_size_ + log.length() >= buffer_capacity_) { cout << "append fail" << endl; return -1; } - memcpy(buffer + buffer_size, log.c_str(), log.length()); - buffer_size += log.length(); - log_size += log.length(); + memcpy(buffer_ + 
buffer_size_, log.c_str(), log.length()); + buffer_size_ += log.length(); + file_size_ += log.length(); return 0; } RetCode LogServer::Append(void * data, UInt64 size){ - if (buffer_size + size >= max_buffer_size) + if (buffer_size_ + size >= buffer_capacity_) return -1; - memcpy(buffer + buffer_size, data, size); - buffer_size += size; - buffer[buffer_size++] = '\n'; - log_size += size + 1; + memcpy(buffer_ + buffer_size_, data, size); + buffer_size_ += size; + buffer_[buffer_size_++] = '\n'; + file_size_ += size + 1; return 0; } RetCode LogServer::Refresh() { - if (log_handler == nullptr) { + if (file_handler_ == nullptr) { struct timeval ts; gettimeofday (&ts, NULL); - string file = log_path + "/" + kTxnLogFileName + to_string(ts.tv_sec); - log_handler = fopen (file.c_str(),"a"); - if (log_handler == nullptr) return -1; + string file = file_path_ + "/" + kTxnLogFileName + to_string(ts.tv_sec); + file_handler_ = fopen (file.c_str(),"a"); + if (file_handler_ == nullptr) return -1; } - if (buffer_size == 0) + if (buffer_size_ == 0) return 0; //cout << buffer_size << endl; - fwrite(buffer, sizeof(char), buffer_size, log_handler); - fflush(log_handler); - buffer_size = 0; + fwrite(buffer_, sizeof(char), buffer_size_, file_handler_); + fflush(file_handler_); + buffer_size_ = 0; /* 日志文件已满 */ - if(log_size >= max_log_size) { - if (log_handler == nullptr) return -1; - fclose(log_handler); - log_handler = nullptr; - log_size = 0; + if(file_size_ >= file_capacity_) { + if (file_handler_ == nullptr) return -1; + fclose(file_handler_); + file_handler_ = nullptr; + file_size_ = 0; } return 0; } RetCode LogClient::Begin(UInt64 id) { - RetCode ret = 0; - caf::scoped_actor self; - cout<<"going to send begin atom to log server :"<sync_send( log_s,BeginAtom::value, id). - await( [&](RetCode ret_code) { cout<<"log:Begin, ret"<sync_send( log_s,BeginAtom::value, id). 
+// await( [&](RetCode ret_code) { cout<<"log:Begin, ret"<sync_send(LogServer::log_server, + self->sync_send(LogServer::proxy_, WriteAtom::value, id, part, pos, offset).await( [&](RetCode ret_code) { ret = ret_code;} ); @@ -162,7 +161,7 @@ RetCode LogClient::Write(UInt64 id, UInt64 part, UInt64 pos, UInt64 offset) { RetCode LogClient::Commit(UInt64 id) { RetCode ret = 0; caf::scoped_actor self; - self->sync_send( LogServer::log_server, + self->sync_send( LogServer::proxy_, CommitAtom::value,id).await( [&](RetCode ret_code) { ret = ret_code;} ); @@ -171,7 +170,7 @@ RetCode LogClient::Commit(UInt64 id) { RetCode LogClient::Abort(UInt64 id) { RetCode ret = 0; caf::scoped_actor self; - self->sync_send( LogServer::log_server, + self->sync_send( LogServer::proxy_, AbortAtom::value, id).await( [&](RetCode ret_code) { ret = ret_code;} ); @@ -180,7 +179,7 @@ RetCode LogClient::Abort(UInt64 id) { RetCode LogClient::Data(UInt64 part, UInt64 pos, UInt64 offset, void * buffer, UInt64 size) { RetCode ret = 0; caf::scoped_actor self; - self->sync_send( LogServer::log_server, + self->sync_send( LogServer::proxy_, DataAtom::value, part, pos, offset, buffer, size).await( [&](RetCode ret_code) { ret = ret_code;} ); @@ -189,7 +188,7 @@ RetCode LogClient::Data(UInt64 part, UInt64 pos, UInt64 offset, void * buffer, U RetCode LogClient::Checkpoint(UInt64 part, UInt64 logic_cp, UInt64 phy_cp) { RetCode ret = 0; caf::scoped_actor self; - self->sync_send(LogServer::log_server, + self->sync_send(LogServer::proxy_, CheckpointAtom::value, part, logic_cp, phy_cp).await( [&](RetCode ret_code) { ret = ret_code;} ); @@ -199,7 +198,7 @@ RetCode LogClient::Checkpoint(UInt64 part, UInt64 logic_cp, UInt64 phy_cp) { RetCode LogClient::Refresh() { RetCode ret = 0; caf::scoped_actor self; - self->sync_send(LogServer::log_server, RefreshAtom::value). + self->sync_send(LogServer::proxy_, RefreshAtom::value). 
await( [&](RetCode ret_code) { ret = ret_code;}); return ret; } diff --git a/txn_manager/txn_log.hpp b/txn_manager/txn_log.hpp index 30814083e..adfd4c188 100644 --- a/txn_manager/txn_log.hpp +++ b/txn_manager/txn_log.hpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "unistd.h" #include "dirent.h" @@ -52,6 +53,8 @@ using std::cout; using std::endl; using std::ifstream; using std::to_string; +using std::shared_ptr; + namespace claims{ namespace txn{ @@ -74,7 +77,7 @@ const int kTypeData = 6; class LogServer:public caf::event_based_actor { public: - static RetCode init(const string path = "."); + static RetCode Init(const string path = "."); static RetCode Append (const string & log); static RetCode Append (void * buffer, UInt64 size); static RetCode Refresh (); @@ -102,16 +105,16 @@ class LogServer:public caf::event_based_actor { ","+to_string(offset)+","+to_string(size)+">\n"; } caf::behavior make_behavior(); - static caf::actor log_server; - static bool is_active; + static caf::actor proxy_; + static bool active_; private: - static string log_path; - static FILE * log_handler; - static UInt64 log_size; - static UInt64 max_log_size; - static char * buffer; - static UInt64 buffer_size; - static UInt64 max_buffer_size; + static string file_path_; + static FILE * file_handler_; + static UInt64 file_size_; + static UInt64 file_capacity_; + static char * buffer_; + static UInt64 buffer_size_; + static UInt64 buffer_capacity_; }; diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index 214960b2f..02c440c20 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -26,189 +26,202 @@ * */ #include "txn_server.hpp" - +#include "txn_log.hpp" +//#include "../common/error_define.h" namespace claims{ namespace txn{ - -int TxnCore::BufferSize = kTxnBufferSize; - - -int TxnServer::Port = kTxnPort; -int TxnServer::Concurrency = kConcurrency; -caf::actor TxnServer::Router; -vector TxnServer::Cores; -bool 
TxnServer::Active = false; - -std::unordered_map> TxnServer::PosList; -std::unordered_map TxnServer::LogicCPList; -std::unordered_map TxnServer::PhyCPList; +//using claims::common::rSuccess; +//using claims::common::rLinkTmTimeout; +//using claims::common::rLinkTmFail; +//using claims::common::rBeginIngestTxnFail; +//using claims::common::rBeginQueryFail; +//using claims::common::rBeginCheckpointFail; +//using claims::common::rCommitIngestTxnFail; +//using claims::common::rAbortIngestTxnFail; +//using claims::common::rCommitCheckpointFail; +int TxnCore::capacity_ = kTxnBufferSize; + + +int TxnServer::port_ = kTxnPort; +int TxnServer::concurrency_ = kConcurrency; +caf::actor TxnServer::proxy_; +vector TxnServer::cores_; +bool TxnServer::active_ = false; + +std::unordered_map> TxnServer::pos_list_; +std::unordered_map TxnServer::logic_cp_list_; +std::unordered_map TxnServer::phy_cp_list_; std::unordered_map> TxnServer::CountList; - -RetCode TxnCore::ReMalloc() { - Size = 0; - TxnIndex.clear(); - try { - delete [] Commit; - delete [] Abort; - delete [] StripList; - Commit = new bool[BufferSize]; - Abort = new bool[BufferSize]; - StripList = new vector[BufferSize]; - } catch (...) 
{ - cout << "core:"<delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); return { - [=](IngestAtom, const FixTupleIngestReq * request, Ingest * ingest)->int { - struct timeval tv1; - if (Size >= BufferSize) - return -1; - auto id = ingest->Id = GetId(); - TxnIndex[id] = Size; - Commit[Size] = Abort[Size] = false; - for (auto & item : request->Content) { - auto part = item.first; - auto tupleSize = item.second.first; - auto tupleCount = item.second.second; + [=](IngestAtom, const FixTupleIngestReq & request)->caf::message { + //cout << "begin" << endl; + auto ingest = make_shared(); +// RetCode ret = rSuccess; + RetCode ret = 0; + if (size_ >= capacity_) + return caf::make_message(-1/*rBeginIngestTxnFail*/); + ingest->id_ = GetId(); + txn_index_[ingest->id_ ] = size_; + commit_[size_] = abort_[size_] = false; + for (auto & strip_ : request.content_) { + auto part = strip_.first; + auto tupleSize = strip_.second.first; + auto tupleCount = strip_.second.second; auto strip = TxnServer::AtomicMalloc(part, tupleSize, tupleCount); - StripList[Size].push_back(strip); - ///cout << strip.ToString() << endl; + strip_list_[size_].push_back(strip); ingest->InsertStrip(strip); - } - Size ++; - - return 0; + } + size_ ++; + ///cout << ingest.ToString() << endl; + if (LogServer::active_) { + current_message() = caf::make_message(IngestAtom::value, ingest); + this->forward_to(LogServer::proxy_); + } + return caf::make_message(ret, *ingest); + }, + [=](CommitIngestAtom, const UInt64 id)->caf::message{ + // cout << "commit" << endl; + if (txn_index_.find(id) == txn_index_.end()) + return caf::make_message(-1/*rCommitIngestTxnFail*/); + commit_[txn_index_[id]] = true; + if (LogServer::active_) { + this->forward_to(LogServer::proxy_); + } + return caf::make_message(0/*rSuccess*/); + }, + [=](AbortIngestAtom, const UInt64 id)->caf::message { + // cout << "abort" << endl; + if (txn_index_.find(id) == txn_index_.end()) + return caf::make_message(-1/*rBeginIngestTxnFail*/); + 
abort_[txn_index_[id]] = true; + if (LogServer::active_) { + this->forward_to(LogServer::proxy_); + } + return caf::make_message(0/*rAbortIngestTxnFail*/); }, - [=](CommitIngestAtom, const UInt64 id)->int{ - if (TxnIndex.find(id) == TxnIndex.end()) - return -1; - Commit[TxnIndex[id]] = true; - return 0; + [=](QueryAtom, const QueryReq & request, shared_ptr query) { + // cout << "core:"<< core_id_ <<" query" << endl; + // cout << query->ToString() << endl; + for (auto i = 0; i < size_; i++){ + // cout << "commit:" << commit_[i] << endl; + if (commit_[i]) + for (auto & strip : strip_list_[i]) { + if (query->cp_list_.find(strip.part_) != query->cp_list_.end() && + strip.pos_ >= query->cp_list_[strip.part_]) + query->InsertStrip(strip.part_, strip.pos_, strip.offset_); + } + } + if (core_id_ != TxnServer::cores_.size() - 1) + this->forward_to(TxnServer::cores_[core_id_ + 1]); + else { + current_message() = caf::make_message(MergeAtom::value, request, query); + this->forward_to(TxnServer::cores_[TxnServer::SelectCoreId()]); + } }, - [=](AbortIngestAtom, const UInt64 id)->int { - if (TxnIndex.find(id) == TxnIndex.end()) - return -1; - Commit[TxnIndex[id]] = true; - return 0; + [=](MergeAtom, const QueryReq & request, shared_ptr query)->Query { + // cout << "query merge" << endl; + for (auto & part : query->snapshot_) { + Strip::Sort(part.second); + Strip::Merge(part.second); + } + return *query; }, - [=](QueryAtom, const QueryReq * request, Query * query)->int { - for (auto i = 0; i < Size; i++) - if (Commit[i]) - for (auto & strip : StripList[i]) { - if (query->CPList.find(strip.Part) != query->CPList.end() && - strip.Pos >= query->CPList[strip.Part]) - query->InsertStrip(strip.Part, strip.Pos, strip.Offset); - } - return 1; + [=](MergeAtom, shared_ptr cp)->Checkpoint { + Strip::Sort(cp->commit_strip_list_); + Strip::Merge(cp->commit_strip_list_); + Strip::Sort(cp->abort_strip_list_); + Strip::Merge(cp->abort_strip_list_); + return *cp; }, - [=] (CheckpointAtom, 
Checkpoint * cp)->int { - - for (auto i = 0; i < Size; i++) - if (Commit[i]) { - for (auto & strip : StripList[i]) - if ( strip.Part == cp->Part && strip.Pos >= cp->LogicCP ) - cp->CommitStripList.push_back(PStrip(strip.Pos, strip.Offset)); - } - else if (Abort[i]) { - for (auto & strip : StripList[i]) - if (strip.Part == cp->Part && strip.Pos >= cp->LogicCP) - cp->AbortStripList.push_back(PStrip(strip.Pos, strip.Offset)); + [=] (CheckpointAtom, shared_ptr cp) { + for (auto i = 0; i < size_; i++) + if (commit_[i]) { + for (auto & strip : strip_list_[i]) + if ( strip.part_ == cp->part_ && strip.pos_ >= cp->logic_cp_ ) + cp->commit_strip_list_.push_back(PStrip(strip.pos_, strip.offset_)); + } + else if (abort_[i]) { + for (auto & strip : strip_list_[i]) + if (strip.part_ == cp->part_ && strip.pos_ >= cp->logic_cp_) + cp->abort_strip_list_.push_back(PStrip(strip.pos_, strip.offset_)); + } + if (core_id_ != TxnServer::cores_.size() - 1) + this->forward_to(TxnServer::cores_[core_id_ + 1]); + else { + current_message() = caf::make_message(MergeAtom::value, cp); + this->forward_to(TxnServer::cores_[TxnServer::SelectCoreId()]); } - }, [=](GCAtom) { - auto size_old = Size; + auto size_old = size_; auto pos = 0; - for (auto i = 0; i < Size; i++) - if (!TxnServer::IsStripListGarbage(StripList[i])) { - TxnIndex[TxnIndex[i]] = pos; - Commit[pos] = Commit[i]; - Abort[pos] = Abort[i]; - StripList[pos] = StripList[i]; + for (auto i = 0; i < size_; i++) + if (!TxnServer::IsStripListGarbage(strip_list_[i])) { + txn_index_[txn_index_[i]] = pos; + commit_[pos] = commit_[i]; + abort_[pos] = abort_[i]; + strip_list_[pos] = strip_list_[i]; ++ pos; } - Size = pos; - cout <<"core:"<"<< pos << endl; + size_ = pos; + cout <<"core:"<"<< pos << endl; this->delayed_send(this, seconds(kGCTime), GCAtom::value); }, - caf::others >> [] () { cout<<"core unkown message"<> [&](){ cout<<"core:"<< core_id_<<" unkown message"<caf::message { -// Ingest ingest; -// auto ret = 
TxnServer::BeginIngest(request, ingest); -// quit(); -// return caf::make_message(ingest, ret); -// }, -// [=](CommitIngestAtom, const Ingest & ingest)->RetCode { -// quit(); -// return TxnServer::CommitIngest(ingest); -// }, -// [=](AbortIngestAtom, const Ingest & ingest)->RetCode { -// quit(); -// return TxnServer::AbortIngest(ingest); -// }, -// [=](QueryAtom, const QueryReq & request)->caf::message { -// Query query; -// auto ret = TxnServer::BeginQuery(request, query); -// quit(); -// return caf::make_message(query, ret); -// }, -// [=](CheckpointAtom, const UInt64 part)->caf::message{ -// Checkpoint cp; -// cp.Part = part; -// auto ret = TxnServer::BeginCheckpoint(cp); -// quit(); -// return caf::make_message(cp, ret); -// }, -// [=](CommitCPAtom, const Checkpoint & cp)->RetCode { -// quit(); -// return TxnServer::CommitCheckpoint(cp); -// }, -// caf::others >> [] () { cout<<"work unkown message"<int {cout << "receive int:" << a <> [&](){ cout<<"test unkown message"<caf::message { Ingest ingest; auto ret = TxnServer::BeginIngest(request, ingest); - quit(); +// auto ret = 10; +// cout<<"new IngestWorker!!"<sync_send(test, 34).await( +//// [&](int a) { ret = a;}); +//// +//// caf::scoped_actor self; +//// self->sync_send(test, 34).await( +//// [=](int a) { cout<quit(); return caf::make_message(ingest, ret); }, - caf::others >> [] () { cout<<"core unkown message"<> [] () { cout<<"IngestWorker unkown message"<RetCode { + [=](CommitIngestAtom, const UInt64 id)->RetCode { quit(); - return TxnServer::CommitIngest(ingest); + return TxnServer::CommitIngest(id); }, - caf::others >> [] () { cout<<"core unkown message"<> [] () { cout<<"IngestCommitWorker unkown message"<RetCode { + [=](AbortIngestAtom, const UInt64 id)->RetCode { quit(); - return TxnServer::AbortIngest(ingest); + return TxnServer::AbortIngest(id); }, - caf::others >> [] () { cout<<"core unkown message"<> [] () { cout<<"AbortWorker unkown message"<> [] () { cout<<"core unkown message"<> [] () { 
cout<<"QueryWorker unkown message"<caf::message{ Checkpoint cp; - cp.Part = part; + cp.part_ = part; auto ret = TxnServer::BeginCheckpoint(cp); quit(); return caf::make_message(cp, ret); }, - caf::others >> [] () { cout<<"core unkown message"<> [] () { cout<<"CheckpointWorker unkown message"<> [] () { cout<<"core unkown message"<> [] () { cout<<"CommitCPWorker unkown message"<forward_to(caf::spawn()); + forward_to(cores_[SelectCoreId()]); }, - [=](CommitIngestAtom, const Ingest & ingest) { - this->forward_to(caf::spawn()); + [=](CommitIngestAtom, const UInt64 id) { + forward_to(cores_[GetCoreId(id)]); }, - [=](AbortIngestAtom, const Ingest & ingest) { - this->forward_to(caf::spawn()); + [=](AbortIngestAtom, const UInt64 id) { + forward_to(cores_[GetCoreId(id)]); }, [=](QueryAtom, const QueryReq & request) { - this->forward_to(caf::spawn()); + auto query = make_shared(); + for (auto & part : request.part_list_) + query->cp_list_[part] = TxnServer::logic_cp_list_[part]; + current_message() = caf::make_message(QueryAtom::value, request, query); + forward_to(cores_[0]); }, [=](CheckpointAtom, const UInt64 part){ - this->forward_to(caf::spawn()); + auto cp = make_shared(); + cp->part_ = part; + current_message() = caf::make_message(CheckpointAtom::value, cp); + forward_to(cores_[0]); }, [=](CommitCPAtom, const Checkpoint & cp) { - this->forward_to(caf::spawn()); + }, - caf::others >> [] () { cout<<"unkown message"<> [] () { cout<<"server unkown message"<(); - for (auto i = 0; i < Concurrency; i++) - Cores.push_back(caf::spawn(i)); - SerializeConfig(); + active_ = true; + concurrency_ = concurrency; + port_ = port; + proxy_ = caf::spawn(); + for (auto i = 0; i < concurrency_; i++) + cores_.push_back(caf::spawn(i)); + SerConfig(); RecoveryFromCatalog(); RecoveryFromTxnLog(); srand((unsigned) time(NULL)); - return 0; } RetCode TxnServer::BeginIngest(const FixTupleIngestReq & request, Ingest & ingest) { RetCode ret = 0; - UInt64 core_id = SelectCore(); + UInt64 core_id = 
SelectCoreId(); caf::scoped_actor self; - self->sync_send(Cores[core_id], IngestAtom::value, & request, & ingest). + self->sync_send(cores_[core_id], IngestAtom::value, & request, & ingest). await([&](int r) {ret = r;}); if (ret == 0) { // LogClient::Begin(ingest.Id); @@ -306,11 +324,11 @@ RetCode TxnServer::BeginIngest(const FixTupleIngestReq & request, Ingest & inges } return ret; } -RetCode TxnServer::CommitIngest(const Ingest & ingest) { +RetCode TxnServer::CommitIngest(const UInt64 id) { RetCode ret = 0; - UInt64 core_id = GetCoreId(ingest.Id); + UInt64 core_id = GetCoreId(id); caf::scoped_actor self; - self->sync_send(Cores[core_id], CommitIngestAtom::value, &ingest). + self->sync_send(cores_[core_id], CommitIngestAtom::value, id). await([&](int r) { ret = r;}); if (ret == 0) { // LogClient::Commit(ingest.Id); @@ -318,11 +336,11 @@ RetCode TxnServer::CommitIngest(const Ingest & ingest) { } return ret; } -RetCode TxnServer::AbortIngest(const Ingest & ingest) { +RetCode TxnServer::AbortIngest(const UInt64 id) { RetCode ret; - UInt64 core_id = GetCoreId(ingest.Id); + UInt64 core_id = GetCoreId(id); caf::scoped_actor self; - self->sync_send(Cores[core_id], AbortIngestAtom::value, &ingest). + self->sync_send(cores_[core_id], AbortIngestAtom::value, id). await([&](int r) { ret = r;}); if (ret == 0) { // LogClient::Abort(ingest.Id); @@ -333,12 +351,12 @@ RetCode TxnServer::AbortIngest(const Ingest & ingest) { RetCode TxnServer::BeginQuery(const QueryReq & request, Query & query) { RetCode ret; caf::scoped_actor self; - for (auto & part : request.PartList) - query.CPList[part] = TxnServer::LogicCPList[part]; - for (auto & core : Cores) + for (auto & part : request.part_list_) + query.cp_list_[part] = TxnServer::logic_cp_list_[part]; + for (auto & core : cores_) self->sync_send(core, QueryAtom::value, & request, & query). 
await([&](int r) {r = ret;}); - for (auto & part : query.Snapshot) { + for (auto & part : query.snapshot_) { Strip::Sort(part.second); Strip::Merge(part.second); } @@ -346,27 +364,27 @@ RetCode TxnServer::BeginQuery(const QueryReq & request, Query & query) { } RetCode TxnServer::BeginCheckpoint(Checkpoint & cp) { RetCode ret = 0; - if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) + if (TxnServer::pos_list_.find(cp.part_) == TxnServer::pos_list_.end()) return -1; - cp.LogicCP = TxnServer::LogicCPList[cp.Part]; - cp.PhyCP = TxnServer::PhyCPList[cp.Part]; + cp.logic_cp_ = TxnServer::logic_cp_list_[cp.part_]; + cp.phy_cp_ = TxnServer::phy_cp_list_[cp.part_]; caf::scoped_actor self; - for (auto & core : Cores) + for (auto & core : cores_) self->sync_send(core,CheckpointAtom::value, &cp). await([&]( int r) { r = ret;}); - Strip::Sort(cp.CommitStripList); - Strip::Merge(cp.CommitStripList); - Strip::Sort(cp.AbortStripList); - Strip::Merge(cp.AbortStripList); + Strip::Sort(cp.commit_strip_list_); + Strip::Merge(cp.commit_strip_list_); + Strip::Sort(cp.abort_strip_list_); + Strip::Merge(cp.abort_strip_list_); return ret; } RetCode TxnServer::CommitCheckpoint(const Checkpoint & cp) { RetCode ret = 0; - if (TxnServer::PosList.find(cp.Part) == TxnServer::PosList.end()) + if (TxnServer::pos_list_.find(cp.part_) == TxnServer::pos_list_.end()) return -1; - TxnServer::LogicCPList[cp.Part] = cp.LogicCP; - TxnServer::PhyCPList[cp.Part] = cp.PhyCP; + TxnServer::logic_cp_list_[cp.part_] = cp.logic_cp_; + TxnServer::phy_cp_list_[cp.part_] = cp.phy_cp_; if (ret == 0) { // LogClient::Checkpoint(cp.Part, cp.LogicCP, cp.PhyCP); // LogClient::Refresh(); @@ -377,13 +395,13 @@ RetCode TxnServer::CommitCheckpoint(const Checkpoint & cp) { Strip TxnServer::AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount) { Strip strip; - strip.Part = part; + strip.part_ = part; if (TupleSize * TupleCount == 0) return strip; do { - strip.Pos = PosList[part].load(); - strip.Offset = 
0; - UInt64 block_pos = strip.Pos % kBlockSize; + strip.pos_ = pos_list_[part].load(); + strip.offset_ = 0; + UInt64 block_pos = strip.pos_ % kBlockSize; UInt64 remain_count = TupleCount; int count = 0; while(remain_count > 0) { @@ -394,25 +412,25 @@ Strip TxnServer::AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount) //使用块内可用区域 remain_count -= use_count; - strip.Offset += use_count * TupleSize; + strip.offset_ += use_count * TupleSize; block_pos += use_count * TupleSize; //将不可利用的空间也分配 if (kBlockSize - block_pos - kTailSize < TupleSize) { - strip.Offset += kBlockSize - block_pos; + strip.offset_ += kBlockSize - block_pos; block_pos = 0; } } - } while(!PosList[part].compare_exchange_weak(strip.Pos, strip.Pos + strip.Offset)); + } while(!pos_list_[part].compare_exchange_weak(strip.pos_, strip.pos_ + strip.offset_)); return strip; } RetCode TxnServer::RecoveryFromCatalog() { for (auto i = 0; i < 10; i++ ) { - PosList[i] = 0; + pos_list_[i] = 0; CountList[i] = 0; - LogicCPList[i] = 0; + logic_cp_list_[i] = 0; } } diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp index faf9538e6..4240bacbb 100644 --- a/txn_manager/txn_server.hpp +++ b/txn_manager/txn_server.hpp @@ -63,38 +63,38 @@ using std::sort; using std::atomic; using std::chrono::seconds; using std::chrono::milliseconds; +using std::make_shared; +using std::shared_ptr; class TxnCore: public caf::event_based_actor { public: - static int BufferSize; - - UInt64 CoreId; - UInt64 LocalId = 0; - - UInt64 Size; - map TxnIndex; - bool * Commit = nullptr; - bool * Abort = nullptr; - vector * StripList; - + static int capacity_; + UInt64 core_id_; + UInt64 txn_id_ = 0; + UInt64 size_; + map txn_index_; + bool * commit_ = nullptr; + bool * abort_ = nullptr; + vector * strip_list_; caf::behavior make_behavior() override; - RetCode ReMalloc(); - TxnCore(int coreId):CoreId(coreId) {} + void ReMalloc() { + size_ = 0; + txn_index_.clear(); + commit_ = new bool[capacity_]; + abort_ = new 
bool[capacity_]; + strip_list_ = new vector[capacity_]; + } + TxnCore(int coreId):core_id_(coreId) { ReMalloc();} UInt64 GetId(){ - UInt64 id = ((++LocalId) *1000) + CoreId; - return id; + return ((++txn_id_) *1000) + core_id_; } }; -//class TxnWorker:public caf::event_based_actor { -// public: -// caf::behavior make_behavior() override; -//}; - -class IngestWorker:public caf::event_based_actor { +class Test:public caf::event_based_actor { public: caf::behavior make_behavior() override; }; + class IngestCommitWorker:public caf::event_based_actor { public: caf::behavior make_behavior() override; @@ -123,30 +123,30 @@ class CommitCPWorker:public caf::event_based_actor { class TxnServer: public caf::event_based_actor{ public: - static bool Active; - static int Port; - static int Concurrency; - static caf::actor Router; - static vector Cores; - static std::unordered_map> PosList; - static std::unordered_map LogicCPList; - static std::unordered_map PhyCPList; + static bool active_; + static int port_; + static int concurrency_; + static caf::actor proxy_; + static vector cores_; + static std::unordered_map> pos_list_; + static std::unordered_map logic_cp_list_; + static std::unordered_map phy_cp_list_; static std::unordered_map> CountList; /**************** User APIs ***************/ static RetCode Init(int concurrency = kConcurrency , int port = kTxnPort); /**************** System APIs ***************/ static RetCode BeginIngest(const FixTupleIngestReq & request, Ingest & ingest); - static RetCode CommitIngest(const Ingest & ingest); - static RetCode AbortIngest(const Ingest & ingest); + static RetCode CommitIngest(const UInt64 id); + static RetCode AbortIngest(const UInt64 id); static RetCode BeginQuery(const QueryReq & request, Query & snapshot); static RetCode BeginCheckpoint(Checkpoint & cp); static RetCode CommitCheckpoint(const Checkpoint & cp); static UInt64 GetCoreId(UInt64 id) { return id % 1000; } - static inline UInt64 SelectCore() { - return rand() % 
Concurrency; + static inline UInt64 SelectCoreId() { + return rand() % concurrency_; } caf::behavior make_behavior() override; @@ -155,7 +155,7 @@ class TxnServer: public caf::event_based_actor{ static inline Strip AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount); static inline bool IsStripListGarbage(const vector & striplist) { for (auto & strip : striplist) { - if (strip.Pos >= TxnServer::LogicCPList[strip.Part]) + if (strip.pos_ >= TxnServer::logic_cp_list_[strip.part_]) return false; } return true; diff --git a/txn_manager/txn_server_test.cpp b/txn_manager/txn_server_test.cpp index dd8b3c8b3..625419f13 100644 --- a/txn_manager/txn_server_test.cpp +++ b/txn_manager/txn_server_test.cpp @@ -43,10 +43,10 @@ #include #include "caf/all.hpp" #include "caf/io/all.hpp" -#include "txn.hpp" +//#include "txn.hpp" #include "txn_server.hpp" #include "txn_client.hpp" -#include "txn_log.cpp" +#include "txn_log.hpp" using std::cin; using std::cout; using std::endl; @@ -62,8 +62,9 @@ using std::tuple; using std::make_pair; using std::make_tuple; using std::get; - -using namespace claims::txn; +using std::shared_ptr; +using std::make_shared; +//using namespace claims::txn; using UInt64 = unsigned long long; using UInt32 = unsigned int; @@ -71,6 +72,8 @@ using UInt16 = unsigned short; using UInt8 = char; using RetCode = int; using OkAtom = caf::atom_constant; +using IngestAtom = caf::atom_constant; +using QueryAtom = caf::atom_constant; using FailAtom = caf::atom_constant; using QuitAtom = caf::atom_constant; @@ -94,146 +97,130 @@ inline bool operator == (const Foo & a, const Foo & b) { } -class AA:public caf::event_based_actor { - caf::behavior make_behavior() override { - return{ - [] (FixTupleIngestReq & request){ - cout << request.ToString() << endl; - }, - [] (int a) {cout << a << endl;}, - caf::others >> []() { - cout << "no matched" << endl; - } - }; - } -}; -class C:public caf::event_based_actor { - caf::behavior make_behavior() override { - return { - [=] 
(int a)->int { quit(); aout(this)<< a*1000 << endl;}, - caf::others >> []() {cout << "no matched" << endl;} - }; - } -}; -class Foo2 { - public: - int a = 0; - int b = 0; -}; -class Foo3{ + +class A{ public: + vector list_ ; int c = 0; + void set_list_(const vector list) { list_ = list;} + vector get_list_() const { return list_;} }; -using Foo2Atom = caf::atom_constant; -using Foo3Atom = caf::atom_constant; +inline bool operator == (const A & a1, const A & a2) { + return a1.list_ == a2.list_; +} -class B:public caf::event_based_actor { +void ConfigA(){ + caf::announce("A", make_pair(&A::get_list_, &A::set_list_)); +} + +class Core: public caf::event_based_actor { public: - caf::actor Router; - B() {} - B(caf::actor router):Router(router) {} - caf::behavior make_behavior() override { - return { - [=](int a) { - forward_to(caf::spawn()); - }, - [=](Foo2Atom, Foo2 * foo2)->int { - foo2->a = 97; - foo2->b = 98; - cout << "foo2" << endl; - return 101; - }, - [=](Foo3Atom, Foo3 * foo3)->int { - foo3->c = 99; - cout << "foo3" << endl; - return 102; - }, - caf::others >> []() { cout << "unkown" << endl;} - }; - } + int id_; + int v_; + Core(int id):id_(id) {} + caf::behavior make_behavior() override; }; -using claims::txn::TxnServer; -using claims::txn::FixTupleIngestReq; -using claims::txn::Ingest; -char v[1024+10]; - - - -void task(int time){ - for (auto i = 0; i< time; i++) { +class Proxy: public caf::event_based_actor { + public: + caf::behavior make_behavior() override; +}; +vector cores; +caf::actor proxy; +caf::behavior Core::make_behavior() { + return { + [=](IngestAtom, int v)->int { + v_ = v; + //cout << "ingest:" << v_ << endl; + return -v_; + }, + [=](QueryAtom, shared_ptr ptr)->A { + //cout << id_ << endl; + ptr->list_.push_back(-id_*id_); + // ptr->list_.push_back(id_); + if(id_ != cores.size() - 1) { + this->forward_to(cores[id_ + 1]); + return A(); + } + else { + return *ptr; + }; + cout << "err" << endl; + + return A(); + }, + caf::others >> [] { cout << 
"core receive unkown message" << endl;} + }; +} - FixTupleIngestReq request1; - Ingest ingest; - request1.Content = {{0, {45, 10}}, - {1, {35, 20}}, - {2,{15,100}}}; - TxnClient::BeginIngest(request1, ingest); - LogClient::Data(1, 1, 1111,(void*)v, 1024); - LogClient::Data(1, 1, 1111,(void*)v, 1024); - LogClient::Data(1, 1, 1111,(void*)v, 1024); +caf::behavior Proxy::make_behavior() { + return { + [=](IngestAtom, int v) { + //cout << "begin ingest" << endl; + this->forward_to(cores[v%cores.size()]); + }, + [=](QueryAtom) { + shared_ptr ptr = make_shared(); + this->current_message() = caf::make_message(QueryAtom::value, ptr); + this->forward_to(cores[0]); + }, + caf::others >> [] { cout << "proxy receive unkown message" << endl;} + }; +} +void task(int index){ +for (auto i=0;isync_send(proxy, IngestAtom::value, i).await( + [=](int ret) { /*cout <<"receive:" << ret << endl;*/}, + caf::after(std::chrono::seconds(2)) >> [] { + cout << "ingest time out" << endl; + } + ); - TxnClient::CommitIngest(ingest); -// } } } -void task2(int time) { - for (auto i = 0; i< time; i++) { - - LogClient::Begin(i); - LogClient::Write(i, 1, 0, 100 ); - LogClient::Write(i, 2, 0, 100 ); - LogClient::Write(i, 3, 0, 100 ); - - //LogClient::PushToDisk() ; +using claims::txn::FixTupleIngestReq; +using claims::txn::Ingest; +using claims::txn::TxnServer; +using claims::txn::TxnClient; +using claims::txn::LogServer; +using claims::txn::LogClient; +char buffer[3*1024+10]; +void task2(int times){ +for (auto i=0; i(); -// SerializeConfig(); -// caf::announce("foo", -// make_pair(&Foo::get_request1, &Foo::set_request1), -// make_pair(&Foo::get_request2, &Foo::set_request2), -// make_pair(&Foo::get_request3, &Foo::set_request3)); -// -// try { -// caf::io::publish(server, 8088); -// } catch (...) 
{ -// cout << "bind fail" << endl; -// } - -// -// TxnServer::Init(); - -// for (auto j = 0;j < 100 ;j++) { -//// request1.Content[0] = {45, 10}; -//// request1.Content[1] = {54, 10}; -// FixTupleIngestReq request1; -// Ingest ingest; -// request1.Content = {{0, {45, 10}}, {1, {54, 10}}}; -// TxnServer::BeginIngest(request1, ingest); -// TxnServer::CommitIngest(ingest); -// } -// sleep(1); - +int main(int argc,char *argv[]){ + memset(buffer,3*1024,'*'); TxnServer::Init(); - //LogServer::init("txn-log"); - -// gettimeofday(&tv1,NULL); -//// for (auto i=0;i threads; + int n,times; + cin >> n >> times; + for (auto i=0;i Date: Sun, 24 Apr 2016 17:45:08 +0800 Subject: [PATCH 16/58] finish test project --- txn_manager/txn_client_test.cpp | 45 ++++++++++++++++++++++----------- txn_manager/txn_log.cpp | 19 +++++++++----- txn_manager/txn_server_test.cpp | 35 +++++++++++++------------ 3 files changed, 61 insertions(+), 38 deletions(-) diff --git a/txn_manager/txn_client_test.cpp b/txn_manager/txn_client_test.cpp index 7842896ae..40e63840e 100644 --- a/txn_manager/txn_client_test.cpp +++ b/txn_manager/txn_client_test.cpp @@ -141,33 +141,48 @@ using claims::txn::TxnServer; using claims::txn::TxnClient; using claims::txn::LogServer; using claims::txn::LogClient; -void task2(int times){ -for (auto i=0; i rand_tuple_size(50, 150); + std::uniform_int_distribution rand_tuple_count(10, 100); + std::uniform_int_distribution rand_part_count(1, 10); + for (auto i=0; i0 ?tuple_count/part_count :1); + TxnClient::BeginIngest(req, ingest); + for (auto & strip : ingest.strip_list_) + LogClient::Data(strip.first,strip.second.first,strip.second.second, + buffer, tuple_size*tuple_count); + TxnClient::CommitIngest(ingest.id_); + LogClient::Refresh(); + } } int main(int argc, const char **argv){ int n = stoi(string(argv[1])); int times = stoi(string(argv[2])); - TxnClient::Init(); - LogServer::Init("txn-data"); + string ip = string(argv[3]); + int port = stoi(string(argv[4])); + 
TxnClient::Init(ip, port); + LogServer::Init("data-log"); struct timeval tv1, tv2; vector threads; for (auto i=0;iRetCode { + void * buffer, UInt64 size)->caf::message { Append(DataLogPrefix(part, pos, offset, size)); Append(buffer, size); - return 0; + return caf::make_message(0); }, - [=](RefreshAtom)->RetCode { - return Refresh(); - // cout << "refresh" << endl; - return 0; + [=](RefreshAtom)->caf::message { + + Refresh(); + return caf::make_message(0); }, caf::others >> [=] () { cout << "unknown log message" << endl; } }; @@ -115,12 +115,17 @@ RetCode LogServer::Append(void * data, UInt64 size){ } RetCode LogServer::Refresh() { + // cout << "refresh" << endl; if (file_handler_ == nullptr) { struct timeval ts; gettimeofday (&ts, NULL); string file = file_path_ + "/" + kTxnLogFileName + to_string(ts.tv_sec); + //cout << file << endl; file_handler_ = fopen (file.c_str(),"a"); - if (file_handler_ == nullptr) return -1; + if (file_handler_ == nullptr){ + //cout <<"open file fail"< threads; - int n,times; - cin >> n >> times; - for (auto i=0;i threads; +// int n,times; +// cin >> n >> times; +// for (auto i=0;i Date: Sun, 24 Apr 2016 18:39:53 +0800 Subject: [PATCH 17/58] finish slave loader but failed to compile --- Makefile.am | 3 ++- catalog/Makefile.am | 4 +-- configure.ac | 2 -- loader/slave_loader.cpp | 55 +++++++++++++++++++++++++++++++++----- storage/ChunkStorage.cpp | 23 ++++++++++++++++ storage/ChunkStorage.h | 47 ++++++++++++++++++++++++-------- txn_manager/txn_server.cpp | 1 + 7 files changed, 113 insertions(+), 22 deletions(-) diff --git a/Makefile.am b/Makefile.am index 9995239fa..eddf6e2ee 100644 --- a/Makefile.am +++ b/Makefile.am @@ -40,7 +40,6 @@ LDADD = \ storage/libstorage.a \ Resource/libresouce.a \ catalog/libcatalog.a \ - catalog/stat/libstat.a \ loader/libloader.a \ loader/test/libtest.a \ codegen/libcodegen.a\ @@ -73,6 +72,8 @@ LDADD = \ -lLLVMX86Desc -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMTarget -lLLVMRuntimeDyld \ 
-lLLVMExecutionEngine -lLLVMMC -lLLVMObject -lLLVMBitReader -lLLVMCore -lLLVMSupport -lLLVMMCParser +#LDADD += catalog/stat/libstat.a + include_HEADERS = Config.h \ Debug.h \ diff --git a/catalog/Makefile.am b/catalog/Makefile.am index 995ea20e8..de7cf0c50 100644 --- a/catalog/Makefile.am +++ b/catalog/Makefile.am @@ -43,5 +43,5 @@ libcatalog_a_SOURCES = \ table.h unordered_map.hpp # if it is need to include stat folder and Test folder, add stat and Test at next two lines -SUBDIRS = stat -DIST_SUBDIRS = stat +SUBDIRS = +DIST_SUBDIRS = diff --git a/configure.ac b/configure.ac index fea71cbcc..3f3c7fd90 100644 --- a/configure.ac +++ b/configure.ac @@ -13,8 +13,6 @@ AC_CONFIG_FILES([ catalog/Test/Makefile catalog/stat/Makefile Client/Makefile - Client/Test/Makefile - Client/json/Makefile common/Makefile common/Block/Makefile common/Expression/Makefile diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index a8bcc3915..e6420bf31 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -39,6 +39,8 @@ #include "../Environment.h" #include "../common/error_define.h" #include "../common/memory_handle.h" +#include "../storage/ChunkStorage.h" +#include "../storage/MemoryStore.h" #include "../storage/PartitionStorage.h" #include "../txn_manager/txn.hpp" using caf::event_based_actor; @@ -249,9 +251,11 @@ void* SlaveLoader::StartSlaveLoader(void* arg) { RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { RetCode ret = rSuccess; - uint64_t table_id = GetTableIdFromGlobalPartId(packet.global_part_id_); - uint64_t prj_id = GetProjectionIdFromGlobalPartId(packet.global_part_id_); - uint64_t part_id = GetPartitionIdFromGlobalPartId(packet.global_part_id_); + const uint64_t table_id = GetTableIdFromGlobalPartId(packet.global_part_id_); + const uint64_t prj_id = + GetProjectionIdFromGlobalPartId(packet.global_part_id_); + const uint64_t part_id = + GetPartitionIdFromGlobalPartId(packet.global_part_id_); uint64_t chunk_id = packet.pos_ / 
CHUNK_SIZE; PartitionStorage* part_storage = @@ -264,9 +268,48 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { part_storage->AddChunkWithMemoryToNum(chunk_id, HDFS), "added chunk to " << chunk_id, "failed to add chunk"); - uint64_t pos_in_chunk = packet.pos_ % CHUNK_SIZE; - // TODO(YUKAI): copy the value to chunk - + // copy data into applied memory + const uint64_t tuple_size = Catalog::getInstance() + ->getTable(table_id) + ->getProjectoin(prj_id) + ->getSchema() + ->getTupleMaxSize(); + const uint64_t offset = packet.offset_; + uint64_t cur_chunk_id = packet.pos_ / CHUNK_SIZE; + uint64_t cur_block_id = (packet.pos_ % CHUNK_SIZE) / BLOCK_SIZE; + uint64_t pos_in_block = packet.pos_ % BLOCK_SIZE; + uint64_t total_written_length = 0; + HdfsInMemoryChunk chunk_info; + while (total_written_length < offset) { + // get start position of current chunk + if (BlockManager::getInstance()->getMemoryChunkStore()->getChunk( + ChunkID(part_id, cur_chunk_id), chunk_info)) { + InMemoryChunkWriterIterator writer(chunk_info.hook, CHUNK_SIZE, + cur_block_id, BLOCK_SIZE, pos_in_block, + tuple_size); + do { // write to every block + uint64_t written_length = + writer.Write(packet.data_buffer_ + total_written_length, + offset - total_written_length); + total_written_length += written_length; + if (total_written_length == offset) { + // all tuple is written into memory + return rSuccess; + } else if (total_written_length > offset) { + assert(false); + } + } while (writer.NextBlock()); + + ++cur_chunk_id; // get next chunk to write + assert(cur_chunk_id < part_storage->chunk_list_.size()); + cur_block_id = 0; // the block id of next chunk is 0 + pos_in_block = 0; + + } else { + cout << "chunk id is " << cur_chunk_id << endl; + assert(false && "no chunk with this chunk id"); + } + } return ret; } diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 5db033c43..5536fb13f 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -410,3 
+410,26 @@ void ChunkReaderIterator::InHDFSBlockAccessor::getBlock( printf("InHDFSBlockAccessor::getBlock() is not implemented!\n"); assert(false); } + +uint64_t InMemoryChunkWriterIterator::Write(const void* const buffer_to_write, + uint64_t length_to_write) { + void* block_offset = chunk_offset_ + block_id_ * block_size_; + unsigned* tuple_count_in_block = reinterpret_cast( + block_offset + block_size_ - sizeof(unsigned)); + + // there are space to store data + int can_store_tuple_count = + (block_size_ - sizeof(unsigned)) / tuple_size_ - *tuple_count_in_block; + if (can_store_tuple_count > 0) { + int actual_written_tuple_count = + length_to_write / tuple_size_ > can_store_tuple_count + ? can_store_tuple_count + : length_to_write / tuple_size_; + memcpy(block_offset + *tuple_count_in_block * block_size_, buffer_to_write, + actual_written_tuple_count * tuple_size_); + + __sync_add_and_fetch(tuple_count_in_block, actual_written_tuple_count); + return actual_written_tuple_count; + } + return 0; +} diff --git a/storage/ChunkStorage.h b/storage/ChunkStorage.h index 48174f341..328155611 100755 --- a/storage/ChunkStorage.h +++ b/storage/ChunkStorage.h @@ -7,8 +7,8 @@ #ifndef CHUNKSTORAGE_H_ #define CHUNKSTORAGE_H_ -#include #include +#include #include "../common/error_define.h" #ifdef DMALLOC @@ -33,11 +33,7 @@ class ChunkReaderIterator { virtual void getBlock(BlockStreamBase*& block) const { assert(false); } unsigned getBlockSize() const { return block_size; } - void setBlockSize(unsigned blockSize) { - block_size = blockSize; - } - - ; + void setBlockSize(unsigned blockSize) { block_size = blockSize; } protected: unsigned block_size; @@ -109,7 +105,7 @@ class ChunkReaderIterator { number_of_blocks_(number_of_blocks), cur_block_(0), block_size_(block_size), - chunk_size_(chunk_size){}; + chunk_size_(chunk_size) {} virtual bool nextBlock(BlockStreamBase*& block) = 0; virtual bool getNextBlockAccessor(block_accessor*& ba) = 0; bool nextBlock(); @@ -146,8 +142,8 @@ class 
DiskChunkReaderIteraror : public ChunkReaderIterator { bool getNextBlockAccessor(block_accessor*& ba); private: - // unsigned number_of_blocks_; - // unsigned cur_block_; + // unsigned number_of_blocks_; + // unsigned cur_block_; /*the iterator creates a buffer and allocates its memory such that the query * processing * can just use the Block without the concern the memory allocation and @@ -166,8 +162,8 @@ class HDFSChunkReaderIterator : public ChunkReaderIterator { bool getNextBlockAccessor(block_accessor*& ba); private: - // unsigned number_of_blocks_; - // unsigned cur_block_; + // unsigned number_of_blocks_; + // unsigned cur_block_; /*the iterator creates a buffer and allocates its memory such that the query * processing * can just use the Block without the concern the memory allocation and @@ -178,6 +174,35 @@ class HDFSChunkReaderIterator : public ChunkReaderIterator { hdfsFile hdfs_fd_; }; +class InMemoryChunkWriterIterator { + public: + InMemoryChunkWriterIterator(void* chunk_offset, uint64_t chunk_size, + uint64_t block_id, uint64_t block_size, + uint64_t pos_in_block, uint64_t tuple_size) + : chunk_offset_(chunk_offset), + chunk_size_(chunk_size), + block_id_(block_id), + block_size_(block_size), + pos_in_block_(pos_in_block), + tuple_size_(tuple_size) {} + + public: + uint64_t Write(const void* const buffer_to_write, uint64_t length_to_write); + + bool NextBlock() { + if (++block_id_ > (chunk_size_ / block_size_ - 1)) return false; + pos_in_block_ = 0; + return true; + } + + private: + void* chunk_offset_; + uint64_t chunk_size_; + uint64_t block_id_; + uint64_t block_size_; + uint64_t pos_in_block_; + uint64_t tuple_size_; +}; class ChunkStorage { public: /* considering that how block size effects the performance is to be tested, diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index c6b7bb563..230356f87 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -303,6 +303,7 @@ Strip TxnServer::AtomicMalloc(UInt64 
part, UInt64 TupleSize, strip.Pos = PosList[part].load(); strip.Offset = 0; UInt64 block_pos = strip.Pos % kBlockSize; + // TODO(lizhifang): use int64_t instead of uint64_t UInt64 remain_count = TupleCount; int count = 0; while (remain_count > 0) { From bf4f8d40c7380e79b4d04811d7dacb2ff88e5eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Mon, 25 Apr 2016 13:47:10 +0800 Subject: [PATCH 18/58] update txn_test --- txn_manager/txn_client_test.cpp | 8 ++++++-- txn_manager/txn_server_test.cpp | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/txn_manager/txn_client_test.cpp b/txn_manager/txn_client_test.cpp index 40e63840e..3a718b0c2 100644 --- a/txn_manager/txn_client_test.cpp +++ b/txn_manager/txn_client_test.cpp @@ -142,6 +142,7 @@ using claims::txn::TxnClient; using claims::txn::LogServer; using claims::txn::LogClient; char buffer[20*1024+10]; +int is_log = 0; void task2(int id, int times){ std::default_random_engine e; std::uniform_int_distribution rand_tuple_size(50, 150); @@ -156,11 +157,13 @@ void task2(int id, int times){ for (auto i = 0; i < part_count; i++) req.InsertStrip(i, part_count, tuple_count/part_count>0 ?tuple_count/part_count :1); TxnClient::BeginIngest(req, ingest); + if (is_log == 1) for (auto & strip : ingest.strip_list_) LogClient::Data(strip.first,strip.second.first,strip.second.second, - buffer, tuple_size*tuple_count); + buffer, tuple_size*tuple_count/part_count); TxnClient::CommitIngest(ingest.id_); - LogClient::Refresh(); + if (is_log == 1) + LogClient::Refresh(); } } @@ -169,6 +172,7 @@ int main(int argc, const char **argv){ int times = stoi(string(argv[2])); string ip = string(argv[3]); int port = stoi(string(argv[4])); + is_log = stoi(string(argv[5])); TxnClient::Init(ip, port); LogServer::Init("data-log"); struct timeval tv1, tv2; diff --git a/txn_manager/txn_server_test.cpp b/txn_manager/txn_server_test.cpp index 16753e78a..1fb28bd2e 100644 --- 
a/txn_manager/txn_server_test.cpp +++ b/txn_manager/txn_server_test.cpp @@ -203,14 +203,16 @@ for (auto i=0; i threads; // int n,times; From e6db4bd1c43d50fccc52b2bfb89721db08c69135 Mon Sep 17 00:00:00 2001 From: yukai Date: Mon, 25 Apr 2016 14:35:40 +0800 Subject: [PATCH 19/58] ADD: simple test case; compile ok --- Makefile.am | 2 +- catalog/Makefile.am | 4 +-- loader/data_ingestion.cpp | 2 +- loader/data_ingestion.h | 2 +- loader/load_packet.h | 1 + loader/master_loader.cpp | 70 ++++++++++++++++++++++++++++++++++-- loader/master_loader.h | 7 ++++ loader/slave_loader.cpp | 10 ++++-- storage/PartitionStorage.cpp | 10 +++--- storage/PartitionStorage.h | 1 + 10 files changed, 93 insertions(+), 16 deletions(-) diff --git a/Makefile.am b/Makefile.am index eddf6e2ee..fe65757a5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -72,7 +72,7 @@ LDADD = \ -lLLVMX86Desc -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCJIT -lLLVMTarget -lLLVMRuntimeDyld \ -lLLVMExecutionEngine -lLLVMMC -lLLVMObject -lLLVMBitReader -lLLVMCore -lLLVMSupport -lLLVMMCParser -#LDADD += catalog/stat/libstat.a +LDADD += catalog/stat/libstat.a include_HEADERS = Config.h \ diff --git a/catalog/Makefile.am b/catalog/Makefile.am index de7cf0c50..995ea20e8 100644 --- a/catalog/Makefile.am +++ b/catalog/Makefile.am @@ -43,5 +43,5 @@ libcatalog_a_SOURCES = \ table.h unordered_map.hpp # if it is need to include stat folder and Test folder, add stat and Test at next two lines -SUBDIRS = -DIST_SUBDIRS = +SUBDIRS = stat +DIST_SUBDIRS = stat diff --git a/loader/data_ingestion.cpp b/loader/data_ingestion.cpp index 8294ba28c..0e8d2d98c 100644 --- a/loader/data_ingestion.cpp +++ b/loader/data_ingestion.cpp @@ -1066,7 +1066,7 @@ inline RetCode DataIngestion::CheckAndToValue( columns_validities); } -istream& DataIngestion::GetTupleTerminatedBy(ifstream& ifs, string& res, +istream& DataIngestion::GetTupleTerminatedBy(istream& ifs, string& res, const string& terminator) { res.clear(); if (1 == terminator.length()) 
{ diff --git a/loader/data_ingestion.h b/loader/data_ingestion.h index ed156a4d2..892ab2fb8 100644 --- a/loader/data_ingestion.h +++ b/loader/data_ingestion.h @@ -176,7 +176,7 @@ class DataIngestion { RetCode DestroyLocalPJBuffer(vector>& pj_buffer); public: - static istream& GetTupleTerminatedBy(ifstream& ifs, string& res, + static istream& GetTupleTerminatedBy(istream& ifs, string& res, const string& terminator); private: diff --git a/loader/load_packet.h b/loader/load_packet.h index fc6cee23f..849ec1e36 100644 --- a/loader/load_packet.h +++ b/loader/load_packet.h @@ -49,6 +49,7 @@ using LoadAckAtom = caf::atom_constant; /****************************************************/ struct LoadPacket { public: + LoadPacket() {} LoadPacket(const uint64_t txn_id, const uint64_t g_part_id, uint64_t pos, uint64_t offset, uint64_t data_length, const void* data_buffer) : txn_id_(txn_id), diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 36c674016..e87683d77 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -49,6 +49,7 @@ #include "../common/Schema/TupleConvertor.h" #include "../Config.h" #include "../Environment.h" +#include "../loader/data_ingestion.h" #include "../txn_manager/txn.hpp" #include "../txn_manager/txn_client.hpp" #include "../utility/resource_guard.h" @@ -132,8 +133,8 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, claims::txn::Ingest ingest; ingest.Id = txn_id; if (is_commited) { - if (++(mloader->txn_commint_info_[txn_id].commited_part_num_) >= - mloader->txn_commint_info_[txn_id].total_part_num_) { + if (++(mloader->txn_commint_info_.at(txn_id).commited_part_num_) >= + mloader->txn_commint_info_.at(txn_id).total_part_num_) { // TODO(lizhifang): optimize the interface of TxnClient TxnClient::CommitIngest(ingest); mloader->txn_commint_info_.erase(txn_id); @@ -225,7 +226,53 @@ RetCode MasterLoader::Ingest() { } string MasterLoader::GetMessage() { - string ret; + // for testing + string ret = + 
"LINEITEM,|,\n" + "1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-" + "02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the|\n" + "1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-" + "20|TAKE BACK RETURN|MAIL|ly final dependencies: slyly bold |\n" + "1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-" + "31|TAKE BACK RETURN|REG AIR|riously. regular, express dep|\n" + "1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-" + "16|NONE|AIR|lites. fluffily even de|\n" + "1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-04-" + "01|NONE|FOB| pending foxes. slyly re|\n" + "1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-" + "03|DELIVER IN PERSON|MAIL|arefully slyly ex|\n" + "2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-02-" + "02|TAKE BACK RETURN|RAIL|ven requests. deposits breach a|\n" + "3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-" + "23|NONE|AIR|ongside of the furiously brave acco|\n" + "3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-11-" + "24|TAKE BACK RETURN|RAIL| unusual accounts. eve|\n" + "3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-01-" + "23|DELIVER IN PERSON|SHIP|nal foxes wake. |\n" + "3|29380|1883|4|2|2618.76|0.01|0.06|A|F|1993-12-04|1994-01-07|1994-01-01|" + "NONE|TRUCK|y. fluffily pending d|\n" + "7|145243|7758|2|9|11594.16|0.08|0.08|N|O|1996-02-01|1996-03-02|1996-02-" + "19|TAKE BACK RETURN|SHIP|es. instructions|\n" + "7|94780|9799|3|46|81639.88|0.10|0.07|N|O|1996-01-15|1996-03-27|1996-02-" + "03|COLLECT COD|MAIL| unusual reques|\n" + "7|163073|3074|4|28|31809.96|0.03|0.04|N|O|1996-03-21|1996-04-08|1996-04-" + "20|NONE|FOB|. slyly special requests haggl|\n" + "7|151894|9440|5|38|73943.82|0.08|0.01|N|O|1996-02-11|1996-02-24|1996-02-" + "18|DELIVER IN PERSON|TRUCK|ns haggle carefully ironic deposits. 
bl|\n" + "7|79251|1759|6|35|43058.75|0.06|0.03|N|O|1996-01-16|1996-02-23|1996-01-" + "22|TAKE BACK RETURN|FOB|jole. excuses wake carefully alongside of |\n" + "7|157238|2269|7|5|6476.15|0.04|0.02|N|O|1996-02-10|1996-03-26|1996-02-" + "13|NONE|FOB|ithely regula|\n" + "32|82704|7721|1|28|47227.60|0.05|0.08|N|O|1995-10-23|1995-08-27|1995-10-" + "26|TAKE BACK RETURN|TRUCK|sleep quickly. req|\n" + "32|197921|441|2|32|64605.44|0.02|0.00|N|O|1995-08-14|1995-10-07|1995-08-" + "27|COLLECT COD|AIR|lithely regular deposits. fluffily |\n" + "32|44161|6666|3|2|2210.32|0.09|0.02|N|O|1995-08-07|1995-10-07|1995-08-" + "23|DELIVER IN PERSON|AIR| express accounts wake according to the|\n" + "32|2743|7744|4|4|6582.96|0.09|0.03|N|O|1995-08-04|1995-10-01|1995-09-03|" + "NONE|REG AIR|e slyly final pac|\n" + "32|85811|8320|5|44|79059.64|0.05|0.06|N|O|1995-08-28|1995-08-20|1995-09-" + "14|DELIVER IN PERSON|AIR|symptotes nag according to the ironic depo|\n"; return ret; } @@ -257,6 +304,23 @@ RetCode MasterLoader::GetRequestFromMessage(const string& message, IngestionRequest* req) { // AddRowIdColumn() RetCode ret = rSuccess; + size_t pos = message.find(',', 0); + req->table_name_ = message.substr(0, pos); + pos++; + size_t next_pos = message.find(',', pos); + req->col_sep_ = message.substr(pos, next_pos - pos); + + pos = next_pos + 1; + next_pos = message.find('\n', pos); + req->row_sep_ = message.substr(pos, next_pos - pos); + + string tuple; + string data_string = message.substr(pos + 1); + istringstream iss(data_string); + while (DataIngestion::GetTupleTerminatedBy(iss, tuple, req->row_sep_)) { + req->tuples_.push_back(tuple); + } + req->Show(); return ret; } diff --git a/loader/master_loader.h b/loader/master_loader.h index 8a87b8673..5a228798d 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -30,6 +30,7 @@ #define LOADER_MASTER_LOADER_H_ #include +#include #include #include #include "caf/all.hpp" @@ -59,6 +60,12 @@ class MasterLoader { string col_sep_; string 
row_sep_; vector tuples_; + void Show() { + LOG(INFO) << "table name:" << table_name_ + << ", column separator:" << col_sep_ + << ", row separator:" << row_sep_ + << ", tuples size is:" << tuples_.size(); + } }; struct CommitInfo { diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index e6420bf31..78f71c679 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -34,15 +34,17 @@ #include "caf/all.hpp" #include "caf/io/all.hpp" -#include "./loader_message.h" +#include "./load_packet.h" #include "../Config.h" #include "../Environment.h" #include "../common/error_define.h" +#include "../common/ids.h" #include "../common/memory_handle.h" #include "../storage/ChunkStorage.h" #include "../storage/MemoryStore.h" #include "../storage/PartitionStorage.h" #include "../txn_manager/txn.hpp" +#include "../utility/resource_guard.h" using caf::event_based_actor; using caf::io::remote_actor; using caf::mixin::sync_sender_impl; @@ -283,7 +285,9 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { while (total_written_length < offset) { // get start position of current chunk if (BlockManager::getInstance()->getMemoryChunkStore()->getChunk( - ChunkID(part_id, cur_chunk_id), chunk_info)) { + ChunkID(PartitionID(ProjectionID(table_id, prj_id), part_id), + cur_chunk_id), + chunk_info)) { InMemoryChunkWriterIterator writer(chunk_info.hook, CHUNK_SIZE, cur_block_id, BLOCK_SIZE, pos_in_block, tuple_size); @@ -301,7 +305,7 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { } while (writer.NextBlock()); ++cur_chunk_id; // get next chunk to write - assert(cur_chunk_id < part_storage->chunk_list_.size()); + assert(cur_chunk_id < part_storage->GetChunkNum()); cur_block_id = 0; // the block id of next chunk is 0 pos_in_block = 0; diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 0028b3a5d..37a95acc7 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -65,11 +65,11 @@ RetCode 
PartitionStorage::AddChunkWithMemoryToNum( for (unsigned i = number_of_chunks_; i < number_of_chunks; i++) { ChunkStorage* chunk = new ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, storage_level); - EXEC_AND_LOG( - ret, chunk->ApplyMemory(), - "applied memory for chunk(" << partition_id_ << "," << i << ")", - "failed to apply memory for chunk(" << partition_id_ << "," << i - << ")"); + EXEC_AND_LOG(ret, chunk->ApplyMemory(), "applied memory for chunk(" + << partition_id_.getName() + << "," << i << ")", + "failed to apply memory for chunk(" << partition_id_.getName() + << "," << i << ")"); chunk_list_.push_back(chunk); } number_of_chunks_ = number_of_chunks; diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index 0cd5ae0f4..bdc8df0fb 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -60,6 +60,7 @@ class PartitionStorage { void removeAllChunks(const PartitionID& partition_id); PartitionReaderItetaor* createReaderIterator(); PartitionReaderItetaor* createAtomicReaderIterator(); + const int GetChunkNum() const { return chunk_list_.size(); } protected: PartitionID partition_id_; From 23f2c02f22936e8fcf832e576441148b962d3096 Mon Sep 17 00:00:00 2001 From: yukai Date: Tue, 26 Apr 2016 15:39:43 +0800 Subject: [PATCH 20/58] ADD: some debug info log --- Environment.cpp | 14 ++++---- Resource/ResourceManagerSlave.cpp | 60 ++++++++++++++++--------------- loader/master_loader.cpp | 13 +++---- loader/master_loader.h | 4 +-- txn_manager/txn_server.cpp | 6 ++-- utility/thread_pool.cpp | 5 +-- 6 files changed, 54 insertions(+), 48 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index bfb2774fc..83d3e0997 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -7,6 +7,7 @@ #include "Environment.h" +#include #include "caf/all.hpp" #include "txn_manager/txn_server.hpp" @@ -69,6 +70,7 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { logging_->log("Initializing the ThreadPool..."); if (false == 
initializeThreadPool()) { logging_->elog("initialize ThreadPool failed"); + assert(false && "can't initialize thread pool"); } } logging_->log("Initializing the AdaptiveEndPoint..."); @@ -101,13 +103,10 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { } logging_->log("Initializing txn manager"); - if (!InitTxnManager()) - LOG(ERROR) << "failed to initialize txn manager"; + if (!InitTxnManager()) LOG(ERROR) << "failed to initialize txn manager"; logging_->log("Initializing txn log server"); - if (!InitTxnLog()) - LOG(ERROR) << "failed to initialize txn log"; - + if (!InitTxnLog()) LOG(ERROR) << "failed to initialize txn log"; logging_->log("Initializing the ExecutorMaster..."); iteratorExecutorMaster = new IteratorExecutorMaster(); @@ -204,8 +203,11 @@ void Environment::initializeStorage() { void Environment::initializeResourceManager() { if (ismaster_) { resourceManagerMaster_ = new ResourceManagerMaster(); + DLOG(INFO) << "ResourceManagerMaster instanced "; } resourceManagerSlave_ = new InstanceResourceManager(); + DLOG(INFO) << "resourceManagerSlave instanced "; + nodeid = resourceManagerSlave_->Register(); } @@ -232,7 +234,7 @@ bool Environment::InitLoader() { bool Environment::InitTxnManager() { if (Config::enable_txn_server) { - LOG(INFO) << "I'm txn manager server" ; + LOG(INFO) << "I'm txn manager server"; TxnServer::Init(Config::txn_server_cores, Config::txn_server_port); } TxnClient::Init(Config::txn_server_ip, Config::txn_server_port); diff --git a/Resource/ResourceManagerSlave.cpp b/Resource/ResourceManagerSlave.cpp index 4092a0087..42c10c1ee 100755 --- a/Resource/ResourceManagerSlave.cpp +++ b/Resource/ResourceManagerSlave.cpp @@ -10,42 +10,44 @@ #include "../common/TimeOutReceiver.h" #define ResourceManagerMasterName "ResourceManagerMaster" InstanceResourceManager::InstanceResourceManager() { - framework_=new Theron::Framework(*Environment::getInstance()->getEndPoint()); - logging_=new ResourceManagerMasterLogging(); + framework_ = + 
new Theron::Framework(*Environment::getInstance()->getEndPoint()); + logging_ = new ResourceManagerMasterLogging(); } InstanceResourceManager::~InstanceResourceManager() { - delete framework_; - delete logging_; + delete framework_; + delete logging_; } -NodeID InstanceResourceManager::Register(){ - NodeID ret=10; - TimeOutReceiver receiver(Environment::getInstance()->getEndPoint()); - Theron::Catcher resultCatcher; - receiver.RegisterHandler(&resultCatcher, &Theron::Catcher::Push); +NodeID InstanceResourceManager::Register() { + NodeID ret = 10; + TimeOutReceiver receiver(Environment::getInstance()->getEndPoint()); + Theron::Catcher resultCatcher; + receiver.RegisterHandler(&resultCatcher, &Theron::Catcher::Push); - std::string ip=Environment::getInstance()->getIp(); - unsigned port=Environment::getInstance()->getPort(); - NodeRegisterMessage message(ip,port); + std::string ip = Environment::getInstance()->getIp(); + unsigned port = Environment::getInstance()->getPort(); + NodeRegisterMessage message(ip, port); - framework_->Send(message,receiver.GetAddress(),Theron::Address("ResourceManagerMaster")); - Theron::Address from; - if(receiver.TimeOutWait(1,1000)==1){ - - resultCatcher.Pop(ret,from); - logging_->log("Successfully registered to the master, the allocated id =%d.",ret); - return ret; - } - else{ - logging_->elog("Failed to get NodeId from the master."); - return -1; - } + DLOG(INFO) << "resourceManagerSlave is going to register to master"; + framework_->Send(message, receiver.GetAddress(), + Theron::Address("ResourceManagerMaster")); + Theron::Address from; + if (receiver.TimeOutWait(1, 1000) == 1) { + resultCatcher.Pop(ret, from); + logging_->log( + "Successfully registered to the master, the allocated id =%d.", ret); + return ret; + } else { + logging_->elog("Failed to get NodeId from the master."); + return -1; + } } -void InstanceResourceManager::ReportStorageBudget(StorageBudgetMessage& message){ - 
framework_->Send(message,Theron::Address(),Theron::Address(ResourceManagerMasterName)); +void InstanceResourceManager::ReportStorageBudget( + StorageBudgetMessage& message) { + framework_->Send(message, Theron::Address(), + Theron::Address(ResourceManagerMasterName)); } void InstanceResourceManager::setStorageBudget(unsigned long memory, - unsigned long disk) { - -} + unsigned long disk) {} diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index e87683d77..046477a07 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -67,6 +67,7 @@ using claims::catalog::TableDescriptor; using claims::common::Malloc; using claims::common::rSuccess; using claims::common::rFailure; + using namespace claims::txn; // NOLINT namespace claims { @@ -432,7 +433,7 @@ RetCode MasterLoader::ApplyTransaction( RetCode MasterLoader::WriteLog( const TableDescriptor* table, const vector>& partition_buffers, - claims::txn::Ingest& ingest) { + const claims::txn::Ingest& ingest) { RetCode ret = rSuccess; uint64_t table_id = table->get_table_id(); @@ -443,8 +444,8 @@ RetCode MasterLoader::WriteLog( EXEC_AND_LOG(ret, LogClient::Data(global_part_id, - ingest.StripList[global_part_id].first, - ingest.StripList[global_part_id].second, + ingest.StripList.at(global_part_id).first, + ingest.StripList.at(global_part_id).second, partition_buffers[prj_id][part_id].buffer_, partition_buffers[prj_id][part_id].length_), "written data log for partition:" << global_part_id, @@ -464,7 +465,7 @@ RetCode MasterLoader::ReplyToMQ(const IngestionRequest& req) { RetCode MasterLoader::SendPartitionTupleToSlave( const TableDescriptor* table, const vector>& partition_buffers, - claims::txn::Ingest& ingest) { + const claims::txn::Ingest& ingest) { RetCode ret = rSuccess; uint64_t table_id = table->get_table_id(); @@ -473,8 +474,8 @@ RetCode MasterLoader::SendPartitionTupleToSlave( ++part_id) { uint64_t global_part_id = GetGlobalPartId(table_id, prj_id, part_id); LoadPacket packet(ingest.Id, 
global_part_id, - ingest.StripList[global_part_id].first, - ingest.StripList[global_part_id].second, + ingest.StripList.at(global_part_id).first, + ingest.StripList.at(global_part_id).second, partition_buffers[prj_id][part_id].length_, partition_buffers[prj_id][part_id].buffer_); void* packet_buffer; diff --git a/loader/master_loader.h b/loader/master_loader.h index 5a228798d..59f4042a4 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -123,14 +123,14 @@ class MasterLoader { RetCode WriteLog(const TableDescriptor* table, const vector>& partition_buffers, - claims::txn::Ingest& ingest); + const claims::txn::Ingest& ingest); RetCode ReplyToMQ(const IngestionRequest& req); RetCode SendPartitionTupleToSlave( const TableDescriptor* table, const vector>& partition_buffers, - claims::txn::Ingest& ingest); + const claims::txn::Ingest& ingest); RetCode SelectSocket(const TableDescriptor* table, const uint64_t prj_id, const uint64_t part_id, int& socket_fd); diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index 230356f87..faec8b3ac 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -47,9 +47,9 @@ RetCode TxnCore::ReMalloc() { Size = 0; TxnIndex.clear(); try { - delete[] Commit; - delete[] Abort; - delete[] StripList; + // delete[] Commit; + // delete[] Abort; + // delete[] StripList; Commit = new bool[BufferSize]; Abort = new bool[BufferSize]; StripList = new vector[BufferSize]; diff --git a/utility/thread_pool.cpp b/utility/thread_pool.cpp index 997ed36b8..8f91337b2 100644 --- a/utility/thread_pool.cpp +++ b/utility/thread_pool.cpp @@ -29,6 +29,7 @@ #include "./thread_pool.h" #include #include +#include #include #include "./task.h" #include "../common/memory_handle.h" @@ -57,13 +58,13 @@ bool ThreadPool::Init(int thread_count_in_pool) { if (pthread_create(&monitor_thread_, NULL, MonitorThreadExec, this) != 0) { // if any failed, return false - ThreadPoolLogging::elog("ERROR: create monitor thread failed!"); + 
PLOG(ERROR) << "ERROR: create monitor thread failed!"; return false; } for (int i = 0; i < base_thread_count_; ++i) { if (pthread_create(&thread_list_[i], NULL, ThreadExec, this) != 0) { // if any failed, return false - ThreadPoolLogging::elog("ERROR: create thread failed!"); + PLOG(ERROR) << "ERROR: create thread failed!"; return false; } ++current_thread_count_; From bca77d3e2541f3bfd02b06eed9a5d8de70661856 Mon Sep 17 00:00:00 2001 From: yukai Date: Tue, 26 Apr 2016 21:46:07 +0800 Subject: [PATCH 21/58] ADD: InitConnector() in table.cpp OPTIMIZE: master_loader_actor is not member variable ERROR version --- Resource/ResourceManagerMaster.cpp | 241 +++++++++++++++-------------- Resource/ResourceManagerSlave.cpp | 2 +- catalog/table.cpp | 20 +-- catalog/table.h | 4 +- loader/master_loader.cpp | 14 +- loader/slave_loader.cpp | 25 ++- loader/table_file_connector.cpp | 4 +- 7 files changed, 172 insertions(+), 138 deletions(-) diff --git a/Resource/ResourceManagerMaster.cpp b/Resource/ResourceManagerMaster.cpp index 96d0663c5..f141e7e30 100755 --- a/Resource/ResourceManagerMaster.cpp +++ b/Resource/ResourceManagerMaster.cpp @@ -8,137 +8,154 @@ #include "ResourceManagerMaster.h" #include "../Environment.h" ResourceManagerMaster::ResourceManagerMaster() { - node_tracker_=NodeTracker::GetInstance(); - logging_=new ResourceManagerMasterLogging(); + node_tracker_ = NodeTracker::GetInstance(); + logging_ = new ResourceManagerMasterLogging(); - endpoint_=Environment::getInstance()->getEndPoint(); - framework=new Theron::Framework(*endpoint_); - acter_=new ResourceManagerMasterActor(framework,this); + endpoint_ = Environment::getInstance()->getEndPoint(); + framework = new Theron::Framework(*endpoint_); + acter_ = new ResourceManagerMasterActor(framework, this); } ResourceManagerMaster::~ResourceManagerMaster() { - acter_->~ResourceManagerMasterActor(); - framework->~Framework(); - node_tracker_->~NodeTracker(); + acter_->~ResourceManagerMasterActor(); + 
framework->~Framework(); + node_tracker_->~NodeTracker(); } -NodeID ResourceManagerMaster::RegisterNewSlave(NodeAddress new_slave_address){ - NodeID new_node_id=node_tracker_->RegisterNode(new_slave_address); - if(new_node_id==-1){ - /* Node with the given ip has already existed.*/ - logging_->elog("[%s:%s] has already exists",new_slave_address.ip.c_str(),new_slave_address.port.c_str()); - return false; - } - -// -// if(node_to_resourceinfo_.find(new_node_id)!=node_to_resourceinfo_.end()){ -// /*The slaveId has already existed.*/ -// return false; -// } - node_to_resourceinfo_[new_node_id]=new InstanceResourceInfo(); - - logging_->log("[ip=%s:%s, id=%d] is successfully registered.",new_slave_address.ip.c_str(),new_slave_address.port.c_str(),new_node_id); - -// hashmap::iterator it=node_to_resourceinfo_.begin(); -// while(it!=node_to_resourceinfo_.end()){ -// printf("%d--> ",it->first); -// it++; -// } -// printf("\n"); - - return new_node_id; +NodeID ResourceManagerMaster::RegisterNewSlave(NodeAddress new_slave_address) { + NodeID new_node_id = node_tracker_->RegisterNode(new_slave_address); + if (new_node_id == -1) { + /* Node with the given ip has already existed.*/ + logging_->elog("[%s:%s] has already exists", new_slave_address.ip.c_str(), + new_slave_address.port.c_str()); + return false; + } + + // + // if(node_to_resourceinfo_.find(new_node_id)!=node_to_resourceinfo_.end()){ + // /*The slaveId has already existed.*/ + // return false; + // } + node_to_resourceinfo_[new_node_id] = new InstanceResourceInfo(); + + logging_->log("[ip=%s:%s, id=%d] is successfully registered.", + new_slave_address.ip.c_str(), new_slave_address.port.c_str(), + new_node_id); + + // hashmap::iterator + // it=node_to_resourceinfo_.begin(); + // while(it!=node_to_resourceinfo_.end()){ + // printf("%d--> ",it->first); + // it++; + // } + // printf("\n"); + + return new_node_id; } -std::vector ResourceManagerMaster::getSlaveIDList(){ - std::vector ret; - boost::unordered_map::iterator 
it=node_to_resourceinfo_.begin(); - while(it!=node_to_resourceinfo_.end()){ - ret.push_back(it->first); - it++; - } - return ret; +std::vector ResourceManagerMaster::getSlaveIDList() { + std::vector ret; + boost::unordered_map::iterator it = + node_to_resourceinfo_.begin(); + while (it != node_to_resourceinfo_.end()) { + ret.push_back(it->first); + it++; + } + return ret; } -bool ResourceManagerMaster::ApplyDiskBuget(NodeID target, unsigned size_in_mb){ - if(node_to_resourceinfo_.find(target)==node_to_resourceinfo_.cend()){ - /* target slave does not exist.*/ - return false; - } - if(node_to_resourceinfo_[target]->disk.take(size_in_mb)) - return true; - return false; +bool ResourceManagerMaster::ApplyDiskBuget(NodeID target, unsigned size_in_mb) { + if (node_to_resourceinfo_.find(target) == node_to_resourceinfo_.cend()) { + /* target slave does not exist.*/ + return false; + } + if (node_to_resourceinfo_[target]->disk.take(size_in_mb)) return true; + return false; } -bool ResourceManagerMaster::ReturnDiskBuget(NodeID target, unsigned size_in_mb) -{ - if(node_to_resourceinfo_.find(target) == node_to_resourceinfo_.cend()) - return false; - node_to_resourceinfo_[target]->disk.put(size_in_mb); - return true; +bool ResourceManagerMaster::ReturnDiskBuget(NodeID target, + unsigned size_in_mb) { + if (node_to_resourceinfo_.find(target) == node_to_resourceinfo_.cend()) + return false; + node_to_resourceinfo_[target]->disk.put(size_in_mb); + return true; } -bool ResourceManagerMaster::ApplyMemoryBuget(NodeID target, unsigned size_in_mb){ - if(node_to_resourceinfo_.find(target)==node_to_resourceinfo_.cend()){ - - /* target slave does not exist.*/ - return false; - } - if(node_to_resourceinfo_[target]->memory.take(size_in_mb)) - return true; - cout<<"no memory"<memory.take(size_in_mb)) return true; + cout << "no memory" << endl; + return false; } -bool ResourceManagerMaster::ReturnMemoryBuget(NodeID target, unsigned size_in_mb) -{ - if(node_to_resourceinfo_.find(target) == 
node_to_resourceinfo_.cend()) - return false; - node_to_resourceinfo_[target]->memory.put(size_in_mb); - return true; +bool ResourceManagerMaster::ReturnMemoryBuget(NodeID target, + unsigned size_in_mb) { + if (node_to_resourceinfo_.find(target) == node_to_resourceinfo_.cend()) + return false; + node_to_resourceinfo_[target]->memory.put(size_in_mb); + return true; } -bool ResourceManagerMaster::RegisterDiskBuget(NodeID report_node_id, unsigned size_in_mb){ - if(node_to_resourceinfo_.find(report_node_id)==node_to_resourceinfo_.end()){ - /* target slave does not exists*/ - return false; - } - node_to_resourceinfo_[report_node_id]->disk.initialize(size_in_mb); - logging_->log("Node(id=%d) reports its disk capacity=%d",report_node_id,size_in_mb); - return true; +bool ResourceManagerMaster::RegisterDiskBuget(NodeID report_node_id, + unsigned size_in_mb) { + if (node_to_resourceinfo_.find(report_node_id) == + node_to_resourceinfo_.end()) { + /* target slave does not exists*/ + return false; + } + node_to_resourceinfo_[report_node_id]->disk.initialize(size_in_mb); + logging_->log("Node(id=%d) reports its disk capacity=%d", report_node_id, + size_in_mb); + return true; } -bool ResourceManagerMaster::RegisterMemoryBuget(NodeID report_node_id, unsigned size_in_mb){ - if(node_to_resourceinfo_.find(report_node_id)==node_to_resourceinfo_.end()){ - /* target slave does not exists*/ - return false; - } - node_to_resourceinfo_[report_node_id]->memory.initialize(size_in_mb); - logging_->log("Node(id=%d) reports its memory capacity=%d",report_node_id,size_in_mb); - return true; +bool ResourceManagerMaster::RegisterMemoryBuget(NodeID report_node_id, + unsigned size_in_mb) { + if (node_to_resourceinfo_.find(report_node_id) == + node_to_resourceinfo_.end()) { + /* target slave does not exists*/ + return false; + } + node_to_resourceinfo_[report_node_id]->memory.initialize(size_in_mb); + logging_->log("Node(id=%d) reports its memory capacity=%d", report_node_id, + size_in_mb); + return 
true; } - -ResourceManagerMaster::ResourceManagerMasterActor::ResourceManagerMasterActor(Theron::Framework* framework,ResourceManagerMaster* rmm) -:Theron::Actor(*framework,"ResourceManagerMaster"),rmm_(rmm){ - RegisterHandler(this,&ResourceManagerMasterActor::ReceiveStorageBudgetReport); - RegisterHandler(this,&ResourceManagerMasterActor::ReceiveNewNodeRegister); +ResourceManagerMaster::ResourceManagerMasterActor::ResourceManagerMasterActor( + Theron::Framework* framework, ResourceManagerMaster* rmm) + : Theron::Actor(*framework, "ResourceManagerMaster"), rmm_(rmm) { + RegisterHandler(this, + &ResourceManagerMasterActor::ReceiveStorageBudgetReport); + RegisterHandler(this, &ResourceManagerMasterActor::ReceiveNewNodeRegister); } -void ResourceManagerMaster::ResourceManagerMasterActor::ReceiveStorageBudgetReport(const StorageBudgetMessage &message,const Theron::Address from){ - if(!rmm_->RegisterDiskBuget(message.nodeid,message.disk_budget)){ - rmm_->logging_->elog("Fail to add the budget information to rmm!"); - } - if(!rmm_->RegisterMemoryBuget(message.nodeid,message.memory_budget)){ - rmm_->logging_->elog("Fail to add the budget information to rmm!"); - } -// rmm_->logging_->log("The storage of Slave[%d] has been registered, the disk=[%d]MB, memory=[%d]MB",message.nodeid,message.disk_budget,message.memory_budget); -// printf("The storage of Slave[%d] has been registered, the disk=[%d]MB, memory=[%d]MB\n",message.nodeid,message.disk_budget,message.memory_budget); -// Send(0,from); +void +ResourceManagerMaster::ResourceManagerMasterActor::ReceiveStorageBudgetReport( + const StorageBudgetMessage& message, const Theron::Address from) { + if (!rmm_->RegisterDiskBuget(message.nodeid, message.disk_budget)) { + rmm_->logging_->elog("Fail to add the budget information to rmm!"); + } + if (!rmm_->RegisterMemoryBuget(message.nodeid, message.memory_budget)) { + rmm_->logging_->elog("Fail to add the budget information to rmm!"); + } + // rmm_->logging_->log("The storage of 
Slave[%d] has been registered, the + // disk=[%d]MB, + // memory=[%d]MB",message.nodeid,message.disk_budget,message.memory_budget); + // printf("The storage of Slave[%d] has been registered, the disk=[%d]MB, + // memory=[%d]MB\n",message.nodeid,message.disk_budget,message.memory_budget); + // Send(0,from); } -void ResourceManagerMaster::ResourceManagerMasterActor::ReceiveNewNodeRegister(const NodeRegisterMessage &message,const Theron::Address from){ - - NodeAddress node_addr; - node_addr.ip=message.get_ip(); - std::ostringstream str; - str<RegisterNewSlave(node_addr); - rmm_->logging_->log("Received register request from %s:%d, the allocated NodeID=%d",message.get_ip().c_str(),message.port,assigned_node_id); - Send(assigned_node_id,from); +void ResourceManagerMaster::ResourceManagerMasterActor::ReceiveNewNodeRegister( + const NodeRegisterMessage& message, const Theron::Address from) { + NodeAddress node_addr; + node_addr.ip = message.get_ip(); + std::ostringstream str; + str << message.port; + node_addr.port = str.str(); + NodeID assigned_node_id = rmm_->RegisterNewSlave(node_addr); + rmm_->logging_->log( + "Received register request from %s:%d, the allocated NodeID=%d", + message.get_ip().c_str(), message.port, assigned_node_id); + Send(assigned_node_id, from); } diff --git a/Resource/ResourceManagerSlave.cpp b/Resource/ResourceManagerSlave.cpp index 42c10c1ee..936f6a2a3 100755 --- a/Resource/ResourceManagerSlave.cpp +++ b/Resource/ResourceManagerSlave.cpp @@ -33,7 +33,7 @@ NodeID InstanceResourceManager::Register() { framework_->Send(message, receiver.GetAddress(), Theron::Address("ResourceManagerMaster")); Theron::Address from; - if (receiver.TimeOutWait(1, 1000) == 1) { + if (receiver.TimeOutWait(1, 10000) == 1) { resultCatcher.Pop(ret, from); logging_->log( "Successfully registered to the master, the allocated id =%d.", ret); diff --git a/catalog/table.cpp b/catalog/table.cpp index 04fcd79f5..15d47b261 100755 --- a/catalog/table.cpp +++ b/catalog/table.cpp @@ 
-38,20 +38,14 @@ using claims::utility::LockGuard; namespace claims { namespace catalog { -TableDescriptor::TableDescriptor() { - write_connector_ = new TableFileConnector( - Config::local_disk_mode ? FilePlatform::kDisk : FilePlatform::kHdfs, this, - common::kAppendFile); -} +TableDescriptor::TableDescriptor() {} TableDescriptor::TableDescriptor(const string& name, const TableID table_id) : tableName(name), table_id_(table_id), row_number_(0) { - write_connector_ = new TableFileConnector( - Config::local_disk_mode ? FilePlatform::kDisk : FilePlatform::kHdfs, this, - common::kAppendFile); + InitConnector(); } -TableDescriptor::~TableDescriptor() {} +TableDescriptor::~TableDescriptor() { DELETE_PTR(write_connector_); } void TableDescriptor::addAttribute(Attribute attr) { LockGuard guard(update_lock_); @@ -218,5 +212,13 @@ Schema* TableDescriptor::getSchema() const { return new SchemaFix(columns); } +void TableDescriptor::InitConnector() { + if (NULL == write_connector_) { + write_connector_ = new TableFileConnector( + Config::local_disk_mode ? 
FilePlatform::kDisk : FilePlatform::kHdfs, + this, common::kAppendFile); + } +} + } /* namespace catalog */ } /* namespace claims */ diff --git a/catalog/table.h b/catalog/table.h index 72990923c..1111c9793 100755 --- a/catalog/table.h +++ b/catalog/table.h @@ -215,12 +215,14 @@ class TableDescriptor { TableFileConnector* write_connector_ = NULL; + void InitConnector(); + friend class boost::serialization::access; template void serialize(Archive& ar, const unsigned int version) { // NOLINT ar& tableName& attributes& table_id_& projection_list_& row_number_& has_deleted_tuples_; - // InitLocks(); + InitConnector(); } }; diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 046477a07..a1078f1d8 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -95,7 +95,7 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, assert(new_slave_fd > 3); mloader->slave_addr_to_socket_.insert( - pair(NodeAddress(ip, port), new_slave_fd)); + std::pair(NodeAddress(ip, port), new_slave_fd)); DLOG(INFO) << "start to send test message to slave"; /// test whether socket works well @@ -158,7 +158,7 @@ RetCode MasterLoader::ConnectWithSlaves() { DLOG(INFO) << "published in " << master_loader_ip_ << ":" << master_loader_port_; } catch (exception& e) { - LOG(ERROR) << e.what(); + LOG(ERROR) << "publish master loader actor failed" << e.what(); return rFailure; } return ret; @@ -166,6 +166,10 @@ RetCode MasterLoader::ConnectWithSlaves() { RetCode MasterLoader::Ingest() { RetCode ret = rSuccess; + + int temp; + cin >> temp; + cout << "Well , temp is received" << std::endl; string message = GetMessage(); // get message from MQ @@ -572,9 +576,9 @@ void* MasterLoader::StartMasterLoader(void* arg) { EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->ConnectWithSlaves(), "failed to connect all slaves"); - while (true) - EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), - "failed to ingest data"); + // while (true) + // EXEC_AND_ONLY_LOG_ERROR(ret, 
master_loader->Ingest(), + // "failed to ingest data"); return NULL; } diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 78f71c679..469a143f8 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include "caf/all.hpp" #include "caf/io/all.hpp" @@ -60,10 +61,13 @@ namespace claims { namespace loader { SlaveLoader::SlaveLoader() { - master_actor_ = - remote_actor(Config::master_loader_ip, Config::master_loader_port); + // try { + // master_actor_ = + // remote_actor(Config::master_loader_ip, Config::master_loader_port); + // } catch (const exception& e) { + // cout << "master loader actor failed." << e.what() << endl; + // } } - SlaveLoader::~SlaveLoader() {} RetCode SlaveLoader::ConnectWithMaster() { @@ -80,7 +84,7 @@ RetCode SlaveLoader::ConnectWithMaster() { return ret; } - for (int i = 0; i < retry_time; ++i) { + for (int i = 1; i <= retry_time; ++i) { EXEC_AND_LOG(ret, SendSelfAddrToMaster(), "sent self ip/port to master", "failed to send self ip/port to master in " << i << " times"); if (rSuccess == ret) break; @@ -154,10 +158,12 @@ RetCode SlaveLoader::SendSelfAddrToMaster() { << "to (" << Config::master_loader_ip << ":" << Config::master_loader_port << ")"; try { + auto master_actor = + remote_actor(Config::master_loader_ip, Config::master_loader_port); caf::scoped_actor self; - self->sync_send(master_actor_, IpPortAtom::value, self_ip, self_port); + self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port); } catch (exception& e) { - LOG(ERROR) << e.what(); + LOG(ERROR) << "can't send self ip&port to master loader " << e.what(); return rFailure; } return rSuccess; @@ -202,7 +208,7 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { } uint64_t data_length = *reinterpret_cast(head_buffer + 3 * 4); uint64_t real_packet_length = data_length + LoadPacket::kHeadLength; - assert(data_length >= 4); + assert(data_length >= 4 && data_length <= 10000000); LOG(INFO) << "real 
packet length is :" << real_packet_length << ". date length is " << data_length; @@ -246,6 +252,7 @@ void* SlaveLoader::StartSlaveLoader(void* arg) { cout << "connected with master loader" << endl; // TODO(YK): error handle + slave_loader->ReceiveAndWorkLoop(); assert(false); return NULL; @@ -320,8 +327,10 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, bool is_commited) { try { + auto master_actor = + remote_actor(Config::master_loader_ip, Config::master_loader_port); caf::scoped_actor self; - self->sync_send(master_actor_, LoadAckAtom::value, txn_id, is_commited); + self->sync_send(master_actor, LoadAckAtom::value, txn_id, is_commited); } catch (exception& e) { LOG(ERROR) << e.what(); return rFailure; diff --git a/loader/table_file_connector.cpp b/loader/table_file_connector.cpp index acf824176..ba329f273 100644 --- a/loader/table_file_connector.cpp +++ b/loader/table_file_connector.cpp @@ -77,8 +77,8 @@ TableFileConnector::TableFileConnector(FilePlatform platform, file_handles_.push_back(projection_files); write_locks_.push_back(projection_locks); } - LOG(INFO) << "open all " << file_handles_.size() << " file successfully" - << std::endl; + LOG(INFO) << "open all " << file_handles_.size() << " file of table " + << table->getTableName() << " successfully" << std::endl; } // TableFileConnector::TableFileConnector(FilePlatform platform, From a4877ca5588043f719d8424bf3e158d10a6d2dfb Mon Sep 17 00:00:00 2001 From: yukai Date: Sat, 30 Apr 2016 12:58:42 +0800 Subject: [PATCH 22/58] network connection ok, but master loader lack node info --- Environment.cpp | 12 +++---- Resource/NodeTracker.cpp | 6 ++++ Resource/NodeTracker.h | 3 ++ Resource/ResourceManagerMaster.cpp | 55 ++++++++++++++++++++++++++++-- Resource/ResourceManagerSlave.cpp | 12 ++++--- catalog/projection.cpp | 3 ++ loader/load_packet.h | 2 ++ loader/master_loader.cpp | 45 +++++++++++++++++------- loader/slave_loader.cpp 
| 40 +++++++++++----------- 9 files changed, 134 insertions(+), 44 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index 83d3e0997..4719b832a 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -75,6 +75,12 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { } logging_->log("Initializing the AdaptiveEndPoint..."); initializeEndPoint(); + + logging_->log("Initializing the loader..."); + if (!InitLoader()) { + LOG(ERROR) << "failed to initialize loader"; + } + /** * TODO: * DO something in AdaptiveEndPoint such that the construction function does @@ -97,11 +103,6 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { logging_->log("Initializing the BufferManager..."); initializeBufferManager(); - logging_->log("Initializing the loader..."); - if (!InitLoader()) { - LOG(ERROR) << "failed to initialize loader"; - } - logging_->log("Initializing txn manager"); if (!InitTxnManager()) LOG(ERROR) << "failed to initialize txn manager"; @@ -228,7 +229,6 @@ bool Environment::InitLoader() { std::thread slave_thread(&SlaveLoader::StartSlaveLoader, nullptr); slave_thread.detach(); - // caf::await_all_actors_done(); return true; } diff --git a/Resource/NodeTracker.cpp b/Resource/NodeTracker.cpp index 069a1738b..527b5ae5d 100755 --- a/Resource/NodeTracker.cpp +++ b/Resource/NodeTracker.cpp @@ -66,3 +66,9 @@ RetCode NodeTracker::GetNodeAddr(const NodeID& target, assert(false && "can't find node address according node ID"); return claims::common::rFailure; } + +RetCode NodeTracker::InsertRegisteredNode(const NodeID& node_id, + const NodeAddress& node_addr) { + address_to_id_[node_addr] = node_id; + return claims::common::rSuccess; +} diff --git a/Resource/NodeTracker.h b/Resource/NodeTracker.h index 7ab733c2c..a08c39516 100755 --- a/Resource/NodeTracker.h +++ b/Resource/NodeTracker.h @@ -30,6 +30,9 @@ class NodeTracker { std::string GetNodeIP(const NodeID&) const; RetCode GetNodeAddr(const NodeID&, NodeAddress& node_addr) const; + RetCode 
InsertRegisteredNode(const NodeID& node_id, + const NodeAddress& node_addr); + std::vector GetNodeIDList() const; private: diff --git a/Resource/ResourceManagerMaster.cpp b/Resource/ResourceManagerMaster.cpp index f141e7e30..3a82e42bc 100755 --- a/Resource/ResourceManagerMaster.cpp +++ b/Resource/ResourceManagerMaster.cpp @@ -6,7 +6,17 @@ */ #include "ResourceManagerMaster.h" + +#include + #include "../Environment.h" +#include "../loader/load_packet.h" +#include "caf/io/all.hpp" + +#include "../Config.h" +using caf::io::remote_actor; +using claims::loader::OkAtom; +using claims::loader::RegNodeAtom; ResourceManagerMaster::ResourceManagerMaster() { node_tracker_ = NodeTracker::GetInstance(); logging_ = new ResourceManagerMasterLogging(); @@ -30,6 +40,47 @@ NodeID ResourceManagerMaster::RegisterNewSlave(NodeAddress new_slave_address) { return false; } + // send all node info to master loader + // DLOG(INFO) << "going to send node info to (" << Config::master_loader_ip + // << ":" << Config::master_loader_port << ")"; + // int retry_max_time = 10; + // int time = 0; + // bool is_ok = false; + // caf::actor master_actor; + // while (1) { + // try { + // master_actor = + // remote_actor(Config::master_loader_ip, + // Config::master_loader_port); + // } catch (exception& e) { + // // LOG(ERROR) << "can't send node info to master loader in " << + // // ++time + // cout << "new remote actor " << Config::master_loader_ip << "," + // << Config::master_loader_port << "failed for " + // << " time. 
" << e.what(); + // usleep(100 * 1000); + // if (time >= retry_max_time) return false; + // continue; + // } + // caf::scoped_actor self; + // self->sync_send(master_actor, RegNodeAtom::value, new_slave_address, + // new_node_id) + // .await([&](OkAtom) { + // cout << "successfully sent node info to master loader"; + // is_ok = true; + // }, + // [&](const caf::sync_exited_msg& msg) { + // cout << "notify link fail"; + // usleep(100 * 1000); + // }, + // caf::after(std::chrono::milliseconds(10)) >> + // [&]() { + // cout << "notify timeout for " << ++time << " time."; + // if (time >= retry_max_time) return false; + // }); + // + // if (is_ok) break; + // } // // if(node_to_resourceinfo_.find(new_node_id)!=node_to_resourceinfo_.end()){ // /*The slaveId has already existed.*/ @@ -134,10 +185,10 @@ void ResourceManagerMaster::ResourceManagerMasterActor::ReceiveStorageBudgetReport( const StorageBudgetMessage& message, const Theron::Address from) { if (!rmm_->RegisterDiskBuget(message.nodeid, message.disk_budget)) { - rmm_->logging_->elog("Fail to add the budget information to rmm!"); + rmm_->logging_->elog("Fail to add the disk budget information to rmm!"); } if (!rmm_->RegisterMemoryBuget(message.nodeid, message.memory_budget)) { - rmm_->logging_->elog("Fail to add the budget information to rmm!"); + rmm_->logging_->elog("Fail to add the memory budget information to rmm!"); } // rmm_->logging_->log("The storage of Slave[%d] has been registered, the // disk=[%d]MB, diff --git a/Resource/ResourceManagerSlave.cpp b/Resource/ResourceManagerSlave.cpp index 936f6a2a3..2c0324d75 100755 --- a/Resource/ResourceManagerSlave.cpp +++ b/Resource/ResourceManagerSlave.cpp @@ -6,6 +6,9 @@ */ #include "ResourceManagerSlave.h" + +#include + #include "../Environment.h" #include "../common/TimeOutReceiver.h" #define ResourceManagerMasterName "ResourceManagerMaster" @@ -29,11 +32,12 @@ NodeID InstanceResourceManager::Register() { unsigned port = Environment::getInstance()->getPort(); 
NodeRegisterMessage message(ip, port); - DLOG(INFO) << "resourceManagerSlave is going to register to master"; + DLOG(INFO) << "resourceManagerSlave is going to register (" << ip << "," + << port << ")to master"; framework_->Send(message, receiver.GetAddress(), Theron::Address("ResourceManagerMaster")); Theron::Address from; - if (receiver.TimeOutWait(1, 10000) == 1) { + if (receiver.TimeOutWait(1, 1000) == 1) { resultCatcher.Pop(ret, from); logging_->log( "Successfully registered to the master, the allocated id =%d.", ret); @@ -49,5 +53,5 @@ void InstanceResourceManager::ReportStorageBudget( Theron::Address(ResourceManagerMasterName)); } -void InstanceResourceManager::setStorageBudget(unsigned long memory, - unsigned long disk) {} +void InstanceResourceManager::setStorageBudget(uint64_t memory, uint64_t disk) { +} diff --git a/catalog/projection.cpp b/catalog/projection.cpp index b75f2ee21..9d73d5055 100644 --- a/catalog/projection.cpp +++ b/catalog/projection.cpp @@ -27,6 +27,8 @@ */ #include "../catalog/projection.h" + +#include #include #include @@ -77,6 +79,7 @@ bool ProjectionDescriptor::AllPartitionBound() const { } std::vector ProjectionDescriptor::getAttributeList() const { std::vector ret; + LOG(INFO) << "the length of column list is:" << column_list_.size(); for (unsigned i = 0; i < this->column_list_.size(); i++) { ret.push_back((Attribute)column_list_[i]); } diff --git a/loader/load_packet.h b/loader/load_packet.h index 849ec1e36..072240703 100644 --- a/loader/load_packet.h +++ b/loader/load_packet.h @@ -36,6 +36,8 @@ namespace loader { using IpPortAtom = caf::atom_constant; using LoadAckAtom = caf::atom_constant; +using RegNodeAtom = caf::atom_constant; +using OkAtom = caf::atom_constant; /************** LoadPacket format *****************/ /** field type length **********/ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index a1078f1d8..8a8d56bd0 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -50,6 +50,7 @@ 
#include "../Config.h" #include "../Environment.h" #include "../loader/data_ingestion.h" +#include "../Resource/NodeTracker.h" #include "../txn_manager/txn.hpp" #include "../txn_manager/txn_client.hpp" #include "../utility/resource_guard.h" @@ -94,8 +95,9 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, } assert(new_slave_fd > 3); - mloader->slave_addr_to_socket_.insert( - std::pair(NodeAddress(ip, port), new_slave_fd)); + DLOG(INFO) << "going to push socket into map"; + mloader->slave_addr_to_socket_[NodeAddress(ip, to_string(port))] = + new_slave_fd; DLOG(INFO) << "start to send test message to slave"; /// test whether socket works well @@ -146,6 +148,13 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, mloader->txn_commint_info_.erase(txn_id); } }, + [=](RegNodeAtom, NodeAddress addr, + NodeID node_id) -> caf::message { // NOLINT + LOG(INFO) << "get node register info from " << addr.ip << ":" + << addr.port; + NodeTracker::GetInstance()->InsertRegisteredNode(node_id, addr); + return caf::make_message(OkAtom::value); + }, caf::others >> [] { LOG(ERROR) << "nothing matched!!!"; }}; } @@ -153,10 +162,10 @@ RetCode MasterLoader::ConnectWithSlaves() { int ret = rSuccess; try { auto listening_actor = spawn(&MasterLoader::ReceiveSlaveReg, this); - publish(listening_actor, master_loader_port_, master_loader_ip_.c_str(), - true); + publish(listening_actor, master_loader_port_); DLOG(INFO) << "published in " << master_loader_ip_ << ":" << master_loader_port_; + cout << "published in " << master_loader_ip_ << ":" << master_loader_port_; } catch (exception& e) { LOG(ERROR) << "publish master loader actor failed" << e.what(); return rFailure; @@ -167,6 +176,7 @@ RetCode MasterLoader::ConnectWithSlaves() { RetCode MasterLoader::Ingest() { RetCode ret = rSuccess; + cout << "\ninput a number to continue" << std::endl; int temp; cin >> temp; cout << "Well , temp is received" << std::endl; @@ -182,12 +192,13 @@ RetCode 
MasterLoader::Ingest() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable(req.table_name_); assert(table != NULL && "table is not exist!"); + vector>> tuple_buffers_per_part( table->getNumberOfProjection()); - for (auto proj : (*(table->GetProjectionList()))) { - tuple_buffers_per_part.push_back(vector>( - proj->getPartitioner()->getNumberOfPartitions(), vector())); - } + for (int i = 0; i < table->getNumberOfProjection(); ++i) + tuple_buffers_per_part[i].resize( + table->getProjectoin(i)->getPartitioner()->getNumberOfPartitions()); + vector columns_validities; EXEC_AND_LOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part, columns_validities), @@ -212,7 +223,7 @@ RetCode MasterLoader::Ingest() { EXEC_AND_LOG(ret, ApplyTransaction(table, partition_buffers, ingest), "applied transaction", "failed to apply transaction"); - txn_commint_info_.insert(pair( + txn_commint_info_.insert(std::pair( ingest.Id, CommitInfo(ingest.StripList.size()))); // write data log @@ -508,11 +519,21 @@ RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( vector>>& tuple_buffer_per_part, vector>& partition_buffers) { RetCode ret = rSuccess; + assert(tuple_buffer_per_part.size() == table->getNumberOfProjection() && + "projection number is not match!!"); for (int i = 0; i < tuple_buffer_per_part.size(); ++i) { + assert(tuple_buffer_per_part[i].size() == + table->getProjectoin(i) + ->getPartitioner() + ->getNumberOfPartitions() && + "partition number is not match"); for (int j = 0; j < tuple_buffer_per_part[i].size(); ++j) { int tuple_count = tuple_buffer_per_part[i][j].size(); int tuple_len = table->getProjectoin(i)->getSchema()->getTupleMaxSize(); int buffer_len = tuple_count * tuple_len; + LOG(INFO) << "the tuple length of prj:" << i << ",part:" << j + << ",table:" << table->getTableName() << " is:" << tuple_len; + LOG(INFO) << "tuple size is:" << tuple_count; void* new_buffer = Malloc(buffer_len); if (NULL == new_buffer) return ret = 
claims::common::rNoMemory; @@ -576,9 +597,9 @@ void* MasterLoader::StartMasterLoader(void* arg) { EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->ConnectWithSlaves(), "failed to connect all slaves"); - // while (true) - // EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), - // "failed to ingest data"); + while (true) + EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), + "failed to ingest data"); return NULL; } diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 469a143f8..44892a638 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -238,26 +238,6 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { } } -void* SlaveLoader::StartSlaveLoader(void* arg) { - Config::getInstance(); - LOG(INFO) << "start slave loader..."; - - SlaveLoader* slave_loader = Environment::getInstance()->get_slave_loader(); - int ret = rSuccess; - EXEC_AND_LOG(ret, slave_loader->ConnectWithMaster(), - "succeed to connect with master", - "failed to connect with master "); - - assert(rSuccess == ret && "can't connect with master"); - - cout << "connected with master loader" << endl; - // TODO(YK): error handle - - slave_loader->ReceiveAndWorkLoop(); - assert(false); - return NULL; -} - RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { RetCode ret = rSuccess; const uint64_t table_id = GetTableIdFromGlobalPartId(packet.global_part_id_); @@ -338,5 +318,25 @@ RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, return rSuccess; } +void* SlaveLoader::StartSlaveLoader(void* arg) { + Config::getInstance(); + LOG(INFO) << "start slave loader..."; + + SlaveLoader* slave_loader = Environment::getInstance()->get_slave_loader(); + int ret = rSuccess; + EXEC_AND_LOG(ret, slave_loader->ConnectWithMaster(), + "succeed to connect with master", + "failed to connect with master "); + + assert(rSuccess == ret && "can't connect with master"); + + cout << "connected with master loader" << endl; + // TODO(YK): error handle + + 
slave_loader->ReceiveAndWorkLoop(); + assert(false); + return NULL; +} + } /* namespace loader */ } /* namespace claims */ From b74c277a2e6ff440d561a457d58ccbf138e78d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Sun, 1 May 2016 13:25:28 +0800 Subject: [PATCH 23/58] init txn-manager from catalog --- Environment.cpp | 38 +++++++++++++++++++++ Environment.h | 8 +---- Test/iterator/elastic_iterator_model_test.h | 5 +++ catalog/catalog.cpp | 7 ++++ catalog/catalog.h | 6 ++-- txn_manager/txn.hpp | 15 ++++++++ 6 files changed, 70 insertions(+), 9 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index 172bcb98a..e34ece577 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -31,6 +31,12 @@ // #define DEBUG_MODE #include "catalog/catalog.h" +#include "txn_manager/txn_server.hpp" +#include "txn_manager/txn_client.hpp" +#include "txn_manager/txn_log.hpp" +#include "txn_manager/txn.hpp" + + using claims::common::InitAggAvgDivide; using claims::common::InitOperatorFunc; using claims::common::InitTypeCastFunc; @@ -39,6 +45,12 @@ using claims::common::rSuccess; using claims::loader::MasterLoader; using claims::loader::SlaveLoader; +using claims::txn::TxnServer; +using claims::txn::TxnClient; +using claims::txn::LogServer; +using claims::txn::LogClient; +using claims::txn::GetGlobalPartId; + Environment* Environment::_instance = 0; Environment::Environment(bool ismaster) : ismaster_(ismaster) { @@ -229,6 +241,32 @@ bool Environment::InitTxnManager() { if (Config::enable_txn_server) { LOG(INFO) << "I'm txn manager server" ; TxnServer::Init(Config::txn_server_cores, Config::txn_server_port); + auto cat = Catalog::getInstance(); + auto table_count = cat->getNumberOfTable(); + //cout << "table count:" << table_count << endl; + for (auto table_id = 0; table_id < table_count; table_id ++) { + auto table = cat->getTable(table_id); + auto proj_count = table->getNumberOfProjection(); + // cout << "proj_count:" << proj_count 
<< endl; + for (auto proj_id = 0; proj_id < proj_count; proj_id++) { + auto proj = table->getProjectoin(proj_id); + auto part= proj->getPartitioner(); + auto part_count = part->getNumberOfPartitions(); + //cout << "part_count:" << part_count << endl; + for (auto part_id = 0; part_id < part_count; part_id ++) { + auto global_part_id = GetGlobalPartId(table_id, proj_id, part_id); + // cout << global_part_id << endl; + TxnServer::pos_list_[global_part_id] = + TxnServer::logic_cp_list_[global_part_id] = + TxnServer::phy_cp_list_[global_part_id] = + part->getPartitionBlocks(part_id) * 64 * 1024; + } + } + } + + cout<<"*******pos_list*******" << endl; + for (auto & pos : TxnServer::pos_list_) + cout << "partition[" << pos.first << "] => " << pos.second << endl; } TxnClient::Init(Config::txn_server_ip, Config::txn_server_port); return true; diff --git a/Environment.h b/Environment.h index 6fb858216..946a37ad1 100755 --- a/Environment.h +++ b/Environment.h @@ -23,9 +23,6 @@ #include "Executor/exchange_tracker.h" #include "Executor/expander_tracker.h" #include "Resource/BufferManager.h" -#include "txn_manager/txn_server.hpp" -#include "txn_manager/txn_client.hpp" -#include "txn_manager/txn_log.hpp" namespace claims { @@ -37,10 +34,7 @@ class MasterLoader; using claims::catalog::Catalog; using claims::loader::SlaveLoader; using claims::loader::MasterLoader; -using claims::txn::TxnServer; -using claims::txn::TxnClient; -using claims::txn::LogServer; -using claims::txn::LogClient; + class Environment { diff --git a/Test/iterator/elastic_iterator_model_test.h b/Test/iterator/elastic_iterator_model_test.h index 6d5ecfbf9..9b53d8548 100644 --- a/Test/iterator/elastic_iterator_model_test.h +++ b/Test/iterator/elastic_iterator_model_test.h @@ -329,6 +329,7 @@ TEST_F(ElasticIteratorModelTest, droptestdata) { TEST_F(ElasticIteratorModelTest, CreateTempTableForTableFileConnectorTest) { string table_name = "sfdfsf"; + string drop_table_sql = "drop table sfdfsf;"; string 
create_table_stmt = "create table " + table_name + " (a int , b varchar(12));"; string create_prj_stmt1 = "create projection on " + table_name + @@ -347,6 +348,10 @@ TEST_F(ElasticIteratorModelTest, CreateTempTableForTableFileConnectorTest) { client_.submit(create_prj_stmt2.c_str(), message, rs); EXPECT_EQ("create projection successfully\n", message); cout << message << endl; + + client_.submit(drop_table_sql.c_str(), message, rs); + EXPECT_EQ("drop table successfully!\n", message); + cout << message << endl; } // add by cswang 19 Oct, 2015 diff --git a/catalog/catalog.cpp b/catalog/catalog.cpp index f96e1e14f..40df50a38 100644 --- a/catalog/catalog.cpp +++ b/catalog/catalog.cpp @@ -351,5 +351,12 @@ void Catalog::GetAllTables(ostringstream& ostr) const { } } +vector Catalog::getAllTableIDs() const { + vector ids; + for(auto it = tableid_to_table.begin(); it != tableid_to_table.end(); it ++) + ids.push_back(it->first); + +} + } /* namespace catalog */ } /* namespace claims */ diff --git a/catalog/catalog.h b/catalog/catalog.h index 40b34140f..7219c63d2 100644 --- a/catalog/catalog.h +++ b/catalog/catalog.h @@ -32,6 +32,7 @@ #define CATALOG_CATALOG_H_ #include #include +#include #include "../catalog/projection_binding.h" #include "../catalog/table.h" @@ -47,6 +48,7 @@ class SingleFileConnector; }; namespace catalog { +using std::vector; using loader::SingleFileConnector; class SingleFileConnector; @@ -86,14 +88,14 @@ class Catalog { void GetAllTables(ostringstream& ostr) const; ProjectionDescriptor* getProjection(const ProjectionID&) const; ProjectionBinding* getBindingModele() const; - + vector getAllTableIDs() const; /** * ATTENTION: this method do not return the number of existing table, * other than the next table'id * An example is: a table is dropped, but the return value don't change */ unsigned getTableCount() const { return table_id_allocator.table_id_curosr; } - + unsigned getNumberOfTable() const { return tableid_to_table.size();} RetCode saveCatalog(); // 
2014-3-20---save as a file---by Yu RetCode restoreCatalog(); // 2014-3-20---restore from a file---by Yu void outPut(); diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index 3c6bbac6b..70bfb244a 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -267,6 +267,21 @@ inline void SerConfig() { &Checkpoint::set_abort_strip_list)); } +inline UInt64 GetGlobalPartId(UInt64 table_id, UInt64 projeciton_id, + UInt64 partition_id) { + return table_id + 1000 * (projeciton_id + 1000 * partition_id); +} + +inline UInt64 GetTableIdFromGlobalPartId(UInt64 global_partition_id) { + return global_partition_id / (1000 * 1000); +} + +inline UInt64 GetProjectionIdFromGlobalPartId(UInt64 global_partition_id) { + return (global_partition_id % (1000 * 1000)) / 1000; +} +inline UInt64 GetPartitionIdFromGlobalPartId(UInt64 global_partition_id) { + return global_partition_id % (1000); +} } From 5c6d5ff5acaea9e077d7c0c9e756d42595aa60cf Mon Sep 17 00:00:00 2001 From: yukai Date: Sun, 1 May 2016 15:03:12 +0800 Subject: [PATCH 24/58] Completed Loader without MQ! 
FIX: add bracket around macro definition; FIX: bug found in LoadPacket/SlaveLoader/MasterLoader/ChunkStorage, txn.cpp; ADD: announce custom structure for CAF; ADD: send binding info and node register info to master loader; --- Debug.h | 22 ++++---- Environment.cpp | 14 +++-- Resource/NodeTracker.cpp | 3 ++ Resource/ResourceManagerMaster.cpp | 39 ++++---------- Resource/ResourceManagerSlave.cpp | 36 +++++++++++++ catalog/projection_binding.cpp | 30 +++++++++++ common/Schema/SchemaFix.cpp | 4 +- loader/load_packet.cpp | 12 +++-- loader/load_packet.h | 3 +- loader/master_loader.cpp | 62 ++++++++++++++++----- loader/slave_loader.cpp | 86 +++++++++++++++++++++--------- storage/BlockManager.cpp | 3 ++ storage/ChunkStorage.cpp | 16 ++++-- storage/PartitionStorage.cpp | 2 + txn_manager/txn.hpp | 2 +- txn_manager/txn_server.cpp | 4 ++ 16 files changed, 243 insertions(+), 95 deletions(-) diff --git a/Debug.h b/Debug.h index e6f00e0cc..03884adb1 100755 --- a/Debug.h +++ b/Debug.h @@ -10,7 +10,6 @@ #define COOR "/home/claims/config/zhanglei/coor.conf" - /* CONFIG is now specified in Config.cpp //#define CONFIG "/home/claims/config/wangli/config" @@ -23,20 +22,19 @@ /* * block in hdfs and sublock in block * */ -#define BLOCK_SIZE 64*1024 -#define SUCHUNK_SIZE 64*1024 -#define CHUNK_SIZE 64*1024*1024 -#define CHUNK_SIZE_IN_MB 64 -#define HEARTBEAT_MESSAGE_LEN 64 -#define REGISTER_MESSAGE_LEN 64 -#define BLOCK_STATUS_MESSAGE_LEN 256 -#define MATCHER_MESSAGE_FILENAME_LEN 256 -#define MATCHER_MESSAGE_BMI_LEN 256 -#define MATCHER_MESSAGE_PROJECT_LEN 256 +#define BLOCK_SIZE (64 * 1024) +#define SUCHUNK_SIZE (64 * 1024) +#define CHUNK_SIZE (64 * 1024 * 1024) +#define CHUNK_SIZE_IN_MB (64) +#define HEARTBEAT_MESSAGE_LEN (64) +#define REGISTER_MESSAGE_LEN (64) +#define BLOCK_STATUS_MESSAGE_LEN (256) +#define MATCHER_MESSAGE_FILENAME_LEN (256) +#define MATCHER_MESSAGE_BMI_LEN (256) +#define MATCHER_MESSAGE_PROJECT_LEN (256) // 分布式文件系统的主节点 #define HDFS_N "10.11.1.190" // 磁盘目录 #define 
DISKDIR "/home/claims/diskdata" - #endif /* DEBUG_H_ */ diff --git a/Environment.cpp b/Environment.cpp index 4719b832a..1791db47b 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -6,23 +6,22 @@ */ #include "Environment.h" - #include -#include "caf/all.hpp" +#include -#include "txn_manager/txn_server.hpp" #define GLOG_NO_ABBREVIATED_SEVERITIES #include #undef GLOG_NO_ABBREVIATED_SEVERITIES -#include #include #include #include #include // NOLINT +#include "caf/all.hpp" #include "loader/master_loader.h" #include "loader/slave_loader.h" #include "./Debug.h" #include "./Config.h" +#include "common/ids.h" #include "common/Logging.h" #include "common/TypePromotionMap.h" #include "common/TypeCast.h" @@ -33,7 +32,9 @@ #include "common/expression/type_conversion_matrix.h" // #define DEBUG_MODE #include "catalog/catalog.h" +#include "txn_manager/txn_server.hpp" +using caf::announce; using claims::common::InitAggAvgDivide; using claims::common::InitOperatorFunc; using claims::common::InitTypeCastFunc; @@ -46,6 +47,11 @@ using claims::txn::TxnServer; Environment* Environment::_instance = 0; Environment::Environment(bool ismaster) : ismaster_(ismaster) { + announce("NodeAddress", &NodeAddress::ip, &NodeAddress::port); + announce("ProjectionID", &ProjectionID::table_id, + &ProjectionID::projection_off); + announce("PartitionID", &PartitionID::projection_id, + &PartitionID::partition_off); _instance = this; Config::getInstance(); CodeGenerator::getInstance(); diff --git a/Resource/NodeTracker.cpp b/Resource/NodeTracker.cpp index 527b5ae5d..5992a4c0a 100755 --- a/Resource/NodeTracker.cpp +++ b/Resource/NodeTracker.cpp @@ -8,6 +8,7 @@ #include "NodeTracker.h" #include +#include NodeTracker* NodeTracker::instance_ = 0; NodeTracker::NodeTracker() : allocate_cur_(0) {} NodeTracker* NodeTracker::GetInstance() { @@ -70,5 +71,7 @@ RetCode NodeTracker::GetNodeAddr(const NodeID& target, RetCode NodeTracker::InsertRegisteredNode(const NodeID& node_id, const NodeAddress& node_addr) { 
address_to_id_[node_addr] = node_id; + LOG(INFO) << "inserted node:" << node_id + << ". Now size of addr2id is:" << address_to_id_.size(); return claims::common::rSuccess; } diff --git a/Resource/ResourceManagerMaster.cpp b/Resource/ResourceManagerMaster.cpp index 3a82e42bc..7bcd16793 100755 --- a/Resource/ResourceManagerMaster.cpp +++ b/Resource/ResourceManagerMaster.cpp @@ -8,12 +8,11 @@ #include "ResourceManagerMaster.h" #include - +#include +#include "../Config.h" #include "../Environment.h" #include "../loader/load_packet.h" #include "caf/io/all.hpp" - -#include "../Config.h" using caf::io::remote_actor; using claims::loader::OkAtom; using claims::loader::RegNodeAtom; @@ -40,48 +39,32 @@ NodeID ResourceManagerMaster::RegisterNewSlave(NodeAddress new_slave_address) { return false; } - // send all node info to master loader + // // send all node info to master loader // DLOG(INFO) << "going to send node info to (" << Config::master_loader_ip // << ":" << Config::master_loader_port << ")"; // int retry_max_time = 10; // int time = 0; - // bool is_ok = false; - // caf::actor master_actor; // while (1) { // try { - // master_actor = + // caf::actor master_actor = // remote_actor(Config::master_loader_ip, // Config::master_loader_port); + // caf::scoped_actor self; + // self->sync_send(master_actor, RegNodeAtom::value, new_slave_address, + // new_node_id); // } catch (exception& e) { // // LOG(ERROR) << "can't send node info to master loader in " << // // ++time // cout << "new remote actor " << Config::master_loader_ip << "," - // << Config::master_loader_port << "failed for " - // << " time. " << e.what(); + // << Config::master_loader_port << "failed for " << ++time << " + // time. 
" + // << e.what() << endl; // usleep(100 * 1000); // if (time >= retry_max_time) return false; // continue; // } - // caf::scoped_actor self; - // self->sync_send(master_actor, RegNodeAtom::value, new_slave_address, - // new_node_id) - // .await([&](OkAtom) { - // cout << "successfully sent node info to master loader"; - // is_ok = true; - // }, - // [&](const caf::sync_exited_msg& msg) { - // cout << "notify link fail"; - // usleep(100 * 1000); - // }, - // caf::after(std::chrono::milliseconds(10)) >> - // [&]() { - // cout << "notify timeout for " << ++time << " time."; - // if (time >= retry_max_time) return false; - // }); - // - // if (is_ok) break; // } - // + // if(node_to_resourceinfo_.find(new_node_id)!=node_to_resourceinfo_.end()){ // /*The slaveId has already existed.*/ // return false; diff --git a/Resource/ResourceManagerSlave.cpp b/Resource/ResourceManagerSlave.cpp index 2c0324d75..224b4bf59 100755 --- a/Resource/ResourceManagerSlave.cpp +++ b/Resource/ResourceManagerSlave.cpp @@ -8,9 +8,18 @@ #include "ResourceManagerSlave.h" #include +#include +#include "../Config.h" #include "../Environment.h" +#include "../common/ids.h" #include "../common/TimeOutReceiver.h" +#include "../loader/load_packet.h" +#include "caf/io/all.hpp" +#include "caf/all.hpp" + +using caf::io::remote_actor; +using claims::loader::RegNodeAtom; #define ResourceManagerMasterName "ResourceManagerMaster" InstanceResourceManager::InstanceResourceManager() { framework_ = @@ -41,6 +50,33 @@ NodeID InstanceResourceManager::Register() { resultCatcher.Pop(ret, from); logging_->log( "Successfully registered to the master, the allocated id =%d.", ret); + + // send all node info to master loader + DLOG(INFO) << "going to send node info to (" << Config::master_loader_ip + << ":" << Config::master_loader_port << ")"; + + int retry_max_time = 10; + int time = 0; + while (1) { + try { + caf::actor master_actor = + remote_actor(Config::master_loader_ip, Config::master_loader_port); + 
caf::scoped_actor self; + self->sync_send(master_actor, RegNodeAtom::value, + NodeAddress(ip, to_string(port)), + ret).await([&](int r) { + LOG(INFO) << "sent node info and received response"; + }); + break; + } catch (exception& e) { + cout << "new remote actor " << Config::master_loader_ip << "," + << Config::master_loader_port << "failed for " << ++time + << " time. " << e.what() << endl; + usleep(100 * 1000); + if (time >= retry_max_time) return false; + } + } + return ret; } else { logging_->elog("Failed to get NodeId from the master."); diff --git a/catalog/projection_binding.cpp b/catalog/projection_binding.cpp index 43b4be895..8b43734d2 100644 --- a/catalog/projection_binding.cpp +++ b/catalog/projection_binding.cpp @@ -34,6 +34,12 @@ #include #include "../Environment.h" #include "../utility/maths.h" +#include "caf/io/all.hpp" + +#include "../Config.h" +#include "../loader/load_packet.h" +using caf::io::remote_actor; +using claims::loader::BindPartAtom; namespace claims { namespace catalog { @@ -136,6 +142,30 @@ bool ProjectionBinding::BindingEntireProjection( const unsigned number_of_chunks = part->getPartitionChunks(partition_off); BlockManagerMaster::getInstance()->SendBindingMessage( partition_id, number_of_chunks, desriable_storage_level, node_id); + + /* notify the master loader the binding info*/ + DLOG(INFO) << "going to send node info to (" << Config::master_loader_ip + << ":" << Config::master_loader_port << ")"; + int retry_max_time = 10; + int time = 0; + while (1) { + try { + caf::actor master_actor = remote_actor(Config::master_loader_ip, + Config::master_loader_port); + caf::scoped_actor self; + self->sync_send(master_actor, BindPartAtom::value, partition_id, + node_id).await([&](int r) { + LOG(INFO) << "sent bind part info and received response"; + }); + break; + } catch (exception& e) { + cout << "new remote actor " << Config::master_loader_ip << "," + << Config::master_loader_port << "failed for " << ++time + << " time. 
" << e.what() << endl; + usleep(100 * 1000); + if (time >= retry_max_time) return false; + } + } } return true; } diff --git a/common/Schema/SchemaFix.cpp b/common/Schema/SchemaFix.cpp index 48e1d9e07..13626fdda 100755 --- a/common/Schema/SchemaFix.cpp +++ b/common/Schema/SchemaFix.cpp @@ -169,7 +169,9 @@ RetCode SchemaFix::CheckAndToValue(std::string text_tuple, void* binary_tuple, rInvalidInsertData == ret) { // error if (kSQL == raw_data_source) { // treated as error columns_validities.push_back(std::move(Validity(i, ret))); - ELOG(ret, "Data from SQL is for column whose index is " << i); + ELOG(ret, "Data:" << text_column + << " from SQL is for column whose index is " + << i); return ret; } else { // treated as warning and set default columns_validities.push_back(std::move(Validity(i, ret))); diff --git a/loader/load_packet.cpp b/loader/load_packet.cpp index b71bb8f48..98677c21e 100644 --- a/loader/load_packet.cpp +++ b/loader/load_packet.cpp @@ -27,6 +27,9 @@ */ #include "./load_packet.h" + +#include + #include "../common/memory_handle.h" using namespace claims::common; // NOLINT @@ -46,14 +49,16 @@ RetCode LoadPacket::Serialize(void*& packet_buffer, } *reinterpret_cast(packet_buffer) = txn_id_; - *reinterpret_cast(packet_buffer + sizeof(uint64_t)) = + *reinterpret_cast(packet_buffer + 1 * sizeof(uint64_t)) = global_part_id_; *reinterpret_cast(packet_buffer + 2 * sizeof(uint64_t)) = pos_; *reinterpret_cast(packet_buffer + 3 * sizeof(uint64_t)) = offset_; *reinterpret_cast(packet_buffer + 4 * sizeof(uint64_t)) = data_length_; + LOG(INFO) << "Serialize packet: " << txn_id_ << " " << global_part_id_ << " " + << pos_ << " " << offset_ << " " << data_length_; - memcpy(packet_buffer + 4 * sizeof(uint64_t), data_buffer_, data_length_); + memcpy(packet_buffer + kHeadLength, data_buffer_, data_length_); return rSuccess; } @@ -67,7 +72,8 @@ RetCode LoadPacket::Deserialize(const void* const head_buffer, *reinterpret_cast(head_buffer + 3 * sizeof(uint64_t)); data_length_ 
= *reinterpret_cast(head_buffer + 4 * sizeof(uint64_t)); - + LOG(INFO) << "Deserialize packet: " << txn_id_ << " " << global_part_id_ + << " " << pos_ << " " << offset_ << " " << data_length_; data_buffer_ = data_buffer; return rSuccess; } diff --git a/loader/load_packet.h b/loader/load_packet.h index 072240703..d780f049d 100644 --- a/loader/load_packet.h +++ b/loader/load_packet.h @@ -37,6 +37,7 @@ namespace loader { using IpPortAtom = caf::atom_constant; using LoadAckAtom = caf::atom_constant; using RegNodeAtom = caf::atom_constant; +using BindPartAtom = caf::atom_constant; using OkAtom = caf::atom_constant; /************** LoadPacket format *****************/ @@ -66,7 +67,7 @@ struct LoadPacket { RetCode Deserialize(const void* const head_buffer, void* data_buffer); public: - static const int kHeadLength = 5 * 4; + static const int kHeadLength = 5 * sizeof(uint64_t); public: uint64_t txn_id_; diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 8a8d56bd0..96827a1ed 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -59,7 +59,6 @@ using caf::behavior; using caf::event_based_actor; using caf::io::publish; using caf::io::remote_actor; -using caf::mixin::sync_sender_impl; using caf::spawn; using std::endl; using claims::catalog::Catalog; @@ -83,7 +82,7 @@ MasterLoader::~MasterLoader() {} static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, MasterLoader* mloader) { return { - [=](IpPortAtom, std::string ip, int port) { // NOLINT + [=](IpPortAtom, std::string ip, int port) -> int { // NOLINT LOG(INFO) << "receive slave network address(" << ip << ":" << port << ")" << endl; int new_slave_fd = -1; @@ -96,8 +95,7 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, assert(new_slave_fd > 3); DLOG(INFO) << "going to push socket into map"; - mloader->slave_addr_to_socket_[NodeAddress(ip, to_string(port))] = - new_slave_fd; + mloader->slave_addr_to_socket_[NodeAddress(ip, "")] = new_slave_fd; 
DLOG(INFO) << "start to send test message to slave"; /// test whether socket works well @@ -126,8 +124,10 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, // } else { // DLOG(INFO) << "message buffer is sent"; // } + + return 1; }, - [=](LoadAckAtom, int txn_id, bool is_commited) { // NOLINT + [=](LoadAckAtom, uint64_t txn_id, bool is_commited) -> int { // NOLINT // TODO(ANYONE): there should be a thread checking whether transaction // overtime periodically and abort these transaction and delete from // map. @@ -136,24 +136,45 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, claims::txn::Ingest ingest; ingest.Id = txn_id; if (is_commited) { + LOG(INFO) << "received a commit result of txn with id:" << txn_id; + cout << "received a commit result of txn with id:" << txn_id << endl; if (++(mloader->txn_commint_info_.at(txn_id).commited_part_num_) >= mloader->txn_commint_info_.at(txn_id).total_part_num_) { // TODO(lizhifang): optimize the interface of TxnClient TxnClient::CommitIngest(ingest); mloader->txn_commint_info_.erase(txn_id); + LOG(INFO) << "committed txn with id:" << txn_id + << " to txn manager"; + cout << "committed txn with id:" << txn_id << " to txn manager" + << endl; } } else { // TODO(lizhifang): optimize the interface of TxnClient TxnClient::AbortIngest(ingest); mloader->txn_commint_info_.erase(txn_id); + LOG(INFO) << "aborted txn with id:" << txn_id << " to txn manager"; + cout << "aborted txn with id:" << txn_id << " to txn manager" << endl; } + + return 1; }, - [=](RegNodeAtom, NodeAddress addr, - NodeID node_id) -> caf::message { // NOLINT - LOG(INFO) << "get node register info from " << addr.ip << ":" - << addr.port; + [=](RegNodeAtom, NodeAddress addr, NodeID node_id) -> int { // NOLINT + LOG(INFO) << "get node register info : (" << addr.ip << ":" << addr.port + << ") --> " << node_id; NodeTracker::GetInstance()->InsertRegisteredNode(node_id, addr); - return caf::make_message(OkAtom::value); + // 
return caf::make_message(OkAtom::value); + return 1; + }, + [=](BindPartAtom, PartitionID part_id, NodeID node_id) -> int { // NOLINT + LOG(INFO) << "get part bind info (T" << part_id.projection_id.table_id + << "P" << part_id.projection_id.projection_off << "G" + << part_id.partition_off << ") --> " << node_id; + Catalog::getInstance() + ->getTable(part_id.projection_id.table_id) + ->getProjectoin(part_id.projection_id.projection_off) + ->getPartitioner() + ->bindPartitionToNode(part_id.partition_off, node_id); + return 1; }, caf::others >> [] { LOG(ERROR) << "nothing matched!!!"; }}; } @@ -244,7 +265,7 @@ RetCode MasterLoader::Ingest() { string MasterLoader::GetMessage() { // for testing string ret = - "LINEITEM,|,\n" + "LINEITEM,|,\n," "1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-" "02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the|\n" "1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-" @@ -319,6 +340,7 @@ RetCode MasterLoader::GetSocketFdConnectedWithSlave(string ip, int port, RetCode MasterLoader::GetRequestFromMessage(const string& message, IngestionRequest* req) { // AddRowIdColumn() + static uint64_t row_id = 10000000; RetCode ret = rSuccess; size_t pos = message.find(',', 0); req->table_name_ = message.substr(0, pos); @@ -327,14 +349,16 @@ RetCode MasterLoader::GetRequestFromMessage(const string& message, req->col_sep_ = message.substr(pos, next_pos - pos); pos = next_pos + 1; - next_pos = message.find('\n', pos); + next_pos = message.find(',', pos); req->row_sep_ = message.substr(pos, next_pos - pos); + pos = next_pos + 1; string tuple; - string data_string = message.substr(pos + 1); + string data_string = message.substr(pos); istringstream iss(data_string); while (DataIngestion::GetTupleTerminatedBy(iss, tuple, req->row_sep_)) { - req->tuples_.push_back(tuple); + uint64_t allocated_row_id = __sync_add_and_fetch(&row_id, 1); + req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ + tuple); } 
req->Show(); return ret; @@ -360,6 +384,7 @@ RetCode MasterLoader::GetPartitionTuples( // check all tuples to be inserted int line = 0; for (auto tuple_string : req.tuples_) { + DLOG(INFO) << "to be inserted tuple:" << tuple_string; void* tuple_buffer = Malloc(table->getSchema()->getTupleMaxSize()); if (tuple_buffer == NULL) return claims::common::rNoMemory; MemoryGuardWithRetCode guard(tuple_buffer, ret); @@ -438,10 +463,15 @@ RetCode MasterLoader::ApplyTransaction( for (int j = 0; j < prj->getPartitioner()->getNumberOfPartitions(); ++j) { req.Insert(GetGlobalPartId(table_id, i, j), tuple_length, partition_buffers[i][j].length_ / tuple_length); + cout << "the length of partition buffer[" << i << "," << j + << "] is:" << partition_buffers[i][j].length_ << std::endl; } } + TxnClient::BeginIngest(req, ingest); + cout << req.ToString() << " " << ingest.ToString() << endl; + return ret; } @@ -475,6 +505,7 @@ RetCode MasterLoader::WriteLog( RetCode MasterLoader::ReplyToMQ(const IngestionRequest& req) { // TODO(YUKAI) + return rSuccess; } RetCode MasterLoader::SendPartitionTupleToSlave( @@ -561,10 +592,13 @@ RetCode MasterLoader::SelectSocket(const TableDescriptor* table, NodeID node_id_in_rmm = table->getProjectoin(prj_id)->getPartitioner()->getPartitionLocation( part_id); + LOG(INFO) << "node id is " << node_id_in_rmm; NodeAddress addr; EXEC_AND_LOG_RETURN( ret, NodeTracker::GetInstance()->GetNodeAddr(node_id_in_rmm, addr), "got node address", "failed to get node address"); + LOG(INFO) << "node address is " << addr.ip << ":" << addr.port; + addr.port = ""; // the port is used for OLAP, not for loading socket_fd = slave_addr_to_socket_[addr]; return ret; } diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 44892a638..f06669d4b 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -48,7 +48,6 @@ #include "../utility/resource_guard.h" using caf::event_based_actor; using caf::io::remote_actor; -using caf::mixin::sync_sender_impl; using 
caf::spawn; using claims::common::Malloc; using claims::common::rSuccess; @@ -161,9 +160,12 @@ RetCode SlaveLoader::SendSelfAddrToMaster() { auto master_actor = remote_actor(Config::master_loader_ip, Config::master_loader_port); caf::scoped_actor self; - self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port); + self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port) + .await([&](int r) { // NOLINT + LOG(INFO) << "sent ip&port and received response"; + }); } catch (exception& e) { - LOG(ERROR) << "can't send self ip&port to master loader " << e.what(); + LOG(ERROR) << "can't send self ip&port to master loader. " << e.what(); return rFailure; } return rSuccess; @@ -202,15 +204,23 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { RetCode ret = rSuccess; // get load packet - if (-1 == - recv(master_fd_, head_buffer, LoadPacket::kHeadLength, MSG_WAITALL)) { + int real_read_num; + if (-1 == (real_read_num = recv(master_fd_, head_buffer, + LoadPacket::kHeadLength, MSG_WAITALL))) { PLOG(ERROR) << "failed to receive message length from master"; + continue; + } else if (real_read_num < LoadPacket::kHeadLength) { + LOG(ERROR) << "received message error! only read " << real_read_num + << " bytes"; + continue; } - uint64_t data_length = *reinterpret_cast(head_buffer + 3 * 4); + uint64_t data_length = + *reinterpret_cast(head_buffer + LoadPacket::kHeadLength - + sizeof(uint64_t)); uint64_t real_packet_length = data_length + LoadPacket::kHeadLength; - assert(data_length >= 4 && data_length <= 10000000); LOG(INFO) << "real packet length is :" << real_packet_length << ". 
date length is " << data_length; + assert(data_length >= 4 && data_length <= 10000000); char* data_buffer = Malloc(data_length); MemoryGuard guard(data_buffer); // auto-release @@ -234,7 +244,9 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { "failed to store"); // return result to master loader - SendAckToMasterLoader(packet.txn_id_, rSuccess == ret); + EXEC_AND_LOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), + "sent commit result to master loader", + "failed to send commit res to master loader"); } } @@ -246,18 +258,25 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { const uint64_t part_id = GetPartitionIdFromGlobalPartId(packet.global_part_id_); - uint64_t chunk_id = packet.pos_ / CHUNK_SIZE; PartitionStorage* part_storage = BlockManager::getInstance()->getPartitionHandle( PartitionID(ProjectionID(table_id, prj_id), part_id)); - - // set HDFS because the memory is not applied actually - // it will be set to MEMORY in function - EXEC_AND_LOG_RETURN(ret, - part_storage->AddChunkWithMemoryToNum(chunk_id, HDFS), - "added chunk to " << chunk_id, "failed to add chunk"); - - // copy data into applied memory + assert(part_storage != NULL); + + /// set HDFS because the memory is not applied actually + /// it will be set to MEMORY in function + uint64_t last_chunk_id = (packet.pos_ + packet.offset_) / CHUNK_SIZE; + // assert(last_chunk_id <= + // (1024UL * 1024 * 1024 * 1024 * 1024) / (64 * 1024 * 1024) && + // " memory for chunk should not larger than 1PB"); + DLOG(INFO) << "position+offset is:" << packet.pos_ + packet.offset_ + << " CHUNK SIZE is:" << CHUNK_SIZE + << " last chunk id is:" << last_chunk_id; + EXEC_AND_LOG_RETURN( + ret, part_storage->AddChunkWithMemoryToNum(last_chunk_id + 1, HDFS), + "added chunk to " << last_chunk_id + 1, "failed to add chunk"); + + /// copy data into applied memory const uint64_t tuple_size = Catalog::getInstance() ->getTable(table_id) ->getProjectoin(prj_id) @@ -270,7 +289,7 @@ RetCode 
SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { uint64_t total_written_length = 0; HdfsInMemoryChunk chunk_info; while (total_written_length < offset) { - // get start position of current chunk + /// get start position of current chunk if (BlockManager::getInstance()->getMemoryChunkStore()->getChunk( ChunkID(PartitionID(ProjectionID(table_id, prj_id), part_id), cur_chunk_id), @@ -283,6 +302,9 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { writer.Write(packet.data_buffer_ + total_written_length, offset - total_written_length); total_written_length += written_length; + LOG(INFO) << "written " << written_length + << " bytes into chunk:" << cur_chunk_id + << ". Now total written " << total_written_length << " bytes"; if (total_written_length == offset) { // all tuple is written into memory return rSuccess; @@ -292,6 +314,8 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { } while (writer.NextBlock()); ++cur_chunk_id; // get next chunk to write + LOG(INFO) << "Now chunk id is " << cur_chunk_id + << ", the number of chunk is" << part_storage->GetChunkNum(); assert(cur_chunk_id < part_storage->GetChunkNum()); cur_block_id = 0; // the block id of next chunk is 0 pos_in_block = 0; @@ -306,14 +330,24 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, bool is_commited) { - try { - auto master_actor = - remote_actor(Config::master_loader_ip, Config::master_loader_port); - caf::scoped_actor self; - self->sync_send(master_actor, LoadAckAtom::value, txn_id, is_commited); - } catch (exception& e) { - LOG(ERROR) << e.what(); - return rFailure; + int time = 0; + int retry_max_time = 10; + while (1) { + try { + auto master_actor = + remote_actor(Config::master_loader_ip, Config::master_loader_port); + caf::scoped_actor self; + self->sync_send(master_actor, LoadAckAtom::value, txn_id, is_commited) + .await([&](int r) { // NOLINT + LOG(INFO) << 
"sent commit result:" << is_commited + << " to master and received response"; + }); + return rSuccess; + } catch (exception& e) { + LOG(ERROR) << "failed to send commit result to master loader in " + << ++time << "time." << e.what(); + if (time >= retry_max_time) return rFailure; + } } return rSuccess; } diff --git a/storage/BlockManager.cpp b/storage/BlockManager.cpp index c903c3627..2a2973146 100755 --- a/storage/BlockManager.cpp +++ b/storage/BlockManager.cpp @@ -7,6 +7,7 @@ #include #include "BlockManager.h" +#include #include "../common/file_handle/hdfs_connector.h" #include "../Environment.h" #include "../common/rename.h" @@ -368,6 +369,8 @@ bool BlockManager::removePartition(const PartitionID& partition_id) { PartitionStorage* BlockManager::getPartitionHandle( const PartitionID& partition_id) const { + LOG(INFO) << "partid2storage size is:" << partition_id_to_storage_.size(); + LOG(INFO) << "going to find storage [" << partition_id.getName() << "]"; boost::unordered_map::const_iterator it = partition_id_to_storage_.find(partition_id); if (it == partition_id_to_storage_.cend()) { diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 5536fb13f..9f8637c89 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -416,20 +416,26 @@ uint64_t InMemoryChunkWriterIterator::Write(const void* const buffer_to_write, void* block_offset = chunk_offset_ + block_id_ * block_size_; unsigned* tuple_count_in_block = reinterpret_cast( block_offset + block_size_ - sizeof(unsigned)); - - // there are space to store data int can_store_tuple_count = (block_size_ - sizeof(unsigned)) / tuple_size_ - *tuple_count_in_block; + LOG(INFO) << "block whose id is " << block_id_ << " stored " + << *tuple_count_in_block << " tuple and leaf " + << can_store_tuple_count + << " tuple space. 
and tuple size is:" << tuple_size_; + + // there are space to store data if (can_store_tuple_count > 0) { int actual_written_tuple_count = length_to_write / tuple_size_ > can_store_tuple_count ? can_store_tuple_count : length_to_write / tuple_size_; - memcpy(block_offset + *tuple_count_in_block * block_size_, buffer_to_write, - actual_written_tuple_count * tuple_size_); + memcpy(block_offset + (*tuple_count_in_block) * block_size_, + buffer_to_write, actual_written_tuple_count * tuple_size_); + LOG(INFO) << "copy " << actual_written_tuple_count * tuple_size_ + << " bytes into block:" << block_id_; __sync_add_and_fetch(tuple_count_in_block, actual_written_tuple_count); - return actual_written_tuple_count; + return actual_written_tuple_count * tuple_size_; } return 0; } diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 37a95acc7..906814b59 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -62,6 +62,8 @@ RetCode PartitionStorage::AddChunkWithMemoryToNum( const unsigned& number_of_chunks, const StorageLevel& storage_level) { RetCode ret = rSuccess; if (number_of_chunks_ >= number_of_chunks - 1) return ret; + LOG(INFO) << "now chunk number:" << number_of_chunks_ + << ". 
expected chunk num:" << number_of_chunks; for (unsigned i = number_of_chunks_; i < number_of_chunks; i++) { ChunkStorage* chunk = new ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, storage_level); diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index 528308e85..d9fb4c48b 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -96,7 +96,7 @@ static const int kTailSize = sizeof(unsigned); inline UInt64 GetGlobalPartId(UInt64 table_id, UInt64 projeciton_id, UInt64 partition_id) { - return table_id + 1000 * (projeciton_id + 1000 * partition_id); + return partition_id + 1000 * (projeciton_id + 1000 * table_id); } inline UInt64 GetTableIdFromGlobalPartId(UInt64 global_partition_id) { diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index faec8b3ac..52835e5ab 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -333,6 +333,10 @@ RetCode TxnServer::RecoveryFromCatalog() { CountList[i] = 0; LogicCPList[i] = 0; } + + PosList[10000000] = 2017198080; + CountList[10000000] = 6001215; + LogicCPList[10000000] = 2017198080; } RetCode TxnServer::RecoveryFromTxnLog() {} From e374453696ab73d34dbfa0cfa2f34e27b12441a5 Mon Sep 17 00:00:00 2001 From: yukai Date: Mon, 2 May 2016 21:05:25 +0800 Subject: [PATCH 25/58] ADD:AMQ consumer in multi-threading; FIX: bug in ChunkStorage.cpp; ADD: memset after getting memory from memory pool --- Client/Client.cpp | 3 +- Config.cpp | 4 +- Config.h | 1 + Environment.cpp | 19 ++-- Executor/Coordinator.cpp | 31 +++--- Makefile.am | 2 +- Resource/ResourceManagerSlave.cpp | 1 + catalog/projection.cpp | 2 +- loader/AMQ_consumer.cpp | 163 +++++++++++++++++++++++++++++ loader/AMQ_consumer.h | 121 ++++++++++++++++++++++ loader/Makefile.am | 7 +- loader/master_loader.cpp | 96 ++++++++++++++--- loader/master_loader.h | 26 ++++- loader/slave_loader.cpp | 11 +- storage/ChunkStorage.cpp | 16 +-- storage/MemoryStore.cpp | 166 ++++++++++++++++-------------- storage/PartitionStorage.cpp | 13 ++- 
storage/PartitionStorage.h | 2 +- 18 files changed, 542 insertions(+), 142 deletions(-) create mode 100644 loader/AMQ_consumer.cpp create mode 100644 loader/AMQ_consumer.h diff --git a/Client/Client.cpp b/Client/Client.cpp index 0b92f268f..6362dcdd0 100644 --- a/Client/Client.cpp +++ b/Client/Client.cpp @@ -58,7 +58,8 @@ Client::query_result Client::submit(std::string command, std::string &message, command = "#" + command; - write(m_clientFd, command.c_str(), command.length() + 1); + int bytes = write(m_clientFd, command.c_str(), command.length() + 1); + if (bytes != command.length() + 1) perror("failed to send SQL to claims"); ClientLogging::log("Client: message from server!\n"); const int maxBytes = 75536 + sizeof(int) * 2; char *buf = new char[maxBytes]; diff --git a/Config.cpp b/Config.cpp index b0935a63d..695cf1997 100644 --- a/Config.cpp +++ b/Config.cpp @@ -94,7 +94,6 @@ bool Config::is_master_loader; std::string Config::master_loader_ip; int Config::master_loader_port; - bool Config::enable_txn_server; int Config::txn_server_cores; std::string Config::txn_server_ip; @@ -103,6 +102,7 @@ int Config::txn_server_port; bool Config::enable_txn_log; std::string Config::txn_log_path; +int Config::master_loader_thread_num; Config *Config::getInstance() { if (instance_ == 0) { @@ -185,6 +185,8 @@ void Config::initialize() { txn_log_path = getString("txn_log_path", "."); + master_loader_thread_num = getInt("master_loader_thread_num", 4); + #ifdef DEBUG_Config print_configure(); #endif diff --git a/Config.h b/Config.h index 9f8321f0c..fbb6d801d 100644 --- a/Config.h +++ b/Config.h @@ -91,6 +91,7 @@ class Config { static bool enable_txn_log; static std::string txn_log_path; + static int master_loader_thread_num; private: static Config* instance_; diff --git a/Environment.cpp b/Environment.cpp index 1791db47b..6d3144ec7 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -65,12 +65,6 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { 
initializeCoordinator(); logging_->log("Initializing the catalog ..."); } - catalog_ = claims::catalog::Catalog::getInstance(); - logging_->log("restore the catalog ..."); - if (rSuccess != catalog_->restoreCatalog()) { - LOG(ERROR) << "failed to restore catalog" << std::endl; - cerr << "ERROR: restore catalog failed" << endl; - } if (true == g_thread_pool_used) { logging_->log("Initializing the ThreadPool..."); @@ -91,18 +85,21 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { * TODO: * DO something in AdaptiveEndPoint such that the construction function does not return until the connection is completed. If so, the following - sleep() - dose not needed. - + sleep() dose not needed. This is done in Aug.18 by Li :) */ - /*Before initializing Resource Manager, the instance ip and port should be * decided.*/ - logging_->log("Initializing the ResourceManager..."); initializeResourceManager(); + catalog_ = claims::catalog::Catalog::getInstance(); + logging_->log("restore the catalog ..."); + if (rSuccess != catalog_->restoreCatalog()) { + LOG(ERROR) << "failed to restore catalog" << std::endl; + cerr << "ERROR: restore catalog failed" << endl; + } + logging_->log("Initializing the Storage..."); initializeStorage(); diff --git a/Executor/Coordinator.cpp b/Executor/Coordinator.cpp index 8c8b14a41..1dd792227 100755 --- a/Executor/Coordinator.cpp +++ b/Executor/Coordinator.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -154,18 +155,20 @@ void *Coordinator::ListeningNewNode(void *arg) { int recvbytes; int port; - if ((recvbytes = recv(socket_fd_new, &port, sizeof(port), MSG_WAITALL)) == - -1) { + int retry_time = 10; + int time = 0; + while ((recvbytes = + recv(socket_fd_new, &port, sizeof(port), MSG_WAITALL)) == -1) { std::cout << "New node " << inet_ntoa(remote_addr.sin_addr) << " has connected, but the receiving the information times out!" 
- << std::endl; - FileClose(socket_fd_new); - // logging->elog("-----for debug: fd %d is closed", - // socket_fd_new); - continue; - // return false; + << strerror(errno) << std::endl; + if (++time >= retry_time) { + FileClose(socket_fd_new); + break; + } } + if (time >= retry_time) continue; if (recvbytes != sizeof(int)) { std::cout << "Information received, but the length is not right!" << std::endl; @@ -198,7 +201,8 @@ void *Coordinator::ListeningNewNode(void *arg) { new_node_port.c_str()); } else { Cthis->logging->log( - "[Coordinator]: The Coordinator EndPoint has successfully connected " + "[Coordinator]: The Coordinator EndPoint has successfully " + "connected " "to the EndPoint of the new node!"); } @@ -212,7 +216,8 @@ void *Coordinator::ListeningNewNode(void *arg) { * message to each NodeConnectionActor until the feedback is received * which means the target node has conducted new connection based on * message received. - * However, if the target node is dead, the message will be sent repeatedly + * However, if the target node is dead, the message will be sent + * repeatedly * and infinitely. Additional code is needed to handle the dead node. */ for (unsigned i = 0; i < Cthis->PeersIpPort.size(); i++) { @@ -255,10 +260,12 @@ void *Coordinator::ListeningNewNode(void *arg) { Cthis->SendReadyNotificationToNewNode(socket_fd_new, 'R'); - // below code should be keep in case of dynamically selecting master loader + // below code should be keep in case of dynamically selecting master + // loader /*if (1 == Cthis->PeersIpPort.size()) { // select the first new node as loader master - LOG(INFO) << "Congratulations! (" << new_node_ip << ", " << new_node_port + LOG(INFO) << "Congratulations! 
(" << new_node_ip << ", " << + new_node_port << ") is selected to be master loader"; if (Cthis->SendReadyNotificationToNewNode(socket_fd_new, 'M')) LOG(INFO) << "succeed to send M notify this node"; diff --git a/Makefile.am b/Makefile.am index fe65757a5..569996f04 100644 --- a/Makefile.am +++ b/Makefile.am @@ -11,7 +11,7 @@ AM_CPPFLAGS=-fPIC -DTHERON_XS\ #-L/usr/local/lib \ #-I/usr/local/include -AM_LDFLAGS= -lc -lm -lrt -lpthread \ +AM_LDFLAGS= -lc -lm -lrt -lpthread -lactivemq-cpp\ -lconfig++ -lxs -lnuma -lreadline -lhistory -lz -ltinfo -Wl,--no-as-needed -ldl -rdynamic -lglog if OPT_TCMALLOC diff --git a/Resource/ResourceManagerSlave.cpp b/Resource/ResourceManagerSlave.cpp index 224b4bf59..88fc38f97 100755 --- a/Resource/ResourceManagerSlave.cpp +++ b/Resource/ResourceManagerSlave.cpp @@ -80,6 +80,7 @@ NodeID InstanceResourceManager::Register() { return ret; } else { logging_->elog("Failed to get NodeId from the master."); + cerr << "Failed to get NodeId from the master." << endl; return -1; } } diff --git a/catalog/projection.cpp b/catalog/projection.cpp index 9d73d5055..95a3430f8 100644 --- a/catalog/projection.cpp +++ b/catalog/projection.cpp @@ -79,7 +79,7 @@ bool ProjectionDescriptor::AllPartitionBound() const { } std::vector ProjectionDescriptor::getAttributeList() const { std::vector ret; - LOG(INFO) << "the length of column list is:" << column_list_.size(); + // DLOG(INFO) << "the length of column list is:" << column_list_.size(); for (unsigned i = 0; i < this->column_list_.size(); i++) { ret.push_back((Attribute)column_list_[i]); } diff --git a/loader/AMQ_consumer.cpp b/loader/AMQ_consumer.cpp new file mode 100644 index 000000000..b046cf20e --- /dev/null +++ b/loader/AMQ_consumer.cpp @@ -0,0 +1,163 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * /Claims/loader/AMQ_consumer.cpp + * + * Created on: May 1, 2016 + * Author: yukai + * Email: yukai2014@gmail.com + * + * Description: + * + */ + +#include "./AMQ_consumer.h" + +#include +#include +#include +#include +#include + +#include "./master_loader.h" +#include "../common/memory_handle.h" +using activemq::core::ActiveMQConnection; +using activemq::core::ActiveMQConnectionFactory; +using cms::TextMessage; +using std::cout; +using std::endl; +using std::string; + +namespace claims { +namespace loader { + +void claims::loader::AMQConsumer::run(MasterLoader* mloader) { + mloader_ = mloader; + try { + // Create a ConnectionFactory + ActiveMQConnectionFactory* connectionFactory = + new ActiveMQConnectionFactory(brokerURI_); + LOG(INFO) << "Create a ConnectionFactory"; + // Create a Connection + connection_ = connectionFactory->createConnection(); + delete connectionFactory; + LOG(INFO) << "Create a Connection"; + + ActiveMQConnection* amqConnection = + dynamic_cast(connection_); + if (amqConnection != NULL) { + amqConnection->addTransportListener(this); + } + LOG(INFO) << "Create a ActiveMQConnection"; + + connection_->start(); + + connection_->setExceptionListener(this); + LOG(INFO) << "ActiveMQConnection is started"; + + // Create a Session + if (client_ack_) { + session_ 
= connection_->createSession(Session::CLIENT_ACKNOWLEDGE); + } else { + session_ = connection_->createSession(Session::AUTO_ACKNOWLEDGE); + } + LOG(INFO) << "Create a Session"; + + // Create the destination (Topic or Queue) + if (use_topic_) { + destination_ = session_->createTopic(destURI_); + } else { + destination_ = session_->createQueue(destURI_); + } + LOG(INFO) << "Create a destination"; + + // Create a MessageConsumer from the Session to the Topic or Queue + consumer = session_->createConsumer(destination_); + consumer->setMessageListener(this); + LOG(INFO) << "Create a MessageConsumer"; + std::cout << "AMQ client listening...." << std::endl; + } catch (CMSException& e) { + e.printStackTrace(); + } +} + +void claims::loader::AMQConsumer::onMessage(const Message* message) { + try { + const TextMessage* textMessage = dynamic_cast(message); + string text = ""; + + if (textMessage != NULL) { + text = textMessage->getText(); + + bool client_ack = client_ack_; + mloader_->Ingest(text, [message, client_ack]() -> int { + if (client_ack) { + message->acknowledge(); + } + return claims::common::rSuccess; + }); + } else { + text = "NOT A TEXTMESSAGE!"; + std::cerr << "get error message from AMQ " << std::endl; + LOG(ERROR) << "get error message from AMQ "; + } + // enter for load. + // printf("Message #%d Received: %s\n", count, text.c_str()); + } catch (CMSException& e) { + e.printStackTrace(); + } +} + +void claims::loader::AMQConsumer::cleanup() { + //************************************************* + // Always close destination, consumers and producers before + // you destroy their sessions and connection. + //************************************************* + + // Destroy resources. + try { + DELETE_PTR(destination_); + } catch (CMSException& e) { + } + + try { + DELETE_PTR(consumer); + } catch (CMSException& e) { + } + + // Close open resources. 
+ try { + if (session_ != NULL) session_->close(); + if (connection_ != NULL) connection_->close(); + } catch (CMSException& e) { + } + + // Now Destroy them + try { + DELETE_PTR(session_); + } catch (CMSException& e) { + } + + try { + DELETE_PTR(connection_); + } catch (CMSException& e) { + } +} + +} /* namespace loader */ +} /* namespace claims */ diff --git a/loader/AMQ_consumer.h b/loader/AMQ_consumer.h new file mode 100644 index 000000000..964e2cced --- /dev/null +++ b/loader/AMQ_consumer.h @@ -0,0 +1,121 @@ +/* + * Copyright [2012-2015] DaSE@ECNU + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * /Claims/loader/AMQ_consumer.h + * + * Created on: May 1, 2016 + * Author: yukai + * Email: yukai2014@gmail.com + * + * Description: + * + */ + +#ifndef LOADER_AMQ_CONSUMER_H_ +#define LOADER_AMQ_CONSUMER_H_ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../common/rename.h" +using activemq::transport::DefaultTransportListener; +using cms::CMSException; +using cms::Connection; +using cms::Destination; +using cms::ExceptionListener; +using cms::Message; +using cms::MessageConsumer; +using cms::MessageListener; +using cms::Session; + +namespace claims { +namespace loader { + +class MasterLoader; + +class AMQConsumer : public ExceptionListener, + public MessageListener, + public DefaultTransportListener { + public: + AMQConsumer(const std::string& brokerURI, const std::string& destURI, + bool use_topic = false, bool client_ack = false) + : connection_(NULL), + session_(NULL), + destination_(NULL), + consumer(NULL), + use_topic_(use_topic), + brokerURI_(brokerURI), + destURI_(destURI), + client_ack_(client_ack) {} + + virtual ~AMQConsumer() { this->cleanup(); } + + void close() { this->cleanup(); } + + void run(MasterLoader* mloader); + + virtual void onMessage(const Message* message); + + // If something bad happens you see it here as this class is also been + // registered as an ExceptionListener with the connection. + virtual void onException(const CMSException& ex AMQCPP_UNUSED) { + printf("CMS Exception occurred. Shutting down client.\n"); + exit(1); + } + + virtual void onException(const decaf::lang::Exception& ex) { + printf("Transport Exception occurred: %s \n", ex.getMessage().c_str()); + } + + virtual void transportInterrupted() { + std::cout << "The Connection's Transport has been Interrupted." + << std::endl; + } + + virtual void transportResumed() { + std::cout << "The Connection's Transport has been Restored." 
<< std::endl; + } + + NO_COPY_AND_ASSIGN(AMQConsumer); + + private: + void cleanup(); + + public: + private: + Connection* connection_; + Session* session_; + Destination* destination_; + MessageConsumer* consumer; + bool use_topic_; + std::string brokerURI_; + std::string destURI_; + bool client_ack_; + + MasterLoader* mloader_; +}; + +} /* namespace loader */ +} /* namespace claims */ + +#endif // LOADER_AMQ_CONSUMER_H_ diff --git a/loader/Makefile.am b/loader/Makefile.am index 3cd362bd3..ac4667d16 100644 --- a/loader/Makefile.am +++ b/loader/Makefile.am @@ -7,7 +7,9 @@ AM_CPPFLAGS= -fPIC -fpermissive \ -I${JAVA_HOME}/include/linux \ -I${THERON_HOME}/Include \ -I${CAF_HOME}/libcaf_io \ --I${CAF_HOME}/libcaf_core +-I${CAF_HOME}/libcaf_core \ +-I/usr/local/include/activemq-cpp-3.9.3 \ +-I/usr/local/apr/include/apr-1 AM_LDFLAGS=-lc -lm -lrt -lcaf_core -lcaf_io -lxs -lboost_serialization @@ -37,7 +39,8 @@ libloader_a_SOURCES = \ slave_loader.cpp slave_loader.h \ table_file_connector.cpp table_file_connector.h \ validity.cpp validity.h \ - load_packet.h load_packet.cpp + load_packet.h load_packet.cpp \ + AMQ_consumer.h AMQ_consumer.cpp SUBDIRS = test DIST_SUBDIRS = test diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 96827a1ed..8bb49c8d6 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -27,6 +27,8 @@ */ #include "./master_loader.h" + +#include #include #include #include @@ -37,6 +39,7 @@ #include "caf/all.hpp" #include "caf/io/all.hpp" +#include "./AMQ_consumer.h" #include "./load_packet.h" #include "./loader_message.h" #include "./validity.h" @@ -183,7 +186,7 @@ RetCode MasterLoader::ConnectWithSlaves() { int ret = rSuccess; try { auto listening_actor = spawn(&MasterLoader::ReceiveSlaveReg, this); - publish(listening_actor, master_loader_port_); + publish(listening_actor, master_loader_port_, nullptr, true); DLOG(INFO) << "published in " << master_loader_ip_ << ":" << master_loader_port_; cout << "published in " << 
master_loader_ip_ << ":" << master_loader_port_; @@ -194,14 +197,12 @@ RetCode MasterLoader::ConnectWithSlaves() { return ret; } -RetCode MasterLoader::Ingest() { +RetCode MasterLoader::Ingest(const string& message, + function ack_function) { RetCode ret = rSuccess; - cout << "\ninput a number to continue" << std::endl; - int temp; - cin >> temp; - cout << "Well , temp is received" << std::endl; - string message = GetMessage(); + // string message = GetMessage(); + // DLOG(INFO) << "get message:\n" << message; // get message from MQ IngestionRequest req; @@ -228,6 +229,7 @@ RetCode MasterLoader::Ingest() { if (ret != rSuccess && ret != claims::common::rNoMemory) { // TODO(YUKAI): error handle, like sending error message to client LOG(ERROR) << "the tuple is not valid"; + ack_function(); return rFailure; } @@ -252,7 +254,7 @@ RetCode MasterLoader::Ingest() { "failed to write log"); // reply ACK to MQ - EXEC_AND_LOG(ret, ReplyToMQ(req), "replied to MQ", "failed to reply to MQ"); + EXEC_AND_LOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); // distribute partition load task EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), @@ -384,7 +386,7 @@ RetCode MasterLoader::GetPartitionTuples( // check all tuples to be inserted int line = 0; for (auto tuple_string : req.tuples_) { - DLOG(INFO) << "to be inserted tuple:" << tuple_string; + // DLOG(INFO) << "to be inserted tuple:" << tuple_string; void* tuple_buffer = Malloc(table->getSchema()->getTupleMaxSize()); if (tuple_buffer == NULL) return claims::common::rNoMemory; MemoryGuardWithRetCode guard(tuple_buffer, ret); @@ -463,14 +465,14 @@ RetCode MasterLoader::ApplyTransaction( for (int j = 0; j < prj->getPartitioner()->getNumberOfPartitions(); ++j) { req.Insert(GetGlobalPartId(table_id, i, j), tuple_length, partition_buffers[i][j].length_ / tuple_length); - cout << "the length of partition buffer[" << i << "," << j - << "] is:" << partition_buffers[i][j].length_ << std::endl; + 
// DLOG(INFO) << "the length of partition buffer[" << i << "," << j + // << "] is:" << partition_buffers[i][j].length_ << std::endl; } } TxnClient::BeginIngest(req, ingest); - cout << req.ToString() << " " << ingest.ToString() << endl; + // cout << req.ToString() << " " << ingest.ToString() << endl; return ret; } @@ -606,6 +608,7 @@ RetCode MasterLoader::SelectSocket(const TableDescriptor* table, RetCode MasterLoader::SendPacket(const int socket_fd, const void* const packet_buffer, const uint64_t packet_length) { + LockGuard guard(lock_); size_t total_write_num = 0; while (total_write_num < packet_length) { ssize_t write_num = write( @@ -620,6 +623,15 @@ RetCode MasterLoader::SendPacket(const int socket_fd, return rSuccess; } +void* MasterLoader::Work(void* arg) { + WorkerPara* para = static_cast(arg); + AMQConsumer consumer(para->brokerURI_, para->destURI_, para->use_topic_, + para->client_ack_); + consumer.run(para->master_loader_); + while (1) sleep(10); + return NULL; +} + void* MasterLoader::StartMasterLoader(void* arg) { Config::getInstance(); LOG(INFO) << "start master loader..."; @@ -631,9 +643,63 @@ void* MasterLoader::StartMasterLoader(void* arg) { EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->ConnectWithSlaves(), "failed to connect all slaves"); - while (true) - EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), - "failed to ingest data"); + activemq::library::ActiveMQCPP::initializeLibrary(); + // Use either stomp or openwire, the default ports are different for each + // + // Examples: + // tcp://127.0.0.1:61616 default to openwire + // tcp://127.0.0.1:61616?wireFormat=openwire same as above + // tcp://127.0.0.1:61613?wireFormat=stomp use stomp instead + // + std::string brokerURI = + "failover:(tcp://" + "58.198.176.92:61616?wireFormat=openwire&connection.useAsyncSend=true" + // "&transport.commandTracingEnabled=true" + // "&transport.tcpTracingEnabled=true" + // "&wireFormat.tightEncodingEnabled=true" + ")"; + + 
//============================================================ + // This is the Destination Name and URI options. Use this to + // customize where the consumer listens, to have the consumer + // use a topic or queue set the 'useTopics' flag. + //============================================================ + std::string destURI = + "t123?consumer.prefetchSize = 1 "; // ?consumer.prefetchSize=1"; + + //============================================================ + // set to true to use topics instead of queues + // Note in the code above that this causes createTopic or + // createQueue to be used in the consumer. + //============================================================ + bool use_topics = false; + + //============================================================ + // set to true if you want the consumer to use client ack mode + // instead of the default auto ack mode. + //============================================================ + bool client_ack = true; + + cout << "\n input a number to continue" << std::endl; + int temp; + cin >> temp; + cout << "Well , start flag is received" << std::endl; + + // AMQConsumer consumer(brokerURI, destURI, use_topics, client_ack); + // consumer.run(master_loader); + for (int i = 0; i < Config::master_loader_thread_num - 1; ++i) { + WorkerPara para(master_loader, brokerURI, destURI, use_topics, client_ack); + Environment::getInstance()->getThreadPool()->AddTask(MasterLoader::Work, + ¶); + } + // i am also a worker + WorkerPara para(master_loader, brokerURI, destURI, use_topics, client_ack); + Work(¶); + + while (1) sleep(10); + + // while (true) EXEC_AND_ONLY_LOG_ERROR(ret, master_loader->Ingest(), + // "failed to ingest data"); return NULL; } diff --git a/loader/master_loader.h b/loader/master_loader.h index 59f4042a4..869d562b2 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -31,6 +31,7 @@ #include #include +#include #include #include #include "caf/all.hpp" @@ -39,6 +40,9 @@ #include 
"../common/error_define.h" #include "../common/ids.h" #include "../txn_manager/txn.hpp" +#include "../utility/lock.h" + +using std::function; namespace claims { namespace catalog { @@ -54,6 +58,9 @@ using caf::event_based_actor; using claims::catalog::TableDescriptor; class MasterLoader { + // public: + // enum DataIngestSource { kActiveMQ }; + public: struct IngestionRequest { string table_name_; @@ -67,6 +74,21 @@ class MasterLoader { << ", tuples size is:" << tuples_.size(); } }; + struct WorkerPara { + WorkerPara(MasterLoader* mloader, const std::string& brokerURI, + const std::string& destURI, bool use_topic = false, + bool client_ack = false) + : use_topic_(use_topic), + brokerURI_(brokerURI), + destURI_(destURI), + client_ack_(client_ack), + master_loader_(mloader) {} + const std::string& brokerURI_; + const std::string& destURI_; + bool use_topic_ = false; + bool client_ack_ = false; + MasterLoader* master_loader_; + }; struct CommitInfo { explicit CommitInfo(uint64_t total_part_num) @@ -92,7 +114,7 @@ class MasterLoader { RetCode ConnectWithSlaves(); - RetCode Ingest(); + RetCode Ingest(const string& message, function ack_function); private: string GetMessage(); @@ -148,6 +170,7 @@ class MasterLoader { MasterLoader* mloader); public: + static void* Work(void* para); static void* StartMasterLoader(void* arg); private: @@ -159,6 +182,7 @@ class MasterLoader { // store id of transactions which are not finished boost::unordered_map txn_commint_info_; + Lock lock_; }; } /* namespace loader */ diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index f06669d4b..39480ca34 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -282,13 +282,14 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { ->getProjectoin(prj_id) ->getSchema() ->getTupleMaxSize(); - const uint64_t offset = packet.offset_; + uint64_t cur_chunk_id = packet.pos_ / CHUNK_SIZE; uint64_t cur_block_id = (packet.pos_ % CHUNK_SIZE) / BLOCK_SIZE; uint64_t 
pos_in_block = packet.pos_ % BLOCK_SIZE; uint64_t total_written_length = 0; + uint64_t data_length = packet.data_length_; HdfsInMemoryChunk chunk_info; - while (total_written_length < offset) { + while (total_written_length < data_length) { /// get start position of current chunk if (BlockManager::getInstance()->getMemoryChunkStore()->getChunk( ChunkID(PartitionID(ProjectionID(table_id, prj_id), part_id), @@ -300,15 +301,15 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { do { // write to every block uint64_t written_length = writer.Write(packet.data_buffer_ + total_written_length, - offset - total_written_length); + data_length - total_written_length); total_written_length += written_length; LOG(INFO) << "written " << written_length << " bytes into chunk:" << cur_chunk_id << ". Now total written " << total_written_length << " bytes"; - if (total_written_length == offset) { + if (total_written_length == data_length) { // all tuple is written into memory return rSuccess; - } else if (total_written_length > offset) { + } else if (total_written_length > data_length) { assert(false); } } while (writer.NextBlock()); diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 9f8637c89..f54d501d3 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -418,10 +418,11 @@ uint64_t InMemoryChunkWriterIterator::Write(const void* const buffer_to_write, block_offset + block_size_ - sizeof(unsigned)); int can_store_tuple_count = (block_size_ - sizeof(unsigned)) / tuple_size_ - *tuple_count_in_block; - LOG(INFO) << "block whose id is " << block_id_ << " stored " - << *tuple_count_in_block << " tuple and leaf " - << can_store_tuple_count - << " tuple space. and tuple size is:" << tuple_size_; + assert(can_store_tuple_count >= 0); + DLOG(INFO) << "block whose id is " << block_id_ << " stored " + << *tuple_count_in_block << " tuple and leaf " + << can_store_tuple_count + << " tuple space. 
and tuple size is:" << tuple_size_; // there are space to store data if (can_store_tuple_count > 0) { @@ -429,10 +430,13 @@ uint64_t InMemoryChunkWriterIterator::Write(const void* const buffer_to_write, length_to_write / tuple_size_ > can_store_tuple_count ? can_store_tuple_count : length_to_write / tuple_size_; + DLOG(INFO) << "memcpy start pos is " + << block_offset + (*tuple_count_in_block) * block_size_ + << ". buffer to write: " << buffer_to_write; memcpy(block_offset + (*tuple_count_in_block) * block_size_, buffer_to_write, actual_written_tuple_count * tuple_size_); - LOG(INFO) << "copy " << actual_written_tuple_count * tuple_size_ - << " bytes into block:" << block_id_; + DLOG(INFO) << "copy " << actual_written_tuple_count * tuple_size_ + << " bytes into block:" << block_id_; __sync_add_and_fetch(tuple_count_in_block, actual_written_tuple_count); return actual_written_tuple_count * tuple_size_; diff --git a/storage/MemoryStore.cpp b/storage/MemoryStore.cpp index 00afb2701..fe30bab5d 100755 --- a/storage/MemoryStore.cpp +++ b/storage/MemoryStore.cpp @@ -11,97 +11,103 @@ #include "../configure.h" #include "../Resource/BufferManager.h" using namespace std; -MemoryChunkStore* MemoryChunkStore::instance_=0; -MemoryChunkStore::MemoryChunkStore():chunk_pool_(CHUNK_SIZE),block_pool_(BLOCK_SIZE){ -// cout<<"in the memorystroage initialize"<::const_iterator it=chunk_list_.find(chunk_id); - if(it!=chunk_list_.cend()){ - printf("chunk id already exists (chunk id =%d)!\n",chunk_id.chunk_off); - lock_.release(); - return false; - } - if(!BufferManager::getInstance()->applyStorageDedget(CHUNK_SIZE)){ - printf("not enough memory!!\n"); - lock_.release(); - return false; - } - if((start_address=chunk_pool_.malloc())!=0){ - chunk_list_[chunk_id]=HdfsInMemoryChunk(start_address,CHUNK_SIZE); - lock_.release(); - return true; - } - else{ - printf("Error occurs when memalign!\n"); - lock_.release(); - return false; - } +bool MemoryChunkStore::applyChunk(ChunkID chunk_id, void*& 
start_address) { + lock_.acquire(); + boost::unordered_map::const_iterator it = + chunk_list_.find(chunk_id); + if (it != chunk_list_.cend()) { + printf("chunk id already exists (chunk id =%d)!\n", chunk_id.chunk_off); + lock_.release(); + return false; + } + if (!BufferManager::getInstance()->applyStorageDedget(CHUNK_SIZE)) { + printf("not enough memory!!\n"); + lock_.release(); + return false; + } + if ((start_address = chunk_pool_.malloc()) != 0) { + memset(start_address, 0, CHUNK_SIZE); + chunk_list_[chunk_id] = HdfsInMemoryChunk(start_address, CHUNK_SIZE); + lock_.release(); + return true; + } else { + printf("Error occurs when memalign!\n"); + lock_.release(); + return false; + } } -void MemoryChunkStore::returnChunk(const ChunkID& chunk_id){ - lock_.acquire(); - boost::unordered_map::const_iterator it=chunk_list_.find(chunk_id); - if(it==chunk_list_.cend()){ - printf("return fail to find the target chunk id !\n"); - lock_.release(); - return; - } - HdfsInMemoryChunk chunk_info=it->second; +void MemoryChunkStore::returnChunk(const ChunkID& chunk_id) { + lock_.acquire(); + boost::unordered_map::const_iterator it = + chunk_list_.find(chunk_id); + if (it == chunk_list_.cend()) { + printf("return fail to find the target chunk id !\n"); + lock_.release(); + return; + } + HdfsInMemoryChunk chunk_info = it->second; - chunk_pool_.free(chunk_info.hook); - chunk_list_.erase(it); - BufferManager::getInstance()->returnStorageBudget(chunk_info.length); - lock_.release(); + chunk_pool_.free(chunk_info.hook); + chunk_list_.erase(it); + BufferManager::getInstance()->returnStorageBudget(chunk_info.length); + lock_.release(); } -bool MemoryChunkStore::getChunk(const ChunkID& chunk_id,HdfsInMemoryChunk& chunk_info){ - lock_.acquire(); - boost::unordered_map::const_iterator it=chunk_list_.find(chunk_id); - if(it!=chunk_list_.cend()){ - chunk_info=it->second; - lock_.release(); - return true; - } - lock_.release(); - return false; +bool MemoryChunkStore::getChunk(const ChunkID& 
chunk_id, + HdfsInMemoryChunk& chunk_info) { + lock_.acquire(); + boost::unordered_map::const_iterator it = + chunk_list_.find(chunk_id); + if (it != chunk_list_.cend()) { + chunk_info = it->second; + lock_.release(); + return true; + } + lock_.release(); + return false; } -bool MemoryChunkStore::updateChunkInfo(const ChunkID & chunk_id, const HdfsInMemoryChunk & chunk_info){ - lock_.acquire(); - boost::unordered_map::iterator it=chunk_list_.find(chunk_id); - if(it==chunk_list_.cend()){ - lock_.release(); - return false; - } - it->second=chunk_info; - lock_.release(); - return true; - - +bool MemoryChunkStore::updateChunkInfo(const ChunkID& chunk_id, + const HdfsInMemoryChunk& chunk_info) { + lock_.acquire(); + boost::unordered_map::iterator it = + chunk_list_.find(chunk_id); + if (it == chunk_list_.cend()) { + lock_.release(); + return false; + } + it->second = chunk_info; + lock_.release(); + return true; } -bool MemoryChunkStore::putChunk(const ChunkID& chunk_id,HdfsInMemoryChunk& chunk_info){ - lock_.acquire(); - boost::unordered_map::const_iterator it=chunk_list_.find(chunk_id); - if(it!=chunk_list_.cend()){ - printf("The memory chunk is already existed!\n"); - lock_.release(); - return false; - } - chunk_list_[chunk_id]=chunk_info; - lock_.release(); - return true; +bool MemoryChunkStore::putChunk(const ChunkID& chunk_id, + HdfsInMemoryChunk& chunk_info) { + lock_.acquire(); + boost::unordered_map::const_iterator it = + chunk_list_.find(chunk_id); + if (it != chunk_list_.cend()) { + printf("The memory chunk is already existed!\n"); + lock_.release(); + return false; + } + chunk_list_[chunk_id] = chunk_info; + lock_.release(); + return true; } -MemoryChunkStore* MemoryChunkStore::getInstance(){ - if(instance_==0){ - instance_=new MemoryChunkStore(); - } - return instance_; +MemoryChunkStore* MemoryChunkStore::getInstance() { + if (instance_ == 0) { + instance_ = new MemoryChunkStore(); + } + return instance_; } diff --git a/storage/PartitionStorage.cpp 
b/storage/PartitionStorage.cpp index 906814b59..8dfb9606f 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -7,6 +7,7 @@ #include "PartitionStorage.h" +#include #include "../common/error_define.h" #include "../Debug.h" #include "MemoryStore.h" @@ -59,12 +60,13 @@ void PartitionStorage::updateChunksWithInsertOrAppend( } RetCode PartitionStorage::AddChunkWithMemoryToNum( - const unsigned& number_of_chunks, const StorageLevel& storage_level) { + const unsigned& expected_number_of_chunks, + const StorageLevel& storage_level) { RetCode ret = rSuccess; - if (number_of_chunks_ >= number_of_chunks - 1) return ret; + if (number_of_chunks_ >= expected_number_of_chunks) return ret; LOG(INFO) << "now chunk number:" << number_of_chunks_ - << ". expected chunk num:" << number_of_chunks; - for (unsigned i = number_of_chunks_; i < number_of_chunks; i++) { + << ". expected chunk num:" << expected_number_of_chunks; + for (unsigned i = number_of_chunks_; i < expected_number_of_chunks; i++) { ChunkStorage* chunk = new ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, storage_level); EXEC_AND_LOG(ret, chunk->ApplyMemory(), "applied memory for chunk(" @@ -74,7 +76,8 @@ RetCode PartitionStorage::AddChunkWithMemoryToNum( << "," << i << ")"); chunk_list_.push_back(chunk); } - number_of_chunks_ = number_of_chunks; + number_of_chunks_ = expected_number_of_chunks; + assert(chunk_list_.size() == number_of_chunks_); return ret; } diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index bdc8df0fb..bba450c3c 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -55,7 +55,7 @@ class PartitionStorage { const unsigned& number_of_chunks, const StorageLevel& storage_level); - RetCode AddChunkWithMemoryToNum(const unsigned& number_of_chunks, + RetCode AddChunkWithMemoryToNum(const unsigned& expected_number_of_chunks, const StorageLevel& storage_level); void removeAllChunks(const PartitionID& partition_id); PartitionReaderItetaor* 
createReaderIterator(); From 23c280954e5af3997b8c1ebe6fd8d3b01211b398 Mon Sep 17 00:00:00 2001 From: yukai Date: Tue, 3 May 2016 15:12:17 +0800 Subject: [PATCH 26/58] Distributed Load basically works; FIX: memory leak by getSchema(); FIX: bug in ChunkStorage.cpp --- common/Schema/TupleConvertor.cpp | 42 ++++++++++++++++------------- common/Schema/TupleConvertor.h | 46 ++++++++++++++++++-------------- loader/data_ingestion.cpp | 2 +- loader/master_loader.cpp | 2 ++ loader/slave_loader.cpp | 6 ++++- storage/ChunkStorage.cpp | 8 ++++-- storage/ChunkStorage.h | 3 ++- 7 files changed, 65 insertions(+), 44 deletions(-) diff --git a/common/Schema/TupleConvertor.cpp b/common/Schema/TupleConvertor.cpp index 9d3aa9ccb..77e4472ae 100755 --- a/common/Schema/TupleConvertor.cpp +++ b/common/Schema/TupleConvertor.cpp @@ -8,30 +8,34 @@ #include "TupleConvertor.h" TupleConvertor::TupleConvertor() { - // TODO Auto-generated constructor stub - + // TODO Auto-generated constructor stub } TupleConvertor::~TupleConvertor() { - // TODO Auto-generated destructor stub + // TODO Auto-generated destructor stub } -void TupleConvertor::sub_tuple(const Schema*& src_s,const Schema*& des_s,const void* const& tuple, void* desc, std::vector index){ - for(unsigned i=0;igetColumnAddess(index[i],tuple); - void* const desc_column_address=des_s->getColumnAddess(i,desc); - des_s->getcolumn(i).operate->assignment(source_column_address,desc_column_address); - } +void TupleConvertor::sub_tuple(const Schema*& src_s, const Schema*& des_s, + const void* const& tuple, void* desc, + std::vector index) { + for (unsigned i = 0; i < index.size(); i++) { + const void* const source_column_address = + src_s->getColumnAddess(index[i], tuple); + void* const desc_column_address = des_s->getColumnAddess(i, desc); + des_s->getcolumn(i) + .operate->assignment(source_column_address, desc_column_address); + } } -SubTuple::SubTuple(Schema* srouce, Schema* target, std::vector index) 
-:source_schema_(srouce),target_schema_(target),index_(index){ - -} +SubTuple::SubTuple(Schema* source, Schema* target, std::vector index) + : source_schema_(source), target_schema_(target), index_(index) {} -void SubTuple::getSubTuple(void*& tuple, void*& target){ - for(unsigned i=0;igetColumnAddess(index_[i],tuple); - void* const desc_column_address=target_schema_->getColumnAddess(i,target); - target_schema_->getcolumn(i).operate->assignment(source_column_address,desc_column_address); - } +void SubTuple::getSubTuple(void*& tuple, void*& target) { + for (unsigned i = 0; i < index_.size(); i++) { + const void* const source_column_address = + source_schema_->getColumnAddess(index_[i], tuple); + void* const desc_column_address = + target_schema_->getColumnAddess(i, target); + target_schema_->getcolumn(i) + .operate->assignment(source_column_address, desc_column_address); + } } diff --git a/common/Schema/TupleConvertor.h b/common/Schema/TupleConvertor.h index 0c314e3d8..a66132afc 100755 --- a/common/Schema/TupleConvertor.h +++ b/common/Schema/TupleConvertor.h @@ -9,35 +9,41 @@ #ifndef TUPLECONVENTOR_H_ #define TUPLECONVENTOR_H_ #include + #ifdef DMALLOC #include "dmalloc.h" #endif #include "Schema.h" class TupleConvertor { -public: - TupleConvertor(); - virtual ~TupleConvertor(); + public: + TupleConvertor(); + virtual ~TupleConvertor(); - /* - * extract sub-tuple from a tuple. The index for the columns that appear in the desc tuple are specified in - * parameter index in form of array. E.g., index={0,3,4} means that the remainning columns are the 1st, 3rd - * and 4th. - * src_s and des_c describe the schema for tuple and desc tuple. - * memory of desc tuple shoulbe be allocated before calling this method. 
- */ - void sub_tuple(const Schema*& src_s,const Schema*& des_c,const void* const& tuple, void* desc, std::vector index); -// void sub_tuple(const Schema* const src_s,const Schema* const des_s, void*& const tuple,void*& const desc); + /* + * extract sub-tuple from a tuple. The index for the columns that appear in + * the desc tuple are specified in + * parameter index in form of array. E.g., index={0,3,4} means that the + * remainning columns are the 1st, 3rd + * and 4th. + * src_s and des_c describe the schema for tuple and desc tuple. + * memory of desc tuple shoulbe be allocated before calling this method. + */ + void sub_tuple(const Schema*& src_s, const Schema*& des_c, + const void* const& tuple, void* desc, + std::vector index); + // void sub_tuple(const Schema* const src_s,const Schema* const des_s, + // void*& const tuple,void*& const desc); }; -class SubTuple{ -public: - SubTuple(Schema* srouce, Schema* target, std::vector index); - void getSubTuple(void*& tuple, void*& target); -private: - Schema* source_schema_; - Schema* target_schema_; - std::vector index_; +class SubTuple { + public: + SubTuple(Schema* source, Schema* target, std::vector index); + void getSubTuple(void*& tuple, void*& target); + private: + Schema* source_schema_; + Schema* target_schema_; + std::vector index_; }; #endif /* TUPLECONVENTOR_H_ */ diff --git a/loader/data_ingestion.cpp b/loader/data_ingestion.cpp index 0e8d2d98c..bd941a31e 100644 --- a/loader/data_ingestion.cpp +++ b/loader/data_ingestion.cpp @@ -187,7 +187,7 @@ DataIngestion::DataIngestion(TableDescriptor* table, const string col_separator, prj_index.push_back(prj_attrs[j].index); } SubTuple* st = new SubTuple( - table_->getSchema(), table_->getProjectoin(i)->getSchema(), prj_index); + table_schema_, table_->getProjectoin(i)->getSchema(), prj_index); sub_tuple_generator_.push_back(st); } diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index fa33bed6d..7a53376fb 100644 --- a/loader/master_loader.cpp +++ 
b/loader/master_loader.cpp @@ -614,6 +614,8 @@ RetCode MasterLoader::SendPacket(const int socket_fd, socket_fd, static_cast(packet_buffer) + total_write_num, packet_length - total_write_num); if (-1 == write_num) { + std::cerr << "failed to send buffer to slave(" << socket_fd + << "): " << std::endl; PLOG(ERROR) << "failed to send buffer to slave(" << socket_fd << "): "; return claims::common::rSentMessageError; } diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 39480ca34..babaf65ce 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -247,6 +247,7 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { EXEC_AND_LOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), "sent commit result to master loader", "failed to send commit res to master loader"); + if (rSuccess != ret) return ret; } } @@ -295,6 +296,9 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { ChunkID(PartitionID(ProjectionID(table_id, prj_id), part_id), cur_chunk_id), chunk_info)) { + DLOG(INFO) << "start address of chunk:" << cur_chunk_id << " is " + << chunk_info.hook << ", end addr is " + << chunk_info.hook + CHUNK_SIZE; InMemoryChunkWriterIterator writer(chunk_info.hook, CHUNK_SIZE, cur_block_id, BLOCK_SIZE, pos_in_block, tuple_size); @@ -316,7 +320,7 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { ++cur_chunk_id; // get next chunk to write LOG(INFO) << "Now chunk id is " << cur_chunk_id - << ", the number of chunk is" << part_storage->GetChunkNum(); + << ", total number of chunk is" << part_storage->GetChunkNum(); assert(cur_chunk_id < part_storage->GetChunkNum()); cur_block_id = 0; // the block id of next chunk is 0 pos_in_block = 0; diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index f54d501d3..c24bb35fb 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -413,7 +413,11 @@ void ChunkReaderIterator::InHDFSBlockAccessor::getBlock( uint64_t InMemoryChunkWriterIterator::Write(const 
void* const buffer_to_write, uint64_t length_to_write) { + DLOG(INFO) << "current block id is:" << block_id_ + << ", block size is:" << block_size_; void* block_offset = chunk_offset_ + block_id_ * block_size_; + assert(block_offset < chunk_offset_ + CHUNK_SIZE && + "this block is not in this chunk"); unsigned* tuple_count_in_block = reinterpret_cast( block_offset + block_size_ - sizeof(unsigned)); int can_store_tuple_count = @@ -431,9 +435,9 @@ uint64_t InMemoryChunkWriterIterator::Write(const void* const buffer_to_write, ? can_store_tuple_count : length_to_write / tuple_size_; DLOG(INFO) << "memcpy start pos is " - << block_offset + (*tuple_count_in_block) * block_size_ + << block_offset + (*tuple_count_in_block) * tuple_size_ << ". buffer to write: " << buffer_to_write; - memcpy(block_offset + (*tuple_count_in_block) * block_size_, + memcpy(block_offset + (*tuple_count_in_block) * tuple_size_, buffer_to_write, actual_written_tuple_count * tuple_size_); DLOG(INFO) << "copy " << actual_written_tuple_count * tuple_size_ << " bytes into block:" << block_id_; diff --git a/storage/ChunkStorage.h b/storage/ChunkStorage.h index 3378b23ae..a5513164b 100755 --- a/storage/ChunkStorage.h +++ b/storage/ChunkStorage.h @@ -128,7 +128,8 @@ class InMemoryChunkReaderItetaor : public ChunkReaderIterator { virtual ~InMemoryChunkReaderItetaor(); bool nextBlock(BlockStreamBase*& block); bool getNextBlockAccessor(block_accessor*& ba); - void * getChunk() { return start_;} + void* getChunk() { return start_; } + private: void* start_; }; From 7dfad8a614038cb1f405f9575f75ee2016d333e2 Mon Sep 17 00:00:00 2001 From: yukai Date: Thu, 5 May 2016 09:52:49 +0800 Subject: [PATCH 27/58] Distributed Load in one partition Ok! 
FIX: use std::unordered_map rather than boost::unordered_map; --- loader/master_loader.cpp | 112 ++++++++++++++++++++++++++------------- loader/master_loader.h | 7 ++- loader/slave_loader.cpp | 24 ++++++--- 3 files changed, 98 insertions(+), 45 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 7a53376fb..15f969756 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -29,6 +29,9 @@ #include "./master_loader.h" #include +#include +#include +#include #include #include #include @@ -139,25 +142,37 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, // by above thread, unexpected thing happens. if (is_commited) { LOG(INFO) << "received a commit result of txn with id:" << txn_id; - cout << "received a commit result of txn with id:" << txn_id << endl; - if (++(mloader->txn_commint_info_.at(txn_id).commited_part_num_) >= - mloader->txn_commint_info_.at(txn_id).total_part_num_) { - // TODO(lizhifang): optimize the interface of TxnClient - TxnClient::CommitIngest(txn_id); - mloader->txn_commint_info_.erase(txn_id); - LOG(INFO) << "committed txn with id:" << txn_id - << " to txn manager"; - cout << "committed txn with id:" << txn_id << " to txn manager" - << endl; + cout << "(" << syscall(__NR_gettid) + << ")received a commit result of txn with id:" << txn_id << endl; + try { + CommitInfo& commit_info = mloader->txn_commint_info_.at(txn_id); + + if (++commit_info.commited_part_num_ >= + commit_info.total_part_num_) { + // cout << "going to commit txn with id:" << txn_id << + // endl; + LOG(INFO) << "going to commit txn with id:" << txn_id << endl; + TxnClient::CommitIngest(txn_id); + mloader->txn_commint_info_.erase(txn_id); + LOG(INFO) << "committed txn with id:" << txn_id + << " to txn manager"; + // cout << "committed txn with id:" << txn_id << " to + // txn + // manager" + // << endl; + } else { + TxnClient::AbortIngest(txn_id); + mloader->txn_commint_info_.erase(txn_id); + LOG(INFO) << "aborted txn 
with id:" << txn_id + << " to txn manager"; + cout << "aborted txn with id:" << txn_id << " to txn manager" + << endl; + } + } catch (const std::out_of_range& e) { + LOG(ERROR) << "no find " << txn_id << " in map"; + assert(false); } - } else { - // TODO(lizhifang): optimize the interface of TxnClient - TxnClient::AbortIngest(txn_id); - mloader->txn_commint_info_.erase(txn_id); - LOG(INFO) << "aborted txn with id:" << txn_id << " to txn manager"; - cout << "aborted txn with id:" << txn_id << " to txn manager" << endl; } - return 1; }, [=](RegNodeAtom, NodeAddress addr, NodeID node_id) -> int { // NOLINT @@ -245,8 +260,18 @@ RetCode MasterLoader::Ingest(const string& message, EXEC_AND_LOG(ret, ApplyTransaction(table, partition_buffers, ingest), "applied transaction", "failed to apply transaction"); + spin_lock_.acquire(); txn_commint_info_.insert(std::pair( ingest.id_, CommitInfo(ingest.strip_list_.size()))); + spin_lock_.release(); + // TODO()need to deleted after testing + try { + CommitInfo temp = txn_commint_info_.at(ingest.id_); + } catch (const std::out_of_range& e) { + LOG(ERROR) << "Oh~~~NO!!!! 
no find " << ingest.id_ << " in map"; + assert(false); + } + DLOG(INFO) << "insert txn " << ingest.id_ << " into map "; // write data log EXEC_AND_LOG(ret, WriteLog(table, partition_buffers, ingest), "written log ", @@ -259,6 +284,7 @@ RetCode MasterLoader::Ingest(const string& message, EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), "sent every partition data to its slave", "failed to send every partition data to its slave"); + assert(rSuccess == ret); return ret; } @@ -269,48 +295,64 @@ string MasterLoader::GetMessage() { "LINEITEM,|,\n," "1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-" "02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the|\n" - "1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-" + "1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-" + "04-" "20|TAKE BACK RETURN|MAIL|ly final dependencies: slyly bold |\n" "1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-" "31|TAKE BACK RETURN|REG AIR|riously. regular, express dep|\n" "1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-" "16|NONE|AIR|lites. fluffily even de|\n" - "1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-04-" + "1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-" + "04-" "01|NONE|FOB| pending foxes. slyly re|\n" "1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-" "03|DELIVER IN PERSON|MAIL|arefully slyly ex|\n" - "2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-02-" + "2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-" + "02-" "02|TAKE BACK RETURN|RAIL|ven requests. 
deposits breach a|\n" "3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-" "23|NONE|AIR|ongside of the furiously brave acco|\n" - "3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-11-" + "3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-" + "11-" "24|TAKE BACK RETURN|RAIL| unusual accounts. eve|\n" - "3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-01-" + "3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-" + "01-" "23|DELIVER IN PERSON|SHIP|nal foxes wake. |\n" - "3|29380|1883|4|2|2618.76|0.01|0.06|A|F|1993-12-04|1994-01-07|1994-01-01|" + "3|29380|1883|4|2|2618.76|0.01|0.06|A|F|1993-12-04|1994-01-07|1994-01-" + "01|" "NONE|TRUCK|y. fluffily pending d|\n" - "7|145243|7758|2|9|11594.16|0.08|0.08|N|O|1996-02-01|1996-03-02|1996-02-" + "7|145243|7758|2|9|11594.16|0.08|0.08|N|O|1996-02-01|1996-03-02|1996-" + "02-" "19|TAKE BACK RETURN|SHIP|es. instructions|\n" - "7|94780|9799|3|46|81639.88|0.10|0.07|N|O|1996-01-15|1996-03-27|1996-02-" + "7|94780|9799|3|46|81639.88|0.10|0.07|N|O|1996-01-15|1996-03-27|1996-" + "02-" "03|COLLECT COD|MAIL| unusual reques|\n" - "7|163073|3074|4|28|31809.96|0.03|0.04|N|O|1996-03-21|1996-04-08|1996-04-" + "7|163073|3074|4|28|31809.96|0.03|0.04|N|O|1996-03-21|1996-04-08|1996-" + "04-" "20|NONE|FOB|. slyly special requests haggl|\n" - "7|151894|9440|5|38|73943.82|0.08|0.01|N|O|1996-02-11|1996-02-24|1996-02-" + "7|151894|9440|5|38|73943.82|0.08|0.01|N|O|1996-02-11|1996-02-24|1996-" + "02-" "18|DELIVER IN PERSON|TRUCK|ns haggle carefully ironic deposits. bl|\n" - "7|79251|1759|6|35|43058.75|0.06|0.03|N|O|1996-01-16|1996-02-23|1996-01-" + "7|79251|1759|6|35|43058.75|0.06|0.03|N|O|1996-01-16|1996-02-23|1996-" + "01-" "22|TAKE BACK RETURN|FOB|jole. 
excuses wake carefully alongside of |\n" "7|157238|2269|7|5|6476.15|0.04|0.02|N|O|1996-02-10|1996-03-26|1996-02-" "13|NONE|FOB|ithely regula|\n" - "32|82704|7721|1|28|47227.60|0.05|0.08|N|O|1995-10-23|1995-08-27|1995-10-" + "32|82704|7721|1|28|47227.60|0.05|0.08|N|O|1995-10-23|1995-08-27|1995-" + "10-" "26|TAKE BACK RETURN|TRUCK|sleep quickly. req|\n" - "32|197921|441|2|32|64605.44|0.02|0.00|N|O|1995-08-14|1995-10-07|1995-08-" + "32|197921|441|2|32|64605.44|0.02|0.00|N|O|1995-08-14|1995-10-07|1995-" + "08-" "27|COLLECT COD|AIR|lithely regular deposits. fluffily |\n" "32|44161|6666|3|2|2210.32|0.09|0.02|N|O|1995-08-07|1995-10-07|1995-08-" "23|DELIVER IN PERSON|AIR| express accounts wake according to the|\n" - "32|2743|7744|4|4|6582.96|0.09|0.03|N|O|1995-08-04|1995-10-01|1995-09-03|" + "32|2743|7744|4|4|6582.96|0.09|0.03|N|O|1995-08-04|1995-10-01|1995-09-" + "03|" "NONE|REG AIR|e slyly final pac|\n" - "32|85811|8320|5|44|79059.64|0.05|0.06|N|O|1995-08-28|1995-08-20|1995-09-" - "14|DELIVER IN PERSON|AIR|symptotes nag according to the ironic depo|\n"; + "32|85811|8320|5|44|79059.64|0.05|0.06|N|O|1995-08-28|1995-08-20|1995-" + "09-" + "14|DELIVER IN PERSON|AIR|symptotes nag according to the ironic " + "depo|\n"; return ret; } @@ -365,10 +407,6 @@ RetCode MasterLoader::GetRequestFromMessage(const string& message, return ret; } -// RetCode MasterLoader::CheckAndToValue(const IngestionRequest& req, -// void* tuple_buffer, -// vector& column_validities) {} - // map every tuple into associate part RetCode MasterLoader::GetPartitionTuples( const IngestionRequest& req, const TableDescriptor* table, diff --git a/loader/master_loader.h b/loader/master_loader.h index 869d562b2..f87f1ec4f 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -35,6 +35,7 @@ #include #include #include "caf/all.hpp" +#include #include "./validity.h" #include "../common/error_define.h" @@ -43,6 +44,7 @@ #include "../utility/lock.h" using std::function; +using std::unordered_map; 
namespace claims { namespace catalog { @@ -180,9 +182,10 @@ class MasterLoader { // vector slave_sockets_; boost::unordered_map slave_addr_to_socket_; - // store id of transactions which are not finished - boost::unordered_map txn_commint_info_; + // store id of transactions which are not finished + unordered_map txn_commint_info_; Lock lock_; + SpineLock spin_lock_; }; } /* namespace loader */ diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index babaf65ce..44936f096 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include "caf/all.hpp" #include "caf/io/all.hpp" @@ -55,6 +56,7 @@ using claims::common::rFailure; using claims::txn::GetPartitionIdFromGlobalPartId; using claims::txn::GetProjectionIdFromGlobalPartId; using claims::txn::GetTableIdFromGlobalPartId; +using std::chrono::milliseconds; namespace claims { namespace loader { @@ -208,7 +210,10 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { if (-1 == (real_read_num = recv(master_fd_, head_buffer, LoadPacket::kHeadLength, MSG_WAITALL))) { PLOG(ERROR) << "failed to receive message length from master"; - continue; + return rFailure; + } else if (0 == real_read_num) { + PLOG(ERROR) << "master loader socket has been closed"; + return rFailure; } else if (real_read_num < LoadPacket::kHeadLength) { LOG(ERROR) << "received message error! 
only read " << real_read_num << " bytes"; @@ -344,13 +349,20 @@ RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, caf::scoped_actor self; self->sync_send(master_actor, LoadAckAtom::value, txn_id, is_commited) .await([&](int r) { // NOLINT - LOG(INFO) << "sent commit result:" << is_commited - << " to master and received response"; - }); + LOG(INFO) << "sent txn " << txn_id + << " commit result:" << is_commited + << " to master and received response"; + }, + caf::after(milliseconds(100)) >> + [&] { + LOG(INFO) << "receiving response of txn " << txn_id + << " time out"; + throw caf::network_error("receiving response time out"); + }); return rSuccess; } catch (exception& e) { - LOG(ERROR) << "failed to send commit result to master loader in " - << ++time << "time." << e.what(); + LOG(ERROR) << "failed to send commit result of " << txn_id + << " to master loader in " << ++time << " time." << e.what(); if (time >= retry_max_time) return rFailure; } } From f4ee689686fa27f49d066c6ee4943b9244c0cda1 Mon Sep 17 00:00:00 2001 From: yukai Date: Thu, 5 May 2016 15:43:26 +0800 Subject: [PATCH 28/58] Distributed Load in mutliple parttion OK! FIX: extend waiting time of slave loader for master loader ack; FIX: process after receiving response of slave loader --- loader/master_loader.cpp | 42 +++++++++++++++++++--------------------- loader/master_loader.h | 7 +++++++ loader/slave_loader.cpp | 3 ++- 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 15f969756..135215441 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -140,38 +140,36 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, // map. // Consider that: if this function access the item in map just deleted // by above thread, unexpected thing happens. 
- if (is_commited) { - LOG(INFO) << "received a commit result of txn with id:" << txn_id; - cout << "(" << syscall(__NR_gettid) - << ")received a commit result of txn with id:" << txn_id << endl; - try { - CommitInfo& commit_info = mloader->txn_commint_info_.at(txn_id); - - if (++commit_info.commited_part_num_ >= - commit_info.total_part_num_) { - // cout << "going to commit txn with id:" << txn_id << - // endl; + LOG(INFO) << "received a commit result " << is_commited + << " of txn with id:" << txn_id; + // cout << "(" << syscall(__NR_gettid) << ")received a commit + // result " + // << is_commited << "of txn with id:" << txn_id << endl; + try { + CommitInfo& commit_info = mloader->txn_commint_info_.at(txn_id); + + if (is_commited) { + __sync_add_and_fetch(&commit_info.commited_part_num_, 1); + } else { + __sync_add_and_fetch(&commit_info.abort_part_num_, 1); + } + if (commit_info.IsFinished()) { + if (0 == commit_info.abort_part_num_) { LOG(INFO) << "going to commit txn with id:" << txn_id << endl; TxnClient::CommitIngest(txn_id); - mloader->txn_commint_info_.erase(txn_id); LOG(INFO) << "committed txn with id:" << txn_id << " to txn manager"; - // cout << "committed txn with id:" << txn_id << " to - // txn - // manager" - // << endl; } else { + LOG(INFO) << "going to abort txn with id:" << txn_id << endl; TxnClient::AbortIngest(txn_id); - mloader->txn_commint_info_.erase(txn_id); LOG(INFO) << "aborted txn with id:" << txn_id << " to txn manager"; - cout << "aborted txn with id:" << txn_id << " to txn manager" - << endl; } - } catch (const std::out_of_range& e) { - LOG(ERROR) << "no find " << txn_id << " in map"; - assert(false); + mloader->txn_commint_info_.erase(txn_id); } + } catch (const std::out_of_range& e) { + LOG(ERROR) << "no find " << txn_id << " in map"; + assert(false); } return 1; }, diff --git a/loader/master_loader.h b/loader/master_loader.h index f87f1ec4f..8d785230a 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -96,9 +96,16 
@@ class MasterLoader { explicit CommitInfo(uint64_t total_part_num) : total_part_num_(total_part_num), commited_part_num_(0), + abort_part_num_(0), wait_period_(0) {} + + inline bool IsFinished() { + return commited_part_num_ + abort_part_num_ >= total_part_num_; + } + uint64_t total_part_num_; uint64_t commited_part_num_; + uint64_t abort_part_num_; // initial value is 0, add by 1 every time check thread traverses // if wait period exceeds the specified value, this transaction fails uint64_t wait_period_; diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 44936f096..aa507ef09 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -57,6 +57,7 @@ using claims::txn::GetPartitionIdFromGlobalPartId; using claims::txn::GetProjectionIdFromGlobalPartId; using claims::txn::GetTableIdFromGlobalPartId; using std::chrono::milliseconds; +using std::chrono::seconds; namespace claims { namespace loader { @@ -353,7 +354,7 @@ RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, << " commit result:" << is_commited << " to master and received response"; }, - caf::after(milliseconds(100)) >> + caf::after(seconds(2)) >> [&] { LOG(INFO) << "receiving response of txn " << txn_id << " time out"; From 2aad51be8b999c0ee6288d4ca1c75c71b6af8164 Mon Sep 17 00:00:00 2001 From: yukai Date: Thu, 5 May 2016 21:24:28 +0800 Subject: [PATCH 29/58] OTPIMIZE: replace LOG(INFO) with DLOG(INFO) in loader folder for information which shouldn't be seen by user --- common/error_define.h | 21 +++++- loader/load_packet.cpp | 9 ++- loader/master_loader.cpp | 125 +++++++++++++++++++---------------- loader/master_loader.h | 10 +-- loader/slave_loader.cpp | 68 ++++++++++--------- loader/slave_loader.h | 1 - storage/BlockManager.cpp | 6 +- storage/ChunkStorage.cpp | 11 ++- storage/PartitionStorage.cpp | 36 +++++----- 9 files changed, 152 insertions(+), 135 deletions(-) diff --git a/common/error_define.h b/common/error_define.h index 8d11a3edc..2214d1441 100644 --- 
a/common/error_define.h +++ b/common/error_define.h @@ -59,6 +59,15 @@ typedef int RetCode; // means return code } \ } while (0) +#define EXEC_AND_DLOG(ret, f, info, err_info) \ + do { \ + if (rSuccess == (ret = f)) { \ + DLOG(INFO) << info << std::endl; \ + } else { \ + ELOG(ret, err_info) \ + } \ + } while (0) + #define EXEC_AND_LOG_RETURN(ret, f, info, err_info) \ do { \ if (rSuccess == (ret = f)) { \ @@ -69,6 +78,16 @@ typedef int RetCode; // means return code } \ } while (0) +#define EXEC_AND_DLOG_RETURN(ret, f, info, err_info) \ + do { \ + if (rSuccess == (ret = f)) { \ + DLOG(INFO) << info << std::endl; \ + } else { \ + ELOG(ret, err_info) \ + return ret; \ + } \ + } while (0) + #define EXEC_AND_PLOG(ret, f, info, err_info) \ do { \ if (rSuccess == (ret = f)) { \ @@ -264,8 +283,6 @@ const int rBeginQueryFail = -2504; const int rBeginCheckpointFail = -2505; const int rCommitCheckpointFail = -2506; - - /* errorno for codegen -3001 ~ -4000 */ const int rTestError = -3001; diff --git a/loader/load_packet.cpp b/loader/load_packet.cpp index 98677c21e..9b9f9badb 100644 --- a/loader/load_packet.cpp +++ b/loader/load_packet.cpp @@ -29,7 +29,6 @@ #include "./load_packet.h" #include - #include "../common/memory_handle.h" using namespace claims::common; // NOLINT @@ -55,8 +54,8 @@ RetCode LoadPacket::Serialize(void*& packet_buffer, *reinterpret_cast(packet_buffer + 3 * sizeof(uint64_t)) = offset_; *reinterpret_cast(packet_buffer + 4 * sizeof(uint64_t)) = data_length_; - LOG(INFO) << "Serialize packet: " << txn_id_ << " " << global_part_id_ << " " - << pos_ << " " << offset_ << " " << data_length_; + DLOG(INFO) << "Serialize packet: " << txn_id_ << " " << global_part_id_ << " " + << pos_ << " " << offset_ << " " << data_length_; memcpy(packet_buffer + kHeadLength, data_buffer_, data_length_); return rSuccess; @@ -72,8 +71,8 @@ RetCode LoadPacket::Deserialize(const void* const head_buffer, *reinterpret_cast(head_buffer + 3 * sizeof(uint64_t)); data_length_ = 
*reinterpret_cast(head_buffer + 4 * sizeof(uint64_t)); - LOG(INFO) << "Deserialize packet: " << txn_id_ << " " << global_part_id_ - << " " << pos_ << " " << offset_ << " " << data_length_; + DLOG(INFO) << "Deserialize packet: " << txn_id_ << " " << global_part_id_ + << " " << pos_ << " " << offset_ << " " << data_length_; data_buffer_ = data_buffer; return rSuccess; } diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 135215441..9d57ea6af 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -29,6 +29,7 @@ #include "./master_loader.h" #include +#include #include #include #include @@ -79,6 +80,11 @@ using namespace claims::txn; // NOLINT namespace claims { namespace loader { +void MasterLoader::IngestionRequest::Show() { + LOG(INFO) << "table name:" << table_name_ << ", column separator:" << col_sep_ + << ", row separator:" << row_sep_ + << ", tuples size is:" << tuples_.size(); +} MasterLoader::MasterLoader() : master_loader_ip_(Config::master_loader_ip), @@ -140,8 +146,8 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, // map. // Consider that: if this function access the item in map just deleted // by above thread, unexpected thing happens. 
- LOG(INFO) << "received a commit result " << is_commited - << " of txn with id:" << txn_id; + DLOG(INFO) << "received a commit result " << is_commited + << " of txn with id:" << txn_id; // cout << "(" << syscall(__NR_gettid) << ")received a commit // result " // << is_commited << "of txn with id:" << txn_id << endl; @@ -155,15 +161,15 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, } if (commit_info.IsFinished()) { if (0 == commit_info.abort_part_num_) { - LOG(INFO) << "going to commit txn with id:" << txn_id << endl; + DLOG(INFO) << "going to commit txn with id:" << txn_id << endl; TxnClient::CommitIngest(txn_id); - LOG(INFO) << "committed txn with id:" << txn_id - << " to txn manager"; + DLOG(INFO) << "committed txn with id:" << txn_id + << " to txn manager"; } else { - LOG(INFO) << "going to abort txn with id:" << txn_id << endl; + DLOG(INFO) << "going to abort txn with id:" << txn_id << endl; TxnClient::AbortIngest(txn_id); - LOG(INFO) << "aborted txn with id:" << txn_id - << " to txn manager"; + DLOG(INFO) << "aborted txn with id:" << txn_id + << " to txn manager"; } mloader->txn_commint_info_.erase(txn_id); } @@ -218,8 +224,8 @@ RetCode MasterLoader::Ingest(const string& message, // get message from MQ IngestionRequest req; - EXEC_AND_LOG(ret, GetRequestFromMessage(message, &req), "got request!", - "failed to get request"); + EXEC_AND_DLOG(ret, GetRequestFromMessage(message, &req), "got request!", + "failed to get request"); // parse message and get all tuples of all partitions, then // check the validity of all tuple in message @@ -234,10 +240,10 @@ RetCode MasterLoader::Ingest(const string& message, table->getProjectoin(i)->getPartitioner()->getNumberOfPartitions()); vector columns_validities; - EXEC_AND_LOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part, - columns_validities), - "got all tuples of every partition", - "failed to get all tuples of every partition"); + EXEC_AND_DLOG(ret, GetPartitionTuples(req, table, 
tuple_buffers_per_part, + columns_validities), + "got all tuples of every partition", + "failed to get all tuples of every partition"); if (ret != rSuccess && ret != claims::common::rNoMemory) { // TODO(YUKAI): error handle, like sending error message to client LOG(ERROR) << "the tuple is not valid"; @@ -248,40 +254,41 @@ RetCode MasterLoader::Ingest(const string& message, // merge all tuple buffers of partition into one partition buffer vector> partition_buffers( table->getNumberOfProjection()); - EXEC_AND_LOG(ret, MergePartitionTupleIntoOneBuffer( - table, tuple_buffers_per_part, partition_buffers), - "merged all tuple of same partition into one buffer", - "failed to merge tuples buffers into one buffer"); + EXEC_AND_DLOG(ret, MergePartitionTupleIntoOneBuffer( + table, tuple_buffers_per_part, partition_buffers), + "merged all tuple of same partition into one buffer", + "failed to merge tuples buffers into one buffer"); // start transaction from here claims::txn::Ingest ingest; - EXEC_AND_LOG(ret, ApplyTransaction(table, partition_buffers, ingest), - "applied transaction", "failed to apply transaction"); + EXEC_AND_DLOG(ret, ApplyTransaction(table, partition_buffers, ingest), + "applied transaction", "failed to apply transaction"); spin_lock_.acquire(); txn_commint_info_.insert(std::pair( ingest.id_, CommitInfo(ingest.strip_list_.size()))); spin_lock_.release(); - // TODO()need to deleted after testing - try { - CommitInfo temp = txn_commint_info_.at(ingest.id_); - } catch (const std::out_of_range& e) { - LOG(ERROR) << "Oh~~~NO!!!! no find " << ingest.id_ << " in map"; - assert(false); - } + // // TODO()need to deleted after testing + // try { + // CommitInfo temp = txn_commint_info_.at(ingest.id_); + // } catch (const std::out_of_range& e) { + // LOG(ERROR) << "Oh~~~NO!!!! 
no find " << ingest.id_ << " in map"; + // assert(false); + // } DLOG(INFO) << "insert txn " << ingest.id_ << " into map "; // write data log - EXEC_AND_LOG(ret, WriteLog(table, partition_buffers, ingest), "written log ", - "failed to write log"); + EXEC_AND_DLOG(ret, WriteLog(table, partition_buffers, ingest), "written log ", + "failed to write log"); // reply ACK to MQ - EXEC_AND_LOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); + EXEC_AND_DLOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); // distribute partition load task - EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), - "sent every partition data to its slave", - "failed to send every partition data to its slave"); + EXEC_AND_DLOG(ret, + SendPartitionTupleToSlave(table, partition_buffers, ingest), + "sent every partition data to its slave", + "failed to send every partition data to its slave"); assert(rSuccess == ret); return ret; @@ -525,18 +532,18 @@ RetCode MasterLoader::WriteLog( ++part_id) { uint64_t global_part_id = GetGlobalPartId(table_id, prj_id, part_id); - EXEC_AND_LOG(ret, - LogClient::Data(global_part_id, - ingest.strip_list_.at(global_part_id).first, - ingest.strip_list_.at(global_part_id).second, - partition_buffers[prj_id][part_id].buffer_, - partition_buffers[prj_id][part_id].length_), - "written data log for partition:" << global_part_id, - "failed to write data log for partition:" << global_part_id); + EXEC_AND_DLOG( + ret, LogClient::Data(global_part_id, + ingest.strip_list_.at(global_part_id).first, + ingest.strip_list_.at(global_part_id).second, + partition_buffers[prj_id][part_id].buffer_, + partition_buffers[prj_id][part_id].length_), + "written data log for partition:" << global_part_id, + "failed to write data log for partition:" << global_part_id); } } - EXEC_AND_LOG(ret, LogClient::Refresh(), "flushed data log into disk", - "failed to flush data log"); + EXEC_AND_DLOG(ret, LogClient::Refresh(), "flushed data log 
into disk", + "failed to flush data log"); return ret; } @@ -564,19 +571,20 @@ RetCode MasterLoader::SendPartitionTupleToSlave( void* packet_buffer; MemoryGuard guard(packet_buffer); // auto release by guard uint64_t packet_length; - EXEC_AND_LOG_RETURN(ret, packet.Serialize(packet_buffer, packet_length), - "serialized packet into buffer", - "failed to serialize packet"); + EXEC_AND_DLOG_RETURN(ret, packet.Serialize(packet_buffer, packet_length), + "serialized packet into buffer", + "failed to serialize packet"); int socket_fd = -1; - EXEC_AND_LOG_RETURN(ret, SelectSocket(table, prj_id, part_id, socket_fd), - "selected the socket", "failed to select the socket"); + EXEC_AND_DLOG_RETURN(ret, SelectSocket(table, prj_id, part_id, socket_fd), + "selected the socket", + "failed to select the socket"); assert(socket_fd > 3); - EXEC_AND_LOG_RETURN(ret, - SendPacket(socket_fd, packet_buffer, packet_length), - "sent message to slave :" << socket_fd, - "failed to sent message to slave :" << socket_fd); + EXEC_AND_DLOG_RETURN(ret, + SendPacket(socket_fd, packet_buffer, packet_length), + "sent message to slave :" << socket_fd, + "failed to sent message to slave :" << socket_fd); } } return ret; @@ -597,11 +605,12 @@ RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( "partition number is not match"); for (int j = 0; j < tuple_buffer_per_part[i].size(); ++j) { int tuple_count = tuple_buffer_per_part[i][j].size(); + if (0 == tuple_count) continue; int tuple_len = table->getProjectoin(i)->getSchema()->getTupleMaxSize(); int buffer_len = tuple_count * tuple_len; - LOG(INFO) << "the tuple length of prj:" << i << ",part:" << j - << ",table:" << table->getTableName() << " is:" << tuple_len; - LOG(INFO) << "tuple size is:" << tuple_count; + DLOG(INFO) << "the tuple length of prj:" << i << ",part:" << j + << ",table:" << table->getTableName() << " is:" << tuple_len; + DLOG(INFO) << "tuple size is:" << tuple_count; void* new_buffer = Malloc(buffer_len); if (NULL == new_buffer) 
return ret = claims::common::rNoMemory; @@ -629,12 +638,12 @@ RetCode MasterLoader::SelectSocket(const TableDescriptor* table, NodeID node_id_in_rmm = table->getProjectoin(prj_id)->getPartitioner()->getPartitionLocation( part_id); - LOG(INFO) << "node id is " << node_id_in_rmm; + DLOG(INFO) << "node id is " << node_id_in_rmm; NodeAddress addr; - EXEC_AND_LOG_RETURN( + EXEC_AND_DLOG_RETURN( ret, NodeTracker::GetInstance()->GetNodeAddr(node_id_in_rmm, addr), "got node address", "failed to get node address"); - LOG(INFO) << "node address is " << addr.ip << ":" << addr.port; + DLOG(INFO) << "node address is " << addr.ip << ":" << addr.port; addr.port = ""; // the port is used for OLAP, not for loading socket_fd = slave_addr_to_socket_[addr]; return ret; diff --git a/loader/master_loader.h b/loader/master_loader.h index 8d785230a..ce548d9a7 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -30,12 +30,11 @@ #define LOADER_MASTER_LOADER_H_ #include -#include #include #include #include -#include "caf/all.hpp" #include +#include "caf/all.hpp" #include "./validity.h" #include "../common/error_define.h" @@ -69,12 +68,7 @@ class MasterLoader { string col_sep_; string row_sep_; vector tuples_; - void Show() { - LOG(INFO) << "table name:" << table_name_ - << ", column separator:" << col_sep_ - << ", row separator:" << row_sep_ - << ", tuples size is:" << tuples_.size(); - } + void Show(); }; struct WorkerPara { WorkerPara(MasterLoader* mloader, const std::string& brokerURI, diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index aa507ef09..25611b849 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -28,11 +28,12 @@ #include "./slave_loader.h" +#include #include #include #include #include -#include +#include //NOLINT #include "caf/all.hpp" #include "caf/io/all.hpp" @@ -76,9 +77,9 @@ RetCode SlaveLoader::ConnectWithMaster() { int ret = rSuccess; int retry_time = 10; for (int i = 0; Clean(), i < retry_time; ++i) { // if failed, 
call Clean() - EXEC_AND_LOG(ret, EstablishListeningSocket(), - "established listening socket", - "failed to establish listening socket in " << i << " times"); + EXEC_AND_DLOG(ret, EstablishListeningSocket(), + "established listening socket", + "failed to establish listening socket in " << i << " times"); if (rSuccess == ret) break; } if (rSuccess != ret) { @@ -87,8 +88,8 @@ RetCode SlaveLoader::ConnectWithMaster() { } for (int i = 1; i <= retry_time; ++i) { - EXEC_AND_LOG(ret, SendSelfAddrToMaster(), "sent self ip/port to master", - "failed to send self ip/port to master in " << i << " times"); + EXEC_AND_DLOG(ret, SendSelfAddrToMaster(), "sent self ip/port to master", + "failed to send self ip/port to master in " << i << " times"); if (rSuccess == ret) break; sleep(1); } @@ -98,9 +99,9 @@ RetCode SlaveLoader::ConnectWithMaster() { } for (int i = 0; i < retry_time; ++i) { - EXEC_AND_LOG(ret, GetConnectedSocket(), "got connected socket with master", - "failed to get connected socket with master in " << i - << " times"); + EXEC_AND_DLOG(ret, GetConnectedSocket(), "got connected socket with master", + "failed to get connected socket with master in " << i + << " times"); if (rSuccess == ret) break; } if (rSuccess != ret) Clean(); @@ -165,7 +166,7 @@ RetCode SlaveLoader::SendSelfAddrToMaster() { caf::scoped_actor self; self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port) .await([&](int r) { // NOLINT - LOG(INFO) << "sent ip&port and received response"; + DLOG(INFO) << "sent ip&port and received response"; }); } catch (exception& e) { LOG(ERROR) << "can't send self ip&port to master loader. " << e.what(); @@ -224,8 +225,8 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { *reinterpret_cast(head_buffer + LoadPacket::kHeadLength - sizeof(uint64_t)); uint64_t real_packet_length = data_length + LoadPacket::kHeadLength; - LOG(INFO) << "real packet length is :" << real_packet_length - << ". 
date length is " << data_length; + DLOG(INFO) << "real packet length is :" << real_packet_length + << ". date length is " << data_length; assert(data_length >= 4 && data_length <= 10000000); char* data_buffer = Malloc(data_length); @@ -246,13 +247,13 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { LoadPacket packet; packet.Deserialize(head_buffer, data_buffer); - EXEC_AND_LOG(ret, StoreDataInMemory(packet), "stored data", - "failed to store"); + EXEC_AND_DLOG(ret, StoreDataInMemory(packet), "stored data", + "failed to store"); // return result to master loader - EXEC_AND_LOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), - "sent commit result to master loader", - "failed to send commit res to master loader"); + EXEC_AND_DLOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), + "sent commit result to master loader", + "failed to send commit res to master loader"); if (rSuccess != ret) return ret; } } @@ -279,7 +280,7 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { DLOG(INFO) << "position+offset is:" << packet.pos_ + packet.offset_ << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; - EXEC_AND_LOG_RETURN( + EXEC_AND_DLOG_RETURN( ret, part_storage->AddChunkWithMemoryToNum(last_chunk_id + 1, HDFS), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); @@ -313,9 +314,10 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { writer.Write(packet.data_buffer_ + total_written_length, data_length - total_written_length); total_written_length += written_length; - LOG(INFO) << "written " << written_length - << " bytes into chunk:" << cur_chunk_id - << ". Now total written " << total_written_length << " bytes"; + DLOG(INFO) << "written " << written_length + << " bytes into chunk:" << cur_chunk_id + << ". 
Now total written " << total_written_length + << " bytes"; if (total_written_length == data_length) { // all tuple is written into memory return rSuccess; @@ -325,14 +327,14 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { } while (writer.NextBlock()); ++cur_chunk_id; // get next chunk to write - LOG(INFO) << "Now chunk id is " << cur_chunk_id - << ", total number of chunk is" << part_storage->GetChunkNum(); + DLOG(INFO) << "Now chunk id is " << cur_chunk_id + << ", total number of chunk is" << part_storage->GetChunkNum(); assert(cur_chunk_id < part_storage->GetChunkNum()); cur_block_id = 0; // the block id of next chunk is 0 pos_in_block = 0; } else { - cout << "chunk id is " << cur_chunk_id << endl; + LOG(INFO) << "chunk id is " << cur_chunk_id << endl; assert(false && "no chunk with this chunk id"); } } @@ -350,14 +352,14 @@ RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, caf::scoped_actor self; self->sync_send(master_actor, LoadAckAtom::value, txn_id, is_commited) .await([&](int r) { // NOLINT - LOG(INFO) << "sent txn " << txn_id - << " commit result:" << is_commited - << " to master and received response"; + DLOG(INFO) << "sent txn " << txn_id + << " commit result:" << is_commited + << " to master and received response"; }, caf::after(seconds(2)) >> - [&] { - LOG(INFO) << "receiving response of txn " << txn_id - << " time out"; + [&] { // NOLINT + LOG(ERROR) << "receiving response of txn " << txn_id + << " time out"; throw caf::network_error("receiving response time out"); }); return rSuccess; @@ -376,9 +378,9 @@ void* SlaveLoader::StartSlaveLoader(void* arg) { SlaveLoader* slave_loader = Environment::getInstance()->get_slave_loader(); int ret = rSuccess; - EXEC_AND_LOG(ret, slave_loader->ConnectWithMaster(), - "succeed to connect with master", - "failed to connect with master "); + EXEC_AND_DLOG(ret, slave_loader->ConnectWithMaster(), + "succeed to connect with master", + "failed to connect with master "); 
assert(rSuccess == ret && "can't connect with master"); diff --git a/loader/slave_loader.h b/loader/slave_loader.h index 535c8639c..79372109d 100644 --- a/loader/slave_loader.h +++ b/loader/slave_loader.h @@ -29,7 +29,6 @@ #ifndef LOADER_SLAVE_LOADER_H_ #define LOADER_SLAVE_LOADER_H_ #include -#include #include #include #include "../catalog/catalog.h" diff --git a/storage/BlockManager.cpp b/storage/BlockManager.cpp index 2a2973146..ef3999402 100755 --- a/storage/BlockManager.cpp +++ b/storage/BlockManager.cpp @@ -4,10 +4,10 @@ * Created on: 2013-10-11 * Author: casa */ -#include #include "BlockManager.h" #include +#include #include "../common/file_handle/hdfs_connector.h" #include "../Environment.h" #include "../common/rename.h" @@ -369,8 +369,8 @@ bool BlockManager::removePartition(const PartitionID& partition_id) { PartitionStorage* BlockManager::getPartitionHandle( const PartitionID& partition_id) const { - LOG(INFO) << "partid2storage size is:" << partition_id_to_storage_.size(); - LOG(INFO) << "going to find storage [" << partition_id.getName() << "]"; + DLOG(INFO) << "partid2storage size is:" << partition_id_to_storage_.size(); + DLOG(INFO) << "going to find storage [" << partition_id.getName() << "]"; boost::unordered_map::const_iterator it = partition_id_to_storage_.find(partition_id); if (it == partition_id_to_storage_.cend()) { diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index c24bb35fb..039c1eab1 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -5,15 +5,14 @@ * Author: wangli */ #include -#include "ChunkStorage.h" - +#include +#include "./BlockManager.h" +#include "./ChunkStorage.h" #include "../common/file_handle/hdfs_connector.h" -#include "BlockManager.h" - +#include "../Config.h" #include "../Debug.h" -#include "../utility/warmup.h" #include "../utility/rdtsc.h" -#include "../Config.h" +#include "../utility/warmup.h" using claims::common::HdfsConnector; diff --git a/storage/PartitionStorage.cpp 
b/storage/PartitionStorage.cpp index d4de3c7bb..5163301c4 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -64,16 +64,16 @@ RetCode PartitionStorage::AddChunkWithMemoryToNum( const StorageLevel& storage_level) { RetCode ret = rSuccess; if (number_of_chunks_ >= expected_number_of_chunks) return ret; - LOG(INFO) << "now chunk number:" << number_of_chunks_ - << ". expected chunk num:" << expected_number_of_chunks; + DLOG(INFO) << "now chunk number:" << number_of_chunks_ + << ". expected chunk num:" << expected_number_of_chunks; for (unsigned i = number_of_chunks_; i < expected_number_of_chunks; i++) { ChunkStorage* chunk = new ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, storage_level); - EXEC_AND_LOG(ret, chunk->ApplyMemory(), "applied memory for chunk(" - << partition_id_.getName() - << "," << i << ")", - "failed to apply memory for chunk(" << partition_id_.getName() - << "," << i << ")"); + EXEC_AND_DLOG(ret, chunk->ApplyMemory(), "applied memory for chunk(" + << partition_id_.getName() + << "," << i << ")", + "failed to apply memory for chunk(" << partition_id_.getName() + << "," << i << ")"); chunk_list_.push_back(chunk); } number_of_chunks_ = expected_number_of_chunks; @@ -184,10 +184,9 @@ bool PartitionStorage::AtomicPartitionReaderIterator::nextBlock( } } } -PartitionStorage::TxnPartitionReaderIterator::~TxnPartitionReaderIterator() { - -} -PartitionStorage::PartitionReaderItetaor* PartitionStorage::createTxnReaderIterator() { +PartitionStorage::TxnPartitionReaderIterator::~TxnPartitionReaderIterator() {} +PartitionStorage::PartitionReaderItetaor* +PartitionStorage::createTxnReaderIterator() { return new TxnPartitionReaderIterator(this); } @@ -199,28 +198,27 @@ bool PartitionStorage::TxnPartitionReaderIterator::nextBlock( lock_.release(); ba->getBlock(block); auto block_addr = (char*)block->getBlock(); - auto chunk_addr = (char*)((InMemoryChunkReaderItetaor*)chunk_it_)->getChunk(); - //cout << (block_addr - chunk_addr) / 
(64 * 1024) << endl; + auto chunk_addr = + (char*)((InMemoryChunkReaderItetaor*)chunk_it_)->getChunk(); + // cout << (block_addr - chunk_addr) / (64 * 1024) << endl; return true; - } - else { + } else { if ((chunk_it_ = PartitionReaderItetaor::nextChunk()) > 0) { lock_.release(); return nextBlock(block); - } - else { + } else { lock_.release(); return false; } } } ChunkReaderIterator* PartitionStorage::TxnPartitionReaderIterator::nextChunk() { -// lock_.acquire(); + // lock_.acquire(); ChunkReaderIterator* ret; if (chunk_cur_ < ps->number_of_chunks_) ret = ps->chunk_list_[chunk_cur_++]->createChunkReaderIterator(); else ret = 0; -// lock_.release(); + // lock_.release(); return ret; } From e05330c6bcdb7ed0f9b6014569d9eb2f72b45e3a Mon Sep 17 00:00:00 2001 From: yukai Date: Sat, 7 May 2016 17:28:06 +0800 Subject: [PATCH 30/58] OPTIMIZE: support compiling to RELEASE version; ADD: PERFLOG in loader --- Client/Makefile.am | 7 ++- Client/Test/Makefile.am | 1 - Daemon/Makefile.am | 3 +- Executor/Makefile.am | 3 +- Executor/Test/Makefile.am | 3 +- IndexManager/Makefile.am | 4 +- IndexManager/Test/Makefile.am | 3 +- Makefile.am | 7 ++- Resource/Makefile.am | 1 - Test/Makefile.am | 1 - Test/TestSuit/Makefile.am | 1 - Test/common/Makefile.am | 1 - Test/utility/Makefile.am | 1 - catalog/Makefile.am | 7 +-- catalog/Test/Makefile.am | 7 +-- common/Expression/Makefile.am | 1 - common/Makefile.am | 1 - common/Schema/Test/Makefile.am | 1 - common/expression/Makefile.am | 1 - common/serialization/Makefile.am | 3 +- configure.ac | 13 ++++- loader/AMQ_consumer.cpp | 15 +++--- loader/Makefile.am | 15 ++---- loader/master_loader.cpp | 87 +++++++++++++++++++------------- loader/slave_loader.cpp | 25 ++++++--- loader/test/Makefile.am | 3 +- physical_operator/Makefile.am | 3 +- sql_parser/Test/Makefile.am | 1 - sql_parser/ast_node/Makefile.am | 1 - sql_parser/parser/Makefile.am | 1 - storage/Makefile.am | 1 - 31 files changed, 112 insertions(+), 110 deletions(-) diff --git 
a/Client/Makefile.am b/Client/Makefile.am index 478a319b2..d26371768 100644 --- a/Client/Makefile.am +++ b/Client/Makefile.am @@ -17,8 +17,7 @@ LDADD = ../catalog/libcatalog.a \ ../common/Block/libblock.a \ ../common/Schema/libschema.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a + ${BOOST_HOME}/stage/lib/libboost_serialization.so noinst_LIBRARIES=libclient.a libclient_a_SOURCES = \ @@ -27,5 +26,5 @@ libclient_a_SOURCES = \ ClientResponse.cpp ClientResponse.h \ jsoncpp.cpp -SUBDIRS = json Test -DIST_SUBDIRS = json Test +#SUBDIRS = json Test +#DIST_SUBDIRS = json Test diff --git a/Client/Test/Makefile.am b/Client/Test/Makefile.am index 893724801..eab9f8ea5 100644 --- a/Client/Test/Makefile.am +++ b/Client/Test/Makefile.am @@ -20,7 +20,6 @@ LDADD = ../../catalog/libcatalog.a \ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${HADOOP_HOME}/lib/native/libhdfs.so\ - ${THERON_HOME}/Lib/libtherond.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ ${BOOST_HOME}/stage/lib/libboost_serialization.a diff --git a/Daemon/Makefile.am b/Daemon/Makefile.am index e8a8e3d82..7964a9353 100644 --- a/Daemon/Makefile.am +++ b/Daemon/Makefile.am @@ -19,8 +19,7 @@ LDADD = ../logical_operator/liblogicalqueryplan.a \ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a + ${BOOST_HOME}/stage/lib/libboost_serialization.so noinst_LIBRARIES=libdaemon.a libdaemon_a_SOURCES = Daemon.cpp Daemon.h \ diff --git a/Executor/Makefile.am b/Executor/Makefile.am index a6cd75df9..d3a13ab06 100644 --- a/Executor/Makefile.am +++ b/Executor/Makefile.am @@ -17,7 +17,6 @@ LDADD = ../BlockStreamIterator/libblockstreamiterator.a \ ../common/Block/libblock.a \ ../common/libcommon.a \ ../utility/libutility.a \ - 
${THERON_HOME}/Lib/libtherond.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so @@ -32,7 +31,7 @@ libexecutor_a_SOURCES = \ IteratorExecutorSlave.h PortManager.cpp \ PortManager.h -libexecutor_a_LIBADD = ${THERON_HOME}/Lib/libtherond.a \ +libexecutor_a_LIBADD = \ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so diff --git a/Executor/Test/Makefile.am b/Executor/Test/Makefile.am index e72a28609..d3207b600 100644 --- a/Executor/Test/Makefile.am +++ b/Executor/Test/Makefile.am @@ -18,8 +18,7 @@ LDADD = ../libexecutor.a \ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a + ${BOOST_HOME}/stage/lib/libboost_serialization.so noinst_LIBRARIES=libtest.a libtest_a_SOURCES = diff --git a/IndexManager/Makefile.am b/IndexManager/Makefile.am index b103c55d9..187dc0526 100644 --- a/IndexManager/Makefile.am +++ b/IndexManager/Makefile.am @@ -24,9 +24,7 @@ LDADD = ../BlockStreamIterator/libblockstreamiterator.a \ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a - + ${BOOST_HOME}/stage/lib/libboost_serialization.so noinst_LIBRARIES=libindexmanager.a libindexmanager_a_SOURCES = \ CSBIndexBuilding.cpp CSBIndexBuilding.h \ diff --git a/IndexManager/Test/Makefile.am b/IndexManager/Test/Makefile.am index 20c1aca0e..672fd1c08 100644 --- a/IndexManager/Test/Makefile.am +++ b/IndexManager/Test/Makefile.am @@ -26,8 +26,7 @@ LDADD = \ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a 
+ ${BOOST_HOME}/stage/lib/libboost_serialization.so noinst_LIBRARIES=libtest.a libtest_a_SOURCES = diff --git a/Makefile.am b/Makefile.am index 569996f04..f6bd5f124 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,7 +25,6 @@ LDADD = \ Test/utility/libutility.a \ common/serialization/libserialization.a \ Client/libclient.a \ - Client/Test/libtest.a \ Daemon/libdaemon.a \ Executor/libexecutor.a \ Executor/Test/libtest.a \ @@ -59,7 +58,6 @@ LDADD = \ txn_manager/libtxnmanager.a \ ${CAF_HOME}/build/lib/libcaf_core.so \ ${CAF_HOME}/build/lib/libcaf_io.so \ - ${THERON_HOME}/Lib/libtherond.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ ${BOOST_HOME}/stage/lib/libboost_system.so \ ${BOOST_HOME}/stage/lib/libboost_date_time.so \ @@ -74,6 +72,11 @@ LDADD = \ LDADD += catalog/stat/libstat.a +if OPT_DEBUG +LDADD += ${THERON_HOME}/Lib/libtherond.a +else +LDADD += ${THERON_HOME}/Lib/libtheron.a +endif include_HEADERS = Config.h \ Debug.h \ diff --git a/Resource/Makefile.am b/Resource/Makefile.am index 64752d74f..5216d1023 100644 --- a/Resource/Makefile.am +++ b/Resource/Makefile.am @@ -29,4 +29,3 @@ libresouce_a_SOURCES = \ BufferManager.cpp BufferManager.h \ CPUResource.cpp CPUResource.h -libresouce_a_LIBADD = ${THERON_HOME}/Lib/libtherond.a diff --git a/Test/Makefile.am b/Test/Makefile.am index dc5e33bbe..15b9328b8 100644 --- a/Test/Makefile.am +++ b/Test/Makefile.am @@ -34,7 +34,6 @@ LDADD = TestSuit/libtestsuit.a \ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a \ ${GTEST_HOME}/libgtest.a noinst_LIBRARIES=libtest.a diff --git a/Test/TestSuit/Makefile.am b/Test/TestSuit/Makefile.am index 765042f11..bf59b4dee 100644 --- a/Test/TestSuit/Makefile.am +++ b/Test/TestSuit/Makefile.am @@ -29,7 +29,6 @@ LDADD = ../libtest.a \ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ 
${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a \ ${GTEST_HOME}/libgtest.a noinst_LIBRARIES=libtestsuit.a diff --git a/Test/common/Makefile.am b/Test/common/Makefile.am index a9123a8f8..44a351631 100644 --- a/Test/common/Makefile.am +++ b/Test/common/Makefile.am @@ -25,7 +25,6 @@ LDADD = ../../catalog/libcatalog.a \ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a \ ${GTEST_HOME}/libgtest.a noinst_LIBRARIES=libcommon.a diff --git a/Test/utility/Makefile.am b/Test/utility/Makefile.am index 41b756408..19dd28925 100644 --- a/Test/utility/Makefile.am +++ b/Test/utility/Makefile.am @@ -21,7 +21,6 @@ LDADD = ../../common/libcommon.a \ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a \ ${GTEST_HOME}/libgtest.a noinst_LIBRARIES=libutility.a diff --git a/catalog/Makefile.am b/catalog/Makefile.am index 995ea20e8..aa86d38c1 100644 --- a/catalog/Makefile.am +++ b/catalog/Makefile.am @@ -19,12 +19,7 @@ LDADD = ../storage/libstorage.a \ ../common/Schema/libschema.a \ ../loader/libloader.a \ ../common/file_handle/libfilehandle.a \ - ../common/libcommon.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${BOOST_HOME}/stage/lib/libboost_system.a \ - ${BOOST_HOME}/stage/lib/libboost_system.so \ - ${THERON_HOME}/Lib/libtherond.a + ../common/libcommon.a #DIR = ${shell /bin/pwd} #INCLUDES = -I${DIR}/ThirdParty diff --git a/catalog/Test/Makefile.am b/catalog/Test/Makefile.am index ff399e33f..e91aec0e9 100644 --- a/catalog/Test/Makefile.am +++ b/catalog/Test/Makefile.am @@ -18,12 +18,7 @@ LDADD = ../../logical_operator/liblogicalqueryplan.a \ ../stat/libstat.a \ ../libcatalog.a \ ../../common/libcommon.a \ - 
../../utility/libutility.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${BOOST_HOME}/stage/lib/libboost_system.a \ - ${BOOST_HOME}/stage/lib/libboost_system.so \ - ${THERON_HOME}/Lib/libtherond.a + ../../utility/libutility.a noinst_LIBRARIES=libtest.a libtest_a_SOURCES = diff --git a/common/Expression/Makefile.am b/common/Expression/Makefile.am index 8e79c2212..db08acabf 100644 --- a/common/Expression/Makefile.am +++ b/common/Expression/Makefile.am @@ -17,7 +17,6 @@ LDADD = ../../catalog/libcatalog.a \ ../../common/libcommon.a \ ../../common/Schema/libschema.a \ ../../utility/libutility.a \ - ${THERON_HOME}/Lib/libtherond.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_system.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ diff --git a/common/Makefile.am b/common/Makefile.am index 267da40b2..51bcaf0b5 100644 --- a/common/Makefile.am +++ b/common/Makefile.am @@ -21,7 +21,6 @@ endif # types/libtypes.a # ${BOOST_HOME}/stage/lib/libboost_serialization.a \ # ${BOOST_HOME}/stage/lib/libboost_serialization.so \ -# ${THERON_HOME}/Lib/libtherond.a \ # ${GTEST_HOME}/libgtest.a noinst_LIBRARIES=libcommon.a diff --git a/common/Schema/Test/Makefile.am b/common/Schema/Test/Makefile.am index f8d909d23..220de06fb 100644 --- a/common/Schema/Test/Makefile.am +++ b/common/Schema/Test/Makefile.am @@ -26,7 +26,6 @@ LDADD = ../libschema.a \ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${HADOOP_HOME}/lib/native/libhdfs.so\ - ${THERON_HOME}/Lib/libtherond.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ ${GTEST_HOME}/libgtest.a diff --git a/common/expression/Makefile.am b/common/expression/Makefile.am index 8c498a661..f723d4caa 100644 --- a/common/expression/Makefile.am +++ b/common/expression/Makefile.am @@ -11,7 +11,6 @@ LDADD = ../../catalog/libcatalog.a \ 
../../common/libcommon.a \ ../../common/Schema/libschema.a \ ../../utility/libutility.a \ - ${THERON_HOME}/Lib/libtherond.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_system.a \ ${GTEST_HOME}/libgtest.a diff --git a/common/serialization/Makefile.am b/common/serialization/Makefile.am index c5b1f2cb3..7e370d987 100644 --- a/common/serialization/Makefile.am +++ b/common/serialization/Makefile.am @@ -22,8 +22,7 @@ LDADD = ../../physical_operator/libphysicalqueryplan.a \ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a + ${BOOST_HOME}/stage/lib/libboost_serialization.so noinst_LIBRARIES=libserialization.a libserialization_a_SOURCES = \ diff --git a/configure.ac b/configure.ac index 3f3c7fd90..63a6b3d1e 100644 --- a/configure.ac +++ b/configure.ac @@ -2,9 +2,18 @@ AC_INIT([Claims], [0.3], [imdb@ecnu]) AM_INIT_AUTOMAKE([-Wall foreign subdir-objects]) AM_PROG_AR AC_PROG_LIBTOOL -CPPFLAGS="-w -O2 -DTHERON_XS -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS" AM_CONDITIONAL(OPT_TCMALLOC, true) -CXXFLAGS=${CXXFLAGS="-g -std=c++11"} +AM_CONDITIONAL(OPT_DEBUG, false) + +CPPFLAGS="-w -O2 -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS" +if OPT_DEBUG +then +CPPFLAGS+="-g -DTHERON_XS" +else +CPPFLAGS+=" -DNDEBUG" +fi + +CXXFLAGS=${CXXFLAGS=" -std=c++11"} AC_PROG_CXX AC_CONFIG_HEADERS([config.h]) AC_CONFIG_FILES([ diff --git a/loader/AMQ_consumer.cpp b/loader/AMQ_consumer.cpp index b046cf20e..d7312f40f 100644 --- a/loader/AMQ_consumer.cpp +++ b/loader/AMQ_consumer.cpp @@ -52,23 +52,23 @@ void claims::loader::AMQConsumer::run(MasterLoader* mloader) { // Create a ConnectionFactory ActiveMQConnectionFactory* connectionFactory = new ActiveMQConnectionFactory(brokerURI_); - LOG(INFO) << "Create a ConnectionFactory"; + DLOG(INFO) << "Create a ConnectionFactory"; // Create a 
Connection connection_ = connectionFactory->createConnection(); delete connectionFactory; - LOG(INFO) << "Create a Connection"; + DLOG(INFO) << "Create a Connection"; ActiveMQConnection* amqConnection = dynamic_cast(connection_); if (amqConnection != NULL) { amqConnection->addTransportListener(this); } - LOG(INFO) << "Create a ActiveMQConnection"; + DLOG(INFO) << "Create a ActiveMQConnection"; connection_->start(); connection_->setExceptionListener(this); - LOG(INFO) << "ActiveMQConnection is started"; + DLOG(INFO) << "ActiveMQConnection is started"; // Create a Session if (client_ack_) { @@ -76,7 +76,7 @@ void claims::loader::AMQConsumer::run(MasterLoader* mloader) { } else { session_ = connection_->createSession(Session::AUTO_ACKNOWLEDGE); } - LOG(INFO) << "Create a Session"; + DLOG(INFO) << "Create a Session"; // Create the destination (Topic or Queue) if (use_topic_) { @@ -84,12 +84,13 @@ void claims::loader::AMQConsumer::run(MasterLoader* mloader) { } else { destination_ = session_->createQueue(destURI_); } - LOG(INFO) << "Create a destination"; + DLOG(INFO) << "Create a destination"; // Create a MessageConsumer from the Session to the Topic or Queue consumer = session_->createConsumer(destination_); consumer->setMessageListener(this); - LOG(INFO) << "Create a MessageConsumer"; + DLOG(INFO) << "Create a MessageConsumer"; + LOG(INFO) << " ready to receive ingest message from ActiveMQ"; std::cout << "AMQ client listening...." 
<< std::endl; } catch (CMSException& e) { e.printStackTrace(); diff --git a/loader/Makefile.am b/loader/Makefile.am index ac4667d16..e2eae354a 100644 --- a/loader/Makefile.am +++ b/loader/Makefile.am @@ -18,16 +18,11 @@ AM_CPPFLAGS+=-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-b AM_LDFLAGS+=-ltcmalloc endif -LDADD = ../catalog/libcatalog.a \ - ../common/libcommon.a \ - ../common/Block/libblock.a \ - ../common/file_handle/libfilehandle.a \ - ../common/Schema/libschema.a \ - ${HADOOP_HOME}/lib/native/libhdfs.a\ - ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ - ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a +#LDADD = ../catalog/libcatalog.a \ +# ../common/libcommon.a \ +# ../common/Block/libblock.a \ +# ../common/file_handle/libfilehandle.a \ +# ../common/Schema/libschema.a noinst_LIBRARIES=libloader.a libloader_a_SOURCES = \ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 9d57ea6af..28fdb59ce 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -78,12 +78,21 @@ using claims::common::rFailure; using namespace claims::txn; // NOLINT +#define MASTER_LOADER_DEBUG + +#ifdef MASTER_LOADER_DEBUG +#define PERFLOG(info) LOG(INFO) << info << endl; +#else +#define PERFLOG +#endif + namespace claims { namespace loader { void MasterLoader::IngestionRequest::Show() { - LOG(INFO) << "table name:" << table_name_ << ", column separator:" << col_sep_ - << ", row separator:" << row_sep_ - << ", tuples size is:" << tuples_.size(); + DLOG(INFO) << "table name:" << table_name_ + << ", column separator:" << col_sep_ + << ", row separator:" << row_sep_ + << ", tuples size is:" << tuples_.size(); } MasterLoader::MasterLoader() @@ -146,8 +155,8 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, // map. // Consider that: if this function access the item in map just deleted // by above thread, unexpected thing happens. 
- DLOG(INFO) << "received a commit result " << is_commited - << " of txn with id:" << txn_id; + PERFLOG("received a commit result " << is_commited + << " of txn with id:" << txn_id); // cout << "(" << syscall(__NR_gettid) << ")received a commit // result " // << is_commited << "of txn with id:" << txn_id << endl; @@ -172,6 +181,7 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, << " to txn manager"; } mloader->txn_commint_info_.erase(txn_id); + PERFLOG("finished txn with id:" << txn_id); } } catch (const std::out_of_range& e) { LOG(ERROR) << "no find " << txn_id << " in map"; @@ -217,6 +227,10 @@ RetCode MasterLoader::ConnectWithSlaves() { RetCode MasterLoader::Ingest(const string& message, function ack_function) { + static uint64_t debug_consumed_message_count = 0; + __sync_add_and_fetch(&debug_consumed_message_count, 1); + PERFLOG("consumed message :" << debug_consumed_message_count); + RetCode ret = rSuccess; // string message = GetMessage(); @@ -224,8 +238,8 @@ RetCode MasterLoader::Ingest(const string& message, // get message from MQ IngestionRequest req; - EXEC_AND_DLOG(ret, GetRequestFromMessage(message, &req), "got request!", - "failed to get request"); + EXEC_AND_LOG(ret, GetRequestFromMessage(message, &req), "got request!", + "failed to get request"); // parse message and get all tuples of all partitions, then // check the validity of all tuple in message @@ -240,10 +254,10 @@ RetCode MasterLoader::Ingest(const string& message, table->getProjectoin(i)->getPartitioner()->getNumberOfPartitions()); vector columns_validities; - EXEC_AND_DLOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part, - columns_validities), - "got all tuples of every partition", - "failed to get all tuples of every partition"); + EXEC_AND_LOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part, + columns_validities), + "got all tuples of every partition", + "failed to get all tuples of every partition"); if (ret != rSuccess && ret != 
claims::common::rNoMemory) { // TODO(YUKAI): error handle, like sending error message to client LOG(ERROR) << "the tuple is not valid"; @@ -254,15 +268,16 @@ RetCode MasterLoader::Ingest(const string& message, // merge all tuple buffers of partition into one partition buffer vector> partition_buffers( table->getNumberOfProjection()); - EXEC_AND_DLOG(ret, MergePartitionTupleIntoOneBuffer( - table, tuple_buffers_per_part, partition_buffers), - "merged all tuple of same partition into one buffer", - "failed to merge tuples buffers into one buffer"); + EXEC_AND_LOG(ret, MergePartitionTupleIntoOneBuffer( + table, tuple_buffers_per_part, partition_buffers), + "merged all tuple of same partition into one buffer", + "failed to merge tuples buffers into one buffer"); // start transaction from here claims::txn::Ingest ingest; - EXEC_AND_DLOG(ret, ApplyTransaction(table, partition_buffers, ingest), - "applied transaction", "failed to apply transaction"); + EXEC_AND_LOG(ret, ApplyTransaction(table, partition_buffers, ingest), + "applied transaction: " << ingest.id_, + "failed to apply transaction"); spin_lock_.acquire(); txn_commint_info_.insert(std::pair( @@ -278,17 +293,16 @@ RetCode MasterLoader::Ingest(const string& message, DLOG(INFO) << "insert txn " << ingest.id_ << " into map "; // write data log - EXEC_AND_DLOG(ret, WriteLog(table, partition_buffers, ingest), "written log ", - "failed to write log"); + EXEC_AND_LOG(ret, WriteLog(table, partition_buffers, ingest), "written log ", + "failed to write log"); // reply ACK to MQ - EXEC_AND_DLOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); + EXEC_AND_LOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); // distribute partition load task - EXEC_AND_DLOG(ret, - SendPartitionTupleToSlave(table, partition_buffers, ingest), - "sent every partition data to its slave", - "failed to send every partition data to its slave"); + EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers, 
ingest), + "sent every partition data to its slave", + "failed to send every partition data to its slave"); assert(rSuccess == ret); return ret; @@ -508,6 +522,7 @@ RetCode MasterLoader::ApplyTransaction( ProjectionDescriptor* prj = table->getProjectoin(i); uint64_t tuple_length = prj->getSchema()->getTupleMaxSize(); for (int j = 0; j < prj->getPartitioner()->getNumberOfPartitions(); ++j) { + if (partition_buffers[i][j].length_ == 0) continue; req.InsertStrip(GetGlobalPartId(table_id, i, j), tuple_length, partition_buffers[i][j].length_ / tuple_length); // DLOG(INFO) << "the length of partition buffer[" << i << "," << j @@ -530,20 +545,21 @@ RetCode MasterLoader::WriteLog( for (int prj_id = 0; prj_id < partition_buffers.size(); ++prj_id) { for (int part_id = 0; part_id < partition_buffers[prj_id].size(); ++part_id) { + if (0 == partition_buffers[prj_id][part_id].length_) continue; uint64_t global_part_id = GetGlobalPartId(table_id, prj_id, part_id); - EXEC_AND_DLOG( - ret, LogClient::Data(global_part_id, - ingest.strip_list_.at(global_part_id).first, - ingest.strip_list_.at(global_part_id).second, - partition_buffers[prj_id][part_id].buffer_, - partition_buffers[prj_id][part_id].length_), - "written data log for partition:" << global_part_id, - "failed to write data log for partition:" << global_part_id); + EXEC_AND_LOG(ret, + LogClient::Data(global_part_id, + ingest.strip_list_.at(global_part_id).first, + ingest.strip_list_.at(global_part_id).second, + partition_buffers[prj_id][part_id].buffer_, + partition_buffers[prj_id][part_id].length_), + "written data log for partition:" << global_part_id, + "failed to write data log for partition:" << global_part_id); } } - EXEC_AND_DLOG(ret, LogClient::Refresh(), "flushed data log into disk", - "failed to flush data log"); + EXEC_AND_LOG(ret, LogClient::Refresh(), "flushed data log into disk", + "failed to flush data log"); return ret; } @@ -562,6 +578,7 @@ RetCode MasterLoader::SendPartitionTupleToSlave( for (int prj_id = 
0; prj_id < partition_buffers.size(); ++prj_id) { for (int part_id = 0; part_id < partition_buffers[prj_id].size(); ++part_id) { + if (0 == partition_buffers[prj_id][part_id].length_) continue; uint64_t global_part_id = GetGlobalPartId(table_id, prj_id, part_id); LoadPacket packet(ingest.id_, global_part_id, ingest.strip_list_.at(global_part_id).first, @@ -605,7 +622,7 @@ RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( "partition number is not match"); for (int j = 0; j < tuple_buffer_per_part[i].size(); ++j) { int tuple_count = tuple_buffer_per_part[i][j].size(); - if (0 == tuple_count) continue; + // if (0 == tuple_count) continue; int tuple_len = table->getProjectoin(i)->getSchema()->getTupleMaxSize(); int buffer_len = tuple_count * tuple_len; DLOG(INFO) << "the tuple length of prj:" << i << ",part:" << j diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 25611b849..3d091831b 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -60,6 +60,14 @@ using claims::txn::GetTableIdFromGlobalPartId; using std::chrono::milliseconds; using std::chrono::seconds; +#define MASTER_LOADER_DEBUG + +#ifdef MASTER_LOADER_DEBUG +#define PERFLOG(info) LOG(INFO) << info << endl; +#else +#define PERFLOG +#endif + namespace claims { namespace loader { @@ -221,6 +229,7 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { << " bytes"; continue; } + PERFLOG("received packet head"); uint64_t data_length = *reinterpret_cast(head_buffer + LoadPacket::kHeadLength - sizeof(uint64_t)); @@ -244,16 +253,18 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { // LOG(INFO) << "data of message from master is:" << buffer; // deserialization of packet + PERFLOG("got all packet buffer"); LoadPacket packet; - packet.Deserialize(head_buffer, data_buffer); + EXEC_AND_LOG(ret, packet.Deserialize(head_buffer, data_buffer), + "deserialized packet", "failed to deserialize packet"); - EXEC_AND_DLOG(ret, StoreDataInMemory(packet), "stored data", - "failed to store"); + 
EXEC_AND_LOG(ret, StoreDataInMemory(packet), "stored data", + "failed to store"); // return result to master loader - EXEC_AND_DLOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), - "sent commit result to master loader", - "failed to send commit res to master loader"); + EXEC_AND_LOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), + "sent commit result to master loader", + "failed to send commit res to master loader"); if (rSuccess != ret) return ret; } } @@ -280,7 +291,7 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { DLOG(INFO) << "position+offset is:" << packet.pos_ + packet.offset_ << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; - EXEC_AND_DLOG_RETURN( + EXEC_AND_LOG_RETURN( ret, part_storage->AddChunkWithMemoryToNum(last_chunk_id + 1, HDFS), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); diff --git a/loader/test/Makefile.am b/loader/test/Makefile.am index bfbde05c9..fe3cb2b3f 100644 --- a/loader/test/Makefile.am +++ b/loader/test/Makefile.am @@ -19,8 +19,7 @@ LDADD = ../../catalog/libcatalog.a \ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a + ${BOOST_HOME}/stage/lib/libboost_serialization.so noinst_LIBRARIES=libtest.a libtest_a_SOURCES = \ diff --git a/physical_operator/Makefile.am b/physical_operator/Makefile.am index 74056c961..aff93be9b 100644 --- a/physical_operator/Makefile.am +++ b/physical_operator/Makefile.am @@ -23,8 +23,7 @@ LDADD = ../Executor/libexecutor.a \ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a + ${BOOST_HOME}/stage/lib/libboost_serialization.so noinst_LIBRARIES=libphysicalqueryplan.a 
libphysicalqueryplan_a_SOURCES = \ diff --git a/sql_parser/Test/Makefile.am b/sql_parser/Test/Makefile.am index e568f88e0..81cadce70 100755 --- a/sql_parser/Test/Makefile.am +++ b/sql_parser/Test/Makefile.am @@ -28,7 +28,6 @@ LDADD = \ ${HADOOP_HOME}/lib/native/libhdfs.so\ ${HADOOP_HOME}/lib/native/libhdfs.a\ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ - ${THERON_HOME}/Lib/libtherond.a \ ${GTEST_HOME}/libgtest.a diff --git a/sql_parser/ast_node/Makefile.am b/sql_parser/ast_node/Makefile.am index 8a9405b48..f5b0ee50e 100755 --- a/sql_parser/ast_node/Makefile.am +++ b/sql_parser/ast_node/Makefile.am @@ -30,7 +30,6 @@ LDADD = ../../Executor/libexecutor.a \ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a \ ${GTEST_HOME}/libgtest.a diff --git a/sql_parser/parser/Makefile.am b/sql_parser/parser/Makefile.am index d607e955d..995dba5d4 100755 --- a/sql_parser/parser/Makefile.am +++ b/sql_parser/parser/Makefile.am @@ -22,7 +22,6 @@ LDADD = ../../sql_parser/ast_node/libast_node.a \ ${JAVA_HOME}/jre/lib/amd64/server/libjvm.so\ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.so \ - ${THERON_HOME}/Lib/libtherond.a \ ${GTEST_HOME}/libgtest.a diff --git a/storage/Makefile.am b/storage/Makefile.am index f3a70577f..1c9b5838b 100644 --- a/storage/Makefile.am +++ b/storage/Makefile.am @@ -17,7 +17,6 @@ LDADD = ../BufferManager/libbuffermanager.a \ ../common/libcommon.a \ ../common/Block/libblock.a \ ../utility/libutility.a \ - ${THERON_HOME}/Lib/libtherond.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ ${BOOST_HOME}/stage/lib/libboost_serialization.a From 17e8f389fe02c1305dca75df08f0b1728961aac8 Mon Sep 17 00:00:00 2001 From: yukai Date: Mon, 9 May 2016 10:29:22 +0800 Subject: [PATCH 31/58] OPTIMIZE: new GetRequesetFromMessage(); OPTIMIZE: set new broker IP for AMQ; set loading no check validity 
--- common/Schema/Schema.h | 16 ++++++--- common/Schema/SchemaFix.cpp | 46 +++++++++++++++++++++----- common/Schema/SchemaFix.h | 3 ++ loader/AMQ_consumer.cpp | 14 ++++---- loader/master_loader.cpp | 65 ++++++++++++++++++++++++++++++------- loader/master_loader.h | 9 ++++- 6 files changed, 121 insertions(+), 32 deletions(-) diff --git a/common/Schema/Schema.h b/common/Schema/Schema.h index 55c3f8427..631296c53 100755 --- a/common/Schema/Schema.h +++ b/common/Schema/Schema.h @@ -7,11 +7,12 @@ #ifndef SCHEMA_H_ #define SCHEMA_H_ -#include #include #include #include #include +#include +#include #ifdef DMALLOC #include "dmalloc.h" #endif @@ -27,7 +28,7 @@ class Schema { enum schema_type { fixed, varaible }; Schema(const std::vector& columns); Schema(const Schema& r); - Schema(){}; + Schema() {} virtual ~Schema(); virtual unsigned getTupleMaxSize() const = 0; @@ -55,6 +56,13 @@ class Schema { virtual void addColumn(column_type ct, unsigned size){}; virtual void displayTuple(const void* tuple_start_address, const char* spliter = "|") const; + + virtual RetCode ToValue(std::string text_tuple, void* binary_tuple, + const string attr_separator) { + assert(false); + return claims::common::rFailure; + } + /** * @brief Method description: see more in its derived class */ @@ -63,14 +71,14 @@ class Schema { RawDataSource raw_data_source, vector& columns_validities) { assert(false); + return claims::common::rFailure; } - inline virtual void showAccum_off(){}; + inline virtual void showAccum_off() {} bool hasSameSchema(Schema* schema); std::vector columns; virtual std::string getColumnValue(const void* tuple_start_address, int i); - protected: private: friend class boost::serialization::access; template diff --git a/common/Schema/SchemaFix.cpp b/common/Schema/SchemaFix.cpp index 13626fdda..9445f2ada 100755 --- a/common/Schema/SchemaFix.cpp +++ b/common/Schema/SchemaFix.cpp @@ -19,14 +19,7 @@ #include "../../utility/Timer.h" #include "../common/error_define.h" using 
claims::loader::DataIngestion; -using claims::common::rTooFewColumn; -using claims::common::rSuccess; -using claims::common::rIncorrectData; -using claims::common::rIncorrectData; -using claims::common::rInvalidNullData; -using claims::common::rTooLongData; -using claims::common::rTooManyColumn; -using claims::common::rInvalidInsertData; +using namespace claims::common; // NOLINT // #define SCHEMA_FIX_DEBUG // #define SCHEMA_FIX_PERF @@ -101,6 +94,43 @@ int SchemaFix::getColumnOffset(unsigned index) const { return accum_offsets[index]; } +// WARNING: using carefully!!!!! +RetCode SchemaFix::ToValue(std::string text_tuple, void* binary_tuple, + const string attr_separator) { + RetCode ret = rSuccess; + string::size_type prev_pos = 0; + string::size_type pos = 0; + string text_column; + int attr_sep_length = attr_separator.length(); + + /** + * let's think : '|' is column separator, '\n' is line separator + * data format is always: xxx|xxx|xxx|......xxx|\n + */ + for (int i = 0; i < columns.size(); ++i) { + if (pos != string::npos && text_tuple.length() == prev_pos) { + // meet the first column without data + pos = string::npos; + return (ret = rTooFewColumn); + } else { + pos = text_tuple.find(attr_separator, prev_pos); + if (string::npos == pos) { // not the first column without data + return (ret = rFailure); + } else { // correct + columns[i].operate->toValue( + static_cast(binary_tuple) + accum_offsets[i], + text_tuple.substr(prev_pos, pos - prev_pos).c_str()); + prev_pos = pos + attr_sep_length; + } + } + } + if (text_tuple.length() != prev_pos) { // too many column data + return (ret = rTooManyColumn); + } + // PLOG_SF("check_and_to_value func time:" << temp << " us"); + return ret; +} + /* * 检查源数据是否合法,如果来自kSQL, * 若出现error则直接返回, diff --git a/common/Schema/SchemaFix.h b/common/Schema/SchemaFix.h index 0dda0ef4b..ae26f643c 100755 --- a/common/Schema/SchemaFix.h +++ b/common/Schema/SchemaFix.h @@ -54,6 +54,9 @@ class SchemaFix : public Schema { void 
addColumn(column_type ct, unsigned size); unsigned getColumnOffset(unsigned index); + RetCode ToValue(std::string text_tuple, void* binary_tuple, + const string attr_separator); + /** * @brief Method description: check the validity of raw data, maybe reset raw * data value to default or truncate raw data value depend on raw data diff --git a/loader/AMQ_consumer.cpp b/loader/AMQ_consumer.cpp index d7312f40f..47e1c5612 100644 --- a/loader/AMQ_consumer.cpp +++ b/loader/AMQ_consumer.cpp @@ -52,23 +52,23 @@ void claims::loader::AMQConsumer::run(MasterLoader* mloader) { // Create a ConnectionFactory ActiveMQConnectionFactory* connectionFactory = new ActiveMQConnectionFactory(brokerURI_); - DLOG(INFO) << "Create a ConnectionFactory"; + LOG(INFO) << "Create a ConnectionFactory"; // Create a Connection connection_ = connectionFactory->createConnection(); delete connectionFactory; - DLOG(INFO) << "Create a Connection"; + LOG(INFO) << "Create a Connection"; ActiveMQConnection* amqConnection = dynamic_cast(connection_); if (amqConnection != NULL) { amqConnection->addTransportListener(this); } - DLOG(INFO) << "Create a ActiveMQConnection"; + LOG(INFO) << "Create a ActiveMQConnection"; connection_->start(); connection_->setExceptionListener(this); - DLOG(INFO) << "ActiveMQConnection is started"; + LOG(INFO) << "ActiveMQConnection is started"; // Create a Session if (client_ack_) { @@ -76,7 +76,7 @@ void claims::loader::AMQConsumer::run(MasterLoader* mloader) { } else { session_ = connection_->createSession(Session::AUTO_ACKNOWLEDGE); } - DLOG(INFO) << "Create a Session"; + LOG(INFO) << "Create a Session"; // Create the destination (Topic or Queue) if (use_topic_) { @@ -84,12 +84,12 @@ void claims::loader::AMQConsumer::run(MasterLoader* mloader) { } else { destination_ = session_->createQueue(destURI_); } - DLOG(INFO) << "Create a destination"; + LOG(INFO) << "Create a destination"; // Create a MessageConsumer from the Session to the Topic or Queue consumer = 
session_->createConsumer(destination_); consumer->setMessageListener(this); - DLOG(INFO) << "Create a MessageConsumer"; + LOG(INFO) << "Create a MessageConsumer"; LOG(INFO) << " ready to receive ingest message from ActiveMQ"; std::cout << "AMQ client listening...." << std::endl; } catch (CMSException& e) { diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 28fdb59ce..43c4e356e 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -180,8 +180,8 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, DLOG(INFO) << "aborted txn with id:" << txn_id << " to txn manager"; } - mloader->txn_commint_info_.erase(txn_id); PERFLOG("finished txn with id:" << txn_id); + mloader->txn_commint_info_.erase(txn_id); } } catch (const std::out_of_range& e) { LOG(ERROR) << "no find " << txn_id << " in map"; @@ -228,11 +228,20 @@ RetCode MasterLoader::ConnectWithSlaves() { RetCode MasterLoader::Ingest(const string& message, function ack_function) { static uint64_t debug_consumed_message_count = 0; + static double start_time_stamp = 0; + static double txn_1000_end_time_stamp = 0; __sync_add_and_fetch(&debug_consumed_message_count, 1); PERFLOG("consumed message :" << debug_consumed_message_count); + if (1 == debug_consumed_message_count) { + start_time_stamp = GetCurrentMs(); + } + if (1000 == debug_consumed_message_count) { + txn_1000_end_time_stamp = GetCurrentMs(); + cout << "\n\n 1000 txn used " << txn_1000_end_time_stamp - start_time_stamp + << endl; + } RetCode ret = rSuccess; - // string message = GetMessage(); // DLOG(INFO) << "get message:\n" << message; @@ -253,6 +262,7 @@ RetCode MasterLoader::Ingest(const string& message, tuple_buffers_per_part[i].resize( table->getProjectoin(i)->getPartitioner()->getNumberOfPartitions()); +#ifdef CHECK_VALIDITY vector columns_validities; EXEC_AND_LOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part, columns_validities), @@ -264,6 +274,11 @@ RetCode MasterLoader::Ingest(const 
string& message, ack_function(); return rFailure; } +#else + EXEC_AND_LOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part), + "got all tuples of every partition", + "failed to get all tuples of every partition"); +#endif // merge all tuple buffers of partition into one partition buffer vector> partition_buffers( @@ -293,16 +308,17 @@ RetCode MasterLoader::Ingest(const string& message, DLOG(INFO) << "insert txn " << ingest.id_ << " into map "; // write data log - EXEC_AND_LOG(ret, WriteLog(table, partition_buffers, ingest), "written log ", + EXEC_AND_LOG(ret, WriteLog(table, partition_buffers, ingest), "written log", "failed to write log"); // reply ACK to MQ - EXEC_AND_LOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); - + EXEC_AND_DLOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); // distribute partition load task + EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), "sent every partition data to its slave", "failed to send every partition data to its slave"); + assert(rSuccess == ret); return ret; @@ -415,22 +431,40 @@ RetCode MasterLoader::GetRequestFromMessage(const string& message, req->row_sep_ = message.substr(pos, next_pos - pos); pos = next_pos + 1; - string tuple; - string data_string = message.substr(pos); - istringstream iss(data_string); - while (DataIngestion::GetTupleTerminatedBy(iss, tuple, req->row_sep_)) { + // { + // string tuple; + // string data_string = message.substr(pos); + // istringstream iss(data_string); + // while (DataIngestion::GetTupleTerminatedBy(iss, tuple, req->row_sep_)) { + // uint64_t allocated_row_id = __sync_add_and_fetch(&row_id, 1); + // req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ + + // tuple); + // } + // } + int row_seq_length = req->row_sep_.length(); + while (string::npos != (next_pos = message.find(req->row_sep_, pos))) { uint64_t allocated_row_id = __sync_add_and_fetch(&row_id, 1); - 
req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ + tuple); + req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ + + message.substr(pos, next_pos - pos)); + pos = next_pos + row_seq_length; } - req->Show(); + + // req->Show(); return ret; } // map every tuple into associate part +#ifdef CHECK_VALIDITY RetCode MasterLoader::GetPartitionTuples( const IngestionRequest& req, const TableDescriptor* table, vector>>& tuple_buffer_per_part, vector& columns_validities) { +#else +RetCode MasterLoader::GetPartitionTuples( + const IngestionRequest& req, const TableDescriptor* table, + vector>>& tuple_buffer_per_part) { +#endif + RetCode ret = rSuccess; Schema* table_schema = table->getSchema(); MemoryGuard table_schema_guard(table_schema); @@ -448,6 +482,7 @@ RetCode MasterLoader::GetPartitionTuples( void* tuple_buffer = Malloc(table_schema->getTupleMaxSize()); if (tuple_buffer == NULL) return claims::common::rNoMemory; MemoryGuardWithRetCode guard(tuple_buffer, ret); +#ifdef CHECK_VALIDITY if (rSuccess != (ret = table_schema->CheckAndToValue( tuple_string, tuple_buffer, req.col_sep_, RawDataSource::kSQL, columns_validities))) { @@ -469,8 +504,14 @@ RetCode MasterLoader::GetPartitionTuples( return ret; } ++line; +#else + EXEC_AND_RETURN_ERROR( + ret, table_schema->ToValue(tuple_string, tuple_buffer, req.col_sep_), + "tuple is invalid." 
<< tuple_string); +#endif correct_tuple_buffer.push_back(tuple_buffer); } + PERFLOG("all tuples are tovalued"); // map every tuple in different partition for (int i = 0; i < table->getNumberOfProjection(); i++) { @@ -714,7 +755,7 @@ void* MasterLoader::StartMasterLoader(void* arg) { // std::string brokerURI = "failover:(tcp://" - "58.198.176.92:61616?wireFormat=openwire&connection.useAsyncSend=true" + "10.11.1.192:61616?wireFormat=openwire&connection.useAsyncSend=true" // "&transport.commandTracingEnabled=true" // "&transport.tcpTracingEnabled=true" // "&wireFormat.tightEncodingEnabled=true" diff --git a/loader/master_loader.h b/loader/master_loader.h index ce548d9a7..b2a999022 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -58,6 +58,8 @@ using caf::behavior; using caf::event_based_actor; using claims::catalog::TableDescriptor; +// #define CHECK_VALIDITY + class MasterLoader { // public: // enum DataIngestSource { kActiveMQ }; @@ -127,11 +129,16 @@ class MasterLoader { RetCode CheckAndToValue(const IngestionRequest& req, void* tuple_buffer, vector& column_validities); +#ifdef CHECK_VALIDITY RetCode GetPartitionTuples( const IngestionRequest& req, const TableDescriptor* table, vector>>& tuple_buffer_per_part, vector& columns_validities); - +#else + RetCode GetPartitionTuples( + const IngestionRequest& req, const TableDescriptor* table, + vector>>& tuple_buffer_per_part); +#endif /** * copy and merge all tuples buffer of the same partition into one buffer, * and release all memory in tuple_buffer_per_part From c04badd5d153e9e07ebac23bffcf4ecffd8b529a Mon Sep 17 00:00:00 2001 From: yukai Date: Mon, 9 May 2016 13:48:02 +0800 Subject: [PATCH 32/58] ADD: elapsed time for 1000 transactions --- loader/master_loader.cpp | 108 ++++++++++++++++++--------------------- loader/master_loader.h | 7 +++ 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 43c4e356e..2a85ceb4e 100644 --- 
a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +87,9 @@ using namespace claims::txn; // NOLINT #define PERFLOG #endif +uint64_t MasterLoader::debug_consumed_message_count = 0; +timeval MasterLoader::start_time; + namespace claims { namespace loader { void MasterLoader::IngestionRequest::Show() { @@ -119,34 +123,34 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, DLOG(INFO) << "going to push socket into map"; mloader->slave_addr_to_socket_[NodeAddress(ip, "")] = new_slave_fd; DLOG(INFO) << "start to send test message to slave"; - - /// test whether socket works well - // ostringstream oss; - // oss << "hello, i'm master, whose address is " - // << mloader->master_loader_ip << ":" - // << to_string(mloader->master_loader_port) << ". \0"; - // - // int message_length = oss.str().length(); - // DLOG(INFO) << "message length is " << message_length; - // - // if (-1 == - // write(new_slave_fd, - // reinterpret_cast(&message_length), 4)) { - // PLOG(ERROR) << "failed to send message length to slave(" << - // ip << ":" - // << port << ")"; - // } else { - // DLOG(INFO) << "message length is sent"; - // } - // if (-1 == write(new_slave_fd, oss.str().c_str(), - // message_length)) { - // PLOG(ERROR) << "failed to send message to slave(" << ip << - // ":" << port - // << ")"; - // } else { - // DLOG(INFO) << "message buffer is sent"; - // } - + /* + /// test whether socket works well + ostringstream oss; + oss << "hello, i'm master, whose address is " + << mloader->master_loader_ip << ":" + << to_string(mloader->master_loader_port) << ". 
\0"; + + int message_length = oss.str().length(); + DLOG(INFO) << "message length is " << message_length; + + if (-1 == + write(new_slave_fd, + reinterpret_cast(&message_length), 4)) { + PLOG(ERROR) << "failed to send message length to slave(" << ip + << ":" + << port << ")"; + } else { + DLOG(INFO) << "message length is sent"; + } + if (-1 == write(new_slave_fd, oss.str().c_str(), + message_length)) { + PLOG(ERROR) << "failed to send message to slave(" << ip << ":" + << port + << ")"; + } else { + DLOG(INFO) << "message buffer is sent"; + } + */ return 1; }, [=](LoadAckAtom, uint64_t txn_id, bool is_commited) -> int { // NOLINT @@ -196,7 +200,8 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, // return caf::make_message(OkAtom::value); return 1; }, - [=](BindPartAtom, PartitionID part_id, NodeID node_id) -> int { // NOLINT + [=](BindPartAtom, PartitionID part_id, // NOLINT + NodeID node_id) -> int { LOG(INFO) << "get part bind info (T" << part_id.projection_id.table_id << "P" << part_id.projection_id.projection_off << "G" << part_id.partition_off << ") --> " << node_id; @@ -227,31 +232,26 @@ RetCode MasterLoader::ConnectWithSlaves() { RetCode MasterLoader::Ingest(const string& message, function ack_function) { - static uint64_t debug_consumed_message_count = 0; - static double start_time_stamp = 0; - static double txn_1000_end_time_stamp = 0; - __sync_add_and_fetch(&debug_consumed_message_count, 1); - PERFLOG("consumed message :" << debug_consumed_message_count); - if (1 == debug_consumed_message_count) { - start_time_stamp = GetCurrentMs(); + if (1 == __sync_add_and_fetch(&debug_consumed_message_count, 1)) { + gettimeofday(&start_time, NULL); } - if (1000 == debug_consumed_message_count) { - txn_1000_end_time_stamp = GetCurrentMs(); - cout << "\n\n 1000 txn used " << txn_1000_end_time_stamp - start_time_stamp + if (1000 == __sync_add_and_fetch(&debug_consumed_message_count, 1)) { + cout << "\n\n 1000 txn used " << 
GetElapsedTimeInUs(start_time) << " us" << endl; } + PERFLOG("consumed message :" << debug_consumed_message_count); RetCode ret = rSuccess; // string message = GetMessage(); // DLOG(INFO) << "get message:\n" << message; - // get message from MQ + /// get message from MQ IngestionRequest req; EXEC_AND_LOG(ret, GetRequestFromMessage(message, &req), "got request!", "failed to get request"); - // parse message and get all tuples of all partitions, then - // check the validity of all tuple in message + /// parse message and get all tuples of all partitions, then + /// check the validity of all tuple in message TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable(req.table_name_); assert(table != NULL && "table is not exist!"); @@ -280,7 +280,7 @@ RetCode MasterLoader::Ingest(const string& message, "failed to get all tuples of every partition"); #endif - // merge all tuple buffers of partition into one partition buffer + /// merge all tuple buffers of partition into one partition buffer vector> partition_buffers( table->getNumberOfProjection()); EXEC_AND_LOG(ret, MergePartitionTupleIntoOneBuffer( @@ -288,7 +288,7 @@ RetCode MasterLoader::Ingest(const string& message, "merged all tuple of same partition into one buffer", "failed to merge tuples buffers into one buffer"); - // start transaction from here + /// start transaction from here claims::txn::Ingest ingest; EXEC_AND_LOG(ret, ApplyTransaction(table, partition_buffers, ingest), "applied transaction: " << ingest.id_, @@ -298,23 +298,16 @@ RetCode MasterLoader::Ingest(const string& message, txn_commint_info_.insert(std::pair( ingest.id_, CommitInfo(ingest.strip_list_.size()))); spin_lock_.release(); - // // TODO()need to deleted after testing - // try { - // CommitInfo temp = txn_commint_info_.at(ingest.id_); - // } catch (const std::out_of_range& e) { - // LOG(ERROR) << "Oh~~~NO!!!! 
no find " << ingest.id_ << " in map"; - // assert(false); - // } DLOG(INFO) << "insert txn " << ingest.id_ << " into map "; - // write data log + /// write data log EXEC_AND_LOG(ret, WriteLog(table, partition_buffers, ingest), "written log", "failed to write log"); - // reply ACK to MQ + /// reply ACK to MQ EXEC_AND_DLOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); - // distribute partition load task + /// distribute partition load task EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), "sent every partition data to its slave", "failed to send every partition data to its slave"); @@ -435,9 +428,11 @@ RetCode MasterLoader::GetRequestFromMessage(const string& message, // string tuple; // string data_string = message.substr(pos); // istringstream iss(data_string); - // while (DataIngestion::GetTupleTerminatedBy(iss, tuple, req->row_sep_)) { + // while (DataIngestion::GetTupleTerminatedBy(iss, tuple, req->row_sep_)) + // { // uint64_t allocated_row_id = __sync_add_and_fetch(&row_id, 1); - // req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ + + // req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ + // + // tuple); // } // } @@ -638,7 +633,6 @@ RetCode MasterLoader::SendPartitionTupleToSlave( "selected the socket", "failed to select the socket"); assert(socket_fd > 3); - EXEC_AND_DLOG_RETURN(ret, SendPacket(socket_fd, packet_buffer, packet_length), "sent message to slave :" << socket_fd, diff --git a/loader/master_loader.h b/loader/master_loader.h index b2a999022..e212b08a4 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -30,6 +30,8 @@ #define LOADER_MASTER_LOADER_H_ #include +#include +#include #include #include #include @@ -194,6 +196,11 @@ class MasterLoader { unordered_map txn_commint_info_; Lock lock_; SpineLock spin_lock_; + + private: + // for test + static uint64_t debug_consumed_message_count; + static timeval start_time; }; } /* namespace loader */ From 
11336e38af22bde5d833e219dc6a991eac5a8e48 Mon Sep 17 00:00:00 2001 From: yukai Date: Mon, 9 May 2016 18:55:41 +0800 Subject: [PATCH 33/58] OPTIMIZE: doing sending packet in another thread --- common/Schema/SchemaFix.cpp | 26 ++----- loader/load_packet.cpp | 23 +++--- loader/load_packet.h | 14 ++-- loader/master_loader.cpp | 138 +++++++++++++++++++++--------------- loader/master_loader.h | 17 ++++- 5 files changed, 122 insertions(+), 96 deletions(-) diff --git a/common/Schema/SchemaFix.cpp b/common/Schema/SchemaFix.cpp index 9445f2ada..afff5d661 100755 --- a/common/Schema/SchemaFix.cpp +++ b/common/Schema/SchemaFix.cpp @@ -108,27 +108,15 @@ RetCode SchemaFix::ToValue(std::string text_tuple, void* binary_tuple, * data format is always: xxx|xxx|xxx|......xxx|\n */ for (int i = 0; i < columns.size(); ++i) { - if (pos != string::npos && text_tuple.length() == prev_pos) { - // meet the first column without data - pos = string::npos; - return (ret = rTooFewColumn); - } else { - pos = text_tuple.find(attr_separator, prev_pos); - if (string::npos == pos) { // not the first column without data - return (ret = rFailure); - } else { // correct - columns[i].operate->toValue( - static_cast(binary_tuple) + accum_offsets[i], - text_tuple.substr(prev_pos, pos - prev_pos).c_str()); - prev_pos = pos + attr_sep_length; - } - } - } - if (text_tuple.length() != prev_pos) { // too many column data - return (ret = rTooManyColumn); + pos = text_tuple.find(attr_separator, prev_pos); + // correct + columns[i].operate->toValue( + binary_tuple + accum_offsets[i], + text_tuple.substr(prev_pos, pos - prev_pos).c_str()); + prev_pos = pos + attr_sep_length; } // PLOG_SF("check_and_to_value func time:" << temp << " us"); - return ret; + return rSuccess; } /* diff --git a/loader/load_packet.cpp b/loader/load_packet.cpp index 9b9f9badb..d7a0c95f3 100644 --- a/loader/load_packet.cpp +++ b/loader/load_packet.cpp @@ -36,28 +36,27 @@ using namespace claims::common; // NOLINT namespace claims { namespace 
loader { -LoadPacket::~LoadPacket() {} +LoadPacket::~LoadPacket() { DELETE_PTR(packet_buffer_); } -RetCode LoadPacket::Serialize(void*& packet_buffer, - uint64_t& packet_length) const { - packet_length = kHeadLength + data_length_; - packet_buffer = Malloc(packet_length); - if (NULL == packet_length) { +RetCode LoadPacket::Serialize() { + packet_length_ = kHeadLength + data_length_; + packet_buffer_ = Malloc(packet_length_); + if (NULL == packet_length_) { ELOG(rNoMemory, "no memory for packet buffer"); return rNoMemory; } - *reinterpret_cast(packet_buffer) = txn_id_; - *reinterpret_cast(packet_buffer + 1 * sizeof(uint64_t)) = + *reinterpret_cast(packet_buffer_) = txn_id_; + *reinterpret_cast(packet_buffer_ + 1 * sizeof(uint64_t)) = global_part_id_; - *reinterpret_cast(packet_buffer + 2 * sizeof(uint64_t)) = pos_; - *reinterpret_cast(packet_buffer + 3 * sizeof(uint64_t)) = offset_; - *reinterpret_cast(packet_buffer + 4 * sizeof(uint64_t)) = + *reinterpret_cast(packet_buffer_ + 2 * sizeof(uint64_t)) = pos_; + *reinterpret_cast(packet_buffer_ + 3 * sizeof(uint64_t)) = offset_; + *reinterpret_cast(packet_buffer_ + 4 * sizeof(uint64_t)) = data_length_; DLOG(INFO) << "Serialize packet: " << txn_id_ << " " << global_part_id_ << " " << pos_ << " " << offset_ << " " << data_length_; - memcpy(packet_buffer + kHeadLength, data_buffer_, data_length_); + memcpy(packet_buffer_ + kHeadLength, data_buffer_, data_length_); return rSuccess; } diff --git a/loader/load_packet.h b/loader/load_packet.h index d780f049d..0ba9dc5c2 100644 --- a/loader/load_packet.h +++ b/loader/load_packet.h @@ -53,16 +53,18 @@ using OkAtom = caf::atom_constant; struct LoadPacket { public: LoadPacket() {} - LoadPacket(const uint64_t txn_id, const uint64_t g_part_id, uint64_t pos, - uint64_t offset, uint64_t data_length, const void* data_buffer) + LoadPacket(int socket_fd, const uint64_t txn_id, const uint64_t g_part_id, + uint64_t pos, uint64_t offset, uint64_t data_length, + const void* data_buffer) : 
txn_id_(txn_id), global_part_id_(g_part_id), pos_(pos), offset_(offset), data_buffer_(data_buffer), - data_length_(data_length) {} + data_length_(data_length), + socket_fd_(socket_fd) {} ~LoadPacket(); - RetCode Serialize(void*& packet_buffer, uint64_t& packet_length) const; + RetCode Serialize(); RetCode Deserialize(const void* const head_buffer, void* data_buffer); @@ -76,6 +78,10 @@ struct LoadPacket { uint64_t offset_; uint64_t data_length_; void* data_buffer_; + + int socket_fd_ = -1; + uint64_t packet_length_ = 0; + void* packet_buffer_ = NULL; }; } /* namespace loader */ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 2a85ceb4e..bad76334a 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -309,8 +309,8 @@ RetCode MasterLoader::Ingest(const string& message, /// distribute partition load task EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), - "sent every partition data to its slave", - "failed to send every partition data to its slave"); + "sent every partition data to send queu", + "failed to send every partition data to queue"); assert(rSuccess == ret); @@ -472,9 +472,10 @@ RetCode MasterLoader::GetPartitionTuples( // check all tuples to be inserted int line = 0; + int table_tuple_length = table_schema->getTupleMaxSize(); for (auto tuple_string : req.tuples_) { // DLOG(INFO) << "to be inserted tuple:" << tuple_string; - void* tuple_buffer = Malloc(table_schema->getTupleMaxSize()); + void* tuple_buffer = Malloc(table_tuple_length); if (tuple_buffer == NULL) return claims::common::rNoMemory; MemoryGuardWithRetCode guard(tuple_buffer, ret); #ifdef CHECK_VALIDITY @@ -526,7 +527,7 @@ RetCode MasterLoader::GetPartitionTuples( for (auto tuple_buffer : correct_tuple_buffer) { // extract the sub tuple according to the projection schema - void* target = Malloc(prj_schema->getTupleMaxSize()); // newmalloc + void* target = Malloc(tuple_max_length); // newmalloc if (target == NULL) { return (ret = 
claims::common::rNoMemory); } @@ -546,6 +547,47 @@ RetCode MasterLoader::GetPartitionTuples( return ret; } +RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( + const TableDescriptor* table, + vector>>& tuple_buffer_per_part, + vector>& partition_buffers) { + RetCode ret = rSuccess; + assert(tuple_buffer_per_part.size() == table->getNumberOfProjection() && + "projection number is not match!!"); + for (int i = 0; i < tuple_buffer_per_part.size(); ++i) { + assert(tuple_buffer_per_part[i].size() == + table->getProjectoin(i) + ->getPartitioner() + ->getNumberOfPartitions() && + "partition number is not match"); + for (int j = 0; j < tuple_buffer_per_part[i].size(); ++j) { + int tuple_count = tuple_buffer_per_part[i][j].size(); + // if (0 == tuple_count) continue; + int tuple_len = table->getProjectoin(i)->getSchema()->getTupleMaxSize(); + int buffer_len = tuple_count * tuple_len; + DLOG(INFO) << "the tuple length of prj:" << i << ",part:" << j + << ",table:" << table->getTableName() << " is:" << tuple_len; + DLOG(INFO) << "tuple size is:" << tuple_count; + + void* new_buffer = Malloc(buffer_len); + if (NULL == new_buffer) return ret = claims::common::rNoMemory; + + for (int k = 0; k < tuple_count; ++k) { + memcpy(new_buffer + k * tuple_len, tuple_buffer_per_part[i][j][k], + tuple_len); + // release old memory stored tuple buffer + DELETE_PTR(tuple_buffer_per_part[i][j][k]); + } + // push new partition buffer + partition_buffers[i].push_back(PartitionBuffer(new_buffer, buffer_len)); + tuple_buffer_per_part[i][j].clear(); + } + tuple_buffer_per_part[i].clear(); + } + tuple_buffer_per_part.clear(); + return ret; +} + RetCode MasterLoader::ApplyTransaction( const TableDescriptor* table, const vector>& partition_buffers, @@ -616,70 +658,29 @@ RetCode MasterLoader::SendPartitionTupleToSlave( ++part_id) { if (0 == partition_buffers[prj_id][part_id].length_) continue; uint64_t global_part_id = GetGlobalPartId(table_id, prj_id, part_id); - LoadPacket packet(ingest.id_, 
global_part_id, - ingest.strip_list_.at(global_part_id).first, - ingest.strip_list_.at(global_part_id).second, - partition_buffers[prj_id][part_id].length_, - partition_buffers[prj_id][part_id].buffer_); - void* packet_buffer; - MemoryGuard guard(packet_buffer); // auto release by guard - uint64_t packet_length; - EXEC_AND_DLOG_RETURN(ret, packet.Serialize(packet_buffer, packet_length), - "serialized packet into buffer", - "failed to serialize packet"); int socket_fd = -1; EXEC_AND_DLOG_RETURN(ret, SelectSocket(table, prj_id, part_id, socket_fd), "selected the socket", "failed to select the socket"); assert(socket_fd > 3); - EXEC_AND_DLOG_RETURN(ret, - SendPacket(socket_fd, packet_buffer, packet_length), - "sent message to slave :" << socket_fd, - "failed to sent message to slave :" << socket_fd); - } - } - return ret; -} -RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( - const TableDescriptor* table, - vector>>& tuple_buffer_per_part, - vector>& partition_buffers) { - RetCode ret = rSuccess; - assert(tuple_buffer_per_part.size() == table->getNumberOfProjection() && - "projection number is not match!!"); - for (int i = 0; i < tuple_buffer_per_part.size(); ++i) { - assert(tuple_buffer_per_part[i].size() == - table->getProjectoin(i) - ->getPartitioner() - ->getNumberOfPartitions() && - "partition number is not match"); - for (int j = 0; j < tuple_buffer_per_part[i].size(); ++j) { - int tuple_count = tuple_buffer_per_part[i][j].size(); - // if (0 == tuple_count) continue; - int tuple_len = table->getProjectoin(i)->getSchema()->getTupleMaxSize(); - int buffer_len = tuple_count * tuple_len; - DLOG(INFO) << "the tuple length of prj:" << i << ",part:" << j - << ",table:" << table->getTableName() << " is:" << tuple_len; - DLOG(INFO) << "tuple size is:" << tuple_count; + LoadPacket* packet = + new LoadPacket(socket_fd, ingest.id_, global_part_id, + ingest.strip_list_.at(global_part_id).first, + ingest.strip_list_.at(global_part_id).second, + 
partition_buffers[prj_id][part_id].length_, + partition_buffers[prj_id][part_id].buffer_); - void* new_buffer = Malloc(buffer_len); - if (NULL == new_buffer) return ret = claims::common::rNoMemory; + EXEC_AND_DLOG_RETURN(ret, packet->Serialize(), + "serialized packet into buffer", + "failed to serialize packet"); - for (int k = 0; k < tuple_count; ++k) { - memcpy(new_buffer + k * tuple_len, tuple_buffer_per_part[i][j][k], - tuple_len); - // release old memory stored tuple buffer - DELETE_PTR(tuple_buffer_per_part[i][j][k]); - } - // push new partition buffer - partition_buffers[i].push_back(PartitionBuffer(new_buffer, buffer_len)); - tuple_buffer_per_part[i][j].clear(); + LockGuard guard(packet_queue_lock_); + packet_queue_.push(packet); + packet_to_send_count_.post(); } - tuple_buffer_per_part[i].clear(); } - tuple_buffer_per_part.clear(); return ret; } @@ -704,7 +705,6 @@ RetCode MasterLoader::SelectSocket(const TableDescriptor* table, RetCode MasterLoader::SendPacket(const int socket_fd, const void* const packet_buffer, const uint64_t packet_length) { - LockGuard guard(lock_); size_t total_write_num = 0; while (total_write_num < packet_length) { ssize_t write_num = write( @@ -721,6 +721,25 @@ RetCode MasterLoader::SendPacket(const int socket_fd, return rSuccess; } +void* MasterLoader::SendPacketWork(void* arg) { + MasterLoader* loader = static_cast(arg); + while (1) { + loader->packet_to_send_count_.wait(); + LoadPacket* packet = nullptr; + { + LockGuard guard(loader->packet_queue_lock_); + packet = loader->packet_queue_.front(); + loader->packet_queue_.pop(); + } + + RetCode ret = rSuccess; + EXEC_AND_LOG(ret, SendPacket(packet->socket_fd_, packet->packet_buffer_, + packet->packet_length_), + "sent packet to" << packet->socket_fd_, + "failed to send packet"); + DELETE_PTR(packet); + } +} void* MasterLoader::Work(void* arg) { WorkerPara* para = static_cast(arg); AMQConsumer consumer(para->brokerURI_, para->destURI_, para->use_topic_, @@ -781,6 +800,9 @@ void* 
MasterLoader::StartMasterLoader(void* arg) { cin >> temp; cout << "Well , start flag is received" << std::endl; + Environment::getInstance()->getThreadPool()->AddTask( + MasterLoader::SendPacketWork, master_loader); + // AMQConsumer consumer(brokerURI, destURI, use_topics, client_ack); // consumer.run(master_loader); for (int i = 0; i < Config::master_loader_thread_num - 1; ++i) { diff --git a/loader/master_loader.h b/loader/master_loader.h index e212b08a4..da31c9127 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -37,6 +37,7 @@ #include #include #include "caf/all.hpp" +#include #include "./validity.h" #include "../common/error_define.h" @@ -51,6 +52,7 @@ namespace claims { namespace catalog { class TableDescriptor; } + namespace loader { using std::map; @@ -60,6 +62,8 @@ using caf::behavior; using caf::event_based_actor; using claims::catalog::TableDescriptor; +class LoadPacket; + // #define CHECK_VALIDITY class MasterLoader { @@ -169,9 +173,6 @@ class MasterLoader { RetCode SelectSocket(const TableDescriptor* table, const uint64_t prj_id, const uint64_t part_id, int& socket_fd); - RetCode SendPacket(const int socket_fd, const void* const packet_buffer, - const uint64_t packet_length); - RetCode GetSlaveNetAddr(); RetCode SetSocketWithSlaves(); RetCode GetSocketFdConnectedWithSlave(string ip, int port, int* connected_fd); @@ -180,9 +181,15 @@ class MasterLoader { static behavior ReceiveSlaveReg(event_based_actor* self, MasterLoader* mloader); + static RetCode SendPacket(const int socket_fd, + const void* const packet_buffer, + const uint64_t packet_length); public: static void* Work(void* para); + + static void* SendPacketWork(void* arg); + static void* StartMasterLoader(void* arg); private: @@ -197,6 +204,10 @@ class MasterLoader { Lock lock_; SpineLock spin_lock_; + std::queue packet_queue_; + SpineLock packet_queue_lock_; + semaphore packet_to_send_count_; + private: // for test static uint64_t debug_consumed_message_count; From 
8d10e0bd467fc23910e8331234ecb5a90eef54da Mon Sep 17 00:00:00 2001 From: yukai Date: Tue, 10 May 2016 12:24:33 +0800 Subject: [PATCH 34/58] OPTIMIZE: disable some debug log to get better performance;BUG: some txn id are duplicated --- loader/master_loader.cpp | 133 +++++++++++++++++++++------------------ loader/master_loader.h | 1 + loader/slave_loader.cpp | 53 ++++++++-------- 3 files changed, 101 insertions(+), 86 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index bad76334a..9159d3145 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -79,16 +79,17 @@ using claims::common::rFailure; using namespace claims::txn; // NOLINT -#define MASTER_LOADER_DEBUG +// #define MASTER_LOADER_DEBUG #ifdef MASTER_LOADER_DEBUG #define PERFLOG(info) LOG(INFO) << info << endl; #else -#define PERFLOG +#define PERFLOG(info) #endif uint64_t MasterLoader::debug_consumed_message_count = 0; timeval MasterLoader::start_time; +uint64_t MasterLoader::get_request_time = 0; namespace claims { namespace loader { @@ -154,13 +155,18 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, return 1; }, [=](LoadAckAtom, uint64_t txn_id, bool is_commited) -> int { // NOLINT - // TODO(ANYONE): there should be a thread checking whether transaction - // overtime periodically and abort these transaction and delete from - // map. - // Consider that: if this function access the item in map just deleted - // by above thread, unexpected thing happens. + + /* + TODO(ANYONE): there should be a thread checking whether + transaction overtime periodically and abort these transaction + and delete from map. + Consider that: if this function access the item in map just deleted + by above thread, unexpected thing happens. 
+ */ + PERFLOG("received a commit result " << is_commited << " of txn with id:" << txn_id); + // cout << "(" << syscall(__NR_gettid) << ")received a commit // result " // << is_commited << "of txn with id:" << txn_id << endl; @@ -184,7 +190,7 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, DLOG(INFO) << "aborted txn with id:" << txn_id << " to txn manager"; } - PERFLOG("finished txn with id:" << txn_id); + LOG(INFO) << "finished txn with id:" << txn_id; mloader->txn_commint_info_.erase(txn_id); } } catch (const std::out_of_range& e) { @@ -238,6 +244,7 @@ RetCode MasterLoader::Ingest(const string& message, if (1000 == __sync_add_and_fetch(&debug_consumed_message_count, 1)) { cout << "\n\n 1000 txn used " << GetElapsedTimeInUs(start_time) << " us" << endl; + cout << " 1000 txn get request used " << get_request_time << " us" << endl; } PERFLOG("consumed message :" << debug_consumed_message_count); @@ -246,9 +253,11 @@ RetCode MasterLoader::Ingest(const string& message, // DLOG(INFO) << "get message:\n" << message; /// get message from MQ + GETCURRENTTIME(req_start); IngestionRequest req; - EXEC_AND_LOG(ret, GetRequestFromMessage(message, &req), "got request!", - "failed to get request"); + EXEC_AND_DLOG(ret, GetRequestFromMessage(message, &req), "got request!", + "failed to get request"); + __sync_add_and_fetch(&get_request_time, GetElapsedTimeInUs(req_start)); /// parse message and get all tuples of all partitions, then /// check the validity of all tuple in message @@ -264,10 +273,10 @@ RetCode MasterLoader::Ingest(const string& message, #ifdef CHECK_VALIDITY vector columns_validities; - EXEC_AND_LOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part, - columns_validities), - "got all tuples of every partition", - "failed to get all tuples of every partition"); + EXEC_AND_DLOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part, + columns_validities), + "got all tuples of every partition", + "failed to get all tuples of every 
partition"); if (ret != rSuccess && ret != claims::common::rNoMemory) { // TODO(YUKAI): error handle, like sending error message to client LOG(ERROR) << "the tuple is not valid"; @@ -275,18 +284,18 @@ RetCode MasterLoader::Ingest(const string& message, return rFailure; } #else - EXEC_AND_LOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part), - "got all tuples of every partition", - "failed to get all tuples of every partition"); + EXEC_AND_DLOG(ret, GetPartitionTuples(req, table, tuple_buffers_per_part), + "got all tuples of every partition", + "failed to get all tuples of every partition"); #endif /// merge all tuple buffers of partition into one partition buffer vector> partition_buffers( table->getNumberOfProjection()); - EXEC_AND_LOG(ret, MergePartitionTupleIntoOneBuffer( - table, tuple_buffers_per_part, partition_buffers), - "merged all tuple of same partition into one buffer", - "failed to merge tuples buffers into one buffer"); + EXEC_AND_DLOG(ret, MergePartitionTupleIntoOneBuffer( + table, tuple_buffers_per_part, partition_buffers), + "merged all tuple of same partition into one buffer", + "failed to merge tuples buffers into one buffer"); /// start transaction from here claims::txn::Ingest ingest; @@ -301,16 +310,17 @@ RetCode MasterLoader::Ingest(const string& message, DLOG(INFO) << "insert txn " << ingest.id_ << " into map "; /// write data log - EXEC_AND_LOG(ret, WriteLog(table, partition_buffers, ingest), "written log", - "failed to write log"); + EXEC_AND_DLOG(ret, WriteLog(table, partition_buffers, ingest), "written log", + "failed to write log"); /// reply ACK to MQ EXEC_AND_DLOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); /// distribute partition load task - EXEC_AND_LOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), - "sent every partition data to send queu", - "failed to send every partition data to queue"); + EXEC_AND_DLOG(ret, + SendPartitionTupleToSlave(table, partition_buffers, ingest), + "sent 
every partition data to send queu", + "failed to send every partition data to queue"); assert(rSuccess == ret); @@ -424,26 +434,26 @@ RetCode MasterLoader::GetRequestFromMessage(const string& message, req->row_sep_ = message.substr(pos, next_pos - pos); pos = next_pos + 1; - // { - // string tuple; - // string data_string = message.substr(pos); - // istringstream iss(data_string); - // while (DataIngestion::GetTupleTerminatedBy(iss, tuple, req->row_sep_)) - // { - // uint64_t allocated_row_id = __sync_add_and_fetch(&row_id, 1); - // req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ - // + - // tuple); - // } - // } - int row_seq_length = req->row_sep_.length(); - while (string::npos != (next_pos = message.find(req->row_sep_, pos))) { - uint64_t allocated_row_id = __sync_add_and_fetch(&row_id, 1); - req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ + - message.substr(pos, next_pos - pos)); - pos = next_pos + row_seq_length; - } + { + string tuple; + string data_string = message.substr(pos); + istringstream iss(data_string); + while (DataIngestion::GetTupleTerminatedBy(iss, tuple, req->row_sep_)) { + uint64_t allocated_row_id = __sync_add_and_fetch(&row_id, 1); + req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ + + tuple); + } + } + /* { + int row_seq_length = req->row_sep_.length(); + while (string::npos != (next_pos = message.find(req->row_sep_, pos))) { + uint64_t allocated_row_id = __sync_add_and_fetch(&row_id, 1); + req->tuples_.push_back(to_string(allocated_row_id) + req->col_sep_ + + message.substr(pos, next_pos - pos)); + pos = next_pos + row_seq_length; + } + }*/ // req->Show(); return ret; } @@ -562,7 +572,11 @@ RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( "partition number is not match"); for (int j = 0; j < tuple_buffer_per_part[i].size(); ++j) { int tuple_count = tuple_buffer_per_part[i][j].size(); - // if (0 == tuple_count) continue; + /* + * even if it is empty it has to be pushed into 
buffer, the index in + * buffer indicates the index of partition + */ + // if (0 == tuple_count) continue; int tuple_len = table->getProjectoin(i)->getSchema()->getTupleMaxSize(); int buffer_len = tuple_count * tuple_len; DLOG(INFO) << "the tuple length of prj:" << i << ",part:" << j @@ -626,18 +640,18 @@ RetCode MasterLoader::WriteLog( if (0 == partition_buffers[prj_id][part_id].length_) continue; uint64_t global_part_id = GetGlobalPartId(table_id, prj_id, part_id); - EXEC_AND_LOG(ret, - LogClient::Data(global_part_id, - ingest.strip_list_.at(global_part_id).first, - ingest.strip_list_.at(global_part_id).second, - partition_buffers[prj_id][part_id].buffer_, - partition_buffers[prj_id][part_id].length_), - "written data log for partition:" << global_part_id, - "failed to write data log for partition:" << global_part_id); + EXEC_AND_DLOG( + ret, LogClient::Data(global_part_id, + ingest.strip_list_.at(global_part_id).first, + ingest.strip_list_.at(global_part_id).second, + partition_buffers[prj_id][part_id].buffer_, + partition_buffers[prj_id][part_id].length_), + "written data log for partition:" << global_part_id, + "failed to write data log for partition:" << global_part_id); } } - EXEC_AND_LOG(ret, LogClient::Refresh(), "flushed data log into disk", - "failed to flush data log"); + EXEC_AND_DLOG(ret, LogClient::Refresh(), "flushed data log into disk", + "failed to flush data log"); return ret; } @@ -733,10 +747,9 @@ void* MasterLoader::SendPacketWork(void* arg) { } RetCode ret = rSuccess; - EXEC_AND_LOG(ret, SendPacket(packet->socket_fd_, packet->packet_buffer_, - packet->packet_length_), - "sent packet to" << packet->socket_fd_, - "failed to send packet"); + EXEC_AND_DLOG(ret, SendPacket(packet->socket_fd_, packet->packet_buffer_, + packet->packet_length_), + "sent packet " << packet->txn_id_, "failed to send packet"); DELETE_PTR(packet); } } diff --git a/loader/master_loader.h b/loader/master_loader.h index da31c9127..be3804e68 100644 --- 
a/loader/master_loader.h +++ b/loader/master_loader.h @@ -211,6 +211,7 @@ class MasterLoader { private: // for test static uint64_t debug_consumed_message_count; + static uint64_t get_request_time; static timeval start_time; }; diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 3d091831b..f768de2e8 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -60,7 +60,7 @@ using claims::txn::GetTableIdFromGlobalPartId; using std::chrono::milliseconds; using std::chrono::seconds; -#define MASTER_LOADER_DEBUG +// #define MASTER_LOADER_DEBUG #ifdef MASTER_LOADER_DEBUG #define PERFLOG(info) LOG(INFO) << info << endl; @@ -85,9 +85,9 @@ RetCode SlaveLoader::ConnectWithMaster() { int ret = rSuccess; int retry_time = 10; for (int i = 0; Clean(), i < retry_time; ++i) { // if failed, call Clean() - EXEC_AND_DLOG(ret, EstablishListeningSocket(), - "established listening socket", - "failed to establish listening socket in " << i << " times"); + EXEC_AND_LOG(ret, EstablishListeningSocket(), + "established listening socket", + "failed to establish listening socket in " << i << " times"); if (rSuccess == ret) break; } if (rSuccess != ret) { @@ -96,8 +96,8 @@ RetCode SlaveLoader::ConnectWithMaster() { } for (int i = 1; i <= retry_time; ++i) { - EXEC_AND_DLOG(ret, SendSelfAddrToMaster(), "sent self ip/port to master", - "failed to send self ip/port to master in " << i << " times"); + EXEC_AND_LOG(ret, SendSelfAddrToMaster(), "sent self ip/port to master", + "failed to send self ip/port to master in " << i << " times"); if (rSuccess == ret) break; sleep(1); } @@ -107,9 +107,9 @@ RetCode SlaveLoader::ConnectWithMaster() { } for (int i = 0; i < retry_time; ++i) { - EXEC_AND_DLOG(ret, GetConnectedSocket(), "got connected socket with master", - "failed to get connected socket with master in " << i - << " times"); + EXEC_AND_LOG(ret, GetConnectedSocket(), "got connected socket with master", + "failed to get connected socket with master in " << i + << " times"); 
if (rSuccess == ret) break; } if (rSuccess != ret) Clean(); @@ -165,16 +165,16 @@ RetCode SlaveLoader::SendSelfAddrToMaster() { // remote_actor(Config::master_loader_ip, Config::master_loader_port); // self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port); // }); - DLOG(INFO) << "going to send self (" << self_ip << ":" << self_port << ")" - << "to (" << Config::master_loader_ip << ":" - << Config::master_loader_port << ")"; + LOG(INFO) << "going to send self (" << self_ip << ":" << self_port << ")" + << "to (" << Config::master_loader_ip << ":" + << Config::master_loader_port << ")"; try { auto master_actor = remote_actor(Config::master_loader_ip, Config::master_loader_port); caf::scoped_actor self; self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port) .await([&](int r) { // NOLINT - DLOG(INFO) << "sent ip&port and received response"; + LOG(INFO) << "sent ip&port and received response"; }); } catch (exception& e) { LOG(ERROR) << "can't send self ip&port to master loader. 
" << e.what(); @@ -186,7 +186,7 @@ RetCode SlaveLoader::SendSelfAddrToMaster() { RetCode SlaveLoader::GetConnectedSocket() { assert(listening_fd_ > 3); OutputFdIpPort(listening_fd_); - DLOG(INFO) << "fd is accepting..."; + LOG(INFO) << "fd is accepting..."; struct sockaddr_in master_addr; socklen_t len = sizeof(sockaddr_in); @@ -204,18 +204,18 @@ void SlaveLoader::OutputFdIpPort(int fd) { if (-1 == getsockname(fd, (struct sockaddr*)(&temp_addr), &addr_len)) { PLOG(ERROR) << "failed to get socket name "; } - DLOG(INFO) << "fd ----> (" << inet_ntoa(temp_addr.sin_addr) << ":" - << ntohs(temp_addr.sin_port) << ")"; + LOG(INFO) << "fd ----> (" << inet_ntoa(temp_addr.sin_addr) << ":" + << ntohs(temp_addr.sin_port) << ")"; } RetCode SlaveLoader::ReceiveAndWorkLoop() { assert(master_fd_ > 3); char head_buffer[LoadPacket::kHeadLength]; - DLOG(INFO) << "slave is receiving ..."; + LOG(INFO) << "slave is receiving ..."; while (1) { RetCode ret = rSuccess; - // get load packet + /// get load packet int real_read_num; if (-1 == (real_read_num = recv(master_fd_, head_buffer, LoadPacket::kHeadLength, MSG_WAITALL))) { @@ -252,18 +252,19 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { } // LOG(INFO) << "data of message from master is:" << buffer; - // deserialization of packet + /// deserialization of packet PERFLOG("got all packet buffer"); LoadPacket packet; - EXEC_AND_LOG(ret, packet.Deserialize(head_buffer, data_buffer), - "deserialized packet", "failed to deserialize packet"); + EXEC_AND_DLOG(ret, packet.Deserialize(head_buffer, data_buffer), + "deserialized packet", "failed to deserialize packet"); - EXEC_AND_LOG(ret, StoreDataInMemory(packet), "stored data", - "failed to store"); + EXEC_AND_DLOG(ret, StoreDataInMemory(packet), "stored data", + "failed to store"); - // return result to master loader + /// return result to master loader EXEC_AND_LOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), - "sent commit result to master loader", + "sent commit result of " << 
packet.txn_id_ + << " to master loader", "failed to send commit res to master loader"); if (rSuccess != ret) return ret; } @@ -291,7 +292,7 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { DLOG(INFO) << "position+offset is:" << packet.pos_ + packet.offset_ << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; - EXEC_AND_LOG_RETURN( + EXEC_AND_DLOG_RETURN( ret, part_storage->AddChunkWithMemoryToNum(last_chunk_id + 1, HDFS), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); From c2f820884d77fec65edc0c6c7020a8305d029e28 Mon Sep 17 00:00:00 2001 From: yukai Date: Wed, 11 May 2016 15:35:46 +0800 Subject: [PATCH 35/58] FIX: bug caused by multiple txn_servers and lack of lock in reading commit_info; ADD: switch release/debug in configure.ac; ADD: multi-thread sending with multiple queues --- Environment.cpp | 2 +- configure.ac | 37 ++++++++++--- loader/master_loader.cpp | 110 +++++++++++++++++++++++++++++-------- loader/master_loader.h | 20 ++++--- loader/slave_loader.cpp | 1 + txn_manager/txn_server.cpp | 20 +++++++ txn_manager/txn_server.hpp | 86 ++++++++++++----------------- 7 files changed, 183 insertions(+), 93 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index ccf818725..5be429c71 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -232,7 +232,7 @@ bool Environment::InitLoader() { master_thread.detach(); DLOG(INFO) << "started thread as master loader"; - TxnServer::Init(6); + // TxnServer::Init(6); } usleep(10000); diff --git a/configure.ac b/configure.ac index 63a6b3d1e..9627af218 100644 --- a/configure.ac +++ b/configure.ac @@ -3,17 +3,36 @@ AM_INIT_AUTOMAKE([-Wall foreign subdir-objects]) AM_PROG_AR AC_PROG_LIBTOOL AM_CONDITIONAL(OPT_TCMALLOC, true) -AM_CONDITIONAL(OPT_DEBUG, false) +#AM_CONDITIONAL(OPT_DEBUG, true) + +CPPFLAGS=" -w -O2 -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS " + +CXXFLAGS=${CXXFLAGS=" -std=c++11 "} + +# enable debug or not --- add by YuKai +AC_ARG_ENABLE(debug, 
AS_HELP_STRING([--enable-debug], [enable DEBUG mode(default=yes)]), + [], + [enable_debug=yes]) +AS_IF([test "x$enable_debug" = "xyes"],[ CPPFLAGS+=" -g -DTHERON_XS " + AM_CONDITIONAL(OPT_DEBUG, true) + #CXXFLAGS+=" ${THERON_HOME}/Lib/libtherond.a " + ], + [test "x$enable_debug" = "xno"], [ CPPFLAGS+=" -DNDEBUG " + AM_CONDITIONAL(OPT_DEBUG, false) + #CXXFLAGS+=" ${THERON_HOME}/Lib/libtheron.a " + ] + ) + + +#if OPT_DEBUG; then +# CPPFLAGS-=" -DNDEBUG" +# CPPFLAGS+="-g -DTHERON_XS -DDEBUG" +#else +# CPPFLAGS-="-g -DTHERON_XS -DDEBUG" +# CPPFLAGS+=" -DNDEBUG" +#fi -CPPFLAGS="-w -O2 -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS" -if OPT_DEBUG -then -CPPFLAGS+="-g -DTHERON_XS" -else -CPPFLAGS+=" -DNDEBUG" -fi -CXXFLAGS=${CXXFLAGS=" -std=c++11"} AC_PROG_CXX AC_CONFIG_HEADERS([config.h]) AC_CONFIG_FILES([ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 9159d3145..51d3f9749 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "caf/all.hpp" @@ -79,6 +80,8 @@ using claims::common::rFailure; using namespace claims::txn; // NOLINT +// #define SEND_THREAD + // #define MASTER_LOADER_DEBUG #ifdef MASTER_LOADER_DEBUG @@ -87,6 +90,7 @@ using namespace claims::txn; // NOLINT #define PERFLOG(info) #endif +uint64_t MasterLoader::debug_finished_txn_count = 0; uint64_t MasterLoader::debug_consumed_message_count = 0; timeval MasterLoader::start_time; uint64_t MasterLoader::get_request_time = 0; @@ -102,9 +106,28 @@ void MasterLoader::IngestionRequest::Show() { MasterLoader::MasterLoader() : master_loader_ip_(Config::master_loader_ip), - master_loader_port_(Config::master_loader_port) {} + master_loader_port_(Config::master_loader_port), + send_thread_num_(Config::master_loader_thread_num / 4 + 1) { +#ifdef SEND_THREAD + packet_queues_ = new queue[send_thread_num_]; + packet_queue_lock_ = new SpineLock[send_thread_num_]; + packet_queue_to_send_count_ = new 
semaphore[send_thread_num_]; +#endif +} -MasterLoader::~MasterLoader() {} +MasterLoader::~MasterLoader() { +#ifdef SEND_THREAD + for (int i = 0; i < send_thread_num_; ++i) { + while (!packet_queues_[i].empty()) { + DELETE_PTR(packet_queues_[i].front()); + packet_queues_[i].pop(); + } + } +#endif + DELETE_ARRAY(packet_queues_); + DELETE_ARRAY(packet_queue_lock_); + DELETE_ARRAY(packet_queue_to_send_count_); +} static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, MasterLoader* mloader) { @@ -117,12 +140,14 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, mloader->GetSocketFdConnectedWithSlave(ip, port, &new_slave_fd)) { LOG(ERROR) << "failed to get connected fd with slave"; } else { - LOG(INFO) << "succeed to get connected fd with slave"; + LOG(INFO) << "succeed to get connected fd " << new_slave_fd + << "with slave"; } assert(new_slave_fd > 3); DLOG(INFO) << "going to push socket into map"; mloader->slave_addr_to_socket_[NodeAddress(ip, "")] = new_slave_fd; + mloader->socket_fd_to_lock_[new_slave_fd] = Lock(); DLOG(INFO) << "start to send test message to slave"; /* /// test whether socket works well @@ -164,14 +189,16 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, by above thread, unexpected thing happens. 
*/ - PERFLOG("received a commit result " << is_commited - << " of txn with id:" << txn_id); + DLOG(INFO) << "received a commit result " << is_commited + << " of txn with id:" << txn_id; // cout << "(" << syscall(__NR_gettid) << ")received a commit // result " // << is_commited << "of txn with id:" << txn_id << endl; try { + mloader->commit_info_spin_lock_.acquire(); CommitInfo& commit_info = mloader->txn_commint_info_.at(txn_id); + mloader->commit_info_spin_lock_.release(); if (is_commited) { __sync_add_and_fetch(&commit_info.commited_part_num_, 1); @@ -191,10 +218,17 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, << " to txn manager"; } LOG(INFO) << "finished txn with id:" << txn_id; + mloader->commit_info_spin_lock_.acquire(); mloader->txn_commint_info_.erase(txn_id); + mloader->commit_info_spin_lock_.release(); + if (++debug_finished_txn_count == 1000) { + cout << "\n\n 1000 txn used " << GetElapsedTimeInUs(start_time) + << " us" << endl; + } } } catch (const std::out_of_range& e) { LOG(ERROR) << "no find " << txn_id << " in map"; + // abort(); assert(false); } return 1; @@ -242,11 +276,9 @@ RetCode MasterLoader::Ingest(const string& message, gettimeofday(&start_time, NULL); } if (1000 == __sync_add_and_fetch(&debug_consumed_message_count, 1)) { - cout << "\n\n 1000 txn used " << GetElapsedTimeInUs(start_time) << " us" - << endl; cout << " 1000 txn get request used " << get_request_time << " us" << endl; } - PERFLOG("consumed message :" << debug_consumed_message_count); + LOG(INFO) << "consumed message :" << debug_consumed_message_count; RetCode ret = rSuccess; // string message = GetMessage(); @@ -303,10 +335,10 @@ RetCode MasterLoader::Ingest(const string& message, "applied transaction: " << ingest.id_, "failed to apply transaction"); - spin_lock_.acquire(); + commit_info_spin_lock_.acquire(); txn_commint_info_.insert(std::pair( ingest.id_, CommitInfo(ingest.strip_list_.size()))); - spin_lock_.release(); + 
commit_info_spin_lock_.release(); DLOG(INFO) << "insert txn " << ingest.id_ << " into map "; /// write data log @@ -319,7 +351,7 @@ RetCode MasterLoader::Ingest(const string& message, /// distribute partition load task EXEC_AND_DLOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), - "sent every partition data to send queu", + "sent every partition data to send queue", "failed to send every partition data to queue"); assert(rSuccess == ret); @@ -679,6 +711,7 @@ RetCode MasterLoader::SendPartitionTupleToSlave( "failed to select the socket"); assert(socket_fd > 3); +#ifdef SEND_THREAD LoadPacket* packet = new LoadPacket(socket_fd, ingest.id_, global_part_id, ingest.strip_list_.at(global_part_id).first, @@ -690,9 +723,28 @@ RetCode MasterLoader::SendPartitionTupleToSlave( "serialized packet into buffer", "failed to serialize packet"); - LockGuard guard(packet_queue_lock_); - packet_queue_.push(packet); - packet_to_send_count_.post(); + int queue_index = socket_fd % send_thread_num_; + assert(queue_index < send_thread_num_); + { + LockGuard guard(packet_queue_lock_[queue_index]); + packet_queues_[queue_index].push(packet); + } + packet_queue_to_send_count_[queue_index].post(); +#else + LoadPacket packet(socket_fd, ingest.id_, global_part_id, + ingest.strip_list_.at(global_part_id).first, + ingest.strip_list_.at(global_part_id).second, + partition_buffers[prj_id][part_id].length_, + partition_buffers[prj_id][part_id].buffer_); + + EXEC_AND_LOG_RETURN(ret, packet.Serialize(), + "serialized packet into buffer", + "failed to serialize packet"); + EXEC_AND_LOG( + ret, + SendPacket(socket_fd, packet.packet_buffer_, packet.packet_length_), + "sent packet of " << packet.txn_id_, "failed to send packet"); +#endif } } return ret; @@ -720,6 +772,9 @@ RetCode MasterLoader::SendPacket(const int socket_fd, const void* const packet_buffer, const uint64_t packet_length) { size_t total_write_num = 0; + + // just lock this socket file descriptor + LockGuard 
guard(socket_fd_to_lock_[socket_fd]); while (total_write_num < packet_length) { ssize_t write_num = write( socket_fd, static_cast(packet_buffer) + total_write_num, @@ -737,22 +792,27 @@ RetCode MasterLoader::SendPacket(const int socket_fd, void* MasterLoader::SendPacketWork(void* arg) { MasterLoader* loader = static_cast(arg); + int index = __sync_fetch_and_add(&(loader->thread_index_), 1); + LOG(INFO) << " I got id :" << index; + assert(index < send_thread_num_); while (1) { - loader->packet_to_send_count_.wait(); + loader->packet_queue_to_send_count_[index].wait(); LoadPacket* packet = nullptr; { - LockGuard guard(loader->packet_queue_lock_); - packet = loader->packet_queue_.front(); - loader->packet_queue_.pop(); + LockGuard guard(loader->packet_queue_lock_[index]); + packet = loader->packet_queues_[index].front(); + loader->packet_queues_[index].pop(); } RetCode ret = rSuccess; - EXEC_AND_DLOG(ret, SendPacket(packet->socket_fd_, packet->packet_buffer_, - packet->packet_length_), - "sent packet " << packet->txn_id_, "failed to send packet"); + EXEC_AND_LOG(ret, + loader->SendPacket(packet->socket_fd_, packet->packet_buffer_, + packet->packet_length_), + "sent packet of " << packet->txn_id_, "failed to send packet"); DELETE_PTR(packet); } } + void* MasterLoader::Work(void* arg) { WorkerPara* para = static_cast(arg); AMQConsumer consumer(para->brokerURI_, para->destURI_, para->use_topic_, @@ -813,8 +873,12 @@ void* MasterLoader::StartMasterLoader(void* arg) { cin >> temp; cout << "Well , start flag is received" << std::endl; - Environment::getInstance()->getThreadPool()->AddTask( - MasterLoader::SendPacketWork, master_loader); +#ifdef SEND_THREAD + for (int i = 0; i < master_loader->send_thread_num_; ++i) { + Environment::getInstance()->getThreadPool()->AddTask( + MasterLoader::SendPacketWork, master_loader); + } +#endif // AMQConsumer consumer(brokerURI, destURI, use_topics, client_ack); // consumer.run(master_loader); diff --git a/loader/master_loader.h 
b/loader/master_loader.h index be3804e68..99979d52f 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -181,9 +181,8 @@ class MasterLoader { static behavior ReceiveSlaveReg(event_based_actor* self, MasterLoader* mloader); - static RetCode SendPacket(const int socket_fd, - const void* const packet_buffer, - const uint64_t packet_length); + RetCode SendPacket(const int socket_fd, const void* const packet_buffer, + const uint64_t packet_length); public: static void* Work(void* para); @@ -197,19 +196,24 @@ class MasterLoader { int master_loader_port_; // vector slave_addrs_; // vector slave_sockets_; + boost::unordered_map slave_addr_to_socket_; // store id of transactions which are not finished unordered_map txn_commint_info_; - Lock lock_; - SpineLock spin_lock_; + SpineLock commit_info_spin_lock_; + + int send_thread_num_; + std::queue* packet_queues_; + SpineLock* packet_queue_lock_; + semaphore* packet_queue_to_send_count_; + int thread_index_ = 0; - std::queue packet_queue_; - SpineLock packet_queue_lock_; - semaphore packet_to_send_count_; + unordered_map socket_fd_to_lock_; private: // for test + static uint64_t debug_finished_txn_count; static uint64_t debug_consumed_message_count; static uint64_t get_request_time; static timeval start_time; diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index f768de2e8..6fe5c389c 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -262,6 +262,7 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { "failed to store"); /// return result to master loader + packet.txn_id_ = *reinterpret_cast(head_buffer); EXEC_AND_LOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), "sent commit result of " << packet.txn_id_ << " to master loader", diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index c91f571cd..380db84dc 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -26,7 +26,11 @@ * */ #include "txn_server.hpp" + +#include "caf/all.hpp" 
#include "txn_log.hpp" + +using caf::aout; namespace claims { namespace txn { // using claims::common::rSuccess; @@ -52,6 +56,22 @@ std::unordered_map TxnServer::phy_cp_list_; std::unordered_map> TxnServer::CountList; caf::actor test; +// UInt64 txn_id = 0; + +void TxnCore::ReMalloc() { + size_ = 0; + txn_index_.clear(); + commit_ = new bool[capacity_]; + abort_ = new bool[capacity_]; + strip_list_ = new vector[capacity_]; + // aout(this) << "core id is " << core_id_ << endl; +} + +inline UInt64 TxnCore::GetId() { + // return ((__sync_add_and_fetch(&txn_id_, 1)) * 1000) + core_id_; + return (++txn_id_) * 1000 + core_id_; +} + caf::behavior TxnCore::make_behavior() { // this->delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); return { diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp index 4240bacbb..b225d7e07 100644 --- a/txn_manager/txn_server.hpp +++ b/txn_manager/txn_server.hpp @@ -20,8 +20,8 @@ * * Created on: 2016年4月10日 * Author: imdb - * Email: - * + * Email: + * * Description: * */ @@ -48,8 +48,8 @@ //#include "txn_log.hpp" #include -namespace claims{ -namespace txn{ +namespace claims { +namespace txn { using std::cin; using std::cout; using std::endl; @@ -65,63 +65,54 @@ using std::chrono::seconds; using std::chrono::milliseconds; using std::make_shared; using std::shared_ptr; -class TxnCore: public caf::event_based_actor { +// UInt64 txn_id; +class TxnCore : public caf::event_based_actor { public: static int capacity_; UInt64 core_id_; UInt64 txn_id_ = 0; UInt64 size_; map txn_index_; - bool * commit_ = nullptr; - bool * abort_ = nullptr; - vector * strip_list_; + bool* commit_ = nullptr; + bool* abort_ = nullptr; + vector* strip_list_; caf::behavior make_behavior() override; - void ReMalloc() { - size_ = 0; - txn_index_.clear(); - commit_ = new bool[capacity_]; - abort_ = new bool[capacity_]; - strip_list_ = new vector[capacity_]; - } - TxnCore(int coreId):core_id_(coreId) { ReMalloc();} - UInt64 GetId(){ - return 
((++txn_id_) *1000) + core_id_; - } + void ReMalloc(); + TxnCore(int coreId) : core_id_(coreId) { ReMalloc(); } + UInt64 GetId(); }; -class Test:public caf::event_based_actor { +class Test : public caf::event_based_actor { public: caf::behavior make_behavior() override; }; - -class IngestCommitWorker:public caf::event_based_actor { +class IngestCommitWorker : public caf::event_based_actor { public: caf::behavior make_behavior() override; }; -class AbortWorker:public caf::event_based_actor { +class AbortWorker : public caf::event_based_actor { public: caf::behavior make_behavior() override; }; -class QueryWorker:public caf::event_based_actor { +class QueryWorker : public caf::event_based_actor { public: caf::behavior make_behavior() override; }; -class CheckpointWorker:public caf::event_based_actor { +class CheckpointWorker : public caf::event_based_actor { public: caf::behavior make_behavior() override; }; -class CommitCPWorker:public caf::event_based_actor { +class CommitCPWorker : public caf::event_based_actor { public: caf::behavior make_behavior() override; }; - -class TxnServer: public caf::event_based_actor{ +class TxnServer : public caf::event_based_actor { public: static bool active_; static int port_; @@ -133,40 +124,31 @@ class TxnServer: public caf::event_based_actor{ static std::unordered_map phy_cp_list_; static std::unordered_map> CountList; /**************** User APIs ***************/ - static RetCode Init(int concurrency = kConcurrency , int port = kTxnPort); + static RetCode Init(int concurrency = kConcurrency, int port = kTxnPort); /**************** System APIs ***************/ - static RetCode BeginIngest(const FixTupleIngestReq & request, Ingest & ingest); - static RetCode CommitIngest(const UInt64 id); - static RetCode AbortIngest(const UInt64 id); - static RetCode BeginQuery(const QueryReq & request, Query & snapshot); - static RetCode BeginCheckpoint(Checkpoint & cp); - static RetCode CommitCheckpoint(const Checkpoint & cp); - static UInt64 
GetCoreId(UInt64 id) { - return id % 1000; - } - static inline UInt64 SelectCoreId() { - return rand() % concurrency_; - } + static RetCode BeginIngest(const FixTupleIngestReq& request, Ingest& ingest); + static RetCode CommitIngest(const UInt64 id); + static RetCode AbortIngest(const UInt64 id); + static RetCode BeginQuery(const QueryReq& request, Query& snapshot); + static RetCode BeginCheckpoint(Checkpoint& cp); + static RetCode CommitCheckpoint(const Checkpoint& cp); + static UInt64 GetCoreId(UInt64 id) { return id % 1000; } + static inline UInt64 SelectCoreId() { return rand() % concurrency_; } caf::behavior make_behavior() override; static RetCode RecoveryFromCatalog(); static RetCode RecoveryFromTxnLog(); - static inline Strip AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount); - static inline bool IsStripListGarbage(const vector & striplist) { - for (auto & strip : striplist) { - if (strip.pos_ >= TxnServer::logic_cp_list_[strip.part_]) - return false; + static inline Strip AtomicMalloc(UInt64 part, UInt64 TupleSize, + UInt64 TupleCount); + static inline bool IsStripListGarbage(const vector& striplist) { + for (auto& strip : striplist) { + if (strip.pos_ >= TxnServer::logic_cp_list_[strip.part_]) return false; } return true; } - }; - - } } - - -#endif // TXN_SERVER_HPP_ +#endif // TXN_SERVER_HPP_ From 06a347d06e9b4201c646c97aacad9169e16e158e Mon Sep 17 00:00:00 2001 From: yukai Date: Fri, 13 May 2016 14:48:15 +0800 Subject: [PATCH 36/58] ADD: performance info output --- loader/master_loader.cpp | 89 ++++++++++++++++++++++++++++++++-------- loader/master_loader.h | 5 ++- loader/slave_loader.cpp | 42 ++++++++++++++++++- loader/slave_loader.h | 9 ++++ utility/Timer.h | 6 +++ 5 files changed, 130 insertions(+), 21 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 51d3f9749..5f4959671 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -64,6 +64,7 @@ #include "../txn_manager/txn_client.hpp" 
#include "../txn_manager/txn_log.hpp" #include "../utility/resource_guard.h" +#include "../utility/Timer.h" using caf::aout; using caf::behavior; using caf::event_based_actor; @@ -77,11 +78,11 @@ using claims::catalog::TableDescriptor; using claims::common::Malloc; using claims::common::rSuccess; using claims::common::rFailure; - using namespace claims::txn; // NOLINT // #define SEND_THREAD +#define MASTER_LOADER_PREF // #define MASTER_LOADER_DEBUG #ifdef MASTER_LOADER_DEBUG @@ -90,10 +91,22 @@ using namespace claims::txn; // NOLINT #define PERFLOG(info) #endif +#ifdef MASTER_LOADER_PREF +#define ATOMIC_ADD(var, value) __sync_add_and_fetch(&var, value); +#define GET_TIME_ML(var) GETCURRENTTIME(var); +#else +#define ATOMIC_ADD(var, value) +#define GET_TIME_ML(var) +#endif + uint64_t MasterLoader::debug_finished_txn_count = 0; uint64_t MasterLoader::debug_consumed_message_count = 0; timeval MasterLoader::start_time; uint64_t MasterLoader::get_request_time = 0; +uint64_t MasterLoader::txn_average_delay_ = 0; + +static const int txn_count_for_debug = 5000; +static const char* txn_count_string = "5000"; namespace claims { namespace loader { @@ -221,10 +234,21 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, mloader->commit_info_spin_lock_.acquire(); mloader->txn_commint_info_.erase(txn_id); mloader->commit_info_spin_lock_.release(); - if (++debug_finished_txn_count == 1000) { - cout << "\n\n 1000 txn used " << GetElapsedTimeInUs(start_time) - << " us" << endl; + + // FOR DEBUG +#ifdef MASTER_LOADER_PREF + if (++debug_finished_txn_count == txn_count_for_debug) { + cout << "\n" << txn_count_string << " txn used " + << GetElapsedTimeInUs(start_time) << " us" << endl; + cout << "average delay of " << txn_count_string + << "txn (from applied txn to finished txn) is:" + << txn_average_delay_ * 1.0 / txn_count_for_debug << " us" + << endl; + } else if (debug_finished_txn_count < txn_count_for_debug) { + txn_average_delay_ += + GetCurrentUs() - 
mloader->txn_start_time_.at(txn_id); } +#endif } } catch (const std::out_of_range& e) { LOG(ERROR) << "no find " << txn_id << " in map"; @@ -272,27 +296,40 @@ RetCode MasterLoader::ConnectWithSlaves() { RetCode MasterLoader::Ingest(const string& message, function ack_function) { - if (1 == __sync_add_and_fetch(&debug_consumed_message_count, 1)) { + static uint64_t get_tuple_time = 0; + static uint64_t merge_tuple_time = 0; + +#ifdef MASTER_LOADER_PREF + uint64_t temp_message_count = + __sync_add_and_fetch(&debug_consumed_message_count, 1); + if (1 == temp_message_count) { gettimeofday(&start_time, NULL); } - if (1000 == __sync_add_and_fetch(&debug_consumed_message_count, 1)) { - cout << " 1000 txn get request used " << get_request_time << " us" << endl; + if (txn_count_for_debug == temp_message_count) { + cout << txn_count_string << " txn get request used " << get_request_time + << " us" << endl; + cout << txn_count_string << " txn get tuples used " << get_tuple_time + << " us" << endl; + cout << txn_count_string << " txn merge tuples used " << merge_tuple_time + << " us" << endl; } - LOG(INFO) << "consumed message :" << debug_consumed_message_count; +#endif + DLOG(INFO) << "consumed message :" << debug_consumed_message_count; RetCode ret = rSuccess; // string message = GetMessage(); // DLOG(INFO) << "get message:\n" << message; /// get message from MQ - GETCURRENTTIME(req_start); + GET_TIME_ML(req_start); IngestionRequest req; EXEC_AND_DLOG(ret, GetRequestFromMessage(message, &req), "got request!", "failed to get request"); - __sync_add_and_fetch(&get_request_time, GetElapsedTimeInUs(req_start)); + ATOMIC_ADD(get_request_time, GetElapsedTimeInUs(req_start)); /// parse message and get all tuples of all partitions, then /// check the validity of all tuple in message + GET_TIME_ML(get_tuple_start); TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable(req.table_name_); assert(table != NULL && "table is not exist!"); @@ -320,14 +357,17 @@ RetCode 
MasterLoader::Ingest(const string& message, "got all tuples of every partition", "failed to get all tuples of every partition"); #endif + ATOMIC_ADD(get_tuple_time, GetElapsedTimeInUs(get_tuple_start)); /// merge all tuple buffers of partition into one partition buffer + GET_TIME_ML(merge_start); vector> partition_buffers( table->getNumberOfProjection()); EXEC_AND_DLOG(ret, MergePartitionTupleIntoOneBuffer( table, tuple_buffers_per_part, partition_buffers), "merged all tuple of same partition into one buffer", "failed to merge tuples buffers into one buffer"); + ATOMIC_ADD(merge_tuple_time, GetElapsedTimeInUs(merge_start)); /// start transaction from here claims::txn::Ingest ingest; @@ -338,6 +378,8 @@ RetCode MasterLoader::Ingest(const string& message, commit_info_spin_lock_.acquire(); txn_commint_info_.insert(std::pair( ingest.id_, CommitInfo(ingest.strip_list_.size()))); + + txn_start_time_.insert(pair(ingest.id_, GetCurrentUs())); commit_info_spin_lock_.release(); DLOG(INFO) << "insert txn " << ingest.id_ << " into map "; @@ -737,10 +779,10 @@ RetCode MasterLoader::SendPartitionTupleToSlave( partition_buffers[prj_id][part_id].length_, partition_buffers[prj_id][part_id].buffer_); - EXEC_AND_LOG_RETURN(ret, packet.Serialize(), - "serialized packet into buffer", - "failed to serialize packet"); - EXEC_AND_LOG( + EXEC_AND_DLOG_RETURN(ret, packet.Serialize(), + "serialized packet into buffer", + "failed to serialize packet"); + EXEC_AND_DLOG( ret, SendPacket(socket_fd, packet.packet_buffer_, packet.packet_length_), "sent packet of " << packet.txn_id_, "failed to send packet"); @@ -771,10 +813,13 @@ RetCode MasterLoader::SelectSocket(const TableDescriptor* table, RetCode MasterLoader::SendPacket(const int socket_fd, const void* const packet_buffer, const uint64_t packet_length) { + static int sent_packetcount = 0; + static uint64_t send_total_time = 0; size_t total_write_num = 0; // just lock this socket file descriptor LockGuard guard(socket_fd_to_lock_[socket_fd]); 
+ GET_TIME_ML(send_start); while (total_write_num < packet_length) { ssize_t write_num = write( socket_fd, static_cast(packet_buffer) + total_write_num, @@ -787,6 +832,14 @@ RetCode MasterLoader::SendPacket(const int socket_fd, } total_write_num += write_num; } +#ifdef MASTER_LOADER_PREF + if (__sync_add_and_fetch(&sent_packetcount, 1) == txn_count_for_debug * 4) { + cout << "send " << sent_packetcount << " packets used " << send_total_time + << ", average time is:" << send_total_time / sent_packetcount << endl; + } else { + ATOMIC_ADD(send_total_time, GetElapsedTimeInUs(send_start)); + } +#endif return rSuccess; } @@ -805,10 +858,10 @@ void* MasterLoader::SendPacketWork(void* arg) { } RetCode ret = rSuccess; - EXEC_AND_LOG(ret, - loader->SendPacket(packet->socket_fd_, packet->packet_buffer_, - packet->packet_length_), - "sent packet of " << packet->txn_id_, "failed to send packet"); + EXEC_AND_DLOG( + ret, loader->SendPacket(packet->socket_fd_, packet->packet_buffer_, + packet->packet_length_), + "sent packet of " << packet->txn_id_, "failed to send packet"); DELETE_PTR(packet); } } diff --git a/loader/master_loader.h b/loader/master_loader.h index 99979d52f..c2a0c5264 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -212,11 +212,14 @@ class MasterLoader { unordered_map socket_fd_to_lock_; private: - // for test + // for debug static uint64_t debug_finished_txn_count; static uint64_t debug_consumed_message_count; static uint64_t get_request_time; static timeval start_time; + + unordered_map txn_start_time_; + static uint64_t txn_average_delay_; }; } /* namespace loader */ diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 6fe5c389c..edacdd361 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -60,6 +60,8 @@ using claims::txn::GetTableIdFromGlobalPartId; using std::chrono::milliseconds; using std::chrono::seconds; +// #define MULTI_WORK_THREAD + // #define MASTER_LOADER_DEBUG #ifdef MASTER_LOADER_DEBUG @@ 
-262,7 +264,6 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { "failed to store"); /// return result to master loader - packet.txn_id_ = *reinterpret_cast(head_buffer); EXEC_AND_LOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), "sent commit result of " << packet.txn_id_ << " to master loader", @@ -293,9 +294,15 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { DLOG(INFO) << "position+offset is:" << packet.pos_ + packet.offset_ << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; +#ifdef MULTI_WORK_THREAD + partition_storage_lock_.acquire(); +#endif EXEC_AND_DLOG_RETURN( ret, part_storage->AddChunkWithMemoryToNum(last_chunk_id + 1, HDFS), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); +#ifdef MULTI_WORK_THREAD + partition_storage_lock_.release(); +#endif /// copy data into applied memory const uint64_t tuple_size = Catalog::getInstance() @@ -385,6 +392,30 @@ RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, return rSuccess; } +void* SlaveLoader::HandleWork(void* arg) { + SlaveLoader* slave_loader = static_cast(arg); + while (1) { + RetCode ret = rSuccess; + slave_loader->packet_count_.wait(); + LoadPacket* packet = nullptr; + { + LockGuard guard(slave_loader->queue_lock_); + packet = slave_loader->packet_queue_.front(); + slave_loader->packet_queue_.pop(); + } + + EXEC_AND_DLOG(ret, slave_loader->StoreDataInMemory(*packet), "stored data", + "failed to store"); + + /// return result to master loader + EXEC_AND_LOG( + ret, + slave_loader->SendAckToMasterLoader(packet->txn_id_, rSuccess == ret), + "sent commit result of " << packet->txn_id_ << " to master loader", + "failed to send commit res to master loader"); + } +} + void* SlaveLoader::StartSlaveLoader(void* arg) { Config::getInstance(); LOG(INFO) << "start slave loader..."; @@ -398,7 +429,14 @@ void* SlaveLoader::StartSlaveLoader(void* arg) { assert(rSuccess == ret && "can't connect with master"); cout << "connected with 
master loader" << endl; - // TODO(YK): error handle +// TODO(YK): error handle + +#ifdef MULTI_WORK_THREAD + for (int i = 0; i < Config::master_loader_thread_num - 1; ++i) { + Environment::getInstance()->getThreadPool()->AddTask(MasterLoader::Work, + slave_loader); + } +#endif slave_loader->ReceiveAndWorkLoop(); assert(false); diff --git a/loader/slave_loader.h b/loader/slave_loader.h index 79372109d..c36057e91 100644 --- a/loader/slave_loader.h +++ b/loader/slave_loader.h @@ -34,6 +34,7 @@ #include "../catalog/catalog.h" #include "../storage/BlockManager.h" #include "caf/all.hpp" +#include namespace claims { namespace loader { @@ -72,6 +73,8 @@ class SlaveLoader { RetCode StoreDataInMemory(const LoadPacket& packet); RetCode SendAckToMasterLoader(const uint64_t& txn_id, bool is_commited); + static void* HandleWork(void* arg); + private: int master_socket_fd_; string self_ip; @@ -80,6 +83,12 @@ class SlaveLoader { int listening_fd_ = -1; int master_fd_ = -1; + + private: + queue packet_queue_; + SpineLock queue_lock_; + semaphore packet_count_; + Lock partition_storage_lock_; }; } /* namespace loader */ diff --git a/utility/Timer.h b/utility/Timer.h index bba2ab84f..c428d8c21 100755 --- a/utility/Timer.h +++ b/utility/Timer.h @@ -19,6 +19,12 @@ static inline double GetCurrentMs() { return t_start.tv_sec * 1000 + 1.0 * t_start.tv_usec / 1000; } +static inline uint64_t GetCurrentUs() { + timeval t_start; + gettimeofday(&t_start, NULL); + return t_start.tv_sec * 1000000 + t_start.tv_usec; +} + /* * @param start: the start time * @return : the elapsed time(ms) from start, accurate to us From 6c71cc36e0fd41e62f48e902dfbe34ba79f78d7b Mon Sep 17 00:00:00 2001 From: yukai Date: Fri, 13 May 2016 20:09:54 +0800 Subject: [PATCH 37/58] Great performance improvment! 
OTPIMIZE: set remote actor as static actor; slave loader don't wait for master loader's feedback --- loader/master_loader.cpp | 21 +++++++++++++++++++-- loader/slave_loader.cpp | 35 +++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 5f4959671..9c7b50be9 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -82,6 +82,8 @@ using namespace claims::txn; // NOLINT // #define SEND_THREAD +// #define NON_BLOCK_SOCKET + #define MASTER_LOADER_PREF // #define MASTER_LOADER_DEBUG @@ -192,7 +194,7 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, */ return 1; }, - [=](LoadAckAtom, uint64_t txn_id, bool is_commited) -> int { // NOLINT + [=](LoadAckAtom, uint64_t txn_id, bool is_commited) { // NOLINT /* TODO(ANYONE): there should be a thread checking whether @@ -230,6 +232,7 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, DLOG(INFO) << "aborted txn with id:" << txn_id << " to txn manager"; } + LOG(INFO) << "finished txn with id:" << txn_id; mloader->commit_info_spin_lock_.acquire(); mloader->txn_commint_info_.erase(txn_id); @@ -252,10 +255,11 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, } } catch (const std::out_of_range& e) { LOG(ERROR) << "no find " << txn_id << " in map"; + cout << "no find " << txn_id << " in map"; // abort(); assert(false); } - return 1; + // return 1; }, [=](RegNodeAtom, NodeAddress addr, NodeID node_id) -> int { // NOLINT LOG(INFO) << "get node register info : (" << addr.ip << ":" << addr.port @@ -487,6 +491,12 @@ RetCode MasterLoader::GetSocketFdConnectedWithSlave(string ip, int port, PLOG(ERROR) << "failed to connect socket(" << ip << ":" << port << ")"; return rFailure; } +#ifdef NON_BLOCK_SOCKET + int flag = fcntl(fd, F_GETFL); + if (-1 == flag) PLOG(ERROR) << "failed to get fd flag"; + if (-1 == fcntl(fd, F_SETFL, flag | O_NONBLOCK)) + PLOG(ERROR) << 
"failed to set fd non-blocking"; +#endif *connected_fd = fd; return rSuccess; } @@ -825,6 +835,13 @@ RetCode MasterLoader::SendPacket(const int socket_fd, socket_fd, static_cast(packet_buffer) + total_write_num, packet_length - total_write_num); if (-1 == write_num) { +#ifdef NON_BLOCK_SOCKET + if (EAGAIN == errno) { + cout << "buffer is full, retry..." << endl; + usleep(1000); + continue; + } +#endif std::cerr << "failed to send buffer to slave(" << socket_fd << "): " << std::endl; PLOG(ERROR) << "failed to send buffer to slave(" << socket_fd << "): "; diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index edacdd361..184b549c9 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -171,7 +171,7 @@ RetCode SlaveLoader::SendSelfAddrToMaster() { << "to (" << Config::master_loader_ip << ":" << Config::master_loader_port << ")"; try { - auto master_actor = + static auto master_actor = remote_actor(Config::master_loader_ip, Config::master_loader_port); caf::scoped_actor self; self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port) @@ -257,6 +257,7 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { /// deserialization of packet PERFLOG("got all packet buffer"); LoadPacket packet; + EXEC_AND_DLOG(ret, packet.Deserialize(head_buffer, data_buffer), "deserialized packet", "failed to deserialize packet"); @@ -264,6 +265,7 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { "failed to store"); /// return result to master loader + packet.txn_id_ = *reinterpret_cast(head_buffer); EXEC_AND_LOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), "sent commit result of " << packet.txn_id_ << " to master loader", @@ -367,21 +369,26 @@ RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, int retry_max_time = 10; while (1) { try { - auto master_actor = + static auto master_actor = remote_actor(Config::master_loader_ip, Config::master_loader_port); caf::scoped_actor self; - self->sync_send(master_actor, LoadAckAtom::value, txn_id, 
is_commited) - .await([&](int r) { // NOLINT - DLOG(INFO) << "sent txn " << txn_id - << " commit result:" << is_commited - << " to master and received response"; - }, - caf::after(seconds(2)) >> - [&] { // NOLINT - LOG(ERROR) << "receiving response of txn " << txn_id - << " time out"; - throw caf::network_error("receiving response time out"); - }); + /* self->sync_send(master_actor, LoadAckAtom::value, txn_id, + is_commited) + .await([&](int r) { // NOLINT + DLOG(INFO) << "sent txn " << txn_id + << " commit result:" << is_commited + << " to master and received response"; + }, + caf::after(seconds(2)) >> + [&] { // NOLINT + LOG(ERROR) << "receiving response of txn " << + txn_id + << " time out"; + throw caf::network_error("receiving response time + out"); + });*/ + + self->send(master_actor, LoadAckAtom::value, txn_id, is_commited); return rSuccess; } catch (exception& e) { LOG(ERROR) << "failed to send commit result of " << txn_id From ae1235677a6616cb706c1ce9ac67b16c1afdb031 Mon Sep 17 00:00:00 2001 From: yukai Date: Sun, 15 May 2016 14:09:40 +0800 Subject: [PATCH 38/58] ADD: performance log --- loader/master_loader.cpp | 12 ++-- loader/master_loader.h | 1 + loader/slave_loader.cpp | 122 ++++++++++++++++++++++++--------------- 3 files changed, 83 insertions(+), 52 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 9c7b50be9..f46f3f5af 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -82,7 +82,7 @@ using namespace claims::txn; // NOLINT // #define SEND_THREAD -// #define NON_BLOCK_SOCKET +#define NON_BLOCK_SOCKET #define MASTER_LOADER_PREF // #define MASTER_LOADER_DEBUG @@ -106,6 +106,7 @@ uint64_t MasterLoader::debug_consumed_message_count = 0; timeval MasterLoader::start_time; uint64_t MasterLoader::get_request_time = 0; uint64_t MasterLoader::txn_average_delay_ = 0; +static int MasterLoader::buffer_full_time = 0; static const int txn_count_for_debug = 5000; static const char* txn_count_string = "5000"; @@ 
-122,7 +123,7 @@ void MasterLoader::IngestionRequest::Show() { MasterLoader::MasterLoader() : master_loader_ip_(Config::master_loader_ip), master_loader_port_(Config::master_loader_port), - send_thread_num_(Config::master_loader_thread_num / 4 + 1) { + send_thread_num_(Config::master_loader_thread_num / 2 + 1) { #ifdef SEND_THREAD packet_queues_ = new queue[send_thread_num_]; packet_queue_lock_ = new SpineLock[send_thread_num_]; @@ -247,6 +248,7 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, << "txn (from applied txn to finished txn) is:" << txn_average_delay_ * 1.0 / txn_count_for_debug << " us" << endl; + cout << "buffer full times:" << buffer_full_time << endl; } else if (debug_finished_txn_count < txn_count_for_debug) { txn_average_delay_ += GetCurrentUs() - mloader->txn_start_time_.at(txn_id); @@ -259,7 +261,6 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, // abort(); assert(false); } - // return 1; }, [=](RegNodeAtom, NodeAddress addr, NodeID node_id) -> int { // NOLINT LOG(INFO) << "get node register info : (" << addr.ip << ":" << addr.port @@ -837,8 +838,9 @@ RetCode MasterLoader::SendPacket(const int socket_fd, if (-1 == write_num) { #ifdef NON_BLOCK_SOCKET if (EAGAIN == errno) { - cout << "buffer is full, retry..." << endl; - usleep(1000); + // cout << "buffer is full, retry..." 
<< endl; + ATOMIC_ADD(buffer_full_time, 1); + usleep(500); continue; } #endif diff --git a/loader/master_loader.h b/loader/master_loader.h index c2a0c5264..fa685e0fa 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -217,6 +217,7 @@ class MasterLoader { static uint64_t debug_consumed_message_count; static uint64_t get_request_time; static timeval start_time; + static int buffer_full_time; unordered_map txn_start_time_; static uint64_t txn_average_delay_; diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 184b549c9..4b62112b8 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -60,23 +60,38 @@ using claims::txn::GetTableIdFromGlobalPartId; using std::chrono::milliseconds; using std::chrono::seconds; -// #define MULTI_WORK_THREAD +// #define WORK_THREAD +#define NO_ACTUAL_WORK // #define MASTER_LOADER_DEBUG +#define SLAVE_LOADER_PREF + #ifdef MASTER_LOADER_DEBUG #define PERFLOG(info) LOG(INFO) << info << endl; #else #define PERFLOG #endif +#ifdef SLAVE_LOADER_PREF +#define ATOMIC_ADD(var, value) __sync_add_and_fetch(&var, value); +#define GET_TIME_SL(var) GETCURRENTTIME(var); +#else +#define ATOMIC_ADD(var, value) +#define GET_TIME_SL(var) +#endif + +static const int txn_count_for_debug = 5000; +static const char* txn_count_string = "5000"; + namespace claims { namespace loader { SlaveLoader::SlaveLoader() { // try { // master_actor_ = - // remote_actor(Config::master_loader_ip, Config::master_loader_port); + // remote_actor(Config::master_loader_ip, + // Config::master_loader_port); // } catch (const exception& e) { // cout << "master loader actor failed." 
<< e.what() << endl; // } @@ -164,7 +179,8 @@ RetCode SlaveLoader::EstablishListeningSocket() { RetCode SlaveLoader::SendSelfAddrToMaster() { // auto send_actor = spawn([&](event_based_actor* self) { // auto master_actor = - // remote_actor(Config::master_loader_ip, Config::master_loader_port); + // remote_actor(Config::master_loader_ip, + // Config::master_loader_port); // self->sync_send(master_actor, IpPortAtom::value, self_ip, self_port); // }); LOG(INFO) << "going to send self (" << self_ip << ":" << self_port << ")" @@ -212,13 +228,15 @@ void SlaveLoader::OutputFdIpPort(int fd) { RetCode SlaveLoader::ReceiveAndWorkLoop() { assert(master_fd_ > 3); + static uint64_t total_handle_time = 0; + static int handle_count = 0; char head_buffer[LoadPacket::kHeadLength]; + int real_read_num; LOG(INFO) << "slave is receiving ..."; while (1) { RetCode ret = rSuccess; /// get load packet - int real_read_num; if (-1 == (real_read_num = recv(master_fd_, head_buffer, LoadPacket::kHeadLength, MSG_WAITALL))) { PLOG(ERROR) << "failed to receive message length from master"; @@ -231,6 +249,7 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { << " bytes"; continue; } + GET_TIME_SL(start_handle); PERFLOG("received packet head"); uint64_t data_length = *reinterpret_cast(head_buffer + LoadPacket::kHeadLength - @@ -256,20 +275,40 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { /// deserialization of packet PERFLOG("got all packet buffer"); +#ifdef WORK_THREAD + LoadPacket* packet = new LoadPacket(); + EXEC_AND_DLOG(ret, packet->Deserialize(head_buffer, data_buffer), + "deserialized packet", "failed to deserialize packet"); + { + LockGuard guard(queue_lock_); + packet_queue_.push(packet); + } + packet_count_.post(); +#else LoadPacket packet; - +#ifndef NO_ACTUAL_WORK EXEC_AND_DLOG(ret, packet.Deserialize(head_buffer, data_buffer), "deserialized packet", "failed to deserialize packet"); EXEC_AND_DLOG(ret, StoreDataInMemory(packet), "stored data", "failed to store"); - - /// return result to 
master loader +#else packet.txn_id_ = *reinterpret_cast(head_buffer); +#endif + /// return result to master loader EXEC_AND_LOG(ret, SendAckToMasterLoader(packet.txn_id_, rSuccess == ret), "sent commit result of " << packet.txn_id_ << " to master loader", "failed to send commit res to master loader"); +#endif + +#ifdef SLAVE_LOADER_PREF + ATOMIC_ADD(total_handle_time, GetElapsedTimeInUs(start_handle)); + if (txn_count_for_debug == ++handle_count) { + cout << "handle " << handle_count + << " messages used:" << total_handle_time << endl; + } +#endif if (rSuccess != ret) return ret; } } @@ -296,13 +335,13 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { DLOG(INFO) << "position+offset is:" << packet.pos_ + packet.offset_ << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; -#ifdef MULTI_WORK_THREAD +#ifdef WORK_THREAD partition_storage_lock_.acquire(); #endif EXEC_AND_DLOG_RETURN( ret, part_storage->AddChunkWithMemoryToNum(last_chunk_id + 1, HDFS), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); -#ifdef MULTI_WORK_THREAD +#ifdef WORK_THREAD partition_storage_lock_.release(); #endif @@ -319,12 +358,12 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { uint64_t total_written_length = 0; uint64_t data_length = packet.data_length_; HdfsInMemoryChunk chunk_info; + ChunkID current_chunk(PartitionID(ProjectionID(table_id, prj_id), part_id), + cur_chunk_id); while (total_written_length < data_length) { /// get start position of current chunk if (BlockManager::getInstance()->getMemoryChunkStore()->getChunk( - ChunkID(PartitionID(ProjectionID(table_id, prj_id), part_id), - cur_chunk_id), - chunk_info)) { + current_chunk, chunk_info)) { DLOG(INFO) << "start address of chunk:" << cur_chunk_id << " is " << chunk_info.hook << ", end addr is " << chunk_info.hook + CHUNK_SIZE; @@ -354,7 +393,6 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { assert(cur_chunk_id < 
part_storage->GetChunkNum()); cur_block_id = 0; // the block id of next chunk is 0 pos_in_block = 0; - } else { LOG(INFO) << "chunk id is " << cur_chunk_id << endl; assert(false && "no chunk with this chunk id"); @@ -365,38 +403,27 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, bool is_commited) { - int time = 0; - int retry_max_time = 10; - while (1) { - try { - static auto master_actor = - remote_actor(Config::master_loader_ip, Config::master_loader_port); - caf::scoped_actor self; - /* self->sync_send(master_actor, LoadAckAtom::value, txn_id, - is_commited) - .await([&](int r) { // NOLINT - DLOG(INFO) << "sent txn " << txn_id - << " commit result:" << is_commited - << " to master and received response"; - }, - caf::after(seconds(2)) >> - [&] { // NOLINT - LOG(ERROR) << "receiving response of txn " << - txn_id - << " time out"; - throw caf::network_error("receiving response time - out"); - });*/ - - self->send(master_actor, LoadAckAtom::value, txn_id, is_commited); - return rSuccess; - } catch (exception& e) { - LOG(ERROR) << "failed to send commit result of " << txn_id - << " to master loader in " << ++time << " time." << e.what(); - if (time >= retry_max_time) return rFailure; + static uint64_t total_send_time = 0; + static uint64_t count = 0; + GET_TIME_SL(send_start); + try { + static auto master_actor = + remote_actor(Config::master_loader_ip, Config::master_loader_port); + static caf::scoped_actor self; + self->send(master_actor, LoadAckAtom::value, txn_id, is_commited); +#ifdef SLAVE_LOADER_PREF + ATOMIC_ADD(total_send_time, GetElapsedTimeInUs(send_start)); + if (txn_count_for_debug == ++count) { + cout << "send " << count << " ACK used:" << total_send_time + << " us. average time is:" << total_send_time / count << endl; } +#endif + return rSuccess; + } catch (exception& e) { + LOG(ERROR) << "failed to send commit result of " << txn_id + << " to master loader ." 
<< e.what(); + return rFailure; } - return rSuccess; } void* SlaveLoader::HandleWork(void* arg) { @@ -420,6 +447,7 @@ void* SlaveLoader::HandleWork(void* arg) { slave_loader->SendAckToMasterLoader(packet->txn_id_, rSuccess == ret), "sent commit result of " << packet->txn_id_ << " to master loader", "failed to send commit res to master loader"); + DELETE_PTR(packet); } } @@ -438,10 +466,10 @@ void* SlaveLoader::StartSlaveLoader(void* arg) { cout << "connected with master loader" << endl; // TODO(YK): error handle -#ifdef MULTI_WORK_THREAD - for (int i = 0; i < Config::master_loader_thread_num - 1; ++i) { - Environment::getInstance()->getThreadPool()->AddTask(MasterLoader::Work, - slave_loader); +#ifdef WORK_THREAD + for (int i = 0; i < 1; ++i) { + Environment::getInstance()->getThreadPool()->AddTask( + SlaveLoader::HandleWork, slave_loader); } #endif From 83557cf4ad5de8ba4fbf93b4a9481b724365c80a Mon Sep 17 00:00:00 2001 From: yukai Date: Sun, 15 May 2016 22:49:21 +0800 Subject: [PATCH 39/58] Ready for measuring performance. 
ADD: handle work thread based on CAF in slave_loader; --- Environment.cpp | 7 +++ common/Schema/SchemaFix.cpp | 2 - loader/load_packet.h | 10 ++++- loader/master_loader.cpp | 46 +++++++++---------- loader/master_loader.h | 1 - loader/slave_loader.cpp | 88 +++++++++++++++++++++++++++---------- loader/slave_loader.h | 15 +++++-- 7 files changed, 115 insertions(+), 54 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index 5be429c71..fa56da68e 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -9,6 +9,7 @@ #include #include +#include "loader/load_packet.h" #define GLOG_NO_ABBREVIATED_SEVERITIES #include #undef GLOG_NO_ABBREVIATED_SEVERITIES @@ -43,6 +44,7 @@ using claims::common::InitOperatorFunc; using claims::common::InitTypeCastFunc; using claims::common::InitTypeConversionMatrix; using claims::common::rSuccess; +using claims::loader::LoadPacket; using claims::loader::MasterLoader; using claims::loader::SlaveLoader; using claims::txn::TxnServer; @@ -61,6 +63,11 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { &ProjectionID::projection_off); announce("PartitionID", &PartitionID::projection_id, &PartitionID::partition_off); + + announce("LoadPacket", &LoadPacket::txn_id_, + &LoadPacket::global_part_id_, &LoadPacket::pos_, + &LoadPacket::offset_, &LoadPacket::data_length_, + &LoadPacket::data_buffer_, &LoadPacket::socket_fd_); _instance = this; Config::getInstance(); CodeGenerator::getInstance(); diff --git a/common/Schema/SchemaFix.cpp b/common/Schema/SchemaFix.cpp index afff5d661..8288daad9 100755 --- a/common/Schema/SchemaFix.cpp +++ b/common/Schema/SchemaFix.cpp @@ -97,10 +97,8 @@ int SchemaFix::getColumnOffset(unsigned index) const { // WARNING: using carefully!!!!! 
RetCode SchemaFix::ToValue(std::string text_tuple, void* binary_tuple, const string attr_separator) { - RetCode ret = rSuccess; string::size_type prev_pos = 0; string::size_type pos = 0; - string text_column; int attr_sep_length = attr_separator.length(); /** diff --git a/loader/load_packet.h b/loader/load_packet.h index 0ba9dc5c2..a0102037f 100644 --- a/loader/load_packet.h +++ b/loader/load_packet.h @@ -35,6 +35,7 @@ namespace claims { namespace loader { using IpPortAtom = caf::atom_constant; +using LoadPacketAtom = caf::atom_constant; using LoadAckAtom = caf::atom_constant; using RegNodeAtom = caf::atom_constant; using BindPartAtom = caf::atom_constant; @@ -55,7 +56,7 @@ struct LoadPacket { LoadPacket() {} LoadPacket(int socket_fd, const uint64_t txn_id, const uint64_t g_part_id, uint64_t pos, uint64_t offset, uint64_t data_length, - const void* data_buffer) + void* data_buffer) : txn_id_(txn_id), global_part_id_(g_part_id), pos_(pos), @@ -70,6 +71,8 @@ struct LoadPacket { public: static const int kHeadLength = 5 * sizeof(uint64_t); + const void* getDataBuffer() { return data_buffer_; } + void setDataBuffer(void* data_buffer) { data_buffer_ = data_buffer; } public: uint64_t txn_id_; @@ -84,6 +87,11 @@ struct LoadPacket { void* packet_buffer_ = NULL; }; +inline bool operator==(const LoadPacket& a, const LoadPacket& b) { + return a.txn_id_ == b.txn_id_ && a.global_part_id_ == b.global_part_id_ && + a.pos_ == b.pos_; +} + } /* namespace loader */ } /* namespace claims */ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index f46f3f5af..4a8e0b052 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -82,7 +82,7 @@ using namespace claims::txn; // NOLINT // #define SEND_THREAD -#define NON_BLOCK_SOCKET +// #define NON_BLOCK_SOCKET #define MASTER_LOADER_PREF // #define MASTER_LOADER_DEBUG @@ -104,12 +104,10 @@ using namespace claims::txn; // NOLINT uint64_t MasterLoader::debug_finished_txn_count = 0; uint64_t 
MasterLoader::debug_consumed_message_count = 0; timeval MasterLoader::start_time; -uint64_t MasterLoader::get_request_time = 0; uint64_t MasterLoader::txn_average_delay_ = 0; static int MasterLoader::buffer_full_time = 0; -static const int txn_count_for_debug = 5000; -static const char* txn_count_string = "5000"; +static const int txn_count_for_debug = 10000; namespace claims { namespace loader { @@ -242,13 +240,14 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, // FOR DEBUG #ifdef MASTER_LOADER_PREF if (++debug_finished_txn_count == txn_count_for_debug) { - cout << "\n" << txn_count_string << " txn used " + cout << "\n" << txn_count_for_debug << " txn used " << GetElapsedTimeInUs(start_time) << " us" << endl; - cout << "average delay of " << txn_count_string + cout << "average delay of " << txn_count_for_debug << "txn (from applied txn to finished txn) is:" << txn_average_delay_ * 1.0 / txn_count_for_debug << " us" << endl; - cout << "buffer full times:" << buffer_full_time << endl; + // cout << "buffer full times:" << buffer_full_time + // << endl; } else if (debug_finished_txn_count < txn_count_for_debug) { txn_average_delay_ += GetCurrentUs() - mloader->txn_start_time_.at(txn_id); @@ -258,7 +257,6 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, } catch (const std::out_of_range& e) { LOG(ERROR) << "no find " << txn_id << " in map"; cout << "no find " << txn_id << " in map"; - // abort(); assert(false); } }, @@ -301,6 +299,7 @@ RetCode MasterLoader::ConnectWithSlaves() { RetCode MasterLoader::Ingest(const string& message, function ack_function) { + static uint64_t get_request_time = 0; static uint64_t get_tuple_time = 0; static uint64_t merge_tuple_time = 0; @@ -311,11 +310,11 @@ RetCode MasterLoader::Ingest(const string& message, gettimeofday(&start_time, NULL); } if (txn_count_for_debug == temp_message_count) { - cout << txn_count_string << " txn get request used " << get_request_time + cout << 
txn_count_for_debug << " txn get request used " << get_request_time << " us" << endl; - cout << txn_count_string << " txn get tuples used " << get_tuple_time + cout << txn_count_for_debug << " txn get tuples used " << get_tuple_time << " us" << endl; - cout << txn_count_string << " txn merge tuples used " << merge_tuple_time + cout << txn_count_for_debug << " txn merge tuples used " << merge_tuple_time << " us" << endl; } #endif @@ -555,6 +554,9 @@ RetCode MasterLoader::GetPartitionTuples( vector>>& tuple_buffer_per_part) { #endif + static uint64_t total_get_tuple_time = 0; + static uint64_t total_to_value_time = 0; + RetCode ret = rSuccess; Schema* table_schema = table->getSchema(); MemoryGuard table_schema_guard(table_schema); @@ -751,7 +753,6 @@ RetCode MasterLoader::SendPartitionTupleToSlave( const claims::txn::Ingest& ingest) { RetCode ret = rSuccess; uint64_t table_id = table->get_table_id(); - for (int prj_id = 0; prj_id < partition_buffers.size(); ++prj_id) { for (int part_id = 0; part_id < partition_buffers[prj_id].size(); ++part_id) { @@ -828,22 +829,20 @@ RetCode MasterLoader::SendPacket(const int socket_fd, static uint64_t send_total_time = 0; size_t total_write_num = 0; - // just lock this socket file descriptor + /// just lock this socket file descriptor LockGuard guard(socket_fd_to_lock_[socket_fd]); - GET_TIME_ML(send_start); + // GET_TIME_ML(send_start); while (total_write_num < packet_length) { ssize_t write_num = write( socket_fd, static_cast(packet_buffer) + total_write_num, packet_length - total_write_num); if (-1 == write_num) { -#ifdef NON_BLOCK_SOCKET if (EAGAIN == errno) { - // cout << "buffer is full, retry..." << endl; + cout << "buffer is full, retry..." 
<< buffer_full_time << endl; ATOMIC_ADD(buffer_full_time, 1); usleep(500); continue; } -#endif std::cerr << "failed to send buffer to slave(" << socket_fd << "): " << std::endl; PLOG(ERROR) << "failed to send buffer to slave(" << socket_fd << "): "; @@ -852,12 +851,13 @@ RetCode MasterLoader::SendPacket(const int socket_fd, total_write_num += write_num; } #ifdef MASTER_LOADER_PREF - if (__sync_add_and_fetch(&sent_packetcount, 1) == txn_count_for_debug * 4) { - cout << "send " << sent_packetcount << " packets used " << send_total_time - << ", average time is:" << send_total_time / sent_packetcount << endl; - } else { - ATOMIC_ADD(send_total_time, GetElapsedTimeInUs(send_start)); - } +// if (__sync_add_and_fetch(&sent_packetcount, 1) == txn_count_for_debug * 4) { +// cout << "send " << sent_packetcount << " packets used " << send_total_time +// << ", average time is:" << send_total_time / sent_packetcount << +// endl; +// } else { +// ATOMIC_ADD(send_total_time, GetElapsedTimeInUs(send_start)); +// } #endif return rSuccess; } diff --git a/loader/master_loader.h b/loader/master_loader.h index fa685e0fa..2c60b25ec 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -215,7 +215,6 @@ class MasterLoader { // for debug static uint64_t debug_finished_txn_count; static uint64_t debug_consumed_message_count; - static uint64_t get_request_time; static timeval start_time; static int buffer_full_time; diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 4b62112b8..ac4f3ccdc 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -30,9 +30,11 @@ #include #include +#include #include #include #include +#include //NOLINT #include //NOLINT #include "caf/all.hpp" #include "caf/io/all.hpp" @@ -48,6 +50,7 @@ #include "../storage/PartitionStorage.h" #include "../txn_manager/txn.hpp" #include "../utility/resource_guard.h" +using caf::behavior; using caf::event_based_actor; using caf::io::remote_actor; using caf::spawn; @@ -60,14 +63,16 @@ 
using claims::txn::GetTableIdFromGlobalPartId; using std::chrono::milliseconds; using std::chrono::seconds; -// #define WORK_THREAD +// #define WORK_THREADS +// #define ONE_WORK_THREAD -#define NO_ACTUAL_WORK -// #define MASTER_LOADER_DEBUG +// #define NO_ACTUAL_WORK -#define SLAVE_LOADER_PREF +// #define SLAVE_LOADER_PREF -#ifdef MASTER_LOADER_DEBUG +// #define MASTER_LOADER_PREF + +#ifdef MASTER_LOADER_PREF #define PERFLOG(info) LOG(INFO) << info << endl; #else #define PERFLOG @@ -81,21 +86,15 @@ using std::chrono::seconds; #define GET_TIME_SL(var) #endif +caf::actor SlaveLoader::handle; + static const int txn_count_for_debug = 5000; static const char* txn_count_string = "5000"; namespace claims { namespace loader { -SlaveLoader::SlaveLoader() { - // try { - // master_actor_ = - // remote_actor(Config::master_loader_ip, - // Config::master_loader_port); - // } catch (const exception& e) { - // cout << "master loader actor failed." << e.what() << endl; - // } -} +SlaveLoader::SlaveLoader() {} SlaveLoader::~SlaveLoader() {} RetCode SlaveLoader::ConnectWithMaster() { @@ -275,7 +274,7 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { /// deserialization of packet PERFLOG("got all packet buffer"); -#ifdef WORK_THREAD +#ifdef WORK_THREADS LoadPacket* packet = new LoadPacket(); EXEC_AND_DLOG(ret, packet->Deserialize(head_buffer, data_buffer), "deserialized packet", "failed to deserialize packet"); @@ -284,6 +283,14 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { packet_queue_.push(packet); } packet_count_.post(); +#else +#ifdef ONE_WORK_THREAD + LoadPacket* packet = new LoadPacket(); + EXEC_AND_DLOG(ret, packet->Deserialize(head_buffer, data_buffer), + "deserialized packet", "failed to deserialize packet"); + // std::async(std::launch::async, WorkInAsync, packet); // too slow + caf::scoped_actor self; + self->send(handle, LoadPacketAtom::value, packet); #else LoadPacket packet; #ifndef NO_ACTUAL_WORK @@ -301,6 +308,7 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { << " to 
master loader", "failed to send commit res to master loader"); #endif +#endif #ifdef SLAVE_LOADER_PREF ATOMIC_ADD(total_handle_time, GetElapsedTimeInUs(start_handle)); @@ -335,13 +343,13 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { DLOG(INFO) << "position+offset is:" << packet.pos_ + packet.offset_ << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; -#ifdef WORK_THREAD +#ifdef WORK_THREADS partition_storage_lock_.acquire(); #endif EXEC_AND_DLOG_RETURN( ret, part_storage->AddChunkWithMemoryToNum(last_chunk_id + 1, HDFS), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); -#ifdef WORK_THREAD +#ifdef WORK_THREADS partition_storage_lock_.release(); #endif @@ -358,12 +366,12 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { uint64_t total_written_length = 0; uint64_t data_length = packet.data_length_; HdfsInMemoryChunk chunk_info; - ChunkID current_chunk(PartitionID(ProjectionID(table_id, prj_id), part_id), - cur_chunk_id); while (total_written_length < data_length) { /// get start position of current chunk if (BlockManager::getInstance()->getMemoryChunkStore()->getChunk( - current_chunk, chunk_info)) { + ChunkID(PartitionID(ProjectionID(table_id, prj_id), part_id), + cur_chunk_id), + chunk_info)) { DLOG(INFO) << "start address of chunk:" << cur_chunk_id << " is " << chunk_info.hook << ", end addr is " << chunk_info.hook + CHUNK_SIZE; @@ -426,6 +434,36 @@ RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, } } +// this method has the best performance +static behavior SlaveLoader::WorkInCAF(event_based_actor* self) { + return {[=](LoadPacketAtom, LoadPacket* packet) { // NOLINT + RetCode ret = rSuccess; + EXEC_AND_DLOG(ret, StoreDataInMemory(*packet), "stored data", + "failed to store"); + /// return result to master loader + EXEC_AND_LOG(ret, SendAckToMasterLoader(packet->txn_id_, rSuccess == ret), + "sent commit result of " << packet->txn_id_ + << " to master loader", + 
"failed to send commit res to master loader"); + DELETE_PTR(packet); + }}; +} + +// It's very slow +void SlaveLoader::WorkInAsync(LoadPacket* packet) { + RetCode ret = rSuccess; + EXEC_AND_DLOG(ret, StoreDataInMemory(*packet), "stored data", + "failed to store"); + + /// return result to master loader + EXEC_AND_LOG(ret, SendAckToMasterLoader(packet->txn_id_, rSuccess == ret), + "sent commit result of " << packet->txn_id_ + << " to master loader", + "failed to send commit res to master loader"); + DELETE_PTR(packet); +} + +// it is so slow to deliver message using queue void* SlaveLoader::HandleWork(void* arg) { SlaveLoader* slave_loader = static_cast(arg); while (1) { @@ -438,13 +476,13 @@ void* SlaveLoader::HandleWork(void* arg) { slave_loader->packet_queue_.pop(); } - EXEC_AND_DLOG(ret, slave_loader->StoreDataInMemory(*packet), "stored data", + EXEC_AND_DLOG(ret, SlaveLoader::StoreDataInMemory(*packet), "stored data", "failed to store"); /// return result to master loader EXEC_AND_LOG( ret, - slave_loader->SendAckToMasterLoader(packet->txn_id_, rSuccess == ret), + SlaveLoader::SendAckToMasterLoader(packet->txn_id_, rSuccess == ret), "sent commit result of " << packet->txn_id_ << " to master loader", "failed to send commit res to master loader"); DELETE_PTR(packet); @@ -466,7 +504,11 @@ void* SlaveLoader::StartSlaveLoader(void* arg) { cout << "connected with master loader" << endl; // TODO(YK): error handle -#ifdef WORK_THREAD +#ifdef ONE_WORK_THREAD + SlaveLoader::handle = caf::spawn(SlaveLoader::WorkInCAF); +#endif + +#ifdef WORK_THREADS for (int i = 0; i < 1; ++i) { Environment::getInstance()->getThreadPool()->AddTask( SlaveLoader::HandleWork, slave_loader); diff --git a/loader/slave_loader.h b/loader/slave_loader.h index c36057e91..968585c29 100644 --- a/loader/slave_loader.h +++ b/loader/slave_loader.h @@ -30,15 +30,17 @@ #define LOADER_SLAVE_LOADER_H_ #include #include +#include #include #include "../catalog/catalog.h" #include "../storage/BlockManager.h" 
#include "caf/all.hpp" -#include namespace claims { namespace loader { +using caf::behavior; +using caf::event_based_actor; using std::string; using claims::catalog::Catalog; @@ -70,20 +72,25 @@ class SlaveLoader { void OutputFdIpPort(int fd); - RetCode StoreDataInMemory(const LoadPacket& packet); - RetCode SendAckToMasterLoader(const uint64_t& txn_id, bool is_commited); + static RetCode StoreDataInMemory(const LoadPacket& packet); + static RetCode SendAckToMasterLoader(const uint64_t& txn_id, + bool is_commited); + static behavior WorkInCAF(event_based_actor* self); + static void WorkInAsync(LoadPacket* packet); static void* HandleWork(void* arg); private: int master_socket_fd_; string self_ip; int self_port; - caf::actor master_actor_; int listening_fd_ = -1; int master_fd_ = -1; + private: + static caf::actor handle; + private: queue packet_queue_; SpineLock queue_lock_; From 40268e42eb00c5e65cc1e3a21b906a68ee080e13 Mon Sep 17 00:00:00 2001 From: yukai Date: Mon, 16 May 2016 12:45:24 +0800 Subject: [PATCH 40/58] Performance test version --- loader/master_loader.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 4a8e0b052..384db74d0 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -63,6 +63,7 @@ #include "../txn_manager/txn.hpp" #include "../txn_manager/txn_client.hpp" #include "../txn_manager/txn_log.hpp" +#include "../utility/cpu_scheduler.h" #include "../utility/resource_guard.h" #include "../utility/Timer.h" using caf::aout; @@ -302,6 +303,7 @@ RetCode MasterLoader::Ingest(const string& message, static uint64_t get_request_time = 0; static uint64_t get_tuple_time = 0; static uint64_t merge_tuple_time = 0; + static uint64_t time_before_txn = 0; #ifdef MASTER_LOADER_PREF uint64_t temp_message_count = @@ -316,6 +318,8 @@ RetCode MasterLoader::Ingest(const string& message, << " us" << endl; cout << txn_count_for_debug << " txn merge tuples used " 
<< merge_tuple_time << " us" << endl; + cout << txn_count_for_debug << " txn before txn used " << time_before_txn + << " us" << endl; } #endif DLOG(INFO) << "consumed message :" << debug_consumed_message_count; @@ -329,11 +333,11 @@ RetCode MasterLoader::Ingest(const string& message, IngestionRequest req; EXEC_AND_DLOG(ret, GetRequestFromMessage(message, &req), "got request!", "failed to get request"); - ATOMIC_ADD(get_request_time, GetElapsedTimeInUs(req_start)); + // ATOMIC_ADD(get_request_time, GetElapsedTimeInUs(req_start)); /// parse message and get all tuples of all partitions, then /// check the validity of all tuple in message - GET_TIME_ML(get_tuple_start); + // GET_TIME_ML(get_tuple_start); TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable(req.table_name_); assert(table != NULL && "table is not exist!"); @@ -361,17 +365,18 @@ RetCode MasterLoader::Ingest(const string& message, "got all tuples of every partition", "failed to get all tuples of every partition"); #endif - ATOMIC_ADD(get_tuple_time, GetElapsedTimeInUs(get_tuple_start)); + // ATOMIC_ADD(get_tuple_time, GetElapsedTimeInUs(get_tuple_start)); /// merge all tuple buffers of partition into one partition buffer - GET_TIME_ML(merge_start); + // GET_TIME_ML(merge_start); vector> partition_buffers( table->getNumberOfProjection()); EXEC_AND_DLOG(ret, MergePartitionTupleIntoOneBuffer( table, tuple_buffers_per_part, partition_buffers), "merged all tuple of same partition into one buffer", "failed to merge tuples buffers into one buffer"); - ATOMIC_ADD(merge_tuple_time, GetElapsedTimeInUs(merge_start)); + // ATOMIC_ADD(merge_tuple_time, GetElapsedTimeInUs(merge_start)); + ATOMIC_ADD(time_before_txn, GetElapsedTimeInUs(req_start)); /// start transaction from here claims::txn::Ingest ingest; @@ -956,6 +961,8 @@ void* MasterLoader::StartMasterLoader(void* arg) { // consumer.run(master_loader); for (int i = 0; i < Config::master_loader_thread_num - 1; ++i) { WorkerPara 
para(master_loader, brokerURI, destURI, use_topics, client_ack); + // Environment::getInstance()->getThreadPool()->AddTaskInCpu( + // MasterLoader::Work, ¶, (i + 1) % GetNumberOfCpus()); Environment::getInstance()->getThreadPool()->AddTask(MasterLoader::Work, ¶); } From 20672706bc93689963eba72a1c9f1c8869a15b68 Mon Sep 17 00:00:00 2001 From: yukai Date: Tue, 31 May 2016 18:47:05 +0800 Subject: [PATCH 41/58] FIX: using the old partition reader --- physical_operator/physical_projection_scan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/physical_operator/physical_projection_scan.cpp b/physical_operator/physical_projection_scan.cpp index 6ff46bd4b..3803d5435 100644 --- a/physical_operator/physical_projection_scan.cpp +++ b/physical_operator/physical_projection_scan.cpp @@ -96,7 +96,7 @@ bool PhysicalProjectionScan::Open(const PartitionOffset& kPartitionOffset) { SetReturnStatus(false); } else { partition_reader_iterator_ = - partition_handle_->createTxnReaderIterator(); + partition_handle_->createAtomicReaderIterator(); SetReturnStatus(true); } From 805faee1479212c2edae06da6c0db268a2700c73 Mon Sep 17 00:00:00 2001 From: yukai Date: Wed, 1 Jun 2016 12:06:20 +0800 Subject: [PATCH 42/58] FIX: a bug in InitTxnManager() --- Environment.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Environment.cpp b/Environment.cpp index fa56da68e..621d67d0a 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -258,12 +258,21 @@ bool Environment::InitTxnManager() { auto cat = Catalog::getInstance(); auto table_count = cat->getNumberOfTable(); // cout << "table count:" << table_count << endl; - for (auto table_id = 0; table_id < table_count; table_id++) { + for (auto table_id : cat->getAllTableIDs()) { auto table = cat->getTable(table_id); + if (NULL == table) { + cout << " No table whose id is:" << table_id << endl; + assert(false); + } auto proj_count = table->getNumberOfProjection(); // cout << "proj_count:" << proj_count << endl; 
for (auto proj_id = 0; proj_id < proj_count; proj_id++) { auto proj = table->getProjectoin(proj_id); + if (NULL == proj) { + cout << "No projection whose id is:" << proj_id + << " in table:" << table->getTableName() << endl; + assert(false); + } auto part = proj->getPartitioner(); auto part_count = part->getNumberOfPartitions(); // cout << "part_count:" << part_count << endl; From 365b82ef5f5364e6297ce513ca12f56a422e25ac Mon Sep 17 00:00:00 2001 From: yukai Date: Wed, 8 Jun 2016 20:31:28 +0800 Subject: [PATCH 43/58] OPTIMIZE:adjust the order of initialize master_loader and master_node; FIX: bug in catalog.cpp --- Environment.cpp | 18 +-- Makefile.am | 2 +- catalog/catalog.cpp | 3 +- common/types/decimal.cpp | 218 +++++++++++++++-------------------- loader/data_ingestion.cpp | 8 ++ loader/master_loader.cpp | 2 +- loader/slave_loader.cpp | 1 + node_manager/master_node.cpp | 4 +- node_manager/slave_node.cpp | 49 ++++---- stmt_handler/select_exec.cpp | 3 +- 10 files changed, 145 insertions(+), 163 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index fb9795b9a..3893af26d 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -75,6 +75,8 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { initializeExpressionSystem(); portManager = PortManager::getInstance(); + AnnounceCafMessage(); + catalog_ = claims::catalog::Catalog::getInstance(); logging_->log("restore the catalog ..."); if (rSuccess != catalog_->restoreCatalog()) { @@ -104,7 +106,6 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { sleep() dose not needed. 
This is done in Aug.18 by Li :) */ - AnnounceCafMessage(); /*Before initializing Resource Manager, the instance ip and port should be * decided.*/ logging_->log("Initializing the ResourceManager..."); @@ -231,6 +232,7 @@ void Environment::InitMembership() { if (ismaster_) { master_node_ = MasterNode::GetInstance(); } + sleep(2); slave_node_ = SlaveNode::GetInstance(); slave_node_->RegisterToMaster(); nodeid = slave_node_->get_node_id(); @@ -243,11 +245,8 @@ bool Environment::InitLoader() { std::thread master_thread(&MasterLoader::StartMasterLoader, nullptr); master_thread.detach(); DLOG(INFO) << "started thread as master loader"; - - // TxnServer::Init(6); } - usleep(10000); DLOG(INFO) << "starting create thread as slave loader"; slave_loader_ = new SlaveLoader(); std::thread slave_thread(&SlaveLoader::StartSlaveLoader, nullptr); @@ -262,15 +261,16 @@ bool Environment::InitTxnManager() { TxnServer::Init(Config::txn_server_cores, Config::txn_server_port); auto cat = Catalog::getInstance(); auto table_count = cat->getNumberOfTable(); - // cout << "table count:" << table_count << endl; - for (auto table_id : cat->getAllTableIDs()) { + cout << "table count:" << table_count << endl; + for (unsigned table_id : cat->getAllTableIDs()) { + cout << "table id :" << table_id << endl; auto table = cat->getTable(table_id); if (NULL == table) { cout << " No table whose id is:" << table_id << endl; assert(false); } auto proj_count = table->getNumberOfProjection(); - // cout << "proj_count:" << proj_count << endl; + cout << "proj_count:" << proj_count << endl; for (auto proj_id = 0; proj_id < proj_count; proj_id++) { auto proj = table->getProjectoin(proj_id); if (NULL == proj) { @@ -280,10 +280,10 @@ bool Environment::InitTxnManager() { } auto part = proj->getPartitioner(); auto part_count = part->getNumberOfPartitions(); - // cout << "part_count:" << part_count << endl; + cout << "part_count:" << part_count << endl; for (auto part_id = 0; part_id < part_count; part_id++) { auto 
global_part_id = GetGlobalPartId(table_id, proj_id, part_id); - // cout << global_part_id << endl; + cout << global_part_id << endl; TxnServer::pos_list_[global_part_id] = TxnServer::logic_cp_list_[global_part_id] = TxnServer::phy_cp_list_[global_part_id] = diff --git a/Makefile.am b/Makefile.am index 26e5d4ff4..f98acea0d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -9,7 +9,7 @@ AM_CPPFLAGS=-fPIC -DTHERON_XS\ -I${CAF_HOME}/libcaf_core #-L/usr/local/lib \ #-I/usr/local/include -AM_LDFLAGS= -lc -lm -lrt -lpthread -lactivemq-cpp\ +AM_LDFLAGS= -lc -lm -lrt -lpthread -lactivemq-cpp -lprofiler\ -lconfig++ -lxs -lnuma -lreadline -lhistory -lz -ltinfo -Wl,--no-as-needed -ldl -rdynamic -lglog if OPT_TCMALLOC diff --git a/catalog/catalog.cpp b/catalog/catalog.cpp index 40df50a38..e3f084b8e 100644 --- a/catalog/catalog.cpp +++ b/catalog/catalog.cpp @@ -353,9 +353,10 @@ void Catalog::GetAllTables(ostringstream& ostr) const { vector Catalog::getAllTableIDs() const { vector ids; - for(auto it = tableid_to_table.begin(); it != tableid_to_table.end(); it ++) + for (auto it = tableid_to_table.begin(); it != tableid_to_table.end(); it++) ids.push_back(it->first); + return ids; } } /* namespace catalog */ diff --git a/common/types/decimal.cpp b/common/types/decimal.cpp index c48e53fd3..707190416 100644 --- a/common/types/decimal.cpp +++ b/common/types/decimal.cpp @@ -46,126 +46,106 @@ namespace common { // 30 '0' 1000000000000000000000000000000 #define KMAXSCALEFACTOR "1000000000000000000000000000000" - const TTInt Decimal::kMaxScaleFactor = KMAXSCALEFACTOR; -Decimal::Decimal() - : decimal_sign_(DECIMAL_POS) { +Decimal::Decimal() : decimal_sign_(DECIMAL_POS) { memset(word, 0, sizeof(word)); } Decimal::Decimal(int precision, int scale, string valuestr) - : decimal_sign_(DECIMAL_POS) { - memset(word, 0, sizeof(word)); - StrtoDecimal(precision, scale, valuestr.c_str()); + : decimal_sign_(DECIMAL_POS) { + memset(word, 0, sizeof(word)); + StrtoDecimal(precision, scale, valuestr.c_str()); 
} -Decimal::Decimal(int precision, int scale, const char * valstr) - : decimal_sign_(DECIMAL_POS) { - memset(word, 0, sizeof(word)); - StrtoDecimal(precision, scale, valstr); +Decimal::Decimal(int precision, int scale, const char *valstr) + : decimal_sign_(DECIMAL_POS) { + memset(word, 0, sizeof(word)); + StrtoDecimal(precision, scale, valstr); } Decimal::~Decimal() { // TODO Auto-generated destructor stub } -bool Decimal::StrtoDecimal(int p, int s, const char *cp) -{ - bool have_dp = false; - int i = 0; - char decdigits[DECIMAL_MAXPRCISION+1]; - char num1[31]; - int dsign = DECIMAL_POS; - int dweight = -1; - const char *str = cp; - - memset(decdigits, '0', DECIMAL_MAXPRCISION+1); - num1[0] = '1'; - memset(num1+1, '0', 30); - - while (isspace(*cp)) - cp++; - //printf("source :%s\n",cp); - switch (*cp) - { - case '+': - cp++; - break; - case '-': - dsign = DECIMAL_NEG; - cp++; - break; - } - - while (*cp) - { - if (isdigit((unsigned char) *cp)) - { - decdigits[i++] = *cp++; - if (!have_dp) - dweight++; - } - else if (*cp == '.') - { - if (have_dp) - { - printf("invalid input syntax for type numeric: \"%s\"\n", str); - return false; - } - have_dp = true; - cp++; - } - else - break; - } - if (*cp == 'e' || *cp == 'E') - { - long exponent; - char *endptr; - cp++; - exponent = strtol(cp, &endptr, 10); - if (endptr == cp) - { - printf("invalid input syntax for type numeric: \"%s\"\n", str); - return false; - } - cp = endptr; - dweight += (int) exponent; - } - if (dweight > p - s ) - { - printf("invalid input syntax for type numeric: \"%s\"\n", str); - return false; - } - while (*cp) - { - if (isspace((unsigned char) *cp)) - { - cp++; - } - else - { - printf("invalid input syntax for type numeric: \"%s\"\n", str); - return false; - } - } - - decdigits[DECIMAL_MAXSCALE + dweight + 1] = 0; - char c = decdigits[s + dweight + 1]; - memset(decdigits + s + dweight + 1, '0', DECIMAL_MAXSCALE - s); - TTInt whole(decdigits); - - num1[30 - s + 1] = 0; - TTInt fra(num1); - - if 
(DECIMAL_NEG == dsign) - { - fra.SetSign(); - whole.SetSign(); - } - SetTTInt((c>='5'?whole+fra:whole)); - return true; +bool Decimal::StrtoDecimal(int p, int s, const char *cp) { + bool have_dp = false; + int i = 0; + char decdigits[DECIMAL_MAXPRCISION + 1]; + char num1[31]; + int dsign = DECIMAL_POS; + int dweight = -1; + const char *str = cp; + + memset(decdigits, '0', DECIMAL_MAXPRCISION + 1); + num1[0] = '1'; + memset(num1 + 1, '0', 30); + + while (isspace(*cp)) cp++; + // printf("source :%s\n",cp); + switch (*cp) { + case '+': + cp++; + break; + case '-': + dsign = DECIMAL_NEG; + cp++; + break; + } + + while (*cp) { + if (isdigit((unsigned char)*cp)) { + decdigits[i++] = *cp++; + if (!have_dp) dweight++; + } else if (*cp == '.') { + if (have_dp) { + printf("invalid input syntax for type numeric: \"%s\"\n", str); + return false; + } + have_dp = true; + cp++; + } else + break; + } + if (*cp == 'e' || *cp == 'E') { + long exponent; + char *endptr; + cp++; + exponent = strtol(cp, &endptr, 10); + if (endptr == cp) { + printf("invalid input syntax for type numeric: \"%s\"\n", str); + return false; + } + cp = endptr; + dweight += (int)exponent; + } + if (dweight > p - s) { + printf("invalid input syntax for type numeric: \"%s\"\n", str); + return false; + } + while (*cp) { + if (isspace((unsigned char)*cp)) { + cp++; + } else { + printf("invalid input syntax for type numeric: \"%s\"\n", str); + return false; + } + } + + decdigits[DECIMAL_MAXSCALE + dweight + 1] = 0; + char c = decdigits[s + dweight + 1]; + memset(decdigits + s + dweight + 1, '0', DECIMAL_MAXSCALE - s); + TTInt whole(decdigits); + + num1[30 - s + 1] = 0; + TTInt fra(num1); + + if (DECIMAL_NEG == dsign) { + fra.SetSign(); + whole.SetSign(); + } + SetTTInt((c >= '5' ? 
whole + fra : whole)); + return true; } string Decimal::toString(unsigned number_of_fractinal_digits) const { @@ -175,9 +155,9 @@ string Decimal::toString(unsigned number_of_fractinal_digits) const { rest.ToString(ress); int sign = 0; if (rest.IsSign()) sign = 1; - while ((Decimal::kMaxDecScale + sign - (int) ress.length()) >= 0) + while ((Decimal::kMaxDecScale + sign - (int)ress.length()) >= 0) ress.insert(sign, "0"); - if(number_of_fractinal_digits > 0) + if (number_of_fractinal_digits > 0) ress.insert(ress.length() - Decimal::kMaxDecScale, "."); ress.erase(ress.size() - Decimal::kMaxDecScale + number_of_fractinal_digits, Decimal::kMaxDecScale - number_of_fractinal_digits); @@ -186,29 +166,27 @@ string Decimal::toString(unsigned number_of_fractinal_digits) const { Decimal Decimal::CreateNullDecimal() { static Decimal NDecimal; - const_cast(NDecimal.decimal_sign_) = DECIMAL_NAN; + const_cast(NDecimal.decimal_sign_) = DECIMAL_NAN; return NDecimal; } -bool Decimal::isNull() const { - return decimal_sign_ == DECIMAL_NAN; -} +bool Decimal::isNull() const { return decimal_sign_ == DECIMAL_NAN; } -Decimal & Decimal::operator=(const Decimal &rhs) { +Decimal &Decimal::operator=(const Decimal &rhs) { if (this == &rhs) return *this; this->word[0] = rhs.GetTTInt(); - const_cast(this->decimal_sign_) = rhs.decimal_sign_; - //PrintValue(10); + const_cast(this->decimal_sign_) = rhs.decimal_sign_; + // PrintValue(10); return *this; } void Decimal::PrintValue(int ifra) { - printf("value:[%s][%s]\n", this->word[0].ToString().c_str(), toString(ifra).c_str()); + printf("value:[%s][%s]\n", this->word[0].ToString().c_str(), + toString(ifra).c_str()); } ///////////////////////////////////////////////////////////////////// Decimal Decimal::op_add(const Decimal rhs) const { - TTInt rett; rett = this->GetTTInt(); rett.Add(rhs.GetTTInt()); @@ -220,7 +198,6 @@ Decimal Decimal::op_add(const Decimal rhs) const { } Decimal Decimal::op_subtract(const Decimal rhs) const { - TTInt rett; rett = 
this->GetTTInt(); rett.Sub(rhs.GetTTInt()); @@ -232,7 +209,6 @@ Decimal Decimal::op_subtract(const Decimal rhs) const { } Decimal Decimal::op_multiply(const Decimal rhs) const { - TTLInt rett; rett = this->GetTTInt(); rett *= rhs.GetTTInt(); @@ -245,7 +221,6 @@ Decimal Decimal::op_multiply(const Decimal rhs) const { } Decimal Decimal::op_divide(const Decimal rhs) const { - TTLInt rett; rett = this->GetTTInt(); rett *= Decimal::kMaxScaleFactor; @@ -253,10 +228,9 @@ Decimal Decimal::op_divide(const Decimal rhs) const { Decimal ret; ret.SetTTInt(rett); - + return ret; } - } // namespace common } // namespace claims diff --git a/loader/data_ingestion.cpp b/loader/data_ingestion.cpp index bd941a31e..a2b220b13 100644 --- a/loader/data_ingestion.cpp +++ b/loader/data_ingestion.cpp @@ -660,6 +660,8 @@ void* DataIngestion::HandleTuple(void* ptr) { DataIngestion* injestion = static_cast(ptr); string tuple_to_handle = ""; string file_name = ""; + uint64_t got_tuple_count = 0; + uint64_t handled_tuple_count = 0; uint64_t row_id_in_file = 0; DataIngestion::LoadTask task; RetCode ret = rSuccess; @@ -699,6 +701,9 @@ void* DataIngestion::HandleTuple(void* ptr) { ATOMIC_ADD(injestion->total_read_sem_time_, GetElapsedTimeInUs(start_read_sem)); DLOG_DI("all tuple in pool is handled "); + DLOG(INFO) << " thread " << self_thread_index << " got " + << got_tuple_count << " tuples and handled " + << handled_tuple_count << " tuples"; EXEC_AND_LOG( ret, injestion->FlushNotFullBlock(block_to_write, local_pj_buffer), @@ -744,6 +749,7 @@ void* DataIngestion::HandleTuple(void* ptr) { injestion->task_lists_[self_thread_index].pop_front(); ///// lock/sem } + ++got_tuple_count; tuple_to_handle = task.tuple_; file_name = task.file_name_; row_id_in_file = task.row_id_in_file_; @@ -804,6 +810,8 @@ void* DataIngestion::HandleTuple(void* ptr) { injestion->multi_thread_status_ = ret; break; } + + ++handled_tuple_count; ATOMIC_ADD(injestion->total_insert_time_, GetElapsedTimeInUs(start_insert_time)); } 
diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 384db74d0..780901917 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -871,7 +871,7 @@ void* MasterLoader::SendPacketWork(void* arg) { MasterLoader* loader = static_cast(arg); int index = __sync_fetch_and_add(&(loader->thread_index_), 1); LOG(INFO) << " I got id :" << index; - assert(index < send_thread_num_); + assert(index < loader->send_thread_num_); while (1) { loader->packet_queue_to_send_count_[index].wait(); LoadPacket* packet = nullptr; diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 1d5b6da11..cf9ca2d0e 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -492,6 +492,7 @@ void* SlaveLoader::HandleWork(void* arg) { void* SlaveLoader::StartSlaveLoader(void* arg) { Config::getInstance(); LOG(INFO) << "start slave loader..."; + sleep(2); SlaveLoader* slave_loader = Environment::getInstance()->get_slave_loader(); int ret = rSuccess; diff --git a/node_manager/master_node.cpp b/node_manager/master_node.cpp index f4e4a5802..e0578b5d8 100644 --- a/node_manager/master_node.cpp +++ b/node_manager/master_node.cpp @@ -117,9 +117,9 @@ void MasterNode::CreateActor() { caf::io::publish(master_actor_, get_node_port(), nullptr, 1); LOG(INFO) << "master ip port" << get_node_port() << " publish succeed!"; } catch (caf::bind_failure& e) { - LOG(ERROR) << "the specified port " << get_node_port() << " is used!"; + PLOG(ERROR) << "the specified port " << get_node_port() << " is used!"; } catch (caf::network_error& e) { - LOG(ERROR) << "connection error in publishing master actor port"; + PLOG(ERROR) << "connection error in publishing master actor port"; } } void MasterNode::PrintNodeList() { diff --git a/node_manager/slave_node.cpp b/node_manager/slave_node.cpp index 5b96272df..69c8c6abf 100644 --- a/node_manager/slave_node.cpp +++ b/node_manager/slave_node.cpp @@ -212,32 +212,29 @@ RetCode SlaveNode::RegisterToMaster() { } // register to master 
loader - // { - // int retry_max_time = 10; - // int time = 0; - // while (1) { - // try { - // caf::actor master_actor = - // remote_actor(Config::master_loader_ip, - // Config::master_loader_port); - // caf::scoped_actor self; - // self->sync_send( - // master_actor, RegNodeAtom::value, - // NodeAddress(Environment::getInstance()->getIp(), - // to_string(Environment::getInstance()->getPort())), - // ret).await([&](int r) { - // LOG(INFO) << "sent node info and received response"; - // }); - // break; - // } catch (exception& e) { - // cout << "new remote actor " << Config::master_loader_ip << "," - // << Config::master_loader_port << "failed for " << ++time - // << " time. " << e.what() << endl; - // usleep(100 * 1000); - // if (time >= retry_max_time) return false; - // } - // } - // } + { + int retry_max_time = 10; + int time = 0; + caf::actor master_actor = + remote_actor(Config::master_loader_ip, Config::master_loader_port); + while (1) { + try { + caf::scoped_actor self; + self->sync_send(master_actor, RegNodeAtom::value, + NodeAddress(get_node_ip(), to_string(get_node_port())), + (int)node_id_).await([&](int r) { + LOG(INFO) << "sent node info and received response"; + }); + break; + } catch (exception& e) { + cout << "new remote actor " << Config::master_loader_ip << "," + << Config::master_loader_port << "failed for " << ++time + << " time. " << e.what() << endl; + usleep(100 * 1000); + if (time >= retry_max_time) return false; + } + } + } return ret; } diff --git a/stmt_handler/select_exec.cpp b/stmt_handler/select_exec.cpp index e828322c6..04e2ecde1 100644 --- a/stmt_handler/select_exec.cpp +++ b/stmt_handler/select_exec.cpp @@ -324,7 +324,8 @@ void* SelectExec::SendAllSegments(void* arg) { LOG(INFO) << "sending plan of " << select_exec->get_stmt_exec_status()->get_query_id() - << " , " << segment_id << "succeed!!!" << endl; + << " , " << segment_id << " , partition:" << i + << " succeed!!!" 
<< endl; } } else { LOG(ERROR) << "asking upper exchange failed!" << endl; From 6b59d847ad932aea664c7aedd9083bb1080b2885 Mon Sep 17 00:00:00 2001 From: yukai Date: Wed, 15 Jun 2016 20:06:26 +0800 Subject: [PATCH 44/58] ADD: multi-thread slave loader; ADD: concurrency control on storage when multi-thread loading --- Config.cpp | 2 ++ Config.h | 1 + common/Block/Block.h | 41 ++++++++++++++++++------------------ loader/slave_loader.cpp | 32 +++++++++++++++------------- loader/slave_loader.h | 1 + storage/ChunkStorage.cpp | 33 +++++++++++++++++------------ storage/ChunkStorage.h | 3 ++- storage/PartitionStorage.cpp | 17 ++++++++++++--- storage/PartitionStorage.h | 7 ++++-- 9 files changed, 82 insertions(+), 55 deletions(-) diff --git a/Config.cpp b/Config.cpp index 4db3ef241..b659b4242 100644 --- a/Config.cpp +++ b/Config.cpp @@ -104,6 +104,7 @@ bool Config::enable_txn_log; std::string Config::txn_log_path; int Config::master_loader_thread_num; +int Config::slave_loader_thread_num; Config *Config::getInstance() { if (instance_ == 0) { @@ -187,6 +188,7 @@ void Config::initialize() { txn_log_path = getString("txn_log_path", "."); master_loader_thread_num = getInt("master_loader_thread_num", 4); + slave_loader_thread_num = getInt("slave_loader_thread_num", 4); memory_utilization = getInt("memory_utilization", 100); diff --git a/Config.h b/Config.h index 739941246..3828aec65 100644 --- a/Config.h +++ b/Config.h @@ -93,6 +93,7 @@ class Config { static std::string txn_log_path; static int master_loader_thread_num; + static int slave_loader_thread_num; private: static Config* instance_; diff --git a/common/Block/Block.h b/common/Block/Block.h index a55a66638..f2e069eec 100755 --- a/common/Block/Block.h +++ b/common/Block/Block.h @@ -11,30 +11,29 @@ #include "dmalloc.h" #endif class Block { -public: - Block(unsigned BlockSize); - Block(const Block &block); - Block(const unsigned& size,const void* start); - virtual ~Block(); - void* getBlock()const; - void setBlock(void*); - 
unsigned getsize() const; - unsigned setsize(const unsigned& size) ; + public: + Block(unsigned BlockSize); + Block(const Block& block); + Block(const unsigned& size, const void* start); + virtual ~Block(); + void* getBlock() const; + void setBlock(void*); + unsigned getsize() const; + unsigned setsize(const unsigned& size); - bool isIsReference() const { - return isReference_; - } + bool isIsReference() const { return isReference_; } - void setIsReference(bool isReference); + void setIsReference(bool isReference); -protected: - unsigned BlockSize; - char* start; -private: - /* true, if it references to others, which means that the block does not need - * to free the memory reference in destructor. - */ - bool isReference_; + protected: + unsigned BlockSize; // don't include SchemeFix::tail_info + char* start; + + private: + /* true, if it references to others, which means that the block does not need + * to free the memory reference in destructor. + */ + bool isReference_; }; #endif /* BLOCK_H_ */ diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index cf9ca2d0e..66c1bfedb 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -63,7 +63,7 @@ using claims::txn::GetTableIdFromGlobalPartId; using std::chrono::milliseconds; using std::chrono::seconds; -// #define WORK_THREADS +#define WORK_THREADS // #define ONE_WORK_THREAD // #define NO_ACTUAL_WORK @@ -87,6 +87,7 @@ using std::chrono::seconds; #endif caf::actor SlaveLoader::handle; +caf::actor* SlaveLoader::handles_; static const int txn_count_for_debug = 5000; static const char* txn_count_string = "5000"; @@ -278,11 +279,12 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { LoadPacket* packet = new LoadPacket(); EXEC_AND_DLOG(ret, packet->Deserialize(head_buffer, data_buffer), "deserialized packet", "failed to deserialize packet"); - { - LockGuard guard(queue_lock_); - packet_queue_.push(packet); - } - packet_count_.post(); + + static int handle_index = 0; + caf::scoped_actor self; + 
self->send(handles_[handle_index], LoadPacketAtom::value, packet); + if (++handle_index == Config::slave_loader_thread_num) handle_index = 0; + #else #ifdef ONE_WORK_THREAD LoadPacket* packet = new LoadPacket(); @@ -343,15 +345,9 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { DLOG(INFO) << "position+offset is:" << packet.pos_ + packet.offset_ << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; -#ifdef WORK_THREADS - partition_storage_lock_.acquire(); -#endif EXEC_AND_DLOG_RETURN( ret, part_storage->AddChunkWithMemoryToNum(last_chunk_id + 1, HDFS), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); -#ifdef WORK_THREADS - partition_storage_lock_.release(); -#endif /// copy data into applied memory const uint64_t tuple_size = Catalog::getInstance() @@ -510,9 +506,15 @@ void* SlaveLoader::StartSlaveLoader(void* arg) { #endif #ifdef WORK_THREADS - for (int i = 0; i < 1; ++i) { - Environment::getInstance()->getThreadPool()->AddTask( - SlaveLoader::HandleWork, slave_loader); + // for (int i = 0; i < 1; ++i) { + // Environment::getInstance()->getThreadPool()->AddTask( + // SlaveLoader::HandleWork, slave_loader); + // } + + handles_ = new caf::actor[Config::slave_loader_thread_num]; + + for (int i = 0; i < Config::slave_loader_thread_num; ++i) { + SlaveLoader::handles_[i] = caf::spawn(SlaveLoader::WorkInCAF); } #endif diff --git a/loader/slave_loader.h b/loader/slave_loader.h index 968585c29..141be6935 100644 --- a/loader/slave_loader.h +++ b/loader/slave_loader.h @@ -90,6 +90,7 @@ class SlaveLoader { private: static caf::actor handle; + static caf::actor* handles_; private: queue packet_queue_; diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 2e100d07b..74c3222dc 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -104,7 +104,7 @@ RetCode ChunkStorage::ApplyMemory() { ChunkReaderIterator* ChunkStorage::CreateChunkReaderIterator() { ChunkReaderIterator* ret; - 
lock_.acquire(); + // lock_.acquire(); HdfsInMemoryChunk chunk_info; if (current_storage_level_ == MEMORY && !BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( @@ -177,7 +177,7 @@ ChunkReaderIterator* ChunkStorage::CreateChunkReaderIterator() { } default: { WLOG(rUnkownStroageLevel, "current storage level: unknown!"); } } - lock_.release(); + // lock_.release(); return ret; } @@ -422,6 +422,8 @@ void ChunkReaderIterator::InMemeryBlockAccessor::GetBlock( int tuple_count = *(unsigned*)((char*)target_block_start_address_ + block->getSerializedBlockSize() - sizeof(unsigned)); + DLOG(INFO) << "Get Block whose tuple counts is:" << tuple_count + << ", start address is:" << target_block_start_address_; dynamic_cast(block)->setTuplesInBlock(tuple_count); // ((BlockStreamFix*)block)->free_ = // (char*)block->getBlock() + @@ -450,27 +452,32 @@ uint64_t InMemoryChunkWriterIterator::Write(const void* const buffer_to_write, void* block_offset = chunk_offset_ + block_id_ * block_size_; assert(block_offset < chunk_offset_ + CHUNK_SIZE && "this block is not in this chunk"); + unsigned* tuple_count_in_block = reinterpret_cast( block_offset + block_size_ - sizeof(unsigned)); - int can_store_tuple_count = - (block_size_ - sizeof(unsigned)) / tuple_size_ - *tuple_count_in_block; + int left_space = block_size_ - sizeof(unsigned) - pos_in_block_; + int can_store_tuple_count = left_space / tuple_size_; + + // int can_store_tuple_count = + // (block_size_ - sizeof(unsigned)) / tuple_size_ - + // *tuple_count_in_block; assert(can_store_tuple_count >= 0); DLOG(INFO) << "block whose id is " << block_id_ << " stored " << *tuple_count_in_block << " tuple and leaf " << can_store_tuple_count << " tuple space. and tuple size is:" << tuple_size_; - // there are space to store data + /// there are space to store data if (can_store_tuple_count > 0) { - int actual_written_tuple_count = - length_to_write / tuple_size_ > can_store_tuple_count - ? 
can_store_tuple_count - : length_to_write / tuple_size_; - DLOG(INFO) << "memcpy start pos is " - << block_offset + (*tuple_count_in_block) * tuple_size_ + int actual_written_tuple_count = length_to_write > left_space + ? can_store_tuple_count + : length_to_write / tuple_size_; + DLOG(INFO) << "memcpy " << actual_written_tuple_count + << " tuples to memory whose start pos is " + << block_offset + pos_in_block_ << ". buffer to write: " << buffer_to_write; - memcpy(block_offset + (*tuple_count_in_block) * tuple_size_, - buffer_to_write, actual_written_tuple_count * tuple_size_); + memcpy(block_offset + pos_in_block_, buffer_to_write, + actual_written_tuple_count * tuple_size_); DLOG(INFO) << "copy " << actual_written_tuple_count * tuple_size_ << " bytes into block:" << block_id_; diff --git a/storage/ChunkStorage.h b/storage/ChunkStorage.h index e2c40b57b..f0ff7a79b 100755 --- a/storage/ChunkStorage.h +++ b/storage/ChunkStorage.h @@ -30,6 +30,7 @@ #ifndef CHUNKSTORAGE_H_ #define CHUNKSTORAGE_H_ #include +#include #include #include "./StorageLevel.h" @@ -331,7 +332,7 @@ class ChunkStorage { unsigned block_size_; unsigned chunk_size_; StorageLevel desirable_storage_level_; - StorageLevel current_storage_level_; + atomic current_storage_level_; ChunkID chunk_id_; Lock lock_; }; diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 33cfb75dd..7561711ec 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -30,13 +30,18 @@ #include "PartitionStorage.h" #include +#include + #include "../common/error_define.h" #include "../Debug.h" #include "./MemoryManager.h" +#include "../common/memory_handle.h" #include "../Config.h" #include "../Resource/BufferManager.h" +#include "../utility/lock_guard.h" using claims::common::rSuccess; +using claims::utility::LockGuard; /** * According to number_of_chunks, construct chunk from partition and add into @@ -65,7 +70,7 @@ PartitionStorage::PartitionStorage(const PartitionID& partition_id, 
PartitionStorage::~PartitionStorage() { for (unsigned i = 0; i < chunk_list_.size(); i++) { - chunk_list_[i]->~ChunkStorage(); + DELETE_PTR(chunk_list_[i]); } chunk_list_.clear(); } @@ -73,12 +78,15 @@ PartitionStorage::~PartitionStorage() { void PartitionStorage::AddNewChunk() { number_of_chunks_++; } RetCode PartitionStorage::AddChunkWithMemoryToNum( - const unsigned& expected_number_of_chunks, - const StorageLevel& storage_level) { + unsigned expected_number_of_chunks, const StorageLevel& storage_level) { RetCode ret = rSuccess; if (number_of_chunks_ >= expected_number_of_chunks) return ret; DLOG(INFO) << "now chunk number:" << number_of_chunks_ << ". expected chunk num:" << expected_number_of_chunks; + + LockGuard guard(write_lock_); + if (number_of_chunks_ >= expected_number_of_chunks) return ret; + for (unsigned i = number_of_chunks_; i < expected_number_of_chunks; i++) { ChunkStorage* chunk = new ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, storage_level); @@ -103,6 +111,7 @@ RetCode PartitionStorage::AddChunkWithMemoryToNum( void PartitionStorage::UpdateChunksWithInsertOrAppend( const PartitionID& partition_id, const unsigned& number_of_chunks, const StorageLevel& storage_level) { + LockGuard guard(write_lock_); if (!chunk_list_.empty()) { MemoryChunkStore::GetInstance()->ReturnChunk( chunk_list_.back()->GetChunkID()); @@ -147,6 +156,7 @@ PartitionStorage::PartitionReaderIterator::PartitionReaderIterator( PartitionStorage::PartitionReaderIterator::~PartitionReaderIterator() {} ChunkReaderIterator* PartitionStorage::PartitionReaderIterator::NextChunk() { + LockGuard guard(ps_->write_lock_); if (chunk_cur_ < ps_->number_of_chunks_) return ps_->chunk_list_[chunk_cur_++]->CreateChunkReaderIterator(); else @@ -159,6 +169,7 @@ PartitionStorage::AtomicPartitionReaderIterator:: ChunkReaderIterator* PartitionStorage::AtomicPartitionReaderIterator::NextChunk() { ChunkReaderIterator* ret = NULL; + LockGuard guard(ps_->write_lock_); if (chunk_cur_ < 
ps_->number_of_chunks_) ret = ps_->chunk_list_[chunk_cur_++]->CreateChunkReaderIterator(); else diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index 694370f7e..d92315ad6 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -29,6 +29,7 @@ #ifndef PARTITIONSTORAGE_H_ #define PARTITIONSTORAGE_H_ +#include #include #include "../common/error_define.h" @@ -121,7 +122,7 @@ class PartitionStorage { void AddNewChunk(); - RetCode AddChunkWithMemoryToNum(const unsigned& expected_number_of_chunks, + RetCode AddChunkWithMemoryToNum(unsigned expected_number_of_chunks, const StorageLevel& storage_level); const int GetChunkNum() const { return chunk_list_.size(); } @@ -156,9 +157,11 @@ class PartitionStorage { protected: PartitionID partition_id_; - unsigned number_of_chunks_; + atomic number_of_chunks_; std::vector chunk_list_; StorageLevel desirable_storage_level_; + + Lock write_lock_; }; //} // namespace storage //} // namespace claims From e2d0ae8ad6a438408e3b04c79fb1fb068f391c52 Mon Sep 17 00:00:00 2001 From: yukai Date: Thu, 16 Jun 2016 16:58:20 +0800 Subject: [PATCH 45/58] ADD: comments about why current scan can't read new data --- loader/master_loader.cpp | 73 +----------------------------------- loader/master_loader.h | 4 ++ loader/slave_loader.cpp | 6 +++ storage/PartitionStorage.cpp | 10 ++--- 4 files changed, 15 insertions(+), 78 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 780901917..f3755133d 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -410,77 +410,6 @@ RetCode MasterLoader::Ingest(const string& message, return ret; } -string MasterLoader::GetMessage() { - // for testing - string ret = - "LINEITEM,|,\n," - "1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-" - "02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the|\n" - "1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-" - "04-" - "20|TAKE BACK RETURN|MAIL|ly final 
dependencies: slyly bold |\n" - "1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-" - "31|TAKE BACK RETURN|REG AIR|riously. regular, express dep|\n" - "1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-" - "16|NONE|AIR|lites. fluffily even de|\n" - "1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-" - "04-" - "01|NONE|FOB| pending foxes. slyly re|\n" - "1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-" - "03|DELIVER IN PERSON|MAIL|arefully slyly ex|\n" - "2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-" - "02-" - "02|TAKE BACK RETURN|RAIL|ven requests. deposits breach a|\n" - "3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-" - "23|NONE|AIR|ongside of the furiously brave acco|\n" - "3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-" - "11-" - "24|TAKE BACK RETURN|RAIL| unusual accounts. eve|\n" - "3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-" - "01-" - "23|DELIVER IN PERSON|SHIP|nal foxes wake. |\n" - "3|29380|1883|4|2|2618.76|0.01|0.06|A|F|1993-12-04|1994-01-07|1994-01-" - "01|" - "NONE|TRUCK|y. fluffily pending d|\n" - "7|145243|7758|2|9|11594.16|0.08|0.08|N|O|1996-02-01|1996-03-02|1996-" - "02-" - "19|TAKE BACK RETURN|SHIP|es. instructions|\n" - "7|94780|9799|3|46|81639.88|0.10|0.07|N|O|1996-01-15|1996-03-27|1996-" - "02-" - "03|COLLECT COD|MAIL| unusual reques|\n" - "7|163073|3074|4|28|31809.96|0.03|0.04|N|O|1996-03-21|1996-04-08|1996-" - "04-" - "20|NONE|FOB|. slyly special requests haggl|\n" - "7|151894|9440|5|38|73943.82|0.08|0.01|N|O|1996-02-11|1996-02-24|1996-" - "02-" - "18|DELIVER IN PERSON|TRUCK|ns haggle carefully ironic deposits. bl|\n" - "7|79251|1759|6|35|43058.75|0.06|0.03|N|O|1996-01-16|1996-02-23|1996-" - "01-" - "22|TAKE BACK RETURN|FOB|jole. 
excuses wake carefully alongside of |\n" - "7|157238|2269|7|5|6476.15|0.04|0.02|N|O|1996-02-10|1996-03-26|1996-02-" - "13|NONE|FOB|ithely regula|\n" - "32|82704|7721|1|28|47227.60|0.05|0.08|N|O|1995-10-23|1995-08-27|1995-" - "10-" - "26|TAKE BACK RETURN|TRUCK|sleep quickly. req|\n" - "32|197921|441|2|32|64605.44|0.02|0.00|N|O|1995-08-14|1995-10-07|1995-" - "08-" - "27|COLLECT COD|AIR|lithely regular deposits. fluffily |\n" - "32|44161|6666|3|2|2210.32|0.09|0.02|N|O|1995-08-07|1995-10-07|1995-08-" - "23|DELIVER IN PERSON|AIR| express accounts wake according to the|\n" - "32|2743|7744|4|4|6582.96|0.09|0.03|N|O|1995-08-04|1995-10-01|1995-09-" - "03|" - "NONE|REG AIR|e slyly final pac|\n" - "32|85811|8320|5|44|79059.64|0.05|0.06|N|O|1995-08-28|1995-08-20|1995-" - "09-" - "14|DELIVER IN PERSON|AIR|symptotes nag according to the ironic " - "depo|\n"; - return ret; -} - -// bool MasterLoader::CheckValidity() {} -// -// void MasterLoader::DistributeSubIngestion() {} - RetCode MasterLoader::GetSocketFdConnectedWithSlave(string ip, int port, int* connected_fd) { int fd = socket(AF_INET, SOCK_STREAM, 0); @@ -509,7 +438,7 @@ RetCode MasterLoader::GetSocketFdConnectedWithSlave(string ip, int port, // get every tuples and add row id for it RetCode MasterLoader::GetRequestFromMessage(const string& message, IngestionRequest* req) { - // AddRowIdColumn() + // TODO(ANYONE): get the real row_id of table static uint64_t row_id = 10000000; RetCode ret = rSuccess; size_t pos = message.find(',', 0); diff --git a/loader/master_loader.h b/loader/master_loader.h index 2c60b25ec..e69690d0a 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -38,6 +38,8 @@ #include #include "caf/all.hpp" #include +#include +#include #include "./validity.h" #include "../common/error_define.h" @@ -46,7 +48,9 @@ #include "../utility/lock.h" using std::function; +using std::set; using std::unordered_map; +using std::unordered_set; namespace claims { namespace catalog { diff --git 
a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 66c1bfedb..69f7f7a39 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -368,6 +368,12 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { ChunkID(PartitionID(ProjectionID(table_id, prj_id), part_id), cur_chunk_id), chunk_info)) { + // In this version, the last chunk info don't updated their member: length + // after inserting data, + // so the data inserted into last chunk is invisible. + // This leads to no new data is show in the beginning of loading. + // The new applied chunk has updated its length to CHUNK_SIZE, + // so they performs well DLOG(INFO) << "start address of chunk:" << cur_chunk_id << " is " << chunk_info.hook << ", end addr is " << chunk_info.hook + CHUNK_SIZE; diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 7561711ec..12caeb4e0 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -156,7 +156,7 @@ PartitionStorage::PartitionReaderIterator::PartitionReaderIterator( PartitionStorage::PartitionReaderIterator::~PartitionReaderIterator() {} ChunkReaderIterator* PartitionStorage::PartitionReaderIterator::NextChunk() { - LockGuard guard(ps_->write_lock_); + // LockGuard guard(ps_->write_lock_); if (chunk_cur_ < ps_->number_of_chunks_) return ps_->chunk_list_[chunk_cur_++]->CreateChunkReaderIterator(); else @@ -168,13 +168,11 @@ PartitionStorage::AtomicPartitionReaderIterator:: ChunkReaderIterator* PartitionStorage::AtomicPartitionReaderIterator::NextChunk() { - ChunkReaderIterator* ret = NULL; LockGuard guard(ps_->write_lock_); if (chunk_cur_ < ps_->number_of_chunks_) - ret = ps_->chunk_list_[chunk_cur_++]->CreateChunkReaderIterator(); + return ps_->chunk_list_[chunk_cur_++]->CreateChunkReaderIterator(); else - ret = NULL; - return ret; + return NULL; } bool PartitionStorage::PartitionReaderIterator::NextBlock( @@ -208,7 +206,7 @@ bool PartitionStorage::AtomicPartitionReaderIterator::NextBlock( delete 
chunk_it_; chunk_it_ = NULL; } - if ((chunk_it_ = PartitionReaderIterator::NextChunk()) > 0) { + if ((chunk_it_ = NextChunk()) > 0) { lock_.release(); return NextBlock(block); } else { From 6e71ec6e726d647a8778dd5328c39a271ffe9ea9 Mon Sep 17 00:00:00 2001 From: yukai Date: Sat, 18 Jun 2016 18:04:40 +0800 Subject: [PATCH 46/58] FIX: bug appeared when txn_log is disable --- Config.cpp | 2 +- loader/data_ingestion.cpp | 4 +- loader/data_ingestion.h | 2 +- txn_manager/txn_client.cpp | 179 ++++++++++++++++++++----------------- txn_manager/txn_log.cpp | 157 +++++++++++++++----------------- txn_manager/txn_server.cpp | 11 ++- 6 files changed, 185 insertions(+), 170 deletions(-) diff --git a/Config.cpp b/Config.cpp index b659b4242..025fba699 100644 --- a/Config.cpp +++ b/Config.cpp @@ -100,7 +100,7 @@ int Config::txn_server_cores; std::string Config::txn_server_ip; int Config::txn_server_port; -bool Config::enable_txn_log; +bool Config::enable_txn_log = false; std::string Config::txn_log_path; int Config::master_loader_thread_num; diff --git a/loader/data_ingestion.cpp b/loader/data_ingestion.cpp index a2b220b13..8920cb3f9 100644 --- a/loader/data_ingestion.cpp +++ b/loader/data_ingestion.cpp @@ -152,8 +152,8 @@ DataIngestion::DataIngestion(TableDescriptor* table, const string col_separator, : table_(table), col_separator_(col_separator), row_separator_(row_separator), - row_id_in_table_(table_->row_number_), - connector_(table_->get_connector()) { + row_id_in_table_(table->row_number_), + connector_(table->get_connector()) { sub_tuple_generator_.clear(); table_schema_ = table_->getSchema(); for (int i = 0; i < table_->getNumberOfProjection(); i++) { diff --git a/loader/data_ingestion.h b/loader/data_ingestion.h index 892ab2fb8..a191d6ede 100644 --- a/loader/data_ingestion.h +++ b/loader/data_ingestion.h @@ -180,7 +180,7 @@ class DataIngestion { const string& terminator); private: - TableDescriptor* table_; + const TableDescriptor* table_; TableFileConnector& connector_; 
Schema* table_schema_; diff --git a/txn_manager/txn_client.cpp b/txn_manager/txn_client.cpp index bf52269d5..fdc635b62 100644 --- a/txn_manager/txn_client.cpp +++ b/txn_manager/txn_client.cpp @@ -20,172 +20,191 @@ * * Created on: 2016年4月10日 * Author: imdb - * Email: - * + * Email: + * * Description: * */ #include "txn_client.hpp" +#include "txn_log.hpp" //#include "../common/error_define.h" -namespace claims{ -namespace txn{ - -//using claims::txn::TxnClient; -//using claims::txn::RetCode; -//using claims::txn::FixTupleIngestReq; -//using claims::txn::Ingest; -//using claims::common::rSuccess; -//using claims::common::rLinkTmTimeout; -//using claims::common::rLinkTmFail; -//using claims::common::rBeginIngestTxnFail; -//using claims::common::rBeginQueryFail; -//using claims::common::rBeginCheckpointFail; -//using claims::common::rCommitIngestTxnFail; -//using claims::common::rCommitCheckpointFail; +namespace claims { +namespace txn { +// using claims::txn::TxnClient; +// using claims::txn::RetCode; +// using claims::txn::FixTupleIngestReq; +// using claims::txn::Ingest; +// using claims::common::rSuccess; +// using claims::common::rLinkTmTimeout; +// using claims::common::rLinkTmFail; +// using claims::common::rBeginIngestTxnFail; +// using claims::common::rBeginQueryFail; +// using claims::common::rBeginCheckpointFail; +// using claims::common::rCommitIngestTxnFail; +// using claims::common::rCommitCheckpointFail; string TxnClient::ip_ = kTxnIp; int TxnClient::port_ = kTxnPort; caf::actor TxnClient::proxy_; -RetCode TxnClient::Init(string ip, int port){ +RetCode TxnClient::Init(string ip, int port) { ip_ = ip; port_ = port; SerConfig(); try { proxy_ = caf::io::remote_actor(ip_, port); } catch (...) 
{ -// return rLinkTmFail; + // return rLinkTmFail; return -1; } -// return rSuccess; + // return rSuccess; return 0; } -RetCode TxnClient::BeginIngest(const FixTupleIngestReq & request, Ingest & ingest){ -// RetCode ret = rSuccess; +RetCode TxnClient::BeginIngest(const FixTupleIngestReq& request, + Ingest& ingest) { + // RetCode ret = rSuccess; RetCode ret = 0; - try{ + try { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, - IngestAtom::value, request).await( - [&](RetCode r, const Ingest & reply) {ret = r; ingest = reply;}, - [&](RetCode r) { ret = r;}, - caf::others >> [](){ cout << " unkown message" << endl;}, - caf::after(seconds(kTimeout)) >> [&]{ -// ret = rLinkTmTimeout; - ret = -1; - cout <<"time out" << endl; }); - } catch (...){ + IngestAtom::value, request) + .await([&](RetCode r, const Ingest& reply) { + ret = r; + ingest = reply; + }, + [&](RetCode r) { ret = r; }, + caf::others >> []() { cout << " unkown message" << endl; }, + caf::after(seconds(kTimeout)) >> [&] { + // ret = + // rLinkTmTimeout; + ret = -1; + cout << "time out" << endl; + }); + } catch (...) { cout << "link fail" << endl; -// return rLinkTmFail; + // return rLinkTmFail; return -1; } return ret; } RetCode TxnClient::CommitIngest(const UInt64 id) { -// RetCode ret = rSuccess; + // RetCode ret = rSuccess; RetCode ret = 0; try { caf::scoped_actor self; + self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, - CommitIngestAtom::value, id).await( - [&](RetCode r) { ret = r;}, - caf::after(seconds(kTimeout)) >> [&]{ -// ret = rLinkTmTimeout; - ret = -1; - cout << "time out" << endl; }); + CommitIngestAtom::value, id) + .await([&](RetCode r) { ret = r; }, + caf::others >> []() { cout << " unkown message" << endl; }, + caf::after(seconds(kTimeout)) >> [&] { + // ret = + // rLinkTmTimeout; + ret = -1; + cout << "time out" << endl; + }); } catch (...) 
{ cout << "link fail" << endl; -// return rLinkTmFail; + // return rLinkTmFail; return -1; } return ret; } RetCode TxnClient::AbortIngest(const UInt64 id) { -// RetCode ret = rSuccess; + // RetCode ret = rSuccess; RetCode ret = 0; try { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, - AbortIngestAtom::value, id).await( - [&](RetCode r) { ret = r;}, - caf::after(seconds(kTimeout)) >> [&]{ -// ret = rLinkTmTimeout; - ret = -1; - cout << "time out" << endl; }); + AbortIngestAtom::value, id) + .await([&](RetCode r) { ret = r; }, + caf::others >> []() { cout << " unkown message" << endl; }, + caf::after(seconds(kTimeout)) >> [&] { + // ret = + // rLinkTmTimeout; + ret = -1; + cout << "time out" << endl; + }); } catch (...) { cout << "link fail" << endl; -// return rLinkTmFail; + // return rLinkTmFail; return -1; } return ret; } -RetCode TxnClient::BeginQuery(const QueryReq & request, Query & query) { -// RetCode ret = rSuccess; +RetCode TxnClient::BeginQuery(const QueryReq& request, Query& query) { + // RetCode ret = rSuccess; RetCode ret = 0; try { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, - QueryAtom::value, request).await( - [&](const Query & q) { query = q;}, - caf::after(seconds(kTimeout)) >> [&]{ -// ret = rLinkTmTimeout; - ret = -1; - cout << "time out" << endl;}); + QueryAtom::value, request) + .await([&](const Query& q) { query = q; }, + caf::after(seconds(kTimeout)) >> [&] { + // ret = + // rLinkTmTimeout; + ret = -1; + cout << "time out" << endl; + }); } catch (...) { cout << "link fail" << endl; -// return rLinkTmFail; + // return rLinkTmFail; return -1; } return ret; } -RetCode TxnClient::BeginCheckpoint(Checkpoint & cp) { -// RetCode ret = rSuccess; +RetCode TxnClient::BeginCheckpoint(Checkpoint& cp) { + // RetCode ret = rSuccess; RetCode ret = 0; try { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? 
TxnServer::proxy_ : proxy_, - CheckpointAtom::value, cp.part_).await( - [&](const Checkpoint & checkpoint, RetCode r) { - cp = checkpoint; ret = r;}, - caf::after(seconds(kTimeout)) >> [&]{ -// ret = rLinkTmTimeout; - ret = -1; - cout << "time out" << endl;}); + CheckpointAtom::value, cp.part_) + .await([&](const Checkpoint& checkpoint, RetCode r) { + cp = checkpoint; + ret = r; + }, + caf::after(seconds(kTimeout)) >> [&] { + // ret = + // rLinkTmTimeout; + ret = -1; + cout << "time out" << endl; + }); } catch (...) { cout << "link fail" << endl; -// return rLinkTmFail; + // return rLinkTmFail; return -1; } return ret; } -RetCode TxnClient::CommitCheckpoint(const UInt64 logic_cp, const UInt64 phy_cp) { -// RetCode ret = rSuccess; +RetCode TxnClient::CommitCheckpoint(const UInt64 logic_cp, + const UInt64 phy_cp) { + // RetCode ret = rSuccess; RetCode ret = 0; try { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, - CommitCPAtom::value, logic_cp, phy_cp).await( - [&](RetCode r) { ret = r;}, - caf::after(seconds(kTimeout)) >> [&]{ -// ret = rLinkTmTimeout; - ret = -1; - cout << "time out" << endl;}); + CommitCPAtom::value, logic_cp, phy_cp) + .await([&](RetCode r) { ret = r; }, + caf::after(seconds(kTimeout)) >> [&] { + // ret = + // rLinkTmTimeout; + ret = -1; + cout << "time out" << endl; + }); } catch (...) 
{ cout << "link fail" << endl; -// return rLinkTmFail; + // return rLinkTmFail; return -1; } return ret; } - } } - - diff --git a/txn_manager/txn_log.cpp b/txn_manager/txn_log.cpp index 0c137369c..39a28a4e3 100644 --- a/txn_manager/txn_log.cpp +++ b/txn_manager/txn_log.cpp @@ -20,23 +20,22 @@ * * Created on: 2016年2月24日 * Author: imdb - * Email: - * + * Email: + * * Description: * */ #include "txn_log.hpp" -namespace claims{ -namespace txn{ - +namespace claims { +namespace txn { string LogServer::file_path_ = "."; -FILE * LogServer::file_handler_ = nullptr; +FILE* LogServer::file_handler_ = nullptr; UInt64 LogServer::file_size_ = 0; UInt64 LogServer::file_capacity_ = kMaxLogSize; -char * LogServer::buffer_ = nullptr; -UInt64 LogServer::buffer_size_ = 0; +char* LogServer::buffer_ = nullptr; +UInt64 LogServer::buffer_size_ = 0; UInt64 LogServer::buffer_capacity_ = kMaxLogSize * 10; caf::actor LogServer::proxy_; bool LogServer::active_ = false; @@ -51,47 +50,44 @@ RetCode LogServer::Init(const string path) { } caf::behavior LogServer::make_behavior() { - return { - [=](IngestAtom, shared_ptr ingest)->caf::message { - Append(BeginLog(ingest->id_)); - for (auto & strip : ingest->strip_list_) - Append(WriteLog(ingest->id_, strip.first, strip.second.first,strip.second.second)); - //cout << "begin" << endl; - return caf::make_message(0, *ingest); + [=](IngestAtom, shared_ptr ingest) -> caf::message { + Append(BeginLog(ingest->id_)); + for (auto& strip : ingest->strip_list_) + Append(WriteLog(ingest->id_, strip.first, strip.second.first, + strip.second.second)); + // cout << "begin" << endl; + return caf::make_message(0, *ingest); }, - [=](CommitIngestAtom, const UInt64 id)->caf::message { + [=](CommitIngestAtom, const UInt64 id) -> caf::message { Append(CommitLog(id)); - //cout << "commit" << endl; + // cout << "commit" << endl; Refresh(); return caf::make_message(0); }, - [=](AbortIngestAtom, UInt64 id)->caf::message { + [=](AbortIngestAtom, UInt64 id) -> caf::message { 
Append(AbortLog(id)); - //cout << "abort" << endl; + // cout << "abort" << endl; Refresh(); return caf::make_message(0); }, - [=](CheckpointAtom, UInt64 part, UInt64 logic_cp, UInt64 phy_cp) - ->RetCode { - return Append(CheckpointLog(part, logic_cp, phy_cp)); - }, - [=](DataAtom,UInt64 part, UInt64 pos, UInt64 offset, - void * buffer, UInt64 size)->caf::message { + [=](CheckpointAtom, UInt64 part, UInt64 logic_cp, UInt64 phy_cp) + -> RetCode { return Append(CheckpointLog(part, logic_cp, phy_cp)); }, + [=](DataAtom, UInt64 part, UInt64 pos, UInt64 offset, void* buffer, + UInt64 size) -> caf::message { Append(DataLogPrefix(part, pos, offset, size)); Append(buffer, size); return caf::make_message(0); }, - [=](RefreshAtom)->caf::message { + [=](RefreshAtom) -> caf::message { Refresh(); return caf::make_message(0); }, - caf::others >> [=] () { cout << "unknown log message" << endl; } - }; + caf::others >> [=]() { cout << "unknown log message" << endl; }}; } -RetCode LogServer::Append (const string & log) { +RetCode LogServer::Append(const string& log) { if (buffer_size_ + log.length() >= buffer_capacity_) { cout << "append fail" << endl; return -1; @@ -102,42 +98,39 @@ RetCode LogServer::Append (const string & log) { return 0; } -RetCode LogServer::Append(void * data, UInt64 size){ - if (buffer_size_ + size >= buffer_capacity_) - return -1; +RetCode LogServer::Append(void* data, UInt64 size) { + if (buffer_size_ + size >= buffer_capacity_) return -1; - memcpy(buffer_ + buffer_size_, data, size); - buffer_size_ += size; - buffer_[buffer_size_++] = '\n'; - file_size_ += size + 1; + memcpy(buffer_ + buffer_size_, data, size); + buffer_size_ += size; + buffer_[buffer_size_++] = '\n'; + file_size_ += size + 1; - return 0; + return 0; } RetCode LogServer::Refresh() { - // cout << "refresh" << endl; + // cout << "refresh" << endl; if (file_handler_ == nullptr) { - struct timeval ts; - gettimeofday (&ts, NULL); - string file = file_path_ + "/" + kTxnLogFileName + 
to_string(ts.tv_sec); - //cout << file << endl; - file_handler_ = fopen (file.c_str(),"a"); - if (file_handler_ == nullptr){ - //cout <<"open file fail"<= file_capacity_) { + if (file_size_ >= file_capacity_) { if (file_handler_ == nullptr) return -1; fclose(file_handler_); file_handler_ = nullptr; @@ -147,67 +140,65 @@ RetCode LogServer::Refresh() { } RetCode LogClient::Begin(UInt64 id) { -// RetCode ret = 0; -// caf::scoped_actor self; -// cout<<"going to send begin atom to log server :"<sync_send( log_s,BeginAtom::value, id). -// await( [&](RetCode ret_code) { cout<<"log:Begin, ret"<sync_send( log_s,BeginAtom::value, id). + // await( [&](RetCode ret_code) { cout<<"log:Begin, + // ret"<sync_send(LogServer::proxy_, - WriteAtom::value, id, part, pos, offset).await( - [&](RetCode ret_code) { ret = ret_code;} - ); + if (LogServer::active_) + self->sync_send(LogServer::proxy_, WriteAtom::value, id, part, pos, offset) + .await([&](RetCode ret_code) { ret = ret_code; }); return ret; } RetCode LogClient::Commit(UInt64 id) { RetCode ret = 0; caf::scoped_actor self; - self->sync_send( LogServer::proxy_, - CommitAtom::value,id).await( - [&](RetCode ret_code) { ret = ret_code;} - ); + if (LogServer::active_) + self->sync_send(LogServer::proxy_, CommitAtom::value, id) + .await([&](RetCode ret_code) { ret = ret_code; }); return ret; } RetCode LogClient::Abort(UInt64 id) { RetCode ret = 0; caf::scoped_actor self; - self->sync_send( LogServer::proxy_, - AbortAtom::value, id).await( - [&](RetCode ret_code) { ret = ret_code;} - ); + if (LogServer::active_) + self->sync_send(LogServer::proxy_, AbortAtom::value, id) + .await([&](RetCode ret_code) { ret = ret_code; }); return ret; } -RetCode LogClient::Data(UInt64 part, UInt64 pos, UInt64 offset, void * buffer, UInt64 size) { +RetCode LogClient::Data(UInt64 part, UInt64 pos, UInt64 offset, void* buffer, + UInt64 size) { RetCode ret = 0; caf::scoped_actor self; - self->sync_send( LogServer::proxy_, - DataAtom::value, part, pos, 
offset, buffer, size).await( - [&](RetCode ret_code) { ret = ret_code;} - ); + if (LogServer::active_) + self->sync_send(LogServer::proxy_, DataAtom::value, part, pos, offset, + buffer, + size).await([&](RetCode ret_code) { ret = ret_code; }); return ret; } RetCode LogClient::Checkpoint(UInt64 part, UInt64 logic_cp, UInt64 phy_cp) { RetCode ret = 0; caf::scoped_actor self; - self->sync_send(LogServer::proxy_, - CheckpointAtom::value, part, logic_cp, phy_cp).await( - [&](RetCode ret_code) { ret = ret_code;} - ); + if (LogServer::active_) + self->sync_send(LogServer::proxy_, CheckpointAtom::value, part, logic_cp, + phy_cp).await([&](RetCode ret_code) { ret = ret_code; }); return ret; } RetCode LogClient::Refresh() { RetCode ret = 0; caf::scoped_actor self; - self->sync_send(LogServer::proxy_, RefreshAtom::value). - await( [&](RetCode ret_code) { ret = ret_code;}); + if (LogServer::active_) + self->sync_send(LogServer::proxy_, RefreshAtom::value) + .await([&](RetCode ret_code) { ret = ret_code; }); return ret; } - - } } diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index 380db84dc..1c354d769 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -76,6 +76,7 @@ caf::behavior TxnCore::make_behavior() { // this->delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); return { [=](IngestAtom, const FixTupleIngestReq& request) -> caf::message { + // cout << "begin" << endl; auto ingest = make_shared(); // RetCode ret = rSuccess; @@ -94,7 +95,7 @@ caf::behavior TxnCore::make_behavior() { ingest->InsertStrip(strip); } size_++; - /// cout << ingest.ToString() << endl; + // cout << ingest.ToString() << endl; if (LogServer::active_) { current_message() = caf::make_message(IngestAtom::value, ingest); this->forward_to(LogServer::proxy_); @@ -102,21 +103,25 @@ caf::behavior TxnCore::make_behavior() { return caf::make_message(ret, *ingest); }, [=](CommitIngestAtom, const UInt64 id) -> caf::message { - // cout << "commit" << endl; + 
// cout << "commit ingest txn id :" << id << endl; if (txn_index_.find(id) == txn_index_.end()) return caf::make_message(-1 /*rCommitIngestTxnFail*/); commit_[txn_index_[id]] = true; + // cout << "Logserver active:" << LogServer::active_ << endl; if (LogServer::active_) { + // assert(false); this->forward_to(LogServer::proxy_); } + return caf::make_message(0 /*rSuccess*/); }, [=](AbortIngestAtom, const UInt64 id) -> caf::message { - // cout << "abort" << endl; + // cout << "abort ingest txn id :" << id << endl; if (txn_index_.find(id) == txn_index_.end()) return caf::make_message(-1 /*rBeginIngestTxnFail*/); abort_[txn_index_[id]] = true; if (LogServer::active_) { + // assert(false); this->forward_to(LogServer::proxy_); } return caf::make_message(0 /*rAbortIngestTxnFail*/); From b1da53726d5b39b51dcfaefe0785910d1db1efd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Fri, 8 Jul 2016 13:11:12 +0800 Subject: [PATCH 47/58] rm useless txn_manager/log_client.hpp,log_client.cpp,log_server.hpp,log_server.cpp --- txn_manager/Makefile.am | 4 +--- txn_manager/log_client.cpp | 31 ------------------------------- txn_manager/log_client.hpp | 36 ------------------------------------ txn_manager/log_server.cpp | 31 ------------------------------- txn_manager/log_server.hpp | 36 ------------------------------------ 5 files changed, 1 insertion(+), 137 deletions(-) delete mode 100644 txn_manager/log_client.cpp delete mode 100644 txn_manager/log_client.hpp delete mode 100644 txn_manager/log_server.cpp delete mode 100644 txn_manager/log_server.hpp diff --git a/txn_manager/Makefile.am b/txn_manager/Makefile.am index 2445f4e37..4a5bbd50e 100644 --- a/txn_manager/Makefile.am +++ b/txn_manager/Makefile.am @@ -26,9 +26,7 @@ libtxnmanager_a_SOURCES = \ txn.hpp txn.cpp \ txn_client.hpp txn_client.cpp \ txn_server.hpp txn_server.cpp \ - txn_log.hpp txn_log.cpp \ - log_client.hpp log_client.cpp \ - log_server.hpp log_server.cpp + txn_log.hpp 
txn_log.cpp diff --git a/txn_manager/log_client.cpp b/txn_manager/log_client.cpp deleted file mode 100644 index b7d6f9bb5..000000000 --- a/txn_manager/log_client.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright [2012-2015] DaSE@ECNU - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * /CLAIMS/txn_manager/log_client.cpp - * - * Created on: 2016年4月22日 - * Author: imdb - * Email: - * - * Description: - * - */ - - - - diff --git a/txn_manager/log_client.hpp b/txn_manager/log_client.hpp deleted file mode 100644 index fd7b65b7f..000000000 --- a/txn_manager/log_client.hpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright [2012-2015] DaSE@ECNU - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * /CLAIMS/txn_manager/log_client.hpp - * - * Created on: 2016年4月22日 - * Author: imdb - * Email: - * - * Description: - * - */ - -#ifndef TXN_MANAGER_LOG_CLIENT_HPP_ -#define TXN_MANAGER_LOG_CLIENT_HPP_ - - - - - -#endif // TXN_MANAGER_LOG_CLIENT_HPP_ diff --git a/txn_manager/log_server.cpp b/txn_manager/log_server.cpp deleted file mode 100644 index bb00877d7..000000000 --- a/txn_manager/log_server.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright [2012-2015] DaSE@ECNU - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * /CLAIMS/txn_manager/log_server.cpp - * - * Created on: 2016年4月22日 - * Author: imdb - * Email: - * - * Description: - * - */ - - - - diff --git a/txn_manager/log_server.hpp b/txn_manager/log_server.hpp deleted file mode 100644 index d49a8f10b..000000000 --- a/txn_manager/log_server.hpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright [2012-2015] DaSE@ECNU - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * /CLAIMS/txn_manager/log_server.hpp - * - * Created on: 2016年4月21日 - * Author: imdb - * Email: - * - * Description: - * - */ - -#ifndef TXN_MANAGER_LOG_SERVER_HPP_ -#define TXN_MANAGER_LOG_SERVER_HPP_ - - - - - -#endif // TXN_MANAGER_LOG_SERVER_HPP_ From fd067e0f1a14febcd3c2e67bc3e97fcb20f6aedc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Fri, 29 Jul 2016 21:15:35 +0800 Subject: [PATCH 48/58] implement timestamp and txn binning for ingestion --- Environment.cpp | 16 +- Makefile.am | 5 +- loader/master_loader.cpp | 12 +- physical_operator/physical_txn_scan.cpp | 209 ----------- physical_operator/physical_txn_scan.hpp | 158 -------- txn_manager/Makefile.am | 2 +- txn_manager/txn.cpp | 213 +++++++---- txn_manager/txn.hpp | 269 ++++++++++---- txn_manager/txn_client.cpp | 47 ++- txn_manager/txn_client_test.cpp | 298 ++++++++++----- txn_manager/txn_log.cpp | 4 +- txn_manager/txn_server.cpp | 470 +++++++----------------- txn_manager/txn_server.hpp | 108 +++--- txn_manager/txn_server_test.cpp | 2 +- utility/Timer.h | 7 + 15 files changed, 811 insertions(+), 1009 deletions(-) delete mode 100644 physical_operator/physical_txn_scan.cpp delete mode 100644 physical_operator/physical_txn_scan.hpp diff --git a/Environment.cpp b/Environment.cpp index 3893af26d..eefc78a47 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -55,6 +55,7 @@ using claims::common::rSuccess; using claims::loader::LoadPacket; using claims::loader::MasterLoader; using claims::loader::SlaveLoader; +using claims::txn::UInt64; using claims::txn::TxnServer; using claims::txn::TxnClient; using claims::txn::LogServer; @@ -257,6 +258,7 @@ bool Environment::InitLoader() { bool Environment::InitTxnManager() { if (Config::enable_txn_server) { + unordered_map pos_list, his_cp_list, rt_cp_list; LOG(INFO) << "I'm txn manager server"; TxnServer::Init(Config::txn_server_cores, Config::txn_server_port); auto cat = Catalog::getInstance(); @@ -284,14 +286,18 @@ 
bool Environment::InitTxnManager() { for (auto part_id = 0; part_id < part_count; part_id++) { auto global_part_id = GetGlobalPartId(table_id, proj_id, part_id); cout << global_part_id << endl; - TxnServer::pos_list_[global_part_id] = - TxnServer::logic_cp_list_[global_part_id] = - TxnServer::phy_cp_list_[global_part_id] = - part->getPartitionBlocks(part_id) * 64 * 1024; + // TxnServer::pos_list_[global_part_id] = + // TxnServer::his_cp_list_[global_part_id] = + // TxnServer::rt_cp_list_[global_part_id] = + // part->getPartitionBlocks(part_id) * 64 * 1024; + pos_list[global_part_id] = his_cp_list[global_part_id] = + rt_cp_list[global_part_id] = + part->getPartitionBlocks(part_id) * 64 * 1024; } } } - + TxnServer::LoadCPList(0, his_cp_list, rt_cp_list); + TxnServer::LoadPos(pos_list); cout << "*******pos_list*******" << endl; for (auto& pos : TxnServer::pos_list_) cout << "partition[" << pos.first << "] => " << pos.second << endl; diff --git a/Makefile.am b/Makefile.am index f98acea0d..0390f85b4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -90,7 +90,8 @@ include_HEADERS = Config.h \ bin_PROGRAMS = claimsserver \ client \ - test + test \ + txntest client_SOURCES = Client.cpp \ Environment.cpp \ @@ -107,6 +108,8 @@ test_SOURCES = Test/gtest_main.cpp\ IDsGenerator.cpp \ Config.cpp +txntest_SOURCES = txn_manager/txn_client_test.cpp + SUBDIRS= catalog Client common Daemon Executor IndexManager\ loader physical_operator logical_operator Resource \ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index f3755133d..34f5fd726 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -381,16 +381,16 @@ RetCode MasterLoader::Ingest(const string& message, /// start transaction from here claims::txn::Ingest ingest; EXEC_AND_LOG(ret, ApplyTransaction(table, partition_buffers, ingest), - "applied transaction: " << ingest.id_, + "applied transaction: " << ingest.ts_, "failed to apply transaction"); commit_info_spin_lock_.acquire(); 
txn_commint_info_.insert(std::pair( - ingest.id_, CommitInfo(ingest.strip_list_.size()))); + ingest.ts_, CommitInfo(ingest.strip_list_.size()))); - txn_start_time_.insert(pair(ingest.id_, GetCurrentUs())); + txn_start_time_.insert(pair(ingest.ts_, GetCurrentUs())); commit_info_spin_lock_.release(); - DLOG(INFO) << "insert txn " << ingest.id_ << " into map "; + DLOG(INFO) << "insert txn " << ingest.ts_ << " into map "; /// write data log EXEC_AND_DLOG(ret, WriteLog(table, partition_buffers, ingest), "written log", @@ -701,7 +701,7 @@ RetCode MasterLoader::SendPartitionTupleToSlave( #ifdef SEND_THREAD LoadPacket* packet = - new LoadPacket(socket_fd, ingest.id_, global_part_id, + new LoadPacket(socket_fd, ingest.ts_, global_part_id, ingest.strip_list_.at(global_part_id).first, ingest.strip_list_.at(global_part_id).second, partition_buffers[prj_id][part_id].length_, @@ -719,7 +719,7 @@ RetCode MasterLoader::SendPartitionTupleToSlave( } packet_queue_to_send_count_[queue_index].post(); #else - LoadPacket packet(socket_fd, ingest.id_, global_part_id, + LoadPacket packet(socket_fd, ingest.ts_, global_part_id, ingest.strip_list_.at(global_part_id).first, ingest.strip_list_.at(global_part_id).second, partition_buffers[prj_id][part_id].length_, diff --git a/physical_operator/physical_txn_scan.cpp b/physical_operator/physical_txn_scan.cpp deleted file mode 100644 index 09550bbca..000000000 --- a/physical_operator/physical_txn_scan.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright [2012-2015] DaSE@ECNU - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * /CLAIMS/physical_operator/physical_txn_scan.cpp - * - * Created on: 2016年4月19日 - * Author: imdb - * Email: - * - * Description: - * - */ - -#include "../physical_operator/physical_txn_scan.hpp" - -#include -#include -#include -#include -#include -#include -#include "../common/rename.h" -#include "../storage/BlockManager.h" -#include "../Config.h" -#include "../utility/warmup.h" -#include "../storage/ChunkStorage.h" -#include "../Executor/expander_tracker.h" -#include "../storage/PartitionStorage.h" -using claims::common::rNoPartitionIdScan; -using claims::common::rSuccess; -using claims::common::rCodegenFailed; - -namespace claims { -namespace physical_operator { -PhysicalTxnScan::PhysicalTxnScan(State state) - : state_(state), partition_reader_iterator_(NULL), perf_info_(NULL) { - InitExpandedStatus(); -} - -PhysicalTxnScan::PhysicalTxnScan() - : partition_reader_iterator_(NULL), perf_info_(NULL) { - InitExpandedStatus(); -} - -PhysicalTxnScan::~PhysicalTxnScan() { - if (NULL != state_.schema_) { - delete state_.schema_; - state_.schema_ = NULL; - } - if (NULL != perf_info_) { - delete perf_info_; - perf_info_ = NULL; - } -} - -PhysicalTxnScan::State::State(ProjectionID projection_id, Schema* schema, - unsigned block_size, float sample_rate) - : schema_(schema), - projection_id_(projection_id), - block_size_(block_size), - sample_rate_(sample_rate) {} - -/** - * Initialize the operator to get the initial position. Scan is the start point - * of stage, get instance of ExpanderTracker to add this point. 
Different policy - * decide if it generates a buffer. - */ - -bool PhysicalTxnScan::Open(const PartitionOffset& kPartitionOffset) { - RegisterExpandedThreadToAllBarriers(); - - if (TryEntryIntoSerializedSection()) { - /* this is the first expanded thread*/ - PartitionStorage* partition_handle_; - if (NULL == - (partition_handle_ = BlockManager::getInstance()->GetPartitionHandle( - PartitionID(state_.projection_id_, kPartitionOffset)))) { - LOG(ERROR) << PartitionID(state_.projection_id_, kPartitionOffset) - .getName() - .c_str() << CStrError(rNoPartitionIdScan) << std::endl; - SetReturnStatus(false); - } else { - partition_reader_iterator_ = - partition_handle_->CreateAtomicReaderIterator(); - SetReturnStatus(true); - } - -#ifdef AVOID_CONTENTION_IN_SCAN - unsigned long long start = curtick(); - - ChunkReaderIterator* chunk_reader_it; - ChunkReaderIterator::block_accessor* ba; - while (chunk_reader_it = partition_reader_iterator_->nextChunk()) { - while (chunk_reader_it->getNextBlockAccessor(ba)) { - ba->getBlockSize(); - input_dataset_.input_data_blocks_.push_back(ba); - } - } -#endif - ExpanderTracker::getInstance()->addNewStageEndpoint( - pthread_self(), LocalStageEndPoint(stage_src, "Scan", 0)); - perf_info_ = - ExpanderTracker::getInstance()->getPerformanceInfo(pthread_self()); - perf_info_->initialize(); - } - BarrierArrive(); - return GetReturnStatus(); -} - -/** - * There are two method of strategy to scan data. - * 1) make a buffer(input_data). wait for quantitative block and return it. - * because destorySelfContext() is not work, we don't use this method(code has - * commented). - * 2) get a block and return it immediately. - */ - -// TODO(Hanzhang): According to AVOID_CONTENTION_IN_SCAN, we choose the -// strategy. We need finish case(1). 
-bool PhysicalTxnScan::Next(BlockStreamBase* block) { - unsigned long long total_start = curtick(); -#ifdef AVOID_CONTENTION_IN_SCAN - ScanThreadContext* stc = reinterpret_cast(GetContext()); - if (NULL == stc) { - stc = new ScanThreadContext(); - InitContext(stc); - } - if (ExpanderTracker::getInstance()->isExpandedThreadCallBack( - pthread_self())) { - input_dataset_.AtomicPut(stc->assigned_data_); - delete stc; - destorySelfContext(); - kPerfInfo->report_instance_performance_in_millibytes(); - return false; - } - - if (!stc->assigned_data_.empty()) { - ChunkReaderIterator::block_accessor* ba = stc->assigned_data_.front(); - stc->assigned_data_.pop_front(); - - ba->getBlock(block); - - // whether delete InMemeryBlockAccessor::target_block_start_address - // is depend on whether use copy in ba->getBlock(block); - delete ba; - kPerfInfo->processed_one_block(); - return true; - } else { - if (input_dataset_.AtomicGet(stc->assigned_data_, Config::scan_batch)) { - // case(1) - return Next(block); - } else { - delete stc; - destorySelfContext(); - return false; - } - } - -#else - - if (ExpanderTracker::getInstance()->isExpandedThreadCallBack( - pthread_self())) { - return false; - } - perf_info_->processed_one_block(); - // case(2) - return partition_reader_iterator_->NextBlock(block); - -#endif -} - -bool PhysicalTxnScan::Close() { - if (NULL != partition_reader_iterator_) { - delete partition_reader_iterator_; - partition_reader_iterator_ = NULL; - } - DestoryAllContext(); - - /* reset the expanded status in that open and next will be re-invoked.*/ - InitExpandedStatus(); - return true; -} - -void PhysicalTxnScan::Print() { - printf("Scan (ID=%d)\n", state_.projection_id_.table_id); -} - -bool PhysicalTxnScan::PassSample() const { - if ((rand() / (float)RAND_MAX) < state_.sample_rate_) return true; - return false; -} - -} // namespace physical_operator -} // namespace claims diff --git a/physical_operator/physical_txn_scan.hpp 
b/physical_operator/physical_txn_scan.hpp deleted file mode 100644 index a0e158406..000000000 --- a/physical_operator/physical_txn_scan.hpp +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright [2012-2015] DaSE@ECNU - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * /CLAIMS/physical_operator/physical_txn_scan.hpp - * - * Created on: 2016年4月19日 - * Author: imdb - * Email: - * - * Description: - * - */ - -#ifndef PHYSICAL_OPERATOR_PHYSICAL_TXN_SCAN_HPP_ -#define PHYSICAL_OPERATOR_PHYSICAL_TXN_SCAN_HPP_ - -#define GLOG_NO_ABBREVIATED_SEVERITIES -#include -#include -#include -#include -#include -#include "../common/error_no.h" -#include "../physical_operator/physical_operator_base.h" -#include "../common/Schema/Schema.h" -#include "../storage/ChunkStorage.h" -#include "../storage/PartitionStorage.h" -#include "../physical_operator/physical_operator.h" -#include "../common/ExpandedThreadTracker.h" - -namespace claims { -namespace physical_operator { - -typedef std::list assigned_data; -/** - * @brief Method description: As a buffer for input. 
- */ - -struct input_dataset { - assigned_data input_data_blocks_; - SpineLock lock; - bool AtomicGet(assigned_data& target, unsigned number_of_block) { - lock.acquire(); - bool not_empty = !target.empty(); - while (number_of_block-- && (!input_data_blocks_.empty())) { - target.push_back(input_data_blocks_.front()); - input_data_blocks_.pop_front(); - } - lock.release(); - return not_empty; - } - void AtomicPut(assigned_data blocks) { - lock.acquire(); - for (assigned_data::iterator it = blocks.begin(); it != blocks.end(); it++) - input_data_blocks_.push_front(*it); - lock.release(); - } -}; - -/** - * Implementation of Scan operator in physical layer. Get blocks for Storage - * medium. In the current implementation, for simplicity, the underlying - * storage is arranged in blocks, each of which is the same as the size of the - * block in the parameter of the next function. Actually, read chunks from - * partition, read blocks from chunk. - */ -class PhysicalTxnScan : public PhysicalOperator { - public: - class ScanThreadContext : public ThreadContext { - public: - ~ScanThreadContext(){}; - assigned_data assigned_data_; - }; - - // struct allocated_block { - // char* start; - // unsigned length; - // }; - class State { - friend class PhysicalTxnScan; - - public: - State(ProjectionID projection_id, Schema* schema, unsigned block_size, - float sample_rate = 1); - State(){}; - - public: - Schema* schema_; - ProjectionID projection_id_; - unsigned block_size_; - float sample_rate_; - friend class boost::serialization::access; - template - void serialize(Archive& ar, const unsigned int version) { - ar& schema_& projection_id_& block_size_& sample_rate_; - } - }; - PhysicalTxnScan(State state); - PhysicalTxnScan(); - virtual ~PhysicalTxnScan(); - /** - * @brief Method description: Initialize the operator and get the initial - * position of chunk read iterator. 
- */ - bool Open(const PartitionOffset& partition_offset = 0); - - /** - * @brief: fetch block from child operator. - */ - - bool Next(BlockStreamBase* block); - /** - * @brief: revoke resource. - */ - bool Close(); - void Print(); - - private: - bool PassSample() const; - - private: - State state_; - PartitionStorage::PartitionReaderIterator* partition_reader_iterator_; - std::list remaining_chunk_iterator_list_; - Lock chunk_reader_container_lock_; - // like a buffer - input_dataset input_dataset_; - - PerformanceInfo* perf_info_; - - // The following code is for boost serialization. - private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const unsigned int version) { - ar& boost::serialization::base_object(*this) & state_; - } -}; - -} // namespace physical_operator -} // namespace claims - -#endif // PHYSICAL_OPERATOR_PHYSICAL_TXN_SCAN_HPP_ diff --git a/txn_manager/Makefile.am b/txn_manager/Makefile.am index 4a5bbd50e..c6a7099c6 100644 --- a/txn_manager/Makefile.am +++ b/txn_manager/Makefile.am @@ -26,7 +26,7 @@ libtxnmanager_a_SOURCES = \ txn.hpp txn.cpp \ txn_client.hpp txn_client.cpp \ txn_server.hpp txn_server.cpp \ - txn_log.hpp txn_log.cpp + txn_log.hpp txn_log.cpp diff --git a/txn_manager/txn.cpp b/txn_manager/txn.cpp index cce2e3f79..ca9a220e9 100644 --- a/txn_manager/txn.cpp +++ b/txn_manager/txn.cpp @@ -20,19 +20,32 @@ * * Created on: 2016年3月28日 * Author: imdb - * Email: - * + * Email: + * * Description: * */ #include "txn.hpp" +#include "txn_server.hpp" namespace claims { namespace txn { using claims::txn::Strip; -void Strip::Map(vector & input, map> & output) { + +string Snapshot::ToString() const { + string str = "*****snapshot*****\n"; + for (auto part_cp : part_pstrips_) { + str += "part:" + to_string(part_cp.first) + " "; + for (auto pstrip : part_cp.second) + str += + "<" + to_string(pstrip.first) + "," + to_string(pstrip.second) + ">"; + } + return str + "\n******************\n"; +} + +void 
Strip::Map(vector &input, map> &output) { output.clear(); - for (auto & strip:input) { + for (auto &strip : input) { if (output.find(strip.part_) != output.end()) output[strip.part_].push_back(strip); else @@ -40,29 +53,27 @@ void Strip::Map(vector & input, map> & output) { } } -void Strip::Sort(vector & input) { +void Strip::Sort(vector &input) { sort(input.begin(), input.end(), - [](const Strip & a, const Strip &b){ return a.pos_ < b.pos_;}); + [](const Strip &a, const Strip &b) { return a.pos_ < b.pos_; }); } -void Strip::Sort(vector & input) { - +void Strip::Sort(vector &input) { sort(input.begin(), input.end(), - [](const PStrip & a, const PStrip & b) - { return a.first < b.first;}); + [](const PStrip &a, const PStrip &b) { return a.first < b.first; }); } -void Strip::Merge(vector & input){ +void Strip::Merge(vector &input) { vector buffer(input); input.clear(); if (buffer.size() == 0) return; auto pid = buffer[0].part_; auto begin = buffer[0].pos_; auto end = buffer[0].pos_ + buffer[0].offset_; - for (auto i = 1; i < buffer.size(); i ++) { - if (end == buffer[i].pos_) + for (auto i = 1; i < buffer.size(); i++) { + if (end == buffer[i].pos_) { end = buffer[i].pos_ + buffer[i].offset_; - else { + } else { input.emplace_back(pid, begin, end - begin); begin = buffer[i].pos_; end = begin + buffer[i].offset_; @@ -71,97 +82,161 @@ void Strip::Merge(vector & input){ input.emplace_back(pid, begin, end - begin); } -void Strip::Merge(vector & input) { - if (input.size() == 0) return; - vector buffer; - auto begin = input[0].first; - auto end = input[0].first + input[0].second; - for (auto i = 1; i < input.size(); i++) { - if (end == input[i].first) - end = input[i].first + input[i].second; - else { +void Strip::Merge(vector &input) { + if (input.size() == 0) return; + vector buffer; + auto begin = input[0].first; + auto end = input[0].first + input[0].second; + for (auto i = 1; i < input.size(); i++) { + if (end == input[i].first) { + end = input[i].first + 
input[i].second; + } else { buffer.emplace_back(begin, end - begin); begin = input[i].first; end = input[i].first + input[i].second; - } - } - buffer.emplace_back(begin, end - begin); - input = buffer; + } + } + buffer.emplace_back(begin, end - begin); + input = buffer; } - -void Strip::Filter(vector & input, function predicate) { +void Strip::Filter(vector &input, function predicate) { vector buffer(input); input.clear(); - for (auto & strip : buffer) - if (predicate(strip)) - input.push_back(strip); + for (auto &strip : buffer) + if (predicate(strip)) input.push_back(strip); +} + +void Strip::Filter(vector &input, function predicate) { + vector buffer(input); + input.clear(); + for (auto &pstrip : buffer) + if (predicate(pstrip)) input.push_back(pstrip); } string Strip::ToString() { string str = "*******Strip******\n"; - str += "part:" + to_string(part_) + - ",pos:" + to_string(pos_) + - ",Offset:" + to_string(offset_) + "\n"; + str += "part:" + to_string(part_) + ",pos:" + to_string(pos_) + ",Offset:" + + to_string(offset_) + "\n"; return str; } string FixTupleIngestReq::ToString() { string str = "*******FixTupleIngestReq********\n"; - for (auto & item : content_) - str += "part:" + to_string(item.first) + - ",tuple_size:" + to_string(item.second.first) + - ",tuple_count:"+ to_string(item.second.second)+"\n"; + for (auto &item : content_) + str += "part:" + to_string(item.first) + ",tuple_size:" + + to_string(item.second.first) + ",tuple_count:" + + to_string(item.second.second) + "\n"; return str; } string Ingest::ToString() { - UInt64 core_id = id_ % 1000; - core_id << 54; - core_id >> 54; + UInt64 core_id = TxnServer::GetCoreID(ts_); + UInt64 txnbin_id = TxnBin::GetTxnBinID(ts_, TxnServer::concurrency_); + UInt64 txnbin_pos = TxnBin::GetTxnBinPos(ts_, TxnServer::concurrency_); string str = "*******Ingest*********\n"; - str += "id:" + to_string(id_) + ",core:" + to_string(core_id)+ "\n"; - for (auto & item : strip_list_) - str += "part:" + 
to_string(item.first) + - ",pos:" + to_string(item.second.first) + - ",offset:"+ to_string(item.second.second)+"\n"; + str += "TS:" + to_string(ts_) + ",core:" + to_string(core_id) + + ",txnbin_id:" + to_string(txnbin_id) + ",txnbin_pos:" + + to_string(txnbin_pos) + "\n"; + for (auto &item : strip_list_) + str += "part:" + to_string(item.first) + ",pos:" + + to_string(item.second.first) + ",offset:" + + to_string(item.second.second) + "\n"; return str; } string QueryReq::ToString() { string str = "*******QueryReq********\n"; - for (auto & part : part_list_) - str += "part:" + to_string(part) +"\n"; + for (auto &part : part_list_) str += "part:" + to_string(part) + "\n"; return str; } string Query::ToString() { string str = "******Query*******\n"; - for (auto & part : snapshot_){ - str += "part:" + to_string(part.first)+"\n"; - for (auto & strip : part.second) - str += "Pos:" + to_string(strip.first) + - ",Offset:" + to_string(strip.second) + "\n"; - } + for (auto &part : snapshot_) { + str += "part:" + to_string(part.first) + "\n"; + for (auto &strip : part.second) + str += "Pos:" + to_string(strip.first) + ",Offset:" + + to_string(strip.second) + "\n"; + } return str; } string Checkpoint::ToString() { - string str = "******checkpoint******\n"; - - str += "part:" + to_string(part_) +"\n"; - str += "commit strip\n"; - for (auto & strip : commit_strip_list_) - str += "Pos:" + to_string(strip.first) + - ",Offset:" + to_string(strip.second) + "\n"; - - str += "abort strip\n"; - for (auto & strip : abort_strip_list_) - str += "Pos:" + to_string(strip.first) + - ",Offset:" + to_string(strip.second) + "\n"; - str += "logic cp:" + to_string(logic_cp_) + "\n"; - str += "phy cp:" + to_string(phy_cp_) + "\n"; + string str = "*******checkpoint*******\n"; + str += "Historical:"; + for (auto &cp : vers_his_cp_) + str += "<" + to_string(cp.first) + "," + to_string(cp.second) + ">"; + str += "\nReal-time:"; + for (auto &cp : vers_rt_cp_) + str += "<" + to_string(cp.first) + "," + 
to_string(cp.second) + ">"; + str += "\n"; return str; } +void Snapshot::Merge(const vector &strips) { + for (auto &strip : strips) + part_pstrips_[strip.part_].push_back(PStrip(strip.pos_, strip.offset_)); + for (auto &part_strips : part_pstrips_) { + Strip::Sort(part_strips.second); + Strip::Merge(part_strips.second); + } +} +void Snapshot::Merge(const Snapshot &snapshot) { + for (auto &part_pstrip : snapshot.part_pstrips_) { + part_pstrips_[part_pstrip.first].insert( + part_pstrips_[part_pstrip.first].end(), part_pstrip.second.begin(), + part_pstrip.second.end()); + } + for (auto &part_strips : part_pstrips_) { + Strip::Sort(part_strips.second); + Strip::Merge(part_strips.second); + } } +void TxnBin::MergeSnapshot(Query &query) const { + for (auto &part_cp : query.rt_cp_list_) { + UInt64 checkpoint = part_cp.second; + query.snapshot_[part_cp.first].insert(query.snapshot_[part_cp.first].end(), + snapshot_[part_cp.first].begin(), + snapshot_[part_cp.first].end()); + Strip::Sort(query.snapshot_[part_cp.first]); + Strip::Merge(query.snapshot_[part_cp.first]); + Strip::Filter(query.snapshot_[part_cp.first], + [checkpoint](PStrip &pstrip) -> bool { + if (pstrip.first + pstrip.second <= checkpoint) { + return false; + } else { + if (pstrip.first < checkpoint && + pstrip.first + pstrip.second > checkpoint) + pstrip = + make_pair(checkpoint, pstrip.first + pstrip.second - checkpoint); + return true; + } + }); + } } +void TxnBin::MergeTxn(Query &query, int pos) const { + for (auto i = 0; i <= pos; i++) { + if (txn_list_[i].isCommit()) + for (auto &strip : txn_list_[i].strip_list_) + query.snapshot_[strip.first].push_back(strip.second); + } + for (auto &part_cp : query.rt_cp_list_) { + UInt64 checkpoint = part_cp.first; + Strip::Sort(query.snapshot_[part_cp.first]); + Strip::Merge(query.snapshot_[part_cp.first]); + Strip::Filter(query.snapshot_[part_cp.first], + [checkpoint](PStrip &pstrip) -> bool { + if (pstrip.first + pstrip.second <= checkpoint) { + return false; + } 
else { + if (pstrip.first < checkpoint && + pstrip.first + pstrip.second > checkpoint) + pstrip = + make_pair(checkpoint, pstrip.first + pstrip.second - checkpoint); + return true; + } + }); + } +} +} +} diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index 85faab9de..d49e7203b 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -29,6 +29,8 @@ #ifndef TXN_HPP_ #define TXN_HPP_ #include +#include +#include #include #include #include @@ -41,7 +43,8 @@ #include #include "caf/all.hpp" #include "caf/io/all.hpp" - +#include "../common/error_define.h" +#include "../utility/Timer.h" namespace claims { namespace txn { @@ -52,16 +55,23 @@ using std::vector; using std::string; using std::map; using std::pair; +using std::set; +using std::tuple; +using std::get; using std::unordered_map; using std::to_string; using std::function; using std::sort; using std::make_pair; +using std::make_tuple; +using claims::common::rSuccess; + using UInt64 = unsigned long long; using UInt32 = unsigned int; using UInt16 = unsigned short; using UInt8 = char; using RetCode = int; +using TimerAtom = caf::atom_constant; using BeginAtom = caf::atom_constant; using CommitAtom = caf::atom_constant; using AbortAtom = caf::atom_constant; @@ -82,7 +92,7 @@ using AbortCPAtom = caf::atom_constant; using QuitAtom = caf::atom_constant; using LinkAtom = caf::atom_constant; using RefreshAtom = caf::atom_constant; -using MergeAtom = caf::atom_constant; +using MergeAtom = caf::atom_constant; static const int kTxnPort = 8089; static const string kTxnIp = "127.0.0.1"; @@ -94,7 +104,7 @@ static const int kGCTime = 5; static const int kTimeout = 3; static const int kBlockSize = 64 * 1024; static const int kTailSize = sizeof(unsigned); - +static const int kTxnBinSize = 1024; inline UInt64 GetGlobalPartId(UInt64 table_id, UInt64 projeciton_id, UInt64 partition_id) { return partition_id + 1000 * (projeciton_id + 1000 * table_id); @@ -110,8 +120,10 @@ inline UInt64 GetProjectionIdFromGlobalPartId(UInt64 
global_partition_id) { inline UInt64 GetPartitionIdFromGlobalPartId(UInt64 global_partition_id) { return global_partition_id % (1000); } + /********Strip******/ using PStrip = pair; + class Strip { public: UInt64 part_; @@ -132,51 +144,113 @@ class Strip { static void Sort(vector &input); static void Merge(vector &input); static void Merge(vector &input); - static void Filter(vector &input, - function predicate); + static void Filter(vector &input, function predicate); + static void Filter(vector &input, function predicate); }; + inline bool operator==(const Strip &a, const Strip &b) { return a.part_ == b.part_ && a.pos_ == b.pos_ && a.offset_ == b.offset_; } -/***********FixTupleIngestReq************/ +class Txn { + public: + static const int kActive = 0; + static const int kCommit = 1; + static const int kAbort = 2; + int status_ = kActive; + long realtime_; + unordered_map strip_list_; + Txn() { realtime_ = GetCurrents(); } + Txn(const unordered_map &strip_list) { + strip_list_ = strip_list; + // realtime_ = GetCurrents(); + } + void Commit() { status_ = kCommit; } + void Abort() { status_ = kAbort; } + bool isCommit() { return status_ == kCommit; } + bool isAbort() { return status_ == kAbort; } + bool isActive() { return status_ == kActive; } +}; + +class Snapshot { + public: + unordered_map his_cp_list_; + /** + * real-time checkpoint will never be send. 
+ */ + unordered_map rt_cp_list_; + unordered_map> part_pstrips_; + string ToString() const; + void setRtCPS(const unordered_map &cps) { rt_cp_list_ = cps; } + void setHisCPS(const unordered_map &cps) { + his_cp_list_ = cps; + }; + unordered_map getHisCPS() const { return his_cp_list_; } + void setPStrips(const unordered_map> &part_pstrips) { + if (rt_cp_list_.size() > 0) { + /** + * Need to cut off all strip before ahead real-time checkpoint + */ + for (auto &pstrips : part_pstrips) + for (auto &pstrip : pstrips.second) + if (pstrip.first >= rt_cp_list_[pstrips.first]) + part_pstrips_[pstrips.first].push_back(pstrip); + } else { + part_pstrips_ = part_pstrips; + } + } + unordered_map> getPStrps() const { + return part_pstrips_; + } + void Merge(const vector &strips); + void Merge(const Snapshot &snapshot); +}; + +inline bool operator==(const Snapshot &lhs, const Snapshot &rhs) { + return lhs.his_cp_list_ == rhs.his_cp_list_ && + lhs.part_pstrips_ == rhs.part_pstrips_; +} class FixTupleIngestReq { public: /*fix tuple part -> */ - map content_; + unordered_map content_; void InsertStrip(UInt64 part, UInt64 tuple_size, UInt64 tuple_count) { content_[part] = make_pair(tuple_size, tuple_count); } - map get_content() const { return content_; } - void set_content(const map &content) { content_ = content; } + unordered_map get_content() const { return content_; } + void set_content(const unordered_map &content) { + content_ = content; + } string ToString(); }; inline bool operator==(const FixTupleIngestReq &a, const FixTupleIngestReq &b) { return a.content_ == b.content_; } -/****************Ingest***************/ class Ingest { public: - UInt64 id_; - map strip_list_; + UInt64 ts_; + unordered_map strip_list_; + Ingest() {} + Ingest(const unordered_map &strip_list, UInt64 ts) + : strip_list_(strip_list), ts_(ts) {} void InsertStrip(UInt64 part, UInt64 pos, UInt64 offset) { strip_list_[part] = make_pair(pos, offset); } void InsertStrip(const Strip &strip) { 
strip_list_[strip.part_] = make_pair(strip.pos_, strip.offset_); } - UInt64 get_id() const { return id_; } - map get_strip_list() const { return strip_list_; } - void set_id(const UInt64 &id) { id_ = id; } - void set_strip_list(const map &stripList) { + UInt64 get_id() const { return ts_; } + unordered_map get_strip_list() const { return strip_list_; } + void set_id(const UInt64 &id) { ts_ = id; } + void set_strip_list(const unordered_map &stripList) { strip_list_ = stripList; } string ToString(); }; -inline bool operator==(const Ingest &a, const Ingest &b) { - return a.id_ == b.id_; +inline bool operator==(const Ingest &lhs, const Ingest &rhs) { + return lhs.ts_ == rhs.ts_; } /************QueryReq************/ @@ -188,66 +262,116 @@ class QueryReq { void set_part_list(const vector &partList) { part_list_ = partList; } string ToString(); }; -inline bool operator==(const QueryReq &a, const QueryReq &b) { - return a.part_list_ == b.part_list_; + +inline bool operator==(const QueryReq &lhs, const QueryReq &rhs) { + return lhs.part_list_ == rhs.part_list_; } /***********Snapshot***********/ class Query { public: - map> snapshot_; - map cp_list_; - void InsertStrip(UInt64 part, UInt64 pos, UInt64 offset) { - // if (Snapshot.find(part) == Snapshot.end()) - // Snapshot[part] = vector>(); - // else - snapshot_[part].push_back(make_pair(pos, offset)); + UInt64 ts_; + unordered_map> snapshot_; + unordered_map his_cp_list_; + /** + * real-time checkpoint will never be send + */ + unordered_map rt_cp_list_; + Query() {} + Query(UInt64 ts, const unordered_map &his_cp_list, + const unordered_map &rt_cp_list) + : ts_(ts), his_cp_list_(his_cp_list), rt_cp_list_(rt_cp_list) {} + UInt64 getTS() const { return ts_; } + unordered_map> getSnapshot() const { + return snapshot_; + } + unordered_map getCPList() const { return his_cp_list_; } + void setTS(UInt64 ts) { ts_ = ts; } + void setSnapshot(const unordered_map> &sp) { + snapshot_ = sp; + } + void setCPList(const unordered_map 
&cplist) { + his_cp_list_ = cplist; } - void InsertCP(UInt64 part, UInt64 cp) { cp_list_[part] = cp; } - map> get_snapshot() const { return snapshot_; } - map get_cp_list() const { return cp_list_; } - void set_snapshot(const map> &sp) { snapshot_ = sp; } - void set_cp_list(const map &cplist) { cp_list_ = cplist; } string ToString(); }; -inline bool operator==(const Query &a, const Query &b) { - return a.snapshot_ == b.snapshot_; +inline bool operator==(const Query &lhs, const Query &rhs) { + return lhs.snapshot_ == rhs.snapshot_ && lhs.his_cp_list_ == rhs.his_cp_list_; } /*********Checkpoint***********/ class Checkpoint { public: - UInt64 id_; - UInt64 part_; - UInt64 logic_cp_; - UInt64 phy_cp_; - vector commit_strip_list_; - vector abort_strip_list_; - Checkpoint() {} - Checkpoint(UInt64 part, UInt64 newLogicCP, UInt64 oldPhyCP) - : part_(part), logic_cp_(newLogicCP), phy_cp_(oldPhyCP) {} - UInt64 get_id() const { return id_; } - UInt64 get_part() const { return part_; } - UInt64 get_logic_cp() const { return logic_cp_; } - UInt64 get_phy_cp() const { return phy_cp_; } - vector get_commit_strip_list() const { return commit_strip_list_; }; - vector get_abort_strip_list() const { return abort_strip_list_; }; - void set_part(UInt64 part) { part_ = part; } - void set_Logic_cp(UInt64 logicCP) { logic_cp_ = logicCP; } - void set_Phy_cp(UInt64 phyCP) { phy_cp_ = phyCP; } - void set_commit_strip_list(const vector &commitstripList) { - commit_strip_list_ = commitstripList; + UInt64 GetHisCP(UInt64 ts) { + UInt64 cp; + for (auto &ver_cp : vers_his_cp_) + if (ver_cp.first > ts) + break; + else + cp = ver_cp.second; + return cp; } - void set_abort_strip_list(const vector &abortstripList) { - abort_strip_list_ = abortstripList; + UInt64 GetRtCP(UInt64 ts) { + UInt64 cp; + for (auto &ver_cp : vers_rt_cp_) + if (ver_cp.first > ts) + break; + else + cp = ver_cp.second; + return cp; } + void SetHisCP(UInt64 ts, UInt64 cp) { vers_his_cp_[ts] = cp; } + void SetRtCP(UInt64 ts, 
UInt64 cp) { vers_rt_cp_[ts] = cp; } string ToString(); + + private: + map vers_his_cp_; + map vers_rt_cp_; +}; + +class TxnBin { + public: + Txn GetTxn(int pos) const { return txn_list_[pos]; } + void SetTxn(int pos, const Txn &txn) { + txn_list_[pos] = txn; + ct_++; + } + void SetTxn(int pos, const unordered_map &strip_list) { + txn_list_[pos] = Txn(strip_list); + ct_++; + } + void CommitTxn(int pos) { + txn_list_[pos].Commit(); + ct_commit_++; + } + void AbortTxn(int pos) { + txn_list_[pos].Abort(); + ct_abort_++; + } + bool IsFull() const { return ct_commit_ + ct_abort_ == kTxnBinSize; } + int Count() const { return ct_; } + int CountCommit() const { return ct_commit_; } + int CountAbort() const { return ct_abort_; } + void MergeSnapshot(Query &query) const; + void MergeTxn(Query &query, int pos) const; + static UInt64 GetTxnBinID(UInt64 ts, UInt64 core_num) { + return (ts / core_num) / kTxnBinSize; + } + static UInt64 GetTxnBinPos(UInt64 ts, UInt64 core_num) { + return (ts / core_num) % kTxnBinSize; + } + + Txn txn_list_[kTxnBinSize]; + + private: + int ct_ = 0; + int ct_commit_ = 0; + int ct_abort_ = 0; + unordered_map> + snapshot_; // If bin is full, a snapshot is generated. 
}; -inline bool operator==(const Checkpoint &a, const Checkpoint &b) { - return a.id_ == b.id_; -} -inline void SerConfig() { +inline void CAFSerConfig() { caf::announce("FixTupleIngestReq", make_pair(&FixTupleIngestReq::get_content, &FixTupleIngestReq::set_content)); @@ -256,17 +380,20 @@ inline void SerConfig() { make_pair(&Ingest::get_strip_list, &Ingest::set_strip_list)); caf::announce("QueryReq", make_pair(&QueryReq::get_part_list, &QueryReq::set_part_list)); - caf::announce("Query", - make_pair(&Query::get_snapshot, &Query::set_snapshot), - make_pair(&Query::get_cp_list, &Query::set_cp_list)); - caf::announce( - "Checkpoint", make_pair(&Checkpoint::get_part, &Checkpoint::set_part), - make_pair(&Checkpoint::get_logic_cp, &Checkpoint::set_Logic_cp), - make_pair(&Checkpoint::get_phy_cp, &Checkpoint::set_Phy_cp), - make_pair(&Checkpoint::get_commit_strip_list, - &Checkpoint::set_commit_strip_list), - make_pair(&Checkpoint::get_abort_strip_list, - &Checkpoint::set_abort_strip_list)); + caf::announce("Query", make_pair(&Query::getTS, &Query::setTS), + make_pair(&Query::getSnapshot, &Query::setSnapshot), + make_pair(&Query::getCPList, &Query::setCPList)); + /* caf::announce( + "Checkpoint", make_pair(&Checkpoint::get_part, &Checkpoint::set_part), + make_pair(&Checkpoint::get_logic_cp, &Checkpoint::set_Logic_cp), + make_pair(&Checkpoint::get_phy_cp, &Checkpoint::set_Phy_cp), + make_pair(&Checkpoint::get_commit_strip_list, + &Checkpoint::set_commit_strip_list), + make_pair(&Checkpoint::get_abort_strip_list, + &Checkpoint::set_abort_strip_list));*/ + caf::announce( + "Snapshot", make_pair(&Snapshot::getHisCPS, &Snapshot::setHisCPS), + make_pair(&Snapshot::getPStrps, &Snapshot::setPStrips)); } } } diff --git a/txn_manager/txn_client.cpp b/txn_manager/txn_client.cpp index fdc635b62..c09dc81c2 100644 --- a/txn_manager/txn_client.cpp +++ b/txn_manager/txn_client.cpp @@ -51,7 +51,7 @@ caf::actor TxnClient::proxy_; RetCode TxnClient::Init(string ip, int port) { ip_ = ip; 
port_ = port; - SerConfig(); + CAFSerConfig(); try { proxy_ = caf::io::remote_actor(ip_, port); } catch (...) { @@ -77,8 +77,6 @@ RetCode TxnClient::BeginIngest(const FixTupleIngestReq& request, [&](RetCode r) { ret = r; }, caf::others >> []() { cout << " unkown message" << endl; }, caf::after(seconds(kTimeout)) >> [&] { - // ret = - // rLinkTmTimeout; ret = -1; cout << "time out" << endl; }); @@ -90,14 +88,14 @@ RetCode TxnClient::BeginIngest(const FixTupleIngestReq& request, return ret; } -RetCode TxnClient::CommitIngest(const UInt64 id) { +RetCode TxnClient::CommitIngest(const UInt64 ts) { // RetCode ret = rSuccess; RetCode ret = 0; try { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, - CommitIngestAtom::value, id) + CommitIngestAtom::value, ts) .await([&](RetCode r) { ret = r; }, caf::others >> []() { cout << " unkown message" << endl; }, caf::after(seconds(kTimeout)) >> [&] { @@ -162,25 +160,26 @@ RetCode TxnClient::BeginQuery(const QueryReq& request, Query& query) { RetCode TxnClient::BeginCheckpoint(Checkpoint& cp) { // RetCode ret = rSuccess; RetCode ret = 0; - try { - caf::scoped_actor self; - self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, - CheckpointAtom::value, cp.part_) - .await([&](const Checkpoint& checkpoint, RetCode r) { - cp = checkpoint; - ret = r; - }, - caf::after(seconds(kTimeout)) >> [&] { - // ret = - // rLinkTmTimeout; - ret = -1; - cout << "time out" << endl; - }); - } catch (...) { - cout << "link fail" << endl; - // return rLinkTmFail; - return -1; - } + // try { + // caf::scoped_actor self; + // self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, + // CheckpointAtom::value, cp.part_) + // .await([&](const Checkpoint& checkpoint, RetCode r) { + // cp = checkpoint; + // ret = r; + // }, + // caf::after(seconds(kTimeout)) >> [&] { + // // ret = + // // rLinkTmTimeout; + // ret = -1; + // cout << "time out" << + // endl; + // }); + // } catch (...) 
{ + // cout << "link fail" << endl; + // // return rLinkTmFail; + // return -1; + // } return ret; } diff --git a/txn_manager/txn_client_test.cpp b/txn_manager/txn_client_test.cpp index 3a718b0c2..59152f7a3 100644 --- a/txn_manager/txn_client_test.cpp +++ b/txn_manager/txn_client_test.cpp @@ -20,16 +20,17 @@ * * Created on: 2016年4月7日 * Author: imdb - * Email: - * + * Email: + * * Description: * */ - +#include #include #include #include #include +#include #include #include #include @@ -60,6 +61,7 @@ using std::make_tuple; using std::make_pair; using std::get; using std::string; + using UInt64 = unsigned long long; using UInt32 = unsigned int; using UInt16 = unsigned short; @@ -70,67 +72,68 @@ using IngestAtom = caf::atom_constant; using QueryAtom = caf::atom_constant; using FailAtom = caf::atom_constant; using QuitAtom = caf::atom_constant; - +using claims::txn::Snapshot; +using claims::txn::CAFSerConfig; class Foo { public: vector request1; unordered_map> request2; - vector> request3; - void set_request1(const vector & req) { request1 = req;} - void set_request2(const unordered_map> & req) { + vector> request3; + void set_request1(const vector &req) { request1 = req; } + void set_request2(const unordered_map> &req) { request2 = req; } - void set_request3(const vector> &req) { request3 = req;} - vector get_request1() const {return request1;} - unordered_map> get_request2() const {return request2;} - vector> get_request3() const { return request3;} + void set_request3(const vector> &req) { request3 = req; } + vector get_request1() const { return request1; } + unordered_map> get_request2() const { + return request2; + } + vector> get_request3() const { return request3; } }; - -inline bool operator == (const Foo & a, const Foo & b) { +inline bool operator==(const Foo &a, const Foo &b) { return a.request1 == b.request1 && a.request2 == b.request2; } -char v[1024+10]; +char v[1024 + 10]; caf::actor proxy; -class A{ +class A { public: - vector list_ ; + vector list_; int 
c = 0; - void set_list_(const vector list) { list_ = list;} - vector get_list_() const { return list_;} + void set_list_(const vector list) { list_ = list; } + vector get_list_() const { return list_; } }; -inline bool operator == (const A & a1, const A & a2) { +inline bool operator==(const A &a1, const A &a2) { return a1.list_ == a2.list_; } -void ConfigA(){ +void ConfigA() { caf::announce("A", make_pair(&A::get_list_, &A::set_list_)); } -void task(int index){ -for (auto i=0;isync_send(proxy, IngestAtom::value, i).await( - [=](int ret) { /*cout <<"receive:" << ret << endl;*/}, - caf::after(std::chrono::seconds(2)) >> [] { - cout << "ingest time out" << endl; - } - ); -// self->sync_send(proxy, QueryAtom::value).await( -// [=](int t) { -// cout << t<< endl; -// }, -// [=](A a) { -// cout << "success" << endl; -// for (auto &it : a.list_){ -// cout << it << endl; -// } -// }, -// caf::after(std::chrono::seconds(2)) >> [] { -// cout << "query time out" << endl; -// } -// ); -} + self->sync_send(proxy, IngestAtom::value, i) + .await([=](int ret) { /*cout <<"receive:" << ret << endl;*/ + }, + caf::after(std::chrono::seconds(2)) >> + [] { cout << "ingest time out" << endl; }); + // self->sync_send(proxy, QueryAtom::value).await( + // [=](int t) { + // cout << t<< endl; + // }, + // [=](A a) { + // cout << "success" << endl; + // for (auto &it : a.list_){ + // cout << it << endl; + // } + // }, + // caf::after(std::chrono::seconds(2)) >> [] { + // cout << "query time out" << endl; + // } + // ); + } } using claims::txn::FixTupleIngestReq; @@ -141,52 +144,183 @@ using claims::txn::TxnServer; using claims::txn::TxnClient; using claims::txn::LogServer; using claims::txn::LogClient; -char buffer[20*1024+10]; +char buffer[20 * 1024 + 10]; int is_log = 0; -void task2(int id, int times){ +void task2(int id, int times) { std::default_random_engine e; std::uniform_int_distribution rand_tuple_size(50, 150); std::uniform_int_distribution rand_tuple_count(10, 100); 
std::uniform_int_distribution rand_part_count(1, 10); - for (auto i=0; i0 ?tuple_count/part_count :1); - TxnClient::BeginIngest(req, ingest); - if (is_log == 1) - for (auto & strip : ingest.strip_list_) - LogClient::Data(strip.first,strip.second.first,strip.second.second, - buffer, tuple_size*tuple_count/part_count); - TxnClient::CommitIngest(ingest.id_); - if (is_log == 1) - LogClient::Refresh(); - } + for (auto i = 0; i < times; i++) { + FixTupleIngestReq req; + Ingest ingest; + auto part_count = rand_part_count(e); + auto tuple_size = rand_tuple_size(e); + auto tuple_count = rand_tuple_size(e); + for (auto i = 0; i < part_count; i++) + req.InsertStrip(i, part_count, tuple_count / part_count > 0 + ? tuple_count / part_count + : 1); + TxnClient::BeginIngest(req, ingest); + if (is_log == 1) + for (auto &strip : ingest.strip_list_) + LogClient::Data(strip.first, strip.second.first, strip.second.second, + buffer, tuple_size * tuple_count / part_count); + TxnClient::CommitIngest(ingest.ts_); + if (is_log == 1) LogClient::Refresh(); + } +} + +using std::set; + +class Tuple { + public: + int a_, b_; + Tuple() {} + Tuple(int a, int b) : a_(a), b_(b) {} +}; +inline bool operator<(const Tuple &lhs, const Tuple &rhs) { + return lhs.a_ < rhs.a_ || (lhs.a_ == rhs.a_ && lhs.b_ < rhs.b_); +} +class Type { + public: + set> type1_; + set type2_; + Type() {} + explicit Type(set> type) { + // type1_ = type; + for (auto &tuple : type) type2_.insert(Tuple(tuple.first, tuple.second)); + } + void setType(set> type) { + // type1_ = type; + for (auto &tuple : type) type2_.insert(Tuple(tuple.first, tuple.second)); + } + set> getType() const { + // return type1_; + set> ret2; + for (auto &tuple : type2_) ret2.insert(make_pair(tuple.a_, tuple.b_)); + return ret2; + } + string ToString() const { + string str = ""; + for (auto &it : type1_) + str += to_string(it.first) + "," + to_string(it.second) + "\n"; + for (auto &it : type2_) + str += to_string(it.a_) + "," + to_string(it.b_) + "\n"; + 
return str; + } +}; + +inline bool operator==(const Type &lhs, const Type &rhs) { return lhs == rhs; } +caf::behavior Server(caf::event_based_actor *self, int port) { + try { + caf::io::publish(self, port); + } catch (...) { + cout << "server publish fail" << endl; + } + return {[self](const Snapshot &snapshot) { + cout << "type" << endl; + cout << snapshot.ToString() << endl; + // self->quit(); + // return caf::make_message("ok"); + }}; +} + +caf::behavior Anon(caf::event_based_actor *self) { + return {[](string it) { cout << "anon message:" << it << endl; }}; } -int main(int argc, const char **argv){ - int n = stoi(string(argv[1])); - int times = stoi(string(argv[2])); - string ip = string(argv[3]); - int port = stoi(string(argv[4])); - is_log = stoi(string(argv[5])); - TxnClient::Init(ip, port); - LogServer::Init("data-log"); - struct timeval tv1, tv2; - vector threads; - for (auto i=0;i threads; + for (auto i=0;i("type", make_pair(&Type::getType, &Type::setType)); + // cout << type << " launched" << endl; + if (type == "client") { + { + try { + caf::scoped_actor self; + // Type type({{1, 2}, {3, 4}, {5, 6}}); + Snapshot snapshot; + snapshot.setHisCPS({{1, 2}, {2, 3}}); + snapshot.setPStrips({{1, {{3, 1}, {5, 6}}}, {2, {{3, 1}, {5, 6}}}}); + auto server = caf::io::remote_actor(ip, port); + caf::anon_send(server, snapshot); + + // self->sync_send(server, type) + // .await([](const string &str) { cout << "receive" << endl; + // }, + // caf::others >> [] { cout << "others message" << + // endl; }, + // caf::after(std::chrono::seconds(1)) >> + // []() { cout << "timeout" << endl; }); + } catch (...) 
{ + cout << "client send fail" << endl; + } + } + } else if (type == "server") { + caf::spawn(Server, port); + } else if (type == "vector") { + vector v1 = {1, 2, 3}; + vector v2 = {4, 5, 6}; + v1.insert(v1.end(), v2.begin(), v2.end()); + for (auto it : v1) cout << it << endl; + } else if (type == "snapshot") { + Snapshot s1, s2; + s1.part_pstrips_ = {{1, {{1, 1}, {2, 2}, {7, 1}}}}; + s2.part_pstrips_ = {{1, {{4, 3}, {8, 1}}}}; + s1.Merge(s2); + cout << s1.ToString() << endl; + } else if (type == "txnserver") { + unordered_map pos_list = { + {1, 10}, {2, 20}, {3, 16}, {4, 19}}; + TxnServer::Init(4, 8089); + cout << "1:" << endl; + TxnServer::LoadPos(pos_list); + cout << "2:" << endl; + TxnServer::LoadCPList(0, pos_list, pos_list); + cout << "3:" << endl; + } else if (type == "txnclient") { + TxnClient::Init("127.0.0.1", 8089); + auto job = []() { + FixTupleIngestReq request; + Ingest ingest; + request.InsertStrip(1, 2, 2); + request.InsertStrip(2, 2, 2); + request.InsertStrip(3, 2, 2); + request.InsertStrip(4, 2, 2); + for (auto i = 0; i < 100000; i++) { + TxnClient::BeginIngest(request, ingest); + TxnClient::CommitIngest(ingest.ts_); + } + // cout << ingest.ToString() << endl; + }; + vector jobs; + for (auto i = 0; i < 12; i++) jobs.push_back(thread(job)); + for (auto &j : jobs) j.join(); + } caf::await_all_actors_done(); caf::shutdown(); } diff --git a/txn_manager/txn_log.cpp b/txn_manager/txn_log.cpp index 39a28a4e3..9fefaa695 100644 --- a/txn_manager/txn_log.cpp +++ b/txn_manager/txn_log.cpp @@ -52,9 +52,9 @@ RetCode LogServer::Init(const string path) { caf::behavior LogServer::make_behavior() { return { [=](IngestAtom, shared_ptr ingest) -> caf::message { - Append(BeginLog(ingest->id_)); + Append(BeginLog(ingest->ts_)); for (auto& strip : ingest->strip_list_) - Append(WriteLog(ingest->id_, strip.first, strip.second.first, + Append(WriteLog(ingest->ts_, strip.first, strip.second.first, strip.second.second)); // cout << "begin" << endl; return 
caf::make_message(0, *ingest); diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index 1c354d769..429d8d782 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -42,7 +42,13 @@ namespace txn { // using claims::common::rCommitIngestTxnFail; // using claims::common::rAbortIngestTxnFail; // using claims::common::rCommitCheckpointFail; -int TxnCore::capacity_ = kTxnBufferSize; + +/***************/ +atomic TimeStamp::now_; +caf::actor QueryTracker::tracker_; +set QueryTracker::active_querys_; + +/**************/ int TxnServer::port_ = kTxnPort; int TxnServer::concurrency_ = kConcurrency; @@ -50,236 +56,103 @@ caf::actor TxnServer::proxy_; vector TxnServer::cores_; bool TxnServer::active_ = false; -std::unordered_map> TxnServer::pos_list_; -std::unordered_map TxnServer::logic_cp_list_; -std::unordered_map TxnServer::phy_cp_list_; -std::unordered_map> TxnServer::CountList; +unordered_map> TxnServer::pos_list_; +unordered_map TxnServer::cp_list_; caf::actor test; -// UInt64 txn_id = 0; - -void TxnCore::ReMalloc() { - size_ = 0; - txn_index_.clear(); - commit_ = new bool[capacity_]; - abort_ = new bool[capacity_]; - strip_list_ = new vector[capacity_]; - // aout(this) << "core id is " << core_id_ << endl; +caf::behavior QueryTracker::make_behavior() { + this->delayed_send(this, seconds(3), TimerAtom::value); + return {[](BeginAtom, UInt64 ts) { active_querys_.insert(ts); }, + [](CommitAtom, UInt64 ts) { active_querys_.erase(ts); }, + [this](TimerAtom) { + /** + * TODO broadcast all components what minimum timestamp + * is still alive. 
+ */ + this->delayed_send(this, seconds(3), TimerAtom::value); + }}; } -inline UInt64 TxnCore::GetId() { - // return ((__sync_add_and_fetch(&txn_id_, 1)) * 1000) + core_id_; - return (++txn_id_) * 1000 + core_id_; -} +caf::behavior CheckpointTracker::make_behavior() {} caf::behavior TxnCore::make_behavior() { // this->delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); return { - [=](IngestAtom, const FixTupleIngestReq& request) -> caf::message { - - // cout << "begin" << endl; - auto ingest = make_shared(); - // RetCode ret = rSuccess; - RetCode ret = 0; - if (size_ >= capacity_) - return caf::make_message(-1 /*rBeginIngestTxnFail*/); - ingest->id_ = GetId(); - txn_index_[ingest->id_] = size_; - commit_[size_] = abort_[size_] = false; - for (auto& strip_ : request.content_) { - auto part = strip_.first; - auto tupleSize = strip_.second.first; - auto tupleCount = strip_.second.second; - auto strip = TxnServer::AtomicMalloc(part, tupleSize, tupleCount); - strip_list_[size_].push_back(strip); - ingest->InsertStrip(strip); - } - size_++; - // cout << ingest.ToString() << endl; - if (LogServer::active_) { - current_message() = caf::make_message(IngestAtom::value, ingest); - this->forward_to(LogServer::proxy_); - } + [this](IngestAtom, shared_ptr ingest) -> caf::message { + RetCode ret = rSuccess; + // cout << "begin ingestion" << endl << ingest->ToString() << endl; + auto id = TxnBin::GetTxnBinID(ingest->ts_, TxnServer::concurrency_); + auto pos = TxnBin::GetTxnBinPos(ingest->ts_, TxnServer::concurrency_); + txnbin_list_[id].SetTxn(pos, ingest->strip_list_); return caf::make_message(ret, *ingest); }, - [=](CommitIngestAtom, const UInt64 id) -> caf::message { - // cout << "commit ingest txn id :" << id << endl; - if (txn_index_.find(id) == txn_index_.end()) - return caf::make_message(-1 /*rCommitIngestTxnFail*/); - commit_[txn_index_[id]] = true; - // cout << "Logserver active:" << LogServer::active_ << endl; - if (LogServer::active_) { - // assert(false); - 
this->forward_to(LogServer::proxy_); - } - - return caf::make_message(0 /*rSuccess*/); - }, - [=](AbortIngestAtom, const UInt64 id) -> caf::message { - // cout << "abort ingest txn id :" << id << endl; - if (txn_index_.find(id) == txn_index_.end()) - return caf::make_message(-1 /*rBeginIngestTxnFail*/); - abort_[txn_index_[id]] = true; - if (LogServer::active_) { - // assert(false); - this->forward_to(LogServer::proxy_); - } - return caf::make_message(0 /*rAbortIngestTxnFail*/); + [this](CommitIngestAtom, const UInt64 ts) -> caf::message { + // cout << "commit:" << ts << endl; + RetCode ret = rSuccess; + auto id = TxnBin::GetTxnBinID(ts, TxnServer::concurrency_); + auto pos = TxnBin::GetTxnBinPos(ts, TxnServer::concurrency_); + txnbin_list_[id].CommitTxn(pos); + return caf::make_message(rSuccess); }, - [=](QueryAtom, const QueryReq& request, shared_ptr query) { - // cout << "core:"<< core_id_ <<" query" << endl; - // cout << query->ToString() << endl; - for (auto i = 0; i < size_; i++) { - // cout << "commit:" << commit_[i] << endl; - if (commit_[i]) - for (auto& strip : strip_list_[i]) { - if (query->cp_list_.find(strip.part_) != query->cp_list_.end() && - strip.pos_ >= query->cp_list_[strip.part_]) - query->InsertStrip(strip.part_, strip.pos_, strip.offset_); - } - } - if (core_id_ != TxnServer::cores_.size() - 1) - this->forward_to(TxnServer::cores_[core_id_ + 1]); - else { - current_message() = - caf::make_message(MergeAtom::value, request, query); - this->forward_to(TxnServer::cores_[TxnServer::SelectCoreId()]); - } + [this](AbortIngestAtom, const UInt64 ts) -> caf::message { + cout << "abort:" << ts << endl; + RetCode ret = rSuccess; + auto id = TxnBin::GetTxnBinID(ts, TxnServer::concurrency_); + auto pos = TxnBin::GetTxnBinPos(ts, TxnServer::concurrency_); + txnbin_list_[id].AbortTxn(pos); + return caf::make_message(rSuccess); }, - [=](MergeAtom, const QueryReq& request, shared_ptr query) - -> Query { - // cout << "query merge" << endl; - for (auto& part : 
query->snapshot_) { - Strip::Sort(part.second); - Strip::Merge(part.second); - } - return *query; - }, - [=](MergeAtom, shared_ptr cp) -> Checkpoint { - Strip::Sort(cp->commit_strip_list_); - Strip::Merge(cp->commit_strip_list_); - Strip::Sort(cp->abort_strip_list_); - Strip::Merge(cp->abort_strip_list_); - return *cp; - }, - [=](CheckpointAtom, shared_ptr cp) { - for (auto i = 0; i < size_; i++) - if (commit_[i]) { - for (auto& strip : strip_list_[i]) - if (strip.part_ == cp->part_ && strip.pos_ >= cp->logic_cp_) - cp->commit_strip_list_.push_back( - PStrip(strip.pos_, strip.offset_)); - } else if (abort_[i]) { - for (auto& strip : strip_list_[i]) - if (strip.part_ == cp->part_ && strip.pos_ >= cp->logic_cp_) - cp->abort_strip_list_.push_back( - PStrip(strip.pos_, strip.offset_)); - } - if (core_id_ != TxnServer::cores_.size() - 1) - this->forward_to(TxnServer::cores_[core_id_ + 1]); - else { - current_message() = caf::make_message(MergeAtom::value, cp); - this->forward_to(TxnServer::cores_[TxnServer::SelectCoreId()]); - } - }, - [=](GCAtom) { - auto size_old = size_; - auto pos = 0; - for (auto i = 0; i < size_; i++) - if (!TxnServer::IsStripListGarbage(strip_list_[i])) { - txn_index_[txn_index_[i]] = pos; - commit_[pos] = commit_[i]; - abort_[pos] = abort_[i]; - strip_list_[pos] = strip_list_[i]; - ++pos; - } - size_ = pos; - cout << "core:" << core_id_ << ",gc:" << size_old << "=>" << pos - << endl; - this->delayed_send(this, seconds(kGCTime), GCAtom::value); - }, - caf::others >> - [&]() { cout << "core:" << core_id_ << " unkown message" << endl; }}; -} + [](QueryAtom, const QueryReq& request, shared_ptr query) { -caf::behavior Test::make_behavior() { - cout << "test init..." 
<< endl; - return {[=](int a) -> int { - cout << "receive int:" << a << endl; - return -a; - }, - caf::others >> [&]() { cout << "test unkown message" << endl; }}; -} - -caf::behavior IngestWorker(caf::event_based_actor* self) { - return { - [=](IngestAtom, const FixTupleIngestReq& request) -> caf::message { - Ingest ingest; - auto ret = TxnServer::BeginIngest(request, ingest); - // auto ret = 10; - // cout<<"new IngestWorker!!"<sync_send(test, 34).await( - //// [&](int a) { ret = a;}); - //// - //// caf::scoped_actor self; - //// self->sync_send(test, 34).await( - //// [=](int a) { cout<quit(); - return caf::make_message(ingest, ret); }, - caf::others >> []() { cout << "IngestWorker unkown message" << endl; }}; -} - -caf::behavior IngestCommitWorker::make_behavior() { - return {[=](CommitIngestAtom, const UInt64 id) -> RetCode { - quit(); - return TxnServer::CommitIngest(id); - }, - caf::others >> - []() { cout << "IngestCommitWorker unkown message" << endl; }}; -} + [](MergeAtom, const QueryReq& request, shared_ptr query) { -caf::behavior AbortWorker::make_behavior() { - return { - [=](AbortIngestAtom, const UInt64 id) -> RetCode { - quit(); - return TxnServer::AbortIngest(id); }, - caf::others >> []() { cout << "AbortWorker unkown message" << endl; }}; -} + [](MergeAtom, shared_ptr cp) { -caf::behavior QueryWorker::make_behavior() { - return { - [=](QueryAtom, const QueryReq& request) -> caf::message { - Query query; - auto ret = TxnServer::BeginQuery(request, query); - quit(); - return caf::make_message(query, ret); }, - caf::others >> []() { cout << "QueryWorker unkown message" << endl; }}; -} - -caf::behavior CheckpointWorker::make_behavior() { - return {[=](CheckpointAtom, const UInt64 part) -> caf::message { - Checkpoint cp; - cp.part_ = part; - auto ret = TxnServer::BeginCheckpoint(cp); - quit(); - return caf::make_message(cp, ret); - }, - caf::others >> - []() { cout << "CheckpointWorker unkown message" << endl; }}; -} -caf::behavior 
CommitCPWorker::make_behavior() { - return { - [=](CommitCPAtom, const Checkpoint& cp) -> RetCode { - quit(); - return TxnServer::CommitCheckpoint(cp); + [](CheckpointAtom, shared_ptr cp) { + /* for (auto i = 0; i < size_; i++) + if (commit_[i]) { + for (auto& strip : strip_list_[i]) + if (strip.part_ == cp->part_ && strip.pos_ >= + cp->logic_cp_) + cp->commit_strip_list_.push_back( + PStrip(strip.pos_, strip.offset_)); + } else if (abort_[i]) { + for (auto& strip : strip_list_[i]) + if (strip.part_ == cp->part_ && strip.pos_ >= + cp->logic_cp_) + cp->abort_strip_list_.push_back( + PStrip(strip.pos_, strip.offset_)); + } + if (core_id_ != TxnServer::cores_.size() - 1) + this->forward_to(TxnServer::cores_[core_id_ + 1]); + else { + current_message() = caf::make_message(MergeAtom::value, cp); + this->forward_to(TxnServer::cores_[TxnServer::SelectCoreId()]); + }*/ }, - caf::others >> []() { cout << "CommitCPWorker unkown message" << endl; }}; + [](GCAtom) { + /* auto size_old = size_; + auto pos = 0; + for (auto i = 0; i < size_; i++) + if (!TxnServer::IsStripListGarbage(strip_list_[i])) { + txn_index_[txn_index_[i]] = pos; + commit_[pos] = commit_[i]; + abort_[pos] = abort_[i]; + strip_list_[pos] = strip_list_[i]; + ++pos; + } + size_ = pos; + cout << "core:" << core_id_ << ",gc:" << size_old << "=>" << pos + << endl;*/ + // this->delayed_send(this, seconds(kGCTime), GCAtom::value); + }, + caf::others >> + [&]() { cout << "core:" << core_id_ << " unkown message" << endl; }}; } + caf::behavior TxnServer::make_behavior() { try { caf::io::publish(proxy_, port_, nullptr, true); @@ -287,31 +160,40 @@ caf::behavior TxnServer::make_behavior() { } catch (...) 
{ cout << "txn server bind to port:" << port_ << " fail" << endl; } - return {[=](IngestAtom, const FixTupleIngestReq& request) { - forward_to(cores_[SelectCoreId()]); + // this->delayed_send(this, seconds(3), CheckpointAtom::value); + // this->delayed_send(this, seconds(3)); + return {[this](IngestAtom, const FixTupleIngestReq& request) { + auto ts = TimeStamp::GenAdd(); + auto ingest = make_shared(request.content_, ts); + for (auto& part : ingest->strip_list_) + ingest->InsertStrip(AtomicMalloc(part.first, part.second.first, + part.second.second)); + current_message() = caf::make_message(IngestAtom::value, ingest); + forward_to(cores_[GetCoreID(ts)]); }, - [=](CommitIngestAtom, - const UInt64 id) { forward_to(cores_[GetCoreId(id)]); }, - [=](AbortIngestAtom, - const UInt64 id) { forward_to(cores_[GetCoreId(id)]); }, - [=](QueryAtom, const QueryReq& request) { - auto query = make_shared(); - for (auto& part : request.part_list_) - query->cp_list_[part] = TxnServer::logic_cp_list_[part]; + [this](CommitIngestAtom, + const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, + [this](AbortIngestAtom, + const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, + [this](QueryAtom, const QueryReq& request) { + auto ts = TimeStamp::Gen(); + auto query = + make_shared(ts, GetHisCPList(ts, request.part_list_), + GetRtCPList(ts, request.part_list_)); current_message() = caf::make_message(QueryAtom::value, request, query); - forward_to(cores_[0]); - }, - [=](CheckpointAtom, const UInt64 part) { - auto cp = make_shared(); - cp->part_ = part; - current_message() = caf::make_message(CheckpointAtom::value, cp); - forward_to(cores_[0]); + forward_to(cores_[GetCoreID(ts)]); }, - [=](CommitCPAtom, const Checkpoint& cp) { - + [this](CommitCPAtom, UInt64 ts, UInt64 part, UInt64 his_cp, + UInt64 rt_cp) -> caf::message { + cp_list_[part].SetHisCP(ts, his_cp); + cp_list_[part].SetRtCP(ts, rt_cp); + return caf::make_message(OkAtom::value); }, - caf::others >> []() { cout << "server unkown 
message" << endl; }}; + caf::others >> [this]() { + cout << "server unkown message:" + << to_string(current_message()) << endl; + }}; } RetCode TxnServer::Init(int concurrency, int port) { @@ -321,96 +203,13 @@ RetCode TxnServer::Init(int concurrency, int port) { proxy_ = caf::spawn(); for (auto i = 0; i < concurrency_; i++) cores_.push_back(caf::spawn(i)); - SerConfig(); - RecoveryFromCatalog(); - RecoveryFromTxnLog(); - srand((unsigned)time(NULL)); + CAFSerConfig(); + // RecoveryCheckpoint(); + // RecoveryFromTxnLog(); + // srand((unsigned)time(NULL)); return 0; } -RetCode TxnServer::BeginIngest(const FixTupleIngestReq& request, - Ingest& ingest) { - RetCode ret = 0; - UInt64 core_id = SelectCoreId(); - caf::scoped_actor self; - self->sync_send(cores_[core_id], IngestAtom::value, &request, &ingest) - .await([&](int r) { ret = r; }); - if (ret == 0) { - // LogClient::Begin(ingest.Id); - // for (auto & strip : ingest.StripList) - // LogClient::Write(ingest.Id, strip.first, strip.second.first, - // strip.second.second); - } - return ret; -} -RetCode TxnServer::CommitIngest(const UInt64 id) { - RetCode ret = 0; - UInt64 core_id = GetCoreId(id); - caf::scoped_actor self; - self->sync_send(cores_[core_id], CommitIngestAtom::value, id) - .await([&](int r) { ret = r; }); - if (ret == 0) { - // LogClient::Commit(ingest.Id); - // LogClient::Refresh(); - } - return ret; -} -RetCode TxnServer::AbortIngest(const UInt64 id) { - RetCode ret; - UInt64 core_id = GetCoreId(id); - caf::scoped_actor self; - self->sync_send(cores_[core_id], AbortIngestAtom::value, id) - .await([&](int r) { ret = r; }); - if (ret == 0) { - // LogClient::Abort(ingest.Id); - // LogClient::Refresh(); - } - return ret; -} -RetCode TxnServer::BeginQuery(const QueryReq& request, Query& query) { - RetCode ret; - caf::scoped_actor self; - for (auto& part : request.part_list_) - query.cp_list_[part] = TxnServer::logic_cp_list_[part]; - for (auto& core : cores_) - self->sync_send(core, QueryAtom::value, 
&request, &query) - .await([&](int r) { r = ret; }); - for (auto& part : query.snapshot_) { - Strip::Sort(part.second); - Strip::Merge(part.second); - } - return ret; -} -RetCode TxnServer::BeginCheckpoint(Checkpoint& cp) { - RetCode ret = 0; - if (TxnServer::pos_list_.find(cp.part_) == TxnServer::pos_list_.end()) - return -1; - cp.logic_cp_ = TxnServer::logic_cp_list_[cp.part_]; - cp.phy_cp_ = TxnServer::phy_cp_list_[cp.part_]; - - caf::scoped_actor self; - for (auto& core : cores_) - self->sync_send(core, CheckpointAtom::value, &cp) - .await([&](int r) { r = ret; }); - Strip::Sort(cp.commit_strip_list_); - Strip::Merge(cp.commit_strip_list_); - Strip::Sort(cp.abort_strip_list_); - Strip::Merge(cp.abort_strip_list_); - return ret; -} -RetCode TxnServer::CommitCheckpoint(const Checkpoint& cp) { - RetCode ret = 0; - if (TxnServer::pos_list_.find(cp.part_) == TxnServer::pos_list_.end()) - return -1; - TxnServer::logic_cp_list_[cp.part_] = cp.logic_cp_; - TxnServer::phy_cp_list_[cp.part_] = cp.phy_cp_; - if (ret == 0) { - // LogClient::Checkpoint(cp.Part, cp.LogicCP, cp.PhyCP); - // LogClient::Refresh(); - } - return ret; -} - Strip TxnServer::AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount) { Strip strip; @@ -442,18 +241,33 @@ Strip TxnServer::AtomicMalloc(UInt64 part, UInt64 TupleSize, return strip; } -RetCode TxnServer::RecoveryFromCatalog() { - for (auto i = 0; i < 10; i++) { - pos_list_[i] = 0; - CountList[i] = 0; - logic_cp_list_[i] = 0; - } +unordered_map TxnServer::GetHisCPList( + UInt64 ts, const vector& parts) { + unordered_map cps; + for (auto& part : parts) cps[part] = cp_list_[part].GetHisCP(ts); + return cps; +} + +unordered_map TxnServer::GetRtCPList( + UInt64 ts, const vector& parts) { + unordered_map cps; + for (auto& part : parts) cps[part] = cp_list_[part].GetRtCP(ts); + return cps; +} + +RetCode TxnServer::LoadCPList(UInt64 ts, + const unordered_map& his_cp_list, + const unordered_map& rt_cp_list) { + for (auto& cp : his_cp_list) 
cp_list_[cp.first].SetHisCP(ts, cp.second); + for (auto& cp : rt_cp_list) cp_list_[cp.first].SetRtCP(ts, cp.second); + return rSuccess; +} - pos_list_[10000000] = 2017198080; - CountList[10000000] = 6001215; - logic_cp_list_[10000000] = 2017198080; +RetCode TxnServer::LoadPos(const unordered_map& pos_list) { + for (auto& pos : pos_list) pos_list_[pos.first].store(pos.second); + return rSuccess; } -RetCode TxnServer::RecoveryFromTxnLog() {} +string TxnServer::ToString() {} } } diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp index b225d7e07..372bf9be8 100644 --- a/txn_manager/txn_server.hpp +++ b/txn_manager/txn_server.hpp @@ -34,8 +34,11 @@ #include #include #include +#include +#include #include #include +#include #include #include #include @@ -44,9 +47,9 @@ #include "stdlib.h" #include "caf/all.hpp" #include "caf/io/all.hpp" -#include "txn.hpp" +#include "../txn_manager/txn.hpp" +#include "../utility/Timer.h" //#include "txn_log.hpp" -#include namespace claims { namespace txn { @@ -56,60 +59,69 @@ using std::endl; using std::vector; using std::string; using std::map; +using std::unordered_map; using std::pair; using std::to_string; using std::function; using std::sort; using std::atomic; +using std::mutex; +using std::lock_guard; using std::chrono::seconds; using std::chrono::milliseconds; using std::make_shared; using std::shared_ptr; +using std::atomic; // UInt64 txn_id; -class TxnCore : public caf::event_based_actor { +class TimeStamp { public: - static int capacity_; - UInt64 core_id_; - UInt64 txn_id_ = 0; - UInt64 size_; - map txn_index_; - bool* commit_ = nullptr; - bool* abort_ = nullptr; - vector* strip_list_; - caf::behavior make_behavior() override; - void ReMalloc(); - TxnCore(int coreId) : core_id_(coreId) { ReMalloc(); } - UInt64 GetId(); -}; + static UInt64 Init(UInt64 ts) { now_.store(ts); } + static UInt64 Gen() { return now_.load(); } + static UInt64 GenAdd() { return now_.fetch_add(1); } + static UInt64 TSLow(UInt64 ts, int 
num) { return ts % num; } + static UInt64 TSHigh(UInt64 ts, int num) { return ts / num; } -class Test : public caf::event_based_actor { - public: - caf::behavior make_behavior() override; + private: + static atomic now_; }; -class IngestCommitWorker : public caf::event_based_actor { +class QueryTracker : public caf::event_based_actor { public: + static RetCode Init() { + tracker_ = caf::spawn(); + return rSuccess; + } + static RetCode Begin(UInt64 ts) { + caf::anon_send(tracker_, BeginAtom::value, ts); + } + static RetCode Commit(UInt64 ts) { + caf::anon_send(tracker_, CommitAtom::value, ts); + } caf::behavior make_behavior() override; -}; -class AbortWorker : public caf::event_based_actor { - public: - caf::behavior make_behavior() override; + private: + static caf::actor tracker_; + static set active_querys_; }; -class QueryWorker : public caf::event_based_actor { +class CheckpointTracker : public caf::event_based_actor { public: + static RetCode Init() { + tracker_ = caf::spawn(); + return rSuccess; + } caf::behavior make_behavior() override; -}; -class CheckpointWorker : public caf::event_based_actor { - public: - caf::behavior make_behavior() override; + private: + static caf::actor tracker_; }; -class CommitCPWorker : public caf::event_based_actor { +class TxnCore : public caf::event_based_actor { public: + UInt64 core_id_; + map txnbin_list_; caf::behavior make_behavior() override; + TxnCore(int coreId) : core_id_(coreId) {} }; class TxnServer : public caf::event_based_actor { @@ -119,34 +131,26 @@ class TxnServer : public caf::event_based_actor { static int concurrency_; static caf::actor proxy_; static vector cores_; - static std::unordered_map> pos_list_; - static std::unordered_map logic_cp_list_; - static std::unordered_map phy_cp_list_; - static std::unordered_map> CountList; + static unordered_map> pos_list_; + static unordered_map cp_list_; + // static unordered_map> CountList; /**************** User APIs ***************/ static RetCode Init(int 
concurrency = kConcurrency, int port = kTxnPort); - + static RetCode LoadCPList(UInt64 ts, + const unordered_map& his_cp_list, + const unordered_map& rt_cp_list); + static RetCode LoadPos(const unordered_map& pos_list); + static int GetCoreID(UInt64 ts) { return ts % concurrency_; } + static string ToString(); /**************** System APIs ***************/ - static RetCode BeginIngest(const FixTupleIngestReq& request, Ingest& ingest); - static RetCode CommitIngest(const UInt64 id); - static RetCode AbortIngest(const UInt64 id); - static RetCode BeginQuery(const QueryReq& request, Query& snapshot); - static RetCode BeginCheckpoint(Checkpoint& cp); - static RetCode CommitCheckpoint(const Checkpoint& cp); - static UInt64 GetCoreId(UInt64 id) { return id % 1000; } - static inline UInt64 SelectCoreId() { return rand() % concurrency_; } + private: caf::behavior make_behavior() override; - - static RetCode RecoveryFromCatalog(); - static RetCode RecoveryFromTxnLog(); + static unordered_map GetHisCPList( + UInt64 ts, const vector& parts); + static unordered_map GetRtCPList(UInt64 ts, + const vector& parts); static inline Strip AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount); - static inline bool IsStripListGarbage(const vector& striplist) { - for (auto& strip : striplist) { - if (strip.pos_ >= TxnServer::logic_cp_list_[strip.part_]) return false; - } - return true; - } }; } } diff --git a/txn_manager/txn_server_test.cpp b/txn_manager/txn_server_test.cpp index 1fb28bd2e..17ec3edb3 100644 --- a/txn_manager/txn_server_test.cpp +++ b/txn_manager/txn_server_test.cpp @@ -199,7 +199,7 @@ for (auto i=0; i int gettimeofday(struct timeval *tv, struct timezone *tz); + +static inline double GetCurrents() { + timeval t_start; + gettimeofday(&t_start, NULL); + return t_start.tv_sec + 1.0 * t_start.tv_usec / 1000000; +} + static inline double GetCurrentMs() { timeval t_start; gettimeofday(&t_start, NULL); From ca73e30d742f4b4ea65f06f4f0eebce46f83a9fb Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Thu, 4 Aug 2016 22:46:33 +0800 Subject: [PATCH 49/58] add txn info for a select query --- logical_operator/logical_aggregation.h | 7 + logical_operator/logical_cross_join.h | 8 + logical_operator/logical_delete_filter.h | 9 + logical_operator/logical_equal_join.h | 9 + logical_operator/logical_filter.h | 6 + logical_operator/logical_limit.h | 7 + logical_operator/logical_operator.h | 12 +- logical_operator/logical_outer_join.h | 8 + logical_operator/logical_project.h | 6 + logical_operator/logical_query_plan_root.h | 6 + logical_operator/logical_scan.cpp | 1 + logical_operator/logical_scan.h | 14 ++ logical_operator/logical_sort.h | 6 + logical_operator/logical_subquery.h | 6 + physical_operator/physical_projection_scan.h | 10 +- stmt_handler/select_exec.cpp | 14 ++ txn_manager/txn.cpp | 84 ++++++-- txn_manager/txn.hpp | 41 +++- txn_manager/txn_client.cpp | 42 +++- txn_manager/txn_client.hpp | 2 + txn_manager/txn_client_test.cpp | 42 ++-- txn_manager/txn_server.cpp | 202 +++++++++++++++---- txn_manager/txn_server.hpp | 7 +- 23 files changed, 454 insertions(+), 95 deletions(-) diff --git a/logical_operator/logical_aggregation.h b/logical_operator/logical_aggregation.h index 7e2289595..9ab38261b 100755 --- a/logical_operator/logical_aggregation.h +++ b/logical_operator/logical_aggregation.h @@ -95,6 +95,13 @@ class LogicalAggregation : public LogicalOperator { LogicalOperator* child); virtual ~LogicalAggregation(); + void GetTxnInfo(QueryReq& request) const override { + child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + child_->SetTxnInfo(query); + } + protected: /** * get plan context resulting from executing aggregation operator. 
diff --git a/logical_operator/logical_cross_join.h b/logical_operator/logical_cross_join.h index 5801d2696..14db24eb8 100644 --- a/logical_operator/logical_cross_join.h +++ b/logical_operator/logical_cross_join.h @@ -55,6 +55,14 @@ class LogicalCrossJoin : public LogicalOperator { PlanContext GetPlanContext(); PhysicalOperatorBase* GetPhysicalPlan(const unsigned& blocksize); void Print(int level = 0) const; + void GetTxnInfo(QueryReq& request) const override { + left_child_->GetTxnInfo(request); + right_child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + left_child_->SetTxnInfo(query); + right_child_->SetTxnInfo(query); + } protected: /** diff --git a/logical_operator/logical_delete_filter.h b/logical_operator/logical_delete_filter.h index aa31f61ef..be3124fac 100755 --- a/logical_operator/logical_delete_filter.h +++ b/logical_operator/logical_delete_filter.h @@ -127,6 +127,15 @@ class LogicalDeleteFilter : public LogicalOperator { bool GetOptimalPhysicalPlan(Requirement requirement, PhysicalPlanDescriptor& physical_plan_descriptor, const unsigned& block_size = 4096 * 1024); + void GetTxnInfo(QueryReq& request) const override { + left_child_->GetTxnInfo(request); + right_child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + left_child_->SetTxnInfo(query); + right_child_->SetTxnInfo(query); + } + private: std::vector GetLeftFilterKeyIds() const; diff --git a/logical_operator/logical_equal_join.h b/logical_operator/logical_equal_join.h index 2eb643e73..17b861b74 100755 --- a/logical_operator/logical_equal_join.h +++ b/logical_operator/logical_equal_join.h @@ -107,6 +107,15 @@ class LogicalEqualJoin : public LogicalOperator { PhysicalPlanDescriptor& physical_plan_descriptor, const unsigned& block_size = 4096 * 1024); + void GetTxnInfo(QueryReq& request) const override { + left_child_->GetTxnInfo(request); + right_child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + 
left_child_->SetTxnInfo(query); + right_child_->SetTxnInfo(query); + } + private: std::vector GetLeftJoinKeyIds() const; std::vector GetRightJoinKeyIds() const; diff --git a/logical_operator/logical_filter.h b/logical_operator/logical_filter.h index 0ef3c2a65..2f2df1eca 100644 --- a/logical_operator/logical_filter.h +++ b/logical_operator/logical_filter.h @@ -87,6 +87,12 @@ class LogicalFilter : public LogicalOperator { * @param level: As an index. */ void Print(int level = 0) const; + void GetTxnInfo(QueryReq& request) const override { + child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + child_->SetTxnInfo(query); + } private: /** diff --git a/logical_operator/logical_limit.h b/logical_operator/logical_limit.h index 115b2ce9d..3c7499795 100644 --- a/logical_operator/logical_limit.h +++ b/logical_operator/logical_limit.h @@ -90,6 +90,13 @@ class LogicalLimit : public LogicalOperator { * @param level: As an index. */ virtual void Print(int level = 0) const; + void GetTxnInfo(QueryReq& request) const override { + child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + child_->SetTxnInfo(query); + } + LogicalOperator* child_; diff --git a/logical_operator/logical_operator.h b/logical_operator/logical_operator.h index 28bfb8cdd..29b5195ae 100755 --- a/logical_operator/logical_operator.h +++ b/logical_operator/logical_operator.h @@ -38,12 +38,15 @@ #include "../logical_operator/Requirement.h" #include "../physical_operator/physical_operator_base.h" #include "../utility/lock.h" - +#include "../txn_manager/txn.hpp" static std::atomic_uint MIDINADE_TABLE_ID(1000000); namespace claims { namespace logical_operator { #define kTabSize 4 using claims::physical_operator::PhysicalOperatorBase; +using claims::txn::QueryReq; +using claims::txn::Query; +using claims::txn::GetGlobalPartId; enum OperatorType { kLogicalScan, kLogicalFilter, @@ -113,6 +116,13 @@ class LogicalOperator { virtual void Print(int level = 0) const = 
0; OperatorType get_operator_type() { return operator_type_; } + /** + * Get the information(list of partition to scan) for txn creation + * Set the txn concurrency information to operator + * @param request + */ + virtual void GetTxnInfo(QueryReq& request) const {} + virtual void SetTxnInfo(const Query& query) {} protected: Schema* GetSchema(const std::vector&) const; diff --git a/logical_operator/logical_outer_join.h b/logical_operator/logical_outer_join.h index 3d7fcaf15..32d997426 100644 --- a/logical_operator/logical_outer_join.h +++ b/logical_operator/logical_outer_join.h @@ -110,6 +110,14 @@ class LogicalOuterJoin : public LogicalOperator { bool GetOptimalPhysicalPlan(Requirement requirement, PhysicalPlanDescriptor& physical_plan_descriptor, const unsigned& block_size = 4096 * 1024); + void GetTxnInfo(QueryReq& request) const override { + left_child_->GetTxnInfo(request); + right_child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + left_child_->SetTxnInfo(query); + right_child_->SetTxnInfo(query); + } private: std::vector GetLeftJoinKeyIds() const; diff --git a/logical_operator/logical_project.h b/logical_operator/logical_project.h index 23d5a7848..b1e4dfdf7 100644 --- a/logical_operator/logical_project.h +++ b/logical_operator/logical_project.h @@ -79,6 +79,12 @@ class LogicalProject : public LogicalOperator { * @param level:initialized to zero */ void Print(int level = 0) const; + void GetTxnInfo(QueryReq& request) const override { + child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + child_->SetTxnInfo(query); + } private: /** diff --git a/logical_operator/logical_query_plan_root.h b/logical_operator/logical_query_plan_root.h index 67c4bccd9..a19ba85ce 100644 --- a/logical_operator/logical_query_plan_root.h +++ b/logical_operator/logical_query_plan_root.h @@ -99,6 +99,12 @@ class LogicalQueryPlanRoot : public LogicalOperator { * @return void */ void Print(int level = 0) const; + void 
GetTxnInfo(QueryReq& request) const override { + child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + child_->SetTxnInfo(query); + } private: /** diff --git a/logical_operator/logical_scan.cpp b/logical_operator/logical_scan.cpp index 70e9c79bc..3b43942fb 100644 --- a/logical_operator/logical_scan.cpp +++ b/logical_operator/logical_scan.cpp @@ -210,6 +210,7 @@ PhysicalOperatorBase* LogicalScan::GetPhysicalPlan(const unsigned& block_size) { state.projection_id_ = target_projection_->getProjectionID(); state.schema_ = GetSchema(plan_context_->attribute_list_); state.sample_rate_ = sample_rate_; + state.query_ = query_; return new PhysicalProjectionScan(state); } diff --git a/logical_operator/logical_scan.h b/logical_operator/logical_scan.h index 7e8a0e052..2062c8b94 100644 --- a/logical_operator/logical_scan.h +++ b/logical_operator/logical_scan.h @@ -65,6 +65,19 @@ class LogicalScan : public LogicalOperator { PhysicalPlanDescriptor& physical_plan_descriptor, const unsigned& kBlock_size = 4096 * 1024); void ChangeAliasAttr(); + void GetTxnInfo(QueryReq& request) const override { + auto table_id = target_projection_->getProjectionID().table_id; + auto proj_id = target_projection_->getProjectionID().projection_off; + auto part_count = + target_projection_->getPartitioner()->getNumberOfPartitions(); + for (auto part_id = 0; part_id < part_count; part_id++) { + request.part_list_.push_back(GetGlobalPartId(table_id, proj_id, part_id)); + } + } + void SetTxnInfo(const Query& query) override { + query_ = query; + query_.GenTxnInfo(); + } private: /**check whether all the involved attributes are in the same projection.*/ @@ -77,6 +90,7 @@ class LogicalScan : public LogicalOperator { PlanContext* plan_context_; string table_alias_; float sample_rate_; + Query query_; }; } // namespace logical_operator diff --git a/logical_operator/logical_sort.h b/logical_operator/logical_sort.h index b67e0453b..193b0687c 100644 --- 
a/logical_operator/logical_sort.h +++ b/logical_operator/logical_sort.h @@ -110,6 +110,12 @@ class LogicalSort : public LogicalOperator { virtual bool GetOptimalPhysicalPlan( Requirement requirement, PhysicalPlanDescriptor &physical_plan_descriptor, const unsigned &block_size = 4096 * 1024) {} + void GetTxnInfo(QueryReq& request) const override { + child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + child_->SetTxnInfo(query); + } private: vector> order_by_attrs_; diff --git a/logical_operator/logical_subquery.h b/logical_operator/logical_subquery.h index 8a7404757..8a9521acf 100644 --- a/logical_operator/logical_subquery.h +++ b/logical_operator/logical_subquery.h @@ -48,6 +48,12 @@ class LogicalSubquery : public LogicalOperator { PlanContext GetPlanContext(); PhysicalOperatorBase *GetPhysicalPlan(const unsigned &blocksize); void Print(int level = 0) const; + void GetTxnInfo(QueryReq& request) const override { + child_->GetTxnInfo(request); + } + void SetTxnInfo(const Query& query) override { + child_->SetTxnInfo(query); + } private: vector subquery_attrs_; diff --git a/physical_operator/physical_projection_scan.h b/physical_operator/physical_projection_scan.h index de5ecbe9b..e1a973899 100644 --- a/physical_operator/physical_projection_scan.h +++ b/physical_operator/physical_projection_scan.h @@ -49,7 +49,8 @@ #include "../storage/PartitionStorage.h" #include "../physical_operator/physical_operator.h" #include "../common/ExpandedThreadTracker.h" - +#include "../txn_manager/txn.hpp" +using claims::txn::Query; namespace claims { namespace physical_operator { @@ -111,10 +112,11 @@ class PhysicalProjectionScan : public PhysicalOperator { ProjectionID projection_id_; unsigned block_size_; float sample_rate_; + Query query_; friend class boost::serialization::access; template void serialize(Archive& ar, const unsigned int version) { - ar& schema_& projection_id_& block_size_& sample_rate_; + ar& schema_& projection_id_& block_size_& 
sample_rate_& query_; } }; PhysicalProjectionScan(State state); @@ -138,6 +140,10 @@ class PhysicalProjectionScan : public PhysicalOperator { bool Close(SegmentExecStatus* const exec_status); void Print(); RetCode GetAllSegments(stack* all_segments); + void SetTxnInfo(const Query& query) { + state_.query_ = query; + state_.query_.GenTxnInfo(); + } private: bool PassSample() const; diff --git a/stmt_handler/select_exec.cpp b/stmt_handler/select_exec.cpp index 04e2ecde1..28f845269 100644 --- a/stmt_handler/select_exec.cpp +++ b/stmt_handler/select_exec.cpp @@ -48,6 +48,8 @@ #include "../physical_operator/physical_nest_loop_join.h" #include "../physical_operator/physical_operator_base.h" #include "../stmt_handler/stmt_handler.h" +#include "../txn_manager/txn.hpp" +#include "../txn_manager/txn_client.hpp" #include "caf/io/all.hpp" using caf::io::remote_actor; using claims::logical_operator::LogicalQueryPlanRoot; @@ -63,6 +65,9 @@ using std::string; using std::cout; using std::make_pair; using claims::common::rStmtCancelled; +using claims::txn::Query; +using claims::txn::QueryReq; +using claims::txn::TxnClient; namespace claims { namespace stmt_handler { @@ -221,7 +226,16 @@ RetCode SelectExec::Execute() { logic_plan->Print(); cout << "--------------begin physical plan -------------------" << endl; #endif + /** + * Add Txn information for plan + */ + QueryReq request; + Query query; + logic_plan->GetTxnInfo(request); + TxnClient::BeginQuery(request, query); + logic_plan->SetTxnInfo(query); + cout << request.ToString() << endl; PhysicalOperatorBase* physical_plan = logic_plan->GetPhysicalPlan(64 * 1024); #ifndef PRINTCONTEXT physical_plan->Print(); diff --git a/txn_manager/txn.cpp b/txn_manager/txn.cpp index ca9a220e9..f5b0159b2 100644 --- a/txn_manager/txn.cpp +++ b/txn_manager/txn.cpp @@ -32,6 +32,14 @@ namespace txn { using claims::txn::Strip; +string Txn::ToString() { + string str = ""; + for (auto &strip : strip_list_) + str += "<" + to_string(strip.first) + "," + 
to_string(strip.second.first) + + "," + to_string(strip.second.second) + ">,"; + str += "\n"; + return str; +} string Snapshot::ToString() const { string str = "*****snapshot*****\n"; for (auto part_cp : part_pstrips_) { @@ -150,12 +158,13 @@ string QueryReq::ToString() { } string Query::ToString() { - string str = "******Query*******\n"; + string str = "******Query:" + to_string(ts_) + "*******\n"; for (auto &part : snapshot_) { - str += "part:" + to_string(part.first) + "\n"; + str += "part:" + to_string(part.first) + " "; for (auto &strip : part.second) - str += "Pos:" + to_string(strip.first) + ",Offset:" + - to_string(strip.second) + "\n"; + str += + "<" + to_string(strip.first) + "," + to_string(strip.second) + ">,"; + str += "\n"; } return str; } @@ -193,14 +202,14 @@ void Snapshot::Merge(const Snapshot &snapshot) { } void TxnBin::MergeSnapshot(Query &query) const { for (auto &part_cp : query.rt_cp_list_) { - UInt64 checkpoint = part_cp.second; - query.snapshot_[part_cp.first].insert(query.snapshot_[part_cp.first].end(), - snapshot_[part_cp.first].begin(), - snapshot_[part_cp.first].end()); - Strip::Sort(query.snapshot_[part_cp.first]); - Strip::Merge(query.snapshot_[part_cp.first]); - Strip::Filter(query.snapshot_[part_cp.first], - [checkpoint](PStrip &pstrip) -> bool { + auto part = part_cp.first; + auto checkpoint = part_cp.second; + query.snapshot_[part].insert(query.snapshot_[part].end(), + snapshot_[part].begin(), + snapshot_[part].end()); + Strip::Sort(query.snapshot_[part]); + Strip::Merge(query.snapshot_[part]); + Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { if (pstrip.first + pstrip.second <= checkpoint) { return false; } else { @@ -214,19 +223,21 @@ void TxnBin::MergeSnapshot(Query &query) const { } } -void TxnBin::MergeTxn(Query &query, int pos) const { - for (auto i = 0; i <= pos; i++) { - if (txn_list_[i].isCommit()) +void TxnBin::MergeTxn(Query &query, int len) const { + for (auto i = 0; i < len; i++) { + if 
(txn_list_[i].IsCommit()) for (auto &strip : txn_list_[i].strip_list_) query.snapshot_[strip.first].push_back(strip.second); } for (auto &part_cp : query.rt_cp_list_) { - UInt64 checkpoint = part_cp.first; - Strip::Sort(query.snapshot_[part_cp.first]); - Strip::Merge(query.snapshot_[part_cp.first]); - Strip::Filter(query.snapshot_[part_cp.first], - [checkpoint](PStrip &pstrip) -> bool { + auto part = part_cp.first; + auto checkpoint = part_cp.second; + Strip::Sort(query.snapshot_[part]); + Strip::Merge(query.snapshot_[part]); + Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { if (pstrip.first + pstrip.second <= checkpoint) { + cout << "fail:<" << pstrip.first << "," << pstrip.second << ">" + << pstrip.second << endl; return false; } else { if (pstrip.first < checkpoint && @@ -238,5 +249,38 @@ void TxnBin::MergeTxn(Query &query, int pos) const { }); } } + +string TxnBin::ToString() { + string str = ""; + for (auto i = 0; i < kTxnBinSize; i++) + if (txn_list_[i].IsCommit() || txn_list_[i].IsAbort()) + str += "txnbin_pos:" + to_string(i) + txn_list_[i].ToString(); + return str; +} + +void TxnBin::GenSnapshot(const TxnBin &prev) { + status_ = true; + snapshot_ = prev.snapshot_; + for (auto &txn : txn_list_) + if (txn.IsCommit()) + for (auto &strip : txn.strip_list_) + snapshot_[strip.first].push_back(strip.second); + for (auto &part : snapshot_) { + Strip::Sort(part.second); + Strip::Merge(part.second); + } +} + +void TxnBin::GenSnapshot() { + status_ = true; + for (auto &txn : txn_list_) + if (txn.IsCommit()) + for (auto &strip : txn.strip_list_) + snapshot_[strip.first].push_back(strip.second); + for (auto &part : snapshot_) { + Strip::Sort(part.second); + Strip::Merge(part.second); + } +} } } diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index d49e7203b..c1267f1b3 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -45,6 +45,10 @@ #include "caf/io/all.hpp" #include "../common/error_define.h" #include "../utility/Timer.h" 
+#include +#include +#include +#include namespace claims { namespace txn { @@ -82,7 +86,9 @@ using FailAtom = caf::atom_constant; using IngestAtom = caf::atom_constant; using WriteAtom = caf::atom_constant; +using DebugAtom = caf::atom_constant; using QueryAtom = caf::atom_constant; +using CommitQueryAtom = caf::atom_constant; using CheckpointAtom = caf::atom_constant; using GCAtom = caf::atom_constant; using CommitIngestAtom = caf::atom_constant; @@ -104,7 +110,7 @@ static const int kGCTime = 5; static const int kTimeout = 3; static const int kBlockSize = 64 * 1024; static const int kTailSize = sizeof(unsigned); -static const int kTxnBinSize = 1024; +static const int kTxnBinSize = 3; // 1024; inline UInt64 GetGlobalPartId(UInt64 table_id, UInt64 projeciton_id, UInt64 partition_id) { return partition_id + 1000 * (projeciton_id + 1000 * table_id); @@ -167,9 +173,10 @@ class Txn { } void Commit() { status_ = kCommit; } void Abort() { status_ = kAbort; } - bool isCommit() { return status_ == kCommit; } - bool isAbort() { return status_ == kAbort; } - bool isActive() { return status_ == kActive; } + bool IsCommit() { return status_ == kCommit; } + bool IsAbort() { return status_ == kAbort; } + bool IsActive() { return status_ == kActive; } + string ToString(); }; class Snapshot { @@ -257,6 +264,8 @@ inline bool operator==(const Ingest &lhs, const Ingest &rhs) { class QueryReq { public: vector part_list_; + QueryReq() {} + QueryReq(const vector &part_list) : part_list_(part_list) {} void InsertPart(UInt64 part) { part_list_.push_back(part); } vector get_part_list() const { return part_list_; } void set_part_list(const vector &partList) { part_list_ = partList; } @@ -277,6 +286,7 @@ class Query { * real-time checkpoint will never be send */ unordered_map rt_cp_list_; + Query() {} Query(UInt64 ts, const unordered_map &his_cp_list, const unordered_map &rt_cp_list) @@ -294,6 +304,18 @@ class Query { his_cp_list_ = cplist; } string ToString(); + void GenTxnInfo() { + for 
(auto &part_strips : snapshot_) + scan_snapshot_[part_strips.first] = part_strips.second; + for (auto &part_cp : his_cp_list_) + scan_cp_list_[part_cp.first] = part_cp.second; + } + map> scan_snapshot_; + map scan_cp_list_; + template + void serialize(Archive &ar, const unsigned int version) { + ar &scan_snapshot_ &scan_cp_list_; + } }; inline bool operator==(const Query &lhs, const Query &rhs) { return lhs.snapshot_ == rhs.snapshot_ && lhs.his_cp_list_ == rhs.his_cp_list_; @@ -349,21 +371,30 @@ class TxnBin { ct_abort_++; } bool IsFull() const { return ct_commit_ + ct_abort_ == kTxnBinSize; } + bool IsSnapshot() const { return status_ == true; } int Count() const { return ct_; } int CountCommit() const { return ct_commit_; } int CountAbort() const { return ct_abort_; } + void GenSnapshot(); + void GenSnapshot(const TxnBin &prev); void MergeSnapshot(Query &query) const; - void MergeTxn(Query &query, int pos) const; + void MergeTxn(Query &query, int len) const; + string ToString(); static UInt64 GetTxnBinID(UInt64 ts, UInt64 core_num) { return (ts / core_num) / kTxnBinSize; } static UInt64 GetTxnBinPos(UInt64 ts, UInt64 core_num) { return (ts / core_num) % kTxnBinSize; } + static UInt64 GetTxnBinMaxTs(UInt64 txnbin_id, UInt64 core_num, + UInt64 core_id) { + return (txnbin_id + 1) * kTxnBinSize * core_num + core_id; + } Txn txn_list_[kTxnBinSize]; private: + bool status_ = false; int ct_ = 0; int ct_commit_ = 0; int ct_abort_ = 0; diff --git a/txn_manager/txn_client.cpp b/txn_manager/txn_client.cpp index c09dc81c2..f8621e46b 100644 --- a/txn_manager/txn_client.cpp +++ b/txn_manager/txn_client.cpp @@ -62,6 +62,25 @@ RetCode TxnClient::Init(string ip, int port) { return 0; } +RetCode TxnClient::Debug(string flag) { + RetCode ret = 0; + try { + caf::scoped_actor self; + self->sync_send(TxnServer::active_ ? 
TxnServer::proxy_ : proxy_, + DebugAtom::value, flag) + .await([&ret](RetCode r) { ret = r; }, + caf::others >> []() { cout << " unkown message" << endl; }, + caf::after(seconds(kTimeout)) >> [&ret] { + ret = -1; + cout << "time out" << endl; + }); + } catch (...) { + cout << "link fail" << endl; + return -1; + } + return ret; +} + RetCode TxnClient::BeginIngest(const FixTupleIngestReq& request, Ingest& ingest) { // RetCode ret = rSuccess; @@ -142,10 +161,8 @@ RetCode TxnClient::BeginQuery(const QueryReq& request, Query& query) { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, QueryAtom::value, request) - .await([&](const Query& q) { query = q; }, - caf::after(seconds(kTimeout)) >> [&] { - // ret = - // rLinkTmTimeout; + .await([&query](const Query& q) { query = q; }, + caf::after(seconds(kTimeout)) >> [&ret] { ret = -1; cout << "time out" << endl; }); @@ -157,6 +174,23 @@ RetCode TxnClient::BeginQuery(const QueryReq& request, Query& query) { return ret; } +RetCode TxnClient::CommitQuery(UInt64 ts) { + RetCode ret = 0; + try { + caf::scoped_actor self; + self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, + CommitQueryAtom::value, ts) + .await([&ret](RetCode r) { ret = r; }, + caf::after(seconds(kTimeout)) >> [&ret] { + ret = -1; + cout << "time out" << endl; + }); + } catch (...) 
{ + cout << "link to proxy fail in commitQuery" << endl; + } + return ret; +} + RetCode TxnClient::BeginCheckpoint(Checkpoint& cp) { // RetCode ret = rSuccess; RetCode ret = 0; diff --git a/txn_manager/txn_client.hpp b/txn_manager/txn_client.hpp index 1923e904d..ee5157c21 100644 --- a/txn_manager/txn_client.hpp +++ b/txn_manager/txn_client.hpp @@ -73,10 +73,12 @@ class TxnClient{ static int port_; static caf::actor proxy_; static RetCode Init(string ip = kTxnIp, int port = kTxnPort); + static RetCode Debug(string flag); static RetCode BeginIngest(const FixTupleIngestReq & request, Ingest & ingest); static RetCode CommitIngest(const UInt64 id); static RetCode AbortIngest(const UInt64 id); static RetCode BeginQuery(const QueryReq & request, Query & query); + static RetCode CommitQuery(UInt64 ts); static RetCode BeginCheckpoint(Checkpoint & cp); static RetCode CommitCheckpoint(const UInt64 logic_cp, const UInt64 phy_cp); }; diff --git a/txn_manager/txn_client_test.cpp b/txn_manager/txn_client_test.cpp index 59152f7a3..f57fe42a7 100644 --- a/txn_manager/txn_client_test.cpp +++ b/txn_manager/txn_client_test.cpp @@ -74,6 +74,7 @@ using FailAtom = caf::atom_constant; using QuitAtom = caf::atom_constant; using claims::txn::Snapshot; using claims::txn::CAFSerConfig; +using claims::txn::QueryTracker; class Foo { public: vector request1; @@ -294,31 +295,38 @@ int main(int argc, const char **argv) { s1.Merge(s2); cout << s1.ToString() << endl; } else if (type == "txnserver") { - unordered_map pos_list = { - {1, 10}, {2, 20}, {3, 16}, {4, 19}}; + unordered_map pos_list = {{1, 0}, {2, 0}, {3, 0}, {4, 0}}; TxnServer::Init(4, 8089); - cout << "1:" << endl; TxnServer::LoadPos(pos_list); - cout << "2:" << endl; TxnServer::LoadCPList(0, pos_list, pos_list); - cout << "3:" << endl; + // QueryTracker::Init(); + sleep(1); + FixTupleIngestReq request; + Ingest ingest; + request.InsertStrip(1, 2, 2); + request.InsertStrip(2, 2, 2); + request.InsertStrip(3, 2, 2); + request.InsertStrip(4, 
2, 2); + for (auto i = 0; i < 40; i++) { + TxnClient::BeginIngest(request, ingest); + // if (i % 10 != 0) + TxnClient::CommitIngest(ingest.ts_); + // else + // TxnClient::AbortIngest(ingest.ts_); + } + TxnClient::Debug("core"); } else if (type == "txnclient") { TxnClient::Init("127.0.0.1", 8089); auto job = []() { - FixTupleIngestReq request; - Ingest ingest; - request.InsertStrip(1, 2, 2); - request.InsertStrip(2, 2, 2); - request.InsertStrip(3, 2, 2); - request.InsertStrip(4, 2, 2); - for (auto i = 0; i < 100000; i++) { - TxnClient::BeginIngest(request, ingest); - TxnClient::CommitIngest(ingest.ts_); - } - // cout << ingest.ToString() << endl; + QueryReq req({1, 2, 3, 4}); + Query query; + TxnClient::BeginQuery(req, query); + // sleep(3); + TxnClient::CommitQuery(query.ts_); + cout << query.ToString() << endl; }; vector jobs; - for (auto i = 0; i < 12; i++) jobs.push_back(thread(job)); + for (auto i = 0; i < 1; i++) jobs.push_back(thread(job)); for (auto &j : jobs) j.join(); } caf::await_all_actors_done(); diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index 429d8d782..efbdc9075 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -58,26 +58,36 @@ bool TxnServer::active_ = false; unordered_map> TxnServer::pos_list_; unordered_map TxnServer::cp_list_; +set TxnServer::active_querys_; caf::actor test; caf::behavior QueryTracker::make_behavior() { - this->delayed_send(this, seconds(3), TimerAtom::value); + this->delayed_send(this, seconds(3), GCAtom::value); return {[](BeginAtom, UInt64 ts) { active_querys_.insert(ts); }, [](CommitAtom, UInt64 ts) { active_querys_.erase(ts); }, - [this](TimerAtom) { + [this](GCAtom) { /** * TODO broadcast all components what minimum timestamp * is still alive. 
*/ - this->delayed_send(this, seconds(3), TimerAtom::value); + UInt64 ts; + if (active_querys_.size() > 0) + ts = *active_querys_.begin(); + else + ts = TimeStamp::Gen(); + for (auto& core : TxnServer::cores_) + caf::anon_send(core, GCAtom::value, ts); + this->delayed_send(this, seconds(3), GCAtom::value); }}; } caf::behavior CheckpointTracker::make_behavior() {} caf::behavior TxnCore::make_behavior() { - // this->delayed_send(this, seconds(kGCTime + CoreId), GCAtom::value); + this->delayed_send(this, seconds(3 + core_id_), MergeAtom::value); + return { + [this](DebugAtom, string flag) { cout << ToString() << endl; }, [this](IngestAtom, shared_ptr ingest) -> caf::message { RetCode ret = rSuccess; // cout << "begin ingestion" << endl << ingest->ToString() << endl; @@ -95,20 +105,95 @@ caf::behavior TxnCore::make_behavior() { return caf::make_message(rSuccess); }, [this](AbortIngestAtom, const UInt64 ts) -> caf::message { - cout << "abort:" << ts << endl; + // cout << "abort:" << ts << endl; RetCode ret = rSuccess; auto id = TxnBin::GetTxnBinID(ts, TxnServer::concurrency_); auto pos = TxnBin::GetTxnBinPos(ts, TxnServer::concurrency_); txnbin_list_[id].AbortTxn(pos); return caf::make_message(rSuccess); }, - [](QueryAtom, const QueryReq& request, shared_ptr query) { - + [this](QueryAtom, shared_ptr query) -> caf::message { + auto id = TxnBin::GetTxnBinID(query->ts_, TxnServer::concurrency_); + auto pos = TxnBin::GetTxnBinPos(query->ts_, TxnServer::concurrency_); + auto ts = TxnBin::GetTxnBinMaxTs(id, TxnServer::concurrency_, core_id_); + auto remain = kTxnBinSize - (ts - query->ts_) / TxnServer::concurrency_; + // cout << "query core:" << core_id_ << endl; + // cout << query->ts_ << "," << id << "," << pos << "," << ts << + // "," + // << remain << endl; + if (remain > 0) { + txnbin_list_[id].MergeTxn(*query, remain); + // cout << "first txnbin<" << core_id_ << "," << id << ">txn<" << 0 + // << "," << remain << ">" << endl; + } + while (id > 0) { + --id; + if 
(txnbin_list_[id].IsSnapshot()) { + // cout << "txnbin<" << core_id_ << "," << id << ">snapshot" << + // endl; + txnbin_list_[id].MergeSnapshot(*query); + break; + } else { + // cout << "txnbin<" << core_id_ << "," << id << ">full" << endl; + txnbin_list_[id].MergeTxn(*query, kTxnBinSize); + } + } + auto next_core_id = (core_id_ + 1) % TxnServer::concurrency_; + if (next_core_id != TxnServer::GetCoreID(query->ts_)) + this->forward_to(TxnServer::cores_[next_core_id]); + return caf::make_message(*query); }, - [](MergeAtom, const QueryReq& request, shared_ptr query) { - + [this](MergeAtom) { + // cout << "start merge @ core:" << core_id_ << endl; + while (txnbin_list_[txnbin_cur_].IsFull()) { + if (txnbin_cur_ == 0) { + // cout << ToString() << endl; + txnbin_list_[txnbin_cur_].GenSnapshot(); + // cout << "merge: <" << core_id_ << "," << txnbin_cur_ << "," + // << txnbin_list_[txnbin_cur_].IsSnapshot() << ">\n"; + } else { + txnbin_list_[txnbin_cur_].GenSnapshot( + txnbin_list_[txnbin_cur_ - 1]); + // cout << "merge: <" << core_id_ << "," << txnbin_cur_ << "," + // << txnbin_list_[txnbin_cur_].IsSnapshot() << ">\n"; + } + // cout << ToString() << endl; + txnbin_cur_++; + } + this->delayed_send(this, seconds(3 + core_id_), MergeAtom::value); }, - [](MergeAtom, shared_ptr cp) { + [this](GCAtom, UInt64 min_ts_remain) { + // cout << "core:" << core_id_ << " gc:" << min_ts << endl; + // if (core_id_ == 0) + map new_txnbin_list; + auto ct = 0; + for (auto it = txnbin_list_.rbegin(); it != txnbin_list_.rend(); it++) { + auto id = it->first; + auto max_ts = + TxnBin::GetTxnBinMaxTs(id, TxnServer::concurrency_, core_id_); + if (txnbin_list_[id].IsSnapshot()) { + if (ct == 0) { + new_txnbin_list[id] = txnbin_list_[id]; + ct++; + } else { + break; + } + } else { + new_txnbin_list[id] = txnbin_list_[id]; + } + } + + // if (core_id_ == 0 && new_txnbin_list.size() < + // txnbin_list_.size()) { + // cout << "gc :" << endl; + // for (auto& txnbin : txnbin_list_) + // if 
(new_txnbin_list.find(txnbin.first) == + // new_txnbin_list.end()) + // cout << "<" << core_id_ << "," << txnbin.first << ">"; + // cout << endl; + // } + if (new_txnbin_list.size() < txnbin_list_.size()) + txnbin_list_ = new_txnbin_list; }, [](CheckpointAtom, shared_ptr cp) { @@ -149,8 +234,19 @@ caf::behavior TxnCore::make_behavior() { << endl;*/ // this->delayed_send(this, seconds(kGCTime), GCAtom::value); }, - caf::others >> - [&]() { cout << "core:" << core_id_ << " unkown message" << endl; }}; + caf::others >> [&]() { + cout << "core:" << core_id_ << " unkown message " + << to_string(current_message()) << endl; + }}; +} + +string TxnCore::ToString() { + string str = "core_id:" + to_string(core_id_) + "\n"; + for (auto& txnbin : txnbin_list_) { + str += "txnbin_id:" + to_string(txnbin.first) + "\n"; + str += txnbin.second.ToString(); + } + return str; } caf::behavior TxnServer::make_behavior() { @@ -160,40 +256,60 @@ caf::behavior TxnServer::make_behavior() { } catch (...) { cout << "txn server bind to port:" << port_ << " fail" << endl; } - // this->delayed_send(this, seconds(3), CheckpointAtom::value); - // this->delayed_send(this, seconds(3)); - return {[this](IngestAtom, const FixTupleIngestReq& request) { - auto ts = TimeStamp::GenAdd(); - auto ingest = make_shared(request.content_, ts); - for (auto& part : ingest->strip_list_) - ingest->InsertStrip(AtomicMalloc(part.first, part.second.first, - part.second.second)); - current_message() = caf::make_message(IngestAtom::value, ingest); - forward_to(cores_[GetCoreID(ts)]); - }, - [this](CommitIngestAtom, - const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, - [this](AbortIngestAtom, - const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, - [this](QueryAtom, const QueryReq& request) { - auto ts = TimeStamp::Gen(); - auto query = - make_shared(ts, GetHisCPList(ts, request.part_list_), - GetRtCPList(ts, request.part_list_)); - current_message() = - caf::make_message(QueryAtom::value, request, query); - 
forward_to(cores_[GetCoreID(ts)]); - }, - [this](CommitCPAtom, UInt64 ts, UInt64 part, UInt64 his_cp, - UInt64 rt_cp) -> caf::message { + this->delayed_send(this, seconds(3), GCAtom::value); + return { + [this](DebugAtom, string flag) -> caf::message { + cout << "debug begin" << endl; + for (auto& core : cores_) caf::anon_send(core, DebugAtom::value, flag); + return caf::make_message(rSuccess); + }, + [this](IngestAtom, const FixTupleIngestReq& request) { + auto ts = TimeStamp::GenAdd(); + auto ingest = make_shared(request.content_, ts); + for (auto& part : ingest->strip_list_) + ingest->InsertStrip( + AtomicMalloc(part.first, part.second.first, part.second.second)); + current_message() = caf::make_message(IngestAtom::value, ingest); + forward_to(cores_[GetCoreID(ts)]); + }, + [this](CommitIngestAtom, + const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, + [this](AbortIngestAtom, + const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, + [this](QueryAtom, const QueryReq& request) { + auto ts = TimeStamp::Gen(); + active_querys_.insert(ts); + auto query = + make_shared(ts, GetHisCPList(ts, request.part_list_), + GetRtCPList(ts, request.part_list_)); + current_message() = caf::make_message(QueryAtom::value, query); + forward_to(cores_[GetCoreID(ts)]); + // cout << "**********query:" << ts << " begin**************" << endl; + }, + [this](CommitQueryAtom, UInt64 ts) -> caf::message { + active_querys_.erase(ts); + return caf::make_message(rSuccess); + }, + [this](CommitCPAtom, UInt64 ts, UInt64 part, UInt64 his_cp, UInt64 rt_cp) + -> caf::message { cp_list_[part].SetHisCP(ts, his_cp); cp_list_[part].SetRtCP(ts, rt_cp); return caf::make_message(OkAtom::value); }, - caf::others >> [this]() { - cout << "server unkown message:" - << to_string(current_message()) << endl; - }}; + [this](GCAtom) { + UInt64 ts; + if (active_querys_.size() > 0) + ts = *active_querys_.begin(); + else + ts = TimeStamp::Gen(); + for (auto& core : TxnServer::cores_) + caf::anon_send(core, 
GCAtom::value, ts); + this->delayed_send(this, seconds(3), GCAtom::value); + }, + caf::others >> [this]() { + cout << "server unkown message:" + << to_string(current_message()) << endl; + }}; } RetCode TxnServer::Init(int concurrency, int port) { @@ -267,7 +383,5 @@ RetCode TxnServer::LoadPos(const unordered_map& pos_list) { for (auto& pos : pos_list) pos_list_[pos.first].store(pos.second); return rSuccess; } - -string TxnServer::ToString() {} } } diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp index 372bf9be8..5c954d21a 100644 --- a/txn_manager/txn_server.hpp +++ b/txn_manager/txn_server.hpp @@ -119,9 +119,12 @@ class CheckpointTracker : public caf::event_based_actor { class TxnCore : public caf::event_based_actor { public: UInt64 core_id_; + UInt64 txnbin_cur_ = 0; + // txnbin id <=> txnbin object map txnbin_list_; caf::behavior make_behavior() override; TxnCore(int coreId) : core_id_(coreId) {} + string ToString(); }; class TxnServer : public caf::event_based_actor { @@ -141,10 +144,10 @@ class TxnServer : public caf::event_based_actor { const unordered_map& rt_cp_list); static RetCode LoadPos(const unordered_map& pos_list); static int GetCoreID(UInt64 ts) { return ts % concurrency_; } - static string ToString(); + caf::behavior make_behavior() override; /**************** System APIs ***************/ private: - caf::behavior make_behavior() override; + static set active_querys_; static unordered_map GetHisCPList( UInt64 ts, const vector& parts); static unordered_map GetRtCPList(UInt64 ts, From 5523080c69aac79baeb53c35bb271489d3a10622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Mon, 22 Aug 2016 15:22:38 +0800 Subject: [PATCH 50/58] implement real-time chunk_list for ingest, scan from both real-time chunk_list and historical chunk_list, advaned binding all partitions in claims initialize stage --- Client/Test/TestSeverClient.cpp | 56 +-- Environment.cpp | 51 ++- Environment.h | 2 + 
IndexManager/LogicalCSBIndexBuilding.cpp | 4 +- Test/TestSuit/hash_table_test.h | 26 +- Test/TestSuit/in_segment_scalability_test.cpp | 20 +- Test/common/issue27.cpp | 10 +- Test/common/issue27_sort.cpp | 8 +- Test/common/issue27ing.cpp | 8 +- Test/set_up_environment.h | 156 ++++---- catalog/Test/statistic_manager_test.cpp | 16 +- common/Block/BlockStream.cpp | 6 +- common/Block/BlockStream.h | 3 +- common/Schema/Test/VariableSchema_test.cpp | 8 +- common/ids.h | 339 +++++++++--------- loader/slave_loader.cpp | 2 +- .../physical_projection_scan.cpp | 15 +- stmt_handler/select_exec.cpp | 2 +- storage/PartitionStorage.cpp | 129 ++++++- storage/PartitionStorage.h | 55 ++- 20 files changed, 564 insertions(+), 352 deletions(-) diff --git a/Client/Test/TestSeverClient.cpp b/Client/Test/TestSeverClient.cpp index 719931133..ecdf6d6a2 100644 --- a/Client/Test/TestSeverClient.cpp +++ b/Client/Test/TestSeverClient.cpp @@ -123,7 +123,7 @@ static int loadData() { catalog->add_table(table_1); ////////////////////////////////////Create table - ///right////////////////////////// + /// right////////////////////////// TableDescriptor* table_2 = new TableDescriptor( "sb", Environment::getInstance()->getCatalog()->allocate_unique_table_id()); @@ -212,20 +212,20 @@ static int loadData() { } // partitioned by row_id // for(unsigned - //i=0;igetProjectoin(14)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(14)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(14)->getPartitioner()->RegisterPartition(i,2); // } // // // 8 partitions // for(unsigned - //i=0;igetProjectoin(2)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(2)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(2)->getPartitioner()->RegisterPartition(i,1); // } // // for(unsigned - //i=0;igetProjectoin(3)->getPartitioner()->getNumberOfPartitions();i++){ + // 
i=0;igetProjectoin(3)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(3)->getPartitioner()->RegisterPartition(i,3); // } @@ -245,20 +245,20 @@ static int loadData() { i, 6); } // for(unsigned - //i=0;igetProjectoin(2)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(2)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(2)->getPartitioner()->RegisterPartition(i,1); // } // for(unsigned - //i=0;igetProjectoin(3)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(3)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(3)->getPartitioner()->RegisterPartition(i,3); // } // // //partitioned by row_id // for(unsigned - //i=0;igetProjectoin(14)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(14)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(14)->getPartitioner()->RegisterPartition(i,2); // } @@ -266,41 +266,41 @@ static int loadData() { // //////////////////////////////////////// // // ///////////////////ONE - //MONTH///////////////////////////////////////////////////////////// + // MONTH///////////////////////////////////////////////////////////// // //CJ // // 4 partition // for(unsigned - //i=0;igetProjectoin(4)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(4)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(4)->getPartitioner()->RegisterPartition(i,40); // } // // for(unsigned - //i=0;igetProjectoin(5)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(5)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(5)->getPartitioner()->RegisterPartition(i,104); // } // //8 partitions // for(unsigned - //i=0;igetProjectoin(10)->getPartitioner()->getNumberOfPartitions();i++){ + // 
i=0;igetProjectoin(10)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(10)->getPartitioner()->RegisterPartition(i,20); // } // // for(unsigned - //i=0;igetProjectoin(11)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(11)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(11)->getPartitioner()->RegisterPartition(i,52); // } // // 18 partitions // for(unsigned - //i=0;igetProjectoin(6)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(6)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(6)->getPartitioner()->RegisterPartition(i,10); // } // // for(unsigned - //i=0;igetProjectoin(7)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(7)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(7)->getPartitioner()->RegisterPartition(i,24); // } @@ -308,37 +308,37 @@ static int loadData() { // //SB // // 4 partition // for(unsigned - //i=0;igetProjectoin(4)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(4)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(4)->getPartitioner()->RegisterPartition(i,39); // } // // for(unsigned - //i=0;igetProjectoin(5)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(5)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(5)->getPartitioner()->RegisterPartition(i,131); // } // // 8 partitions // for(unsigned - //i=0;igetProjectoin(10)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(10)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(10)->getPartitioner()->RegisterPartition(i,20); // } // // for(unsigned - //i=0;igetProjectoin(11)->getPartitioner()->getNumberOfPartitions();i++){ + // 
i=0;igetProjectoin(11)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(11)->getPartitioner()->RegisterPartition(i,66); // } // // 18 partitions // for(unsigned - //i=0;igetProjectoin(6)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(6)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(6)->getPartitioner()->RegisterPartition(i,10); // } // // for(unsigned - //i=0;igetProjectoin(7)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(7)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(7)->getPartitioner()->RegisterPartition(i,30); // } @@ -349,25 +349,25 @@ static int loadData() { // //// cj//// // // 4 partitions // for(unsigned - //i=0;igetProjectoin(8)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(8)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(8)->getPartitioner()->RegisterPartition(i,14); // } // // for(unsigned - //i=0;igetProjectoin(9)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(9)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(9)->getPartitioner()->RegisterPartition(i,36); // } // // 8 partitions // for(unsigned - //i=0;igetProjectoin(12)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(12)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(12)->getPartitioner()->RegisterPartition(i,7); // } // // for(unsigned - //i=0;igetProjectoin(13)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(13)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(0)->getProjectoin(13)->getPartitioner()->RegisterPartition(i,19); // } @@ -375,25 +375,25 @@ static int loadData() { // //// sb //// // // 4 partitions// // for(unsigned - 
//i=0;igetProjectoin(8)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(8)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(8)->getPartitioner()->RegisterPartition(i,14); // } // // for(unsigned - //i=0;igetProjectoin(9)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(9)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(9)->getPartitioner()->RegisterPartition(i,131); // } // // 8 partitions// // for(unsigned - //i=0;igetProjectoin(12)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(12)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(12)->getPartitioner()->RegisterPartition(i,7); // } // // for(unsigned - //i=0;igetProjectoin(13)->getPartitioner()->getNumberOfPartitions();i++){ + // i=0;igetProjectoin(13)->getPartitioner()->getNumberOfPartitions();i++){ // // catalog->getTable(1)->getProjectoin(13)->getPartitioner()->RegisterPartition(i,23); // } diff --git a/Environment.cpp b/Environment.cpp index eefc78a47..cdb90cdfe 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -120,12 +120,6 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { logging_->log("Initializing the BufferManager..."); initializeBufferManager(); - logging_->log("Initializing txn manager"); - if (!InitTxnManager()) LOG(ERROR) << "failed to initialize txn manager"; - - logging_->log("Initializing txn log server"); - if (!InitTxnLog()) LOG(ERROR) << "failed to initialize txn log"; - logging_->log("Initializing the ExecutorMaster..."); iteratorExecutorMaster = new IteratorExecutorMaster(); @@ -134,6 +128,21 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { exchangeTracker = new ExchangeTracker(); expander_tracker_ = ExpanderTracker::getInstance(); + + logging_->log("Initializing txn manager"); + if (!InitTxnManager()) LOG(ERROR) << "failed to initialize txn manager"; + + 
logging_->log("Initializing txn log server"); + if (!InitTxnLog()) LOG(ERROR) << "failed to initialize txn log"; + + /** + * Binding all partition for each projection + * Because + */ + sleep(3); + logging_->log("Advanced Bind all partition for each projection"); + if (!AdvancedBindAllPart()) LOG(ERROR) << "failed to bing partitions"; + #ifndef DEBUG_MODE if (ismaster) { initializeClientListener(); @@ -263,16 +272,16 @@ bool Environment::InitTxnManager() { TxnServer::Init(Config::txn_server_cores, Config::txn_server_port); auto cat = Catalog::getInstance(); auto table_count = cat->getNumberOfTable(); - cout << "table count:" << table_count << endl; + // cout << "table count:" << table_count << endl; for (unsigned table_id : cat->getAllTableIDs()) { - cout << "table id :" << table_id << endl; + // cout << "table id :" << table_id << endl; auto table = cat->getTable(table_id); if (NULL == table) { cout << " No table whose id is:" << table_id << endl; assert(false); } auto proj_count = table->getNumberOfProjection(); - cout << "proj_count:" << proj_count << endl; + // cout << "proj_count:" << proj_count << endl; for (auto proj_id = 0; proj_id < proj_count; proj_id++) { auto proj = table->getProjectoin(proj_id); if (NULL == proj) { @@ -282,17 +291,12 @@ bool Environment::InitTxnManager() { } auto part = proj->getPartitioner(); auto part_count = part->getNumberOfPartitions(); - cout << "part_count:" << part_count << endl; + // cout << "part_count:" << part_count << endl; for (auto part_id = 0; part_id < part_count; part_id++) { auto global_part_id = GetGlobalPartId(table_id, proj_id, part_id); - cout << global_part_id << endl; - // TxnServer::pos_list_[global_part_id] = - // TxnServer::his_cp_list_[global_part_id] = - // TxnServer::rt_cp_list_[global_part_id] = - // part->getPartitionBlocks(part_id) * 64 * 1024; pos_list[global_part_id] = his_cp_list[global_part_id] = rt_cp_list[global_part_id] = - part->getPartitionBlocks(part_id) * 64 * 1024; + 
part->getPartitionBlocks(part_id) * BLOCK_SIZE; } } } @@ -306,6 +310,21 @@ bool Environment::InitTxnManager() { return true; } +bool Environment::AdvancedBindAllPart() { + for (auto table_id : Catalog::getInstance()->getAllTableIDs()) { + auto table = Catalog::getInstance()->getTable(table_id); + auto proj_count = table->getNumberOfProjection(); + for (auto proj_id = 0; proj_id < proj_count; proj_id++) { + auto proj = table->getProjectoin(proj_id); + if (!proj->AllPartitionBound()) { + Catalog::getInstance()->getBindingModele()->BindingEntireProjection( + proj->getPartitioner(), DESIRIABLE_STORAGE_LEVEL); + } + } + } + return true; +} + bool Environment::InitTxnLog() { if (Config::enable_txn_log) { LOG(INFO) << "I'm txn log server"; diff --git a/Environment.h b/Environment.h index 5a349d45f..25e19445c 100755 --- a/Environment.h +++ b/Environment.h @@ -93,6 +93,8 @@ class Environment { bool InitTxnLog(); + bool AdvancedBindAllPart(); + private: static Environment* _instance; PortManager* portManager; diff --git a/IndexManager/LogicalCSBIndexBuilding.cpp b/IndexManager/LogicalCSBIndexBuilding.cpp index 6138714c9..f8debc91e 100644 --- a/IndexManager/LogicalCSBIndexBuilding.cpp +++ b/IndexManager/LogicalCSBIndexBuilding.cpp @@ -59,8 +59,8 @@ PhysicalOperatorBase* LogicalCSBIndexBuilding::GetPhysicalPlan( bls_column_list.push_back(t_int); // chunk offset bls_column_list.push_back( blc_state.schema_->getcolumn(blc_state.key_indexing_)); // sec_code - bls_column_list.push_back(t_u_smallInt); // chunk offset - bls_column_list.push_back(t_u_smallInt); // chunk offset + bls_column_list.push_back(t_u_smallInt); // chunk offset + bls_column_list.push_back(t_u_smallInt); // chunk offset bls_state.schema_ = new SchemaFix(bls_column_list); bls_state.child_ = blc; diff --git a/Test/TestSuit/hash_table_test.h b/Test/TestSuit/hash_table_test.h index 26528d7c5..c768c1bb7 100644 --- a/Test/TestSuit/hash_table_test.h +++ b/Test/TestSuit/hash_table_test.h @@ -164,43 +164,43 @@ static 
void startup_catalog() { 1); // G1 catalog->add_table(table_2); - for (unsigned i = 0; i < table_1->getProjectoin(0) + for (unsigned i = 0; i < table_1->getProjection(0) ->getPartitioner() ->getNumberOfPartitions(); i++) { catalog->getTable(0) - ->getProjectoin(0) + ->getProjection(0) ->getPartitioner() ->RegisterPartition(i, 2); } - for (unsigned i = 0; i < table_1->getProjectoin(1) + for (unsigned i = 0; i < table_1->getProjection(1) ->getPartitioner() ->getNumberOfPartitions(); i++) { catalog->getTable(0) - ->getProjectoin(1) + ->getProjection(1) ->getPartitioner() ->RegisterPartition(i, 6); } // sb_table - for (unsigned i = 0; i < table_2->getProjectoin(0) + for (unsigned i = 0; i < table_2->getProjection(0) ->getPartitioner() ->getNumberOfPartitions(); i++) { catalog->getTable(1) - ->getProjectoin(0) + ->getProjection(0) ->getPartitioner() ->RegisterPartition(i, 2); } - for (unsigned i = 0; i < table_2->getProjectoin(1) + for (unsigned i = 0; i < table_2->getProjection(1) ->getPartitioner() ->getNumberOfPartitions(); i++) { catalog->getTable(1) - ->getProjectoin(1) + ->getProjection(1) ->getPartitioner() ->RegisterPartition(i, 6); } @@ -229,7 +229,7 @@ void* insert_into_hash_table_from_projection(void* argment) { const unsigned bucketsize = 256 - 8; TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("sb"); - Schema* schema = table->getProjectoin(1)->getSchema(); + Schema* schema = table->getProjection(1)->getSchema(); *arg.hash_table = generate_hashtable(schema->getTupleMaxSize(), nbuckets, bucketsize); } @@ -287,17 +287,17 @@ static double projection_scan(unsigned degree_of_parallelism) { printf("nthread=%d\n", nthreads); TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("sb"); - Schema* schema = table->getProjectoin(1)->getSchema(); + Schema* schema = table->getProjection(1)->getSchema(); // BasicHashTable* // hashtable=generate_hashtable(schema->getTupleMaxSize(),nbuckets,bucketsize); - LogicalScan* scan = 
new LogicalScan(table->getProjectoin(1)); + LogicalScan* scan = new LogicalScan(table->getProjection(1)); scan->GetPlanContext(); PhysicalOperatorBase* warm_up_iterator = scan->GetPhysicalPlan(1024 * 64); PhysicalProjectionScan::State ps_state; ps_state.block_size_ = 1024 * 64; - ps_state.projection_id_ = table->getProjectoin(1)->getProjectionID(); + ps_state.projection_id_ = table->getProjection(1)->getProjectionID(); ps_state.schema_ = schema; BlockStreamBase* block_for_asking = @@ -314,7 +314,7 @@ static double projection_scan(unsigned degree_of_parallelism) { arg.schema = schema; arg.partition_reader = BlockManager::getInstance() ->GetPartitionHandle(PartitionID( - table->getProjectoin(1)->getProjectionID(), 0)) + table->getProjection(1)->getProjectionID(), 0)) ->CreateAtomicReaderIterator(); arg.barrier = new Barrier(nthreads); pthread_t pid[1000]; diff --git a/Test/TestSuit/in_segment_scalability_test.cpp b/Test/TestSuit/in_segment_scalability_test.cpp index c553ce0e8..56c031ab8 100644 --- a/Test/TestSuit/in_segment_scalability_test.cpp +++ b/Test/TestSuit/in_segment_scalability_test.cpp @@ -21,9 +21,9 @@ static double lineitem_scan_self_join() { TableDescriptor* table_right = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); - LogicalOperator* scan_right = new LogicalScan(table_right->getProjectoin(0)); + LogicalOperator* scan_right = new LogicalScan(table_right->getProjection(0)); LogicalFilter::Condition filter_condition_1; filter_condition_1.add(table->getAttribute("row_id"), AttributeComparator::EQ, @@ -66,9 +66,9 @@ static double sb_scan_self_join() { TableDescriptor* table_right = Environment::getInstance()->getCatalog()->getTable("sb"); - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); - LogicalOperator* scan_right = 
new LogicalScan(table_right->getProjectoin(0)); + LogicalOperator* scan_right = new LogicalScan(table_right->getProjection(0)); LogicalFilter::Condition filter_condition_1; filter_condition_1.add(table->getAttribute("row_id"), AttributeComparator::EQ, @@ -111,7 +111,7 @@ static double lineitem_scan_aggregation() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); std::vector group_by_attributes; // group_by_attributes.push_back(table->getAttribute("L_RETURNFLAG")); @@ -157,7 +157,7 @@ static double lineitem_scan_filter() { Environment::getInstance()->getCatalog()->getTable("LINEITEM"); // printf("Tuple // size:%d\n",table->getProjectoin(0)->getSchema()->getTupleMaxSize()); - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); LogicalFilter::Condition filter_condition_1; filter_condition_1.add(table->getAttribute("row_id"), AttributeComparator::EQ, @@ -188,7 +188,7 @@ static double sb_scan_filter() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("sb"); - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); // printf("Tuple // size:%d\n",table->getProjectoin(0)->getSchema()->getTupleMaxSize()); @@ -225,7 +225,7 @@ static double sb_scan_aggregation() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("sb"); - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); std::vector group_by_attributes; // group_by_attributes.push_back(table->getAttribute("L_RETURNFLAG")); @@ -331,7 +331,7 @@ static void test_block_construct() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("sb"); 
- LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); scan->GetPlanContext(); PhysicalOperatorBase* s = scan->GetPhysicalPlan(64 * 1024); @@ -339,7 +339,7 @@ static void test_block_construct() { std::vector vect; BlockStreamBase* block = BlockStreamBase::createBlock( - table->getProjectoin(0)->getSchema(), 64 * 1024); + table->getProjection(0)->getSchema(), 64 * 1024); s->Open(); while (s->Next(block)) ; diff --git a/Test/common/issue27.cpp b/Test/common/issue27.cpp index b57c950fc..499beb4b7 100644 --- a/Test/common/issue27.cpp +++ b/Test/common/issue27.cpp @@ -38,7 +38,7 @@ static void query_select_aggregation() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); //===========================scan=========================== - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); //==========================project========================= vector > expr_list1; @@ -261,9 +261,9 @@ static void init_single_node_tpc_h_envoriment_(bool master = true) { catalog->add_table(table_1); for (unsigned i = 0; - i < table_1->getProjectoin(0)->getPartitioner()->getNumberOfPartitions(); + i < table_1->getProjection(0)->getPartitioner()->getNumberOfPartitions(); i++) { - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition( + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition( i, 5); } } @@ -303,9 +303,9 @@ static void init_multi_node_tpc_h_envoriment_(bool master = true) { catalog->add_table(table_1); for (unsigned i = 0; - i < table_1->getProjectoin(0)->getPartitioner()->getNumberOfPartitions(); + i < table_1->getProjection(0)->getPartitioner()->getNumberOfPartitions(); i++) { - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition( + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition( i, 
3); } } diff --git a/Test/common/issue27_sort.cpp b/Test/common/issue27_sort.cpp index 85615faff..5eb86e764 100644 --- a/Test/common/issue27_sort.cpp +++ b/Test/common/issue27_sort.cpp @@ -41,7 +41,7 @@ static void query_select_sort() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); //===========================scan=========================== - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); //==========================project========================= vector > expr_list1; @@ -124,7 +124,7 @@ static void query_select_sort_string() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); //===========================scan=========================== - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); //==========================project========================= vector > expr_list1; @@ -262,9 +262,9 @@ static void init_single_node_tpc_h_envoriment_sort(bool master = true) { catalog->add_table(table_1); for (unsigned i = 0; - i < table_1->getProjectoin(0)->getPartitioner()->getNumberOfPartitions(); + i < table_1->getProjection(0)->getPartitioner()->getNumberOfPartitions(); i++) { - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition( + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition( i, 5); } } diff --git a/Test/common/issue27ing.cpp b/Test/common/issue27ing.cpp index 393b635de..d5c1bd17f 100644 --- a/Test/common/issue27ing.cpp +++ b/Test/common/issue27ing.cpp @@ -38,7 +38,7 @@ static void query_select_fzh() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); //===========================scan=========================== - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new 
LogicalScan(table->getProjection(0)); //==========================project========================= vector > expr_list1; @@ -157,7 +157,7 @@ static void query_select_aggregation_ing() { TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); //===========================scan=========================== - LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); + LogicalOperator* scan = new LogicalScan(table->getProjection(0)); //==========================project========================= vector > expr_list1; @@ -383,9 +383,9 @@ static void init_single_node_tpc_h_envoriment_ing(bool master = true) { catalog->add_table(table_1); for (unsigned i = 0; - i < table_1->getProjectoin(0)->getPartitioner()->getNumberOfPartitions(); + i < table_1->getProjection(0)->getPartitioner()->getNumberOfPartitions(); i++) { - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition( + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition( i, 1); } } diff --git a/Test/set_up_environment.h b/Test/set_up_environment.h index 27634cea9..daa661694 100644 --- a/Test/set_up_environment.h +++ b/Test/set_up_environment.h @@ -139,27 +139,27 @@ static void create_poc_data_one_partitions(){ catalog->add_table(table_2); - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,2); + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition(i,2); // catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,0); } - for(unsigned i=0;igetProjectoin(1)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(1)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(0)->getProjectoin(1)->getPartitioner()->RegisterPartition(i,6); + 
catalog->getTable(0)->getProjection(1)->getPartitioner()->RegisterPartition(i,6); } //sb_table - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,2); + catalog->getTable(1)->getProjection(0)->getPartitioner()->RegisterPartition(i,2); } - for(unsigned i=0;igetProjectoin(1)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(1)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(1)->getPartitioner()->RegisterPartition(i,6); + catalog->getTable(1)->getProjection(1)->getPartitioner()->RegisterPartition(i,6); } } @@ -292,27 +292,27 @@ static void create_poc_data_four_partitions(){ catalog->add_table(table_2); - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,2); + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition(i,2); // catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,0); } - for(unsigned i=0;igetProjectoin(1)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(1)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(0)->getProjectoin(1)->getPartitioner()->RegisterPartition(i,6); + catalog->getTable(0)->getProjection(1)->getPartitioner()->RegisterPartition(i,6); } //sb_table - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,2); + catalog->getTable(1)->getProjection(0)->getPartitioner()->RegisterPartition(i,2); 
} - for(unsigned i=0;igetProjectoin(1)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(1)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(1)->getPartitioner()->RegisterPartition(i,6); + catalog->getTable(1)->getProjection(1)->getPartitioner()->RegisterPartition(i,6); } } @@ -478,27 +478,27 @@ static void startup_mulitple_node_environment_of_poc(){ catalog->add_table(table_2); - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,2); + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition(i,2); // catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,0); } - for(unsigned i=0;igetProjectoin(1)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(1)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(0)->getProjectoin(1)->getPartitioner()->RegisterPartition(i,6); + catalog->getTable(0)->getProjection(1)->getPartitioner()->RegisterPartition(i,6); } //sb_table - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,2); + catalog->getTable(1)->getProjection(0)->getPartitioner()->RegisterPartition(i,2); } - for(unsigned i=0;igetProjectoin(1)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(1)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(1)->getPartitioner()->RegisterPartition(i,6); + catalog->getTable(1)->getProjection(1)->getPartitioner()->RegisterPartition(i,6); } } } @@ -647,43 +647,43 @@ static void startup_single_node_environment_of_tpch(bool 
master=true){ //T0 - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,3); + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition(i,3); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + catalog->getTable(1)->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(2)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,15); + catalog->getTable(2)->getProjection(0)->getPartitioner()->RegisterPartition(i,15); } //T3 - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(3)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,3); + catalog->getTable(3)->getProjection(0)->getPartitioner()->RegisterPartition(i,3); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(4)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,19); + catalog->getTable(4)->getProjection(0)->getPartitioner()->RegisterPartition(i,19); } //T6 - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - table_6->getProjectoin(0)->getPartitioner()->RegisterPartition(i,93); + 
table_6->getProjection(0)->getPartitioner()->RegisterPartition(i,93); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - table_7->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + table_7->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - table_8->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + table_8->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } catalog->saveCatalog(); } @@ -832,43 +832,43 @@ static void startup_single_node_one_partition_environment_of_tpch(bool master=tr //T0 - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,3); + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition(i,3); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + catalog->getTable(1)->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(2)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,15); + catalog->getTable(2)->getProjection(0)->getPartitioner()->RegisterPartition(i,15); } //T3 - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned 
i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(3)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,3); + catalog->getTable(3)->getProjection(0)->getPartitioner()->RegisterPartition(i,3); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(4)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,19); + catalog->getTable(4)->getProjection(0)->getPartitioner()->RegisterPartition(i,19); } //T6 - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - table_6->getProjectoin(0)->getPartitioner()->RegisterPartition(i,93); + table_6->getProjection(0)->getPartitioner()->RegisterPartition(i,93); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - table_7->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + table_7->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - table_8->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + table_8->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } } @@ -932,19 +932,19 @@ static void startup_multiple_node_environment_of_stock(bool master=true){ - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + catalog->getTable(1)->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } - for(unsigned 
i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(2)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + catalog->getTable(2)->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(3)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,445); + catalog->getTable(3)->getProjection(0)->getPartitioner()->RegisterPartition(i,445); } catalog->saveCatalog(); @@ -1098,43 +1098,43 @@ static void startup_multiple_node_environment_of_tpch(bool master=true){ //T0 - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(1)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + catalog->getTable(1)->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(2)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,15); + catalog->getTable(2)->getProjection(0)->getPartitioner()->RegisterPartition(i,15); } //T3 - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned 
i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(3)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,3); + catalog->getTable(3)->getProjection(0)->getPartitioner()->RegisterPartition(i,3); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - catalog->getTable(4)->getProjectoin(0)->getPartitioner()->RegisterPartition(i,19); + catalog->getTable(4)->getProjection(0)->getPartitioner()->RegisterPartition(i,19); } //T6 - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - table_6->getProjectoin(0)->getPartitioner()->RegisterPartition(i,93); + table_6->getProjection(0)->getPartitioner()->RegisterPartition(i,93); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - table_7->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + table_7->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } - for(unsigned i=0;igetProjectoin(0)->getPartitioner()->getNumberOfPartitions();i++){ + for(unsigned i=0;igetProjection(0)->getPartitioner()->getNumberOfPartitions();i++){ - table_8->getProjectoin(0)->getPartitioner()->RegisterPartition(i,1); + table_8->getProjection(0)->getPartitioner()->RegisterPartition(i,1); } // save catalog as a file diff --git a/catalog/Test/statistic_manager_test.cpp b/catalog/Test/statistic_manager_test.cpp index f95f0310e..48fdad924 100755 --- a/catalog/Test/statistic_manager_test.cpp +++ b/catalog/Test/statistic_manager_test.cpp @@ -254,22 +254,22 @@ static int statistic_manager_test() { //////////////////ONE DAY//////////////////////////////////////////////// // cj_table // 4 partitions partitioned by order_no - for (unsigned i = 0; i < 
table_1->getProjectoin(0) + for (unsigned i = 0; i < table_1->getProjection(0) ->getPartitioner() ->getNumberOfPartitions(); i++) { catalog->getTable(0) - ->getProjectoin(0) + ->getProjection(0) ->getPartitioner() ->RegisterPartition(i, 2); } - for (unsigned i = 0; i < table_1->getProjectoin(1) + for (unsigned i = 0; i < table_1->getProjection(1) ->getPartitioner() ->getNumberOfPartitions(); i++) { catalog->getTable(0) - ->getProjectoin(1) + ->getProjection(1) ->getPartitioner() ->RegisterPartition(i, 6); } @@ -294,22 +294,22 @@ static int statistic_manager_test() { // } // sb_table - for (unsigned i = 0; i < table_2->getProjectoin(0) + for (unsigned i = 0; i < table_2->getProjection(0) ->getPartitioner() ->getNumberOfPartitions(); i++) { catalog->getTable(1) - ->getProjectoin(0) + ->getProjection(0) ->getPartitioner() ->RegisterPartition(i, 2); } - for (unsigned i = 0; i < table_2->getProjectoin(1) + for (unsigned i = 0; i < table_2->getProjection(1) ->getPartitioner() ->getNumberOfPartitions(); i++) { catalog->getTable(1) - ->getProjectoin(1) + ->getProjection(1) ->getPartitioner() ->RegisterPartition(i, 6); } diff --git a/common/Block/BlockStream.cpp b/common/Block/BlockStream.cpp index 09abed10a..b6825fd64 100755 --- a/common/Block/BlockStream.cpp +++ b/common/Block/BlockStream.cpp @@ -51,9 +51,9 @@ BlockStreamBase* BlockStreamBase::createBlockWithDesirableSerilaizedSize( void* BlockStreamFix::getBlockDataAddress() { return start; } -// void BlockStreamFix::setBlockDataAddress(void* addr){ -// data_=(char*)addr; -//} +void BlockStreamFix::setBlockDataAddress(void* addr) { + start = reinterpret_cast(addr); +} bool BlockStreamFix::switchBlock(BlockStreamBase& block) { BlockStreamFix* blockfix = (BlockStreamFix*)█ diff --git a/common/Block/BlockStream.h b/common/Block/BlockStream.h index c16ba4a58..960b81d81 100755 --- a/common/Block/BlockStream.h +++ b/common/Block/BlockStream.h @@ -159,7 +159,7 @@ class BlockStreamFix : public BlockStreamBase { return free_ + 
tuple_size_ > start + BlockSize - sizeof(tail_info); } void* getBlockDataAddress(); - // void setBlockDataAddress(void* addr); + void setBlockDataAddress(void* addr); bool switchBlock(BlockStreamBase& block); void copyBlock(void* addr, unsigned length); bool insert(void* dest, void* src, unsigned bytes); @@ -169,6 +169,7 @@ class BlockStreamFix : public BlockStreamBase { unsigned getSerializedBlockSize() const; unsigned getBlockCapacityInTuples() const; unsigned getTuplesInBlock() const; + inline unsigned getTupleSize() const {return tuple_size_;} /* construct the BlockStream from a storage level block, * which last four bytes indicate the number of tuples in the block.*/ void constructFromBlock(const Block& block); diff --git a/common/Schema/Test/VariableSchema_test.cpp b/common/Schema/Test/VariableSchema_test.cpp index dd645e444..4dfa1b3c4 100644 --- a/common/Schema/Test/VariableSchema_test.cpp +++ b/common/Schema/Test/VariableSchema_test.cpp @@ -55,16 +55,16 @@ static int variable_schema_test() { catalog->add_table(table_1); for (unsigned i = 0; - i < table_1->getProjectoin(0)->getPartitioner()->getNumberOfPartitions(); + i < table_1->getProjection(0)->getPartitioner()->getNumberOfPartitions(); i++) { - catalog->getTable(0)->getProjectoin(0)->getPartitioner()->RegisterPartition( + catalog->getTable(0)->getProjection(0)->getPartitioner()->RegisterPartition( i, 1); } ProjectionBinding* pb = new ProjectionBinding(); cout << "in ======================================" << endl; pb->BindingEntireProjection( - catalog->getTable(0)->getProjectoin(0)->getPartitioner(), HDFS); + catalog->getTable(0)->getProjection(0)->getPartitioner(), HDFS); cout << "in ======================================" << endl; @@ -74,7 +74,7 @@ static int variable_schema_test() { column_list.push_back(column_type(t_double)); column_list.push_back(column_type(t_string)); PhysicalProjectionScan::State scan_state( - catalog->getTable(0)->getProjectoin(0)->getProjectionID(), + 
catalog->getTable(0)->getProjection(0)->getProjectionID(), new SchemaVar(column_list), 64 * 1024 - sizeof(unsigned)); PhysicalOperatorBase* scan = new PhysicalProjectionScan(scan_state); //------------------------------------------------------------------ diff --git a/common/ids.h b/common/ids.h index 374162d8f..fe1939d87 100755 --- a/common/ids.h +++ b/common/ids.h @@ -28,207 +28,220 @@ typedef unsigned ColumnOffset; typedef unsigned PartitionOffset; typedef int ChunkOffset; typedef unsigned long ExpanderID; -/*the following ids are based on the assumption that the TableOffset is globally unique.*/ +/*the following ids are based on the assumption that the TableOffset is globally + * unique.*/ struct NodeAddress { - NodeAddress(){}; - NodeAddress(std::string ip, std::string port) : - ip(ip), port(port) { - } - ; - bool operator ==(const NodeAddress & r) const { - return this->ip == r.ip && this->port == r.port; - } - std::string ip; - std::string port; + NodeAddress(){}; + NodeAddress(std::string ip, std::string port) : ip(ip), port(port){}; + bool operator==(const NodeAddress& r) const { + return this->ip == r.ip && this->port == r.port; + } + std::string ip; + std::string port; }; /* for boost::unordered_map*/ -static size_t hash_value(const NodeAddress& node_addr){ - size_t seed=0; - boost::hash_combine(seed,boost::hash_value(node_addr.ip)); - boost::hash_combine(seed,boost::hash_value(node_addr.port)); - return seed; +static size_t hash_value(const NodeAddress& node_addr) { + size_t seed = 0; + boost::hash_combine(seed, boost::hash_value(node_addr.ip)); + boost::hash_combine(seed, boost::hash_value(node_addr.port)); + return seed; } /** * AttributeID: an attribute in a table has an unique AttributeID*/ -struct AttributeID{ - AttributeID(TableID tid,AttributeOffset off):table_id(tid),offset(off){}; - AttributeID(){}; - TableID table_id; - AttributeOffset offset; - bool operator==(const AttributeID& r)const{ - return table_id==r.table_id&&offset==r.offset; - } 
+struct AttributeID { + AttributeID(TableID tid, AttributeOffset off) : table_id(tid), offset(off){}; + AttributeID(){}; + TableID table_id; + AttributeOffset offset; + bool operator==(const AttributeID& r) const { + return table_id == r.table_id && offset == r.offset; + } }; /* for boost::unordered_map*/ -static size_t hash_value(const AttributeID& key){ - size_t seed=0; - boost::hash_combine(seed,boost::hash_value(key.offset)); - boost::hash_combine(seed,boost::hash_value(key.table_id)); - return seed; +static size_t hash_value(const AttributeID& key) { + size_t seed = 0; + boost::hash_combine(seed, boost::hash_value(key.offset)); + boost::hash_combine(seed, boost::hash_value(key.table_id)); + return seed; } - /** * ProjectionID: a projection has an unique ProjectionID */ -struct ProjectionID{ - ProjectionID(){}; - ProjectionID(TableID tid,ProjectionOffset off):table_id(tid),projection_off(off){}; - ProjectionID(const ProjectionID& r):table_id(r.table_id),projection_off(r.projection_off){}; - TableID table_id; - ProjectionOffset projection_off; - bool operator==(const ProjectionID& r)const{ - return table_id==r.table_id&& projection_off==r.projection_off; - } - bool operator<(const ProjectionID& r)const{ - if (table_id < r.table_id) - return true; - else if (table_id == r.table_id) - return (projection_off < r.projection_off); - else - return false; - } - - /* for boost::serialization*/ - friend class boost::serialization::access; - template - void serialize(Archive & ar, const unsigned int version){ - ar & table_id & projection_off; - } - +struct ProjectionID { + ProjectionID(){}; + ProjectionID(TableID tid, ProjectionOffset off) + : table_id(tid), projection_off(off){}; + ProjectionID(const ProjectionID& r) + : table_id(r.table_id), projection_off(r.projection_off){}; + TableID table_id; + ProjectionOffset projection_off; + bool operator==(const ProjectionID& r) const { + return table_id == r.table_id && projection_off == r.projection_off; + } + bool 
operator<(const ProjectionID& r) const { + if (table_id < r.table_id) + return true; + else if (table_id == r.table_id) + return (projection_off < r.projection_off); + else + return false; + } + + /* for boost::serialization*/ + friend class boost::serialization::access; + template + void serialize(Archive& ar, const unsigned int version) { + ar& table_id& projection_off; + } }; /* for boost::unordered_map*/ -static size_t hash_value(const ProjectionID& key){ - size_t seed=0; - boost::hash_combine(seed,boost::hash_value(key.table_id)); - boost::hash_combine(seed,boost::hash_value(key.projection_off)); - return seed; +static size_t hash_value(const ProjectionID& key) { + size_t seed = 0; + boost::hash_combine(seed, boost::hash_value(key.table_id)); + boost::hash_combine(seed, boost::hash_value(key.projection_off)); + return seed; } - /** - * ColumnID: a Column corresponds to an attribute and is physically stored in one projection. + * ColumnID: a Column corresponds to an attribute and is physically stored in + * one projection. 
*/ -struct ColumnID{ - ColumnID(){}; - ColumnID(ProjectionID pid,ColumnOffset off):projection_id(pid),column_off(off){}; - ProjectionID projection_id; - // the index of column in projection, not in table - ColumnOffset column_off; - bool operator==(const ColumnID &r)const{ - return projection_id==r.projection_id&&column_off==r.column_off; - } - - friend class boost::serialization::access; - template - void serialize(Archive &ar, const unsigned int version) - { -// ar & projection_id & column_off & partitioner & fileLocations & hdfsFilePath & blkMemoryLocations & Projection_name_; - ar & projection_id & column_off; - } - - +struct ColumnID { + ColumnID(){}; + ColumnID(ProjectionID pid, ColumnOffset off) + : projection_id(pid), column_off(off){}; + ProjectionID projection_id; + // the index of column in projection, not in table + ColumnOffset column_off; + bool operator==(const ColumnID& r) const { + return projection_id == r.projection_id && column_off == r.column_off; + } + + friend class boost::serialization::access; + template + void serialize(Archive& ar, const unsigned int version) { + // ar & projection_id & column_off & partitioner & fileLocations & + // hdfsFilePath & blkMemoryLocations & Projection_name_; + ar& projection_id& column_off; + } }; /* for boost::unordered_map*/ -static size_t hash_value(const ColumnID& key){ - size_t seed=0; - boost::hash_combine(seed,hash_value(key.projection_id)); - boost::hash_combine(seed,boost::hash_value(key.column_off)); - return seed; +static size_t hash_value(const ColumnID& key) { + size_t seed = 0; + boost::hash_combine(seed, hash_value(key.projection_id)); + boost::hash_combine(seed, boost::hash_value(key.column_off)); + return seed; } /** * PartitionID: a partition corresponds to one projection. 
*/ -struct PartitionID{ - PartitionID(ProjectionID projection_id,PartitionOffset off):projection_id(projection_id),partition_off(off){}; - PartitionID(){}; - ProjectionID projection_id; - PartitionOffset partition_off; - bool operator==(const PartitionID& r)const{ - return projection_id==r.projection_id&&partition_off==r.partition_off; - } - bool operator<(const PartitionID& r)const{ - if (projection_id < r.projection_id) - return true; - else if (projection_id == r.projection_id) - return (partition_off < r.partition_off); - else - return false; - } - PartitionID(const PartitionID& r){ - projection_id=r.projection_id; - partition_off=r.partition_off; - } - std::string getName()const{ - std::ostringstream str; - str<<"T"< - void serialize(Archive &ar, const unsigned int version) - { - ar & partition_off & projection_id; - } +struct PartitionID { + PartitionID(ProjectionID projection_id, PartitionOffset off) + : projection_id(projection_id), partition_off(off){}; + PartitionID(){}; + ProjectionID projection_id; + PartitionOffset partition_off; + bool operator==(const PartitionID& r) const { + return projection_id == r.projection_id && partition_off == r.partition_off; + } + bool operator<(const PartitionID& r) const { + if (projection_id < r.projection_id) + return true; + else if (projection_id == r.projection_id) + return (partition_off < r.partition_off); + else + return false; + } + PartitionID(const PartitionID& r) { + projection_id = r.projection_id; + partition_off = r.partition_off; + } + std::string getName() const { + std::ostringstream str; + str << "T" << projection_id.table_id << "G" << projection_id.projection_off + << "P" << partition_off; + return str.str(); + } + std::string getPathAndName() const; + friend class boost::serialization::access; + template + void serialize(Archive& ar, const unsigned int version) { + ar& partition_off& projection_id; + } }; /* for boost::unordered_map*/ -static size_t hash_value(const PartitionID& key){ - size_t 
seed=0; - boost::hash_combine(seed,hash_value(key.projection_id)); - boost::hash_combine(seed,boost::hash_value(key.partition_off)); - return seed; +static size_t hash_value(const PartitionID& key) { + size_t seed = 0; + boost::hash_combine(seed, hash_value(key.projection_id)); + boost::hash_combine(seed, boost::hash_value(key.partition_off)); + return seed; } -struct ChunkID{ - ChunkID(){}; - ChunkID(PartitionID partition_id,ChunkOffset chunk_offset):partition_id(partition_id),chunk_off(chunk_offset){}; - ChunkID(const ChunkID& r){ - partition_id=r.partition_id; - chunk_off=r.chunk_off; - } - bool operator==(const ChunkID& r)const{ - return partition_id==r.partition_id&&chunk_off==r.chunk_off; - } - bool operator<(const ChunkID& r)const{ - if (partition_id < r.partition_id) - return true; - else if (partition_id == r.partition_id) - return (chunk_off < r.chunk_off); - else - return false; - } - PartitionID partition_id; - ChunkOffset chunk_off; +/** + * ToDo: a flag is_rt_ is add to ChunkID, MemStore need to aware this flag and + * stick the chunk (is_rt_ == true) in memory. 
+ * + */ +struct ChunkID { + ChunkID() {} + ChunkID(PartitionID partition_id, ChunkOffset chunk_offset) + : partition_id(partition_id), chunk_off(chunk_offset) {} + ChunkID(PartitionID partition_id, ChunkOffset chunk_offset, bool is_rt) + : partition_id(partition_id), chunk_off(chunk_offset), is_rt_(is_rt) {} + ChunkID(const ChunkID& r) { + partition_id = r.partition_id; + chunk_off = r.chunk_off; + is_rt_ = r.is_rt_; + } + bool operator==(const ChunkID& r) const { + return partition_id == r.partition_id && chunk_off == r.chunk_off && + is_rt_ == r.is_rt_; + } + bool operator<(const ChunkID& r) const { + if (is_rt_ < r.is_rt_) + return true; + else if (partition_id < r.partition_id) + return true; + else if (partition_id == r.partition_id) + return (chunk_off < r.chunk_off); + else + return false; + } + PartitionID partition_id; + ChunkOffset chunk_off; + bool is_rt_ = false; }; /* for boost::unordered_map*/ -static size_t hash_value(const ChunkID& key){ - size_t seed=0; - boost::hash_combine(seed,hash_value(key.partition_id)); - boost::hash_combine(seed,boost::hash_value(key.chunk_off)); - return seed; +static size_t hash_value(const ChunkID& key) { + size_t seed = 0; + boost::hash_combine(seed, hash_value(key.partition_id)); + boost::hash_combine(seed, boost::hash_value(key.chunk_off)); + boost::hash_combine(seed, boost::hash_value(key.is_rt_)); + return seed; } -struct ExchangeID{ - ExchangeID():exchange_id(0),partition_offset(0){}; - ExchangeID(unsigned long long int exchange_id,unsigned partition_offset) - :exchange_id(exchange_id),partition_offset(partition_offset){}; - bool operator==(const ExchangeID& r)const{ - return exchange_id==r.exchange_id&&partition_offset==r.partition_offset; - } - - unsigned long long int exchange_id; - unsigned partition_offset; +struct ExchangeID { + ExchangeID() : exchange_id(0), partition_offset(0) {} + ExchangeID(unsigned long long int exchange_id, unsigned partition_offset) + : exchange_id(exchange_id), 
partition_offset(partition_offset) {} + bool operator==(const ExchangeID& r) const { + return exchange_id == r.exchange_id && + partition_offset == r.partition_offset; + } + + unsigned long long int exchange_id; + unsigned partition_offset; }; -static size_t hash_value(const ExchangeID& key){ - size_t seed=0; - boost::hash_combine(seed,boost::hash_value(key.exchange_id)); - boost::hash_combine(seed,boost::hash_value(key.partition_offset)); - return seed; +static size_t hash_value(const ExchangeID& key) { + size_t seed = 0; + boost::hash_combine(seed, boost::hash_value(key.exchange_id)); + boost::hash_combine(seed, boost::hash_value(key.partition_offset)); + return seed; } #endif /* IDS_H_ */ diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 69f7f7a39..1d8d89abd 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -366,7 +366,7 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { /// get start position of current chunk if (BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( ChunkID(PartitionID(ProjectionID(table_id, prj_id), part_id), - cur_chunk_id), + cur_chunk_id, true), chunk_info)) { // In this version, the last chunk info don't updated their member: length // after inserting data, diff --git a/physical_operator/physical_projection_scan.cpp b/physical_operator/physical_projection_scan.cpp index 2a69b85fd..c4e33f944 100644 --- a/physical_operator/physical_projection_scan.cpp +++ b/physical_operator/physical_projection_scan.cpp @@ -46,7 +46,7 @@ using claims::common::rNoPartitionIdScan; using claims::common::rSuccess; using claims::common::rCodegenFailed; - +using claims::txn::GetGlobalPartId; namespace claims { namespace physical_operator { PhysicalProjectionScan::PhysicalProjectionScan(State state) @@ -102,8 +102,17 @@ bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status, .c_str() << CStrError(rNoPartitionIdScan) << std::endl; SetReturnStatus(false); } else { - 
partition_reader_iterator_ = - partition_handle_->CreateAtomicReaderIterator(); + auto table_id = state_.projection_id_.table_id; + auto proj_id = state_.projection_id_.projection_off; + auto part_id = kPartitionOffset; + auto global_part_id = GetGlobalPartId(table_id, proj_id, part_id); + auto cp = state_.query_.scan_cp_list_[global_part_id]; + // cout << "table:" << table_id << ",proj:" << proj_id + // << ",part_id:" << part_id << ",cp:" << cp << endl; + partition_reader_iterator_ = partition_handle_->CreateTxnReaderIterator( + cp, state_.query_.scan_snapshot_[global_part_id]); + // partition_reader_iterator_ = + // partition_handle_->CreateAtomicReaderIterator(); SetReturnStatus(true); } diff --git a/stmt_handler/select_exec.cpp b/stmt_handler/select_exec.cpp index 28f845269..3076dfdf7 100644 --- a/stmt_handler/select_exec.cpp +++ b/stmt_handler/select_exec.cpp @@ -235,7 +235,7 @@ RetCode SelectExec::Execute() { logic_plan->GetTxnInfo(request); TxnClient::BeginQuery(request, query); logic_plan->SetTxnInfo(query); - cout << request.ToString() << endl; + // cout << request.ToString() << endl; PhysicalOperatorBase* physical_plan = logic_plan->GetPhysicalPlan(64 * 1024); #ifndef PRINTCONTEXT physical_plan->Print(); diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 12caeb4e0..6ae1cde7b 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -33,7 +33,7 @@ #include #include "../common/error_define.h" -#include "../Debug.h" +// #include "../Debug.h" #include "./MemoryManager.h" #include "../common/memory_handle.h" #include "../Config.h" @@ -62,10 +62,16 @@ PartitionStorage::PartitionStorage(const PartitionID& partition_id, MemoryChunkStore::GetInstance()->SetFreeAlgorithm(0); else MemoryChunkStore::GetInstance()->SetFreeAlgorithm(1); - for (unsigned i = 0; i < number_of_chunks_; i++) { - chunk_list_.push_back(new ChunkStorage( - ChunkID(partition_id_, i), BLOCK_SIZE, desirable_storage_level_)); - } + /* for (unsigned 
i = 0; i < number_of_chunks_; i++) { + chunk_list_.push_back(new ChunkStorage( + ChunkID(partition_id_, i), BLOCK_SIZE, desirable_storage_level_)); + rt_chunk_list_.push_back(new ChunkStorage( + ChunkID(partition_id_, i, true), BLOCK_SIZE, + desirable_storage_level_)); + }*/ + CheckAndAppendChunkList(number_of_chunks_, false); + CheckAndAppendChunkList(number_of_chunks_, true); + // cout << "*******chunk_list_" << chunk_list_.size() << endl; } PartitionStorage::~PartitionStorage() { @@ -215,3 +221,116 @@ bool PartitionStorage::AtomicPartitionReaderIterator::NextBlock( } } } + +PartitionStorage::TxnPartitionReaderIterator::TxnPartitionReaderIterator( + PartitionStorage* partition_storage, uint64_t his_cp, + const vector& rt_strip_list) + : PartitionReaderIterator(partition_storage), + last_his_block_(his_cp / BLOCK_SIZE), + block_cur_(0), + chunk_cur_(-1), + rt_block_index_(0), + rt_chunk_cur_(-1), + rt_chunk_it_(nullptr) { + for (auto& strip : rt_strip_list) { + auto begin = strip.first; + auto end = begin + strip.second; + while (begin < end) { + auto block = begin / BLOCK_SIZE; + auto len = (block + 1) * BLOCK_SIZE <= end + ? 
(block + 1) * BLOCK_SIZE - begin + : end - begin; + rt_strip_list_.push_back(PStrip(begin, len)); + begin += len; + } + } +} + +PartitionStorage::TxnPartitionReaderIterator::~TxnPartitionReaderIterator() { + for (auto block : rt_block_buffer_) free(block); +} + +bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( + BlockStreamBase*& block) { + LockGuard guard(lock_); + ChunkReaderIterator::block_accessor* ba = nullptr; + if (block_cur_ < last_his_block_) { // scan historical data + int64_t chunk_cur = block_cur_ / (CHUNK_SIZE / BLOCK_SIZE); + if (chunk_cur > chunk_cur_) { // update chunk_it_ + chunk_cur_ = chunk_cur; + ps_->CheckAndAppendChunkList(chunk_cur_ + 1, false); + if (chunk_it_ != nullptr) delete chunk_it_; + chunk_it_ = ps_->chunk_list_[chunk_cur_]->CreateChunkReaderIterator(); + } + chunk_it_->GetNextBlockAccessor(ba); + if (ba == nullptr) { + if (chunk_it_ != nullptr) delete chunk_it_; + return false; + } else { + assert(ba != nullptr); + ba->GetBlock(block); + delete ba; + ba = nullptr; + block_cur_++; + return true; + } + } else if (rt_block_index_ < rt_strip_list_.size()) { // scan real-time data + auto pos = rt_strip_list_[rt_block_index_].first; + auto offset_in_block = pos - (pos / BLOCK_SIZE) * BLOCK_SIZE; + auto len = rt_strip_list_[rt_block_index_].second; + auto rt_block_cur = pos / BLOCK_SIZE; + auto rt_chunk_cur = pos / CHUNK_SIZE; + if (rt_chunk_cur > rt_chunk_cur_) { // move to new rt chunk + rt_chunk_cur_ = rt_chunk_cur; + rt_block_cur_ = rt_chunk_cur_ * (CHUNK_SIZE / BLOCK_SIZE); + ps_->CheckAndAppendChunkList(rt_chunk_cur_ + 1, true); + if (rt_chunk_it_ != nullptr) delete rt_chunk_it_; + rt_chunk_it_ = + ps_->rt_chunk_list_[rt_chunk_cur_]->CreateChunkReaderIterator(); + assert(rt_chunk_it_ != nullptr); + } + + do { // move to rt_block_cur + rt_chunk_it_->GetNextBlockAccessor(ba); + rt_block_cur_++; + } while (rt_block_cur_ <= rt_block_cur); + + if (len == BLOCK_SIZE) { // directly return pointer + ba->GetBlock(block); + } else 
{ + auto tuple_size = + reinterpret_cast(block)->getTupleSize(); + if (pos + len % BLOCK_SIZE == 0) + len = ((len - sizeof(unsigned)) / tuple_size) * tuple_size; + auto tuple_count = len / tuple_size; + // cout << "tuple_size:" << tuple_size << endl; + // cout << "tuple_count:" << tuple_count << endl; + ba->GetBlock(block); + auto des_addr = reinterpret_cast(malloc(BLOCK_SIZE)); + auto scr_addr = block->getBlockDataAddress() + offset_in_block; + memcpy(des_addr, scr_addr, len); + reinterpret_cast(block)->setBlockDataAddress(des_addr); + reinterpret_cast(block)->setTuplesInBlock(tuple_count); + rt_block_buffer_.push_back(des_addr); + } + delete ba; + ba = nullptr; + rt_block_index_++; + return true; + } + return false; +} +void PartitionStorage::CheckAndAppendChunkList(unsigned number_of_chunk, + bool is_rt) { + LockGuard guard(write_lock_); + if (!is_rt) { + for (auto size = chunk_list_.size(); size < number_of_chunk; size++) + chunk_list_.push_back(new ChunkStorage( + ChunkID(partition_id_, size), BLOCK_SIZE, desirable_storage_level_)); + } else { + for (auto size = rt_chunk_list_.size(); size < number_of_chunk; size++) + rt_chunk_list_.push_back( + new ChunkStorage(ChunkID(partition_id_, size, true), BLOCK_SIZE, + desirable_storage_level_)); + } +} diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index d92315ad6..f43696ebe 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -31,14 +31,15 @@ #define PARTITIONSTORAGE_H_ #include #include - +#include #include "../common/error_define.h" - +#include "../txn_manager/txn.hpp" #include "ChunkStorage.h" #include "StorageLevel.h" #include "./PartitionReaderIterator.h" #include "../utility/lock.h" - +#include "../Debug.h" +using claims::txn::PStrip; // namespace claims { // namespace storage { /** @@ -105,6 +106,46 @@ class PartitionStorage { private: Lock lock_; }; + /**********************************************************************/ + class TxnPartitionReaderIterator : 
public PartitionReaderIterator { + public: + /** + * @brief Method description: Construct the Iterator. Different from + * PartitionReaderIterator and AtomicPartitionReaditerator. + * It support scan from both chunk_list_ and rt_chunk_list_ , + * write into rt_chunk_list rather than chunk_list_ in + * AtomicPartitionReaderIterator + */ + + TxnPartitionReaderIterator(PartitionStorage* partition_storage, + uint64_t his_cp, + const vector& rt_strip_list); + ~TxnPartitionReaderIterator() override; + bool NextBlock(BlockStreamBase*& block) override; + + private: + void* CreateEmptyBlock() { + void* data = reinterpret_cast(malloc(BLOCK_SIZE)); + // auto block = new BlockStreamFix(BLOCK_SIZE, 0, data, 0); + return data; + } + void SetBlockTail(void* block, unsigned tuple_num) { + *reinterpret_cast(block + 64 * 1024 - sizeof(unsigned)) = + tuple_num; + } + int64_t last_his_block_; + int64_t block_cur_; + int64_t chunk_cur_; + + vector rt_strip_list_; // splited by block + int64_t rt_block_index_; + int64_t rt_block_cur_; + int64_t rt_chunk_cur_; + ChunkReaderIterator* rt_chunk_it_; + vector rt_block_buffer_; + + Lock lock_; + }; /** * @brief Method description: construct the partition container. 
@@ -155,10 +196,18 @@ class PartitionStorage { */ PartitionStorage::PartitionReaderIterator* CreateAtomicReaderIterator(); + PartitionStorage::PartitionReaderIterator* CreateTxnReaderIterator( + uint64_t his_cp, const vector& rt_strip_list) { + return new TxnPartitionReaderIterator(this, his_cp, rt_strip_list); + } + void CheckAndAppendChunkList(unsigned number_of_chunk, bool is_rt); + protected: PartitionID partition_id_; atomic number_of_chunks_; std::vector chunk_list_; + // add it for txn scan + std::vector rt_chunk_list_; StorageLevel desirable_storage_level_; Lock write_lock_; From d6453d8e2c83a3e24fc8150d7efb52f8f4d6d639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Sun, 28 Aug 2016 20:11:38 +0800 Subject: [PATCH 51/58] implement persist --- loader/master_loader.cpp | 3 +- loader/slave_loader.cpp | 38 +++++++++++- loader/slave_loader.h | 15 ++++- storage/BlockManager.cpp | 9 +++ storage/BlockManager.h | 7 ++- storage/ChunkStorage.cpp | 7 +++ storage/PartitionStorage.cpp | 96 ++++++++++++++++++++++++++++-- storage/PartitionStorage.h | 13 +++-- txn_manager/txn.cpp | 54 +++++++++++++---- txn_manager/txn.hpp | 75 +++++++++++++++++++----- txn_manager/txn_client.cpp | 48 ++++----------- txn_manager/txn_client.hpp | 27 ++++----- txn_manager/txn_client_test.cpp | 12 ++-- txn_manager/txn_server.cpp | 100 +++++++++++++------------------- txn_manager/txn_server.hpp | 33 +---------- 15 files changed, 349 insertions(+), 188 deletions(-) diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 34f5fd726..f9475c1ca 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -888,15 +888,14 @@ void* MasterLoader::StartMasterLoader(void* arg) { // AMQConsumer consumer(brokerURI, destURI, use_topics, client_ack); // consumer.run(master_loader); + WorkerPara para(master_loader, brokerURI, destURI, use_topics, client_ack); for (int i = 0; i < Config::master_loader_thread_num - 1; ++i) { - WorkerPara 
para(master_loader, brokerURI, destURI, use_topics, client_ack); // Environment::getInstance()->getThreadPool()->AddTaskInCpu( // MasterLoader::Work, ¶, (i + 1) % GetNumberOfCpus()); Environment::getInstance()->getThreadPool()->AddTask(MasterLoader::Work, ¶); } // i am also a worker - WorkerPara para(master_loader, brokerURI, destURI, use_topics, client_ack); Work(¶); while (1) sleep(10); diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 1d8d89abd..93cbefce0 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -88,6 +88,7 @@ using std::chrono::seconds; caf::actor SlaveLoader::handle; caf::actor* SlaveLoader::handles_; +caf::actor SlaveLoader::persistor; static const int txn_count_for_debug = 5000; static const char* txn_count_string = "5000"; @@ -437,7 +438,7 @@ RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, } // this method has the best performance -static behavior SlaveLoader::WorkInCAF(event_based_actor* self) { +behavior SlaveLoader::WorkInCAF(event_based_actor* self) { return {[=](LoadPacketAtom, LoadPacket* packet) { // NOLINT RetCode ret = rSuccess; EXEC_AND_DLOG(ret, StoreDataInMemory(*packet), "stored data", @@ -451,6 +452,40 @@ static behavior SlaveLoader::WorkInCAF(event_based_actor* self) { }}; } +behavior SlaveLoader::PersistInCAF(event_based_actor* self) { + self->delayed_send(self, seconds(3), CheckpointAtom::value); + return {[self](CheckpointAtom) { + cout << "slave persist.." 
<< endl; + QueryReq query_req; + query_req.include_abort_ = true; + Query query; + auto part_list = + Environment::getInstance()->get_block_manager()->GetAllPartition(); + for (auto& part : part_list) + query_req.part_list_.push_back(GetGlobalPartId(part)); + TxnClient::BeginQuery(query_req, query); + for (auto& part : part_list) { + UInt64 g_part_id = GetGlobalPartId(part); + if (query.snapshot_[g_part_id].size() > 0) { + auto part_handler = + Environment::getInstance()->get_block_manager()->GetPartitionHandle( + part); + auto new_rt_cp = query.snapshot_[g_part_id].rbegin()->first + + query.snapshot_[g_part_id].rbegin()->second; + // merge from historical to real time + auto old_his_cp = query.his_cp_list_[g_part_id]; + auto new_his_cp = + part_handler->MergeToHis(old_his_cp, query.snapshot_[g_part_id]); + if (new_his_cp == old_his_cp) continue; + if (!part_handler->Persist(old_his_cp, new_his_cp)) continue; + TxnClient::CommitCheckpoint(query.ts_, g_part_id, new_his_cp, + new_rt_cp); + } + } + self->delayed_send(self, seconds(3), CheckpointAtom::value); + }}; +} + // It's very slow void SlaveLoader::WorkInAsync(LoadPacket* packet) { RetCode ret = rSuccess; @@ -523,6 +558,7 @@ void* SlaveLoader::StartSlaveLoader(void* arg) { SlaveLoader::handles_[i] = caf::spawn(SlaveLoader::WorkInCAF); } #endif + persistor = caf::spawn(SlaveLoader::PersistInCAF); slave_loader->ReceiveAndWorkLoop(); assert(false); diff --git a/loader/slave_loader.h b/loader/slave_loader.h index 141be6935..49d9a981c 100644 --- a/loader/slave_loader.h +++ b/loader/slave_loader.h @@ -32,18 +32,29 @@ #include #include #include +#include +#include #include "../catalog/catalog.h" #include "../storage/BlockManager.h" +#include "../txn_manager/txn.hpp" +#include "../txn_manager/txn_client.hpp" #include "caf/all.hpp" namespace claims { namespace loader { +using std::unordered_map; + using caf::behavior; using caf::event_based_actor; using std::string; using claims::catalog::Catalog; - +using 
claims::txn::CheckpointAtom; +using claims::txn::UInt64; +using claims::txn::Query; +using claims::txn::QueryReq; +using claims::txn::GetGlobalPartId; +using claims::txn::TxnClient; class LoadPacket; class SlaveLoader { @@ -77,6 +88,7 @@ class SlaveLoader { bool is_commited); static behavior WorkInCAF(event_based_actor* self); + static behavior PersistInCAF(event_based_actor* self); static void WorkInAsync(LoadPacket* packet); static void* HandleWork(void* arg); @@ -91,6 +103,7 @@ class SlaveLoader { private: static caf::actor handle; static caf::actor* handles_; + static caf::actor persistor; private: queue packet_queue_; diff --git a/storage/BlockManager.cpp b/storage/BlockManager.cpp index 4b75c4a29..aa35ecf4f 100755 --- a/storage/BlockManager.cpp +++ b/storage/BlockManager.cpp @@ -403,3 +403,12 @@ PartitionStorage* BlockManager::GetPartitionHandle( } return it->second; } + +vector BlockManager::GetAllPartition() { + LockGuard guard(lock); + vector part_list; + for (auto itr = partition_id_to_storage_.begin(); + itr != partition_id_to_storage_.end(); itr++) { + } + return part_list; +} diff --git a/storage/BlockManager.h b/storage/BlockManager.h index a63cbe1a8..5fb88be03 100755 --- a/storage/BlockManager.h +++ b/storage/BlockManager.h @@ -23,9 +23,11 @@ #include "../common/ids.h" #include "../common/Message.h" #include "../common/Logging.h" +#include "../txn_manager/txn.hpp" #include "../utility/lock.h" using namespace std; - +using claims::txn::UInt64; +using claims::txn::PStrip; struct ChunkInfo { ChunkID chunkId; void *hook; @@ -94,6 +96,9 @@ class BlockManager { bool RemovePartition(const PartitionID &); PartitionStorage *GetPartitionHandle(const PartitionID &partition_id) const; + vector GetAllPartition(); + UInt64 MergeHisToRt(PartitionID, const vector &strip_list, UInt64 rt); + private: BlockManager(); diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 74c3222dc..a839bd567 100755 --- a/storage/ChunkStorage.cpp +++ 
b/storage/ChunkStorage.cpp @@ -79,6 +79,13 @@ RetCode ChunkStorage::ApplyMemory() { * shifted.*/ current_storage_level_ = MEMORY; + /* + * set each block tail to "zero" in new chunk + */ + for (auto offset = 0; offset < CHUNK_SIZE; offset += BLOCK_SIZE) + *reinterpret_cast(chunk_info.hook + offset - + sizeof(unsigned)) = 0; + /* update the chunk info in the Chunk store in case that the * chunk_info is updated.*/ BlockManager::getInstance()->getMemoryChunkStore()->UpdateChunkInfo( diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 6ae1cde7b..7d9077b3e 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -39,8 +39,13 @@ #include "../Config.h" #include "../Resource/BufferManager.h" #include "../utility/lock_guard.h" - +#include "../storage/BlockManager.h" +#include "../common/file_handle/file_handle_imp.h" +#include "../common/file_handle/hdfs_file_handle_imp.h" +#include "../common/file_handle/file_handle_imp_factory.h" using claims::common::rSuccess; +using claims::common::FileHandleImpFactory; +using claims::common::kHdfs; using claims::utility::LockGuard; /** @@ -94,8 +99,8 @@ RetCode PartitionStorage::AddChunkWithMemoryToNum( if (number_of_chunks_ >= expected_number_of_chunks) return ret; for (unsigned i = number_of_chunks_; i < expected_number_of_chunks; i++) { - ChunkStorage* chunk = - new ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, storage_level); + ChunkStorage* chunk = new ChunkStorage(ChunkID(partition_id_, i, true), + BLOCK_SIZE, storage_level); EXEC_AND_DLOG(ret, chunk->ApplyMemory(), "applied memory for chunk(" << partition_id_.getName() << "," << i << ")", @@ -303,8 +308,8 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( if (pos + len % BLOCK_SIZE == 0) len = ((len - sizeof(unsigned)) / tuple_size) * tuple_size; auto tuple_count = len / tuple_size; - // cout << "tuple_size:" << tuple_size << endl; - // cout << "tuple_count:" << tuple_count << endl; + // cout << "tuple_size:" 
<< tuple_size << endl; + // cout << "tuple_count:" << tuple_count << endl; ba->GetBlock(block); auto des_addr = reinterpret_cast(malloc(BLOCK_SIZE)); auto scr_addr = block->getBlockDataAddress() + offset_in_block; @@ -334,3 +339,84 @@ void PartitionStorage::CheckAndAppendChunkList(unsigned number_of_chunk, desirable_storage_level_)); } } + +UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, + const vector& strip_list) { + auto new_his_cp = old_his_cp; + auto table_id = partition_id_.projection_id.table_id; + auto proj_id = partition_id_.projection_id.projection_off; + auto tuple_size = Catalog::getInstance() + ->getTable(table_id) + ->getProjectoin(proj_id) + ->getSchema() + ->getTupleMaxSize(); + HdfsInMemoryChunk chunk_rt, chunk_his; + for (auto& strip : strip_list) { + auto begin = strip.first; + auto end = strip.first + strip.second; + while (begin < end) { + // update historical chunk cur + if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( + ChunkID(partition_id_, begin / CHUNK_SIZE), chunk_his)) + return old_his_cp; + // update real time chunk cur + if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( + ChunkID(partition_id_, new_his_cp / CHUNK_SIZE), chunk_rt)) + return old_his_cp; + // each step move just one full block or even partly block + auto move = BLOCK_SIZE - (begin + BLOCK_SIZE) % BLOCK_SIZE; + if (move == BLOCK_SIZE) { // full block + memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, + chunk_rt.hook + begin % CHUNK_SIZE, move); + } else { + auto tuple_count = (move - sizeof(unsigned)) / tuple_size; + if ((begin + move) % BLOCK_SIZE == 0) { + auto real_move = tuple_count * tuple_size; + memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, + chunk_rt.hook + begin % CHUNK_SIZE, real_move); + } else { + memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, + chunk_rt.hook + begin % CHUNK_SIZE, move); + } + auto tail_offset = + (begin / BLOCK_SIZE + 1) * BLOCK_SIZE - sizeof(unsigned); + *reinterpret_cast(chunk_his.hook + 
tail_offset) += + tuple_count; + } + begin += move; + new_his_cp += BLOCK_SIZE; + } + } + return new_his_cp; +} + +bool PartitionStorage::Persist(UInt64 old_his_cp, UInt64 new_his_cp) { + if (!Config::local_disk_mode) + return PersistHDFS(old_his_cp, new_his_cp); + else + return PersistDisk(old_his_cp, new_his_cp); +} + +bool PartitionStorage::PersistHDFS(UInt64 old_his_cp, UInt64 new_his_cp) { + /* + * ToDo Truncate need to be implemented + */ + auto file_handle = FileHandleImpFactory::Instance().CreateFileHandleImp( + kHdfs, partition_id_.getPathAndName()); + if (file_handle == nullptr) + return false; + HdfsInMemoryChunk chunk_his; + auto begin = old_his_cp; + auto end = new_his_cp; + while (begin < end) { + if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( + ChunkID(partition_id_, begin / CHUNK_SIZE), chunk_his)) + return false; + auto move = CHUNK_SIZE - (begin + CHUNK_SIZE) % CHUNK_SIZE; + file_handle->Append(chunk_his.hook, move); + begin += move; + } + return true; +} + +bool PartitionStorage::PersistDisk(UInt64 old_his_cp, UInt64 new_his_cp) {} diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index f43696ebe..03224a47c 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -34,12 +34,14 @@ #include #include "../common/error_define.h" #include "../txn_manager/txn.hpp" -#include "ChunkStorage.h" -#include "StorageLevel.h" -#include "./PartitionReaderIterator.h" +#include "../storage/ChunkStorage.h" +#include "../storage/StorageLevel.h" +#include "../storage/PartitionReaderIterator.h" #include "../utility/lock.h" #include "../Debug.h" using claims::txn::PStrip; +using claims::txn::UInt64; + // namespace claims { // namespace storage { /** @@ -201,7 +203,10 @@ class PartitionStorage { return new TxnPartitionReaderIterator(this, his_cp, rt_strip_list); } void CheckAndAppendChunkList(unsigned number_of_chunk, bool is_rt); - + UInt64 MergeToHis(UInt64 old_his_cp, const vector& strip_list); + bool 
Persist(UInt64 old_his_cp, UInt64 new_his_cp); + bool PersistHDFS(UInt64 old_his_cp, UInt64 new_his_cp); + bool PersistDisk(UInt64 old_his_cp, UInt64 new_his_cp); protected: PartitionID partition_id_; atomic number_of_chunks_; diff --git a/txn_manager/txn.cpp b/txn_manager/txn.cpp index f5b0159b2..f4b0f5c18 100644 --- a/txn_manager/txn.cpp +++ b/txn_manager/txn.cpp @@ -169,7 +169,7 @@ string Query::ToString() { return str; } -string Checkpoint::ToString() { +string TsCheckpoint::ToString() { string str = "*******checkpoint*******\n"; str += "Historical:"; for (auto &cp : vers_his_cp_) @@ -180,6 +180,17 @@ string Checkpoint::ToString() { str += "\n"; return str; } + +string CheckpointReq::ToString() const { + string str = "*****CheckpointReq******\n"; + str += "ts:" + to_string(ts_) + ",part:" + to_string(part_) + ",old_his_cp:" + + to_string(old_his_cp_) + "\n"; + str += "strip_list:"; + for (auto &strip : strip_list_) + str += "<" + to_string(strip.first) + "," + to_string(strip.second) + ">,"; + return str + "\n"; +} + void Snapshot::Merge(const vector &strips) { for (auto &strip : strips) part_pstrips_[strip.part_].push_back(PStrip(strip.pos_, strip.offset_)); @@ -209,7 +220,7 @@ void TxnBin::MergeSnapshot(Query &query) const { snapshot_[part].end()); Strip::Sort(query.snapshot_[part]); Strip::Merge(query.snapshot_[part]); - Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { + Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { if (pstrip.first + pstrip.second <= checkpoint) { return false; } else { @@ -220,21 +231,28 @@ void TxnBin::MergeSnapshot(Query &query) const { return true; } }); + query.abort_list_[part].insert(query.abort_list_[part].end(), + abort_list_[part].begin(), + abort_list_[part].end()); } } void TxnBin::MergeTxn(Query &query, int len) const { - for (auto i = 0; i < len; i++) { - if (txn_list_[i].IsCommit()) + for (auto i = 0; i < len; i++) + if (txn_list_[i].IsCommit()) { for (auto &strip : 
txn_list_[i].strip_list_) query.snapshot_[strip.first].push_back(strip.second); - } + } else if (txn_list_[i].IsAbort()) { + for (auto &strip: txn_list_[i].strip_list_) + query.abort_list_[strip.first].push_back(strip.second); + } + for (auto &part_cp : query.rt_cp_list_) { auto part = part_cp.first; auto checkpoint = part_cp.second; Strip::Sort(query.snapshot_[part]); Strip::Merge(query.snapshot_[part]); - Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { + Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { if (pstrip.first + pstrip.second <= checkpoint) { cout << "fail:<" << pstrip.first << "," << pstrip.second << ">" << pstrip.second << endl; @@ -262,25 +280,41 @@ void TxnBin::GenSnapshot(const TxnBin &prev) { status_ = true; snapshot_ = prev.snapshot_; for (auto &txn : txn_list_) - if (txn.IsCommit()) + if (txn.IsCommit()) { for (auto &strip : txn.strip_list_) snapshot_[strip.first].push_back(strip.second); + } else if (txn.IsAbort()) { + for (auto &strip : txn.strip_list_) + abort_list_[strip.first].push_back(strip.second); + } for (auto &part : snapshot_) { Strip::Sort(part.second); Strip::Merge(part.second); } + for (auto &part : abort_list_) { + Strip::Sort(part.second); + Strip::Merge(part.second); + } } void TxnBin::GenSnapshot() { status_ = true; for (auto &txn : txn_list_) - if (txn.IsCommit()) + if (txn.IsCommit()) { for (auto &strip : txn.strip_list_) snapshot_[strip.first].push_back(strip.second); + } else if (txn.IsAbort()) { + for (auto &strip : txn.strip_list_) + abort_list_[strip.first].push_back(strip.second); + } for (auto &part : snapshot_) { Strip::Sort(part.second); Strip::Merge(part.second); } + for (auto &part : abort_list_) { + Strip::Sort(part.second); + Strip::Merge(part.second); + } } -} -} +} // namespace txn +} // namespace claims diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index c1267f1b3..0130c4dd9 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -45,6 +45,7 
@@ #include "caf/io/all.hpp" #include "../common/error_define.h" #include "../utility/Timer.h" +#include "../common/ids.h" #include #include #include @@ -91,8 +92,9 @@ using QueryAtom = caf::atom_constant; using CommitQueryAtom = caf::atom_constant; using CheckpointAtom = caf::atom_constant; using GCAtom = caf::atom_constant; -using CommitIngestAtom = caf::atom_constant; +using CommitIngestAtom = caf::atom_constant; using AbortIngestAtom = caf::atom_constant; +using CheckpointAtom = caf::atom_constant; using CommitCPAtom = caf::atom_constant; using AbortCPAtom = caf::atom_constant; using QuitAtom = caf::atom_constant; @@ -111,11 +113,19 @@ static const int kTimeout = 3; static const int kBlockSize = 64 * 1024; static const int kTailSize = sizeof(unsigned); static const int kTxnBinSize = 3; // 1024; + inline UInt64 GetGlobalPartId(UInt64 table_id, UInt64 projeciton_id, UInt64 partition_id) { return partition_id + 1000 * (projeciton_id + 1000 * table_id); } +inline UInt64 GetGlobalPartId(PartitionID part) { + auto table_id = part.projection_id.table_id; + auto proj_id = part.projection_id.projection_off; + auto part_id = part.partition_off; + return GetGlobalPartId(table_id, proj_id, part_id); +} + inline UInt64 GetTableIdFromGlobalPartId(UInt64 global_partition_id) { return global_partition_id / (1000 * 1000); } @@ -264,11 +274,14 @@ inline bool operator==(const Ingest &lhs, const Ingest &rhs) { class QueryReq { public: vector part_list_; + bool include_abort_ = false; QueryReq() {} QueryReq(const vector &part_list) : part_list_(part_list) {} void InsertPart(UInt64 part) { part_list_.push_back(part); } vector get_part_list() const { return part_list_; } + bool get_include_abort() const { return include_abort_; } void set_part_list(const vector &partList) { part_list_ = partList; } + void set_include_abort(bool include_abort) { include_abort_ = include_abort; } string ToString(); }; @@ -282,10 +295,12 @@ class Query { UInt64 ts_; unordered_map> snapshot_; 
unordered_map his_cp_list_; + /** - * real-time checkpoint will never be send + * rt_cp_list_ and abort_list_ will never be serialized and send, */ unordered_map rt_cp_list_; + unordered_map> abort_list_; Query() {} Query(UInt64 ts, const unordered_map &his_cp_list, @@ -296,6 +311,10 @@ class Query { return snapshot_; } unordered_map getCPList() const { return his_cp_list_; } + unordered_map> getAbortList() const { + return abort_list_; + } + void setTS(UInt64 ts) { ts_ = ts; } void setSnapshot(const unordered_map> &sp) { snapshot_ = sp; @@ -303,6 +322,9 @@ class Query { void setCPList(const unordered_map &cplist) { his_cp_list_ = cplist; } + void setAbortList(const unordered_map> &abort_list) { + abort_list_ = abort_list; + } string ToString(); void GenTxnInfo() { for (auto &part_strips : snapshot_) @@ -321,8 +343,32 @@ inline bool operator==(const Query &lhs, const Query &rhs) { return lhs.snapshot_ == rhs.snapshot_ && lhs.his_cp_list_ == rhs.his_cp_list_; } +class CheckpointReq { + public: + UInt64 ts_; + UInt64 part_; + vector strip_list_; + UInt64 old_his_cp_; + CheckpointReq() {} + CheckpointReq(UInt64 ts, UInt64 part) : ts_(ts), part_(part) {} + string ToString() const; + UInt64 getTs() const { return ts_; } + UInt64 getPart() const { return part_; } + vector getStripList() const { return strip_list_; } + UInt64 getOldHisCP() const { old_his_cp_; } + void setTs(UInt64 ts) { ts_ = ts; } + void setPart(UInt64 part) { part_ = part; } + vector setStripList(const vector &strip_list) { + strip_list_ = strip_list; + } + void setOldHisCP(UInt64 old_his_cp) { old_his_cp_ = old_his_cp; } +}; +inline bool operator==(const CheckpointReq &lhs, const CheckpointReq &rhs) { + return lhs.ts_ == rhs.ts_ && lhs.part_ == rhs.part_; +} + /*********Checkpoint***********/ -class Checkpoint { +class TsCheckpoint { public: UInt64 GetHisCP(UInt64 ts) { UInt64 cp; @@ -398,8 +444,9 @@ class TxnBin { int ct_ = 0; int ct_commit_ = 0; int ct_abort_ = 0; - unordered_map> - snapshot_; // If 
bin is full, a snapshot is generated. + // If bin is full, a snapshot is generated. + unordered_map> snapshot_; + unordered_map> abort_list_; }; inline void CAFSerConfig() { @@ -409,22 +456,20 @@ inline void CAFSerConfig() { caf::announce( "Ingest", make_pair(&Ingest::get_id, &Ingest::set_id), make_pair(&Ingest::get_strip_list, &Ingest::set_strip_list)); - caf::announce("QueryReq", make_pair(&QueryReq::get_part_list, - &QueryReq::set_part_list)); + caf::announce( + "QueryReq", make_pair(&QueryReq::get_part_list, &QueryReq::set_part_list), + make_pair(&QueryReq::get_include_abort, &QueryReq::set_include_abort)); caf::announce("Query", make_pair(&Query::getTS, &Query::setTS), make_pair(&Query::getSnapshot, &Query::setSnapshot), make_pair(&Query::getCPList, &Query::setCPList)); - /* caf::announce( - "Checkpoint", make_pair(&Checkpoint::get_part, &Checkpoint::set_part), - make_pair(&Checkpoint::get_logic_cp, &Checkpoint::set_Logic_cp), - make_pair(&Checkpoint::get_phy_cp, &Checkpoint::set_Phy_cp), - make_pair(&Checkpoint::get_commit_strip_list, - &Checkpoint::set_commit_strip_list), - make_pair(&Checkpoint::get_abort_strip_list, - &Checkpoint::set_abort_strip_list));*/ caf::announce( "Snapshot", make_pair(&Snapshot::getHisCPS, &Snapshot::setHisCPS), make_pair(&Snapshot::getPStrps, &Snapshot::setPStrips)); + caf::announce( + "CheckpointReq", make_pair(&CheckpointReq::getTs, &CheckpointReq::setTs), + make_pair(&CheckpointReq::getPart, &CheckpointReq::setPart), + make_pair(&CheckpointReq::getStripList, &CheckpointReq::setStripList), + make_pair(&CheckpointReq::getOldHisCP, &CheckpointReq::setOldHisCP)); } } } diff --git a/txn_manager/txn_client.cpp b/txn_manager/txn_client.cpp index f8621e46b..4b25f0c08 100644 --- a/txn_manager/txn_client.cpp +++ b/txn_manager/txn_client.cpp @@ -107,7 +107,7 @@ RetCode TxnClient::BeginIngest(const FixTupleIngestReq& request, return ret; } -RetCode TxnClient::CommitIngest(const UInt64 ts) { +RetCode TxnClient::CommitIngest(UInt64 ts) { // 
RetCode ret = rSuccess; RetCode ret = 0; try { @@ -131,7 +131,7 @@ RetCode TxnClient::CommitIngest(const UInt64 ts) { return ret; } -RetCode TxnClient::AbortIngest(const UInt64 id) { +RetCode TxnClient::AbortIngest(UInt64 id) { // RetCode ret = rSuccess; RetCode ret = 0; try { @@ -191,44 +191,18 @@ RetCode TxnClient::CommitQuery(UInt64 ts) { return ret; } -RetCode TxnClient::BeginCheckpoint(Checkpoint& cp) { - // RetCode ret = rSuccess; - RetCode ret = 0; - // try { - // caf::scoped_actor self; - // self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, - // CheckpointAtom::value, cp.part_) - // .await([&](const Checkpoint& checkpoint, RetCode r) { - // cp = checkpoint; - // ret = r; - // }, - // caf::after(seconds(kTimeout)) >> [&] { - // // ret = - // // rLinkTmTimeout; - // ret = -1; - // cout << "time out" << - // endl; - // }); - // } catch (...) { - // cout << "link fail" << endl; - // // return rLinkTmFail; - // return -1; - // } - return ret; -} - -RetCode TxnClient::CommitCheckpoint(const UInt64 logic_cp, - const UInt64 phy_cp) { +RetCode TxnClient::CommitCheckpoint(UInt64 ts, UInt64 part, UInt64 his_cp, + UInt64 rt_cp) { // RetCode ret = rSuccess; RetCode ret = 0; try { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? 
TxnServer::proxy_ : proxy_, - CommitCPAtom::value, logic_cp, phy_cp) - .await([&](RetCode r) { ret = r; }, - caf::after(seconds(kTimeout)) >> [&] { - // ret = - // rLinkTmTimeout; + CommitCPAtom::value, his_cp, rt_cp) + .await([&ret](RetCode r) { ret = r; }, + caf::after(seconds(kTimeout)) >> [&ret] { + // ret = + // rLinkTmTimeout; ret = -1; cout << "time out" << endl; }); @@ -239,5 +213,5 @@ RetCode TxnClient::CommitCheckpoint(const UInt64 logic_cp, } return ret; } -} -} +} // namespace txn +} // namespace claims diff --git a/txn_manager/txn_client.hpp b/txn_manager/txn_client.hpp index ee5157c21..4c7f5586e 100644 --- a/txn_manager/txn_client.hpp +++ b/txn_manager/txn_client.hpp @@ -20,8 +20,8 @@ * * Created on: 2016年4月10日 * Author: imdb - * Email: - * + * Email: + * * Description: * */ @@ -62,28 +62,25 @@ using std::atomic; using std::chrono::seconds; using std::chrono::milliseconds; -namespace claims{ -namespace txn{ - +namespace claims { +namespace txn { - -class TxnClient{ +class TxnClient { public: static string ip_; static int port_; static caf::actor proxy_; static RetCode Init(string ip = kTxnIp, int port = kTxnPort); static RetCode Debug(string flag); - static RetCode BeginIngest(const FixTupleIngestReq & request, Ingest & ingest); - static RetCode CommitIngest(const UInt64 id); - static RetCode AbortIngest(const UInt64 id); - static RetCode BeginQuery(const QueryReq & request, Query & query); + static RetCode BeginIngest(const FixTupleIngestReq& request, Ingest& ingest); + static RetCode CommitIngest(UInt64 id); + static RetCode AbortIngest(UInt64 id); + static RetCode BeginQuery(const QueryReq& request, Query& query); static RetCode CommitQuery(UInt64 ts); - static RetCode BeginCheckpoint(Checkpoint & cp); - static RetCode CommitCheckpoint(const UInt64 logic_cp, const UInt64 phy_cp); + static RetCode CommitCheckpoint(UInt64 ts, UInt64 part, UInt64 his_cp, + UInt64 rt_cp); }; - } } -#endif // TXN_CLIENT_HPP_ +#endif // TXN_CLIENT_HPP_ diff --git 
a/txn_manager/txn_client_test.cpp b/txn_manager/txn_client_test.cpp index f57fe42a7..3df022024 100644 --- a/txn_manager/txn_client_test.cpp +++ b/txn_manager/txn_client_test.cpp @@ -74,7 +74,7 @@ using FailAtom = caf::atom_constant; using QuitAtom = caf::atom_constant; using claims::txn::Snapshot; using claims::txn::CAFSerConfig; -using claims::txn::QueryTracker; +// using claims::txn::QueryTracker; class Foo { public: vector request1; @@ -309,12 +309,12 @@ int main(int argc, const char **argv) { request.InsertStrip(4, 2, 2); for (auto i = 0; i < 40; i++) { TxnClient::BeginIngest(request, ingest); - // if (i % 10 != 0) - TxnClient::CommitIngest(ingest.ts_); - // else - // TxnClient::AbortIngest(ingest.ts_); + if (i % 10 != 0) + TxnClient::CommitIngest(ingest.ts_); + else + TxnClient::AbortIngest(ingest.ts_); } - TxnClient::Debug("core"); + // TxnClient::Debug("core"); } else if (type == "txnclient") { TxnClient::Init("127.0.0.1", 8089); auto job = []() { diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index efbdc9075..7414dc46d 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -45,8 +45,6 @@ namespace txn { /***************/ atomic TimeStamp::now_; -caf::actor QueryTracker::tracker_; -set QueryTracker::active_querys_; /**************/ @@ -57,32 +55,10 @@ vector TxnServer::cores_; bool TxnServer::active_ = false; unordered_map> TxnServer::pos_list_; -unordered_map TxnServer::cp_list_; +unordered_map TxnServer::cp_list_; set TxnServer::active_querys_; caf::actor test; -caf::behavior QueryTracker::make_behavior() { - this->delayed_send(this, seconds(3), GCAtom::value); - return {[](BeginAtom, UInt64 ts) { active_querys_.insert(ts); }, - [](CommitAtom, UInt64 ts) { active_querys_.erase(ts); }, - [this](GCAtom) { - /** - * TODO broadcast all components what minimum timestamp - * is still alive. 
- */ - UInt64 ts; - if (active_querys_.size() > 0) - ts = *active_querys_.begin(); - else - ts = TimeStamp::Gen(); - for (auto& core : TxnServer::cores_) - caf::anon_send(core, GCAtom::value, ts); - this->delayed_send(this, seconds(3), GCAtom::value); - }}; -} - -caf::behavior CheckpointTracker::make_behavior() {} - caf::behavior TxnCore::make_behavior() { this->delayed_send(this, seconds(3 + core_id_), MergeAtom::value); @@ -112,35 +88,62 @@ caf::behavior TxnCore::make_behavior() { txnbin_list_[id].AbortTxn(pos); return caf::make_message(rSuccess); }, - [this](QueryAtom, shared_ptr query) -> caf::message { + [this](QueryAtom, shared_ptr query, + bool include_abort) -> caf::message { auto id = TxnBin::GetTxnBinID(query->ts_, TxnServer::concurrency_); auto pos = TxnBin::GetTxnBinPos(query->ts_, TxnServer::concurrency_); auto ts = TxnBin::GetTxnBinMaxTs(id, TxnServer::concurrency_, core_id_); auto remain = kTxnBinSize - (ts - query->ts_) / TxnServer::concurrency_; - // cout << "query core:" << core_id_ << endl; - // cout << query->ts_ << "," << id << "," << pos << "," << ts << - // "," - // << remain << endl; if (remain > 0) { txnbin_list_[id].MergeTxn(*query, remain); - // cout << "first txnbin<" << core_id_ << "," << id << ">txn<" << 0 - // << "," << remain << ">" << endl; } while (id > 0) { --id; if (txnbin_list_[id].IsSnapshot()) { - // cout << "txnbin<" << core_id_ << "," << id << ">snapshot" << - // endl; txnbin_list_[id].MergeSnapshot(*query); break; } else { - // cout << "txnbin<" << core_id_ << "," << id << ">full" << endl; txnbin_list_[id].MergeTxn(*query, kTxnBinSize); } } auto next_core_id = (core_id_ + 1) % TxnServer::concurrency_; - if (next_core_id != TxnServer::GetCoreID(query->ts_)) + if (next_core_id != TxnServer::GetCoreID(query->ts_)) { + // scan next core this->forward_to(TxnServer::cores_[next_core_id]); + } else if (include_abort) { // process the final query + for (auto& part_cp : query->rt_cp_list_) { + auto part = part_cp.first; + auto 
checkpoint = part_cp.second; + query->abort_list_[part].insert(query->abort_list_[part].end(), + query->snapshot_[part].begin(), + query->snapshot_[part].end()); + Strip::Sort(query->abort_list_[part]); + Strip::Merge(query->abort_list_[part]); + Strip::Filter(query->abort_list_[part], + [checkpoint](PStrip& pstrip) -> bool { + if (pstrip.first + pstrip.second <= checkpoint) { + return false; + } else { + if (pstrip.first < checkpoint && + pstrip.first + pstrip.second > checkpoint) + pstrip = make_pair(checkpoint, + pstrip.first + pstrip.second - checkpoint); + return true; + } + }); + auto abort_pos = query->abort_list_[part][0].first + + query->abort_list_[part][0].second; + if (query->abort_list_[part].size() > 0) { + Strip::Filter(query->snapshot_[part], + [abort_pos](PStrip& pstrip) -> bool { + if (pstrip.first + pstrip.second <= abort_pos) + return true; + else + return false; + }); + } + } + } return caf::make_message(*query); }, [this](MergeAtom) { @@ -196,28 +199,6 @@ caf::behavior TxnCore::make_behavior() { txnbin_list_ = new_txnbin_list; }, - [](CheckpointAtom, shared_ptr cp) { - /* for (auto i = 0; i < size_; i++) - if (commit_[i]) { - for (auto& strip : strip_list_[i]) - if (strip.part_ == cp->part_ && strip.pos_ >= - cp->logic_cp_) - cp->commit_strip_list_.push_back( - PStrip(strip.pos_, strip.offset_)); - } else if (abort_[i]) { - for (auto& strip : strip_list_[i]) - if (strip.part_ == cp->part_ && strip.pos_ >= - cp->logic_cp_) - cp->abort_strip_list_.push_back( - PStrip(strip.pos_, strip.offset_)); - } - if (core_id_ != TxnServer::cores_.size() - 1) - this->forward_to(TxnServer::cores_[core_id_ + 1]); - else { - current_message() = caf::make_message(MergeAtom::value, cp); - this->forward_to(TxnServer::cores_[TxnServer::SelectCoreId()]); - }*/ - }, [](GCAtom) { /* auto size_old = size_; auto pos = 0; @@ -282,7 +263,8 @@ caf::behavior TxnServer::make_behavior() { auto query = make_shared(ts, GetHisCPList(ts, request.part_list_), GetRtCPList(ts, 
request.part_list_)); - current_message() = caf::make_message(QueryAtom::value, query); + current_message() = + caf::make_message(QueryAtom::value, query, request.include_abort_); forward_to(cores_[GetCoreID(ts)]); // cout << "**********query:" << ts << " begin**************" << endl; }, diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp index 5c954d21a..d82571c8f 100644 --- a/txn_manager/txn_server.hpp +++ b/txn_manager/txn_server.hpp @@ -85,37 +85,6 @@ class TimeStamp { static atomic now_; }; -class QueryTracker : public caf::event_based_actor { - public: - static RetCode Init() { - tracker_ = caf::spawn(); - return rSuccess; - } - static RetCode Begin(UInt64 ts) { - caf::anon_send(tracker_, BeginAtom::value, ts); - } - static RetCode Commit(UInt64 ts) { - caf::anon_send(tracker_, CommitAtom::value, ts); - } - caf::behavior make_behavior() override; - - private: - static caf::actor tracker_; - static set active_querys_; -}; - -class CheckpointTracker : public caf::event_based_actor { - public: - static RetCode Init() { - tracker_ = caf::spawn(); - return rSuccess; - } - caf::behavior make_behavior() override; - - private: - static caf::actor tracker_; -}; - class TxnCore : public caf::event_based_actor { public: UInt64 core_id_; @@ -135,7 +104,7 @@ class TxnServer : public caf::event_based_actor { static caf::actor proxy_; static vector cores_; static unordered_map> pos_list_; - static unordered_map cp_list_; + static unordered_map cp_list_; // static unordered_map> CountList; /**************** User APIs ***************/ static RetCode Init(int concurrency = kConcurrency, int port = kTxnPort); From 01b07d198542dc31b9f137fdf00b2a2216ea8c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Fri, 2 Sep 2016 14:17:39 +0800 Subject: [PATCH 52/58] fix bugs for ingestion and txn-scan(rt chunk list bug) --- Environment.cpp | 32 ++++++++---- conf/config | 15 ++++-- loader/AMQ_consumer.cpp | 3 +- 
loader/master_loader.cpp | 18 ++++++- loader/slave_loader.cpp | 20 +++++--- .../physical_projection_scan.cpp | 5 ++ storage/BlockManager.cpp | 1 + storage/ChunkStorage.cpp | 4 ++ storage/PartitionStorage.cpp | 51 ++++++++++++------- storage/PartitionStorage.h | 4 +- 10 files changed, 112 insertions(+), 41 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index cdb90cdfe..6f1e453a7 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -135,13 +135,24 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { logging_->log("Initializing txn log server"); if (!InitTxnLog()) LOG(ERROR) << "failed to initialize txn log"; - /** - * Binding all partition for each projection - * Because - */ - sleep(3); - logging_->log("Advanced Bind all partition for each projection"); - if (!AdvancedBindAllPart()) LOG(ERROR) << "failed to bing partitions"; + if (ismaster) { + /** + * Binding all partition for each projection + */ + sleep(3); + logging_->log("Advanced Bind all partition for each projection"); + if (!AdvancedBindAllPart()) { + cout << "failed to bind partitions" << endl; + LOG(ERROR) << "failed to bing partitions"; + } + cout << "bind partition...." 
<< endl; + auto parts = BlockManager::getInstance()->GetAllPartition(); + assert(parts.size() > 0); +/* for (auto& part : parts) + cout << "binding<" << part.projection_id.table_id << "," + << part.projection_id.projection_off << "," << part.partition_off + << ">" << endl;*/ + } #ifndef DEBUG_MODE if (ismaster) { @@ -317,8 +328,11 @@ bool Environment::AdvancedBindAllPart() { for (auto proj_id = 0; proj_id < proj_count; proj_id++) { auto proj = table->getProjectoin(proj_id); if (!proj->AllPartitionBound()) { - Catalog::getInstance()->getBindingModele()->BindingEntireProjection( - proj->getPartitioner(), DESIRIABLE_STORAGE_LEVEL); + bool ret = + Catalog::getInstance()->getBindingModele()->BindingEntireProjection( + proj->getPartitioner(), DESIRIABLE_STORAGE_LEVEL); + // cout << "binding<" << table_id << "," << proj_id << ">:" << ret << + // endl; } } } diff --git a/conf/config b/conf/config index 224d5ae02..8ea5140a6 100755 --- a/conf/config +++ b/conf/config @@ -27,7 +27,12 @@ client_listener_port = 10000 #data="/home/imdb/data/wangli/" #data="/home/imdb/data/POC/sample/" #data="/home/minqi/git/Data/data/tpc-h/1-partition/sf-1/" + + data="/home/imdb/config/tpc-h/1-partition/sf-1/" +#data="/home/claims/data/tpc-h/sf1/1partition/" + + #data="/home/imdb/data/SF-1/" #data="/home/imdb/data/SF-1/" #data="/home/imdb/data/stock/" @@ -37,10 +42,9 @@ data="/home/imdb/config/tpc-h/1-partition/sf-1/" #data="/home/fish/data/test/" #data="/home/imdb/data/POC/" #data="/home/imdb/data/POC/" -#hdfs主节点 - -hdfs_master_ip="127.0.0.1" +#hdfs主节点 +hdfs_master_ip="58.198.176.92" #hdfs主节点端口 hdfs_master_port=9000 @@ -67,6 +71,11 @@ master_loader_ip="127.0.0.1" master_loader_port=9002 +master_loader_thread_num=1 + +slave_loader_thread_num=1 + + #事务服务器 txn_server=1 diff --git a/loader/AMQ_consumer.cpp b/loader/AMQ_consumer.cpp index 47e1c5612..941a11bc9 100644 --- a/loader/AMQ_consumer.cpp +++ b/loader/AMQ_consumer.cpp @@ -104,7 +104,8 @@ void claims::loader::AMQConsumer::onMessage(const 
Message* message) { if (textMessage != NULL) { text = textMessage->getText(); - + // message->acknowledge(); + // cout << "" << endl << text << endl; bool client_ack = client_ack_; mloader_->Ingest(text, [message, client_ack]() -> int { if (client_ack) { diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index f9475c1ca..d4cc95192 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -164,6 +164,12 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, mloader->slave_addr_to_socket_[NodeAddress(ip, "")] = new_slave_fd; mloader->socket_fd_to_lock_[new_slave_fd] = Lock(); DLOG(INFO) << "start to send test message to slave"; + + cout << "slave_node <=> socket_fd" << endl; + for (auto node = mloader->slave_addr_to_socket_.begin(); + node != mloader->slave_addr_to_socket_.end(); node++) + cout << "ip:" << node->first.ip << ", socket fd:" << node->second + << endl; /* /// test whether socket works well ostringstream oss; @@ -399,6 +405,12 @@ RetCode MasterLoader::Ingest(const string& message, /// reply ACK to MQ EXEC_AND_DLOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); + cout << ingest.ToString() << endl; +/* auto data_size = 0L; + for (auto& part : partition_buffers) { + for (auto& buffer : part) data_size += buffer.length_; + } + cout << "send_data_size:" << data_size << endl;*/ /// distribute partition load task EXEC_AND_DLOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), @@ -753,6 +765,9 @@ RetCode MasterLoader::SelectSocket(const TableDescriptor* table, DLOG(INFO) << "node address is " << addr.ip << ":" << addr.port; addr.port = ""; // the port is used for OLAP, not for loading socket_fd = slave_addr_to_socket_[addr]; + + cout << "node id:" << node_id_in_rmm << ",node address:" << addr.ip << ":" + << addr.port << ",socket fd:" << socket_fd << endl; return ret; } @@ -784,6 +799,7 @@ RetCode MasterLoader::SendPacket(const int socket_fd, } total_write_num += write_num; } + cout << 
"send data bytes:" << total_write_num << endl; #ifdef MASTER_LOADER_PREF // if (__sync_add_and_fetch(&sent_packetcount, 1) == txn_count_for_debug * 4) { // cout << "send " << sent_packetcount << " packets used " << send_total_time @@ -847,7 +863,7 @@ void* MasterLoader::StartMasterLoader(void* arg) { // std::string brokerURI = "failover:(tcp://" - "10.11.1.192:61616?wireFormat=openwire&connection.useAsyncSend=true" + "58.198.176.92:61616?wireFormat=openwire&connection.useAsyncSend=true" // "&transport.commandTracingEnabled=true" // "&transport.tcpTracingEnabled=true" // "&wireFormat.tightEncodingEnabled=true" diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 93cbefce0..2ec732702 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -241,15 +241,21 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { if (-1 == (real_read_num = recv(master_fd_, head_buffer, LoadPacket::kHeadLength, MSG_WAITALL))) { PLOG(ERROR) << "failed to receive message length from master"; + cout << "1 received_data_size:" << real_read_num << endl; return rFailure; } else if (0 == real_read_num) { PLOG(ERROR) << "master loader socket has been closed"; + cout << "2 received_data_size:" << real_read_num << endl; + cout << "listen fd:" << listening_fd_ << ",master fd:" << master_fd_ + << endl; return rFailure; } else if (real_read_num < LoadPacket::kHeadLength) { + cout << "3 received_data_size:" << real_read_num << endl; LOG(ERROR) << "received message error! 
only read " << real_read_num << " bytes"; continue; } + cout << "4 received_data_size:" << real_read_num << endl; GET_TIME_SL(start_handle); PERFLOG("received packet head"); uint64_t data_length = @@ -347,10 +353,10 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; EXEC_AND_DLOG_RETURN( - ret, part_storage->AddChunkWithMemoryToNum(last_chunk_id + 1, HDFS), + ret, part_storage->AddRtChunkWithMemoryToNum(last_chunk_id + 1, HDFS), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); - - /// copy data into applied memory + // cout << "******1*****" << endl; + // copy data into applied memory const uint64_t tuple_size = Catalog::getInstance() ->getTable(table_id) ->getProjectoin(prj_id) @@ -381,6 +387,7 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { InMemoryChunkWriterIterator writer(chunk_info.hook, CHUNK_SIZE, cur_block_id, BLOCK_SIZE, pos_in_block, tuple_size); + // cout << "store data length:" << data_length << endl; do { // write to every block uint64_t written_length = writer.Write(packet.data_buffer_ + total_written_length, @@ -388,8 +395,8 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { total_written_length += written_length; DLOG(INFO) << "written " << written_length << " bytes into chunk:" << cur_chunk_id - << ". Now total written " << total_written_length - << " bytes"; + << ". 
Now total written " << total_written_length << " bytes" + << endl; if (total_written_length == data_length) { // all tuple is written into memory return rSuccess; @@ -397,7 +404,6 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { assert(false); } } while (writer.NextBlock()); - ++cur_chunk_id; // get next chunk to write DLOG(INFO) << "Now chunk id is " << cur_chunk_id << ", total number of chunk is" << part_storage->GetChunkNum(); @@ -453,7 +459,7 @@ behavior SlaveLoader::WorkInCAF(event_based_actor* self) { } behavior SlaveLoader::PersistInCAF(event_based_actor* self) { - self->delayed_send(self, seconds(3), CheckpointAtom::value); + // self->delayed_send(self, seconds(3), CheckpointAtom::value); return {[self](CheckpointAtom) { cout << "slave persist.." << endl; QueryReq query_req; diff --git a/physical_operator/physical_projection_scan.cpp b/physical_operator/physical_projection_scan.cpp index c4e33f944..d9836ad5c 100644 --- a/physical_operator/physical_projection_scan.cpp +++ b/physical_operator/physical_projection_scan.cpp @@ -111,6 +111,11 @@ bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status, // << ",part_id:" << part_id << ",cp:" << cp << endl; partition_reader_iterator_ = partition_handle_->CreateTxnReaderIterator( cp, state_.query_.scan_snapshot_[global_part_id]); + cout << "scan part:" << global_part_id << endl; + cout << "checkpoint:" << cp << endl; + for (auto& part : state_.query_.scan_snapshot_[global_part_id]) { + cout << "<" << part.first << "," << part.second << ">" << endl; + } // partition_reader_iterator_ = // partition_handle_->CreateAtomicReaderIterator(); SetReturnStatus(true); diff --git a/storage/BlockManager.cpp b/storage/BlockManager.cpp index aa35ecf4f..0fb176cf6 100755 --- a/storage/BlockManager.cpp +++ b/storage/BlockManager.cpp @@ -409,6 +409,7 @@ vector BlockManager::GetAllPartition() { vector part_list; for (auto itr = partition_id_to_storage_.begin(); itr != partition_id_to_storage_.end(); 
itr++) { + part_list.push_back(itr->first); } return part_list; } diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index a839bd567..3e6aa4d91 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -73,6 +73,10 @@ RetCode ChunkStorage::ApplyMemory() { RetCode ret = claims::common::rSuccess; HdfsInMemoryChunk chunk_info; chunk_info.length = CHUNK_SIZE; + cout << "apply memory:<" << chunk_id_.partition_id.projection_id.table_id + << "," << chunk_id_.partition_id.projection_id.projection_off << "," + << chunk_id_.partition_id.partition_off << "," << chunk_id_.chunk_off + << ">" << endl; if (BlockManager::getInstance()->getMemoryChunkStore()->ApplyChunk( chunk_id_, chunk_info.hook)) { /* there is enough memory storage space, so the storage level can be diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 7d9077b3e..fb2408ee0 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -60,6 +60,7 @@ PartitionStorage::PartitionStorage(const PartitionID& partition_id, const StorageLevel& storage_level) : partition_id_(partition_id), number_of_chunks_(number_of_chunks), + number_of_rt_chunks_(0), desirable_storage_level_(storage_level) { if (number_of_chunks_ * CHUNK_SIZE / 1024 / 1024 > BufferManager::getInstance()->getStorageMemoryBudegeInMilibyte() * @@ -74,8 +75,8 @@ PartitionStorage::PartitionStorage(const PartitionID& partition_id, ChunkID(partition_id_, i, true), BLOCK_SIZE, desirable_storage_level_)); }*/ - CheckAndAppendChunkList(number_of_chunks_, false); - CheckAndAppendChunkList(number_of_chunks_, true); + // CheckAndAppendChunkList(number_of_chunks_, false); + // CheckAndAppendChunkList(number_of_chunks_, true); // cout << "*******chunk_list_" << chunk_list_.size() << endl; } @@ -88,28 +89,29 @@ PartitionStorage::~PartitionStorage() { void PartitionStorage::AddNewChunk() { number_of_chunks_++; } -RetCode PartitionStorage::AddChunkWithMemoryToNum( +RetCode 
PartitionStorage::AddRtChunkWithMemoryToNum( unsigned expected_number_of_chunks, const StorageLevel& storage_level) { RetCode ret = rSuccess; - if (number_of_chunks_ >= expected_number_of_chunks) return ret; - DLOG(INFO) << "now chunk number:" << number_of_chunks_ - << ". expected chunk num:" << expected_number_of_chunks; - + // cout << "******-1*****" << endl; + if (number_of_rt_chunks_ >= expected_number_of_chunks) return ret; + DLOG(INFO) << "now rt chunk number:" << number_of_rt_chunks_ + << ". expected rt chunk num:" << expected_number_of_chunks; + // cout << "******0*****" << endl; LockGuard guard(write_lock_); - if (number_of_chunks_ >= expected_number_of_chunks) return ret; + if (number_of_rt_chunks_ >= expected_number_of_chunks) return ret; - for (unsigned i = number_of_chunks_; i < expected_number_of_chunks; i++) { + for (unsigned i = number_of_rt_chunks_; i < expected_number_of_chunks; i++) { ChunkStorage* chunk = new ChunkStorage(ChunkID(partition_id_, i, true), BLOCK_SIZE, storage_level); - EXEC_AND_DLOG(ret, chunk->ApplyMemory(), "applied memory for chunk(" + EXEC_AND_DLOG(ret, chunk->ApplyMemory(), "applied memory for rt chunk(" << partition_id_.getName() << "," << i << ")", - "failed to apply memory for chunk(" << partition_id_.getName() - << "," << i << ")"); - chunk_list_.push_back(chunk); + "failed to apply memory for rt chunk(" + << partition_id_.getName() << "," << i << ")"); + rt_chunk_list_.push_back(chunk); } - number_of_chunks_ = expected_number_of_chunks; - assert(chunk_list_.size() == number_of_chunks_); + number_of_rt_chunks_ = expected_number_of_chunks; + assert(rt_chunk_list_.size() == number_of_rt_chunks_); return ret; } @@ -128,9 +130,12 @@ void PartitionStorage::UpdateChunksWithInsertOrAppend( chunk_list_.back()->GetChunkID()); chunk_list_.back()->SetCurrentStorageLevel(HDFS); } - for (unsigned i = number_of_chunks_; i < number_of_chunks; i++) + for (unsigned i = number_of_chunks_; i < number_of_chunks; i++) { chunk_list_.push_back( 
new ChunkStorage(ChunkID(partition_id, i), BLOCK_SIZE, storage_level)); + /* rt_chunk_list_.push_back(new ChunkStorage(ChunkID(partition_id, i, true), + BLOCK_SIZE, storage_level));*/ + } number_of_chunks_ = number_of_chunks; } @@ -249,6 +254,11 @@ PartitionStorage::TxnPartitionReaderIterator::TxnPartitionReaderIterator( begin += len; } } + /* string str = "rt:"; + for (auto& strip : rt_strip_list_) { + str += "<" + to_string(strip.first) + "," + to_string(strip.second) + ">"; + } + cout << str << endl;*/ } PartitionStorage::TxnPartitionReaderIterator::~TxnPartitionReaderIterator() { @@ -282,10 +292,13 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( } else if (rt_block_index_ < rt_strip_list_.size()) { // scan real-time data auto pos = rt_strip_list_[rt_block_index_].first; auto offset_in_block = pos - (pos / BLOCK_SIZE) * BLOCK_SIZE; + assert(offset_in_block >= 0); auto len = rt_strip_list_[rt_block_index_].second; auto rt_block_cur = pos / BLOCK_SIZE; auto rt_chunk_cur = pos / CHUNK_SIZE; - if (rt_chunk_cur > rt_chunk_cur_) { // move to new rt chunk + // cout << "visit rt:<" << rt_chunk_cur << "," << rt_block_cur << ">" << + // endl; + if (rt_chunk_cur != rt_chunk_cur_) { // move to new rt chunk rt_chunk_cur_ = rt_chunk_cur; rt_block_cur_ = rt_chunk_cur_ * (CHUNK_SIZE / BLOCK_SIZE); ps_->CheckAndAppendChunkList(rt_chunk_cur_ + 1, true); @@ -296,6 +309,7 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( } do { // move to rt_block_cur + if (ba != nullptr) delete ba; rt_chunk_it_->GetNextBlockAccessor(ba); rt_block_cur_++; } while (rt_block_cur_ <= rt_block_cur); @@ -403,8 +417,7 @@ bool PartitionStorage::PersistHDFS(UInt64 old_his_cp, UInt64 new_his_cp) { */ auto file_handle = FileHandleImpFactory::Instance().CreateFileHandleImp( kHdfs, partition_id_.getPathAndName()); - if (file_handle == nullptr) - return false; + if (file_handle == nullptr) return false; HdfsInMemoryChunk chunk_his; auto begin = old_his_cp; auto end = new_his_cp; 
diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index 03224a47c..3cbaa72b4 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -165,7 +165,7 @@ class PartitionStorage { void AddNewChunk(); - RetCode AddChunkWithMemoryToNum(unsigned expected_number_of_chunks, + RetCode AddRtChunkWithMemoryToNum(unsigned expected_number_of_chunks, const StorageLevel& storage_level); const int GetChunkNum() const { return chunk_list_.size(); } @@ -207,11 +207,13 @@ class PartitionStorage { bool Persist(UInt64 old_his_cp, UInt64 new_his_cp); bool PersistHDFS(UInt64 old_his_cp, UInt64 new_his_cp); bool PersistDisk(UInt64 old_his_cp, UInt64 new_his_cp); + protected: PartitionID partition_id_; atomic number_of_chunks_; std::vector chunk_list_; // add it for txn scan + atomic number_of_rt_chunks_; std::vector rt_chunk_list_; StorageLevel desirable_storage_level_; From 5db604e04932dd18bb390aa6902bafca34c93cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Mon, 5 Sep 2016 16:26:25 +0800 Subject: [PATCH 53/58] fix bug for memory merge and persist(don't implement push to HDFS) --- Environment.cpp | 11 +++--- loader/slave_loader.cpp | 15 +++++--- storage/ChunkStorage.cpp | 2 +- storage/ChunkStorage.h | 1 + storage/PartitionStorage.cpp | 73 +++++++++++++++++++++++++++--------- storage/PartitionStorage.h | 7 +++- txn_manager/txn.cpp | 8 ++-- txn_manager/txn_client.cpp | 4 +- txn_manager/txn_server.cpp | 10 +++-- 9 files changed, 88 insertions(+), 43 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index 6f1e453a7..fd2b23cfa 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -144,14 +144,15 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { if (!AdvancedBindAllPart()) { cout << "failed to bind partitions" << endl; LOG(ERROR) << "failed to bing partitions"; + } else { + cout << "success to bind partitions" << endl; } cout << "bind partition...." 
<< endl; auto parts = BlockManager::getInstance()->GetAllPartition(); - assert(parts.size() > 0); -/* for (auto& part : parts) - cout << "binding<" << part.projection_id.table_id << "," - << part.projection_id.projection_off << "," << part.partition_off - << ">" << endl;*/ + /* for (auto& part : parts) + cout << "binding<" << part.projection_id.table_id << "," + << part.projection_id.projection_off << "," << part.partition_off + << ">" << endl;*/ } #ifndef DEBUG_MODE diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 2ec732702..4f2343860 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -353,7 +353,7 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; EXEC_AND_DLOG_RETURN( - ret, part_storage->AddRtChunkWithMemoryToNum(last_chunk_id + 1, HDFS), + ret, part_storage->AddRtChunkWithMemoryApply(last_chunk_id + 1, HDFS), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); // cout << "******1*****" << endl; // copy data into applied memory @@ -459,9 +459,8 @@ behavior SlaveLoader::WorkInCAF(event_based_actor* self) { } behavior SlaveLoader::PersistInCAF(event_based_actor* self) { - // self->delayed_send(self, seconds(3), CheckpointAtom::value); + self->delayed_send(self, seconds(5), CheckpointAtom::value); return {[self](CheckpointAtom) { - cout << "slave persist.." 
<< endl; QueryReq query_req; query_req.include_abort_ = true; Query query; @@ -482,13 +481,17 @@ behavior SlaveLoader::PersistInCAF(event_based_actor* self) { auto old_his_cp = query.his_cp_list_[g_part_id]; auto new_his_cp = part_handler->MergeToHis(old_his_cp, query.snapshot_[g_part_id]); - if (new_his_cp == old_his_cp) continue; - if (!part_handler->Persist(old_his_cp, new_his_cp)) continue; + // cout << "new_his_cp:" << new_his_cp << endl; + /* if (new_his_cp == old_his_cp) continue; + if (!part_handler->Persist(old_his_cp, new_his_cp)) continue; + */ TxnClient::CommitCheckpoint(query.ts_, g_part_id, new_his_cp, new_rt_cp); + /*cout << "persist:" << g_part_id << ":" << new_his_cp << "," + << new_rt_cp << endl;*/ } } - self->delayed_send(self, seconds(3), CheckpointAtom::value); + self->delayed_send(self, seconds(7), CheckpointAtom::value); }}; } diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 3e6aa4d91..73bb209bf 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -129,7 +129,7 @@ ChunkReaderIterator* ChunkStorage::CreateChunkReaderIterator() { if (BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( chunk_id_, chunk_info)) ret = new InMemoryChunkReaderItetaor(chunk_info.hook, chunk_info.length, - chunk_info.length / block_size_, + CHUNK_SIZE / BLOCK_SIZE, block_size_, chunk_id_); else ret = NULL; diff --git a/storage/ChunkStorage.h b/storage/ChunkStorage.h index f0ff7a79b..da4a73a2d 100755 --- a/storage/ChunkStorage.h +++ b/storage/ChunkStorage.h @@ -327,6 +327,7 @@ class ChunkStorage { } RetCode ApplyMemory(); + StorageLevel GetStorgeLevel() const { current_storage_level_; } private: unsigned block_size_; diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index fb2408ee0..01d2d21fc 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -89,7 +89,34 @@ PartitionStorage::~PartitionStorage() { void PartitionStorage::AddNewChunk() { number_of_chunks_++; } -RetCode 
PartitionStorage::AddRtChunkWithMemoryToNum( +RetCode PartitionStorage::AddHisChunkWithMemoryApply( + unsigned expected_number_of_chunks, const StorageLevel& storage_level) { + RetCode ret = rSuccess; + if (chunk_list_.size() >= expected_number_of_chunks) return ret; + DLOG(INFO) << "now chunk number:" << number_of_chunks_ + << ". expected chunk num:" << expected_number_of_chunks; + LockGuard guard(write_lock_); + if (chunk_list_.size() >= expected_number_of_chunks) return ret; + for (unsigned i = chunk_list_.size(); i < expected_number_of_chunks; i++) { + ChunkStorage* chunk = + new ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, storage_level); + chunk_list_.push_back(chunk); + /* EXEC_AND_DLOG(ret, chunk_list_[i]->ApplyMemory(), + "applied memory for chunk(" << partition_id_.getName() << + "," + << i << ")", + "failed to apply memory for chunk(" << + partition_id_.getName() + << "," << i << ")");*/ + } + + number_of_chunks_ = expected_number_of_chunks; + assert(chunk_list_.size() == number_of_chunks_); + + return ret; +} + +RetCode PartitionStorage::AddRtChunkWithMemoryApply( unsigned expected_number_of_chunks, const StorageLevel& storage_level) { RetCode ret = rSuccess; // cout << "******-1*****" << endl; @@ -103,13 +130,14 @@ RetCode PartitionStorage::AddRtChunkWithMemoryToNum( for (unsigned i = number_of_rt_chunks_; i < expected_number_of_chunks; i++) { ChunkStorage* chunk = new ChunkStorage(ChunkID(partition_id_, i, true), BLOCK_SIZE, storage_level); + rt_chunk_list_.push_back(chunk); EXEC_AND_DLOG(ret, chunk->ApplyMemory(), "applied memory for rt chunk(" << partition_id_.getName() << "," << i << ")", "failed to apply memory for rt chunk(" << partition_id_.getName() << "," << i << ")"); - rt_chunk_list_.push_back(chunk); } + number_of_rt_chunks_ = expected_number_of_chunks; assert(rt_chunk_list_.size() == number_of_rt_chunks_); @@ -271,7 +299,7 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( ChunkReaderIterator::block_accessor* ba = 
nullptr; if (block_cur_ < last_his_block_) { // scan historical data int64_t chunk_cur = block_cur_ / (CHUNK_SIZE / BLOCK_SIZE); - if (chunk_cur > chunk_cur_) { // update chunk_it_ + if (chunk_cur != chunk_cur_) { // update chunk_it_ chunk_cur_ = chunk_cur; ps_->CheckAndAppendChunkList(chunk_cur_ + 1, false); if (chunk_it_ != nullptr) delete chunk_it_; @@ -280,6 +308,7 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( chunk_it_->GetNextBlockAccessor(ba); if (ba == nullptr) { if (chunk_it_ != nullptr) delete chunk_it_; + assert(false); return false; } else { assert(ba != nullptr); @@ -369,32 +398,40 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, auto begin = strip.first; auto end = strip.first + strip.second; while (begin < end) { + auto move = BLOCK_SIZE - (begin + BLOCK_SIZE) % BLOCK_SIZE; + if (move > end - begin) move = end - begin; // update historical chunk cur + AddHisChunkWithMemoryApply(begin / CHUNK_SIZE + 1, MEMORY); + auto chunkit = + chunk_list_[begin / CHUNK_SIZE]->CreateChunkReaderIterator(); + assert(chunkit != nullptr); if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( - ChunkID(partition_id_, begin / CHUNK_SIZE), chunk_his)) - return old_his_cp; + chunk_list_[begin / CHUNK_SIZE]->GetChunkID(), chunk_his)) { + assert(false); + } // update real time chunk cur if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( - ChunkID(partition_id_, new_his_cp / CHUNK_SIZE), chunk_rt)) + ChunkID(partition_id_, new_his_cp / CHUNK_SIZE, true), + chunk_rt)) { + assert(false); return old_his_cp; + } // each step move just one full block or even partly block - auto move = BLOCK_SIZE - (begin + BLOCK_SIZE) % BLOCK_SIZE; if (move == BLOCK_SIZE) { // full block memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, chunk_rt.hook + begin % CHUNK_SIZE, move); } else { - auto tuple_count = (move - sizeof(unsigned)) / tuple_size; - if ((begin + move) % BLOCK_SIZE == 0) { - auto real_move = tuple_count * tuple_size; - 
memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, - chunk_rt.hook + begin % CHUNK_SIZE, real_move); - } else { - memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, - chunk_rt.hook + begin % CHUNK_SIZE, move); - } + auto real_move = + (begin + move) % BLOCK_SIZE != 0 ? move : move - sizeof(unsigned); + auto tuple_count = real_move / tuple_size; + // cout << "tuple count,size : " << tuple_count << "," << tuple_size + // << endl; + memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, + chunk_rt.hook + begin % CHUNK_SIZE, move); auto tail_offset = - (begin / BLOCK_SIZE + 1) * BLOCK_SIZE - sizeof(unsigned); - *reinterpret_cast(chunk_his.hook + tail_offset) += + (new_his_cp + BLOCK_SIZE) % CHUNK_SIZE - sizeof(unsigned); + // cout << "tail_offset:" << tail_offset << endl; + *reinterpret_cast(chunk_his.hook + tail_offset) = tuple_count; } begin += move; diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index 3cbaa72b4..19dd58892 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -165,8 +165,11 @@ class PartitionStorage { void AddNewChunk(); - RetCode AddRtChunkWithMemoryToNum(unsigned expected_number_of_chunks, - const StorageLevel& storage_level); + RetCode AddHisChunkWithMemoryApply(unsigned expected_number_of_chunks, + const StorageLevel& storage_level); + + RetCode AddRtChunkWithMemoryApply(unsigned expected_number_of_chunks, + const StorageLevel& storage_level); const int GetChunkNum() const { return chunk_list_.size(); } diff --git a/txn_manager/txn.cpp b/txn_manager/txn.cpp index f4b0f5c18..72bc8113e 100644 --- a/txn_manager/txn.cpp +++ b/txn_manager/txn.cpp @@ -220,7 +220,7 @@ void TxnBin::MergeSnapshot(Query &query) const { snapshot_[part].end()); Strip::Sort(query.snapshot_[part]); Strip::Merge(query.snapshot_[part]); - Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { + Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { if (pstrip.first + pstrip.second <= checkpoint) { return 
false; } else { @@ -243,7 +243,7 @@ void TxnBin::MergeTxn(Query &query, int len) const { for (auto &strip : txn_list_[i].strip_list_) query.snapshot_[strip.first].push_back(strip.second); } else if (txn_list_[i].IsAbort()) { - for (auto &strip: txn_list_[i].strip_list_) + for (auto &strip : txn_list_[i].strip_list_) query.abort_list_[strip.first].push_back(strip.second); } @@ -252,10 +252,8 @@ void TxnBin::MergeTxn(Query &query, int len) const { auto checkpoint = part_cp.second; Strip::Sort(query.snapshot_[part]); Strip::Merge(query.snapshot_[part]); - Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { + Strip::Filter(query.snapshot_[part], [checkpoint](PStrip &pstrip) -> bool { if (pstrip.first + pstrip.second <= checkpoint) { - cout << "fail:<" << pstrip.first << "," << pstrip.second << ">" - << pstrip.second << endl; return false; } else { if (pstrip.first < checkpoint && diff --git a/txn_manager/txn_client.cpp b/txn_manager/txn_client.cpp index 4b25f0c08..778351684 100644 --- a/txn_manager/txn_client.cpp +++ b/txn_manager/txn_client.cpp @@ -198,7 +198,7 @@ RetCode TxnClient::CommitCheckpoint(UInt64 ts, UInt64 part, UInt64 his_cp, try { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, - CommitCPAtom::value, his_cp, rt_cp) + CommitCPAtom::value, ts, part, his_cp, rt_cp) .await([&ret](RetCode r) { ret = r; }, caf::after(seconds(kTimeout)) >> [&ret] { // ret = @@ -207,7 +207,7 @@ RetCode TxnClient::CommitCheckpoint(UInt64 ts, UInt64 part, UInt64 his_cp, cout << "time out" << endl; }); } catch (...) 
{ - cout << "link fail" << endl; + cout << "link fail @ CommitCheckpoint" << endl; // return rLinkTmFail; return -1; } diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index 7414dc46d..3c3f8ce20 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -110,7 +110,8 @@ caf::behavior TxnCore::make_behavior() { if (next_core_id != TxnServer::GetCoreID(query->ts_)) { // scan next core this->forward_to(TxnServer::cores_[next_core_id]); - } else if (include_abort) { // process the final query + } else if (include_abort && false) { // process the final query + cout << "size of abort list: " << endl; for (auto& part_cp : query->rt_cp_list_) { auto part = part_cp.first; auto checkpoint = part_cp.second; @@ -131,9 +132,9 @@ caf::behavior TxnCore::make_behavior() { return true; } }); - auto abort_pos = query->abort_list_[part][0].first + - query->abort_list_[part][0].second; if (query->abort_list_[part].size() > 0) { + auto abort_pos = query->abort_list_[part][0].first + + query->abort_list_[part][0].second; Strip::Filter(query->snapshot_[part], [abort_pos](PStrip& pstrip) -> bool { if (pstrip.first + pstrip.second <= abort_pos) @@ -276,7 +277,8 @@ caf::behavior TxnServer::make_behavior() { -> caf::message { cp_list_[part].SetHisCP(ts, his_cp); cp_list_[part].SetRtCP(ts, rt_cp); - return caf::make_message(OkAtom::value); + cout << "commit " << part << ":" << his_cp << "," << rt_cp << endl; + return caf::make_message(rSuccess); }, [this](GCAtom) { UInt64 ts; From 0934b46df068aed8659fb81aa829241116bab508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Sun, 11 Sep 2016 08:51:17 +0800 Subject: [PATCH 54/58] fix a bug for rt-scan --- Environment.cpp | 14 ++++---- conf/config | 4 +-- loader/master_loader.cpp | 18 +++++----- loader/slave_loader.cpp | 21 +++++------- node_manager/slave_node.cpp | 9 +++-- .../physical_projection_scan.cpp | 10 +++--- storage/ChunkStorage.cpp | 2 +- 
storage/PartitionStorage.cpp | 34 ++++++++++--------- 8 files changed, 58 insertions(+), 54 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index fd2b23cfa..d675337b3 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -135,11 +135,17 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { logging_->log("Initializing txn log server"); if (!InitTxnLog()) LOG(ERROR) << "failed to initialize txn log"; +#ifndef DEBUG_MODE + if (ismaster) { + initializeClientListener(); + } +#endif + if (ismaster) { /** * Binding all partition for each projection */ - sleep(3); + sleep(15); logging_->log("Advanced Bind all partition for each projection"); if (!AdvancedBindAllPart()) { cout << "failed to bind partitions" << endl; @@ -154,12 +160,6 @@ Environment::Environment(bool ismaster) : ismaster_(ismaster) { << part.projection_id.projection_off << "," << part.partition_off << ">" << endl;*/ } - -#ifndef DEBUG_MODE - if (ismaster) { - initializeClientListener(); - } -#endif } Environment::~Environment() { diff --git a/conf/config b/conf/config index 8ea5140a6..a5be701f2 100755 --- a/conf/config +++ b/conf/config @@ -73,7 +73,7 @@ master_loader_port=9002 master_loader_thread_num=1 -slave_loader_thread_num=1 +slave_loader_thread_num=4 #事务服务器 @@ -86,7 +86,7 @@ txn_server_ip="127.0.0.1" txn_server_port=9100 #事务日志 -txn_log=1 +txn_log=0 txn_log_path="txn-log" diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index d4cc95192..16538338d 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -405,12 +405,12 @@ RetCode MasterLoader::Ingest(const string& message, /// reply ACK to MQ EXEC_AND_DLOG(ret, ack_function(), "replied to MQ", "failed to reply to MQ"); - cout << ingest.ToString() << endl; -/* auto data_size = 0L; - for (auto& part : partition_buffers) { - for (auto& buffer : part) data_size += buffer.length_; - } - cout << "send_data_size:" << data_size << endl;*/ + // cout << ingest.ToString() << endl; + /* auto data_size = 0L; + 
for (auto& part : partition_buffers) { + for (auto& buffer : part) data_size += buffer.length_; + } + cout << "send_data_size:" << data_size << endl;*/ /// distribute partition load task EXEC_AND_DLOG(ret, SendPartitionTupleToSlave(table, partition_buffers, ingest), @@ -766,8 +766,8 @@ RetCode MasterLoader::SelectSocket(const TableDescriptor* table, addr.port = ""; // the port is used for OLAP, not for loading socket_fd = slave_addr_to_socket_[addr]; - cout << "node id:" << node_id_in_rmm << ",node address:" << addr.ip << ":" - << addr.port << ",socket fd:" << socket_fd << endl; + /* cout << "node id:" << node_id_in_rmm << ",node address:" << addr.ip << ":" + << addr.port << ",socket fd:" << socket_fd << endl;*/ return ret; } @@ -799,7 +799,7 @@ RetCode MasterLoader::SendPacket(const int socket_fd, } total_write_num += write_num; } - cout << "send data bytes:" << total_write_num << endl; +// cout << "send data bytes:" << total_write_num << endl; #ifdef MASTER_LOADER_PREF // if (__sync_add_and_fetch(&sent_packetcount, 1) == txn_count_for_debug * 4) { // cout << "send " << sent_packetcount << " packets used " << send_total_time diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index 4f2343860..ab4bf47c7 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -241,21 +241,15 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { if (-1 == (real_read_num = recv(master_fd_, head_buffer, LoadPacket::kHeadLength, MSG_WAITALL))) { PLOG(ERROR) << "failed to receive message length from master"; - cout << "1 received_data_size:" << real_read_num << endl; return rFailure; } else if (0 == real_read_num) { PLOG(ERROR) << "master loader socket has been closed"; - cout << "2 received_data_size:" << real_read_num << endl; - cout << "listen fd:" << listening_fd_ << ",master fd:" << master_fd_ - << endl; return rFailure; } else if (real_read_num < LoadPacket::kHeadLength) { - cout << "3 received_data_size:" << real_read_num << endl; LOG(ERROR) << "received message 
error! only read " << real_read_num << " bytes"; continue; } - cout << "4 received_data_size:" << real_read_num << endl; GET_TIME_SL(start_handle); PERFLOG("received packet head"); uint64_t data_length = @@ -265,7 +259,6 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { DLOG(INFO) << "real packet length is :" << real_packet_length << ". date length is " << data_length; assert(data_length >= 4 && data_length <= 10000000); - char* data_buffer = Malloc(data_length); MemoryGuard guard(data_buffer); // auto-release if (NULL == data_buffer) { @@ -407,7 +400,11 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { ++cur_chunk_id; // get next chunk to write DLOG(INFO) << "Now chunk id is " << cur_chunk_id << ", total number of chunk is" << part_storage->GetChunkNum(); - assert(cur_chunk_id < part_storage->GetChunkNum()); + if (cur_chunk_id < part_storage->GetChunkNum()) { + cout << "cur_chunk_id:" << cur_chunk_id + << " chunk num:" << part_storage->GetChunkNum() << endl; + assert(cur_chunk_id < part_storage->GetChunkNum() && cur_chunk_id); + } cur_block_id = 0; // the block id of next chunk is 0 pos_in_block = 0; } else { @@ -459,7 +456,7 @@ behavior SlaveLoader::WorkInCAF(event_based_actor* self) { } behavior SlaveLoader::PersistInCAF(event_based_actor* self) { - self->delayed_send(self, seconds(5), CheckpointAtom::value); + self->delayed_send(self, seconds(30), CheckpointAtom::value); return {[self](CheckpointAtom) { QueryReq query_req; query_req.include_abort_ = true; @@ -487,11 +484,11 @@ behavior SlaveLoader::PersistInCAF(event_based_actor* self) { */ TxnClient::CommitCheckpoint(query.ts_, g_part_id, new_his_cp, new_rt_cp); - /*cout << "persist:" << g_part_id << ":" << new_his_cp << "," - << new_rt_cp << endl;*/ + cout << "persist:" << g_part_id << ":" << new_his_cp << "," + << new_rt_cp << endl; } } - self->delayed_send(self, seconds(7), CheckpointAtom::value); + self->delayed_send(self, seconds(30), CheckpointAtom::value); }}; } diff --git 
a/node_manager/slave_node.cpp b/node_manager/slave_node.cpp index 58408d02c..3cf3f2abd 100644 --- a/node_manager/slave_node.cpp +++ b/node_manager/slave_node.cpp @@ -239,8 +239,13 @@ RetCode SlaveNode::RegisterToMaster() { { int retry_max_time = 10; int time = 0; - caf::actor master_actor = - remote_actor(Config::master_loader_ip, Config::master_loader_port); + caf::actor master_actor; + try { + master_actor = + remote_actor(Config::master_loader_ip, Config::master_loader_port); + } catch (exception& e) { + LOG(ERROR) << "register to master loader fail"; + } while (1) { try { caf::scoped_actor self; diff --git a/physical_operator/physical_projection_scan.cpp b/physical_operator/physical_projection_scan.cpp index d9836ad5c..98eb7b184 100644 --- a/physical_operator/physical_projection_scan.cpp +++ b/physical_operator/physical_projection_scan.cpp @@ -111,11 +111,11 @@ bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status, // << ",part_id:" << part_id << ",cp:" << cp << endl; partition_reader_iterator_ = partition_handle_->CreateTxnReaderIterator( cp, state_.query_.scan_snapshot_[global_part_id]); - cout << "scan part:" << global_part_id << endl; - cout << "checkpoint:" << cp << endl; - for (auto& part : state_.query_.scan_snapshot_[global_part_id]) { - cout << "<" << part.first << "," << part.second << ">" << endl; - } + cout << "scan part:" << global_part_id << endl; + cout << "checkpoint:" << cp << endl; + for (auto& part : state_.query_.scan_snapshot_[global_part_id]) { + cout << "<" << part.first << "," << part.second << ">" << endl; + } // partition_reader_iterator_ = // partition_handle_->CreateAtomicReaderIterator(); SetReturnStatus(true); diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 73bb209bf..451dd8bb0 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -65,7 +65,7 @@ ChunkStorage::ChunkStorage(const ChunkID& chunk_id, const unsigned& block_size, chunk_size_(CHUNK_SIZE) {} 
ChunkStorage::~ChunkStorage() { - // TODO(wangli): Auto-generated destructor stub + // TODO(wangli): Auto-generated destructor stub } // apply memory for chunk size for writing later by slave loader diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 01d2d21fc..b3b325720 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -92,22 +92,21 @@ void PartitionStorage::AddNewChunk() { number_of_chunks_++; } RetCode PartitionStorage::AddHisChunkWithMemoryApply( unsigned expected_number_of_chunks, const StorageLevel& storage_level) { RetCode ret = rSuccess; + LockGuard guard(write_lock_); if (chunk_list_.size() >= expected_number_of_chunks) return ret; DLOG(INFO) << "now chunk number:" << number_of_chunks_ << ". expected chunk num:" << expected_number_of_chunks; - LockGuard guard(write_lock_); if (chunk_list_.size() >= expected_number_of_chunks) return ret; for (unsigned i = chunk_list_.size(); i < expected_number_of_chunks; i++) { ChunkStorage* chunk = new ChunkStorage(ChunkID(partition_id_, i), BLOCK_SIZE, storage_level); chunk_list_.push_back(chunk); - /* EXEC_AND_DLOG(ret, chunk_list_[i]->ApplyMemory(), - "applied memory for chunk(" << partition_id_.getName() << - "," - << i << ")", - "failed to apply memory for chunk(" << - partition_id_.getName() - << "," << i << ")");*/ + EXEC_AND_DLOG(ret, chunk_list_[i]->ApplyMemory(), + "applied memory for chunk(" << partition_id_.getName() << "," + << i << ")", + "failed to apply memory for chunk(" << partition_id_.getName() + << "," << i << ")"); + assert(ret == rSuccess); } number_of_chunks_ = expected_number_of_chunks; @@ -119,12 +118,13 @@ RetCode PartitionStorage::AddHisChunkWithMemoryApply( RetCode PartitionStorage::AddRtChunkWithMemoryApply( unsigned expected_number_of_chunks, const StorageLevel& storage_level) { RetCode ret = rSuccess; + LockGuard guard(write_lock_); // cout << "******-1*****" << endl; if (number_of_rt_chunks_ >= expected_number_of_chunks) return ret; 
DLOG(INFO) << "now rt chunk number:" << number_of_rt_chunks_ << ". expected rt chunk num:" << expected_number_of_chunks; // cout << "******0*****" << endl; - LockGuard guard(write_lock_); + if (number_of_rt_chunks_ >= expected_number_of_chunks) return ret; for (unsigned i = number_of_rt_chunks_; i < expected_number_of_chunks; i++) { @@ -282,6 +282,7 @@ PartitionStorage::TxnPartitionReaderIterator::TxnPartitionReaderIterator( begin += len; } } + cout << "snapshot size:" << rt_strip_list_.size() << endl; /* string str = "rt:"; for (auto& strip : rt_strip_list_) { str += "<" + to_string(strip.first) + "," + to_string(strip.second) + ">"; @@ -305,6 +306,7 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( if (chunk_it_ != nullptr) delete chunk_it_; chunk_it_ = ps_->chunk_list_[chunk_cur_]->CreateChunkReaderIterator(); } + assert(chunk_it_!=nullptr); chunk_it_->GetNextBlockAccessor(ba); if (ba == nullptr) { if (chunk_it_ != nullptr) delete chunk_it_; @@ -401,18 +403,18 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, auto move = BLOCK_SIZE - (begin + BLOCK_SIZE) % BLOCK_SIZE; if (move > end - begin) move = end - begin; // update historical chunk cur - AddHisChunkWithMemoryApply(begin / CHUNK_SIZE + 1, MEMORY); - auto chunkit = - chunk_list_[begin / CHUNK_SIZE]->CreateChunkReaderIterator(); - assert(chunkit != nullptr); + AddHisChunkWithMemoryApply(begin / CHUNK_SIZE + 1, HDFS); + /* auto chunkit = + chunk_list_[begin / CHUNK_SIZE]->CreateChunkReaderIterator(); + assert(chunkit != nullptr);*/ if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( chunk_list_[begin / CHUNK_SIZE]->GetChunkID(), chunk_his)) { - assert(false); + // cout << "@@@@@@chunk id:" << begin / CHUNK_SIZE << "@@@@@@" << endl; + assert(false && begin && begin / CHUNK_SIZE); } // update real time chunk cur if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( - ChunkID(partition_id_, new_his_cp / CHUNK_SIZE, true), - chunk_rt)) { + rt_chunk_list_[begin / 
CHUNK_SIZE]->GetChunkID(), chunk_rt)) { assert(false); return old_his_cp; } From 98d14f8d58d79313f4deb6048c5bfc257ccc78ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Sat, 17 Sep 2016 17:37:28 +0800 Subject: [PATCH 55/58] fix a bug for "count (*) " miss lines caused by block tail wrong init --- conf/config | 2 +- loader/master_loader.cpp | 90 +++++++++++-------- loader/master_loader.h | 5 +- loader/slave_loader.cpp | 38 ++++++-- loader/slave_loader.h | 6 ++ .../physical_projection_scan.cpp | 10 +-- storage/ChunkStorage.cpp | 21 ++++- storage/ChunkStorage.h | 9 +- storage/PartitionStorage.cpp | 45 +++++++--- storage/PartitionStorage.h | 19 +++- txn_manager/txn_server.cpp | 2 +- 11 files changed, 178 insertions(+), 69 deletions(-) diff --git a/conf/config b/conf/config index a5be701f2..3ffe7fc60 100755 --- a/conf/config +++ b/conf/config @@ -73,7 +73,7 @@ master_loader_port=9002 master_loader_thread_num=1 -slave_loader_thread_num=4 +slave_loader_thread_num=1 #事务服务器 diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 16538338d..f2d886364 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -112,6 +112,9 @@ static const int txn_count_for_debug = 10000; namespace claims { namespace loader { + +ofstream MasterLoader::logfile; + void MasterLoader::IngestionRequest::Show() { DLOG(INFO) << "table name:" << table_name_ << ", column separator:" << col_sep_ @@ -171,44 +174,47 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, cout << "ip:" << node->first.ip << ", socket fd:" << node->second << endl; /* - /// test whether socket works well - ostringstream oss; - oss << "hello, i'm master, whose address is " - << mloader->master_loader_ip << ":" - << to_string(mloader->master_loader_port) << ". 
\0"; - - int message_length = oss.str().length(); - DLOG(INFO) << "message length is " << message_length; - - if (-1 == - write(new_slave_fd, - reinterpret_cast(&message_length), 4)) { - PLOG(ERROR) << "failed to send message length to slave(" << ip - << ":" - << port << ")"; - } else { - DLOG(INFO) << "message length is sent"; - } - if (-1 == write(new_slave_fd, oss.str().c_str(), - message_length)) { - PLOG(ERROR) << "failed to send message to slave(" << ip << ":" - << port - << ")"; - } else { - DLOG(INFO) << "message buffer is sent"; - } - */ + /// test whether socket works well + ostringstream oss; + oss << "hello, i'm master, whose address is " + << mloader->master_loader_ip << ":" + << to_string(mloader->master_loader_port) << ". \0"; + + int message_length = oss.str().length(); + DLOG(INFO) << "message length is " << message_length; + + if (-1 == + write(new_slave_fd, + reinterpret_cast(&message_length), 4)) { + PLOG(ERROR) << "failed to send message length to slave(" + << + ip + << ":" + << port << ")"; + } else { + DLOG(INFO) << "message length is sent"; + } + if (-1 == write(new_slave_fd, oss.str().c_str(), + message_length)) { + PLOG(ERROR) << "failed to send message to slave(" << ip << + ":" + << port + << ")"; + } else { + DLOG(INFO) << "message buffer is sent"; + } + */ return 1; }, [=](LoadAckAtom, uint64_t txn_id, bool is_commited) { // NOLINT /* - TODO(ANYONE): there should be a thread checking whether - transaction overtime periodically and abort these transaction - and delete from map. - Consider that: if this function access the item in map just deleted - by above thread, unexpected thing happens. - */ + TODO(ANYONE): there should be a thread checking whether + transaction overtime periodically and abort these transaction + and delete from map. + Consider that: if this function access the item in map just deleted + by above thread, unexpected thing happens. 
+ */ DLOG(INFO) << "received a commit result " << is_commited << " of txn with id:" << txn_id; @@ -244,7 +250,7 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, mloader->txn_commint_info_.erase(txn_id); mloader->commit_info_spin_lock_.release(); - // FOR DEBUG + // FOR DEBUG #ifdef MASTER_LOADER_PREF if (++debug_finished_txn_count == txn_count_for_debug) { cout << "\n" << txn_count_for_debug << " txn used " @@ -462,6 +468,10 @@ RetCode MasterLoader::GetRequestFromMessage(const string& message, pos = next_pos + 1; next_pos = message.find(',', pos); req->row_sep_ = message.substr(pos, next_pos - pos); + /* + logfile << "table:" << req->table_name_ << ",col sep:" << req->col_sep_ + << ",row sep" << req->row_sep_ << endl; + */ pos = next_pos + 1; @@ -549,7 +559,13 @@ RetCode MasterLoader::GetPartitionTuples( "tuple is invalid." << tuple_string); #endif correct_tuple_buffer.push_back(tuple_buffer); + + /* for (auto i = 1; i < table_schema->getncolumns(); i++) { + logfile << table_schema->getColumnValue(tuple_buffer, i) << "|"; + } + logfile << endl;*/ } + // logfile.flush(); PERFLOG("all tuples are tovalued"); // map every tuple in different partition @@ -767,7 +783,7 @@ RetCode MasterLoader::SelectSocket(const TableDescriptor* table, socket_fd = slave_addr_to_socket_[addr]; /* cout << "node id:" << node_id_in_rmm << ",node address:" << addr.ip << ":" - << addr.port << ",socket fd:" << socket_fd << endl;*/ + << addr.port << ",socket fd:" << socket_fd << endl;*/ return ret; } @@ -845,6 +861,8 @@ void* MasterLoader::Work(void* arg) { } void* MasterLoader::StartMasterLoader(void* arg) { + remove("master_loader_log.txt"); + logfile.open("master_loader_log.txt"); Config::getInstance(); LOG(INFO) << "start master loader..."; @@ -875,7 +893,7 @@ void* MasterLoader::StartMasterLoader(void* arg) { // use a topic or queue set the 'useTopics' flag. 
//============================================================ std::string destURI = - "t123?consumer.prefetchSize = 1 "; // ?consumer.prefetchSize=1"; + "t1234?consumer.prefetchSize = 1 "; // ?consumer.prefetchSize=1"; //============================================================ // set to true to use topics instead of queues diff --git a/loader/master_loader.h b/loader/master_loader.h index e69690d0a..e4115d665 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -51,7 +52,7 @@ using std::function; using std::set; using std::unordered_map; using std::unordered_set; - +using std::ofstream; namespace claims { namespace catalog { class TableDescriptor; @@ -224,6 +225,8 @@ class MasterLoader { unordered_map txn_start_time_; static uint64_t txn_average_delay_; + + static ofstream logfile; }; } /* namespace loader */ diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index ab4bf47c7..de40f21b7 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -96,6 +96,8 @@ static const char* txn_count_string = "5000"; namespace claims { namespace loader { +ofstream SlaveLoader::logfile; + SlaveLoader::SlaveLoader() {} SlaveLoader::~SlaveLoader() {} @@ -362,6 +364,19 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { uint64_t total_written_length = 0; uint64_t data_length = packet.data_length_; HdfsInMemoryChunk chunk_info; + + Schema* schema = Catalog::getInstance() + ->getTable(table_id) + ->getProjectoin(prj_id) + ->getSchema(); + + /* for (auto p = 0; p < data_length; p += tuple_size) { + for (auto col = 1; col < schema->getncolumns(); col++) { + logfile << schema->getColumnValue(packet.data_buffer_ + p, col) + "|"; + } + logfile << endl; + }*/ + while (total_written_length < data_length) { /// get start position of current chunk if (BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( @@ -379,12 +394,16 @@ RetCode 
SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { << chunk_info.hook + CHUNK_SIZE; InMemoryChunkWriterIterator writer(chunk_info.hook, CHUNK_SIZE, cur_block_id, BLOCK_SIZE, pos_in_block, - tuple_size); + tuple_size, schema); // cout << "store data length:" << data_length << endl; do { // write to every block +/* logfile << cur_chunk_id << "->" << writer.GetBlockId() << "->" + << writer.GetBlockPos() << ",";*/ uint64_t written_length = writer.Write(packet.data_buffer_ + total_written_length, data_length - total_written_length); +/* logfile << writer.GetBlockPos() + written_length << "," + << written_length / schema->getTupleMaxSize() << endl;*/ total_written_length += written_length; DLOG(INFO) << "written " << written_length << " bytes into chunk:" << cur_chunk_id @@ -399,11 +418,12 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { } while (writer.NextBlock()); ++cur_chunk_id; // get next chunk to write DLOG(INFO) << "Now chunk id is " << cur_chunk_id - << ", total number of chunk is" << part_storage->GetChunkNum(); - if (cur_chunk_id < part_storage->GetChunkNum()) { + << ", total number of chunk is" + << part_storage->GetRTChunkNum(); + if (cur_chunk_id < part_storage->GetRTChunkNum()) { cout << "cur_chunk_id:" << cur_chunk_id - << " chunk num:" << part_storage->GetChunkNum() << endl; - assert(cur_chunk_id < part_storage->GetChunkNum() && cur_chunk_id); + << " chunk num:" << part_storage->GetRTChunkNum() << endl; + assert(cur_chunk_id < part_storage->GetRTChunkNum() && cur_chunk_id); } cur_block_id = 0; // the block id of next chunk is 0 pos_in_block = 0; @@ -412,6 +432,7 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { assert(false && "no chunk with this chunk id"); } } + return ret; } @@ -442,8 +463,11 @@ RetCode SlaveLoader::SendAckToMasterLoader(const uint64_t& txn_id, // this method has the best performance behavior SlaveLoader::WorkInCAF(event_based_actor* self) { + remove("slave_loader_log.txt"); + 
logfile.open("slave_loader_log.txt"); return {[=](LoadPacketAtom, LoadPacket* packet) { // NOLINT RetCode ret = rSuccess; + EXEC_AND_DLOG(ret, StoreDataInMemory(*packet), "stored data", "failed to store"); /// return result to master loader @@ -456,7 +480,7 @@ behavior SlaveLoader::WorkInCAF(event_based_actor* self) { } behavior SlaveLoader::PersistInCAF(event_based_actor* self) { - self->delayed_send(self, seconds(30), CheckpointAtom::value); + // self->delayed_send(self, seconds(10), CheckpointAtom::value); return {[self](CheckpointAtom) { QueryReq query_req; query_req.include_abort_ = true; @@ -488,7 +512,7 @@ behavior SlaveLoader::PersistInCAF(event_based_actor* self) { << new_rt_cp << endl; } } - self->delayed_send(self, seconds(30), CheckpointAtom::value); + self->delayed_send(self, seconds(10), CheckpointAtom::value); }}; } diff --git a/loader/slave_loader.h b/loader/slave_loader.h index 49d9a981c..4816b5416 100644 --- a/loader/slave_loader.h +++ b/loader/slave_loader.h @@ -29,11 +29,13 @@ #ifndef LOADER_SLAVE_LOADER_H_ #define LOADER_SLAVE_LOADER_H_ #include +#include #include #include #include #include #include +#include #include "../catalog/catalog.h" #include "../storage/BlockManager.h" #include "../txn_manager/txn.hpp" @@ -48,6 +50,7 @@ using std::unordered_map; using caf::behavior; using caf::event_based_actor; using std::string; +using std::fstream; using claims::catalog::Catalog; using claims::txn::CheckpointAtom; using claims::txn::UInt64; @@ -110,6 +113,9 @@ class SlaveLoader { SpineLock queue_lock_; semaphore packet_count_; Lock partition_storage_lock_; + + public: + static ofstream logfile; }; } /* namespace loader */ diff --git a/physical_operator/physical_projection_scan.cpp b/physical_operator/physical_projection_scan.cpp index 98eb7b184..b6e74dd0a 100644 --- a/physical_operator/physical_projection_scan.cpp +++ b/physical_operator/physical_projection_scan.cpp @@ -111,11 +111,11 @@ bool PhysicalProjectionScan::Open(SegmentExecStatus* const 
exec_status, // << ",part_id:" << part_id << ",cp:" << cp << endl; partition_reader_iterator_ = partition_handle_->CreateTxnReaderIterator( cp, state_.query_.scan_snapshot_[global_part_id]); - cout << "scan part:" << global_part_id << endl; - cout << "checkpoint:" << cp << endl; - for (auto& part : state_.query_.scan_snapshot_[global_part_id]) { - cout << "<" << part.first << "," << part.second << ">" << endl; - } + cout << "version:" << state_.query_.ts_ << ",part:" << global_part_id + << ",checkpoint:" << cp; + for (auto& part : state_.query_.scan_snapshot_[global_part_id]) + cout << "<" << part.first << "," << part.second << ">"; + cout << endl; // partition_reader_iterator_ = // partition_handle_->CreateAtomicReaderIterator(); SetReturnStatus(true); diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 451dd8bb0..3ddb64f6e 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -37,7 +37,7 @@ #include "../Debug.h" #include "../utility/rdtsc.h" #include "../utility/warmup.h" - +#include "../loader/slave_loader.cpp" using claims::common::CStrError; using claims::common::rUnkownStroageLevel; using claims::common::rFailOpenFileInDiskChunkReaderIterator; @@ -45,6 +45,7 @@ using claims::common::rFailReadOneBlockInDiskChunkReaderIterator; using claims::common::rFailOpenHDFSFileInStorage; using claims::common::rFailSetStartOffsetInStorage; using claims::common::HdfsConnector; +using claims::loader::SlaveLoader; bool ChunkReaderIterator::NextBlock() { lock_.acquire(); if (this->cur_block_ >= this->number_of_blocks_) { @@ -86,9 +87,11 @@ RetCode ChunkStorage::ApplyMemory() { /* * set each block tail to "zero" in new chunk */ - for (auto offset = 0; offset < CHUNK_SIZE; offset += BLOCK_SIZE) + for (auto offset = BLOCK_SIZE; offset <= CHUNK_SIZE; offset += BLOCK_SIZE) { *reinterpret_cast(chunk_info.hook + offset - sizeof(unsigned)) = 0; + // cout << "block:" << offset / BLOCK_SIZE << "->" << 0 << endl; + } /* update the chunk info in the 
Chunk store in case that the * chunk_info is updated.*/ @@ -489,6 +492,20 @@ uint64_t InMemoryChunkWriterIterator::Write(const void* const buffer_to_write, << ". buffer to write: " << buffer_to_write; memcpy(block_offset + pos_in_block_, buffer_to_write, actual_written_tuple_count * tuple_size_); + /* for (auto p = 0; p < actual_written_tuple_count; p++) { + for (auto c = 1; c < schema_->getncolumns(); c++) + SlaveLoader::logfile + << schema_->getColumnValue(buffer_to_write + p * tuple_size_, c) + << "|"; + SlaveLoader::logfile << schema_->getTupleMaxSize() << endl; + }*/ + /* + SlaveLoader::logfile << block_id_ << "," << pos_in_block_ << "," + << pos_in_block_ + + actual_written_tuple_count* tuple_size_ << + endl; + */ + DLOG(INFO) << "copy " << actual_written_tuple_count * tuple_size_ << " bytes into block:" << block_id_; diff --git a/storage/ChunkStorage.h b/storage/ChunkStorage.h index da4a73a2d..e12e20fdd 100755 --- a/storage/ChunkStorage.h +++ b/storage/ChunkStorage.h @@ -276,13 +276,15 @@ class InMemoryChunkWriterIterator { public: InMemoryChunkWriterIterator(void* chunk_offset, uint64_t chunk_size, uint64_t block_id, uint64_t block_size, - uint64_t pos_in_block, uint64_t tuple_size) + uint64_t pos_in_block, uint64_t tuple_size, + Schema* schema) : chunk_offset_(chunk_offset), chunk_size_(chunk_size), block_id_(block_id), block_size_(block_size), pos_in_block_(pos_in_block), - tuple_size_(tuple_size) {} + tuple_size_(tuple_size), + schema_(schema) {} public: uint64_t Write(const void* const buffer_to_write, uint64_t length_to_write); @@ -292,6 +294,8 @@ class InMemoryChunkWriterIterator { pos_in_block_ = 0; return true; } + uint64_t GetBlockId() const { return block_id_; } + uint64_t GetBlockPos() const { return pos_in_block_; } private: void* chunk_offset_; @@ -300,6 +304,7 @@ class InMemoryChunkWriterIterator { uint64_t block_size_; uint64_t pos_in_block_; uint64_t tuple_size_; + Schema* schema_; }; class ChunkStorage { public: diff --git 
a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index b3b325720..9146d8867 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -38,7 +38,6 @@ #include "../common/memory_handle.h" #include "../Config.h" #include "../Resource/BufferManager.h" -#include "../utility/lock_guard.h" #include "../storage/BlockManager.h" #include "../common/file_handle/file_handle_imp.h" #include "../common/file_handle/hdfs_file_handle_imp.h" @@ -48,6 +47,8 @@ using claims::common::FileHandleImpFactory; using claims::common::kHdfs; using claims::utility::LockGuard; +ofstream PartitionStorage::TxnPartitionReaderIterator::logfile; + /** * According to number_of_chunks, construct chunk from partition and add into * the chunk_list_. Meantime, you can get specific information about chunk. when @@ -87,12 +88,21 @@ PartitionStorage::~PartitionStorage() { chunk_list_.clear(); } -void PartitionStorage::AddNewChunk() { number_of_chunks_++; } +void PartitionStorage::AddNewRTChunk() { + LockGuard guard(write_lock_); + number_of_rt_chunks_++; +} + +void PartitionStorage::AddNewHisChunk() { + LockGuard guard(write_lock_); + number_of_chunks_++; +} RetCode PartitionStorage::AddHisChunkWithMemoryApply( unsigned expected_number_of_chunks, const StorageLevel& storage_level) { RetCode ret = rSuccess; LockGuard guard(write_lock_); + // cout << "1..." << endl; if (chunk_list_.size() >= expected_number_of_chunks) return ret; DLOG(INFO) << "now chunk number:" << number_of_chunks_ << ". expected chunk num:" << expected_number_of_chunks; @@ -107,6 +117,7 @@ RetCode PartitionStorage::AddHisChunkWithMemoryApply( "failed to apply memory for chunk(" << partition_id_.getName() << "," << i << ")"); assert(ret == rSuccess); + // cout << "2...." << ret << endl; } number_of_chunks_ = expected_number_of_chunks; @@ -173,6 +184,7 @@ void PartitionStorage::UpdateChunksWithInsertOrAppend( * call back actual method. 
*/ void PartitionStorage::RemoveAllChunks(const PartitionID& partition_id) { + LockGuard guard(write_lock_); if (!chunk_list_.empty()) { vector::iterator iter = chunk_list_.begin(); MemoryChunkStore* mcs = MemoryChunkStore::GetInstance(); @@ -288,15 +300,18 @@ PartitionStorage::TxnPartitionReaderIterator::TxnPartitionReaderIterator( str += "<" + to_string(strip.first) + "," + to_string(strip.second) + ">"; } cout << str << endl;*/ + logfile.open("ps_log.txt"); } PartitionStorage::TxnPartitionReaderIterator::~TxnPartitionReaderIterator() { for (auto block : rt_block_buffer_) free(block); + logfile.close(); } bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( BlockStreamBase*& block) { LockGuard guard(lock_); + HdfsInMemoryChunk chunk; ChunkReaderIterator::block_accessor* ba = nullptr; if (block_cur_ < last_his_block_) { // scan historical data int64_t chunk_cur = block_cur_ / (CHUNK_SIZE / BLOCK_SIZE); @@ -306,7 +321,7 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( if (chunk_it_ != nullptr) delete chunk_it_; chunk_it_ = ps_->chunk_list_[chunk_cur_]->CreateChunkReaderIterator(); } - assert(chunk_it_!=nullptr); + assert(chunk_it_ != nullptr); chunk_it_->GetNextBlockAccessor(ba); if (ba == nullptr) { if (chunk_it_ != nullptr) delete chunk_it_; @@ -318,6 +333,8 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( delete ba; ba = nullptr; block_cur_++; + /* logfile << chunk_cur_ << "->" << (block_cur_ - 1) % 1024 << "->" + << "full1" << endl;*/ return true; } } else if (rt_block_index_ < rt_strip_list_.size()) { // scan real-time data @@ -338,7 +355,6 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( ps_->rt_chunk_list_[rt_chunk_cur_]->CreateChunkReaderIterator(); assert(rt_chunk_it_ != nullptr); } - do { // move to rt_block_cur if (ba != nullptr) delete ba; rt_chunk_it_->GetNextBlockAccessor(ba); @@ -347,6 +363,9 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( if (len == BLOCK_SIZE) { // directly 
return pointer ba->GetBlock(block); + /* logfile << rt_chunk_cur_ << "->" << (rt_block_cur_ - 1) % 1024 << + "->" + << "full2" << endl;*/ } else { auto tuple_size = reinterpret_cast(block)->getTupleSize(); @@ -359,10 +378,21 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( auto des_addr = reinterpret_cast(malloc(BLOCK_SIZE)); auto scr_addr = block->getBlockDataAddress() + offset_in_block; memcpy(des_addr, scr_addr, len); + *reinterpret_cast(des_addr + BLOCK_SIZE - sizeof(unsigned)) = + tuple_count; reinterpret_cast(block)->setBlockDataAddress(des_addr); reinterpret_cast(block)->setTuplesInBlock(tuple_count); rt_block_buffer_.push_back(des_addr); + /* logfile << rt_chunk_cur_ << "->" << (rt_block_cur_ - 1) % 1024 << + "->" + << offset_in_block << "," << offset_in_block + len << "," + << tuple_count << endl;*/ } + auto cout = + *reinterpret_cast(block->getBlockDataAddress() + + BLOCK_SIZE - sizeof(unsigned int)); + /* logfile << rt_chunk_cur_ << "->" << (rt_block_cur_ - 1) % 1024 << "->" + << cout << endl;*/ delete ba; ba = nullptr; rt_block_index_++; @@ -404,12 +434,8 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, if (move > end - begin) move = end - begin; // update historical chunk cur AddHisChunkWithMemoryApply(begin / CHUNK_SIZE + 1, HDFS); - /* auto chunkit = - chunk_list_[begin / CHUNK_SIZE]->CreateChunkReaderIterator(); - assert(chunkit != nullptr);*/ if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( chunk_list_[begin / CHUNK_SIZE]->GetChunkID(), chunk_his)) { - // cout << "@@@@@@chunk id:" << begin / CHUNK_SIZE << "@@@@@@" << endl; assert(false && begin && begin / CHUNK_SIZE); } // update real time chunk cur @@ -426,13 +452,10 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, auto real_move = (begin + move) % BLOCK_SIZE != 0 ? 
move : move - sizeof(unsigned); auto tuple_count = real_move / tuple_size; - // cout << "tuple count,size : " << tuple_count << "," << tuple_size - // << endl; memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, chunk_rt.hook + begin % CHUNK_SIZE, move); auto tail_offset = (new_his_cp + BLOCK_SIZE) % CHUNK_SIZE - sizeof(unsigned); - // cout << "tail_offset:" << tail_offset << endl; *reinterpret_cast(chunk_his.hook + tail_offset) = tuple_count; } diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index 19dd58892..e3c5f06c2 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -38,10 +38,11 @@ #include "../storage/StorageLevel.h" #include "../storage/PartitionReaderIterator.h" #include "../utility/lock.h" +#include "../utility/lock_guard.h" #include "../Debug.h" using claims::txn::PStrip; using claims::txn::UInt64; - +using claims::utility::LockGuard; // namespace claims { // namespace storage { /** @@ -147,6 +148,9 @@ class PartitionStorage { vector rt_block_buffer_; Lock lock_; + + public: + static ofstream logfile; }; /** @@ -163,7 +167,8 @@ class PartitionStorage { */ virtual ~PartitionStorage(); - void AddNewChunk(); + void AddNewRTChunk(); + void AddNewHisChunk(); RetCode AddHisChunkWithMemoryApply(unsigned expected_number_of_chunks, const StorageLevel& storage_level); @@ -171,7 +176,15 @@ class PartitionStorage { RetCode AddRtChunkWithMemoryApply(unsigned expected_number_of_chunks, const StorageLevel& storage_level); - const int GetChunkNum() const { return chunk_list_.size(); } + int GetRTChunkNum() { + LockGuard guard(write_lock_); + return rt_chunk_list_.size(); + } + + int GetHisChunkNum() { + LockGuard guard(write_lock_); + return chunk_list_.size(); + } /** * @brief Method description: Expand the container of partition diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index 3c3f8ce20..af4705326 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -238,7 +238,7 @@ caf::behavior 
TxnServer::make_behavior() { } catch (...) { cout << "txn server bind to port:" << port_ << " fail" << endl; } - this->delayed_send(this, seconds(3), GCAtom::value); + // this->delayed_send(this, seconds(3), GCAtom::value); return { [this](DebugAtom, string flag) -> caf::message { cout << "debug begin" << endl; From b50393de6a4249f699b60ed273f347987dc94b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Sat, 22 Oct 2016 21:56:06 +0800 Subject: [PATCH 56/58] fix bug for memory leak(data ingestion) and chunk memory alloc(level HDFS->MEMORY) --- Config.cpp | 8 ++++ Config.h | 2 + Server.cpp | 8 +++- catalog/catalog.cpp | 4 +- conf/config | 3 ++ loader/load_packet.cpp | 2 +- loader/load_packet.h | 13 +++--- loader/master_loader.cpp | 43 +++++++++++++------ loader/slave_loader.cpp | 43 +++++++++---------- .../physical_projection_scan.cpp | 3 +- storage/BlockManager.cpp | 11 +++-- storage/BlockManager.h | 4 +- storage/ChunkStorage.cpp | 24 +++++++---- storage/ChunkStorage.h | 10 +++-- storage/MemoryManager.cpp | 2 + storage/PartitionStorage.cpp | 24 +++++++---- 16 files changed, 131 insertions(+), 73 deletions(-) diff --git a/Config.cpp b/Config.cpp index 025fba699..f4b29acea 100644 --- a/Config.cpp +++ b/Config.cpp @@ -94,6 +94,8 @@ int Config::memory_utilization; bool Config::is_master_loader; std::string Config::master_loader_ip; int Config::master_loader_port; +std::string Config::amq_url; +std::string Config::amq_topic; bool Config::enable_txn_server; int Config::txn_server_cores; @@ -173,6 +175,10 @@ void Config::initialize() { master_loader_port = getInt("master_loader_port", 9001); + amq_url = getString("amq_url", "58.198.176.92:61616"); + + amq_topic = getString("amq_topic", "claims"); + // txn manager enable_txn_server = getBoolean("txn_server", true); @@ -250,6 +256,8 @@ void Config::print_configure() const { std::cout << "catalog_file:" << catalog_file << std::endl; std::cout << "codegen:" << enable_codegen << 
std::endl; std::cout << "load_thread_num:" << load_thread_num << std::endl; + std::cout << "amq_url:" << amq_url << std::endl; + std::cout << "amq_topic:" << amq_topic << std::endl; std::cout << "enable_txn_serverr:" << enable_txn_server << std::endl; std::cout << "txn_server_cores:" << txn_server_cores << std::endl; diff --git a/Config.h b/Config.h index 3828aec65..1380b0066 100644 --- a/Config.h +++ b/Config.h @@ -83,6 +83,8 @@ class Config { static bool is_master_loader; static std::string master_loader_ip; static int master_loader_port; + static std::string amq_url; + static std::string amq_topic; static bool enable_txn_server; static int txn_server_cores; diff --git a/Server.cpp b/Server.cpp index 7f9cc8755..02bcf3d32 100644 --- a/Server.cpp +++ b/Server.cpp @@ -2,6 +2,7 @@ #include #include #include +#include // #include "Test/set_up_environment.h" #include "./Test/TestMain.cpp" #include "common/log/logging.h" @@ -100,16 +101,19 @@ int main(int argc, char** argv) { actor = "slave"; #ifndef FORK + std::string cmd; if (master) { Environment::getInstance(master); // create_poc_data_four_partitions(); // create_poc_data_one_partitions(); // print_welcome(); // ExecuteLogicalQueryPlan(); - while (true) sleep(1); + while (std::cin.get() != 'q') { + sleep(1); + } } else { Environment::getInstance(master); - while (true) sleep(1); + while (std::cin.get() != 'q') sleep(1); } #else int pid = fork(); diff --git a/catalog/catalog.cpp b/catalog/catalog.cpp index e3f084b8e..41a897664 100644 --- a/catalog/catalog.cpp +++ b/catalog/catalog.cpp @@ -260,11 +260,11 @@ RetCode Catalog::restoreCatalog() { } else if (!read_connector_->CanAccess()) { LOG(INFO) << "The catalog file and data file all are not existed" << endl; return rSuccess; - } else if (!IsDataFileExist()) { + } /* else if (!IsDataFileExist()) { LOG(WARNING) << "There are no data file while catalog file exists. 
" "The catalog file will be overwrite" << endl; return rSuccess; - } else { + }*/ else { EXEC_AND_ONLY_LOG_ERROR(ret, read_connector_->Open(), "failed to open catalog file: " << Config::catalog_file << " with Read mode"); diff --git a/conf/config b/conf/config index 3ffe7fc60..7e07b8960 100755 --- a/conf/config +++ b/conf/config @@ -75,6 +75,9 @@ master_loader_thread_num=1 slave_loader_thread_num=1 +amq_url="58.198.176.92:61616" + +amq_topic="t1234" #事务服务器 txn_server=1 diff --git a/loader/load_packet.cpp b/loader/load_packet.cpp index d7a0c95f3..a1c90f7fa 100644 --- a/loader/load_packet.cpp +++ b/loader/load_packet.cpp @@ -61,7 +61,7 @@ RetCode LoadPacket::Serialize() { } RetCode LoadPacket::Deserialize(const void* const head_buffer, - void* data_buffer) { + const void* const data_buffer) { txn_id_ = *reinterpret_cast(head_buffer); global_part_id_ = *reinterpret_cast(head_buffer + sizeof(uint64_t)); diff --git a/loader/load_packet.h b/loader/load_packet.h index a0102037f..3483cd2a7 100644 --- a/loader/load_packet.h +++ b/loader/load_packet.h @@ -44,11 +44,11 @@ using OkAtom = caf::atom_constant; /************** LoadPacket format *****************/ /** field type length **********/ /****************************************************/ -/** transaction_id uint64_t 4 **/ -/** global_part_id uint64_t 4 **/ -/** position uint64_t 4 **/ -/** offset uint64_t 4 **/ -/** date_length uint64_t 4 **/ +/** transaction_id uint64_t 8 **/ +/** global_part_id uint64_t 8 **/ +/** position uint64_t 8 **/ +/** offset uint64_t 8 **/ +/** date_length uint64_t 8 **/ /** data void* data_length **/ /****************************************************/ struct LoadPacket { @@ -67,7 +67,8 @@ struct LoadPacket { ~LoadPacket(); RetCode Serialize(); - RetCode Deserialize(const void* const head_buffer, void* data_buffer); + RetCode Deserialize(const void* const head_buffer, + const void* const data_buffer); public: static const int kHeadLength = 5 * sizeof(uint64_t); diff --git 
a/loader/master_loader.cpp b/loader/master_loader.cpp index f2d886364..15ff2b757 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -423,6 +423,8 @@ RetCode MasterLoader::Ingest(const string& message, "sent every partition data to send queue", "failed to send every partition data to queue"); + for (auto& part_list : partition_buffers) + for (auto& part : part_list) delete part.buffer_; assert(rSuccess == ret); return ret; @@ -517,8 +519,7 @@ RetCode MasterLoader::GetPartitionTuples( Schema* table_schema = table->getSchema(); MemoryGuard table_schema_guard(table_schema); vector correct_tuple_buffer; - STLGuardWithRetCode> guard(correct_tuple_buffer, - ret); // attention! + // STLMemoryGuard> guard(correct_tuple_buffer); // attention! // must set RetCode 'ret' before returning error code!!!! ThreeLayerSTLGuardWithRetCode>>> return_tuple_buffer_guard(tuple_buffer_per_part, ret); // attention! @@ -588,6 +589,7 @@ RetCode MasterLoader::GetPartitionTuples( // extract the sub tuple according to the projection schema void* target = Malloc(tuple_max_length); // newmalloc if (target == NULL) { + assert(false); return (ret = claims::common::rNoMemory); } sub_tuple.getSubTuple(tuple_buffer, target); @@ -603,6 +605,7 @@ RetCode MasterLoader::GetPartitionTuples( tuple_buffer_per_part[i][part].push_back(target); } } + for (auto& tuple : correct_tuple_buffer) delete tuple; return ret; } @@ -619,6 +622,9 @@ RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( ->getPartitioner() ->getNumberOfPartitions() && "partition number is not match"); + Schema* schema = table->getProjectoin(i)->getSchema(); + MemoryGuard schema_guard(schema); + int tuple_len = schema->getTupleMaxSize(); for (int j = 0; j < tuple_buffer_per_part[i].size(); ++j) { int tuple_count = tuple_buffer_per_part[i][j].size(); /* @@ -626,7 +632,7 @@ RetCode MasterLoader::MergePartitionTupleIntoOneBuffer( * buffer indicates the index of partition */ // if (0 == tuple_count) continue; - int tuple_len 
= table->getProjectoin(i)->getSchema()->getTupleMaxSize(); + int buffer_len = tuple_count * tuple_len; DLOG(INFO) << "the tuple length of prj:" << i << ",part:" << j << ",table:" << table->getTableName() << " is:" << tuple_len; @@ -659,9 +665,12 @@ RetCode MasterLoader::ApplyTransaction( uint64_t table_id = table->get_table_id(); FixTupleIngestReq req; + for (int i = 0; i < table->getNumberOfProjection(); ++i) { ProjectionDescriptor* prj = table->getProjectoin(i); - uint64_t tuple_length = prj->getSchema()->getTupleMaxSize(); + Schema* schema = prj->getSchema(); + MemoryGuard schema_guard(schema); + uint64_t tuple_length = schema->getTupleMaxSize(); for (int j = 0; j < prj->getPartitioner()->getNumberOfPartitions(); ++j) { if (partition_buffers[i][j].length_ == 0) continue; req.InsertStrip(GetGlobalPartId(table_id, i, j), tuple_length, @@ -879,21 +888,29 @@ void* MasterLoader::StartMasterLoader(void* arg) { // tcp://127.0.0.1:61616?wireFormat=openwire same as above // tcp://127.0.0.1:61613?wireFormat=stomp use stomp instead // - std::string brokerURI = - "failover:(tcp://" - "58.198.176.92:61616?wireFormat=openwire&connection.useAsyncSend=true" - // "&transport.commandTracingEnabled=true" - // "&transport.tcpTracingEnabled=true" - // "&wireFormat.tightEncodingEnabled=true" - ")"; - + /* std::string brokerURI = + "failover:(tcp://" + "58.198.176.92:61616?wireFormat=openwire&connection.useAsyncSend=true" + + // "&transport.commandTracingEnabled=true" + // "&transport.tcpTracingEnabled=true" + // "&wireFormat.tightEncodingEnabled=true" + ")";*/ + std::string brokerURI = "failover:(tcp://" + Config::amq_url + + "?wireFormat=openwire&connection.useAsyncSend=true" + + // "&transport.commandTracingEnabled=true" + // "&transport.tcpTracingEnabled=true" + // "&wireFormat.tightEncodingEnabled=true" + ")"; //============================================================ // This is the Destination Name and URI options. 
Use this to // customize where the consumer listens, to have the consumer // use a topic or queue set the 'useTopics' flag. //============================================================ std::string destURI = - "t1234?consumer.prefetchSize = 1 "; // ?consumer.prefetchSize=1"; + Config::amq_topic + + "?consumer.prefetchSize = 1 "; // ?consumer.prefetchSize=1"; //============================================================ // set to true to use topics instead of queues diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index de40f21b7..e82d1f285 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -262,7 +262,6 @@ RetCode SlaveLoader::ReceiveAndWorkLoop() { << ". date length is " << data_length; assert(data_length >= 4 && data_length <= 10000000); char* data_buffer = Malloc(data_length); - MemoryGuard guard(data_buffer); // auto-release if (NULL == data_buffer) { ELOG((ret = claims::common::rNoMemory), "no memory to hold data of message from master"); @@ -348,15 +347,16 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { << " CHUNK SIZE is:" << CHUNK_SIZE << " last chunk id is:" << last_chunk_id; EXEC_AND_DLOG_RETURN( - ret, part_storage->AddRtChunkWithMemoryApply(last_chunk_id + 1, HDFS), + ret, part_storage->AddRtChunkWithMemoryApply(last_chunk_id + 1, MEMORY), "added chunk to " << last_chunk_id + 1, "failed to add chunk"); // cout << "******1*****" << endl; // copy data into applied memory - const uint64_t tuple_size = Catalog::getInstance() - ->getTable(table_id) - ->getProjectoin(prj_id) - ->getSchema() - ->getTupleMaxSize(); + Schema* schema = Catalog::getInstance() + ->getTable(table_id) + ->getProjectoin(prj_id) + ->getSchema(); + MemoryGuard schema_guard(schema); + const uint64_t tuple_size = schema->getTupleMaxSize(); uint64_t cur_chunk_id = packet.pos_ / CHUNK_SIZE; uint64_t cur_block_id = (packet.pos_ % CHUNK_SIZE) / BLOCK_SIZE; @@ -365,11 +365,6 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& 
packet) { uint64_t data_length = packet.data_length_; HdfsInMemoryChunk chunk_info; - Schema* schema = Catalog::getInstance() - ->getTable(table_id) - ->getProjectoin(prj_id) - ->getSchema(); - /* for (auto p = 0; p < data_length; p += tuple_size) { for (auto col = 1; col < schema->getncolumns(); col++) { logfile << schema->getColumnValue(packet.data_buffer_ + p, col) + "|"; @@ -397,13 +392,13 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { tuple_size, schema); // cout << "store data length:" << data_length << endl; do { // write to every block -/* logfile << cur_chunk_id << "->" << writer.GetBlockId() << "->" - << writer.GetBlockPos() << ",";*/ + /* logfile << cur_chunk_id << "->" << writer.GetBlockId() << "->" + << writer.GetBlockPos() << ",";*/ uint64_t written_length = writer.Write(packet.data_buffer_ + total_written_length, data_length - total_written_length); -/* logfile << writer.GetBlockPos() + written_length << "," - << written_length / schema->getTupleMaxSize() << endl;*/ + /* logfile << writer.GetBlockPos() + written_length << "," + << written_length / schema->getTupleMaxSize() << endl;*/ total_written_length += written_length; DLOG(INFO) << "written " << written_length << " bytes into chunk:" << cur_chunk_id @@ -421,18 +416,17 @@ RetCode SlaveLoader::StoreDataInMemory(const LoadPacket& packet) { << ", total number of chunk is" << part_storage->GetRTChunkNum(); if (cur_chunk_id < part_storage->GetRTChunkNum()) { - cout << "cur_chunk_id:" << cur_chunk_id - << " chunk num:" << part_storage->GetRTChunkNum() << endl; assert(cur_chunk_id < part_storage->GetRTChunkNum() && cur_chunk_id); } cur_block_id = 0; // the block id of next chunk is 0 pos_in_block = 0; } else { LOG(INFO) << "chunk id is " << cur_chunk_id << endl; + cout << "get chunk:" << cur_chunk_id << " failed" << endl; assert(false && "no chunk with this chunk id"); } } - + delete schema; return ret; } @@ -475,12 +469,13 @@ behavior SlaveLoader::WorkInCAF(event_based_actor* 
self) { "sent commit result of " << packet->txn_id_ << " to master loader", "failed to send commit res to master loader"); + DELETE_PTR(packet->data_buffer_); DELETE_PTR(packet); }}; } behavior SlaveLoader::PersistInCAF(event_based_actor* self) { - // self->delayed_send(self, seconds(10), CheckpointAtom::value); + // self->delayed_send(self, seconds(20), CheckpointAtom::value); return {[self](CheckpointAtom) { QueryReq query_req; query_req.include_abort_ = true; @@ -500,19 +495,21 @@ behavior SlaveLoader::PersistInCAF(event_based_actor* self) { query.snapshot_[g_part_id].rbegin()->second; // merge from historical to real time auto old_his_cp = query.his_cp_list_[g_part_id]; + cout << "before merge " << endl; auto new_his_cp = part_handler->MergeToHis(old_his_cp, query.snapshot_[g_part_id]); + cout << "after merge" << endl; // cout << "new_his_cp:" << new_his_cp << endl; /* if (new_his_cp == old_his_cp) continue; if (!part_handler->Persist(old_his_cp, new_his_cp)) continue; */ TxnClient::CommitCheckpoint(query.ts_, g_part_id, new_his_cp, new_rt_cp); - cout << "persist:" << g_part_id << ":" << new_his_cp << "," - << new_rt_cp << endl; + /* cout << "persist:" << g_part_id << ":" << new_his_cp << "," + << new_rt_cp << endl;*/ } } - self->delayed_send(self, seconds(10), CheckpointAtom::value); + self->delayed_send(self, seconds(20), CheckpointAtom::value); }}; } diff --git a/physical_operator/physical_projection_scan.cpp b/physical_operator/physical_projection_scan.cpp index b6e74dd0a..f4d9e1408 100644 --- a/physical_operator/physical_projection_scan.cpp +++ b/physical_operator/physical_projection_scan.cpp @@ -112,7 +112,8 @@ bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status, partition_reader_iterator_ = partition_handle_->CreateTxnReaderIterator( cp, state_.query_.scan_snapshot_[global_part_id]); cout << "version:" << state_.query_.ts_ << ",part:" << global_part_id - << ",checkpoint:" << cp; + << ",checkpoint chunk:" << cp / CHUNK_SIZE + << 
",block:" << (cp % CHUNK_SIZE) / BLOCK_SIZE << cp; for (auto& part : state_.query_.scan_snapshot_[global_part_id]) cout << "<" << part.first << "," << part.second << ">"; cout << endl; diff --git a/storage/BlockManager.cpp b/storage/BlockManager.cpp index 0fb176cf6..e664e3ab7 100755 --- a/storage/BlockManager.cpp +++ b/storage/BlockManager.cpp @@ -17,11 +17,12 @@ #include "../Config.h" #include "../common/error_define.h" #include "../common/error_no.h" +// #include "../utility/lock_guard.h" using claims::common::rLoadFromHdfsOpenFailed; using claims::common::rLoadFromDiskOpenFailed; using claims::common::rUnbindPartitionFailed; using claims::common::HdfsConnector; - +// using claims::utility::LockGuard; BlockManager* BlockManager::blockmanager_ = NULL; BlockManager* BlockManager::getInstance() { @@ -342,7 +343,8 @@ string BlockManager::askForMatch(string filename, BlockManagerId bmi) { // return file_proj_[filename.c_str()]; } -bool BlockManager::ContainsPartition(const PartitionID& part) const { +bool BlockManager::ContainsPartition(const PartitionID& part) { + LockGuard guard(lock); boost::unordered_map::const_iterator it = partition_id_to_storage_.find(part); return !(it == partition_id_to_storage_.cend()); @@ -351,6 +353,7 @@ bool BlockManager::ContainsPartition(const PartitionID& part) const { bool BlockManager::AddPartition(const PartitionID& partition_id, const unsigned& number_of_chunks, const StorageLevel& desirable_storage_level) { + LockGuard lock_guard(lock); boost::unordered_map::const_iterator it = partition_id_to_storage_.find(partition_id); if (it != partition_id_to_storage_.cend()) { @@ -379,6 +382,7 @@ bool BlockManager::AddPartition(const PartitionID& partition_id, } bool BlockManager::RemovePartition(const PartitionID& partition_id) { + LockGuard guard(lock); boost::unordered_map::iterator it = partition_id_to_storage_.find(partition_id); if (it == partition_id_to_storage_.cend()) { @@ -393,7 +397,8 @@ bool BlockManager::RemovePartition(const 
PartitionID& partition_id) { } PartitionStorage* BlockManager::GetPartitionHandle( - const PartitionID& partition_id) const { + const PartitionID& partition_id) { + LockGuard guard(lock); DLOG(INFO) << "partid2storage size is:" << partition_id_to_storage_.size(); DLOG(INFO) << "going to find storage [" << partition_id.getName() << "]"; boost::unordered_map::const_iterator it = diff --git a/storage/BlockManager.h b/storage/BlockManager.h index 5fb88be03..297d18d9a 100755 --- a/storage/BlockManager.h +++ b/storage/BlockManager.h @@ -90,11 +90,11 @@ class BlockManager { /* poc测试 */ BlockManagerId *getId(); string askForMatch(string filename, BlockManagerId bmi); - bool ContainsPartition(const PartitionID &part) const; + bool ContainsPartition(const PartitionID &part); bool AddPartition(const PartitionID &, const unsigned &number_of_chunks, const StorageLevel &desirable_storage_level); bool RemovePartition(const PartitionID &); - PartitionStorage *GetPartitionHandle(const PartitionID &partition_id) const; + PartitionStorage *GetPartitionHandle(const PartitionID &partition_id); vector GetAllPartition(); UInt64 MergeHisToRt(PartitionID, const vector &strip_list, UInt64 rt); diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 3ddb64f6e..e1fab9192 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -57,14 +57,23 @@ bool ChunkReaderIterator::NextBlock() { return true; } -ChunkStorage::ChunkStorage(const ChunkID& chunk_id, const unsigned& block_size, - const StorageLevel& desirable_storage_level) +ChunkStorage::ChunkStorage(const ChunkID& chunk_id, const unsigned block_size, + const StorageLevel desirable_storage_level) : chunk_id_(chunk_id), block_size_(block_size), desirable_storage_level_(desirable_storage_level), current_storage_level_(HDFS), chunk_size_(CHUNK_SIZE) {} +ChunkStorage::ChunkStorage(const ChunkID& chunk_id, const unsigned block_size, + const StorageLevel desirable_storage_level, + const StorageLevel 
current_storage_level) + : chunk_id_(chunk_id), + block_size_(block_size), + desirable_storage_level_(desirable_storage_level), + current_storage_level_(current_storage_level), + chunk_size_(CHUNK_SIZE) {} + ChunkStorage::~ChunkStorage() { // TODO(wangli): Auto-generated destructor stub } @@ -74,10 +83,6 @@ RetCode ChunkStorage::ApplyMemory() { RetCode ret = claims::common::rSuccess; HdfsInMemoryChunk chunk_info; chunk_info.length = CHUNK_SIZE; - cout << "apply memory:<" << chunk_id_.partition_id.projection_id.table_id - << "," << chunk_id_.partition_id.projection_id.projection_off << "," - << chunk_id_.partition_id.partition_off << "," << chunk_id_.chunk_off - << ">" << endl; if (BlockManager::getInstance()->getMemoryChunkStore()->ApplyChunk( chunk_id_, chunk_info.hook)) { /* there is enough memory storage space, so the storage level can be @@ -90,9 +95,11 @@ RetCode ChunkStorage::ApplyMemory() { for (auto offset = BLOCK_SIZE; offset <= CHUNK_SIZE; offset += BLOCK_SIZE) { *reinterpret_cast(chunk_info.hook + offset - sizeof(unsigned)) = 0; - // cout << "block:" << offset / BLOCK_SIZE << "->" << 0 << endl; } +/* cout << "Success to apply mem chunk:" + << chunk_id_.partition_id.partition_off << "," << chunk_id_.chunk_off + << endl;*/ /* update the chunk info in the Chunk store in case that the * chunk_info is updated.*/ BlockManager::getInstance()->getMemoryChunkStore()->UpdateChunkInfo( @@ -102,7 +109,8 @@ RetCode ChunkStorage::ApplyMemory() { * The storage memory is full, some swap algorithm is needed here. * TODO: swap algorithm. 
*/ - printf("Failed to get memory chunk budget!\n"); +/* cout << "Failed to apply mem chunk:" << chunk_id_.partition_id.partition_off + << "," << chunk_id_.chunk_off << endl;*/ ret = claims::common::rNoMemory; assert(false); } diff --git a/storage/ChunkStorage.h b/storage/ChunkStorage.h index e12e20fdd..0a945fada 100755 --- a/storage/ChunkStorage.h +++ b/storage/ChunkStorage.h @@ -312,8 +312,12 @@ class ChunkStorage { * Considering that how block size effects the performance is to be tested, * here we leave a parameter block_size for the performance test concern. */ - ChunkStorage(const ChunkID& chunk_id, const unsigned& block_size, - const StorageLevel& desirable_storage_level); + ChunkStorage(const ChunkID& chunk_id, const unsigned block_size, + const StorageLevel desirable_storage_level); + + ChunkStorage(const ChunkID& chunk_id, const unsigned block_size, + const StorageLevel desirable_storage_level, + const StorageLevel current_storage_level); virtual ~ChunkStorage(); @@ -327,7 +331,7 @@ class ChunkStorage { ChunkID GetChunkID() { return chunk_id_; } - void SetCurrentStorageLevel(const StorageLevel& current_storage_level) { + void SetCurrentStorageLevel(const StorageLevel current_storage_level) { current_storage_level_ = current_storage_level; } diff --git a/storage/MemoryManager.cpp b/storage/MemoryManager.cpp index 76309f205..91ecd9162 100644 --- a/storage/MemoryManager.cpp +++ b/storage/MemoryManager.cpp @@ -88,6 +88,8 @@ bool MemoryChunkStore::ApplyChunk(ChunkID chunk_id, void*& start_address) { if (NULL != (start_address = chunk_pool_.malloc())) { chunk_list_[chunk_id] = HdfsInMemoryChunk(start_address, CHUNK_SIZE); lock_.release(); + cout << "apply meme chunk:" << chunk_id.partition_id.partition_off << "," + << chunk_id.chunk_off << ", pool size:" << chunk_list_.size() << endl; return true; } else { ELOG(rMemoryPoolMallocFail, "Error occurs when memalign!"); diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 
9146d8867..6f8b8de6a 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -31,7 +31,7 @@ #include #include - +#include #include "../common/error_define.h" // #include "../Debug.h" #include "./MemoryManager.h" @@ -42,6 +42,7 @@ #include "../common/file_handle/file_handle_imp.h" #include "../common/file_handle/hdfs_file_handle_imp.h" #include "../common/file_handle/file_handle_imp_factory.h" +#include "../utility/resource_guard.h" using claims::common::rSuccess; using claims::common::FileHandleImpFactory; using claims::common::kHdfs; @@ -388,7 +389,7 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( << offset_in_block << "," << offset_in_block + len << "," << tuple_count << endl;*/ } - auto cout = + auto count = *reinterpret_cast(block->getBlockDataAddress() + BLOCK_SIZE - sizeof(unsigned int)); /* logfile << rt_chunk_cur_ << "->" << (rt_block_cur_ - 1) % 1024 << "->" @@ -413,6 +414,7 @@ void PartitionStorage::CheckAndAppendChunkList(unsigned number_of_chunk, new ChunkStorage(ChunkID(partition_id_, size, true), BLOCK_SIZE, desirable_storage_level_)); } + assert(desirable_storage_level_ == MEMORY); } UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, @@ -420,12 +422,14 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, auto new_his_cp = old_his_cp; auto table_id = partition_id_.projection_id.table_id; auto proj_id = partition_id_.projection_id.projection_off; - auto tuple_size = Catalog::getInstance() - ->getTable(table_id) - ->getProjectoin(proj_id) - ->getSchema() - ->getTupleMaxSize(); + Schema* schema = Catalog::getInstance() + ->getTable(table_id) + ->getProjectoin(proj_id) + ->getSchema(); + MemoryGuard schema_guard(schema); + auto tuple_size = schema->getTupleMaxSize(); HdfsInMemoryChunk chunk_rt, chunk_his; + if (strip_list.size() > 0) cout << "{before merge" << endl; for (auto& strip : strip_list) { auto begin = strip.first; auto end = strip.first + strip.second; @@ -433,12 +437,13 @@ UInt64 
PartitionStorage::MergeToHis(UInt64 old_his_cp, auto move = BLOCK_SIZE - (begin + BLOCK_SIZE) % BLOCK_SIZE; if (move > end - begin) move = end - begin; // update historical chunk cur - AddHisChunkWithMemoryApply(begin / CHUNK_SIZE + 1, HDFS); + AddHisChunkWithMemoryApply(begin / CHUNK_SIZE + 1, MEMORY); if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( chunk_list_[begin / CHUNK_SIZE]->GetChunkID(), chunk_his)) { assert(false && begin && begin / CHUNK_SIZE); } // update real time chunk cur + // real-time chunk need't check if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( rt_chunk_list_[begin / CHUNK_SIZE]->GetChunkID(), chunk_rt)) { assert(false); @@ -448,7 +453,7 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, if (move == BLOCK_SIZE) { // full block memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, chunk_rt.hook + begin % CHUNK_SIZE, move); - } else { + } else { // partly block auto real_move = (begin + move) % BLOCK_SIZE != 0 ? move : move - sizeof(unsigned); auto tuple_count = real_move / tuple_size; @@ -463,6 +468,7 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, new_his_cp += BLOCK_SIZE; } } + if (strip_list.size() > 0) cout << "}after merge" << endl; return new_his_cp; } From 4577613a1ccc46fad34a89b7540548ce5d881c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Tue, 17 Jan 2017 16:01:19 +0800 Subject: [PATCH 57/58] implement data persitence for ingestion --- Environment.cpp | 36 ++++- catalog/partitioner.cpp | 26 +++ catalog/partitioner.h | 12 ++ common/error_define.h | 3 + conf/config | 7 +- loader/AMQ_consumer.cpp | 2 +- loader/load_packet.h | 1 + loader/master_loader.cpp | 23 ++- loader/master_loader.h | 2 + loader/slave_loader.cpp | 70 ++++++-- node_manager/master_node.cpp | 50 ++++-- node_manager/master_node.h | 2 + node_manager/slave_node.cpp | 14 ++ node_manager/slave_node.h | 1 + .../physical_projection_scan.cpp | 7 +- storage/PartitionStorage.cpp 
| 149 +++++++++++++----- storage/PartitionStorage.h | 5 +- txn_manager/Makefile.am | 29 +++- txn_manager/txn.cpp | 12 ++ txn_manager/txn.hpp | 9 +- txn_manager/txn_client.cpp | 32 ++-- txn_manager/txn_server.cpp | 68 ++++++-- 22 files changed, 441 insertions(+), 119 deletions(-) diff --git a/Environment.cpp b/Environment.cpp index d675337b3..a825af2eb 100755 --- a/Environment.cpp +++ b/Environment.cpp @@ -61,6 +61,7 @@ using claims::txn::TxnClient; using claims::txn::LogServer; using claims::txn::LogClient; using claims::txn::GetGlobalPartId; +using claims::txn::TimeStamp; using claims::NodeAddr; using claims::NodeSegmentID; using claims::StmtExecTracker; @@ -286,14 +287,14 @@ bool Environment::InitTxnManager() { auto table_count = cat->getNumberOfTable(); // cout << "table count:" << table_count << endl; for (unsigned table_id : cat->getAllTableIDs()) { - // cout << "table id :" << table_id << endl; + // cout << "start table id :" << table_id << endl; auto table = cat->getTable(table_id); if (NULL == table) { - cout << " No table whose id is:" << table_id << endl; + // cout << " No table whose id is:" << table_id << endl; assert(false); } auto proj_count = table->getNumberOfProjection(); - // cout << "proj_count:" << proj_count << endl; + for (auto proj_id = 0; proj_id < proj_count; proj_id++) { auto proj = table->getProjectoin(proj_id); if (NULL == proj) { @@ -301,23 +302,42 @@ bool Environment::InitTxnManager() { << " in table:" << table->getTableName() << endl; assert(false); } + // cout << "start proj_id:" << proj_id << endl; auto part = proj->getPartitioner(); auto part_count = part->getNumberOfPartitions(); - // cout << "part_count:" << part_count << endl; for (auto part_id = 0; part_id < part_count; part_id++) { + // cout << "start part_id:" << part_id << endl; auto global_part_id = GetGlobalPartId(table_id, proj_id, part_id); pos_list[global_part_id] = his_cp_list[global_part_id] = rt_cp_list[global_part_id] = - part->getPartitionBlocks(part_id) * 
BLOCK_SIZE; + static_cast(part->getPartitionBlocks(part_id)) * + BLOCK_SIZE; + // cout << "pos2 => " << pos_list[global_part_id] << " bytes" << endl; } } } + // cout << "before init pos list..." << endl; + // for (auto& pos : pos_list) + // cout << pos.first << " => " << pos.second << endl; + // cout << "before init his cp list..." << endl; + // for (auto& cp : his_cp_list) + // cout << cp.first << " => " << cp.second << endl; + // cout << "before init rt list..." << endl; + // for (auto& cp : rt_cp_list) cout << cp.first << " => " << cp.second << + // endl; + TxnServer::LoadCPList(0, his_cp_list, rt_cp_list); TxnServer::LoadPos(pos_list); - cout << "*******pos_list*******" << endl; - for (auto& pos : TxnServer::pos_list_) - cout << "partition[" << pos.first << "] => " << pos.second << endl; + // cout << "init pos list..." << endl; + // for (auto& pos : TxnServer::pos_list_) + // cout << "part[" << pos.first << "] => " << pos.second << endl; + // cout << "init checkpoint list..." << endl; + // for (auto& pos : TxnServer::cp_list_) { + // cout << "part[" << pos.first << "] => his:" << pos.second.GetHisCP(0) + // << ", rt:" << pos.second.GetRtCP(0) << endl; + //} } + sleep(1); TxnClient::Init(Config::txn_server_ip, Config::txn_server_port); return true; } diff --git a/catalog/partitioner.cpp b/catalog/partitioner.cpp index 86423dd2b..b8ba713d0 100644 --- a/catalog/partitioner.cpp +++ b/catalog/partitioner.cpp @@ -185,6 +185,32 @@ bool Partitioner::allPartitionBound() const { return true; } +void Partitioner::addPartitionCardinality(unsigned partition_index, + unsigned long value) { + partition_info_list[partition_index]->number_of_tuples_ += value; +} + +void Partitioner::addPartitionBlocks(unsigned partition_index, unsigned value) { + partition_info_list[partition_index]->number_of_blocks += value; +} + +void Partitioner::addPartitionChunks(unsigned partition_index, unsigned value) { + partition_info_list[partition_index]->number_of_blocks += value * 1024; +} + 
+void Partitioner::setPartitionCardinality(unsigned partition_index, + unsigned long value) { + partition_info_list[partition_index]->number_of_tuples_ = value; +} + +void Partitioner::setPartitionBlocks(unsigned partition_index, unsigned value) { + partition_info_list[partition_index]->number_of_blocks = value; +} + +void Partitioner::setPartitionChunks(unsigned partition_index, unsigned value) { + partition_info_list[partition_index]->number_of_blocks = value * 1024; +} + vector Partitioner::getPartitionIDList() { vector ret; ret.clear(); diff --git a/catalog/partitioner.h b/catalog/partitioner.h index 0ba8ed9f8..3ecaaeb13 100644 --- a/catalog/partitioner.h +++ b/catalog/partitioner.h @@ -280,6 +280,18 @@ class Partitioner { vector getPartitionIDList(); + void addPartitionCardinality(unsigned partitoin_index, unsigned long value); + + void addPartitionBlocks(unsigned partitoin_index, unsigned value); + + void addPartitionChunks(unsigned partition_index, unsigned value); + + void setPartitionCardinality(unsigned partition_index, unsigned long value); + + void setPartitionBlocks(unsigned partition_index, unsigned value); + + void setPartitionChunks(unsigned partition_index, unsigned value); + private: Attribute *partition_key_; PartitionFunction *partition_function_; diff --git a/common/error_define.h b/common/error_define.h index e37e13740..ae52920dc 100644 --- a/common/error_define.h +++ b/common/error_define.h @@ -283,6 +283,9 @@ const int rBeginQueryFail = -2504; const int rBeginCheckpointFail = -2505; const int rCommitCheckpointFail = -2506; +const int rGetFileHandleFail = -2507; +const int rGetMemoryHandleFail = -2508; + /* errorno for codegen -3001 ~ -4000 */ const int rTestError = -3001; diff --git a/conf/config b/conf/config index 7e07b8960..76da635ff 100755 --- a/conf/config +++ b/conf/config @@ -44,7 +44,8 @@ data="/home/imdb/config/tpc-h/1-partition/sf-1/" #data="/home/imdb/data/POC/" #hdfs主节点 -hdfs_master_ip="58.198.176.92" 
+#hdfs_master_ip="58.198.176.92" +hdfs_master_ip="219.228.147.33" #hdfs主节点端口 hdfs_master_port=9000 @@ -71,9 +72,9 @@ master_loader_ip="127.0.0.1" master_loader_port=9002 -master_loader_thread_num=1 +master_loader_thread_num=8 -slave_loader_thread_num=1 +slave_loader_thread_num=6 amq_url="58.198.176.92:61616" diff --git a/loader/AMQ_consumer.cpp b/loader/AMQ_consumer.cpp index 941a11bc9..1878b804e 100644 --- a/loader/AMQ_consumer.cpp +++ b/loader/AMQ_consumer.cpp @@ -101,7 +101,7 @@ void claims::loader::AMQConsumer::onMessage(const Message* message) { try { const TextMessage* textMessage = dynamic_cast(message); string text = ""; - + mloader_->AddCount(); if (textMessage != NULL) { text = textMessage->getText(); // message->acknowledge(); diff --git a/loader/load_packet.h b/loader/load_packet.h index 3483cd2a7..23fcd253d 100644 --- a/loader/load_packet.h +++ b/loader/load_packet.h @@ -40,6 +40,7 @@ using LoadAckAtom = caf::atom_constant; using RegNodeAtom = caf::atom_constant; using BindPartAtom = caf::atom_constant; using OkAtom = caf::atom_constant; +using AddBlockAtom = caf::atom_constant; /************** LoadPacket format *****************/ /** field type length **********/ diff --git a/loader/master_loader.cpp b/loader/master_loader.cpp index 15ff2b757..3fd5eab93 100644 --- a/loader/master_loader.cpp +++ b/loader/master_loader.cpp @@ -107,7 +107,7 @@ uint64_t MasterLoader::debug_consumed_message_count = 0; timeval MasterLoader::start_time; uint64_t MasterLoader::txn_average_delay_ = 0; static int MasterLoader::buffer_full_time = 0; - +atomic message_count; static const int txn_count_for_debug = 10000; namespace claims { @@ -115,6 +115,24 @@ namespace loader { ofstream MasterLoader::logfile; +void MasterLoader::AddCount() { + message_count.fetch_add(1); + // cout << "new message" << endl; +} + +behavior MasterLoader::Monitor(event_based_actor* self) { + static uint64_t last_count = 0; + self->delayed_send(self, seconds(5), "monitor"); + return {[self](const 
string& str) { + auto tmp = message_count.load(); + if (tmp != last_count) { + last_count = tmp; + cout << "master_loader receive count==>" << last_count << endl; + } + self->delayed_send(self, seconds(5), "monitor"); + }}; +} + void MasterLoader::IngestionRequest::Show() { DLOG(INFO) << "table name:" << table_name_ << ", column separator:" << col_sep_ @@ -298,6 +316,7 @@ static behavior MasterLoader::ReceiveSlaveReg(event_based_actor* self, RetCode MasterLoader::ConnectWithSlaves() { int ret = rSuccess; try { + auto monitor_actor = spawn(MasterLoader::Monitor); auto listening_actor = spawn(&MasterLoader::ReceiveSlaveReg, this); publish(listening_actor, master_loader_port_, nullptr, true); DLOG(INFO) << "published in " << master_loader_ip_ << ":" @@ -316,7 +335,7 @@ RetCode MasterLoader::Ingest(const string& message, static uint64_t get_tuple_time = 0; static uint64_t merge_tuple_time = 0; static uint64_t time_before_txn = 0; - +// assert(false); #ifdef MASTER_LOADER_PREF uint64_t temp_message_count = __sync_add_and_fetch(&debug_consumed_message_count, 1); diff --git a/loader/master_loader.h b/loader/master_loader.h index e4115d665..83a75a640 100644 --- a/loader/master_loader.h +++ b/loader/master_loader.h @@ -128,6 +128,8 @@ class MasterLoader { MasterLoader(); ~MasterLoader(); + static behavior Monitor(event_based_actor* self); + void AddCount(); RetCode ConnectWithSlaves(); RetCode Ingest(const string& message, function ack_function); diff --git a/loader/slave_loader.cpp b/loader/slave_loader.cpp index e82d1f285..ac1e748c3 100644 --- a/loader/slave_loader.cpp +++ b/loader/slave_loader.cpp @@ -50,6 +50,9 @@ #include "../storage/PartitionStorage.h" #include "../txn_manager/txn.hpp" #include "../utility/resource_guard.h" + +#include "../node_manager/base_node.h" + using caf::behavior; using caf::event_based_actor; using caf::io::remote_actor; @@ -60,6 +63,7 @@ using claims::common::rFailure; using claims::txn::GetPartitionIdFromGlobalPartId; using 
claims::txn::GetProjectionIdFromGlobalPartId; using claims::txn::GetTableIdFromGlobalPartId; +using claims::MasterNode; using std::chrono::milliseconds; using std::chrono::seconds; @@ -475,7 +479,9 @@ behavior SlaveLoader::WorkInCAF(event_based_actor* self) { } behavior SlaveLoader::PersistInCAF(event_based_actor* self) { - // self->delayed_send(self, seconds(20), CheckpointAtom::value); + self->delayed_send(self, + seconds(30 + Environment::getInstance()->getNodeID() * 2), + CheckpointAtom::value); return {[self](CheckpointAtom) { QueryReq query_req; query_req.include_abort_ = true; @@ -493,23 +499,61 @@ behavior SlaveLoader::PersistInCAF(event_based_actor* self) { part); auto new_rt_cp = query.snapshot_[g_part_id].rbegin()->first + query.snapshot_[g_part_id].rbegin()->second; + /*for debug new_rt_cp = query.snapshot_[g_part_id].begin()->first + + query.snapshot_[g_part_id].begin()->second;*/ + /* for debug vector ch_list = { + {query.snapshot_[g_part_id].begin()->first, + query.snapshot_[g_part_id].begin()->second}}; */ + auto old_rt_cp = query.snapshot_[g_part_id].begin()->first; // merge from historical to real time auto old_his_cp = query.his_cp_list_[g_part_id]; - cout << "before merge " << endl; - auto new_his_cp = - part_handler->MergeToHis(old_his_cp, query.snapshot_[g_part_id]); - cout << "after merge" << endl; + auto new_his_cp = part_handler->MergeToHis( + old_his_cp, /*ch_list */ query.snapshot_[g_part_id]); + cout << "merge his:<" << old_his_cp / BLOCK_SIZE << "," + << old_his_cp % BLOCK_SIZE << "> => <" << new_his_cp / BLOCK_SIZE + << "," << new_his_cp % BLOCK_SIZE << ">" << endl; + cout << "merge rt:<" << old_rt_cp / BLOCK_SIZE << "," + << old_rt_cp % BLOCK_SIZE << "> => <" << new_rt_cp / BLOCK_SIZE + << "," << new_rt_cp % BLOCK_SIZE << ">" << endl; // cout << "new_his_cp:" << new_his_cp << endl; - /* if (new_his_cp == old_his_cp) continue; - if (!part_handler->Persist(old_his_cp, new_his_cp)) continue; - */ - TxnClient::CommitCheckpoint(query.ts_, 
g_part_id, new_his_cp, - new_rt_cp); - /* cout << "persist:" << g_part_id << ":" << new_his_cp << "," - << new_rt_cp << endl;*/ + if (new_his_cp == old_his_cp) { + cout << "don't need to create checkpoint" << endl; + continue; + } + RetCode ret = rSuccess; + if (ret == rSuccess) + ret = part_handler->Persist(old_his_cp, new_his_cp); + else + continue; + if (ret == rSuccess) + ret = TxnClient::CommitCheckpoint(query.ts_, g_part_id, new_his_cp, + new_rt_cp); + else + continue; + if (ret == rSuccess) { + auto slave_node = Environment::getInstance()->get_slave_node(); + ret = slave_node->AddBlocks(g_part_id, + (new_his_cp - old_his_cp) / BLOCK_SIZE); + /* auto cata = Catalog::getInstance(); + auto proj = cata->getProjection(part.projection_id); + proj->getPartitioner()->addPartitionBlocks( + part.partition_off, (new_his_cp - old_his_cp) / + BLOCK_SIZE); + auto schema = proj->getSchema(); + MemoryGuard schema_guard(schema); + auto tuple_size = schema->getTupleMaxSize(); + proj->getPartitioner()->addPartitionCardinality( + part.partition_off, (new_rt_cp - old_rt_cp) / + tuple_size); + ret = cata->saveCatalog();*/ + } + if (ret == rSuccess) + cout << "Persist success" << endl; + else + cout << "Persist fail" << endl; } } - self->delayed_send(self, seconds(20), CheckpointAtom::value); + self->delayed_send(self, seconds(30), CheckpointAtom::value); }}; } diff --git a/node_manager/master_node.cpp b/node_manager/master_node.cpp index e0578b5d8..7bf0aee9c 100644 --- a/node_manager/master_node.cpp +++ b/node_manager/master_node.cpp @@ -39,11 +39,18 @@ #include "../common/error_define.h" #include "../common/ids.h" #include "../common/Message.h" +#include "../loader/load_packet.h" #include "../Environment.h" +#include "../txn_manager/txn.hpp" using caf::io::remote_actor; using caf::make_message; using std::make_pair; using claims::common::rConRemoteActorError; +using claims::loader::AddBlockAtom; +using claims::txn::GetTableIdFromGlobalPartId; +using 
claims::txn::GetProjectionIdFromGlobalPartId; +using claims::txn::GetPartitionIdFromGlobalPartId; + namespace claims { MasterNode* MasterNode::instance_ = 0; class MasterNodeActor : public event_based_actor { @@ -64,17 +71,22 @@ class MasterNodeActor : public event_based_actor { ->RegisterNewSlave(id); return make_message(OkAtom::value, id, *((BaseNode*)master_node_)); }, - [&](StorageBudgetAtom, const StorageBudgetMessage& message) { - Environment::getInstance() - ->getResourceManagerMaster() - ->RegisterDiskBuget(message.nodeid, message.disk_budget); - Environment::getInstance() - ->getResourceManagerMaster() - ->RegisterMemoryBuget(message.nodeid, message.memory_budget); - LOG(INFO) << "receive storage budget message!! node: " - << message.nodeid << " : disk = " << message.disk_budget - << " , mem = " << message.memory_budget << endl; - return make_message(OkAtom::value); + [&](StorageBudgetAtom, const StorageBudgetMessage& message) + -> caf::message { + Environment::getInstance() + ->getResourceManagerMaster() + ->RegisterDiskBuget(message.nodeid, message.disk_budget); + Environment::getInstance() + ->getResourceManagerMaster() + ->RegisterMemoryBuget(message.nodeid, message.memory_budget); + LOG(INFO) << "receive storage budget message!! node: " + << message.nodeid << " : disk = " << message.disk_budget + << " , mem = " << message.memory_budget << endl; + return make_message(OkAtom::value); + }, + [=](AddBlockAtom, int part_id, int block_num) -> caf::message { + RetCode ret = master_node_->AddBlock(part_id, block_num); + return make_message(ret); }, [=](ExitAtom) { LOG(INFO) << "master " << master_node_->get_node_id() << " finish!" 
@@ -167,4 +179,20 @@ void MasterNode::FinishAllNode() { } self->send(master_actor_, ExitAtom::value); } + +RetCode MasterNode::AddBlock(int g_part_id, int block_num) { + /* cout << "try to add block on patition:" << part_id << " block num" + << block_num << endl;*/ + auto cata = Environment::getInstance()->getCatalog(); + auto table_id = GetTableIdFromGlobalPartId(g_part_id); + auto proj_id = GetProjectionIdFromGlobalPartId(g_part_id); + auto part_id = GetPartitionIdFromGlobalPartId(g_part_id); + + auto table = cata->getTable(table_id); + auto proj = table->getProjectoin(proj_id); + proj->getPartitioner()->addPartitionBlocks(part_id, block_num); + cout << "add " << block_num << " on part " << part_id << endl; + return cata->saveCatalog(); +} + } // namespace claims diff --git a/node_manager/master_node.h b/node_manager/master_node.h index 8a1373655..f78a09cbe 100644 --- a/node_manager/master_node.h +++ b/node_manager/master_node.h @@ -61,6 +61,8 @@ class MasterNode : public BaseNode { unsigned int AddOneNode(string node_ip, uint16_t node_port); RetCode BroastNodeInfo(const unsigned int& node_id, const string& node_ip, const uint16_t& node_port); + RetCode AddBlock(int part_id, int block_num); + MasterNode(); MasterNode(string node_ip, uint16_t node_port); diff --git a/node_manager/slave_node.cpp b/node_manager/slave_node.cpp index 3cf3f2abd..db387aedc 100644 --- a/node_manager/slave_node.cpp +++ b/node_manager/slave_node.cpp @@ -53,6 +53,7 @@ using claims::common::rConRemoteActorError; using claims::common::rRegisterToMasterTimeOut; using claims::common::rRegisterToMasterError; using claims::loader::RegNodeAtom; +using claims::loader::AddBlockAtom; namespace claims { SlaveNode* SlaveNode::instance_ = 0; class SlaveNodeActor : public event_based_actor { @@ -268,4 +269,17 @@ RetCode SlaveNode::RegisterToMaster() { return ret; } +RetCode SlaveNode::AddBlocks(int part_id, int block_num) { + RetCode ret = rSuccess; + try { + // cout << "try slave send add block on 
part:" << part_id << endl; + caf::scoped_actor self; + self->sync_send(master_actor_, AddBlockAtom::value, part_id, block_num) + .await([&](int r) { ret = r; }); + } catch (exception& e) { + cout << "slave send add block on part:" << part_id << " fail" << endl; + } + return ret; +} + } /* namespace claims */ diff --git a/node_manager/slave_node.h b/node_manager/slave_node.h index aa2f25e13..b850b2919 100644 --- a/node_manager/slave_node.h +++ b/node_manager/slave_node.h @@ -48,6 +48,7 @@ class SlaveNode : public BaseNode { void CreateActor(); virtual ~SlaveNode(); RetCode RegisterToMaster(); + RetCode AddBlocks(int part, int block_num); static SlaveNode* GetInstance(); RetCode AddOneNode(const unsigned int& node_id, const string& node_ip, const uint16_t& node_port); diff --git a/physical_operator/physical_projection_scan.cpp b/physical_operator/physical_projection_scan.cpp index f4d9e1408..263e0e9ad 100644 --- a/physical_operator/physical_projection_scan.cpp +++ b/physical_operator/physical_projection_scan.cpp @@ -112,10 +112,11 @@ bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status, partition_reader_iterator_ = partition_handle_->CreateTxnReaderIterator( cp, state_.query_.scan_snapshot_[global_part_id]); cout << "version:" << state_.query_.ts_ << ",part:" << global_part_id - << ",checkpoint chunk:" << cp / CHUNK_SIZE - << ",block:" << (cp % CHUNK_SIZE) / BLOCK_SIZE << cp; + << ",checkpoint :" + << "block:" << cp / BLOCK_SIZE << "," << cp % BLOCK_SIZE << endl; for (auto& part : state_.query_.scan_snapshot_[global_part_id]) - cout << "<" << part.first << "," << part.second << ">"; + cout << "[<" << part.first / BLOCK_SIZE << "," + << part.first % BLOCK_SIZE << ">," << part.second << "]"; cout << endl; // partition_reader_iterator_ = // partition_handle_->CreateAtomicReaderIterator(); diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 6f8b8de6a..7601aec67 100755 --- a/storage/PartitionStorage.cpp +++ 
b/storage/PartitionStorage.cpp @@ -45,10 +45,18 @@ #include "../utility/resource_guard.h" using claims::common::rSuccess; using claims::common::FileHandleImpFactory; +using claims::common::FileHandleImp; using claims::common::kHdfs; +using claims::common::kDisk; using claims::utility::LockGuard; +using claims::common::rGetFileHandleFail; +using claims::common::rGetMemoryHandleFail; -ofstream PartitionStorage::TxnPartitionReaderIterator::logfile; +ofstream logfile; +ofstream qylog; +int qylog_count = 0; +ofstream mvlog; +int mvlog_count = 0; /** * According to number_of_chunks, construct chunk from partition and add into @@ -80,6 +88,10 @@ PartitionStorage::PartitionStorage(const PartitionID& partition_id, // CheckAndAppendChunkList(number_of_chunks_, false); // CheckAndAppendChunkList(number_of_chunks_, true); // cout << "*******chunk_list_" << chunk_list_.size() << endl; + // cout << "open ps log" << endl; + // if (!logfile.is_open()) logfile.open("ps_log.txt"); + if (!qylog.is_open()) qylog.open("qy_log.txt"); + if (!mvlog.is_open()) mvlog.open("mv_log.txt"); } PartitionStorage::~PartitionStorage() { @@ -87,6 +99,8 @@ PartitionStorage::~PartitionStorage() { DELETE_PTR(chunk_list_[i]); } chunk_list_.clear(); + // cout << "close ps log" << endl; + // logfile.close(); } void PartitionStorage::AddNewRTChunk() { @@ -301,12 +315,11 @@ PartitionStorage::TxnPartitionReaderIterator::TxnPartitionReaderIterator( str += "<" + to_string(strip.first) + "," + to_string(strip.second) + ">"; } cout << str << endl;*/ - logfile.open("ps_log.txt"); + qylog << "*********query:" << qylog_count++ << "***********" << endl; } PartitionStorage::TxnPartitionReaderIterator::~TxnPartitionReaderIterator() { for (auto block : rt_block_buffer_) free(block); - logfile.close(); } bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( @@ -334,8 +347,13 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( delete ba; ba = nullptr; block_cur_++; - /* logfile << chunk_cur_ << "->" 
<< (block_cur_ - 1) % 1024 << "->" - << "full1" << endl;*/ + /* logfile << "H<" << chunk_it_->chunk_id_.chunk_off << "," << + block_cur_ + << "," << 0 << "," << block->getTuplesInBlock() << ">" << + endl;*/ + if (!block->Full()) + qylog << "HgetTuplesInBlock() << ">" << endl; return true; } } else if (rt_block_index_ < rt_strip_list_.size()) { // scan real-time data @@ -364,17 +382,20 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( if (len == BLOCK_SIZE) { // directly return pointer ba->GetBlock(block); - /* logfile << rt_chunk_cur_ << "->" << (rt_block_cur_ - 1) % 1024 << - "->" - << "full2" << endl;*/ + /* logfile << "R<" << rt_chunk_it_->chunk_id_.chunk_off << "," + << rt_block_cur << "," << offset_in_block << "," + << block->getTuplesInBlock() << ">" << endl;*/ + if (!block->Full()) + qylog << "warning: RgetTuplesInBlock() << ">" << endl; } else { auto tuple_size = reinterpret_cast(block)->getTupleSize(); - if (pos + len % BLOCK_SIZE == 0) - len = ((len - sizeof(unsigned)) / tuple_size) * tuple_size; + + if ((pos + len) % BLOCK_SIZE == 0) len -= sizeof(unsigned); + assert(pos / BLOCK_SIZE == (pos + len) / BLOCK_SIZE); auto tuple_count = len / tuple_size; - // cout << "tuple_size:" << tuple_size << endl; - // cout << "tuple_count:" << tuple_count << endl; + ba->GetBlock(block); auto des_addr = reinterpret_cast(malloc(BLOCK_SIZE)); auto scr_addr = block->getBlockDataAddress() + offset_in_block; @@ -384,21 +405,20 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( reinterpret_cast(block)->setBlockDataAddress(des_addr); reinterpret_cast(block)->setTuplesInBlock(tuple_count); rt_block_buffer_.push_back(des_addr); - /* logfile << rt_chunk_cur_ << "->" << (rt_block_cur_ - 1) % 1024 << - "->" - << offset_in_block << "," << offset_in_block + len << "," - << tuple_count << endl;*/ + /* logfile << "R<" << rt_block_cur << "," << offset_in_block << "," + << block->getTuplesInBlock() << ">" << endl;*/ + qylog << "RgetTuplesInBlock() << ">" << 
endl; } auto count = *reinterpret_cast(block->getBlockDataAddress() + BLOCK_SIZE - sizeof(unsigned int)); - /* logfile << rt_chunk_cur_ << "->" << (rt_block_cur_ - 1) % 1024 << "->" - << cout << endl;*/ delete ba; ba = nullptr; rt_block_index_++; return true; } + /* logfile << "********* query end **********" << endl;*/ return false; } void PartitionStorage::CheckAndAppendChunkList(unsigned number_of_chunk, @@ -429,7 +449,17 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, MemoryGuard schema_guard(schema); auto tuple_size = schema->getTupleMaxSize(); HdfsInMemoryChunk chunk_rt, chunk_his; - if (strip_list.size() > 0) cout << "{before merge" << endl; + // if (strip_list.size() > 0) cout << "{before merge" << endl; + if (strip_list.size() > 0) { + mvlog << "*********mov:" << mvlog_count++ << "************" << endl; + mvlog << "plan:"; + for (auto& strip : strip_list) + mvlog << "<" << strip.first / BLOCK_SIZE << "," + << strip.first % BLOCK_SIZE << "=>" + << (strip.first + strip.second) / BLOCK_SIZE << "," + << (strip.first + strip.second) % BLOCK_SIZE << ">"; + mvlog << endl; + } for (auto& strip : strip_list) { auto begin = strip.first; auto end = strip.first + strip.second; @@ -437,10 +467,11 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, auto move = BLOCK_SIZE - (begin + BLOCK_SIZE) % BLOCK_SIZE; if (move > end - begin) move = end - begin; // update historical chunk cur - AddHisChunkWithMemoryApply(begin / CHUNK_SIZE + 1, MEMORY); + AddHisChunkWithMemoryApply(new_his_cp / CHUNK_SIZE + 1, MEMORY); + // be carry! 
the chunk is not begin/CHUN_SIZE if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( - chunk_list_[begin / CHUNK_SIZE]->GetChunkID(), chunk_his)) { - assert(false && begin && begin / CHUNK_SIZE); + chunk_list_[new_his_cp / CHUNK_SIZE]->GetChunkID(), chunk_his)) { + // assert(false && begin && begin / CHUNK_SIZE); } // update real time chunk cur // real-time chunk need't check @@ -450,33 +481,79 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, return old_his_cp; } // each step move just one full block or even partly block + auto tail_offset = + (new_his_cp + BLOCK_SIZE) % CHUNK_SIZE - sizeof(unsigned); + auto his_addr = chunk_his.hook + new_his_cp % CHUNK_SIZE; + auto rt_addr = chunk_rt.hook + begin % CHUNK_SIZE; if (move == BLOCK_SIZE) { // full block - memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, - chunk_rt.hook + begin % CHUNK_SIZE, move); + memcpy(his_addr, rt_addr, BLOCK_SIZE); + /* logfile << "full<" << begin / BLOCK_SIZE << "," << begin % + BLOCK_SIZE + << "," + << *reinterpret_cast(rt_addr + BLOCK_SIZE - + sizeof(unsigned)) << "> + => <" + << new_his_cp / BLOCK_SIZE << "," << new_his_cp % + BLOCK_SIZE + << "," + << *reinterpret_cast( + his_addr + BLOCK_SIZE - sizeof(unsigned)) << ">" + << endl;*/ } else { // partly block auto real_move = (begin + move) % BLOCK_SIZE != 0 ? 
move : move - sizeof(unsigned); auto tuple_count = real_move / tuple_size; - memcpy(chunk_his.hook + new_his_cp % CHUNK_SIZE, - chunk_rt.hook + begin % CHUNK_SIZE, move); - auto tail_offset = - (new_his_cp + BLOCK_SIZE) % CHUNK_SIZE - sizeof(unsigned); - *reinterpret_cast(chunk_his.hook + tail_offset) = + memcpy(his_addr, rt_addr, real_move); + *reinterpret_cast(his_addr + BLOCK_SIZE - sizeof(unsigned)) = tuple_count; + /* logfile << "part<" << begin / BLOCK_SIZE << "," << begin % + BLOCK_SIZE + << "," << tuple_count << "> => <" << new_his_cp / + BLOCK_SIZE + << "," << new_his_cp % BLOCK_SIZE << "," + << *reinterpret_cast( + his_addr + BLOCK_SIZE - sizeof(unsigned)) << ">" + << endl;*/ + mvlog << begin / BLOCK_SIZE << "=>" << new_his_cp / BLOCK_SIZE << ":" + << tuple_count << endl; } + begin += move; new_his_cp += BLOCK_SIZE; } } - if (strip_list.size() > 0) cout << "}after merge" << endl; + + // if (strip_list.size() > 0) logfile << "********************" << endl; + return new_his_cp; } -bool PartitionStorage::Persist(UInt64 old_his_cp, UInt64 new_his_cp) { +RetCode PartitionStorage::Persist(UInt64 old_his_cp, UInt64 new_his_cp) { + FileHandleImp* file_handle = nullptr; + MemoryGuard file_handle_guard(file_handle); if (!Config::local_disk_mode) - return PersistHDFS(old_his_cp, new_his_cp); + file_handle = FileHandleImpFactory::Instance().CreateFileHandleImp( + kHdfs, partition_id_.getPathAndName()); else - return PersistDisk(old_his_cp, new_his_cp); + file_handle = FileHandleImpFactory::Instance().CreateFileHandleImp( + kDisk, partition_id_.getPathAndName()); + if (file_handle == nullptr) return rGetFileHandleFail; + HdfsInMemoryChunk chunk_his; + auto begin = old_his_cp; + auto end = new_his_cp; + while (begin < end) { + if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( + ChunkID(partition_id_, begin / CHUNK_SIZE), chunk_his)) + return rGetMemoryHandleFail; + auto move = CHUNK_SIZE - (begin + CHUNK_SIZE) % CHUNK_SIZE; + if (begin + move > end) 
move = end - begin; + file_handle->Append(chunk_his.hook + begin % CHUNK_SIZE, move); + /* cout << "move chunk:" << begin / CHUNK_SIZE << "," << begin % + CHUNK_SIZE + << " step:" << move << endl;*/ + begin += move; + } + return rSuccess; } bool PartitionStorage::PersistHDFS(UInt64 old_his_cp, UInt64 new_his_cp) { @@ -485,19 +562,19 @@ bool PartitionStorage::PersistHDFS(UInt64 old_his_cp, UInt64 new_his_cp) { */ auto file_handle = FileHandleImpFactory::Instance().CreateFileHandleImp( kHdfs, partition_id_.getPathAndName()); - if (file_handle == nullptr) return false; + if (file_handle == nullptr) return rGetFileHandleFail; HdfsInMemoryChunk chunk_his; auto begin = old_his_cp; auto end = new_his_cp; while (begin < end) { if (!BlockManager::getInstance()->getMemoryChunkStore()->GetChunk( ChunkID(partition_id_, begin / CHUNK_SIZE), chunk_his)) - return false; + return rGetMemoryHandleFail; auto move = CHUNK_SIZE - (begin + CHUNK_SIZE) % CHUNK_SIZE; file_handle->Append(chunk_his.hook, move); begin += move; } - return true; + return rSuccess; } bool PartitionStorage::PersistDisk(UInt64 old_his_cp, UInt64 new_his_cp) {} diff --git a/storage/PartitionStorage.h b/storage/PartitionStorage.h index e3c5f06c2..130946e9a 100755 --- a/storage/PartitionStorage.h +++ b/storage/PartitionStorage.h @@ -150,7 +150,6 @@ class PartitionStorage { Lock lock_; public: - static ofstream logfile; }; /** @@ -220,7 +219,7 @@ class PartitionStorage { } void CheckAndAppendChunkList(unsigned number_of_chunk, bool is_rt); UInt64 MergeToHis(UInt64 old_his_cp, const vector& strip_list); - bool Persist(UInt64 old_his_cp, UInt64 new_his_cp); + RetCode Persist(UInt64 old_his_cp, UInt64 new_his_cp); bool PersistHDFS(UInt64 old_his_cp, UInt64 new_his_cp); bool PersistDisk(UInt64 old_his_cp, UInt64 new_his_cp); @@ -234,6 +233,8 @@ class PartitionStorage { StorageLevel desirable_storage_level_; Lock write_lock_; + + // static ofstream logfile; }; //} // namespace storage //} // namespace claims diff --git 
a/txn_manager/Makefile.am b/txn_manager/Makefile.am index c6a7099c6..fa9374c87 100644 --- a/txn_manager/Makefile.am +++ b/txn_manager/Makefile.am @@ -1,6 +1,11 @@ -AM_CPPFLAGS= -fPIC -fpermissive +AM_CPPFLAGS= -fPIC -fpermissive\ +-I${BOOST_HOME} \ +-I${BOOST_HOME}/boost/serialization \ +-I${HADOOP_HOME}/include\ +-I${JAVA_HOME}/include\ +-I${JAVA_HOME}/include/linux -AM_LDFLAGS=-lc -lm -lrt -ll -ly -lstdc++ -lxs -lcaf_core -lcaf_io +AM_LDFLAGS=-lc -lm -lrt -ll -ly -lstdc++ -lxs -lcaf_core -lcaf_io -lboost_serialization if OPT_TCMALLOC AM_CPPFLAGS+=-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free @@ -10,23 +15,33 @@ endif LDADD = ../catalog/libcatalog.a \ ../catalog/stat/libstat.a \ ../loader/libloader.a \ + ../storage/libstorage.a \ ../common/libcommon.a \ ../common/expression/libexpression.a \ + ../common/Expression/libexpression.a \ + ../common/Schema/libschema.a \ + ../common/Block/libblock.a \ + ../common/type/libtypes.a \ + ../common/types/ttmath/libttmath.a \ + ../common/serialization/libserialization.a \ ../utility/libutility.a \ + ../Executor/libexecutor.a \ ../sql_parser/ast_node/libast_node.a \ ../sql_parser/parser/libparser.a \ + ../BufferManager/libbuffermanager.a \ + ../node_manager/libnodemanager.a \ ${BOOST_HOME}/stage/lib/libboost_system.a \ ${BOOST_HOME}/stage/lib/libboost_system.so \ ${BOOST_HOME}/stage/lib/libboost_serialization.a \ - ${BOOST_HOME}/stage/lib/libboost_serialization.a + ${BOOST_HOME}/stage/lib/libboost_serialization.so noinst_LIBRARIES=libtxnmanager.a libtxnmanager_a_SOURCES = \ - txn.hpp txn.cpp \ - txn_client.hpp txn_client.cpp \ - txn_server.hpp txn_server.cpp \ - txn_log.hpp txn_log.cpp + txn.hpp txn.cpp \ + txn_client.hpp txn_client.cpp \ + txn_server.hpp txn_server.cpp \ + txn_log.hpp txn_log.cpp diff --git a/txn_manager/txn.cpp b/txn_manager/txn.cpp index 72bc8113e..d37666c5a 100644 --- a/txn_manager/txn.cpp +++ b/txn_manager/txn.cpp @@ -27,6 +27,7 @@ */ #include "txn.hpp" #include 
"txn_server.hpp" + namespace claims { namespace txn { @@ -238,11 +239,22 @@ void TxnBin::MergeSnapshot(Query &query) const { } void TxnBin::MergeTxn(Query &query, int len) const { + /* if (ct_ < len) { + cout << "ct:" << ct_ << ",len:" << len << endl; + assert(false); + }*/ + for (auto i = 0; i < len; i++) { + if (!(txn_list_[i].IsCommit() || txn_list_[i].IsAbort())) { + // cout << "error to scan txn!!!!!!!!!" << endl; + } + } for (auto i = 0; i < len; i++) if (txn_list_[i].IsCommit()) { + query.scan_count_++; for (auto &strip : txn_list_[i].strip_list_) query.snapshot_[strip.first].push_back(strip.second); } else if (txn_list_[i].IsAbort()) { + assert(false); for (auto &strip : txn_list_[i].strip_list_) query.abort_list_[strip.first].push_back(strip.second); } diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index 0130c4dd9..eaba43a27 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -112,7 +112,7 @@ static const int kGCTime = 5; static const int kTimeout = 3; static const int kBlockSize = 64 * 1024; static const int kTailSize = sizeof(unsigned); -static const int kTxnBinSize = 3; // 1024; +static const int kTxnBinSize = 10; // 1024; inline UInt64 GetGlobalPartId(UInt64 table_id, UInt64 projeciton_id, UInt64 partition_id) { @@ -301,7 +301,7 @@ class Query { */ unordered_map rt_cp_list_; unordered_map> abort_list_; - + UInt64 scan_count_ = 0; Query() {} Query(UInt64 ts, const unordered_map &his_cp_list, const unordered_map &rt_cp_list) @@ -336,7 +336,7 @@ class Query { map scan_cp_list_; template void serialize(Archive &ar, const unsigned int version) { - ar &scan_snapshot_ &scan_cp_list_; + ar &ts_ &scan_snapshot_ &scan_cp_list_; } }; inline bool operator==(const Query &lhs, const Query &rhs) { @@ -434,7 +434,8 @@ class TxnBin { } static UInt64 GetTxnBinMaxTs(UInt64 txnbin_id, UInt64 core_num, UInt64 core_id) { - return (txnbin_id + 1) * kTxnBinSize * core_num + core_id; + // return (txnbin_id + 1) * kTxnBinSize * core_num + core_id; + return 
txnbin_id * kTxnBinSize * core_num + kTxnBinSize + core_id; } Txn txn_list_[kTxnBinSize]; diff --git a/txn_manager/txn_client.cpp b/txn_manager/txn_client.cpp index 778351684..1c375728c 100644 --- a/txn_manager/txn_client.cpp +++ b/txn_manager/txn_client.cpp @@ -97,10 +97,11 @@ RetCode TxnClient::BeginIngest(const FixTupleIngestReq& request, caf::others >> []() { cout << " unkown message" << endl; }, caf::after(seconds(kTimeout)) >> [&] { ret = -1; - cout << "time out" << endl; + cout << "begin ingest timeout" + << endl; }); } catch (...) { - cout << "link fail" << endl; + cout << "begin ingest link fail" << endl; // return rLinkTmFail; return -1; } @@ -121,7 +122,9 @@ RetCode TxnClient::CommitIngest(UInt64 ts) { // ret = // rLinkTmTimeout; ret = -1; - cout << "time out" << endl; + cout + << "commit ingest timeout" + << endl; }); } catch (...) { cout << "link fail" << endl; @@ -144,7 +147,8 @@ RetCode TxnClient::AbortIngest(UInt64 id) { // ret = // rLinkTmTimeout; ret = -1; - cout << "time out" << endl; + cout << "abort ingest timeout" + << endl; }); } catch (...) { cout << "link fail" << endl; @@ -162,12 +166,13 @@ RetCode TxnClient::BeginQuery(const QueryReq& request, Query& query) { self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, QueryAtom::value, request) .await([&query](const Query& q) { query = q; }, - caf::after(seconds(kTimeout)) >> [&ret] { - ret = -1; - cout << "time out" << endl; - }); + caf::after(seconds(15)) >> [&ret] { + ret = -1; + cout << "begin query time out" + << endl; + }); } catch (...) { - cout << "link fail" << endl; + cout << "begin query link fail" << endl; // return rLinkTmFail; return -1; } @@ -183,7 +188,8 @@ RetCode TxnClient::CommitQuery(UInt64 ts) { .await([&ret](RetCode r) { ret = r; }, caf::after(seconds(kTimeout)) >> [&ret] { ret = -1; - cout << "time out" << endl; + cout << "commit query timeout" + << endl; }); } catch (...) 
{ cout << "link to proxy fail in commitQuery" << endl; @@ -193,8 +199,7 @@ RetCode TxnClient::CommitQuery(UInt64 ts) { RetCode TxnClient::CommitCheckpoint(UInt64 ts, UInt64 part, UInt64 his_cp, UInt64 rt_cp) { - // RetCode ret = rSuccess; - RetCode ret = 0; + RetCode ret = rSuccess; try { caf::scoped_actor self; self->sync_send(TxnServer::active_ ? TxnServer::proxy_ : proxy_, @@ -204,7 +209,8 @@ RetCode TxnClient::CommitCheckpoint(UInt64 ts, UInt64 part, UInt64 his_cp, // ret = // rLinkTmTimeout; ret = -1; - cout << "time out" << endl; + cout << "commit cp timeout" + << endl; }); } catch (...) { cout << "link fail @ CommitCheckpoint" << endl; diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index af4705326..c277c9f4d 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -25,12 +25,20 @@ * Description: * */ +#include +#include #include "txn_server.hpp" - #include "caf/all.hpp" #include "txn_log.hpp" - +//#include "../catalog/catalog.h" +#include "../utility/resource_guard.h" +// using claims::catalog::Catalog; +//#include "../loader/slave_loader.h" +//#include "../Environment.h" +#include "../txn_manager/txn_server.hpp" using caf::aout; +using std::ofstream; +ofstream txn_log; namespace claims { namespace txn { // using claims::common::rSuccess; @@ -81,7 +89,7 @@ caf::behavior TxnCore::make_behavior() { return caf::make_message(rSuccess); }, [this](AbortIngestAtom, const UInt64 ts) -> caf::message { - // cout << "abort:" << ts << endl; + cout << "!!!!!!!abort:" << ts << endl; RetCode ret = rSuccess; auto id = TxnBin::GetTxnBinID(ts, TxnServer::concurrency_); auto pos = TxnBin::GetTxnBinPos(ts, TxnServer::concurrency_); @@ -90,34 +98,59 @@ caf::behavior TxnCore::make_behavior() { }, [this](QueryAtom, shared_ptr query, bool include_abort) -> caf::message { - auto id = TxnBin::GetTxnBinID(query->ts_, TxnServer::concurrency_); - auto pos = TxnBin::GetTxnBinPos(query->ts_, TxnServer::concurrency_); - auto ts = 
TxnBin::GetTxnBinMaxTs(id, TxnServer::concurrency_, core_id_); - auto remain = kTxnBinSize - (ts - query->ts_) / TxnServer::concurrency_; + int last_core = query->ts_ / TxnServer::concurrency_; + int bin_num = 0; + if (query->ts_ >= TxnServer::concurrency_) + bin_num = (query->ts_) / (TxnServer::concurrency_ * kTxnBinSize); + // cout << "bin_num:" << bin_num << endl; + int remain = 0; + if (query->ts_ > TxnServer::concurrency_) { + auto all_remain = + (query->ts_) % (TxnServer::concurrency_ * kTxnBinSize); + remain = (all_remain + TxnServer::concurrency_ - 1 - core_id_) / + TxnServer::concurrency_; + } + if (core_id_ > last_core && remain > 0) remain--; + if (!include_abort) { + cout << "qy_ts:" << query->ts_ << ",core:" << core_id_ + << ",bin num:" << bin_num << ",re:" << remain << endl; + } if (remain > 0) { - txnbin_list_[id].MergeTxn(*query, remain); + txnbin_list_[bin_num].MergeTxn(*query, remain); + auto count = txnbin_list_[bin_num].Count(); + if (count < remain) + cout << "!!!!!error:" << query->ts_ << "@" << core_id_ << "," + << bin_num << "?count:" << count << ",remain:" << remain + << endl; } - while (id > 0) { - --id; + + for (int id = bin_num - 1; id >= 0; id--) { if (txnbin_list_[id].IsSnapshot()) { txnbin_list_[id].MergeSnapshot(*query); break; } else { txnbin_list_[id].MergeTxn(*query, kTxnBinSize); + auto count = txnbin_list_[id].Count(); + if (count < kTxnBinSize) + cout << "$$$$$$error:" << query->ts_ << "@" << core_id_ << "," + << bin_num << "?count:" << count << endl; } } auto next_core_id = (core_id_ + 1) % TxnServer::concurrency_; if (next_core_id != TxnServer::GetCoreID(query->ts_)) { // scan next core this->forward_to(TxnServer::cores_[next_core_id]); - } else if (include_abort && false) { // process the final query - cout << "size of abort list: " << endl; + } else if (include_abort) { // + // process the final subquery + // cout << "size of abort list: " << endl; for (auto& part_cp : query->rt_cp_list_) { auto part = part_cp.first; auto 
checkpoint = part_cp.second; + // mix snapshot_list into abort_list query->abort_list_[part].insert(query->abort_list_[part].end(), query->snapshot_[part].begin(), query->snapshot_[part].end()); + // clear abort list Strip::Sort(query->abort_list_[part]); Strip::Merge(query->abort_list_[part]); Strip::Filter(query->abort_list_[part], @@ -132,12 +165,13 @@ caf::behavior TxnCore::make_behavior() { return true; } }); + // remove not continuous commit/abort strip if (query->abort_list_[part].size() > 0) { - auto abort_pos = query->abort_list_[part][0].first + - query->abort_list_[part][0].second; + auto continuous_pos = query->abort_list_[part][0].first + + query->abort_list_[part][0].second; Strip::Filter(query->snapshot_[part], - [abort_pos](PStrip& pstrip) -> bool { - if (pstrip.first + pstrip.second <= abort_pos) + [continuous_pos](PStrip& pstrip) -> bool { + if (pstrip.first + pstrip.second <= continuous_pos) return true; else return false; @@ -232,6 +266,7 @@ string TxnCore::ToString() { } caf::behavior TxnServer::make_behavior() { + if (!txn_log.is_open()) txn_log.open("txn_log.txt"); try { caf::io::publish(proxy_, port_, nullptr, true); cout << "txn server bind to port:" << port_ << " success" << endl; @@ -278,6 +313,7 @@ caf::behavior TxnServer::make_behavior() { cp_list_[part].SetHisCP(ts, his_cp); cp_list_[part].SetRtCP(ts, rt_cp); cout << "commit " << part << ":" << his_cp << "," << rt_cp << endl; + return caf::make_message(rSuccess); }, [this](GCAtom) { From 7aa29420b8f5180ad1c2f69c0fb0d1da9132543f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cyestodaylee=E2=80=9D?= <885626704@qq.com> Date: Fri, 17 Feb 2017 09:59:33 +0800 Subject: [PATCH 58/58] ADD: code comment for txn_manager FIX: "LoadFromDisk" should use 64bit int as file cursor --- node_manager/master_node.cpp | 17 +--- node_manager/slave_node.cpp | 2 +- node_manager/slave_node.h | 2 +- storage/BlockManager.cpp | 10 +-- storage/ChunkStorage.cpp | 13 +-- storage/PartitionStorage.cpp | 10 ++- 
txn_manager/txn.cpp | 9 -- txn_manager/txn.hpp | 111 +++++++++++++++++++++--- txn_manager/txn_client.cpp | 1 - txn_manager/txn_client.hpp | 33 +++++++- txn_manager/txn_server.cpp | 159 +++++++++++++++-------------------- txn_manager/txn_server.hpp | 23 ++++- 12 files changed, 235 insertions(+), 155 deletions(-) diff --git a/node_manager/master_node.cpp b/node_manager/master_node.cpp index 98c978628..6a2b245a6 100644 --- a/node_manager/master_node.cpp +++ b/node_manager/master_node.cpp @@ -159,20 +159,9 @@ class MasterNodeActor : public event_based_actor { delayed_send(this, std::chrono::seconds(kTimeout / 5), Updatelist::value); }, - [&](StorageBudgetAtom, const StorageBudgetMessage& message) - -> caf::message { - Environment::getInstance() - ->getResourceManagerMaster() - ->RegisterDiskBuget(message.nodeid, message.disk_budget); - Environment::getInstance() - ->getResourceManagerMaster() - ->RegisterMemoryBuget(message.nodeid, message.memory_budget); - LOG(INFO) << "receive storage budget message!! 
node: " - << message.nodeid << " : disk = " << message.disk_budget - << " , mem = " << message.memory_budget << endl; - return make_message(OkAtom::value); - }, - [=](AddBlockAtom, int part_id, int block_num) -> caf::message { + [=](AddBlockAtom, uint64_t part_id, int block_num) -> caf::message { + cout << "try to add block on partition:" << part_id << " block nums " + << block_num << endl; RetCode ret = master_node_->AddBlock(part_id, block_num); return make_message(ret); }, diff --git a/node_manager/slave_node.cpp b/node_manager/slave_node.cpp index 59fd537a7..7e6ddf0dd 100644 --- a/node_manager/slave_node.cpp +++ b/node_manager/slave_node.cpp @@ -416,7 +416,7 @@ RetCode SlaveNode::RegisterToMaster(bool isFirstRegister) { return ret; } -RetCode SlaveNode::AddBlocks(int part_id, int block_num) { +RetCode SlaveNode::AddBlocks(uint64_t part_id, int block_num) { RetCode ret = rSuccess; try { // cout << "try slave send add block on part:" << part_id << endl; diff --git a/node_manager/slave_node.h b/node_manager/slave_node.h index c536cb0cd..7dc1d1230 100644 --- a/node_manager/slave_node.h +++ b/node_manager/slave_node.h @@ -50,7 +50,7 @@ class SlaveNode : public BaseNode { RetCode RegisterToMaster(bool isFirstRegister); RetCode reRegisterToMaster(); - RetCode AddBlocks(int part, int block_num); + RetCode AddBlocks(uint64_t part, int block_num); static SlaveNode* GetInstance(); RetCode AddOneNode(const unsigned int& node_id, const string& node_ip, diff --git a/storage/BlockManager.cpp b/storage/BlockManager.cpp index cef10d26d..ca43be7ae 100755 --- a/storage/BlockManager.cpp +++ b/storage/BlockManager.cpp @@ -300,7 +300,7 @@ int BlockManager::LoadFromHdfs(const ChunkID& chunk_id, void* const& desc, int BlockManager::LoadFromDisk(const ChunkID& chunk_id, void* const& desc, const unsigned& length) const { int ret = 0; - unsigned offset = chunk_id.chunk_off; + uint64_t offset = chunk_id.chunk_off; int fd = FileOpen(chunk_id.partition_id.getPathAndName().c_str(), 
O_RDONLY); if (fd == -1) { // logging_->elog("Fail to open file [%s].Reason:%s", @@ -315,10 +315,9 @@ int BlockManager::LoadFromDisk(const ChunkID& chunk_id, void* const& desc, DLOG(INFO) << "file [" << chunk_id.partition_id.getPathAndName().c_str() << "] is opened for offset [" << offset << "]" << endl; } - long int file_length = lseek(fd, 0, SEEK_END); - - long start_pos = CHUNK_SIZE * offset; + uint64_t file_length = lseek(fd, 0, SEEK_END); + uint64_t start_pos = CHUNK_SIZE * offset; // logging_->log("start_pos=%ld**********\n", start_pos); DLOG(INFO) << "start_pos=" << start_pos << "*********" << endl; @@ -353,7 +352,6 @@ bool BlockManager::ContainsPartition(const PartitionID& part) { bool BlockManager::AddPartition(const PartitionID& partition_id, const unsigned& number_of_chunks, const StorageLevel& desirable_storage_level) { - lock.acquire(); // test boost::unordered_map::const_iterator it = partition_id_to_storage_.find(partition_id); @@ -401,7 +399,7 @@ bool BlockManager::RemovePartition(const PartitionID& partition_id) { PartitionStorage* BlockManager::GetPartitionHandle( const PartitionID& partition_id) { - LockGuard guard(lock); + LockGuard guard(lock); DLOG(INFO) << "partid2storage size is:" << partition_id_to_storage_.size(); DLOG(INFO) << "going to find storage [" << partition_id.getName() << "]"; boost::unordered_map::const_iterator it = diff --git a/storage/ChunkStorage.cpp b/storage/ChunkStorage.cpp index 9241ad21c..35fdc01c3 100755 --- a/storage/ChunkStorage.cpp +++ b/storage/ChunkStorage.cpp @@ -97,9 +97,10 @@ RetCode ChunkStorage::ApplyMemory() { sizeof(unsigned)) = 0; } -/* cout << "Success to apply mem chunk:" - << chunk_id_.partition_id.partition_off << "," << chunk_id_.chunk_off - << endl;*/ + /* cout << "Success to apply mem chunk:" + << chunk_id_.partition_id.partition_off << "," << + chunk_id_.chunk_off + << endl;*/ /* update the chunk info in the Chunk store in case that the * chunk_info is updated.*/ 
BlockManager::getInstance()->getMemoryChunkStore()->UpdateChunkInfo( @@ -109,8 +110,9 @@ RetCode ChunkStorage::ApplyMemory() { * The storage memory is full, some swap algorithm is needed here. * TODO: swap algorithm. */ -/* cout << "Failed to apply mem chunk:" << chunk_id_.partition_id.partition_off - << "," << chunk_id_.chunk_off << endl;*/ + /* cout << "Failed to apply mem chunk:" << + chunk_id_.partition_id.partition_off + << "," << chunk_id_.chunk_off << endl;*/ ret = claims::common::rNoMemory; assert(false); } @@ -124,7 +126,6 @@ RetCode ChunkStorage::ApplyMemory() { * file is chunk. */ ChunkReaderIterator* ChunkStorage::CreateChunkReaderIterator() { - lock_.acquire(); ChunkReaderIterator* ret; diff --git a/storage/PartitionStorage.cpp b/storage/PartitionStorage.cpp index 7601aec67..a06f3bd4f 100755 --- a/storage/PartitionStorage.cpp +++ b/storage/PartitionStorage.cpp @@ -302,8 +302,8 @@ PartitionStorage::TxnPartitionReaderIterator::TxnPartitionReaderIterator( auto end = begin + strip.second; while (begin < end) { auto block = begin / BLOCK_SIZE; - auto len = (block + 1) * BLOCK_SIZE <= end - ? (block + 1) * BLOCK_SIZE - begin + auto len = (static_cast(block) + 1) * BLOCK_SIZE <= end + ? 
(static_cast(block) + 1) * BLOCK_SIZE - begin : end - begin; rt_strip_list_.push_back(PStrip(begin, len)); begin += len; @@ -316,6 +316,7 @@ PartitionStorage::TxnPartitionReaderIterator::TxnPartitionReaderIterator( } cout << str << endl;*/ qylog << "*********query:" << qylog_count++ << "***********" << endl; + qylog << "last_his_block:" << last_his_block_ << endl; } PartitionStorage::TxnPartitionReaderIterator::~TxnPartitionReaderIterator() { @@ -352,7 +353,8 @@ bool PartitionStorage::TxnPartitionReaderIterator::NextBlock( << "," << 0 << "," << block->getTuplesInBlock() << ">" << endl;*/ if (!block->Full()) - qylog << "HgetBlock() << ",sizes:" << block->getTuplesInBlock() << ">" << endl; return true; } @@ -485,6 +487,7 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, (new_his_cp + BLOCK_SIZE) % CHUNK_SIZE - sizeof(unsigned); auto his_addr = chunk_his.hook + new_his_cp % CHUNK_SIZE; auto rt_addr = chunk_rt.hook + begin % CHUNK_SIZE; + if (move == BLOCK_SIZE) { // full block memcpy(his_addr, rt_addr, BLOCK_SIZE); /* logfile << "full<" << begin / BLOCK_SIZE << "," << begin % @@ -506,6 +509,7 @@ UInt64 PartitionStorage::MergeToHis(UInt64 old_his_cp, memcpy(his_addr, rt_addr, real_move); *reinterpret_cast(his_addr + BLOCK_SIZE - sizeof(unsigned)) = tuple_count; + /* logfile << "part<" << begin / BLOCK_SIZE << "," << begin % BLOCK_SIZE << "," << tuple_count << "> => <" << new_his_cp / diff --git a/txn_manager/txn.cpp b/txn_manager/txn.cpp index d37666c5a..d88919510 100644 --- a/txn_manager/txn.cpp +++ b/txn_manager/txn.cpp @@ -239,15 +239,6 @@ void TxnBin::MergeSnapshot(Query &query) const { } void TxnBin::MergeTxn(Query &query, int len) const { - /* if (ct_ < len) { - cout << "ct:" << ct_ << ",len:" << len << endl; - assert(false); - }*/ - for (auto i = 0; i < len; i++) { - if (!(txn_list_[i].IsCommit() || txn_list_[i].IsAbort())) { - // cout << "error to scan txn!!!!!!!!!" 
<< endl; - } - } for (auto i = 0; i < len; i++) if (txn_list_[i].IsCommit()) { query.scan_count_++; diff --git a/txn_manager/txn.hpp b/txn_manager/txn.hpp index eaba43a27..e17497581 100644 --- a/txn_manager/txn.hpp +++ b/txn_manager/txn.hpp @@ -114,11 +114,18 @@ static const int kBlockSize = 64 * 1024; static const int kTailSize = sizeof(unsigned); static const int kTxnBinSize = 10; // 1024; +/** + * @brief generate global partition id from + * + */ inline UInt64 GetGlobalPartId(UInt64 table_id, UInt64 projeciton_id, UInt64 partition_id) { return partition_id + 1000 * (projeciton_id + 1000 * table_id); } +/** + * @brief generate global partition id from claims build-in PartitionID + */ inline UInt64 GetGlobalPartId(PartitionID part) { auto table_id = part.projection_id.table_id; auto proj_id = part.projection_id.projection_off; @@ -126,20 +133,38 @@ inline UInt64 GetGlobalPartId(PartitionID part) { return GetGlobalPartId(table_id, proj_id, part_id); } +/** + * @brief generate table id from global partition id. + */ inline UInt64 GetTableIdFromGlobalPartId(UInt64 global_partition_id) { return global_partition_id / (1000 * 1000); } +/** + * @brief generate projection id from global partition id. + */ inline UInt64 GetProjectionIdFromGlobalPartId(UInt64 global_partition_id) { return (global_partition_id % (1000 * 1000)) / 1000; } + +/** + * @brief generate partition id from global partition id. 
+ */ inline UInt64 GetPartitionIdFromGlobalPartId(UInt64 global_partition_id) { return global_partition_id % (1000); } -/********Strip******/ +/** + * @brief: position and offset of complete strip + */ using PStrip = pair; +/** + * @brief: A tuple to describe set of rows + * @param: partition id [part_], + * position of strip [_pos], + * offset of strip[_offset] + */ class Strip { public: UInt64 part_; @@ -155,11 +180,18 @@ class Strip { void set_pos(UInt64 pos) { pos_ = pos; } void set_offset(UInt64 offset) { offset_ = offset; } string ToString(); + /** input [strip stream] ==by partition==> output [[strip stream]...] */ static void Map(vector &input, map> &output); + + /** sort strip stream [input] by order of position */ static void Sort(vector &input); static void Sort(vector &input); + + /** merge adjacent strip stream [input], input must be sorted beforehand */ static void Merge(vector &input); static void Merge(vector &input); + + /** filter strip stream [input], delete item if predicate is not successful */ static void Filter(vector &input, function predicate); static void Filter(vector &input, function predicate); }; @@ -167,7 +199,12 @@ class Strip { inline bool operator==(const Strip &a, const Strip &b) { return a.part_ == b.part_ && a.pos_ == b.pos_ && a.offset_ == b.offset_; } - +/** + * @brief: Describe the information about transaction state + * @param: transaction commit state [status_], + * real begin timestamp of transaction [realtime_], + * strip list of transaction writes [strip_list_] + */ class Txn { public: static const int kActive = 0; @@ -189,12 +226,16 @@ class Txn { string ToString(); }; +/** + * @brief: The transaction information that a query needs. + * @param: [his_cp_list_] is tuple of + * @param: [rc_cp_list_] is . 
+ * temporary, *need not to be serialized* + * @param: [part_pstrips] unmerged strip list after historical checkpoint + */ class Snapshot { public: unordered_map his_cp_list_; - /** - * real-time checkpoint will never be send. - */ unordered_map rt_cp_list_; unordered_map> part_pstrips_; string ToString() const; @@ -203,11 +244,10 @@ class Snapshot { his_cp_list_ = cps; }; unordered_map getHisCPS() const { return his_cp_list_; } + // add strip into snapshot, ignore strip beforehand real-time checkpoint void setPStrips(const unordered_map> &part_pstrips) { if (rt_cp_list_.size() > 0) { - /** - * Need to cut off all strip before ahead real-time checkpoint - */ + // Need to cut off all strip before ahead real-time checkpoint for (auto &pstrips : part_pstrips) for (auto &pstrip : pstrips.second) if (pstrip.first >= rt_cp_list_[pstrips.first]) @@ -219,6 +259,7 @@ class Snapshot { unordered_map> getPStrps() const { return part_pstrips_; } + // merge another snapshot or strip list void Merge(const vector &strips); void Merge(const Snapshot &snapshot); }; @@ -228,9 +269,14 @@ inline bool operator==(const Snapshot &lhs, const Snapshot &rhs) { lhs.part_pstrips_ == rhs.part_pstrips_; } +/** + * @brief: object to request to ingest tuple with fix length + * use [InsertStrip] to add a request to ingest number of [tuple_count] + * tuple with size of [tuple_Size] + * @param: [content_] is fix tuple part -> + */ class FixTupleIngestReq { public: - /*fix tuple part -> */ unordered_map content_; void InsertStrip(UInt64 part, UInt64 tuple_size, UInt64 tuple_count) { content_[part] = make_pair(tuple_size, tuple_count); @@ -245,6 +291,11 @@ inline bool operator==(const FixTupleIngestReq &a, const FixTupleIngestReq &b) { return a.content_ == b.content_; } +/** + * @brief: TM return [Ingest] object to execute data ingestion + * @param: a write-time/transaction id [ts_] for ingestion transaction, + * TM allocates [strip_list_] for ingestion transaction + */ class Ingest { public: UInt64 
ts_; @@ -270,7 +321,12 @@ inline bool operator==(const Ingest &lhs, const Ingest &rhs) { return lhs.ts_ == rhs.ts_; } -/************QueryReq************/ +/** + * @brief: SQL compiler use [QueryReq] to request a snapshot from TM + * @param: [part_list_] are partitions needed to scan, + * [include_abort_] is whether to scan abort strip( + * false for client query, true for create checkpoint). + */ class QueryReq { public: vector part_list_; @@ -289,7 +345,13 @@ inline bool operator==(const QueryReq &lhs, const QueryReq &rhs) { return lhs.part_list_ == rhs.part_list_; } -/***********Snapshot***********/ +/** + * @brief: [Query] is information for scan partition + * @param: [ts_] is read timestamp, + * [snapshot_] is strip list need to scan real-time chunk-list, + * [his_cp_list_] is historical checkpoint to scan historical chunk-list + * + */ class Query { public: UInt64 ts_; @@ -342,7 +404,9 @@ class Query { inline bool operator==(const Query &lhs, const Query &rhs) { return lhs.snapshot_ == rhs.snapshot_ && lhs.his_cp_list_ == rhs.his_cp_list_; } - +/** + * *not used yet* + */ class CheckpointReq { public: UInt64 ts_; @@ -367,7 +431,9 @@ inline bool operator==(const CheckpointReq &lhs, const CheckpointReq &rhs) { return lhs.ts_ == rhs.ts_ && lhs.part_ == rhs.part_; } -/*********Checkpoint***********/ +/** + * @brief: store versioned checkpoint + */ class TsCheckpoint { public: UInt64 GetHisCP(UInt64 ts) { @@ -397,8 +463,17 @@ class TsCheckpoint { map vers_rt_cp_; }; +/** + *@brief: store a list of transaction state in [txn_list_] + *@param: [ct_commit_] is number of all committed transaction, + *@param: [ct_abort] is number of all aborted transaction, + *@param: [ct_] is number of all transaction, ct_ >= ct_commit_ + ct_abort_ + *@param: [status] is whether this bin has product a "mini snapshot" in + * [snapshot_] + [abort_list_] + */ class TxnBin { public: + // get & set pos-th txn in txn Txn GetTxn(int pos) const { return txn_list_[pos]; } void SetTxn(int pos, 
const Txn &txn) { txn_list_[pos] = txn; @@ -408,6 +483,7 @@ class TxnBin { txn_list_[pos] = Txn(strip_list); ct_++; } + // set state tag of a transaction to "Commit" or Abort void CommitTxn(int pos) { txn_list_[pos].Commit(); ct_commit_++; @@ -416,6 +492,7 @@ class TxnBin { txn_list_[pos].Abort(); ct_abort_++; } + /** Is this bin are filled with "terminated"(commit or abort) */ bool IsFull() const { return ct_commit_ + ct_abort_ == kTxnBinSize; } bool IsSnapshot() const { return status_ == true; } int Count() const { return ct_; } @@ -426,15 +503,18 @@ class TxnBin { void MergeSnapshot(Query &query) const; void MergeTxn(Query &query, int len) const; string ToString(); + /** get bin id that transaction [ts] resides, + * [core_num] is number of cores in txn_manager */ static UInt64 GetTxnBinID(UInt64 ts, UInt64 core_num) { return (ts / core_num) / kTxnBinSize; } + /** get position of transaction [ts] in this bin */ static UInt64 GetTxnBinPos(UInt64 ts, UInt64 core_num) { return (ts / core_num) % kTxnBinSize; } + /** get the max timestamp of this bin can reside */ static UInt64 GetTxnBinMaxTs(UInt64 txnbin_id, UInt64 core_num, UInt64 core_id) { - // return (txnbin_id + 1) * kTxnBinSize * core_num + core_id; return txnbin_id * kTxnBinSize * core_num + kTxnBinSize + core_id; } @@ -450,6 +530,9 @@ class TxnBin { unordered_map> abort_list_; }; +/** + * setting CAF serialization + */ inline void CAFSerConfig() { caf::announce("FixTupleIngestReq", make_pair(&FixTupleIngestReq::get_content, diff --git a/txn_manager/txn_client.cpp b/txn_manager/txn_client.cpp index 1c375728c..1fb2d76f9 100644 --- a/txn_manager/txn_client.cpp +++ b/txn_manager/txn_client.cpp @@ -28,7 +28,6 @@ #include "txn_client.hpp" #include "txn_log.hpp" -//#include "../common/error_define.h" namespace claims { namespace txn { diff --git a/txn_manager/txn_client.hpp b/txn_manager/txn_client.hpp index 4c7f5586e..cb28074bc 100644 --- a/txn_manager/txn_client.hpp +++ b/txn_manager/txn_client.hpp @@ -64,19 
+64,46 @@ using std::chrono::milliseconds; namespace claims { namespace txn { - +/** + * @brief: Client APIs to transactional operate data ingestion + * @param: [ip_],[port_] are network address of TxnServer + * @param: [proxy_] is remote actor that handles request to TxnServer + */ class TxnClient { public: static string ip_; static int port_; static caf::actor proxy_; + + /** Initialize [TxnClient] that [TxnServer] work at */ static RetCode Init(string ip = kTxnIp, int port = kTxnPort); + + /** Just for debug, print some information */ static RetCode Debug(string flag); + + /** Use [request] as parameter to request a ingestion transaction from + * TxnServer. + * The [ingest] is assigned to transaction information after function called.*/ static RetCode BeginIngest(const FixTupleIngestReq& request, Ingest& ingest); - static RetCode CommitIngest(UInt64 id); - static RetCode AbortIngest(UInt64 id); + + /** Commit ingestion transaction [ts]. + * [ts] is write timestamp of ingestion transaction, + * set its visibility to true. */ + static RetCode CommitIngest(UInt64 ts); + + /** Abort ingestion transaction [ts]. + * [id] is write timestamp of ingestion transaction. */ + static RetCode AbortIngest(UInt64 ts); + + /** Use [request] as parameter to request a query transaction from TxnServer. 
+ * [query] assigned to transaction information after function called */ static RetCode BeginQuery(const QueryReq& request, Query& query); + + /** Announce query transaction [ts] is end*/ static RetCode CommitQuery(UInt64 ts); + + /** Commit checkpoint [ts] on [part], + * set new historical checkpoint [his_cp], and real-time checkpoint [rt_cp] */ static RetCode CommitCheckpoint(UInt64 ts, UInt64 part, UInt64 his_cp, UInt64 rt_cp); }; diff --git a/txn_manager/txn_server.cpp b/txn_manager/txn_server.cpp index c277c9f4d..9a083109c 100644 --- a/txn_manager/txn_server.cpp +++ b/txn_manager/txn_server.cpp @@ -30,11 +30,7 @@ #include "txn_server.hpp" #include "caf/all.hpp" #include "txn_log.hpp" -//#include "../catalog/catalog.h" #include "../utility/resource_guard.h" -// using claims::catalog::Catalog; -//#include "../loader/slave_loader.h" -//#include "../Environment.h" #include "../txn_manager/txn_server.hpp" using caf::aout; using std::ofstream; @@ -51,11 +47,8 @@ namespace txn { // using claims::common::rAbortIngestTxnFail; // using claims::common::rCommitCheckpointFail; -/***************/ atomic TimeStamp::now_; -/**************/ - int TxnServer::port_ = kTxnPort; int TxnServer::concurrency_ = kConcurrency; caf::actor TxnServer::proxy_; @@ -65,7 +58,6 @@ bool TxnServer::active_ = false; unordered_map> TxnServer::pos_list_; unordered_map TxnServer::cp_list_; set TxnServer::active_querys_; -caf::actor test; caf::behavior TxnCore::make_behavior() { this->delayed_send(this, seconds(3 + core_id_), MergeAtom::value); @@ -74,14 +66,12 @@ caf::behavior TxnCore::make_behavior() { [this](DebugAtom, string flag) { cout << ToString() << endl; }, [this](IngestAtom, shared_ptr ingest) -> caf::message { RetCode ret = rSuccess; - // cout << "begin ingestion" << endl << ingest->ToString() << endl; auto id = TxnBin::GetTxnBinID(ingest->ts_, TxnServer::concurrency_); auto pos = TxnBin::GetTxnBinPos(ingest->ts_, TxnServer::concurrency_); txnbin_list_[id].SetTxn(pos, 
ingest->strip_list_); return caf::make_message(ret, *ingest); }, [this](CommitIngestAtom, const UInt64 ts) -> caf::message { - // cout << "commit:" << ts << endl; RetCode ret = rSuccess; auto id = TxnBin::GetTxnBinID(ts, TxnServer::concurrency_); auto pos = TxnBin::GetTxnBinPos(ts, TxnServer::concurrency_); @@ -89,7 +79,6 @@ caf::behavior TxnCore::make_behavior() { return caf::make_message(rSuccess); }, [this](AbortIngestAtom, const UInt64 ts) -> caf::message { - cout << "!!!!!!!abort:" << ts << endl; RetCode ret = rSuccess; auto id = TxnBin::GetTxnBinID(ts, TxnServer::concurrency_); auto pos = TxnBin::GetTxnBinPos(ts, TxnServer::concurrency_); @@ -102,7 +91,6 @@ caf::behavior TxnCore::make_behavior() { int bin_num = 0; if (query->ts_ >= TxnServer::concurrency_) bin_num = (query->ts_) / (TxnServer::concurrency_ * kTxnBinSize); - // cout << "bin_num:" << bin_num << endl; int remain = 0; if (query->ts_ > TxnServer::concurrency_) { auto all_remain = @@ -132,7 +120,7 @@ caf::behavior TxnCore::make_behavior() { txnbin_list_[id].MergeTxn(*query, kTxnBinSize); auto count = txnbin_list_[id].Count(); if (count < kTxnBinSize) - cout << "$$$$$$error:" << query->ts_ << "@" << core_id_ << "," + cout << "error:" << query->ts_ << "@" << core_id_ << "," << bin_num << "?count:" << count << endl; } } @@ -140,9 +128,7 @@ caf::behavior TxnCore::make_behavior() { if (next_core_id != TxnServer::GetCoreID(query->ts_)) { // scan next core this->forward_to(TxnServer::cores_[next_core_id]); - } else if (include_abort) { // - // process the final subquery - // cout << "size of abort list: " << endl; + } else if (include_abort) { for (auto& part_cp : query->rt_cp_list_) { auto part = part_cp.first; auto checkpoint = part_cp.second; @@ -182,27 +168,19 @@ caf::behavior TxnCore::make_behavior() { return caf::make_message(*query); }, [this](MergeAtom) { - // cout << "start merge @ core:" << core_id_ << endl; while (txnbin_list_[txnbin_cur_].IsFull()) { if (txnbin_cur_ == 0) { - // cout << 
ToString() << endl; txnbin_list_[txnbin_cur_].GenSnapshot(); - // cout << "merge: <" << core_id_ << "," << txnbin_cur_ << "," - // << txnbin_list_[txnbin_cur_].IsSnapshot() << ">\n"; } else { txnbin_list_[txnbin_cur_].GenSnapshot( txnbin_list_[txnbin_cur_ - 1]); - // cout << "merge: <" << core_id_ << "," << txnbin_cur_ << "," - // << txnbin_list_[txnbin_cur_].IsSnapshot() << ">\n"; } - // cout << ToString() << endl; txnbin_cur_++; } this->delayed_send(this, seconds(3 + core_id_), MergeAtom::value); }, [this](GCAtom, UInt64 min_ts_remain) { - // cout << "core:" << core_id_ << " gc:" << min_ts << endl; - // if (core_id_ == 0) + map new_txnbin_list; auto ct = 0; for (auto it = txnbin_list_.rbegin(); it != txnbin_list_.rend(); it++) { @@ -220,16 +198,6 @@ caf::behavior TxnCore::make_behavior() { new_txnbin_list[id] = txnbin_list_[id]; } } - - // if (core_id_ == 0 && new_txnbin_list.size() < - // txnbin_list_.size()) { - // cout << "gc :" << endl; - // for (auto& txnbin : txnbin_list_) - // if (new_txnbin_list.find(txnbin.first) == - // new_txnbin_list.end()) - // cout << "<" << core_id_ << "," << txnbin.first << ">"; - // cout << endl; - // } if (new_txnbin_list.size() < txnbin_list_.size()) txnbin_list_ = new_txnbin_list; @@ -273,63 +241,69 @@ caf::behavior TxnServer::make_behavior() { } catch (...) 
{ cout << "txn server bind to port:" << port_ << " fail" << endl; } + // this gc for txn_manager is not complete implemented // this->delayed_send(this, seconds(3), GCAtom::value); - return { - [this](DebugAtom, string flag) -> caf::message { - cout << "debug begin" << endl; - for (auto& core : cores_) caf::anon_send(core, DebugAtom::value, flag); - return caf::make_message(rSuccess); - }, - [this](IngestAtom, const FixTupleIngestReq& request) { - auto ts = TimeStamp::GenAdd(); - auto ingest = make_shared(request.content_, ts); - for (auto& part : ingest->strip_list_) - ingest->InsertStrip( - AtomicMalloc(part.first, part.second.first, part.second.second)); - current_message() = caf::make_message(IngestAtom::value, ingest); - forward_to(cores_[GetCoreID(ts)]); - }, - [this](CommitIngestAtom, - const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, - [this](AbortIngestAtom, - const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, - [this](QueryAtom, const QueryReq& request) { - auto ts = TimeStamp::Gen(); - active_querys_.insert(ts); - auto query = - make_shared(ts, GetHisCPList(ts, request.part_list_), - GetRtCPList(ts, request.part_list_)); - current_message() = - caf::make_message(QueryAtom::value, query, request.include_abort_); - forward_to(cores_[GetCoreID(ts)]); - // cout << "**********query:" << ts << " begin**************" << endl; - }, - [this](CommitQueryAtom, UInt64 ts) -> caf::message { - active_querys_.erase(ts); - return caf::make_message(rSuccess); - }, - [this](CommitCPAtom, UInt64 ts, UInt64 part, UInt64 his_cp, UInt64 rt_cp) - -> caf::message { + return {[this](DebugAtom, string flag) -> caf::message { + cout << "debug begin" << endl; + for (auto& core : cores_) + caf::anon_send(core, DebugAtom::value, flag); + return caf::make_message(rSuccess); + }, + [this](IngestAtom, const FixTupleIngestReq& request) { + auto ts = TimeStamp::GenAdd(); + // temporary object to store snapshot, etc. 
between cores + auto ingest = make_shared(request.content_, ts); + for (auto& part : ingest->strip_list_) + // atomic malloc a strip for each requested partition + ingest->InsertStrip(AtomicMalloc(part.first, part.second.first, + part.second.second)); + current_message() = caf::make_message(IngestAtom::value, ingest); + // forward request to each "core" + forward_to(cores_[GetCoreID(ts)]); + }, + [this](CommitIngestAtom, + const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, + [this](AbortIngestAtom, + const UInt64 ts) { forward_to(cores_[GetCoreID(ts)]); }, + [this](QueryAtom, const QueryReq& request) { + auto ts = TimeStamp::Gen(); + active_querys_.insert(ts); + auto query = + make_shared(ts, GetHisCPList(ts, request.part_list_), + GetRtCPList(ts, request.part_list_)); + current_message() = caf::make_message(QueryAtom::value, query, + request.include_abort_); + // forward request to each core to get sub snapshot + forward_to(cores_[GetCoreID(ts)]); + }, + [this](CommitQueryAtom, UInt64 ts) -> caf::message { + // query execution end + active_querys_.erase(ts); + return caf::make_message(rSuccess); + }, + [this](CommitCPAtom, UInt64 ts, UInt64 part, UInt64 his_cp, + UInt64 rt_cp) -> caf::message { + // commit a checkpoint on partition, set new one cp_list_[part].SetHisCP(ts, his_cp); cp_list_[part].SetRtCP(ts, rt_cp); - cout << "commit " << part << ":" << his_cp << "," << rt_cp << endl; - + // cout << "commit " << part << ":" << his_cp << "," << rt_cp << + // endl; return caf::make_message(rSuccess); }, - [this](GCAtom) { - UInt64 ts; - if (active_querys_.size() > 0) - ts = *active_querys_.begin(); - else - ts = TimeStamp::Gen(); - for (auto& core : TxnServer::cores_) - caf::anon_send(core, GCAtom::value, ts); - this->delayed_send(this, seconds(3), GCAtom::value); - }, - caf::others >> [this]() { - cout << "server unkown message:" - << to_string(current_message()) << endl; - }}; + [this](GCAtom) { + UInt64 ts; + if (active_querys_.size() > 0) + ts = 
*active_querys_.begin(); + else + ts = TimeStamp::Gen(); + for (auto& core : TxnServer::cores_) + caf::anon_send(core, GCAtom::value, ts); + this->delayed_send(this, seconds(3), GCAtom::value); + }, + caf::others >> [this]() { + cout << "server unkown message:" + << to_string(current_message()) << endl; + }}; } RetCode TxnServer::Init(int concurrency, int port) { @@ -337,12 +311,10 @@ RetCode TxnServer::Init(int concurrency, int port) { concurrency_ = concurrency; port_ = port; proxy_ = caf::spawn(); + // start all cores for (auto i = 0; i < concurrency_; i++) cores_.push_back(caf::spawn(i)); CAFSerConfig(); - // RecoveryCheckpoint(); - // RecoveryFromTxnLog(); - // srand((unsigned)time(NULL)); return 0; } @@ -358,15 +330,16 @@ Strip TxnServer::AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 remain_count = TupleCount; int count = 0; while (remain_count > 0) { - // 求出一个块内可以存放的最多元组数 + // Calculate max tuple count that a block resides UInt64 use_count = (kBlockSize - block_pos - kTailSize) / TupleSize; if (use_count > remain_count) use_count = remain_count; - //使用块内可用区域 + // update remain count to be allocated remain_count -= use_count; strip.offset_ += use_count * TupleSize; block_pos += use_count * TupleSize; - //将不可利用的空间也分配 + + // allocate unavailable space of block tail if (kBlockSize - block_pos - kTailSize < TupleSize) { strip.offset_ += kBlockSize - block_pos; block_pos = 0; diff --git a/txn_manager/txn_server.hpp b/txn_manager/txn_server.hpp index d82571c8f..589e0ebe9 100644 --- a/txn_manager/txn_server.hpp +++ b/txn_manager/txn_server.hpp @@ -49,7 +49,6 @@ #include "caf/io/all.hpp" #include "../txn_manager/txn.hpp" #include "../utility/Timer.h" -//#include "txn_log.hpp" namespace claims { namespace txn { @@ -84,18 +83,32 @@ class TimeStamp { private: static atomic now_; }; - +/** + * @brief: a work thread to process transaction, query, produce snapshot + * @param: [core_id] is the id of TxnCore + * @param:[txnbin_list_] store list of txnbin + * @param: 
[txnbin_cur_] is the current max txnbin id + */ class TxnCore : public caf::event_based_actor { public: UInt64 core_id_; UInt64 txnbin_cur_ = 0; - // txnbin id <=> txnbin object map txnbin_list_; caf::behavior make_behavior() override; TxnCore(int coreId) : core_id_(coreId) {} string ToString(); }; +/** + * @brief: a proxy thread to handle all request, operate state changes + * @param: [active_] is whether local [TxnServer] launched + * If true, need not to send request by network + * @param: [port_] is service network port, setting in "config file" + * @param: [concurrency_] is number of TxnCore + * @param: [proxy_] is TxnServer singleton instance + * @param: [pos_list_] is current cursor for all partitions + * @param: [cp_list_] is checkpoints for all partitions + */ class TxnServer : public caf::event_based_actor { public: static bool active_; @@ -105,13 +118,14 @@ class TxnServer : public caf::event_based_actor { static vector cores_; static unordered_map> pos_list_; static unordered_map cp_list_; - // static unordered_map> CountList; /**************** User APIs ***************/ static RetCode Init(int concurrency = kConcurrency, int port = kTxnPort); + /** Initialize [TxnServer], called when claims start **/ static RetCode LoadCPList(UInt64 ts, const unordered_map& his_cp_list, const unordered_map& rt_cp_list); static RetCode LoadPos(const unordered_map& pos_list); + /** hash transaction with [ts] to core id **/ static int GetCoreID(UInt64 ts) { return ts % concurrency_; } caf::behavior make_behavior() override; /**************** System APIs ***************/ @@ -121,6 +135,7 @@ class TxnServer : public caf::event_based_actor { UInt64 ts, const vector& parts); static unordered_map GetRtCPList(UInt64 ts, const vector& parts); + /** request atomic allocate next strip of [part] **/ static inline Strip AtomicMalloc(UInt64 part, UInt64 TupleSize, UInt64 TupleCount); };