Skip to content

Commit

Permalink
Removed unused code, few typos
Browse files Browse the repository at this point in the history
  • Loading branch information
JeanLucPons committed Jan 9, 2024
1 parent 3751511 commit 68a021e
Show file tree
Hide file tree
Showing 10 changed files with 44 additions and 80 deletions.
4 changes: 3 additions & 1 deletion atgpu/BndMPoleSymplectic4RadPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,16 @@ void BndMPoleSymplectic4RadPass::generateCode(std::string& code, PassMethodInfo
generateBendFringeEnter(code,info);
generateQuadFringeEnter(code,info);

// Kick/Drift methods are defined in PassMethodFactory
integrator.resetMethods();
// Default bend
integrator.addDriftMethod("p_norm=1.0/(1.0 + r6[4]);fastdrift(r6,%STEP%,p_norm)");
integrator.addKickMethod("bndthinkickrad(r6,elem->PolynomA,elem->PolynomB,%STEP%,elem->MaxOrder,elem->irho,elem->CRAD,p_norm)");

integrator.generateCode(code);

if(integrator.getLastKickWeight()!=0.0)
code.append(" p_norm = 1.0 / (1.0 + r6[4]);\n");

generateQuadFringeExit(code,info);
generateBendFringeExit(code,info);
generateApertures(code,info);
Expand Down
95 changes: 24 additions & 71 deletions atgpu/Lattice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@

using namespace std;

//#define COALESCED_MEMORY

// Get the header file
// Generated by setup.py
const string header = {
#include "element.gpuh"
};

Lattice::Lattice(SymplecticIntegrator& integrator,AT_FLOAT energy,int gpuId) : factory(PassMethodFactory(integrator)) {
Lattice::Lattice(int32_t nbElements,SymplecticIntegrator& integrator,AT_FLOAT energy,int gpuId) : factory(PassMethodFactory(integrator)) {
elements.reserve(nbElements);
lost = nullptr;
memset(&ringParams,0,sizeof(ringParams));
ringParams.Energy = energy;
Expand Down Expand Up @@ -95,35 +94,16 @@ void Lattice::generateGPUKernel() {

// GPU main track function
// extern C to prevent from mangled name
code.append("extern \"C\" __global__ void track(RING_PARAM *ringParam,ELEMENT* gpuRing,uint32_t startElem,uint32_t nbElement,uint32_t nbTotalElement,\n"
code.append("extern \"C\" __global__ void track(RING_PARAM *ringParam,ELEMENT* gpuRing,\n"
" uint32_t startElem,uint32_t nbElement,uint32_t nbTotalElement,\n"
" uint64_t nbPart,AT_FLOAT* rin,AT_FLOAT* rout,\n"
" uint32_t* lost,uint64_t turn,\n"
" int32_t *refpts,uint32_t nbRef\n"
" ) {\n");

code.append(" int threadId = blockIdx.x * blockDim.x + threadIdx.x;\n");

#ifdef COALESCED_MEMORY
// Copy ring param into shared mem (coalesced GM access)
code.append(" __shared__ ELEMENT elemData[GPU_BLOCK_SIZE];\n");
code.append(" if( (startElem + threadIdx.x) < nbTotalElement ) {\n");
code.append(" uint64_t *dPtr = (uint64_t *)&elemData[threadIdx.x];\n");
code.append(" uint64_t *sPtr = ((uint64_t *)gpuRing) + (startElem + threadIdx.x);\n");
code.append(" for(int i=0;i<sizeof(ELEMENT)/8;i++) {\n");
code.append(" dPtr[i] = sPtr[i*nbTotalElement];\n");
code.append(" }\n");
code.append(" }\n");
// Wait that all thread has filled the shared mem
code.append(" __syncthreads();\n");

code.append(" ELEMENT* elemPtr = elemData;\n");
code.append(" AT_FLOAT* _r6 = rin + threadId;\n");
code.append(" uint64_t r6Stride = nbPart;\n");
#else
code.append(" ELEMENT* elemPtr = &gpuRing[startElem];\n");
code.append(" AT_FLOAT* _r6 = rin + (6 * threadId);\n");
code.append(" uint64_t r6Stride = 1;\n");
#endif

// Exit if particle lost
code.append(" if(lost[threadId]) return;\n");
Expand All @@ -132,12 +112,12 @@ void Lattice::generateGPUKernel() {

// Copy particle coordinates into registers
code.append(" AT_FLOAT sr6[6];\n");
code.append(" sr6[0] = _r6[0*r6Stride];\n"); // x
code.append(" sr6[1] = _r6[1*r6Stride];\n"); // px/p0 = x'(1+d)
code.append(" sr6[2] = _r6[2*r6Stride];\n"); // y
code.append(" sr6[3] = _r6[3*r6Stride];\n"); // py/p0 = y'(1+d)
code.append(" sr6[4] = _r6[4*r6Stride];\n"); // d = (pz-p0)/p0
code.append(" sr6[5] = _r6[5*r6Stride];\n"); // c.tau (time lag)
code.append(" sr6[0] = _r6[0];\n"); // x
code.append(" sr6[1] = _r6[1];\n"); // px/p0 = x'(1+d)
code.append(" sr6[2] = _r6[2];\n"); // y
code.append(" sr6[3] = _r6[3];\n"); // py/p0 = y'(1+d)
code.append(" sr6[4] = _r6[4];\n"); // d = (pz-p0)/p0
code.append(" sr6[5] = _r6[5];\n"); // c.tau (time lag)
code.append(" AT_FLOAT* r6 = sr6;\n");
code.append(" AT_FLOAT fTurn = (AT_FLOAT)(ringParam->turnCounter + turn);\n");

Expand All @@ -150,16 +130,24 @@ void Lattice::generateGPUKernel() {
code.append(" switch(elemPtr->Type) {\n");
factory.generatePassMethodsCalls(code);
code.append(" }\n");
code.append(" bool pLost = !isfinite(_r6[0]) || !isfinite(_r6[1]) ||\n");
code.append(" !isfinite(_r6[2]) || !isfinite(_r6[3]) ||\n");
code.append(" !isfinite(_r6[4]) || !isfinite(_r6[5]) ||\n");
code.append(" (fabs(_r6[0]) > 1.0 || fabs(_r6[1]) > 1.0) ||\n");
code.append(" (fabs(_r6[2]) > 1.0 || fabs(_r6[3]) > 1.0);\n");
code.append(" if(!lost[threadId] & pLost) {\n");
code.append(" _r6[0] = NAN;\n");
code.append(" }\n");
code.append(" elemPtr++;\n");
code.append(" }\n");

// Copy back particle coordinates to global mem
code.append(" _r6[0*r6Stride] = sr6[0];\n");
code.append(" _r6[1*r6Stride] = sr6[1];\n");
code.append(" _r6[2*r6Stride] = sr6[2];\n");
code.append(" _r6[3*r6Stride] = sr6[3];\n");
code.append(" _r6[4*r6Stride] = sr6[4];\n");
code.append(" _r6[5*r6Stride] = sr6[5];\n");
code.append(" _r6[0] = sr6[0];\n");
code.append(" _r6[1] = sr6[1];\n");
code.append(" _r6[2] = sr6[2];\n");
code.append(" _r6[3] = sr6[3];\n");
code.append(" _r6[4] = sr6[4];\n");
code.append(" _r6[5] = sr6[5];\n");

code.append(" if( elem==nbTotalElement ) {\n");

Expand Down Expand Up @@ -229,29 +217,11 @@ void Lattice::fillGPUMemory() {
gpuPtr += element->getMemorySize();
}

// Transpose ELEMENT memory for GPU coalescence access
#ifdef COALESCED_MEMORY
size_t X = sizeof(ELEMENT)/8;
size_t Y = elements.size();
Transpose64(X,Y,memPtr);
#endif
gpu->hostToDevice(gpuRing,memPtr,size);
free(memPtr);

}

void Lattice::Transpose64(int32_t X,int32_t Y,void *mem) {

uint64_t *memPtr= (uint64_t *)mem;
uint64_t *memPtrT64 = (uint64_t *)malloc(X*Y*8);
for(size_t y=0;y<Y;y++)
for(size_t x=0;x<X;x++)
memPtrT64[x*Y + y] = memPtr[x + y*X];
memcpy(mem,memPtrT64,X*Y*8);
free(memPtrT64);

}

void Lattice::run(uint64_t nbTurn,uint64_t nbParticles,AT_FLOAT *rin,AT_FLOAT *rout,uint32_t nbRef,
uint32_t *refPts,uint64_t turnCounter) {

Expand All @@ -262,9 +232,6 @@ void Lattice::run(uint64_t nbTurn,uint64_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r
// Copy rin to gpu mem
void *gpuRin;
gpu->allocDevice(&gpuRin, nbParticles * 6 * sizeof(AT_FLOAT));
#ifdef COALESCED_MEMORY
Transpose64(6,nbParticles,rin);
#endif
gpu->hostToDevice(gpuRin, rin, nbParticles * 6 * sizeof(AT_FLOAT));

// Expand ref indexes
Expand Down Expand Up @@ -320,25 +287,11 @@ void Lattice::run(uint64_t nbTurn,uint64_t nbParticles,AT_FLOAT *rin,AT_FLOAT *r


// Turn loop
#ifdef COALESCED_MEMORY
uint32_t leftNbElement = nbElement%GPU_BLOCK_SIZE;
uint32_t alignedNbElem = nbElement - leftNbElement;
for(turn=0;turn<nbTurn;turn++) {
// By block of GPU_BLOCK_SIZE elements
nbElemToProcess = GPU_BLOCK_SIZE;
for (startElem = 0; startElem < alignedNbElem; startElem += GPU_BLOCK_SIZE)
gpu->run(GPU_BLOCK_SIZE, nbParticles + dummyParticles);
// Remaining elements
nbElemToProcess = leftNbElement;
if( nbElemToProcess ) gpu->run(GPU_BLOCK_SIZE, nbParticles + dummyParticles);
}
#else
for(turn=0;turn<nbTurn;turn++) {
startElem = 0;
nbElemToProcess = nbElement;
gpu->run(GPU_BLOCK_SIZE, nbParticles + dummyParticles);
}
#endif

// Get back data
gpu->deviceToHost(rout,gpuRout,routSize);
Expand Down
3 changes: 1 addition & 2 deletions atgpu/Lattice.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Lattice {

public:

explicit Lattice(SymplecticIntegrator& integrator,double energy,int gpuId);
explicit Lattice(int32_t nbElements,SymplecticIntegrator& integrator,double energy,int gpuId);
~Lattice();

// Add an element in the lattice
Expand All @@ -31,7 +31,6 @@ class Lattice {
private:

void generateGPUKernel();
void Transpose64(int32_t w,int32_t hm,void *mem);

PassMethodFactory factory; // Pass method code generation
std::vector<AbstractElement *> elements; // All elements
Expand Down
5 changes: 4 additions & 1 deletion atgpu/PyATGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,17 +179,20 @@ static PyObject *at_gpupass(PyObject *self, PyObject *args, PyObject *kwargs) {
// Create and run lattice on GPU
try {

double t0 = AbstractGPU::get_ticks();
// Default symplectic integrator 4th order (Forest/Ruth)
SymplecticIntegrator integrator(4);
// Create the GPU lattice and run it
PyInterface *pyI = (PyInterface *) AbstractInterface::getInstance();
size_t nElements = PyList_Size(lattice);
Lattice *l = new Lattice(integrator, 0.0, 0);
Lattice *l = new Lattice(nElements,integrator, 0.0, 0);
for (size_t i = 0; i < nElements; i++) {
PyObject *elem = PyList_GET_ITEM(lattice, i);
pyI->setObject(elem);
l->addElement();
}
double t1 = AbstractGPU::get_ticks();
cout << "Ring build: " << (t1-t0)*1000.0 << "ms" << endl;

npy_intp outdims[4] = {6,(npy_intp)(num_particles),num_refs,num_turns};
PyObject *rout = PyArray_EMPTY(4, outdims, NPY_DOUBLE, 1);
Expand Down
1 change: 0 additions & 1 deletion atgpu/RFCavityPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ void RFCavityPass::getParameters(AbstractInterface *param, PassMethodInfo *info)
CavityPass::getParameters(param,info);

elemData.Type = RFCAVITYPASS;
elemData.Length = param->getDouble("Length");

}

Expand Down
1 change: 0 additions & 1 deletion atgpu/StrMPoleSymplectic4Pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ void StrMPoleSymplectic4Pass::generateCode(std::string& code, PassMethodInfo *in
generateApertures(code,info);
generateQuadFringeEnter(code,info);

// Kick/Drift methods are defined in PassMethodFactory
integrator.resetMethods();
// Default straight magnet
integrator.addDriftMethod("fastdrift(r6,%STEP%,p_norm)");
Expand Down
6 changes: 4 additions & 2 deletions atgpu/StrMPoleSymplectic4RadPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,16 @@ void StrMPoleSymplectic4RadPass::generateCode(std::string& code, PassMethodInfo
generateApertures(code,info);
generateQuadFringeEnter(code,info);

// Kick/Drift methods are defined in PassMethodFactory
integrator.resetMethods();
// Default bend
// Default straight element
integrator.addDriftMethod("p_norm=1.0/(1.0 + r6[4]);fastdrift(r6,%STEP%,p_norm)");
integrator.addKickMethod("strthinkickrad(r6,elem->PolynomA,elem->PolynomB,%STEP%,elem->MaxOrder,elem->CRAD,p_norm)");

integrator.generateCode(code);

if(integrator.getLastKickWeight()!=0.0)
code.append(" p_norm = 1.0 / (1.0 + r6[4]);\n");

generateQuadFringeExit(code,info);
generateApertures(code,info);
generateExit(code,info);
Expand Down
4 changes: 4 additions & 0 deletions atgpu/SymplecticIntegrator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ void SymplecticIntegrator::addKickMethod(const std::string& kickMethod) {
kickMethods.push_back(kickMethod);
}

double SymplecticIntegrator::getLastKickWeight() {
return d[nbCoefficients-1];
}

void SymplecticIntegrator::generateCode(std::string& code) {

if( driftMethods.empty() || (driftMethods.size() != kickMethods.size()) )
Expand Down
3 changes: 3 additions & 0 deletions atgpu/SymplecticIntegrator.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ class SymplecticIntegrator {
void addDriftMethod(const std::string& driftMethod);
void addKickMethod(const std::string& kickMethod);

// Get last kick weight
double getLastKickWeight();

private:

void allocate(int nb);
Expand Down
2 changes: 1 addition & 1 deletion atgpu/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ int main(int argc,char **arv) {

try {

Lattice *l = new Lattice(integrator,6e9,1);
Lattice *l = new Lattice(0,integrator,6e9,1);
double t0 = AbstractGPU::get_ticks();
for(auto & element : elements) {
dI->setObject(&element);
Expand Down

0 comments on commit 68a021e

Please sign in to comment.