#include #include #if !(defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) #include #endif #include using namespace std; #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) clock_t global_time; #else struct timeval time_start,time_stop; #endif //*** IMPORTANT KERNEL STATIC OPTION. //*** UnComment/Comment the following line // In the first case (parameter defined) it simply assigns the block variables. // In the second case it performs fast unit propagation after any assignment #define SIMPLEBLOCK 0 //**** STATIC SIZE FOR block_vars and delta_vars //**** ARRAYS USED IN PARALLELIZING SEARCH #define BV_SIZE 101 #define DV_SIZE 100 #define FN_SIZE 10000 //*** Comment/Uncomment the following line for information //*** on memory allocation during CUDA execution //#define MYTRACE 0 // The following PARAMETERS decide the parallelism degree // and the complexity of the formula to be delagetd to CUDA // We allow two versions, with static (#define) and dynamic values // *** PARAMETER #1 (argv 1) // *** Modes for using GPU. Currently 0-3 int USEGPU = 0; // The default value // *** PARAMETER #2 (argv 2) // NUMBER OF BLOCKS = CUDABLOCKS // LOG_BLKS is the number of vars fixed by the block address int LOG_BLKS=2; int CUDABLOCKS=(1<b?a:b) // *** ALLOCATE / DEALLOCATE __host__ void allocate_first(){ //*** SIZE INDEPENDENT ON THE FORMULA if (USEGPU>0){ HANDLE_ERROR( cudaHostAlloc( (void**)&formula, MAX_NL * sizeof(int), cudaHostAllocDefault ) ); HANDLE_ERROR( cudaMalloc( (void**)&dev_mapped_formula, MAX_NL * sizeof(int))); HANDLE_ERROR( cudaMalloc((void**)&dev_formula, MAX_NL * sizeof(int))); HANDLE_ERROR( cudaHostAlloc( (void**)&clause_pointer, (MAX_NC+1) * sizeof(int), cudaHostAllocDefault ) ); HANDLE_ERROR( cudaMalloc( (void**)&dev_mapped_cp, (MAX_NC+1) * sizeof(int) )); HANDLE_ERROR( cudaMalloc((void**)&dev_cp, (MAX_NC+1) * sizeof(int) )); } else{ formula=(int*)malloc(MAX_NL * sizeof(int)); clause_pointer=(int*)malloc((MAX_NC+1) * sizeof(int)); } filtered_clause_pointer=(int*) malloc((MAX_NC+1) * sizeof(int)); filtered_formula =(int*) malloc(MAX_NL * sizeof(int)); } __host__ void allocate_second(){ #ifdef MYTRACE printf("Memory allocation (in main):\n"); printf(" (cuda) phi:\t%d bytes for %d literals\n", NL * sizeof(int), NL); printf(" (cuda) cp:\t%d bytes for %d clauses\n", (NC + 1) * sizeof(int), NC); printf(" (cuda) host_vars:\t%d bytes for %d vars\n", NV * sizeof(int), NV); printf(" (cuda) dev_parma_vars:\t%d bytes for %d vars\n", NV * sizeof(int), NV); printf(" (host) h_mask_num:\t%d bytes for %d blocks\n", PARblocks * sizeof(int), PARblocks); printf(" (host) h_mask_id:\t%d bytes for %d blocks\n", PARblocks * sizeof(int), PARblocks); fflush(stdout); #endif //*** SIZE DEPENDING ON THE FORMULA host_strat_vars=(int*)malloc(2*NV * sizeof(int)); host_vars=(int*)malloc(NV * sizeof(int)); if (USEGPU>0){ HANDLE_ERROR( cudaHostAlloc( (void**)&h_mask_data, (3* PARblocks+NV) * sizeof(int), cudaHostAllocDefault ) ); //questo non serve su solo cpu HANDLE_ERROR( cudaMalloc((void**)&dev_vars, NV * sizeof( int) )) ; HANDLE_ERROR( cudaMalloc( (void**)&dev_parma_vars, NV * sizeof(int) ) ); HANDLE_ERROR( cudaMalloc( (void**)&mask_data, (3* PARblocks+NV) * sizeof(int) ) ); } filtered_vars = (int*) malloc(NV * sizeof( int)); maps_vars = (int*) malloc(NV * sizeof( int)); level = (int*) malloc(NV * sizeof(int)); refs = (int*) malloc(NV * sizeof(int)); trail = (int*) malloc(NV * sizeof(int)); seen = (int*) malloc(NV * sizeof(int)); learnt_clause = (int*) malloc(NV * sizeof(int)); // AGO" giusto NV? // *** Initialization: if (USEGPU>0){ HANDLE_ERROR( cudaMemcpy( dev_mapped_formula, formula, NL * sizeof(int), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( dev_mapped_cp, clause_pointer, (NC+1) * sizeof(int), cudaMemcpyHostToDevice ) ); } for(int i=0;i0){ HANDLE_ERROR( cudaFree( mask_data )); // HANDLE_ERROR( cudaFree(dev_vars)); HANDLE_ERROR( cudaFree(dev_formula)); HANDLE_ERROR( cudaFree(dev_cp)); cudaEventDestroy( start ); cudaEventDestroy( stop ); } } //************************************************** // INPUT: // Data Structure: the formula is stored in two vectors. // The first stores the literals in sequence. // Its length is NL (0..NL-1) // The second stores the beginning of the various clauses // The number of clauses is NC. The vector is 0..NC // The last (redundant) cell stores the value NL // It also returns NV = 1+number of vars in the formula // Remark: the formula is read twice //************************************************** __host__ int simple_parsing(char c, FILE *miofile){ int sum=c-'0'; char t = fgetc(miofile); while (47 =60000){ MIN = (int)time/60000; time = (time - MIN*60000)/1000; printf("Execution Time: %dm %.5f sec\n",MIN,time); } else printf("Execution Time: %.5f sec\n",time/1000); } //***************************************************** //*** PROPAGATION PHASE //***************************************************** // unit propagation is implemented by mask_prop (in one // of its variants). // Given a (partial) assignment "vars", for each clause i, // mask[i]= 0 => clause i is true // mask[i]=-1 => clause i is false // mask[i]> 0 => clause i can be satisfied and there are mask[i] // unground variables // ***************************************************************** // return 0 if all clauses are satisfied // return -1 if there is a false clause // return the smallest index of a clause with fewer unground literals otherwise // **** Moreover, *point returns a pointer to ONE of these unassigned literal // ****************************************************************** // *** There are versions for the device and for the host // ****************************************************************** __host__ int mask_prop(int* sel_var, int* minmask){ int i,j,min=0,t,mask=0,pt,bestpt=0,bestcl=0; bool initial=true; int maybe_sat; // 2 surely sat, 1 maybe, 0 unsat //*********************************************************************** //****** SIMPLE CASE (coherent with mask_prop_delta and PARMASKprop) //*********************************************************************** // *** AGO: some little speed-up can be obtained storing pt=t instead of // *** pt = j (and changing all tests accordingly - basically removing formula[...]). // *** Left this way to be fully consistent with mask_prop_delta //*********************************************************************** i=0; while((mask >= 0) && (i 0 && host_vars[t]==1)){ mask = 0; maybe_sat = 2; } else if (host_vars[abs(t)] < 0) { // Literal Unknown mask++; if (pt == 0 || (pt > 0 && abs(t) < abs(formula[pt]))) pt=j; // ==> Here the remark above j++; maybe_sat = 1; } else j++; } // while if (j==clause_pointer[i+1] && maybe_sat==0) mask = -1; // EOC: UNSAT! if (initial) { if (mask != 0) {min=mask; bestpt=pt; initial=false; bestcl=i;} } else if (mask!=0){ // not initial if (mask= 0) && (i 0)){printf("cp[%d]: %d,cp[%d]: %d\n", i, cp[i], i+1,cp[i+1]);} while((maybe_sat < 2) && (j0 && block_vars[t]==1)){ mask = 0; maybe_sat = 2; } // Literal Unknown else if(block_vars[abs(t)]<0){ mask++; if (pt == 0 || (pt > 0 && abs(t) < abs(formula[pt]))) pt=j; // CAMBIATO QUI - 22/5 j++; maybe_sat = 1; } else j++; } // while if (j==cp[i+1] && maybe_sat==0) mask = -1; // EOC: UNSAT! if (initial) { if (mask != 0) {min=mask; bestpt=pt; initial=false; bestcpidx=i; } } else if (mask!=0){ // not initial if ( mask< min || (mask==min && abs(formula[pt]) < abs(formula[bestpt])) || (mask==min && abs(formula[pt])== abs(formula[bestpt]) && i 0 ) && ( v == 1 ))) { // con la variabile ground num = 0; // la clausola è vera maybe_sat = 2; // sicuramente sat } else if (v == -1){ // Literal non-ground // Nota. s_mask_num e s_mask_id sono shared, dunque ad accesso veloce num++; // +1 al num di lett non-ground in clausola if (abs(l) memorizza! if( num == -1 || bestnum == -1) { if (num==-1 && bestnum!=-1){ // la nuova cl e' conflitto s_mask_num[tid] = -1; // copia solo se il precedente non era fallito -> tengo g_tid minimo per cella tid s_mask_cl[tid] = g_tid; // segno la clausola che fa fallire } } else if (bestnum == 0) { s_mask_num[tid] = num; s_mask_id[tid] = id; s_mask_cl[tid] = g_tid; } else if (num > 0 && (bestnum > num || (bestnum == num && abs(bestid) > abs(id)) || (bestnum == num && abs(bestid) == abs(id) && bestcl > g_tid) )) { s_mask_num[tid] = num; s_mask_id[tid] = id; s_mask_cl[tid] = g_tid; } g_tid+=stride; } /// while ogni pezzo //************************************************************************************* __syncthreads(); // DISTRIBUTED algorithm for implementing first-fail choice (and minimum) short tn = blockDim.x; while(tn > 1){ short half = (tn >> 1); // stesso che tn = tn/2 short tid_2 = tid + half; if(tid < half){ if( s_mask_num[tid_2] == -1 || s_mask_num[tid] == -1) { if ((s_mask_num[tid_2] == -1 && s_mask_num[tid]>-1 ) || (s_mask_num[tid_2] == -1 && s_mask_cl[tid_2] da %d a %d\n",s_mask_cl[tid_2],s_mask_cl[tid],s_mask_num[tid_2],s_mask_num[tid],tid_2,tid); } } //*** AGO GIU12: i seguenti due bodies sono uguali. Bug? else if (s_mask_num[tid] == 0) { s_mask_num[tid] = s_mask_num[tid_2]; s_mask_id[tid] = s_mask_id[tid_2]; s_mask_cl[tid] = s_mask_cl[tid_2]; } else if (s_mask_num[tid_2] > 0 && (s_mask_num[tid] > s_mask_num[tid_2] || (s_mask_num[tid] == s_mask_num[tid_2] && abs(s_mask_id[tid]) > abs(s_mask_id[tid_2])) || (s_mask_num[tid] == s_mask_num[tid_2] && abs(s_mask_id[tid]) == abs(s_mask_id[tid_2]) && s_mask_cl[tid] > s_mask_cl[tid_2]) )) { s_mask_num[tid] = s_mask_num[tid_2]; s_mask_id[tid] = s_mask_id[tid_2]; s_mask_cl[tid] = s_mask_cl[tid_2]; } } //************************************************************************************* __syncthreads(); tn = half; } // while (tn>1) if(tid == 0){ mask_data[3*blockIdx.x] = s_mask_num[0]; mask_data[3*blockIdx.x+1] = s_mask_id[0]; mask_data[3*blockIdx.x+2] = s_mask_cl[0]; } } // parmask_prop //**************************************************************** //*** FORMULA SIMPLIFICATION (ATL-VEN July 12, 2011) // simplifies a (still satisfiable) formula by // removing true clauses and false literals //***************************************************************** __host__ int filter_formula(){ int i,lp,cp,flp,fv,fnc,vera; filtered_vars[0]=-1; maps_vars[0]=0; // Here only the unassigned (free) vars are considered. // maps_vars stores the correspondence new var / old var fv = 0; for(i=1;i 0) && (host_vars[formula[lp]] == 1) || (formula[lp] < 0) && (host_vars[-formula[lp]] == 0)){ vera = 1; lp = clause_pointer[cp+1]; break; // Exit the loop } // SECOND CASE: the literal is not ground. Copy it, renaming the var else if (host_vars[abs(formula[lp])] == -1){ filtered_formula[flp] = maps_vars[abs(formula[lp])]; if (formula[lp] <= 0) filtered_formula[flp]=-filtered_formula[flp]; flp++; lp++; vera=0; } //THIRD CASE: the literal is false. Just skip it else lp++; } // while if (!vera) { // NOTA: se e' falsa, la tolgo come se fosse vera fnc++; // Non dovrebbe mai essere chiamata in tal caso. filtered_clause_pointer[fnc] = flp; } } // for return fnc; } // function filter_formula //*************************************************************** //***** KERNEL FUNCTION. //***** DEVICE VERSION OF DPLL //***** Without recursion //***** Use block/thread address to guide the search //***** Use shared block memories //*************************************************************** // __shared__ short int* block_varsE; //**************************learning su GPU**************************** __device__ int clause_learning_gpu( int * dev_level, int * dev_refs, int * dev_trail, int* dev_seen, int* formula, int* new_formula, int* cp, int* new_cp, int old_NC, int* NC, int* new_NC, int NV) { //imposto il numero di clausole int NCP=*new_NC; int learnt_clause[50]; // sets jump and level_to_jump int level_to_jump; // inizializzazioni int i=0, c=0, p=0, q, var_p=0, var_q, clause, btlevel=0, tptr=dev_trail[0]; int lp=0; for (int i=0;i=old_NC){ clause=clause-old_NC; //printf("cp[%d]:%d\n", clause,cp[clause]); //printf("cp[%d+1]:%d\n", clause+1,cp[clause+1]); for( i=new_cp[clause];i0){ // se q è di un decision level inferiore, ma non il root level (al momento il root level è inutilizzato) learnt_clause[lp++]= -q; // calcolo level to backtrack, il massimo della learnt_clause, escluso il first-uip che verrà aggiunto alla fine btlevel=max(btlevel,dev_level[var_q]); } } } }//chiudo il for }//ref[var_p]>old_NC else{ for( i=cp[clause];i0){ // se q è di un decision level inferiore, ma non il root level (al momento il root level è inutilizzato) learnt_clause[lp++]= -q; // calcolo level to backtrack, il massimo della learnt_clause, escluso il first-uip che verrà aggiunto alla fine btlevel=max(btlevel,dev_level[var_q]); } } } }//chiudo il for }//chiudo else ref[var_p]0); // Blocca i thread del blocco // finchè non tutti hanno terminato l'operazione di scrittura __syncthreads(); //ALE: 8 giu commentata per avere backjumping //btlevel = max(btlevel,dev_level[abs(p)]-1); // in caso di solo UIP senza reasons, comunque devo annullare la decisione del livello corrente (dato che poi sommo 1, qui lo sottraggo) //in p ora ho il first UIP, ci va il suo negato nella learnt clause learnt_clause[lp++]=-p; //condizioni per memorizzare solo una determinata categoria di clausole //if (lp <16 ){ for(i=0;i 0)){ min_mask=new_min_mask; flag=1; lit=new_lit; cpidx=new_cpidx+NC; }else{ if((old_min_mask>0)&&(new_min_mask == 0)){ min_mask=old_min_mask; flag=0; lit=old_lit; cpidx=old_cpidx; }else{ if(old_min_mask < new_min_mask){ min_mask=old_min_mask; flag=0; lit=old_lit; cpidx=old_cpidx; }else{ min_mask=new_min_mask; flag=1; lit=new_lit; cpidx=new_cpidx+NC; } } } } }//chiudo il controllo per scegliere min_mask if (min_mask == 0) { end=true; break; // logically superfluous "break", but it speeds it up... }else if (min_mask > 0) { // min_mask > 0 if(flag==0){ pos = (formula[lit] > 0); // sign of the unknown literal }else{ pos = (new_formula[lit] > 0); // sign of the unknown literal } if(min_mask == 1){ // There is ONE non-ground literal: determinism int VAR; if(flag==0){ VAR=abs(formula[lit]); dev_trail[dev_trail[0]] = formula[lit]; }else{ VAR=abs(new_formula[lit]); dev_trail[dev_trail[0]] = new_formula[lit]; } block_vars[VAR]=pos; dev_level[VAR] = dev_level[0]; dev_refs[VAR] = cpidx; // devo sapere la clausola dev_trail[0]++; }//chiudo if (min_mask==1) else { //if (min_mask>=2){ int selected; dev_level[0]++; // apro nuovo ramo if(flag==0){ selected=abs(formula[lit]); dev_trail[dev_trail[0]]=formula[lit]; }else{ selected=abs(new_formula[lit]); dev_trail[dev_trail[0]]=new_formula[lit]; } block_vars[selected]=pos; dev_level[selected] = dev_level[0]; dev_trail[0]++; } //else(min_mask>1) } // if min_mask > 0 else { // if (min_mask < 0) *** Failure: Backtracking //******************************************************************** dev_refs[0]=cpidx; //faccio il learning level_to_jump= clause_learning_gpu( dev_level, dev_refs, dev_trail, dev_seen, formula, new_formula, cp, new_cp, old_NC, &NC, &new_NC, NV); int loop=dev_trail[0]>0; while (loop){ // questo mette a posto trail e variabili. level sistemato sulla ricorsione int VARP=abs(dev_trail[dev_trail[0]-1]); //printf("VARP: %d\n",VARP); loop=dev_trail[0]>0 && ((dev_level[VARP]>level_to_jump) || (dev_refs[VARP]>=0)); //<------------------ da casini if ((dev_level[VARP]>level_to_jump) || (dev_refs[VARP]>=0)){ block_vars[VARP]=-1; dev_level[VARP] = -1; //printf("level[%d]:%d\n",VARP,dev_level[VARP]); dev_refs[VARP] = -1; dev_trail[0]--; } else{ // refs<0 && level=leveltojump if (dev_level[VARP]<=level_to_jump && dev_refs[VARP]==-1){ // variabile gia' testata -> metto caso opposto dev_level[0]=dev_level[VARP]; } } } //chiudo while // caso speciale se backjump annulla anche primo livello if (level_to_jump==0){ dev_level[0]=0; } isbackjumping=1; if((dev_level[0]<0)&& (min_mask<0)){ vars[0]=0; } //******************************************************************** }//chiudo l'else(min_mask<0) } while( isbackjumping || (dev_level[0]>0 && !end)); //************************************************************************************* __syncthreads(); //printf("exit %d-%d-%d ",blockIdx.x,threadIdx.x,min_mask); if (vars[0]<1 && (min_mask==0)){ // *** A solution: export it on vars //printf("yes\n"); vars[0]=1; for(pos=1; pos %d \t",maps_vars[i],i); printf("\n"); print_instance(filtered_formula,filtered_clause_pointer,unk_clauses); for(int i=1; i < NV; i++) printf("%d -> var[%d]\t",host_vars[i],i); printf("\n"); */ //printf("unk_clau %d nl %d, fv %d\n",unk_clauses,filtered_clause_pointer[unk_clauses],FV); //print_instance(filtered_formula,filtered_clause_pointer,unk_clauses); // copy the value of filtered vars, formula, and clause pointers // to device global variables HANDLE_ERROR(cudaMemcpy(dev_vars, filtered_vars, (1+FV)*sizeof( int), cudaMemcpyHostToDevice )); HANDLE_ERROR(cudaMemcpy(dev_formula, filtered_formula, filtered_clause_pointer[unk_clauses]*sizeof( int), cudaMemcpyHostToDevice )); HANDLE_ERROR(cudaMemcpy(dev_cp, filtered_clause_pointer, (unk_clauses+1)*sizeof(int), cudaMemcpyHostToDevice )); #ifdef MYTRACE printf("\nMore memory allocation (in CUDA_caller):\n"); printf(" (cuda) dev_vars:\t%d bytes for %d vars\n", (1+FV)* sizeof(int), FV); printf(" (cuda) dev_formula:\t%d bytes for %d vars\n", filtered_clause_pointer[unk_clauses]* sizeof(int), filtered_clause_pointer[unk_clauses]); printf(" (cuda) dev_cp:\t%d bytes for %d clauses\n", (unk_clauses+1) * sizeof(int), (unk_clauses+1)); printf(" (host) filtered_vars:\t%d bytes for %d vars\n", (1+FV) * sizeof(int), FV); printf(" (host) maps_vars:\t%d bytes for %d vars\n", NV * sizeof(int), NV); printf(" (host) filtered_formula:\t%d bytes for %d vars\n", clause_pointer[NC] * sizeof(int), clause_pointer[NC]); printf(" (host) filtered_clause_pointer:\t%d bytes for %d clauses\n", (NC+1) * sizeof(int), NC); printf("CUDA_caller calling CUDADPPL3: blocks = %d threads per block = %d\n",CUDABLOCKS,CUDATHREADS); printf(" (shared) block_vars:\t%d bytes for %d vars. Total of %d bytes per block\n", MaxV * sizeof(int), MaxV, MaxV * sizeof(int)); printf(" (thread) delta_vars:\t%d bytes for %d vars. Total of %d bytes per block\n", Delta * sizeof(int), Delta, CUDATHREADS*Delta * sizeof(int)); printf(" (thread) varstack:\t%d bytes for %d vals. Total of %d bytes per block\n", 2*Delta * sizeof(int), 2*Delta, CUDATHREADS*2*Delta * sizeof(int)); fflush(stdout); #endif // *************** DEVICE COMPUTATION: ************************* //printf("Delta %d (Check if it is less than %d)\n",Delta,DV_SIZE); CUDADPLL<<>>(dev_formula,dev_cp,dev_vars,unk_clauses,1+FV,CUDATHREADS,LOG_BLKS,Delta,LOG_THRDS); // *************** Results' analysis ************************* // Only the first value (a flag) is copied back HANDLE_ERROR(cudaMemcpy(host_vars,dev_vars, sizeof( int), cudaMemcpyDeviceToHost)); // If there is no solution, skip, otherwise copy the whole // assignment back from DEVICE to HOST // Then assembly the solution from vars and filtered_vars if (host_vars[0]==1){ HANDLE_ERROR(cudaMemcpy(filtered_vars,dev_vars, (1+FV)*sizeof( int), cudaMemcpyDeviceToHost)); for(int i = 1; i < NV; i++) if (host_vars[i] < 0) host_vars[i] = filtered_vars[maps_vars[i]]; } // In realta' le prox quattro potrebbero anche essere globali o statiche, // comunque grandi (stanno su host) e si evita di riallocare return (host_vars[0]==1)*2; // 2 if good, 0 if bad } // ************************************************************************************************** // ************************************************************************************************** // ******************* clause learning ********************************************************** // ************************************************************************************************** // ************************************************************************************************** __host__ void clause_learning() { // sets jump and level_to_jump // inizializzazioni int i, c=0, p=0, q, var_p=0, var_q, clause, lp=0, btlevel=0, tptr=trail[0], verbose=0; if (verbose>1) { printf("Current level:\t\t%d\n",level[0]); printf("Conflict clause:\t%d\n",refs[0]); int i; printf("trail[%d] = [",trail[0]); for(i=1;i1) { printf("p:\t%d\t",p); printf("var_p:\t%d\t",var_p); printf("clause:\t%d\t",clause); printf("c_start:\t%d\t",clause_pointer[clause]); printf("end: %d\n",clause_pointer[clause+1]); printf("reason (clause %d) is:\t",clause); for(i=clause_pointer[clause];i1) { printf("q:%d\n",q); printf("lp at %d, q is %d at level %d, ref %d\n",lp,q,level[var_q],refs[var_q]); } if (!seen[var_q]) { if (verbose>1) printf("q never seen! "); seen[var_q] = 1; if (level[var_q] == level[0]) { // (q == decision level corrente) c++; if (verbose>1) printf("and of same current level (%d==%d) so c++.\n",level[var_q],level[0]); } else if (level[var_q] > 0) { // se q è di un decision level inferiore, ma non il root level (al momento il root level è inutilizzato) learnt_clause[lp++] = -q; // calcolo level to backtrack, il massimo della learnt_clause, escluso il first-uip che verrà aggiunto alla fine btlevel = max(btlevel,level[var_q]); if (verbose>1) { printf("and of minor level (%d<%d) so add %d.\n",level[var_q],level[0],-trail[i]); if (lp>0) { printf("current learnt clause is: "); int j; for(j=0;j1) printf("q is seen\n"); } } do { p = trail[--tptr]; var_p = abs(p); if (verbose>1) { printf("c:%d p%d\n",c,p); } if (verbose>1) { printf("scan p=%d\ttptr=%d\tseen[var_p]=%d\n",p,tptr,seen[var_p]); } } while (!seen[var_p]); c--; //seen[var_p] = 0; if (verbose>1) printf("c--, process var %d, c:%d, ref %d val %d\n",var_p,c,refs[var_p],host_vars[var_p]); } while (c>0); //ALE: 8 giu commentata per avere backjumping //btlevel = max(btlevel,level[abs(p)]-1); // in caso di solo UIP senza reasons, comunque devo annullare la decisione del livello corrente (dato che poi sommo 1, qui lo sottraggo) // in p ora ho il first-UIP, ci va il suo negato nella learnt clause learnt_clause[lp++] = -p; // controllo inserimento clausole doppie: todo // debug if (verbose>0) { printf("Add First-UIP: %d lev %d\nLearn clause %d: ",-p,level[abs(p)],NC); // printf("* LEARNT CLAUSE: "); for(i=0;i1) printf("backjump from level %d to level %d\n\n\n",level[0],btlevel); level_to_jump=btlevel; // non chronological backtracking } else { if (verbose>1) printf("backtrack to level %d\n",level[0]); level_to_jump=level[0]; // chronological backtracking } } //************************************************************************** //************************************************************************** //****** AUXILIARY //************************************************************************** //************************************************************************** __host__ inline void backtrackingf(int level_to_jump){ // annulla trail stack e riferimenti a variabili // printf("backtrack to %d\n",level_to_jump); backtracking++; int loop=trail[0]>0; while (loop){ // questo mette a posto trail e variabili. level sistemato sulla ricorsione int VARP=abs(trail[trail[0]-1]); loop=trail[0]>0 && ((level[VARP]>level_to_jump) || (refs[VARP]>=0) || (level[VARP]<=level_to_jump && refs[VARP]==-2)); if (level[VARP]>level_to_jump || refs[VARP]>=0 || refs[VARP]==-2){ //printf("back var %d lev %d\n",VARP,level[VARP]); host_vars[VARP] = -1; // restore "unknown" status level[VARP] = -1; refs[VARP] = -1; trail[0]--; } else{ if (level[VARP]<=level_to_jump && refs[VARP]==-1){ // variabile gia' testata -> metto caso opposto //printf("switch var %d at lev %d -> %d\n",VARP,level[VARP],-trail[trail[0]-1]); refs[VARP] = -2; host_vars[VARP] = 1-host_vars[VARP]; // inverse status trail[trail[0]-1]=-trail[trail[0]-1]; // inverto valore su trail level[0]=level[VARP]; } } } } __host__ inline void backjumpingf(int level_to_jump){ // riparte con ultima scelta fatta a level_to_jump // annulla trail stack e riferimenti a variabili // printf("backtrack to %d\n",level_to_jump); backjump++; int loop=trail[0]>0; while (loop){ // questo mette a posto trail e variabili. level sistemato sulla ricorsione int VARP=abs(trail[trail[0]-1]); loop=trail[0]>0 && ((level[VARP]>level_to_jump) || (refs[VARP]>=0) //|| (level[VARP]<=level_to_jump && refs[VARP]==-2) ); if (level[VARP]>level_to_jump || refs[VARP]>=0){ //printf("back var %d lev %d\n",VARP,level[VARP]); host_vars[VARP] = -1; // restore "unknown" status level[VARP] = -1; refs[VARP] = -1; trail[0]--; } else{ // refs<0 && level=leveltojump if (level[VARP]<=level_to_jump && refs[VARP]==-1){ // variabile gia' testata -> metto caso opposto //printf("switch var %d at lev %d -> %d\n",VARP,level[VARP],-trail[trail[0]-1]); //refs[VARP] = -2; //host_vars[VARP] = 1-host_vars[VARP]; // inverse status //trail[trail[0]-1]=-trail[trail[0]-1]; // inverto valore su trail level[0]=level[VARP]; } } } if (level_to_jump==0)// caso speciale se backjump annulla anche primo livello level[0]=0; } //************************************************************************** //************************************************************************** //********** MAIN PROCEDURE: twolevel_DPLL ********************************* //********** a part of DPLL (recursive) is handled by the host. *********** //********** The final part is made in the device(s) *********************** //************************************************************************** //************************************************************************** //************************************************************************** __host__ void mask_propagation_cpu(){ int dbg=0; do{ // CPU mask_prop clauind = mask_prop(&selected_var,&sat_val); if (dbg) printf("prop %d %d %d: ",sat_val,selected_var,clauind); if (dbg) for (int i=clause_pointer[clauind];i 0; level[VAR] = level[0]; refs[VAR] = clauind; // devo sapere la clausola trail[trail[0]] = selected_var; trail[0]++; } } while (sat_val==1); } __host__ void mask_propagation_gpu(){ int dbg=0; if (1==0 && dbg){ printf("at lev %d\n",level[0]); for(int i=1;i>>( mask_data, dev_parma_vars, dev_mapped_formula, dev_mapped_cp,NC); HANDLE_ERROR( cudaMemcpy( h_mask_data, mask_data, (3*PARblocks)*sizeof(int), cudaMemcpyDeviceToHost)); // puo' copiare anche 3*blocks+NV per risparmiare chiamate (se servono dati su vars) if (dbg) printf("..\n"); CUDA_count++; // colleziona dati da blocchi int bestnum=0; int bestid=0; int bestcl=0; for (int i=0;ih_mask_data[3*i+2]) || bestnum==0 || (bestnum>0 && h_mask_data[3*i]>0 && (h_mask_data[3*i] 0; level[VAR] = level[0]; refs[VAR] = h_mask_data[3*i+2]; // devo sapere la clausola trail[trail[0]] = h_mask_data[3*i+1]; trail[0]++; } } */ } if (dbg) printf("\nbest %d %d %d\n",bestnum,bestid,bestcl); //printf("\n"); sat_val =bestnum; selected_var=bestid; clauind=bestcl; if (dbg) printf("prop %d %d %d: ",sat_val,selected_var,clauind); if (dbg) for (int i=clause_pointer[clauind];i 0; level[VAR] = level[0]; refs[VAR] = clauind; // devo sapere la clausola trail[trail[0]] = selected_var; trail[0]++; } } while (sat_val==1); } //********************************************************************************** //********************************************************************************** //********************************************************************************** //****************** TWO LEVEL DPLL (no recursive) ***************************** //********************************************************************************** //********************************************************************************** //********************************************************************************** __host__ int twolevel_DPLL(){ int selected; int pos; int FV; short good=0; int dbg=0; int isbackjumping=0; // a 1 se fatto backj e quindi devo continuare (anche se il bj ha portato il livello a 0 = ho imparato clausola unitaria, e con una UP posso ripartire da livello 1 con l'altro valore) do { isbackjumping=0; if (dbg) printf("twolevel_DPLL lev %d\n ",level[0]); // mask_propagation if (USEGPU %2 == 0) mask_propagation_cpu(); else mask_propagation_gpu(); if (dbg){ printf("dopo maskprop lev %d host_vars: ",level[0]); for(int i=1;i%d(%d,%d) ",i,host_vars[i],level[i],refs[i]); printf("\n"); */ printf("sat_val %d, selvar %d\n",sat_val,selected_var); } // new case if (level[0]<=0 && sat_val<0){ // failed, UP was enough, no need to learn, return fail! return 0; } //************* UNSATISFIABLE ASSIGNMENT if (sat_val < 0){ // At least one clause is false good = 0; refs[0] = clauind; // conflict clause if (learning) {// CLAUSE LEARNING clause_learning(); if (NC%1000 ==0) printf("NC: %d\n",NC); if (dbg) printf("cl: level to backjump to %d\n",level_to_jump); if (dbg) printf("backjump %d\n",level_to_jump); backjumpingf(level_to_jump); isbackjumping=1; } else{ if (dbg) printf("backtrack %d\n",level[0]); backtrackingf(level[0]); // se non imparo, apro fratello (tengo livello corrente) } // END CLAUSE LEARNING } else if (sat_val == 0){ //************* FOUND A SOLUTION good = 1; } else { // There is a non-ground literal - sat_val > 0 pos = (selected_var > 0); selected = abs(selected_var); // Look for its variable and sign FV = NV-trail[0]; // free vars if (USEGPU >= 2 && FV <= MaxV) { //*** GPU CALL: //printf("CUDA caller with %d learnt clauses\n",nclausel); good=CUDA_caller(FV); if (!good) backtrackingf(level[0]); //backtrack } else { ///////// vado con CPU level[0]++; // apro nuovo ramo if (dbg) printf("lev %d, 2 scelte: var %d val %d\n",level[0],selected,pos); level[selected] = level[0]; host_vars[selected] = pos; trail[trail[0]++] = selected_var; } // CPU } //sat_val >2 } while(isbackjumping || (level[0]>0 && !good)); return good; } //************************************************************************ //************************************************************************ //******************* main ********************************** //************************************************************************ //************************************************************************ __host__ int main(int argc, char** argv) { int retv = 0; cudaDeviceProp prop; int whichDevice; if (argc!=7){ printf("usage: %s mode N_vars_blocks(2LOG_BLKS) N_vars_thread(LOG_THRDS) Maxv learning(0/1) filename\n",argv[0]); return -1; } // *** PARAMETER SETTINGS USEGPU = atoi(argv[1]); LOG_BLKS = atoi(argv[2]); LOG_THRDS = atoi(argv[3]); CUDABLOCKS = (1<0){ // CUDA timings initialization cudaEventCreate( &start ); cudaEventCreate( &stop ); // Checking of CUDA HW capabilities HANDLE_ERROR( cudaGetDevice( &whichDevice ) ); HANDLE_ERROR( cudaGetDeviceProperties( &prop, whichDevice ) ); if (prop.canMapHostMemory != 1) { printf( "Device cannot map memory.\n" ); return 0; } } //*** Some "warning" and exit if(USEGPU>0 && CUDATHREADS > prop.maxThreadsPerBlock){ printf("Cant handle so many vars per block (max %d < %d threads per block)\n",CUDATHREADS,prop.maxThreadsPerBlock); return -1; } printf("using LOG_BLKS=%d, LOG_THRDS=%d MaxV=%d \n",LOG_BLKS,LOG_THRDS,MaxV); if (Delta > DV_SIZE){ printf("Delta too large, recompile kernel with %d in the size of arrays\n",Delta); return -1; } //**** CORE COMPUTATION allocate_first(); // *** Load a SAT formula in DIMACS format load_formula(argv[6],&NV,&NC,&NL); //*** Print and use some HW and instance info if (USEGPU>0){ print_info(prop); PARblocks = IMIN( ( NC + THREADS - 1) / THREADS, 2*prop.multiProcessorCount ); } allocate_second(); //*** Start running time (formula loading is not counted) if (USEGPU %2 == 1) printf("blocks %d, threads %d\n",PARblocks, THREADS); srand ( time(NULL) ); printf("start\n"); if (USEGPU>0){ cudaEventRecord(start,0); cudaEventSynchronize(start); }else{ #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) global_time=clock(); #else gettimeofday(&time_start,NULL); #endif } //********************************************************************* //******* Call the main procedure (store the result in vars[0]): //********************************************************************* retv = twolevel_DPLL(); // <======== HERE! host_vars[0] = retv; //*** CUDA timings: if (USEGPU>0){ cudaEventRecord( stop, 0 ); cudaEventSynchronize( stop ); cudaEventElapsedTime( &deltatime, start, stop ); } else{ #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) clock_t final=clock()-global_time; deltatime=((float)final/CLK_TCK)*1000; #else gettimeofday(&time_stop,NULL); deltatime=(0.0+time_stop.tv_sec+time_stop.tv_usec/1000000.0) - (0.0+time_start.tv_sec+time_start.tv_usec/1000000.0); deltatime*=1000; #endif } /* DISPLAY RESULT AND RUNNING TIME: */ print_result( host_vars, NV ); print_time( deltatime ); deallocate(); return 0; } //********* END *******************************************************