#include #include #if !(defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) #include #endif #include using namespace std; #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) clock_t global_time; #else struct timeval time_start,time_stop; #endif //*** IMPORTANT KERNEL STATIC OPTION. //*** UnComment/Comment the following line // In the first case (parameter defined) it simply assigns the block variables. // In the second case it performs fast unit propagation after any assignment #define SIMPLEBLOCK 0 //**** STATIC SIZE FOR block_vars and delta_vars //**** ARRAYS USED IN PARALLELIZING SEARCH #define BV_SIZE 101 #define DV_SIZE 100 //*** Comment/Uncomment the following line for information //*** on memory allocation during CUDA execution //#define MYTRACE 0 // The following PARAMETERS decide the parallelism degree // and the complexity of the formula to be delagetd to CUDA // We allow two versions, with static (#define) and dynamic values // *** PARAMETER #1 (argv 1) // *** Modes for using GPU. Currently 0-3 int USEGPU = 0; // The default value // *** PARAMETER #2 (argv 2) // NUMBER OF BLOCKS = CUDABLOCKS // LOG_BLKS is the number of vars fixed by the block address int LOG_BLKS=2; int CUDABLOCKS=(1<b?a:b) // *** ALLOCATE / DEALLOCATE __host__ void allocate_first(){ //*** SIZE INDEPENDENT ON THE FORMULA if (USEGPU>0){ HANDLE_ERROR( cudaHostAlloc( (void**)&formula, MAX_NL * sizeof(int), cudaHostAllocDefault ) ); HANDLE_ERROR( cudaMalloc( (void**)&dev_mapped_formula, MAX_NL * sizeof(int))); HANDLE_ERROR( cudaMalloc((void**)&dev_formula, MAX_NL * sizeof(int))); HANDLE_ERROR( cudaHostAlloc( (void**)&clause_pointer, (MAX_NC+1) * sizeof(int), cudaHostAllocDefault ) ); HANDLE_ERROR( cudaMalloc( (void**)&dev_mapped_cp, (MAX_NC+1) * sizeof(int) )); HANDLE_ERROR( cudaMalloc((void**)&dev_cp, (MAX_NC+1) * sizeof(int) )); } else{ formula=(int*)malloc(MAX_NL * sizeof(int)); clause_pointer=(int*)malloc((MAX_NC+1) * sizeof(int)); } filtered_clause_pointer=(int*) malloc((MAX_NC+1) * sizeof(int)); filtered_formula =(int*) malloc(MAX_NL * sizeof(int)); } __host__ void allocate_second(){ #ifdef MYTRACE printf("Memory allocation (in main):\n"); printf(" (cuda) phi:\t%d bytes for %d literals\n", NL * sizeof(int), NL); printf(" (cuda) cp:\t%d bytes for %d clauses\n", (NC + 1) * sizeof(int), NC); printf(" (cuda) host_vars:\t%d bytes for %d vars\n", NV * sizeof(int), NV); printf(" (cuda) dev_parma_vars:\t%d bytes for %d vars\n", NV * sizeof(int), NV); printf(" (host) h_mask_num:\t%d bytes for %d blocks\n", PARblocks * sizeof(int), PARblocks); printf(" (host) h_mask_id:\t%d bytes for %d blocks\n", PARblocks * sizeof(int), PARblocks); fflush(stdout); #endif //*** SIZE DEPENDING ON THE FORMULA host_strat_vars=(int*)malloc(2*NV * sizeof(int)); host_vars=(int*)malloc(NV * sizeof(int)); if (USEGPU>0){ HANDLE_ERROR( cudaHostAlloc( (void**)&h_mask_data, (3* PARblocks+NV) * sizeof(int), cudaHostAllocDefault ) ); //questo non serve su solo cpu HANDLE_ERROR( cudaMalloc((void**)&dev_vars, NV * sizeof( int) )) ; HANDLE_ERROR( cudaMalloc( (void**)&dev_parma_vars, NV * sizeof(int) ) ); HANDLE_ERROR( cudaMalloc( (void**)&mask_data, (3* PARblocks+NV) * sizeof(int) ) ); HANDLE_ERROR( cudaMalloc( (void**)&device_trail, NV * sizeof(int) ) ); HANDLE_ERROR( cudaMalloc( (void**)&device_info_formula, 2 * sizeof(int) ) ); HANDLE_ERROR( cudaMalloc( (void**)&device_refs, NV * sizeof(int) ) ); HANDLE_ERROR( cudaMalloc( (void**)&device_seen, NV * sizeof(int) ) ); HANDLE_ERROR( cudaMalloc( (void**)&device_level, NV * sizeof(int) ) ); HANDLE_ERROR( cudaMalloc( (void**)&device_state, PARblocks * sizeof(int) ) ); } filtered_vars = (int*) malloc(NV * sizeof( int)); maps_vars = (int*) malloc(NV * sizeof( int)); level = (int*) malloc(NV * sizeof(int)); refs = (int*) malloc(NV * sizeof(int)); trail = (int*) malloc(NV * sizeof(int)); seen = (int*) malloc(NV * sizeof(int)); learnt_clause = (int*) malloc(NV * sizeof(int)); // AGO" giusto NV? for(int i=0;i0){ HANDLE_ERROR( cudaMemcpy( dev_mapped_formula, formula, NL * sizeof(int), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( dev_mapped_cp, clause_pointer, (NC+1) * sizeof(int), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( device_level, level, NV * sizeof(int), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( device_trail, trail, NV * sizeof(int), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( device_refs, refs, NV * sizeof(int), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( device_seen, seen, NV * sizeof(int), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( device_state, state, PARblocks * sizeof(int), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( dev_parma_vars, host_vars, NV * sizeof(int), cudaMemcpyHostToDevice ) ); HANDLE_ERROR( cudaMemcpy( device_info_formula, info_formula, 2 * sizeof(int), cudaMemcpyHostToDevice ) ); } } __host__ void deallocate() { free( host_vars ); //AGO: solo queste deallochiamo? if (USEGPU>0){ HANDLE_ERROR( cudaFree( mask_data )); // HANDLE_ERROR( cudaFree(dev_vars)); HANDLE_ERROR( cudaFree(dev_formula)); HANDLE_ERROR( cudaFree(dev_cp)); cudaEventDestroy( start ); cudaEventDestroy( stop ); } } //************************************************** // INPUT: // Data Structure: the formula is stored in two vectors. // The first stores the literals in sequence. // Its length is NL (0..NL-1) // The second stores the beginning of the various clauses // The number of clauses is NC. The vector is 0..NC // The last (redundant) cell stores the value NL // It also returns NV = 1+number of vars in the formula // Remark: the formula is read twice //************************************************** __host__ int simple_parsing(char c, FILE *miofile){ int sum=c-'0'; char t = fgetc(miofile); while (47 =60000){ MIN = (int)time/60000; time = (time - MIN*60000)/1000; printf("Execution Time: %dm %.5f sec\n",MIN,time); } else printf("Execution Time: %.5f sec\n",time/1000); } //***************************************************** //*** PROPAGATION PHASE //***************************************************** // unit propagation is implemented by mask_prop (in one // of its variants). // Given a (partial) assignment "vars", for each clause i, // mask[i]= 0 => clause i is true // mask[i]=-1 => clause i is false // mask[i]> 0 => clause i can be satisfied and there are mask[i] // unground variables // ***************************************************************** // return 0 if all clauses are satisfied // return -1 if there is a false clause // return the smallest index of a clause with fewer unground literals otherwise // **** Moreover, *point returns a pointer to ONE of these unassigned literal // ****************************************************************** // *** There are versions for the device and for the host // ****************************************************************** __host__ int mask_prop(int* sel_var, int* minmask){ int i,j,min=0,t,mask=0,pt,bestpt=0,bestcl=0; bool initial=true; int maybe_sat; // 2 surely sat, 1 maybe, 0 unsat //*********************************************************************** //****** SIMPLE CASE (coherent with mask_prop_delta and PARMASKprop) //*********************************************************************** // *** AGO: some little speed-up can be obtained storing pt=t instead of // *** pt = j (and changing all tests accordingly - basically removing formula[...]). // *** Left this way to be fully consistent with mask_prop_delta //*********************************************************************** i=0; while((mask >= 0) && (i 0 && host_vars[t]==1)){ mask = 0; maybe_sat = 2; } else if (host_vars[abs(t)] < 0) { // Literal Unknown mask++; if (pt == 0 || (pt > 0 && abs(t) < abs(formula[pt]))) pt=j; // ==> Here the remark above j++; maybe_sat = 1; } else j++; } // while if (j==clause_pointer[i+1] && maybe_sat==0) mask = -1; // EOC: UNSAT! if (initial) { if (mask != 0) {min=mask; bestpt=pt; initial=false; bestcl=i;} } else if (mask!=0){ // not initial if (mask= 0) && (i 0) && ((a==1) || ((a<0) && (delta_vars[-a]==1)))) ) { mask = 0; maybe_sat = 2; } else if ((a<0) && (delta_vars[-a]== -1)){ // Literal Unknown mask++; if (pt == 0 || (pt > 0 && abs(t) < abs(formula[pt]))) pt=j; // CAMBIATO QUI - 22/5 j++; maybe_sat = 1; } else j++; } // while if (j==cp[i+1] && maybe_sat==0) mask = -1; // EOC: UNSAT! if (initial) { if (mask != 0) {min=mask; bestpt=pt; initial=false; } } else if (mask!=0){ // not initial if ( mask< min || (mask==min && abs(formula[pt]) < abs(formula[bestpt]))) {min=mask; bestpt=pt;} } i++; } // while principale *lit = bestpt; return min; } //********************************************************************* //********************************************************************* //********************************************************************* //********************************************************************* __global__ void parmask_prop( int* mask_data, int* vars,int* dev_mapped_formula,int* dev_mapped_cp, int* info_formula){ int stride = blockDim.x * gridDim.x; int g_tid = threadIdx.x + blockIdx.x * blockDim.x; int tid = threadIdx.x; __shared__ int s_mask_num[THREADS]; __shared__ int s_mask_id[THREADS]; __shared__ int s_mask_cl[THREADS]; // memorizza la clausola che fa unit prop __shared__ int NC[1]; if (tid==0) NC[0]=info_formula[1]; __syncthreads(); int g_tn=NC[0]; s_mask_num[tid] = 0; while(g_tid < g_tn){ int l,v; short maybe_sat; // maybe_sat == 2 surely sat, 1 maybe, 0 unsat int iter,end; iter = dev_mapped_cp[g_tid]; end = dev_mapped_cp[g_tid + 1]; maybe_sat = 0; int num=0; // portato a registri, cosi' posso tenere s_mask per tutte le clausole int id=2147483647; // MAXINT <========== while((maybe_sat < 2) && (iter < end)) { // Per ogni lett non sat nella cl l = dev_mapped_formula[iter]; v = vars[abs(l)]; iter++; // passo al prossimo lett if ((( l < 0 ) && ( v == 0 )) || // se ha segno concorde (( l > 0 ) && ( v == 1 ))) { // con la variabile ground num = 0; // la clausola è vera maybe_sat = 2; // sicuramente sat } else if (v == -1){ // Literal non-ground // Nota. s_mask_num e s_mask_id sono shared, dunque ad accesso veloce num++; // +1 al num di lett non-ground in clausola if (abs(l) memorizza! if( num == -1 || bestnum == -1) { if (num==-1 && bestnum!=-1){ // la nuova cl e' conflitto s_mask_num[tid] = -1; // copia solo se il precedente non era fallito -> tengo g_tid minimo per cella tid s_mask_cl[tid] = g_tid; // segno la clausola che fa fallire } } else if (bestnum == 0) { s_mask_num[tid] = num; s_mask_id[tid] = id; s_mask_cl[tid] = g_tid; } else if (num > 0 && (bestnum > num || (bestnum == num && abs(bestid) > abs(id)) || (bestnum == num && abs(bestid) == abs(id) && bestcl > g_tid) )) { s_mask_num[tid] = num; s_mask_id[tid] = id; s_mask_cl[tid] = g_tid; } g_tid+=stride; } /// while ogni pezzo //************************************************************************************* __syncthreads(); // DISTRIBUTED algorithm for implementing first-fail choice (and minimum) short tn = blockDim.x; while(tn > 1){ short half = (tn >> 1); // stesso che tn = tn/2 short tid_2 = tid + half; if(tid < half){ if( s_mask_num[tid_2] == -1 || s_mask_num[tid] == -1) { if ((s_mask_num[tid_2] == -1 && s_mask_num[tid]>-1 ) || (s_mask_num[tid_2] == -1 && s_mask_cl[tid_2] da %d a %d\n",s_mask_cl[tid_2],s_mask_cl[tid],s_mask_num[tid_2],s_mask_num[tid],tid_2,tid); } } //*** AGO GIU12: i seguenti due bodies sono uguali. Bug? else if (s_mask_num[tid] == 0) { s_mask_num[tid] = s_mask_num[tid_2]; s_mask_id[tid] = s_mask_id[tid_2]; s_mask_cl[tid] = s_mask_cl[tid_2]; } else if (s_mask_num[tid_2] > 0 && (s_mask_num[tid] > s_mask_num[tid_2] || (s_mask_num[tid] == s_mask_num[tid_2] && abs(s_mask_id[tid]) > abs(s_mask_id[tid_2])) || (s_mask_num[tid] == s_mask_num[tid_2] && abs(s_mask_id[tid]) == abs(s_mask_id[tid_2]) && s_mask_cl[tid] > s_mask_cl[tid_2]) )) { s_mask_num[tid] = s_mask_num[tid_2]; s_mask_id[tid] = s_mask_id[tid_2]; s_mask_cl[tid] = s_mask_cl[tid_2]; } } //************************************************************************************* __syncthreads(); tn = half; } // while (tn>1) if(tid == 0){ mask_data[3*blockIdx.x] = s_mask_num[0]; mask_data[3*blockIdx.x+1] = s_mask_id[0]; mask_data[3*blockIdx.x+2] = s_mask_cl[0]; } } // parmask_prop //**************************************************************** //*** FORMULA SIMPLIFICATION (ATL-VEN July 12, 2011) // simplifies a (still satisfiable) formula by // removing true clauses and false literals //***************************************************************** __host__ int filter_formula(){ int i,lp,cp,flp,fv,fnc,vera; filtered_vars[0]=-1; maps_vars[0]=0; // Here only the unassigned (free) vars are considered. // maps_vars stores the correspondence new var / old var fv = 0; for(i=1;i 0) && (host_vars[formula[lp]] == 1) || (formula[lp] < 0) && (host_vars[-formula[lp]] == 0)){ vera = 1; lp = clause_pointer[cp+1]; break; // Exit the loop } // SECOND CASE: the literal is not ground. Copy it, renaming the var else if (host_vars[abs(formula[lp])] == -1){ filtered_formula[flp] = maps_vars[abs(formula[lp])]; if (formula[lp] <= 0) filtered_formula[flp]=-filtered_formula[flp]; flp++; lp++; vera=0; } //THIRD CASE: the literal is false. Just skip it else lp++; } // while if (!vera) { // NOTA: se e' falsa, la tolgo come se fosse vera fnc++; // Non dovrebbe mai essere chiamata in tal caso. filtered_clause_pointer[fnc] = flp; } } // for return fnc; } // function filter_formula //*************************************************************** //***** KERNEL FUNCTION. //***** DEVICE VERSION OF DPLL //***** Without recursion //***** Use block/thread address to guide the search //***** Use shared block memories //*************************************************************** // __shared__ short int* block_varsE; __global__ void CUDADPLL(int* formula, int* cp, int *vars, int NC, int NV, int CUDATHREADS, int LOG_BLKS, int Delta, int LOG_THRDS ){ //*** If some other thread already found solution, skip the thread if (vars[0]<1){ // *** SHARED ARRAY OF THE BLOCK __shared__ int block_vars[BV_SIZE]; // *** LOCAL ARRAYS OF THE THREAD short int delta_vars[DV_SIZE], varstack[DV_SIZE][2]; int addr=blockIdx.x,count=0,top=-1,pos,i; int lit; bool end=false, updated=false; int min_mask=0; //**************************************************************** // The first LOG_BLKS vars are assigned using block coordinates // The others are delegated to delta_vars //**************************************************************** #ifdef SIMPLEBLOCK // ************************************************************************* // **** SIMPLE CODE. GOOD FOR DATA PARALLELISM. SOME DETERMINISM IS // **** NOT WELL-EXPLOITED // ************************************************************************* if (threadIdx.x==0) { block_vars[0]=0; for(i=1;i= 0)) j++; if(i+j < NV){ block_vars[i+j] = addr % 2; addr=addr/2; count++; } // *** FAST UNIT PROPAGATION DIRECTLY HERE min_mask=1; while(min_mask==1){ ui = 0; min_mask=0; while((ui < NC) && (min_mask != 1)){ min_mask=0; uj=cp[ui]; while((min_mask < 2) && (uj < cp[ui+1])) { l=formula[uj]; v=block_vars[abs(l)]; if ((( l < 0 ) && ( v == 0 )) || // se ha segno concorde : block -> mask=0, stop is sat! (( l > 0 ) && ( v == 1 ))){ uj=cp[ui+1]; min_mask=0; } else if (block_vars[abs(formula[uj])] < 0) { min_mask++; lit = uj; } uj++; } // while min_mask < 2 ui++; } // while ui < NC if (min_mask==1){ block_vars[ abs(formula[lit])] = (formula[lit] > 0); } } // while(min_mask==1) } // if count < else if(block_vars[i] == -1){ // Assign the remaining variables (if any) block_vars[i] = top; top--; } } // for i=1 .. NV } // if ((threadIdx.x==0)...) #endif //************************************************************************************* __syncthreads(); // *** Every thread resets its local part of the var array for(i=0;i < Delta;i++) delta_vars[i] = -1; // Use the (block,thread) coordinate to guide the successive // LOG_THRDS ND choices // top is now a pointer to the top of the stack // lit points to the selected unknown literal // pos is its sign addr=threadIdx.x; top=-1; count=0; while(!end){ if(top>=0) { delta_vars[varstack[top][0]] = varstack[top][1] % 2; } //***************************************************** //*** Partial substitution evaluation here: //***************************************************** min_mask = mask_prop_delta(formula, cp, block_vars,delta_vars,NC,&lit); if (min_mask == 0) { end=true; break; // logically superfluous "break", but it speeds it up... } else if (min_mask > 0) { // min_mask > 0 pos = (formula[lit] > 0); // sign of the unknown literal top++; varstack[top][0]=-block_vars[abs(formula[lit])]; if (min_mask == 1) // There is ONE non-ground literal: determinism varstack[top][1]=pos; else if (count < LOG_THRDS){ // Set the variable using thread coords varstack[top][1] = addr % 2; addr>>=1; count++; } else // min_mask > 1 && count >= LOG_THRDS varstack[top][1]=2+pos; // Assign a backtrackable value } // else min_mask > 0 else { // if (min_mask < 0) *** Failure: Backtracking while((top>=0) && (!updated)){ pos=varstack[top][1]; // pos is used to avoid 2 addressing to varstack[top][1] if (pos > 1){ // 3-> try 0, 2-> try 1 varstack[top][1] = 3-pos; updated=true; } else { delta_vars[varstack[top][0]] = -1; // Restore unknown status top--; // 1 -> stop, 0-> stop } } if (top <0) {end = true; break;} else updated=false; } // else } // while //************************************************************************************* __syncthreads(); if (vars[0]<1 && (min_mask==0)){ // *** A solution: export it on vars //printf("yes\n"); vars[0]=1; for(pos=1; pos= 0) vars[pos] = block_vars[pos]; else vars[pos] = delta_vars[-block_vars[pos]]; } } // if (vars[0]... } //************************************************************************** //********* CUDA_caller ************************* //************************************************************************** __host__ int CUDA_caller(int FV){ int unk_clauses; CUDA_count_lower++; // *** filter_formula call - global variables are used unk_clauses= filter_formula(); // printf("Called GPU with %d free vars\n",FV); // printf("Before: V=%d, C=%d, L=%d, After: V=%d, C=%d, L=%d\n", // NV-1,NC,clause_pointer[NC],FV,unk_clauses,filtered_clause_pointer[unk_clauses]); // DEBUG PRINT /* printf("Vars Mapping:\n"); for(int i=1; i < NV; i++) if(maps_vars[i] < i) printf("var[%d] -> %d \t",maps_vars[i],i); printf("\n"); print_instance(filtered_formula,filtered_clause_pointer,unk_clauses); for(int i=1; i < NV; i++) printf("%d -> var[%d]\t",host_vars[i],i); printf("\n"); */ //printf("unk_clau %d nl %d, fv %d\n",unk_clauses,filtered_clause_pointer[unk_clauses],FV); //print_instance(filtered_formula,filtered_clause_pointer,unk_clauses); // copy the value of filtered vars, formula, and clause pointers // to device global variables HANDLE_ERROR(cudaMemcpy(dev_vars, filtered_vars, (1+FV)*sizeof( int), cudaMemcpyHostToDevice )); HANDLE_ERROR(cudaMemcpy(dev_formula, filtered_formula, filtered_clause_pointer[unk_clauses]*sizeof( int), cudaMemcpyHostToDevice )); HANDLE_ERROR(cudaMemcpy(dev_cp, filtered_clause_pointer, (unk_clauses+1)*sizeof(int), cudaMemcpyHostToDevice )); #ifdef MYTRACE printf("\nMore memory allocation (in CUDA_caller):\n"); printf(" (cuda) dev_vars:\t%d bytes for %d vars\n", (1+FV)* sizeof(int), FV); printf(" (cuda) dev_formula:\t%d bytes for %d vars\n", filtered_clause_pointer[unk_clauses]* sizeof(int), filtered_clause_pointer[unk_clauses]); printf(" (cuda) dev_cp:\t%d bytes for %d clauses\n", (unk_clauses+1) * sizeof(int), (unk_clauses+1)); printf(" (host) filtered_vars:\t%d bytes for %d vars\n", (1+FV) * sizeof(int), FV); printf(" (host) maps_vars:\t%d bytes for %d vars\n", NV * sizeof(int), NV); printf(" (host) filtered_formula:\t%d bytes for %d vars\n", clause_pointer[NC] * sizeof(int), clause_pointer[NC]); printf(" (host) filtered_clause_pointer:\t%d bytes for %d clauses\n", (NC+1) * sizeof(int), NC); printf("CUDA_caller calling CUDADPPL3: blocks = %d threads per block = %d\n",CUDABLOCKS,CUDATHREADS); printf(" (shared) block_vars:\t%d bytes for %d vars. Total of %d bytes per block\n", MaxV * sizeof(int), MaxV, MaxV * sizeof(int)); printf(" (thread) delta_vars:\t%d bytes for %d vars. Total of %d bytes per block\n", Delta * sizeof(int), Delta, CUDATHREADS*Delta * sizeof(int)); printf(" (thread) varstack:\t%d bytes for %d vals. Total of %d bytes per block\n", 2*Delta * sizeof(int), 2*Delta, CUDATHREADS*2*Delta * sizeof(int)); fflush(stdout); #endif // *************** DEVICE COMPUTATION: ************************* printf("Delta %d (Check if it is less than %d)\n",Delta,DV_SIZE); CUDADPLL<<>>(dev_formula,dev_cp,dev_vars,unk_clauses,1+FV,CUDATHREADS,LOG_BLKS,Delta,LOG_THRDS); // *************** Results' analysis ************************* // Only the first value (a flag) is copied back HANDLE_ERROR(cudaMemcpy(host_vars,dev_vars, sizeof( int), cudaMemcpyDeviceToHost)); // If there is no solution, skip, otherwise copy the whole // assignment back from DEVICE to HOST // Then assembly the solution from vars and filtered_vars if (host_vars[0]==1){ HANDLE_ERROR(cudaMemcpy(filtered_vars,dev_vars, (1+FV)*sizeof( int), cudaMemcpyDeviceToHost)); for(int i = 1; i < NV; i++) if (host_vars[i] < 0) host_vars[i] = filtered_vars[maps_vars[i]]; } // In realta' le prox quattro potrebbero anche essere globali o statiche, // comunque grandi (stanno su host) e si evita di riallocare return (host_vars[0]==1)*2; // 2 if good, 0 if bad } // ************************************************************************************************** // ************************************************************************************************** // ******************* clause learning ********************************************************** // ************************************************************************************************** // ************************************************************************************************** __host__ void clause_learning() { // sets jump and level_to_jump // inizializzazioni int i, c=0, p=0, q, var_p=0, var_q, clause, lp=0, btlevel=0, tptr=trail[0], verbose=0; if (verbose>1) { printf("Current level:\t\t%d\n",level[0]); printf("Conflict clause:\t%d\n",refs[0]); int i; printf("trail[%d] = [",trail[0]); for(i=1;i1) { printf("p:\t%d\t",p); printf("var_p:\t%d\t",var_p); printf("clause:\t%d\t",clause); printf("c_start:\t%d\t",clause_pointer[clause]); printf("end: %d\n",clause_pointer[clause+1]); printf("reason (clause %d) is:\t",clause); for(i=clause_pointer[clause];i1) { printf("q:%d\n",q); printf("lp at %d, q is %d at level %d, ref %d\n",lp,q,level[var_q],refs[var_q]); } if (!seen[var_q]) { if (verbose>1) printf("q never seen! "); seen[var_q] = 1; if (level[var_q] == level[0]) { // (q == decision level corrente) c++; if (verbose>1) printf("and of same current level (%d==%d) so c++.\n",level[var_q],level[0]); } else if (level[var_q] > 0) { // se q è di un decision level inferiore, ma non il root level (al momento il root level è inutilizzato) learnt_clause[lp++] = -q; // calcolo level to backtrack, il massimo della learnt_clause, escluso il first-uip che verrà aggiunto alla fine btlevel = max(btlevel,level[var_q]); if (verbose>1) { printf("and of minor level (%d<%d) so add %d.\n",level[var_q],level[0],-trail[i]); if (lp>0) { printf("current learnt clause is: "); int j; for(j=0;j1) printf("q is seen\n"); } } do { p = trail[--tptr]; var_p = abs(p); if (verbose>1) { printf("c:%d p%d\n",c,p); } if (verbose>1) { printf("scan p=%d\ttptr=%d\tseen[var_p]=%d\n",p,tptr,seen[var_p]); } } while (!seen[var_p]); c--; //seen[var_p] = 0; if (verbose>1) printf("c--, process var %d, c:%d, ref %d val %d\n",var_p,c,refs[var_p],host_vars[var_p]); } while (c>0); //ALE: 8 giu commentata per avere backjumping //btlevel = max(btlevel,level[abs(p)]-1); // in caso di solo UIP senza reasons, comunque devo annullare la decisione del livello corrente (dato che poi sommo 1, qui lo sottraggo) // in p ora ho il first-UIP, ci va il suo negato nella learnt clause learnt_clause[lp++] = -p; // controllo inserimento clausole doppie: todo // debug if (verbose>0) { printf("Add First-UIP: %d lev %d\nLearn clause %d: ",-p,level[abs(p)],NC); // printf("* LEARNT CLAUSE: "); for(i=0;i1) printf("backjump from level %d to level %d\n\n\n",level[0],btlevel); level_to_jump=btlevel; // non chronological backtracking } else { if (verbose>1) printf("backtrack to level %d\n",level[0]); level_to_jump=level[0]; // chronological backtracking } } __device__ int clause_learning_gpu( int* vars, int * level, int * refs, int * trail, int* seen, int* formula, int* clause_pointer, int* info) { // sets jump and level_to_jump int NV=info[0]; int NC=info[1]; int* learnt_clause=formula+clause_pointer[NC]; int level_to_jump; // inizializzazioni int i, c=0, p=0, q, var_p=0, var_q, clause, lp=0, btlevel=0, tptr=trail[0], verbose=0; if (verbose>1) { printf("Current level:\t\t%d\n",level[0]); printf("Conflict clause:\t%d\n",refs[0]); int i; printf("trail[%d] = [",trail[0]); for(i=1;i1) { printf("p:\t%d\t",p); printf("var_p:\t%d\t",var_p); printf("clause:\t%d\t",clause); printf("c_start:\t%d\t",clause_pointer[clause]); printf("end: %d\n",clause_pointer[clause+1]); printf("reason (clause %d) is:\t",clause); for(i=clause_pointer[clause];i1) { printf("q:%d\n",q); printf("lp at %d, q is %d at level %d, ref %d\n",lp,q,level[var_q],refs[var_q]); } if (!seen[var_q]) { if (verbose>1) printf("q never seen! "); seen[var_q] = 1; if (level[var_q] == level[0]) { // (q == decision level corrente) c++; if (verbose>1) printf("and of same current level (%d==%d) so c++.\n",level[var_q],level[0]); } else if (level[var_q] > 0) { // se q è di un decision level inferiore, ma non il root level (al momento il root level è inutilizzato) learnt_clause[lp++] = -q; // calcolo level to backtrack, il massimo della learnt_clause, escluso il first-uip che verrà aggiunto alla fine btlevel = max(btlevel,level[var_q]); if (verbose>1) { printf("and of minor level (%d<%d) so add %d.\n",level[var_q],level[0],-trail[i]); if (lp>0) { printf("current learnt clause is: "); int j; for(j=0;j1) printf("q is seen\n"); } } do { p = trail[--tptr]; var_p = abs(p); if (verbose>1) { printf("c:%d p%d\n",c,p); } if (verbose>1) { printf("scan p=%d\ttptr=%d\tseen[var_p]=%d\n",p,tptr,seen[var_p]); } } while (!seen[var_p]); c--; //seen[var_p] = 0; if (verbose>1) printf("c--, process var %d, c:%d, ref %d val %d\n",var_p,c,refs[var_p],vars[var_p]); } while (c>0); //ALE: 8 giu commentata per avere backjumping //btlevel = max(btlevel,level[abs(p)]-1); // in caso di solo UIP senza reasons, comunque devo annullare la decisione del livello corrente (dato che poi sommo 1, qui lo sottraggo) // in p ora ho il first-UIP, ci va il suo negato nella learnt clause learnt_clause[lp++] = -p; // controllo inserimento clausole doppie: todo // debug if (verbose>0) { printf("Add First-UIP: %d lev %d\nLearn clause %d: ",-p,level[abs(p)],NC); // printf("* LEARNT CLAUSE: "); for(i=0;i1) printf("backjump from level %d to level %d\n\n\n",level[0],btlevel); level_to_jump=btlevel; // non chronological backtracking } else { if (verbose>1) printf("backtrack to level %d\n",level[0]); level_to_jump=level[0]; // chronological backtracking } return level_to_jump; } //************************************************************************** //************************************************************************** //****** AUXILIARY //************************************************************************** //************************************************************************** __host__ inline void backtrackingf(int level_to_jump){ // annulla trail stack e riferimenti a variabili // printf("backtrack to %d\n",level_to_jump); backtracking++; int loop=trail[0]>0; while (loop){ // questo mette a posto trail e variabili. level sistemato sulla ricorsione int VARP=abs(trail[trail[0]-1]); loop=trail[0]>0 && ((level[VARP]>level_to_jump) || (refs[VARP]>=0) || (level[VARP]<=level_to_jump && refs[VARP]==-2)); if (level[VARP]>level_to_jump || refs[VARP]>=0 || refs[VARP]==-2){ //printf("back var %d lev %d\n",VARP,level[VARP]); host_vars[VARP] = -1; // restore "unknown" status level[VARP] = -1; refs[VARP] = -1; trail[0]--; } else{ if (level[VARP]<=level_to_jump && refs[VARP]==-1){ // variabile gia' testata -> metto caso opposto //printf("switch var %d at lev %d -> %d\n",VARP,level[VARP],-trail[trail[0]-1]); refs[VARP] = -2; host_vars[VARP] = 1-host_vars[VARP]; // inverse status trail[trail[0]-1]=-trail[trail[0]-1]; // inverto valore su trail level[0]=level[VARP]; } } } } __host__ inline void backjumpingf(int level_to_jump){ // riparte con ultima scelta fatta a level_to_jump // annulla trail stack e riferimenti a variabili // printf("backtrack to %d\n",level_to_jump); backjump++; int loop=trail[0]>0; while (loop){ // questo mette a posto trail e variabili. level sistemato sulla ricorsione int VARP=abs(trail[trail[0]-1]); loop=trail[0]>0 && ((level[VARP]>level_to_jump) || (refs[VARP]>=0) //|| (level[VARP]<=level_to_jump && refs[VARP]==-2) ); if (level[VARP]>level_to_jump || refs[VARP]>=0){ //printf("back var %d lev %d\n",VARP,level[VARP]); host_vars[VARP] = -1; // restore "unknown" status level[VARP] = -1; refs[VARP] = -1; trail[0]--; } else{ // refs<0 && level=leveltojump if (level[VARP]<=level_to_jump && refs[VARP]==-1){ // variabile gia' testata -> metto caso opposto //printf("switch var %d at lev %d -> %d\n",VARP,level[VARP],-trail[trail[0]-1]); //refs[VARP] = -2; //host_vars[VARP] = 1-host_vars[VARP]; // inverse status //trail[trail[0]-1]=-trail[trail[0]-1]; // inverto valore su trail level[0]=level[VARP]; } } } if (level_to_jump==0)// caso speciale se backjump annulla anche primo livello level[0]=0; } //************************************************************************** //************************************************************************** //********** MAIN PROCEDURE: twolevel_DPLL ********************************* //********** a part of DPLL (recursive) is handled by the host. *********** //********** The final part is made in the device(s) *********************** //************************************************************************** //************************************************************************** //************************************************************************** __host__ void mask_propagation_cpu(){ int dbg=0; do{ // CPU mask_prop clauind = mask_prop(&selected_var,&sat_val); if (dbg) printf("prop %d %d %d: ",sat_val,selected_var,clauind); if (dbg) for (int i=clause_pointer[clauind];i 0; level[VAR] = level[0]; refs[VAR] = clauind; // devo sapere la clausola trail[trail[0]] = selected_var; trail[0]++; } } while (sat_val==1); } __host__ void mask_propagation_gpu(){ int dbg=0; if (1==0 && dbg){ printf("at lev %d\n",level[0]); for(int i=1;i>>( mask_data, dev_parma_vars, dev_mapped_formula, dev_mapped_cp,device_info_formula); HANDLE_ERROR( cudaMemcpy( h_mask_data, mask_data, (3*PARblocks)*sizeof(int), cudaMemcpyDeviceToHost)); // puo' copiare anche 3*blocks+NV per risparmiare chiamate (se servono dati su vars) if (dbg) printf("..\n"); CUDA_count++; // colleziona dati da blocchi int bestnum=0; int bestid=0; int bestcl=0; for (int i=0;ih_mask_data[3*i+2]) || bestnum==0 || (bestnum>0 && h_mask_data[3*i]>0 && (h_mask_data[3*i] 0; level[VAR] = level[0]; refs[VAR] = h_mask_data[3*i+2]; // devo sapere la clausola trail[trail[0]] = h_mask_data[3*i+1]; trail[0]++; } } */ } if (dbg) printf("\nbest %d %d %d\n",bestnum,bestid,bestcl); //printf("\n"); sat_val =bestnum; selected_var=bestid; clauind=bestcl; if (dbg) printf("prop %d %d %d: ",sat_val,selected_var,clauind); if (dbg) for (int i=clause_pointer[clauind];i 0; level[VAR] = level[0]; refs[VAR] = clauind; // devo sapere la clausola trail[trail[0]] = selected_var; trail[0]++; } } while (sat_val==1); } //********************************************************************************** //********************************************************************************** //********************************************************************************** //****************** TWO LEVEL DPLL (no recursive) ***************************** //********************************************************************************** //********************************************************************************** //********************************************************************************** __host__ int twolevel_DPLL(){ int selected; int pos; int FV; short good=0; int dbg=0; int isbackjumping=0; // a 1 se fatto backj e quindi devo continuare (anche se il bj ha portato il livello a 0 = ho imparato clausola unitaria, e con una UP posso ripartire da livello 1 con l'altro valore) do { isbackjumping=0; if (dbg) printf("twolevel_DPLL lev %d\n ",level[0]); // mask_propagation if (USEGPU %2 == 0) mask_propagation_cpu(); else mask_propagation_gpu(); if (dbg){ printf("dopo maskprop lev %d host_vars: ",level[0]); for(int i=1;i%d(%d,%d) ",i,host_vars[i],level[i],refs[i]); printf("\n"); */ printf("sat_val %d, selvar %d\n",sat_val,selected_var); } // new case if (level[0]<=0 && sat_val<0){ // failed, UP was enough, no need to learn, return fail! return 0; } //************* UNSATISFIABLE ASSIGNMENT if (sat_val < 0){ // At least one clause is false good = 0; refs[0] = clauind; // conflict clause if (learning) {// CLAUSE LEARNING clause_learning(); if (NC%1000 ==0) printf("NC: %d\n",NC); if (dbg) printf("cl: level to backjump to %d\n",level_to_jump); if (dbg) printf("backjump %d\n",level_to_jump); backjumpingf(level_to_jump); isbackjumping=1; } else{ if (dbg) printf("backtrack %d\n",level[0]); backtrackingf(level[0]); // se non imparo, apro fratello (tengo livello corrente) } // END CLAUSE LEARNING } else if (sat_val == 0){ //************* FOUND A SOLUTION good = 1; } else { // There is a non-ground literal - sat_val > 0 pos = (selected_var > 0); selected = abs(selected_var); // Look for its variable and sign FV = NV-trail[0]; // free vars if (USEGPU >= 2 && FV <= MaxV) { //*** GPU CALL: printf("CUDA caller with %d learnt clauses\n",nclausel); good=CUDA_caller(FV); if (!good) backtrackingf(level[0]); //backtrack } else { ///////// vado con CPU level[0]++; // apro nuovo ramo if (dbg) printf("lev %d, 2 scelte: var %d val %d\n",level[0],selected,pos); level[selected] = level[0]; host_vars[selected] = pos; trail[trail[0]++] = selected_var; } // CPU } //sat_val >2 } while(isbackjumping || (level[0]>0 && !good)); return good; } __global__ void tutto( int* mask_data, int* vars, int * level, int * refs, int * trail, int * seen, int nBlocks, int * info_formula, int * formula, int* cp, int learn){ int stride = blockDim.x * gridDim.x; int g_tid = threadIdx.x + blockIdx.x * blockDim.x; int g_tid_R = g_tid; int tid = threadIdx.x; if (vars[0]!=-1) // trovato soluzione return; int sat_val,selected_var,clauind; int bestnum=0; int bestid=0; int bestcl=0; for (int i=0;imask_data[3*i+2]) || bestnum==0 || (bestnum>0 && mask_data[3*i]>0 && (mask_data[3*i] 0; level[VAR] = level[0]; refs[VAR] = clauind; // devo sapere la clausola trail[trail[0]] = selected_var; trail[0]++; } else if (level[0]<=0 && sat_val<0){ // failed, UP was enough, no need to learn, break --> vars[0]=0; } else //************* UNSATISFIABLE ASSIGNMENT if (sat_val < 0){ // At least one clause is false refs[0] = clauind; // conflict clause int dbg=0; if (1==learn) //learning) {// CLAUSE LEARNING int level_to_jump=clause_learning_gpu( vars, level, refs, trail, seen, formula, cp, info_formula); if (dbg) printf("cl: level to backjump to %d\n",level_to_jump); if (dbg) printf("backjump %d\n",level_to_jump); int loop=trail[0]>0; while (loop){ // questo mette a posto trail e variabili. level sistemato sulla ricorsione int VARP=abs(trail[trail[0]-1]); loop=trail[0]>0 && ((level[VARP]>level_to_jump) || (refs[VARP]>=0) //|| (level[VARP]<=level_to_jump && refs[VARP]==-2) ); if (level[VARP]>level_to_jump || refs[VARP]>=0){ //printf("back var %d lev %d\n",VARP,level[VARP]); vars[VARP] = -1; // restore "unknown" status level[VARP] = -1; refs[VARP] = -1; trail[0]--; } else{ // refs<0 && level=leveltojump if (level[VARP]<=level_to_jump && refs[VARP]==-1){ // variabile gia' testata -> metto caso opposto //printf("switch var %d at lev %d -> %d\n",VARP,level[VARP],-trail[trail[0]-1]); //refs[VARP] = -2; //host_vars[VARP] = 1-host_vars[VARP]; // inverse status //trail[trail[0]-1]=-trail[trail[0]-1]; // inverto valore su trail level[0]=level[VARP]; } } } if (level_to_jump==0)// caso speciale se backjump annulla anche primo livello level[0]=0; } else { //printf("b%d\n",level[0]); // backtracking++; int level_to_jump=level[0]; int loop=trail[0]>0; while (loop){ // questo mette a posto trail e variabili. level sistemato sulla ricorsione int VARP=abs(trail[trail[0]-1]); loop=trail[0]>0 && ((level[VARP]>level_to_jump) || (refs[VARP]>=0) || (level[VARP]<=level_to_jump && refs[VARP]==-2)); if (level[VARP]>level_to_jump || refs[VARP]>=0 || refs[VARP]==-2){ //printf("back var %d lev %d\n",VARP,level[VARP]); vars[VARP] = -1; // restore "unknown" status level[VARP] = -1; refs[VARP] = -1; trail[0]--; } else{ if (level[VARP]<=level_to_jump && refs[VARP]==-1){ // variabile gia' testata -> metto caso opposto //printf("switch var %d at lev %d -> %d\n",VARP,level[VARP],-trail[trail[0]-1]); refs[VARP] = -2; vars[VARP] = 1-vars[VARP]; // inverse status trail[trail[0]-1]=-trail[trail[0]-1]; // inverto valore su trail level[0]=level[VARP]; } } } } // printf("trail %d lev %d\n",trail[0],level[0]); // after backtracking check if failed tree if (level[0]<0 && sat_val<0) vars[0]=0; // END CLAUSE LEARNING } else if (sat_val == 0){ //************* FOUND A SOLUTION vars[0]=1; } else { // There is a non-ground literal - sat_val > 1 int pos = (selected_var > 0); int selected = abs(selected_var); // Look for its variable and sign level[0]++; // apro nuovo ramo //printf("l%d, v%d=%d\n",level[0],selected,pos); level[selected] = level[0]; vars[selected] = pos; trail[trail[0]++] = selected_var; } } // parmask_prop //************************************************************************ //************************************************************************ //******************* main ********************************** //************************************************************************ //************************************************************************ __host__ int main(int argc, char** argv) { cudaDeviceProp prop; int whichDevice; if (argc!=3){ printf("usage: %s learning(0/1) filename\n",argv[0]); return -1; } // *** PARAMETER SETTINGS USEGPU = 1; CUDABLOCKS = 1; CUDATHREADS = 1; learning = atoi(argv[1]); if (USEGPU>0){ // CUDA timings initialization cudaEventCreate( &start ); cudaEventCreate( &stop ); // Checking of CUDA HW capabilities HANDLE_ERROR( cudaGetDevice( &whichDevice ) ); HANDLE_ERROR( cudaGetDeviceProperties( &prop, whichDevice ) ); if (prop.canMapHostMemory != 1) { printf( "Device cannot map memory.\n" ); return 0; } } //*** Some "warning" and exit if(USEGPU>0 && CUDATHREADS > prop.maxThreadsPerBlock){ printf("Cant handle so many vars per block (max %d < %d threads per block)\n",CUDATHREADS,prop.maxThreadsPerBlock); return -1; } printf("using LOG_BLKS=%d, LOG_THRDS=%d MaxV=%d \n",LOG_BLKS,LOG_THRDS,MaxV); if (Delta > DV_SIZE){ printf("Delta too large, recompile kernel with %d in the size of arrays\n",Delta); return -1; } //**** CORE COMPUTATION allocate_first(); // *** Load a SAT formula in DIMACS format load_formula(argv[2],&NV,&NC,&NL); //*** Print and use some HW and instance info if (USEGPU>0){ print_info(prop); PARblocks = IMIN( ( NC + THREADS - 1) / THREADS, 2*prop.multiProcessorCount ); } allocate_second(); //*** Start running time (formula loading is not counted) if (USEGPU %2 == 1) printf("blocks %d, threads %d\n",PARblocks, THREADS); srand ( time(NULL) ); printf("start\n"); if (USEGPU>0){ cudaEventRecord(start,0); cudaEventSynchronize(start); }else{ #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) global_time=clock(); #else gettimeofday(&time_start,NULL); #endif } //********************************************************************* //******* Call the main procedure (store the result in vars[0]): //********************************************************************* HANDLE_ERROR( cudaMemcpy( host_vars, dev_parma_vars, NV * sizeof(int), cudaMemcpyDeviceToHost ) ); int ct=0; while (host_vars[0]==-1){ parmask_prop<<>>( mask_data, dev_parma_vars, dev_mapped_formula, dev_mapped_cp,device_info_formula); tutto<<<1, 1>>>( mask_data, dev_parma_vars, device_level, device_refs, device_trail,device_seen, PARblocks,device_info_formula,dev_mapped_formula, dev_mapped_cp, learning); backtracking++; //printf("ct %d back %d\n",ct,backtracking); if (ct==0) HANDLE_ERROR( cudaMemcpy( host_vars, dev_parma_vars, sizeof(int), cudaMemcpyDeviceToHost ) ); ct=(1+ct)%10; } HANDLE_ERROR( cudaMemcpy( host_vars, dev_parma_vars, NV * sizeof(int), cudaMemcpyDeviceToHost ) ); HANDLE_ERROR( cudaMemcpy( info_formula, device_info_formula, 2 * sizeof(int), cudaMemcpyDeviceToHost ) ); nclausel=info_formula[1]-NC; //*** CUDA timings: if (USEGPU>0){ cudaEventRecord( stop, 0 ); cudaEventSynchronize( stop ); cudaEventElapsedTime( &deltatime, start, stop ); } else{ #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) clock_t final=clock()-global_time; deltatime=((float)final/CLK_TCK)*1000; #else gettimeofday(&time_stop,NULL); deltatime=(0.0+time_stop.tv_sec+time_stop.tv_usec/1000000.0) - (0.0+time_start.tv_sec+time_start.tv_usec/1000000.0); deltatime*=1000; #endif } /* DISPLAY RESULT AND RUNNING TIME: */ print_result( host_vars, NV ); print_time( deltatime ); deallocate(); return 0; } //********* END *******************************************************