
// simt_stack::update -- SIMT reconvergence (PDOM) stack update performed after a warp
// executes an instruction (GPGPU-Sim). The active threads are grouped by the PC they
// branch to next; the top of the stack is then rewritten, pushing a reconvergence
// entry and one entry per divergent path when the warp diverges.
void simt_stack::update( simt_mask_t &thread_done, addr_vector_t &next_pc,
                         address_type recvg_pc, op_type next_inst_op,
                         unsigned next_inst_size, address_type next_inst_pc )
{
    assert(m_stack.size() > 0);
    assert(next_pc.size() == m_warp_size);

    simt_mask_t      top_active_mask = m_stack.back().m_active_mask;
    address_type     top_recvg_pc    = m_stack.back().m_recvg_pc;
    address_type     top_pc          = m_stack.back().m_pc;   // PC of the instruction just executed
    stack_entry_type top_type        = m_stack.back().m_type;
    assert(top_pc == next_inst_pc);
    assert(top_active_mask.any());

    const address_type null_pc = -1;
    bool warp_diverged = false;
    address_type new_recvg_pc = null_pc;
    unsigned num_divergent_paths = 0;

    // Group the active, not-yet-finished threads by their next PC.
    std::map<address_type, simt_mask_t> divergent_paths;
    while (top_active_mask.any()) {
        // Extract one group of threads that share the same next PC.
        address_type tmp_next_pc = null_pc;
        simt_mask_t tmp_active_mask;
        for (int i = m_warp_size - 1; i >= 0; i--) {
            if (top_active_mask.test(i)) {          // is this thread active?
                if (thread_done.test(i)) {
                    top_active_mask.reset(i);       // remove completed thread from the active mask
                } else if (tmp_next_pc == null_pc) {
                    tmp_next_pc = next_pc[i];       // first live thread defines this group's PC
                    tmp_active_mask.set(i);
                    top_active_mask.reset(i);
                } else if (tmp_next_pc == next_pc[i]) {
                    tmp_active_mask.set(i);         // same next PC: join this group
                    top_active_mask.reset(i);
                }
            }
        }

        if (tmp_next_pc == null_pc) {
            assert(!top_active_mask.any());         // all remaining threads are done
            continue;
        }

        divergent_paths[tmp_next_pc] = tmp_active_mask;
        num_divergent_paths++;
    }

    address_type not_taken_pc = next_inst_pc + next_inst_size;
    assert(num_divergent_paths <= 2);
    for (unsigned i = 0; i < num_divergent_paths; i++) {
        address_type tmp_next_pc = null_pc;
        simt_mask_t tmp_active_mask;
        tmp_active_mask.reset();

        // Process the fall-through (not-taken) path first, then the taken path.
        if (divergent_paths.find(not_taken_pc) != divergent_paths.end()) {
            assert(i == 0);
            tmp_next_pc = not_taken_pc;
            tmp_active_mask = divergent_paths[tmp_next_pc];
            divergent_paths.erase(tmp_next_pc);
        } else {
            std::map<address_type, simt_mask_t>::iterator it = divergent_paths.begin();
            tmp_next_pc = it->first;
            tmp_active_mask = divergent_paths[tmp_next_pc];
            divergent_paths.erase(tmp_next_pc);
        }

        // Handle the special cases first.
        if (next_inst_op == CALL_OPS) {
            // A call is not a divergent instruction: all threads execute it together,
            // so push a single CALL entry and return.
            assert(num_divergent_paths == 1);

            simt_stack_entry new_stack_entry;
            new_stack_entry.m_pc = tmp_next_pc;
            new_stack_entry.m_active_mask = tmp_active_mask;
            new_stack_entry.m_branch_div_cycle = gpu_sim_cycle + gpu_tot_sim_cycle;
            new_stack_entry.m_type = STACK_ENTRY_TYPE_CALL;
            m_stack.push_back(new_stack_entry);
            return;
        } else if (next_inst_op == RET_OPS && top_type == STACK_ENTRY_TYPE_CALL) {
            // Return: pop the CALL entry.
            assert(num_divergent_paths == 1);
            m_stack.pop_back();

            assert(m_stack.size() > 0);
            m_stack.back().m_pc = tmp_next_pc;  // set the new top entry's PC to the return PC
            // If the new top of the stack is reconverging, pop it as well.
            if (tmp_next_pc == m_stack.back().m_recvg_pc &&
                m_stack.back().m_type != STACK_ENTRY_TYPE_CALL) {
                assert(m_stack.back().m_type == STACK_ENTRY_TYPE_NORMAL);
                m_stack.pop_back();
            }
            return;
        }

        // Discard the new entry if its PC matches the reconvergence PC: the path
        // reconverges immediately. If the top stack entry is a CALL, don't reconverge.
        if (tmp_next_pc == top_recvg_pc && (top_type != STACK_ENTRY_TYPE_CALL)) continue;

        // More than one path: the warp diverges. Turn the existing top entry into a
        // reconvergence entry on the PDOM stack.
        if ((num_divergent_paths > 1) && !warp_diverged) {
            warp_diverged = true;
            new_recvg_pc = recvg_pc;
            if (new_recvg_pc != top_recvg_pc) {
                m_stack.back().m_pc = new_recvg_pc;
                m_stack.back().m_branch_div_cycle = gpu_sim_cycle + gpu_tot_sim_cycle;
                m_stack.push_back(simt_stack_entry());
            }
        }

        // Discard the new entry if its PC matches the new reconvergence PC.
        if (warp_diverged && tmp_next_pc == new_recvg_pc) continue;

        // Update the current top of the PDOM stack for this path.
        m_stack.back().m_pc = tmp_next_pc;
        m_stack.back().m_active_mask = tmp_active_mask;
        if (warp_diverged) {
            m_stack.back().m_calldepth = 0;
            m_stack.back().m_recvg_pc = new_recvg_pc;
        } else {
            m_stack.back().m_recvg_pc = top_recvg_pc;
        }

        m_stack.push_back(simt_stack_entry());
    }
    assert(m_stack.size() > 0);
    m_stack.pop_back();   // pop the scratch top entry (or the old top entry if every path reconverged)

    if (warp_diverged) {
        ptx_file_line_stats_add_warp_divergence(top_pc, 1);
    }
}
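
The grouping step at the top of the listing can be exercised on its own. Below is a minimal, self-contained sketch, not simulator code: it assumes a 4-thread warp, uses std::bitset for the SIMT mask, and the names WARP_SIZE, mask_t and group_by_next_pc are invented for illustration. It also collapses the simulator's nested while/for scan into a single pass over the warp, which yields the same grouping.

#include <bitset>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Illustrative stand-ins, not GPGPU-Sim identifiers.
typedef unsigned long long address_type;
const unsigned WARP_SIZE = 4;
typedef std::bitset<WARP_SIZE> mask_t;

// Bucket the still-active, not-yet-done threads by the PC they want to execute next.
std::map<address_type, mask_t> group_by_next_pc(const mask_t &active,
                                                const mask_t &done,
                                                const std::vector<address_type> &next_pc)
{
    std::map<address_type, mask_t> paths;
    for (int i = WARP_SIZE - 1; i >= 0; i--) {
        if (!active.test(i)) continue;   // thread not active in the current stack entry
        if (done.test(i)) continue;      // exited threads join no path
        paths[next_pc[i]].set(i);        // threads with the same next PC share a path
    }
    return paths;
}

int main()
{
    // Threads 1 and 3 branch to 0x80, thread 2 falls through to 0x40,
    // thread 0 has already exited the kernel.
    mask_t active(std::string("1111"));
    mask_t done(std::string("0001"));
    std::vector<address_type> next_pc;
    next_pc.push_back(0x0);
    next_pc.push_back(0x80);
    next_pc.push_back(0x40);
    next_pc.push_back(0x80);

    std::map<address_type, mask_t> paths = group_by_next_pc(active, done, next_pc);
    for (std::map<address_type, mask_t>::const_iterator it = paths.begin();
         it != paths.end(); ++it)
        std::printf("path pc=0x%llx mask=%s\n",
                    it->first, it->second.to_string().c_str());
    // Prints two divergent paths: 0x40 -> 0100 (thread 2), 0x80 -> 1010 (threads 1 and 3).
    return 0;
}

A two-way branch can produce at most two such groups, which is what the assert(num_divergent_paths <= 2) in the listing relies on.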