diff --git a/JDK/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/JDK/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 74e6edce2a..de0882387e 100644 --- a/JDK/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/JDK/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -571,9 +571,11 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* __ testptr(tmp2, tmp2); __ jcc(Assembler::zero, prefetch_runtime); __ subptr(tmp2, wordSize); - __ movptr(prefetch_queue_index, tmp2); + // __ movptr(prefetch_queue_index, tmp2); __ addptr(tmp2, prefetch_buffer); __ movptr(Address(tmp2, 0), new_val); + __ subptr(tmp2, prefetch_buffer); + __ movptr(prefetch_queue_index, tmp2); __ jmp(prefetch_done); __ bind(prefetch_runtime); diff --git a/JDK/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp b/JDK/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp index f487b22e01..01b2af7c27 100644 --- a/JDK/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp +++ b/JDK/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp @@ -296,9 +296,9 @@ void G1BarrierSetC2::pre_barrier(GraphKit* kit, Node* prefetch_next_index = kit->gvn().transform(new SubXNode(prefetch_index, __ ConX(sizeof(intptr_t)))); // Now get the buffer location we will log the previous value into and store it Node *prefetch_log_addr = __ AddP(no_base, prefetch_buffer, prefetch_next_index); - __ store(__ ctrl(), prefetch_log_addr, val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered); + __ store(__ ctrl(), prefetch_log_addr, val, T_OBJECT, Compile::AliasIdxRaw, MemNode::release); // update the index - __ store(__ ctrl(), prefetch_index_adr, prefetch_next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered); + __ store(__ ctrl(), prefetch_index_adr, prefetch_next_index, index_bt, Compile::AliasIdxRaw, MemNode::release); } __ else_(); { // logging buffer is full, call the runtime const TypeFunc *tf = write_ref_field_prefetch_entry_Type(); diff --git a/JDK/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/JDK/src/hotspot/share/gc/g1/g1CollectedHeap.cpp index 3b86ce7871..b4850e90ed 100644 --- a/JDK/src/hotspot/share/gc/g1/g1CollectedHeap.cpp +++ b/JDK/src/hotspot/share/gc/g1/g1CollectedHeap.cpp @@ -1833,10 +1833,15 @@ jint G1CollectedHeap::initialize() { create_aux_memory_mapper("Prev Bitmap", bitmap_size, G1CMBitMap::heap_map_factor()); G1RegionToSpaceMapper* next_bitmap_storage = create_aux_memory_mapper("Next Bitmap", bitmap_size, G1CMBitMap::heap_map_factor()); + G1RegionToSpaceMapper* prev_black_bitmap_storage = + create_aux_memory_mapper("Prev Black Bitmap", bitmap_size, G1CMBitMap::heap_map_factor()); + G1RegionToSpaceMapper* next_black_bitmap_storage = + create_aux_memory_mapper("Next Black Bitmap", bitmap_size, G1CMBitMap::heap_map_factor()); + _hrm = HeapRegionManager::create_manager(this, g1_collector_policy()); - _hrm->initialize(heap_storage, prev_bitmap_storage, next_bitmap_storage, bot_storage, cardtable_storage, card_counts_storage); + _hrm->initialize(heap_storage, prev_bitmap_storage, next_bitmap_storage, prev_black_bitmap_storage, next_black_bitmap_storage, bot_storage, cardtable_storage, card_counts_storage); _card_table->initialize(cardtable_storage); // Do later initialization work for concurrent refinement. _hot_card_cache->initialize(card_counts_storage); @@ -1881,7 +1886,7 @@ jint G1CollectedHeap::initialize() { // Create the G1ConcurrentMark data structure and thread. // (Must do this late, so that "max_regions" is defined.) - _cm = new G1ConcurrentMark(this, prev_bitmap_storage, next_bitmap_storage); + _cm = new G1ConcurrentMark(this, prev_bitmap_storage, next_bitmap_storage, prev_black_bitmap_storage, next_black_bitmap_storage); if (_cm == NULL || !_cm->completed_initialization()) { vm_shutdown_during_initialization("Could not create/initialize G1ConcurrentMark"); return JNI_ENOMEM; @@ -2216,7 +2221,7 @@ void G1CollectedHeap::increment_old_marking_cycles_completed(bool concurrent) { if (concurrent) { _cm_thread->set_idle(); // Haoran: Modify - _pf_thread->set_idle(); + // _pf_thread->set_idle(); } // This notify_all() will ensure that a thread that called @@ -2777,9 +2782,9 @@ HeapWord* G1CollectedHeap::do_collection_pause(size_t word_size, void G1CollectedHeap::do_concurrent_mark() { MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); if (!_cm_thread->in_progress()) { + concurrent_mark()->clear_bitmap(concurrent_mark()->next_black_mark_bitmap(), workers(), false); _cm_thread->set_started(); // Haoran: modify - _pf_thread->set_started(); CGC_lock->notify(); // CPF_lock->notify(); } diff --git a/JDK/src/hotspot/share/gc/g1/g1CollectedHeap.hpp b/JDK/src/hotspot/share/gc/g1/g1CollectedHeap.hpp index 50264cf524..e6137600cc 100644 --- a/JDK/src/hotspot/share/gc/g1/g1CollectedHeap.hpp +++ b/JDK/src/hotspot/share/gc/g1/g1CollectedHeap.hpp @@ -146,6 +146,8 @@ class G1CollectedHeap : public CollectedHeap { friend class G1FullCollector; friend class G1GCAllocRegion; friend class G1HeapVerifier; + friend class G1CMTask; + friend class G1ConcurrentMark; // Closures used in implementation. friend class G1ParScanThreadState; diff --git a/JDK/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp b/JDK/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp index 632499b873..195fb4c5b9 100644 --- a/JDK/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp +++ b/JDK/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp @@ -39,6 +39,8 @@ #include "gc/g1/heapRegion.inline.hpp" #include "gc/g1/heapRegionRemSet.hpp" #include "gc/g1/heapRegionSet.inline.hpp" +#include "gc/g1/g1ConcurrentPrefetchThread.hpp" +#include "gc/g1/g1ConcurrentPrefetchThread.inline.hpp" #include "gc/shared/gcId.hpp" #include "gc/shared/gcTimer.hpp" #include "gc/shared/gcTrace.hpp" @@ -72,7 +74,23 @@ bool G1CMBitMapClosure::do_addr(HeapWord* const addr) { // We move that task's local finger along. _task->move_finger_to(addr); - _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); + size_t page_id = ((size_t)addr - SEMERU_START_ADDR)/4096; + bool page_likely_local = G1CollectedHeap::heap()->user_buf->page_stats[page_id] == 0; + + + + if(!_task->_next_black_mark_bitmap->is_marked(addr)){ + if(page_likely_local){ + _task->_count_bitmap_page_local += 1; + } else { + _task->_count_bitmap_page_remote += 1; + } + _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); + } + // else { + // ShouldNotReachHere(); + // } + // we only partially drain the local queue and global stack _task->drain_local_queue(true); _task->drain_global_stack(true); @@ -357,16 +375,23 @@ static uint scale_concurrent_worker_threads(uint num_gc_workers) { } G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, - G1RegionToSpaceMapper* prev_bitmap_storage, - G1RegionToSpaceMapper* next_bitmap_storage) : + G1RegionToSpaceMapper* prev_bitmap_storage, + G1RegionToSpaceMapper* next_bitmap_storage, + G1RegionToSpaceMapper* prev_black_bitmap_storage, + G1RegionToSpaceMapper* next_black_bitmap_storage) : // _cm_thread set inside the constructor _g1h(g1h), _completed_initialization(false), _mark_bitmap_1(), _mark_bitmap_2(), + _mark_bitmap_3(), + _mark_bitmap_4(), + _prev_mark_bitmap(&_mark_bitmap_1), _next_mark_bitmap(&_mark_bitmap_2), + _prev_black_mark_bitmap(&_mark_bitmap_3), + _next_black_mark_bitmap(&_mark_bitmap_4), _heap(_g1h->reserved_region()), @@ -414,6 +439,8 @@ G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, { _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage); _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage); + _mark_bitmap_3.initialize(g1h->reserved_region(), prev_black_bitmap_storage); + _mark_bitmap_4.initialize(g1h->reserved_region(), next_black_bitmap_storage); // Create & start ConcurrentMark thread. _cm_thread = new G1ConcurrentMarkThread(this); @@ -512,7 +539,7 @@ void G1ConcurrentMark::reset() { // Reset all tasks, since different phases will use different number of active // threads. So, it's easiest to have all of them ready. for (uint i = 0; i < _max_num_tasks; ++i) { - _tasks[i]->reset(_next_mark_bitmap); + _tasks[i]->reset(_next_mark_bitmap, _next_black_mark_bitmap); } uint max_regions = _g1h->max_regions(); @@ -719,6 +746,7 @@ void G1ConcurrentMark::cleanup_for_next_mark() { // guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant"); clear_bitmap(_next_mark_bitmap, _concurrent_workers, true); + clear_bitmap(_next_black_mark_bitmap, _concurrent_workers, true); // Repeat the asserts from above. guarantee(cm_thread()->during_cycle(), "invariant"); @@ -728,6 +756,7 @@ void G1ConcurrentMark::cleanup_for_next_mark() { void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) { assert_at_safepoint_on_vm_thread(); clear_bitmap(_prev_mark_bitmap, workers, false); + clear_bitmap(_prev_black_mark_bitmap, workers, false); } class NoteStartOfMarkHRClosure : public HeapRegionClosure { @@ -843,6 +872,7 @@ class G1CMConcurrentMarkingTask : public AbstractGangTask { assert(worker_id < _cm->active_tasks(), "invariant"); G1CMTask* task = _cm->task(worker_id); + task->clear_memliner_stats(); task->record_start_time(); if (!_cm->has_aborted()) { do { @@ -854,6 +884,7 @@ class G1CMConcurrentMarkingTask : public AbstractGangTask { } while (!_cm->has_aborted() && task->has_aborted()); } task->record_end_time(); + task->print_memliner_stats(); guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant"); } @@ -1004,14 +1035,27 @@ void G1ConcurrentMark::mark_from_roots() { // Parallel task terminator is set in "set_concurrency_and_phase()" set_concurrency_and_phase(active_workers, true /* concurrent */); + set_in_conc_mark_from_roots(true); { MutexLockerEx pl(CPF_lock, Mutex::_no_safepoint_check_flag); + G1CollectedHeap::heap()->_pf_thread->set_started(); // Haoran: modify CPF_lock->notify(); } G1CMConcurrentMarkingTask marking_task(this); _concurrent_workers->run_task(&marking_task); print_stats(); + + set_in_conc_mark_from_roots(false); + { + log_info(gc)("before CCM mark from roots finish"); + MonitorLockerEx ml(CCM_finish_lock, Mutex::_no_safepoint_check_flag); + while(!G1CollectedHeap::heap()->_pf_thread->idle()){ + ml.wait(Mutex::_no_safepoint_check_flag); + } + log_info(gc)("after CCM mark from roots finish"); + + } } void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller) { @@ -1992,13 +2036,15 @@ class VerifyNoCSetOops { void operator()(G1TaskQueueEntry task_entry) const { if (task_entry.is_array_slice()) { - guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); + size_t mask_addr = (size_t)task_entry.slice() & ((1ULL<<63)-1); + guarantee(_g1h->is_in_reserved((void*)mask_addr), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); return; } - guarantee(oopDesc::is_oop(task_entry.obj()), + size_t mask_addr = (size_t)task_entry.obj() & ((1ULL<<63)-1); + guarantee(oopDesc::is_oop((oop)mask_addr), "Non-oop " PTR_FORMAT ", phase: %s, info: %d", p2i(task_entry.obj()), _phase, _info); - guarantee(!_g1h->is_in_cset(task_entry.obj()), + guarantee(!_g1h->is_in_cset((oop)mask_addr), "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", p2i(task_entry.obj()), _phase, _info); } @@ -2076,6 +2122,7 @@ void G1ConcurrentMark::concurrent_cycle_abort() { { GCTraceTime(Debug, gc) debug("Clear Next Bitmap"); clear_bitmap(_next_mark_bitmap, _g1h->workers(), false); + clear_bitmap(_next_black_mark_bitmap, _g1h->workers(), false); } // Note we cannot clear the previous marking bitmap here // since VerifyDuringGC verifies the objects marked during @@ -2238,9 +2285,11 @@ void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { _cm_oop_closure = cm_oop_closure; } -void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) { +void G1CMTask::reset(G1CMBitMap* next_mark_bitmap, G1CMBitMap* next_black_mark_bitmap) { guarantee(next_mark_bitmap != NULL, "invariant"); _next_mark_bitmap = next_mark_bitmap; + _next_black_mark_bitmap = next_black_mark_bitmap; + clear_region_fields(); _calls = 0; @@ -2415,16 +2464,40 @@ void G1CMTask::drain_local_queue(bool partially) { // bool ret = _task_queue->pop_local(entry); bool ret = _task_queue->pop_global(entry); while (ret) { - oop obj = entry.obj(); - oop mask_obj = (oop)((size_t)obj & ((1ULL<<63)-1)); - size_t page_id = ((size_t)mask_obj - SEMERU_START_ADDR)/4096; - if(((size_t)obj & (1ULL<<63)) || _g1h->user_buf->page_stats[page_id] == 0) { - G1TaskQueueEntry clean_entry = G1TaskQueueEntry::from_oop(mask_obj); - scan_task_entry(clean_entry); + //shengkai distinguish slice/obj + size_t addr; + if(entry.is_array_slice()){ + addr = (size_t)entry.slice(); + }else{ + addr = (size_t)entry.obj(); + } + size_t mask_addr = addr & ((1ULL<<63)-1); + size_t page_id = (mask_addr - SEMERU_START_ADDR)/4096; + + bool page_likely_local = G1CollectedHeap::heap()->user_buf->page_stats[page_id] == 0; + + if(page_likely_local){ + _count_local_queue_page_local += 1; + } else { + _count_local_queue_page_remote += 1; } - else { - mask_obj = (oop)((size_t)obj | (1ULL<<63)); - G1TaskQueueEntry new_entry = G1TaskQueueEntry::from_oop(mask_obj); + + if((addr & (1ULL<<63)) || _g1h->user_buf->page_stats[page_id] == 0) { + G1TaskQueueEntry clean_entry; + if(entry.is_array_slice()){ + clean_entry = G1TaskQueueEntry::from_slice((HeapWord *)mask_addr); + }else{ + clean_entry = G1TaskQueueEntry::from_oop((oop)mask_addr); + } + scan_task_entry(clean_entry); + } else { + mask_addr = addr | (1ULL<<63); + G1TaskQueueEntry new_entry; + if(entry.is_array_slice()){ + new_entry = G1TaskQueueEntry::from_slice((HeapWord *)mask_addr); + }else{ + new_entry = G1TaskQueueEntry::from_oop((oop)mask_addr); + } _task_queue->push(new_entry); } if (_task_queue->size() <= target_size || has_aborted()) { @@ -2826,7 +2899,7 @@ void G1CMTask::do_marking_step(double time_target_ms, drain_global_stack(false); // Attempt at work stealing from other task's queues. - if (do_stealing && !has_aborted()) { + if (do_stealing && !has_aborted() && false) { // We have not aborted. This means that we have finished all that // we could. Let's try to do some stealing... @@ -2837,7 +2910,21 @@ void G1CMTask::do_marking_step(double time_target_ms, while (!has_aborted()) { G1TaskQueueEntry entry; if (_cm->try_stealing(_worker_id, entry)) { - scan_task_entry(entry); + //shengkai clear mask before scan + size_t addr; + if(entry.is_array_slice()){ + addr = (size_t)entry.slice(); + }else{ + addr = (size_t)entry.obj(); + } + size_t mask_addr = addr & ((1ULL<<63)-1); + G1TaskQueueEntry clean_entry; + if(entry.is_array_slice()){ + clean_entry = G1TaskQueueEntry::from_slice((HeapWord *)mask_addr); + }else{ + clean_entry = G1TaskQueueEntry::from_oop((oop)mask_addr); + } + scan_task_entry(clean_entry); // And since we're towards the end, let's totally drain the // local queue and global stack. @@ -2917,11 +3004,26 @@ void G1CMTask::do_marking_step(double time_target_ms, if (!is_serial) { // We only need to enter the sync barrier if being called // from a parallel context + if( _worker_id == 0){ + log_info(gc)("before CCM overflow handle"); + MonitorLockerEx ml(CCM_finish_lock, Mutex::_no_safepoint_check_flag); + while(!G1CollectedHeap::heap()->_pf_thread->idle()){ + ml.wait(Mutex::_no_safepoint_check_flag); + } + log_info(gc)("after CCM overflow handle"); + } _cm->enter_first_sync_barrier(_worker_id); // When we exit this sync barrier we know that all tasks have // stopped doing marking work. So, it's now safe to // re-initialize our data structures. + } else { + log_info(gc)("before CCM overflow handle"); + MonitorLockerEx ml(CCM_finish_lock, Mutex::_no_safepoint_check_flag); + while(!G1CollectedHeap::heap()->_pf_thread->idle()){ + ml.wait(Mutex::_no_safepoint_check_flag); + } + log_info(gc)("after CCM overflow handle"); } clear_region_fields(); @@ -2986,7 +3088,17 @@ G1CMTask::G1CMTask(uint worker_id, _elapsed_time_ms(0.0), _termination_time_ms(0.0), _termination_start_time_ms(0.0), - _marking_step_diffs_ms() + _marking_step_diffs_ms(), + _count_local_queue_page_local(0), + _count_local_queue_page_remote(0), + _count_scan_stat_0(0), + _count_scan_stat_1(0), + _count_scan(0), + _count_push_back(0), + // _count_global_queue_page_local(0), + // _count_global_queue_page_remote(0), + _count_bitmap_page_local(0), + _count_bitmap_page_remote(0) { guarantee(task_queue != NULL, "invariant"); diff --git a/JDK/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp b/JDK/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp index cf5ee15503..0585d92e8a 100644 --- a/JDK/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp +++ b/JDK/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp @@ -320,8 +320,12 @@ class G1ConcurrentMark : public CHeapObj { // Concurrent marking support structures G1CMBitMap _mark_bitmap_1; G1CMBitMap _mark_bitmap_2; + G1CMBitMap _mark_bitmap_3; + G1CMBitMap _mark_bitmap_4; G1CMBitMap* _prev_mark_bitmap; // Completed mark bitmap G1CMBitMap* _next_mark_bitmap; // Under-construction mark bitmap + G1CMBitMap* _prev_black_mark_bitmap; // Completed mark bitmap + G1CMBitMap* _next_black_mark_bitmap; // Under-construction mark bitmap // Heap bounds MemRegion const _heap; @@ -362,6 +366,7 @@ class G1ConcurrentMark : public CHeapObj { volatile bool _has_overflown; // True: marking is concurrent, false: we're in remark volatile bool _concurrent; + volatile bool _in_conc_mark_from_roots; // Set at the end of a Full GC so that marking aborts volatile bool _has_aborted; @@ -429,8 +434,12 @@ class G1ConcurrentMark : public CHeapObj { // Prints all gathered CM-related statistics void print_stats(); + void set_in_conc_mark_from_roots(bool status) { _in_conc_mark_from_roots = status; } + HeapWord* finger() { return _finger; } bool concurrent() { return _concurrent; } + bool in_conc_mark_from_roots() { return _in_conc_mark_from_roots; } + uint active_tasks() { return _num_active_tasks; } ParallelTaskTerminator* terminator() const { return _terminator.terminator(); } @@ -468,8 +477,8 @@ class G1ConcurrentMark : public CHeapObj { // Access / manipulation of the overflow flag which is set to // indicate that the global stack has overflown bool has_overflown() { return _has_overflown; } - // void set_has_overflown() { _has_overflown = true; } - void set_has_overflown() {/*Haoran: modify*/ ShouldNotReachHere(); _has_overflown = true; } + void set_has_overflown() { _has_overflown = true; } + // void set_has_overflown() {/*Haoran: modify*/ ShouldNotReachHere(); _has_overflown = true; } void clear_has_overflown() { _has_overflown = false; } bool restart_for_overflow() { return _restart_for_overflow; } @@ -544,7 +553,9 @@ class G1ConcurrentMark : public CHeapObj { G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, - G1RegionToSpaceMapper* next_bitmap_storage); + G1RegionToSpaceMapper* next_bitmap_storage, + G1RegionToSpaceMapper* prev_black_bitmap_storage, + G1RegionToSpaceMapper* next_black_bitmap_storage); ~G1ConcurrentMark(); G1ConcurrentMarkThread* cm_thread() { return _cm_thread; } @@ -552,6 +563,9 @@ class G1ConcurrentMark : public CHeapObj { const G1CMBitMap* const prev_mark_bitmap() const { return _prev_mark_bitmap; } G1CMBitMap* next_mark_bitmap() const { return _next_mark_bitmap; } + const G1CMBitMap* const prev_black_mark_bitmap() const { return _prev_black_mark_bitmap; } + G1CMBitMap* next_black_mark_bitmap() const { return _next_black_mark_bitmap; } + // Calculates the number of concurrent GC threads to be used in the marking phase. uint calc_active_marking_workers(); @@ -634,6 +648,7 @@ class G1ConcurrentMark : public CHeapObj { // A class representing a marking task. class G1CMTask : public TerminatorTerminator { + friend class G1CMBitMapClosure; private: enum PrivateConstants { // The regular clock call is called once the scanned words reaches @@ -656,6 +671,7 @@ class G1CMTask : public TerminatorTerminator { G1CollectedHeap* _g1h; G1ConcurrentMark* _cm; G1CMBitMap* _next_mark_bitmap; + G1CMBitMap* _next_black_mark_bitmap; // the task queue of this task G1CMTaskQueue* _task_queue; @@ -720,6 +736,17 @@ class G1CMTask : public TerminatorTerminator { TruncatedSeq _marking_step_diffs_ms; + uint _count_local_queue_page_local; + uint _count_local_queue_page_remote; + uint _count_scan_stat_0; + uint _count_scan_stat_1; + uint _count_scan; + uint _count_push_back; + // uint _count_global_queue_page_local; + // uint _count_global_queue_page_remote; + uint _count_bitmap_page_local; + uint _count_bitmap_page_remote; + // Updates the local fields after this task has claimed // a new region to scan void setup_for_region(HeapRegion* hr); @@ -761,7 +788,7 @@ class G1CMTask : public TerminatorTerminator { // scanned. inline size_t scan_objArray(objArrayOop obj, MemRegion mr); // Resets the task; should be called right at the beginning of a marking phase. - void reset(G1CMBitMap* next_mark_bitmap); + void reset(G1CMBitMap* next_mark_bitmap, G1CMBitMap* next_black_mark_bitmap); // Clears all the fields that correspond to a claimed region. void clear_region_fields(); @@ -861,6 +888,33 @@ class G1CMTask : public TerminatorTerminator { Pair flush_mark_stats_cache(); // Prints statistics associated with this task void print_stats(); + + void clear_memliner_stats(){ + _count_local_queue_page_local = 0; + _count_local_queue_page_remote = 0; + _count_scan_stat_0 = 0; + _count_scan_stat_1 = 0; + _count_scan = 0; + _count_push_back = 0; + // _count_global_queue_page_local = 0; + // _count_global_queue_page_remote = 0; + _count_bitmap_page_local = 0; + _count_bitmap_page_remote = 0; + } + + void print_memliner_stats(){ + log_info(gc)( + "_count_local_queue_page_local: %u _count_local_queue_page_remote: %u _count_scan_stat_0: %u _count_scan_stat_1: %u _count_scan: %u _count_push_back: %u", + _count_local_queue_page_local, _count_local_queue_page_remote, _count_scan_stat_0, _count_scan_stat_1, _count_scan, _count_push_back + ); + // log_info(gc)( + // "_count_global_queue_page_local: %u _count_global_queue_page_remote: %u", + // _count_global_queue_page_local, _count_global_queue_page_remote); + log_info(gc)( + "_count_bitmap_page_local: %u _count_bitmap_page_remote: %u", + _count_bitmap_page_local, _count_bitmap_page_remote); + } + }; // Class that's used to to print out per-region liveness diff --git a/JDK/src/hotspot/share/gc/g1/g1ConcurrentMarkBitMap.cpp b/JDK/src/hotspot/share/gc/g1/g1ConcurrentMarkBitMap.cpp index 4b8d04f493..3998dc667a 100644 --- a/JDK/src/hotspot/share/gc/g1/g1ConcurrentMarkBitMap.cpp +++ b/JDK/src/hotspot/share/gc/g1/g1ConcurrentMarkBitMap.cpp @@ -27,8 +27,10 @@ #include "gc/g1/g1ConcurrentMarkBitMap.inline.hpp" #include "gc/g1/heapRegion.hpp" #include "memory/virtualspace.hpp" +#include "logging/log.hpp" void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) { + log_info(gc)("storage is %p %p", storage->reserved().start(), storage->reserved().end()); MarkBitMap::initialize(heap, storage->reserved()); storage->set_mapping_changed_listener(&_listener); } diff --git a/JDK/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.cpp b/JDK/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.cpp index 9a041278bc..7dccc95432 100644 --- a/JDK/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.cpp +++ b/JDK/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.cpp @@ -404,8 +404,15 @@ void G1ConcurrentMarkThread::run_service() { } void G1ConcurrentMarkThread::stop_service() { - MutexLockerEx ml(CGC_lock, Mutex::_no_safepoint_check_flag); - CGC_lock->notify_all(); + { + MutexLockerEx ml(CGC_lock, Mutex::_no_safepoint_check_flag); + CGC_lock->notify_all(); + } + + { + MutexLockerEx ml(CCM_finish_lock, Mutex::_no_safepoint_check_flag); + CCM_finish_lock->notify_all(); + } } diff --git a/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.cpp b/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.cpp index 18765786d3..0b21cbe222 100644 --- a/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.cpp +++ b/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.cpp @@ -183,7 +183,7 @@ void G1ConcurrentPrefetch::reset() { // Reset all tasks, since different phases will use different number of active // threads. So, it's easiest to have all of them ready. for (uint i = 0; i < _max_num_tasks; ++i) { - _tasks[i]->reset(_cm->next_mark_bitmap()); + _tasks[i]->reset(_cm->next_mark_bitmap(), _cm->next_black_mark_bitmap()); } // uint max_regions = _g1h->max_regions(); @@ -194,7 +194,7 @@ void G1ConcurrentPrefetch::reset() { } bool G1PFTask::should_exit_termination() { - return !_cm->concurrent(); + return !_cm->in_conc_mark_from_roots() || has_aborted(); } void G1ConcurrentPrefetch::clear_statistics_in_region(uint region_idx) { @@ -313,7 +313,7 @@ class G1PFConcurrentPrefetchingTask : public AbstractGangTask { index--; } bool get_queue = 0; - while(_cm->concurrent()) { + while(_cm->in_conc_mark_from_roots() && !_cm->has_aborted()) { if(t == NULL) { jtiwh.rewind(); t = jtiwh.next(); @@ -334,25 +334,35 @@ class G1PFConcurrentPrefetchingTask : public AbstractGangTask { } if(get_queue) { void* ptr; - bool ret = prefetch_queue->dequeue(&ptr); - while (ret && ptr != NULL) { - if(!G1CollectedHeap::heap()->is_in_g1_reserved(ptr)) break; - bool success = task->make_reference_grey((oop)(HeapWord*)ptr); - if(success) { - // log_debug(prefetch)("Succesfully mark one in PFTask!"); - } - ret = prefetch_queue->dequeue(&ptr); - } + { + MutexLockerEx z(prefetch_queue->locker(), Mutex::_no_safepoint_check_flag); + bool ret = prefetch_queue->dequeue_no_lock(&ptr); + while (ret && ptr != NULL) { + if(!G1CollectedHeap::heap()->is_in_g1_reserved(ptr)) break; + // bool success = task->make_prefetch_reference_black((oop)(HeapWord*)ptr); + bool success = task->make_reference_grey((oop)(HeapWord*)ptr); + + if(success) { + // log_debug(prefetch)("Succesfully mark one in PFTask!"); + } + ret = prefetch_queue->dequeue_no_lock(&ptr); + } + } prefetch_queue->release_processing(); task->do_marking_step(); _pf->do_yield_check(); } - } while (_cm->concurrent()); + } while (_cm->in_conc_mark_from_roots() && !_cm->has_aborted() && !task->has_aborted()); + } + + if ( _cm->has_aborted() || task->has_aborted() ){ + _pf->set_has_aborted(); } + task->record_end_time(); log_debug(prefetch)("G1PFConcurrentPrefetchingTask duration %lf ms", task->_elapsed_time_ms); - guarantee(!_cm->concurrent(), "invariant"); + // guarantee(!_cm->concurrent(), "invariant"); } double end_vtime = os::elapsedVTime(); @@ -435,9 +445,10 @@ void G1PFTask::set_cm_oop_closure(G1PFOopClosure* cm_oop_closure) { _cm_oop_closure = cm_oop_closure; } -void G1PFTask::reset(G1CMBitMap* next_mark_bitmap) { +void G1PFTask::reset(G1CMBitMap* next_mark_bitmap, G1CMBitMap* next_black_mark_bitmap) { guarantee(next_mark_bitmap != NULL, "invariant"); _next_mark_bitmap = next_mark_bitmap; + _next_black_mark_bitmap = next_black_mark_bitmap; // clear_region_fields(); _calls = 0; @@ -477,8 +488,8 @@ void G1PFTask::move_entries_to_global_stack() { if (n > 0) { if (!_cm->mark_stack_push(buffer)) { - ShouldNotReachHere(); - //set_has_aborted(); + // ShouldNotReachHere(); + set_has_aborted(); } } // This operation was quite expensive, so decrease the limits. @@ -487,9 +498,9 @@ void G1PFTask::move_entries_to_global_stack() { void G1PFTask::drain_local_queue(bool partially) { - // if (has_aborted()) { - // return; - // } + if (has_aborted()) { + return; + } size_t max_num_objects = PrefetchNum; size_t max_size = PrefetchSize; @@ -502,15 +513,34 @@ void G1PFTask::drain_local_queue(bool partially) { // ret = _task_queue->pop_global(entry); // } // } - while(_words_scanned < max_size && _objs_scanned < max_num_objects && !_cm->has_aborted()) { + while(_words_scanned < max_size && _objs_scanned < max_num_objects && !_cm->has_aborted() && !has_aborted()) { // bool ret = _task_queue->pop_global(entry); bool ret = _task_queue->pop_local(entry); + + if (ret) { + size_t addr; + if(entry.is_array_slice()){ + addr = (size_t)entry.slice(); + }else{ + addr = cast_from_oop(entry.obj()); + } + size_t mask_addr = addr & ((1ULL<<63)-1); + size_t page_id = (mask_addr - SEMERU_START_ADDR)/4096; + bool page_likely_local = _g1h->user_buf->page_stats[page_id] == 0; + + if(page_likely_local){ + _count_local_queue_page_local += 1; + } else { + _count_local_queue_page_remote += 1; + } + } + if(ret) scan_task_entry(entry); else break; } if(_words_scanned>0) log_debug(prefetch)("_word_scanned: %lu, _objs_scanned: %lu", _words_scanned, _objs_scanned); - if(!_cm->has_aborted()) + if(!_cm->has_aborted() && !has_aborted()) move_entries_to_global_stack(); else{ _task_queue->set_empty(); @@ -682,7 +712,7 @@ void G1PFTask::do_marking_step() { // recalculate_limits(); // clear all flags - // clear_has_aborted(); + clear_has_aborted(); _has_timed_out = false; _draining_satb_buffers = false; @@ -694,6 +724,15 @@ void G1PFTask::do_marking_step() { G1PFOopClosure cm_oop_closure(_g1h, this); set_cm_oop_closure(&cm_oop_closure); // ...then partially drain the local queue and the global stack + + if (_cm->has_overflown()) { + // This can happen if the mark stack overflows during a GC pause + // and this task, after a yield point, restarts. We have to abort + // as we need to get into the overflow protocol which happens + // right at the end of this task. + set_has_aborted(); + } + drain_local_queue(true); // drain_global_stack(true); @@ -986,7 +1025,13 @@ G1PFTask::G1PFTask(uint worker_id, _elapsed_time_ms(0.0), _termination_time_ms(0.0), _termination_start_time_ms(0.0), - _marking_step_diffs_ms() + _marking_step_diffs_ms(), + _count_local_queue_page_local(0), + _count_local_queue_page_remote(0), + _count_prefetch_white(0), + _count_prefetch_grey(0), + _count_prefetch_black(0), + _count_steal(0) { guarantee(task_queue != NULL, "invariant"); diff --git a/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.hpp b/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.hpp index 5834a7e2f4..77f1fb983b 100644 --- a/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.hpp +++ b/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.hpp @@ -396,7 +396,9 @@ class G1ConcurrentPrefetch : public CHeapObj { inline bool do_yield_check(); -// bool has_aborted() { return _has_aborted; } + bool has_aborted() { return _has_aborted; } + void set_has_aborted() { _has_aborted = true; } + void clear_has_aborted() { _has_aborted = false; } // void print_summary_info(); @@ -408,6 +410,10 @@ class G1ConcurrentPrefetch : public CHeapObj { // Mark the given object on the next bitmap if it is below nTAMS. inline bool mark_in_next_bitmap(uint worker_id, HeapRegion* const hr, oop const obj); inline bool mark_in_next_bitmap(uint worker_id, oop const obj); + inline bool mark_prefetch_in_next_bitmap(uint worker_id, oop const obj, G1PFTask* task); + inline bool mark_black_in_next_bitmap(uint worker_id, oop const obj); + + inline bool is_below_global_finger(oop obj) const; inline bool is_marked_in_next_bitmap(oop p) const; @@ -427,6 +433,7 @@ class G1ConcurrentPrefetch : public CHeapObj { // A class representing a marking task. class G1PFTask : public TerminatorTerminator { friend class G1PFConcurrentPrefetchingTask; + friend class G1ConcurrentPrefetch; private: enum PrivateConstants { // The regular clock call is called once the scanned words reaches @@ -450,6 +457,7 @@ class G1PFTask : public TerminatorTerminator { G1ConcurrentMark* _cm; G1ConcurrentPrefetch* _pf; G1CMBitMap* _next_mark_bitmap; + G1CMBitMap* _next_black_mark_bitmap; // the task queue of this task G1PFTaskQueue* _task_queue; @@ -515,6 +523,15 @@ class G1PFTask : public TerminatorTerminator { TruncatedSeq _marking_step_diffs_ms; + uint _count_local_queue_page_local; + uint _count_local_queue_page_remote; + + uint _count_prefetch_white; + uint _count_prefetch_grey; + uint _count_prefetch_black; + + uint _count_steal; + // // Updates the local fields after this task has claimed // // a new region to scan // void setup_for_region(HeapRegion* hr); @@ -556,7 +573,7 @@ class G1PFTask : public TerminatorTerminator { // scanned. inline size_t scan_objArray(objArrayOop obj, MemRegion mr); // Resets the task; should be called right at the beginning of a marking phase. - void reset(G1CMBitMap* next_mark_bitmap); + void reset(G1CMBitMap* next_mark_bitmap, G1CMBitMap* next_black_mark_bitmap); // // Clears all the fields that correspond to a claimed region. // void clear_region_fields(); @@ -589,9 +606,9 @@ class G1PFTask : public TerminatorTerminator { // HeapWord* finger() { return _finger; } -// bool has_aborted() { return _has_aborted; } -// void set_has_aborted() { _has_aborted = true; } -// void clear_has_aborted() { _has_aborted = false; } + bool has_aborted() { return _has_aborted; } + void set_has_aborted() { _has_aborted = true; } + void clear_has_aborted() { _has_aborted = false; } void set_cm_oop_closure(G1PFOopClosure* cm_oop_closure); @@ -602,6 +619,8 @@ class G1PFTask : public TerminatorTerminator { // the local queue if below the finger. obj is required to be below its region's NTAMS. // Returns whether there has been a mark to the bitmap. inline bool make_reference_grey(oop obj); + inline bool make_reference_black(oop obj); + inline bool make_prefetch_reference_black(oop obj); // Grey the object (by calling make_grey_reference) if required, // e.g. obj is below its containing region's NTAMS. @@ -656,6 +675,25 @@ class G1PFTask : public TerminatorTerminator { Pair flush_mark_stats_cache(); // // Prints statistics associated with this task // void print_stats(); + void clear_memliner_stats(){ + _count_local_queue_page_local = 0; + _count_local_queue_page_remote = 0; + _count_prefetch_black = 0; + _count_prefetch_grey = 0; + _count_prefetch_white = 0; + _count_steal = 0; + } + + void print_memliner_stats(){ + log_info(gc)( + "prefetcher _count_local_queue_page_local: %u _count_local_queue_page_remote: %u _count_steal: %u", + _count_local_queue_page_local, _count_local_queue_page_remote, _count_steal + ); + log_info(gc)( + "prefetcher _count_prefetch_black: %u _count_prefetch_grey: %u _count_prefetch_white: %u", + _count_prefetch_black, _count_prefetch_grey, _count_prefetch_white + ); + } }; diff --git a/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.inline.hpp b/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.inline.hpp index 1d60e98e97..bafe773c1c 100644 --- a/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.inline.hpp +++ b/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetch.inline.hpp @@ -48,6 +48,67 @@ inline bool G1ConcurrentPrefetch::mark_in_next_bitmap(uint const worker_id, oop return mark_in_next_bitmap(worker_id, hr, obj); } +inline bool G1ConcurrentPrefetch::mark_black_in_next_bitmap(uint const worker_id, oop const obj) { + HeapRegion* const hr = _g1h->heap_region_containing(obj); + assert(hr != NULL, "just checking"); + assert(hr->is_in_reserved(obj), "Attempting to mark object at " PTR_FORMAT " that is not contained in the given region %u", p2i(obj), hr->hrm_index()); + + if (hr->obj_allocated_since_next_marking(obj)) { + return false; + } + + // Some callers may have stale objects to mark above nTAMS after humongous reclaim. + // Can't assert that this is a valid object at this point, since it might be in the process of being copied by another thread. + assert(!hr->is_continues_humongous(), "Should not try to mark object " PTR_FORMAT " in Humongous continues region %u above nTAMS " PTR_FORMAT, p2i(obj), hr->hrm_index(), p2i(hr->next_top_at_mark_start())); + + HeapWord* const obj_addr = (HeapWord*)obj; + + bool success = _cm->next_mark_bitmap()->par_mark(obj_addr); + OrderAccess::storestore(); + + if( success ){ + _cm->next_black_mark_bitmap()->par_mark(obj_addr); + } + + return success; +} + +inline bool G1ConcurrentPrefetch::mark_prefetch_in_next_bitmap(uint const worker_id, oop const obj, G1PFTask* task) { + HeapRegion* const hr = _g1h->heap_region_containing(obj); + assert(hr != NULL, "just checking"); + assert(hr->is_in_reserved(obj), "Attempting to mark object at " PTR_FORMAT " that is not contained in the given region %u", p2i(obj), hr->hrm_index()); + + if (hr->obj_allocated_since_next_marking(obj)) { + return false; + } + + // Some callers may have stale objects to mark above nTAMS after humongous reclaim. + // Can't assert that this is a valid object at this point, since it might be in the process of being copied by another thread. + assert(!hr->is_continues_humongous(), "Should not try to mark object " PTR_FORMAT " in Humongous continues region %u above nTAMS " PTR_FORMAT, p2i(obj), hr->hrm_index(), p2i(hr->next_top_at_mark_start())); + + HeapWord* const obj_addr = (HeapWord*)obj; + + bool success = _cm->next_mark_bitmap()->par_mark(obj_addr); + OrderAccess::storestore(); + + if( success ){ + _cm->next_black_mark_bitmap()->par_mark(obj_addr); + } + + + if (success) { + add_to_liveness(worker_id, obj, obj->size()); + if(is_below_global_finger(obj)){ + task->_count_prefetch_white += 1; + } else { + task->_count_prefetch_black += 1; + } + } else { + task->_count_prefetch_grey += 1; + } + return success; +} + inline bool G1ConcurrentPrefetch::mark_in_next_bitmap(uint const worker_id, HeapRegion* const hr, oop const obj) { assert(hr != NULL, "just checking"); assert(hr->is_in_reserved(obj), "Attempting to mark object at " PTR_FORMAT " that is not contained in the given region %u", p2i(obj), hr->hrm_index()); @@ -66,6 +127,11 @@ inline bool G1ConcurrentPrefetch::mark_in_next_bitmap(uint const worker_id, Heap if (success) { add_to_liveness(worker_id, obj, obj->size()); } + + if( success ){ + _cm->next_black_mark_bitmap()->par_mark(obj_addr); + } + return success; } @@ -103,7 +169,14 @@ inline void G1PFTask::push(G1TaskQueueEntry task_entry) { assert(task_entry.is_array_slice() || _next_mark_bitmap->is_marked((HeapWord*)task_entry.obj()), "invariant"); if (!_task_queue->push(task_entry)) { - ShouldNotReachHere(); + // ShouldNotReachHere(); + move_entries_to_global_stack(); + + // this should succeed since, even if we overflow the global + // stack, we should have definitely removed some entries from the + // local queue. So, there must be space on it. + bool success = _task_queue->push(task_entry); + assert(success, "invariant"); } } @@ -178,6 +251,65 @@ inline bool G1PFTask::make_reference_grey(oop obj) { return true; } +inline bool G1PFTask::make_reference_black(oop obj) { + if (!_pf->mark_black_in_next_bitmap(_worker_id, obj)) { + return false; + } + G1TaskQueueEntry entry = G1TaskQueueEntry::from_oop(obj); + if (obj->is_typeArray()) { + // Immediately process arrays of primitive types, rather + // than pushing on the mark stack. This keeps us from + // adding humongous objects to the mark stack that might + // be reclaimed before the entry is processed - see + // selection of candidates for eager reclaim of humongous + // objects. The cost of the additional type test is + // mitigated by avoiding a trip through the mark stack, + // by only doing a bookkeeping update and avoiding the + // actual scan of the object - a typeArray contains no + // references, and the metadata is built-in. + process_grey_task_entry(entry); + } else { + push(entry); + } + return true; +} + +inline bool G1ConcurrentPrefetch::is_below_global_finger(oop obj) const { + // If obj is above the global finger, then the mark bitmap scan + // will find it later, and no push is needed. Similarly, if we have + // a current region and obj is between the local finger and the + // end of the current region, then no push is needed. The tradeoff + // of checking both vs only checking the global finger is that the + // local check will be more accurate and so result in fewer pushes, + // but may also be a little slower. + HeapWord* global_finger = _cm->finger(); + HeapWord* objAddr = cast_from_oop(obj); + return objAddr < global_finger; +} + +inline bool G1PFTask::make_prefetch_reference_black(oop obj) { + if (!_pf->mark_prefetch_in_next_bitmap(_worker_id, obj, this)) { + return false; + } + G1TaskQueueEntry entry = G1TaskQueueEntry::from_oop(obj); + if (obj->is_typeArray()) { + // Immediately process arrays of primitive types, rather + // than pushing on the mark stack. This keeps us from + // adding humongous objects to the mark stack that might + // be reclaimed before the entry is processed - see + // selection of candidates for eager reclaim of humongous + // objects. The cost of the additional type test is + // mitigated by avoiding a trip through the mark stack, + // by only doing a bookkeeping update and avoiding the + // actual scan of the object - a typeArray contains no + // references, and the metadata is built-in. + process_grey_task_entry(entry); + } else { + push(entry); + } + return true; +} + template inline bool G1PFTask::deal_with_reference(T* p) { increment_refs_reached(); diff --git a/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetchThread.cpp b/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetchThread.cpp index 03cee1279e..9754236734 100644 --- a/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetchThread.cpp +++ b/JDK/src/hotspot/share/gc/g1/g1ConcurrentPrefetchThread.cpp @@ -236,10 +236,12 @@ void G1ConcurrentPrefetchThread::run_service() { } // cpmanager.set_phase(G1ConcurrentPhase::CONCURRENT_CYCLE, false /* force */); { + log_info(gc)("prefetcher cycle start"); ResourceMark rm; HandleMark hm; double cycle_start_time = os::elapsedTime(); double cycle_start = os::elapsedVTime(); + _pf->clear_has_aborted(); // It would be nice to use the G1ConcPhase class here but // the "end" logging is inside the loop and not at the end of // a scope. Also, the timer doesn't support nesting. @@ -247,9 +249,18 @@ void G1ConcurrentPrefetchThread::run_service() { { // G1ConcPhaseManager mark_manager(G1ConcurrentPhase::CONCURRENT_MARK, this); jlong mark_start = os::elapsed_counter(); - while(_cm->concurrent()) { + for(uint i = 0; i < PrefetchThreads; i++){ + _pf->task(i)->clear_memliner_stats(); + } + + while(_cm->in_conc_mark_from_roots() && !_pf->has_aborted() && !_cm->has_aborted() && !_cm->has_overflown()) { _pf->mark_from_stacks(); } + + for(uint i = 0; i < PrefetchThreads; i++){ + _pf->task(i)->print_memliner_stats(); + } + log_info(gc)("prefetcher finish conc prefetching"); // for (uint iter = 1; !_cm->has_aborted(); ++iter) { // // Concurrent marking. // { @@ -317,6 +328,10 @@ void G1ConcurrentPrefetchThread::run_service() { set_idle(); // _cm->concurrent_cycle_end(); } + { + MutexLockerEx ml(CCM_finish_lock, Mutex::_no_safepoint_check_flag); + CCM_finish_lock->notify(); + } // cpmanager.set_phase(G1ConcurrentPhase::IDLE, _cm->has_aborted() /* force */); } // _cm->root_regions()->cancel_scan(); diff --git a/JDK/src/hotspot/share/gc/g1/g1YoungGenSizer.cpp b/JDK/src/hotspot/share/gc/g1/g1YoungGenSizer.cpp index 9d496830b6..8ca90ac98c 100644 --- a/JDK/src/hotspot/share/gc/g1/g1YoungGenSizer.cpp +++ b/JDK/src/hotspot/share/gc/g1/g1YoungGenSizer.cpp @@ -64,9 +64,9 @@ G1YoungGenSizer::G1YoungGenSizer() : _sizer_kind(SizerDefaults), _sizer_kind = SizerNewSizeOnly; } } else if (FLAG_IS_CMDLINE(MaxNewSize)) { - if(MaxNewSize > (1024 + 512) * 1024 * 1024) { - MaxNewSize -= 1024 * 1024 * 1024; - } + // if(MaxNewSize > (1024 + 512) * 1024 * 1024) { + // MaxNewSize -= 1024 * 1024 * 1024; + // } _max_desired_young_length = MAX2((uint) (MaxNewSize / HeapRegion::GrainBytes), 1U); diff --git a/JDK/src/hotspot/share/gc/g1/heapRegionManager.cpp b/JDK/src/hotspot/share/gc/g1/heapRegionManager.cpp index 0154f1bbf9..964a481231 100644 --- a/JDK/src/hotspot/share/gc/g1/heapRegionManager.cpp +++ b/JDK/src/hotspot/share/gc/g1/heapRegionManager.cpp @@ -65,6 +65,8 @@ HeapRegionManager::HeapRegionManager() : _regions(), _heap_mapper(NULL), _prev_bitmap_mapper(NULL), _next_bitmap_mapper(NULL), + _prev_black_bitmap_mapper(NULL), + _next_black_bitmap_mapper(NULL), _free_list("Free list", new MasterFreeRegionListChecker()) { } @@ -78,6 +80,8 @@ HeapRegionManager* HeapRegionManager::create_manager(G1CollectedHeap* heap, G1Co void HeapRegionManager::initialize(G1RegionToSpaceMapper* heap_storage, G1RegionToSpaceMapper* prev_bitmap, G1RegionToSpaceMapper* next_bitmap, + G1RegionToSpaceMapper* prev_black_bitmap, + G1RegionToSpaceMapper* next_black_bitmap, G1RegionToSpaceMapper* bot, G1RegionToSpaceMapper* cardtable, G1RegionToSpaceMapper* card_counts) { @@ -88,6 +92,9 @@ void HeapRegionManager::initialize(G1RegionToSpaceMapper* heap_storage, _prev_bitmap_mapper = prev_bitmap; _next_bitmap_mapper = next_bitmap; + _prev_black_bitmap_mapper = prev_black_bitmap; + _next_black_bitmap_mapper = next_black_bitmap; + _bot_mapper = bot; _cardtable_mapper = cardtable; @@ -129,6 +136,9 @@ void HeapRegionManager::commit_regions(uint index, size_t num_regions, WorkGang* _prev_bitmap_mapper->commit_regions(index, num_regions, pretouch_gang); _next_bitmap_mapper->commit_regions(index, num_regions, pretouch_gang); + _prev_black_bitmap_mapper->commit_regions(index, num_regions, pretouch_gang); + _next_black_bitmap_mapper->commit_regions(index, num_regions, pretouch_gang); + _bot_mapper->commit_regions(index, num_regions, pretouch_gang); _cardtable_mapper->commit_regions(index, num_regions, pretouch_gang); @@ -156,6 +166,9 @@ void HeapRegionManager::uncommit_regions(uint start, size_t num_regions) { _prev_bitmap_mapper->uncommit_regions(start, num_regions); _next_bitmap_mapper->uncommit_regions(start, num_regions); + _prev_black_bitmap_mapper->uncommit_regions(start, num_regions); + _next_black_bitmap_mapper->uncommit_regions(start, num_regions); + _bot_mapper->uncommit_regions(start, num_regions); _cardtable_mapper->uncommit_regions(start, num_regions); @@ -194,6 +207,8 @@ MemoryUsage HeapRegionManager::get_auxiliary_data_memory_usage() const { size_t used_sz = _prev_bitmap_mapper->committed_size() + _next_bitmap_mapper->committed_size() + + _prev_black_bitmap_mapper->committed_size() + + _next_black_bitmap_mapper->committed_size() + _bot_mapper->committed_size() + _cardtable_mapper->committed_size() + _card_counts_mapper->committed_size(); @@ -201,6 +216,8 @@ MemoryUsage HeapRegionManager::get_auxiliary_data_memory_usage() const { size_t committed_sz = _prev_bitmap_mapper->reserved_size() + _next_bitmap_mapper->reserved_size() + + _prev_black_bitmap_mapper->reserved_size() + + _next_black_bitmap_mapper->reserved_size() + _bot_mapper->reserved_size() + _cardtable_mapper->reserved_size() + _card_counts_mapper->reserved_size(); diff --git a/JDK/src/hotspot/share/gc/g1/heapRegionManager.hpp b/JDK/src/hotspot/share/gc/g1/heapRegionManager.hpp index a72169191c..e00ccff569 100644 --- a/JDK/src/hotspot/share/gc/g1/heapRegionManager.hpp +++ b/JDK/src/hotspot/share/gc/g1/heapRegionManager.hpp @@ -125,6 +125,8 @@ class HeapRegionManager: public CHeapObj { G1RegionToSpaceMapper* _heap_mapper; G1RegionToSpaceMapper* _prev_bitmap_mapper; G1RegionToSpaceMapper* _next_bitmap_mapper; + G1RegionToSpaceMapper* _prev_black_bitmap_mapper; + G1RegionToSpaceMapper* _next_black_bitmap_mapper; FreeRegionList _free_list; void make_regions_available(uint index, uint num_regions = 1, WorkGang* pretouch_gang = NULL); @@ -144,6 +146,8 @@ class HeapRegionManager: public CHeapObj { virtual void initialize(G1RegionToSpaceMapper* heap_storage, G1RegionToSpaceMapper* prev_bitmap, G1RegionToSpaceMapper* next_bitmap, + G1RegionToSpaceMapper* prev_black_bitmap, + G1RegionToSpaceMapper* next_black_bitmap, G1RegionToSpaceMapper* bot, G1RegionToSpaceMapper* cardtable, G1RegionToSpaceMapper* card_counts); diff --git a/JDK/src/hotspot/share/gc/g1/heterogeneousHeapRegionManager.cpp b/JDK/src/hotspot/share/gc/g1/heterogeneousHeapRegionManager.cpp index 56261b6984..d20c6929cc 100644 --- a/JDK/src/hotspot/share/gc/g1/heterogeneousHeapRegionManager.cpp +++ b/JDK/src/hotspot/share/gc/g1/heterogeneousHeapRegionManager.cpp @@ -44,15 +44,19 @@ HeterogeneousHeapRegionManager* HeterogeneousHeapRegionManager::manager() { void HeterogeneousHeapRegionManager::initialize(G1RegionToSpaceMapper* heap_storage, G1RegionToSpaceMapper* prev_bitmap, G1RegionToSpaceMapper* next_bitmap, + G1RegionToSpaceMapper* prev_black_bitmap, + G1RegionToSpaceMapper* next_black_bitmap, G1RegionToSpaceMapper* bot, G1RegionToSpaceMapper* cardtable, G1RegionToSpaceMapper* card_counts) { - HeapRegionManager::initialize(heap_storage, prev_bitmap, next_bitmap, bot, cardtable, card_counts); + HeapRegionManager::initialize(heap_storage, prev_bitmap, next_bitmap, prev_black_bitmap, next_black_bitmap, bot, cardtable, card_counts); // We commit bitmap for all regions during initialization and mark the bitmap space as special. // This allows regions to be un-committed while concurrent-marking threads are accessing the bitmap concurrently. _prev_bitmap_mapper->commit_and_set_special(); _next_bitmap_mapper->commit_and_set_special(); + _prev_black_bitmap_mapper->commit_and_set_special(); + _next_black_bitmap_mapper->commit_and_set_special(); } // expand_by() is called to grow the heap. We grow into nvdimm now. diff --git a/JDK/src/hotspot/share/gc/g1/heterogeneousHeapRegionManager.hpp b/JDK/src/hotspot/share/gc/g1/heterogeneousHeapRegionManager.hpp index cc4321e2a0..885c749d07 100644 --- a/JDK/src/hotspot/share/gc/g1/heterogeneousHeapRegionManager.hpp +++ b/JDK/src/hotspot/share/gc/g1/heterogeneousHeapRegionManager.hpp @@ -100,6 +100,8 @@ class HeterogeneousHeapRegionManager : public HeapRegionManager { virtual void initialize(G1RegionToSpaceMapper* heap_storage, G1RegionToSpaceMapper* prev_bitmap, G1RegionToSpaceMapper* next_bitmap, + G1RegionToSpaceMapper* prev_black_bitmap, + G1RegionToSpaceMapper* next_black_bitmap, G1RegionToSpaceMapper* bot, G1RegionToSpaceMapper* cardtable, G1RegionToSpaceMapper* card_counts); diff --git a/JDK/src/hotspot/share/gc/shared/gc_globals.hpp b/JDK/src/hotspot/share/gc/shared/gc_globals.hpp index 391cf42308..453dac67b0 100644 --- a/JDK/src/hotspot/share/gc/shared/gc_globals.hpp +++ b/JDK/src/hotspot/share/gc/shared/gc_globals.hpp @@ -235,6 +235,10 @@ "Number of threads prefetch will use") \ range(0, max_jint) \ \ + product(uint, PrefetchDelay, 0, \ + "The number of objects before first prefetch") \ + range(0, max_jint) \ + \ product(uint, PrefetchNum, 0, \ "The maximum objects to be marked during prefetching") \ range(0, max_jint) \ diff --git a/JDK/src/hotspot/share/gc/shared/prefetchQueue.hpp b/JDK/src/hotspot/share/gc/shared/prefetchQueue.hpp index b942966e29..15a1249087 100644 --- a/JDK/src/hotspot/share/gc/shared/prefetchQueue.hpp +++ b/JDK/src/hotspot/share/gc/shared/prefetchQueue.hpp @@ -71,6 +71,10 @@ class PrefetchQueue: public PtrQueue { } } + Mutex* locker(){ + return &_m; + } + bool set_in_processing() { if(_in_processing == true) return false; if(Atomic::cmpxchg(true, &_in_processing, false ) == false) return true; @@ -130,8 +134,9 @@ class PrefetchQueue: public PtrQueue { assert(_buf != NULL, "postcondition"); assert(index() > 0, "postcondition"); assert(index() <= capacity(), "invariant"); + _buf[index() - 1] = ptr; + OrderAccess::storestore(); _index -= _element_size; - _buf[index()] = ptr; } size_t prefetch_queue_threshold() { @@ -194,7 +199,37 @@ class PrefetchQueue: public PtrQueue { MutexLockerEx z(&_m, Mutex::_no_safepoint_check_flag); size_t current_index = index(); size_t current_tail = _tail; - if(current_tail == current_index) { + if(current_tail - current_index <= PrefetchDelay) { + *ptrptr = NULL; + + // _in_dequeue = false; + return false; + } + _tail -= 1; + *ptrptr = _buf[current_tail - 1]; + + + // _in_dequeue = false; + return true; + + } + + bool dequeue_no_lock(void** ptrptr) { + + // while(_in_dequeue == true) { + // continue; + // } + // if(Atomic::cmpxchg(true, &_in_dequeue, false ) == true) { + // *ptrptr = NULL; + // return false; + // } + // if(_in_dequeue == true) { + // *ptrptr = NULL; + // return false; + // } + size_t current_index = index(); + size_t current_tail = _tail; + if(current_tail - current_index <= PrefetchDelay) { *ptrptr = NULL; // _in_dequeue = false; diff --git a/JDK/src/hotspot/share/runtime/mutexLocker.cpp b/JDK/src/hotspot/share/runtime/mutexLocker.cpp index 178ae0a674..1a8a9df567 100644 --- a/JDK/src/hotspot/share/runtime/mutexLocker.cpp +++ b/JDK/src/hotspot/share/runtime/mutexLocker.cpp @@ -78,6 +78,7 @@ Monitor* Threads_lock = NULL; Mutex* NonJavaThreadsList_lock = NULL; Monitor* CGC_lock = NULL; Monitor* CPF_lock = NULL; //Haoran: modify +Monitor* CCM_finish_lock = NULL; Monitor* STS_lock = NULL; Monitor* FullGCCount_lock = NULL; Mutex* SATB_Q_FL_lock = NULL; @@ -207,7 +208,9 @@ void mutex_init() { def(CGC_lock , PaddedMonitor, special, true, Monitor::_safepoint_check_never); // coordinate between fore- and background GC //Haoran: modify def(CPF_lock , PaddedMonitor, special, true, Monitor::_safepoint_check_never); // coordinate between fore- and background GC - + + def(CCM_finish_lock , PaddedMonitor, special, true, Monitor::_safepoint_check_never); // coordinate between fore- and background GC + def(STS_lock , PaddedMonitor, leaf, true, Monitor::_safepoint_check_never); def(VMWeakAlloc_lock , PaddedMutex , vmweak, true, Monitor::_safepoint_check_never); diff --git a/JDK/src/hotspot/share/runtime/mutexLocker.hpp b/JDK/src/hotspot/share/runtime/mutexLocker.hpp index f6cf1b47c1..511a8bc921 100644 --- a/JDK/src/hotspot/share/runtime/mutexLocker.hpp +++ b/JDK/src/hotspot/share/runtime/mutexLocker.hpp @@ -75,6 +75,7 @@ extern Mutex* NonJavaThreadsList_lock; // a lock on the NonJavaThreads extern Monitor* CGC_lock; // used for coordination between // fore- & background GC threads. extern Monitor* CPF_lock; // Haoran: modify used for coordination between +extern Monitor* CCM_finish_lock; // Haoran: modify used for coordination between // background GC threads and background PF threads. extern Monitor* STS_lock; // used for joining/leaving SuspendibleThreadSet. extern Monitor* FullGCCount_lock; // in support of "concurrent" full gc