From 3d2e6a3f86c1882c8d0a2e3da50d605147771469 Mon Sep 17 00:00:00 2001 From: yumisims Date: Wed, 25 Mar 2026 19:52:16 +0000 Subject: [PATCH] =?UTF-8?q?fixing=20selection=20bounds=20and=20sink/source?= =?UTF-8?q?=20handling,=20rebuilding=20contigIds=20before=20selection,=20s?= =?UTF-8?q?moothing/collapsing=20noisy=20contig=20runs,=20correcting=20loc?= =?UTF-8?q?al-to-global=20order=20remapping=20in=20AutoCurationFromFragsOr?= =?UTF-8?q?der,=20and=20safely=20handling=20zero-length=20contigs=20in=20c?= =?UTF-8?q?ompressed=20Hi=E2=80=91C=20(cal=5Fblock=5Fmean=20and=20related?= =?UTF-8?q?=20paths).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PretextView.cpp | 34 ++++++- src/auto_curation_state.h | 187 +++++++++++++++++++++++++++----------- src/copy_texture.cpp | 36 +++++--- src/frag_for_compress.h | 18 ++++ 4 files changed, 204 insertions(+), 71 deletions(-) diff --git a/PretextView.cpp b/PretextView.cpp index f26fc1b..ac59b49 100644 --- a/PretextView.cpp +++ b/PretextView.cpp @@ -8399,12 +8399,32 @@ void AutoCurationFromFragsOrder( std::vector predicted_order = frags_order_->get_order_without_chromosomeInfor(); // start from 1 if (using_select_area) { + // FragsOrder uses local indices 1..N (sign = orientation). Map each global slot in the + // selection (frags_id_to_sort[k] = map position / 0-based contig id) to the actual global + // contig id: frags_id_to_sort[abs(predicted_order[k]) - 1]. Do not use front()+abs(...). std::vector full_predicted_order(num_frags); std::iota(full_predicted_order.begin(), full_predicted_order.end(), 1); std::vector frags_id_to_sort = select_area->get_to_sort_frags_id(Contigs); - for (u32 i=0; i < frags_id_to_sort.size(); i++) - full_predicted_order[frags_id_to_sort[i]] = (predicted_order[i]>0?1:-1) * (frags_id_to_sort.front() + std::abs(predicted_order[i])); - predicted_order = full_predicted_order; + const u32 n_sort = (u32)frags_id_to_sort.size(); + for (u32 k = 0; k < n_sort; k++) + { + const u32 global_pos = frags_id_to_sort[k]; + const s32 local_signed = predicted_order[k]; + const u32 local_1based = (u32)std::abs(local_signed); + if (local_1based < 1u || local_1based > n_sort) + { + fmt::print( + stderr, + "[AutoCurationFromFragsOrder] invalid local fragment index {} (expected 1..{})\n", + local_1based, + n_sort); + assert(0); + } + const u32 src0 = frags_id_to_sort[local_1based - 1u]; + const s32 global_1based = (s32)(src0 + 1u); + full_predicted_order[global_pos] = (local_signed > 0 ? 1 : -1) * global_1based; + } + predicted_order = std::move(full_predicted_order); } for (s32 i = 0; i < num_frags; ++i) current_order[i] = {i+1, contigs_->contigs_arr[i].length}; // start from 1 auto move_current_order_element = [¤t_order, &num_frags](u32 from, u32 to) @@ -8456,12 +8476,12 @@ void AutoCurationFromFragsOrder( // find the pixel range of the contig to move u32 pixelFrom = 0, tmp_i = 0; // tmp_i is the index of the current contig, which is going to be processed - while (std::abs(predicted_order[i])!=std::abs(current_order[tmp_i].first)) + while (std::abs(predicted_order[i])!=std::abs(current_order[tmp_i].first)) { if (tmp_i >= num_frags) { char buff[256]; - snprintf(buff, sizeof(buff), "Error: contig %d not found in the current order.\n", current_order[i].first); + snprintf(buff, sizeof(buff), "Error: contig %d not found in the current order.\n", (int)std::abs(predicted_order[i])); MY_CHECK(buff); assert(0); } @@ -8651,6 +8671,10 @@ auto_sort_func(char* currFileName) ); restore_settings_after_copy(original_color_control_points); + // Rebuild contigIds from originalContigIds + contigRelCoords so selection matches the current map + // (RearrangeMap updates those buffers but not contigIds until this runs). + UpdateContigsFromMapState(); + // check if select the area for sorting SelectArea selected_area; auto_curation_state.get_selected_fragments( // consider wheather include the sink and source while calculating the selected area diff --git a/src/auto_curation_state.h b/src/auto_curation_state.h index 02d372a..dc62d20 100644 --- a/src/auto_curation_state.h +++ b/src/auto_curation_state.h @@ -4,7 +4,93 @@ #include "utilsPretextView.h" #include "showWindowData.h" #include "genomeData.h" +#include +#include +namespace { + +inline u32 median_u32_3(u32 a, u32 b, u32 c) +{ + if (a > b) std::swap(a, b); + if (b > c) std::swap(b, c); + if (a > b) std::swap(a, b); + return b; +} + +// Hi-C map contig ids can flip for single pixels at boundaries; median-of-3 along the 1D axis +// stabilizes runs so Pixel Sort does not build huge duplicate fragment lists (e.g. 130,131,130,...). +inline u32 smoothed_contig_id_1d( + map_state* Map_State, + u32 i, + u32 start_pixel, + u32 end_pixel) +{ + u32 left = (i > start_pixel) ? Map_State->contigIds[i - 1] : Map_State->contigIds[i]; + u32 mid = Map_State->contigIds[i]; + u32 right = (i < end_pixel) ? Map_State->contigIds[i + 1] : Map_State->contigIds[i]; + return median_u32_3(left, mid, right); +} + +// Median-of-3 can still alternate every pixel when raw contigIds alternate. Collapse a trailing +// suffix that strictly alternates between exactly two ids (length >= 4) to [left, other] in map order. +inline void collapse_alternating_two_contig_suffix(std::vector& ids) +{ + const size_t n = ids.size(); + if (n < 4) return; + + const size_t e = n - 1; + const u32 a = ids[e]; + const u32 b = ids[e - 1]; + if (a == b) return; + + size_t s = e - 1; + while (s > 0) + { + const u32 x = ids[s - 1]; + if (x != a && x != b) break; + if (x == ids[s]) break; + --s; + } + + const size_t len = n - s; + if (len < 4) return; + + for (size_t i = s; i + 1 < n; ++i) + if (ids[i] == ids[i + 1]) return; + + for (size_t i = s; i < n; ++i) + if (ids[i] != a && ids[i] != b) return; + + const u32 left = ids[s]; + const u32 other = (left == a) ? b : a; + ids.erase(ids.begin() + (ptrdiff_t)s, ids.end()); + ids.push_back(left); + ids.push_back(other); +} + +inline std::vector build_smoothed_selected_run_ids( + map_state* Map_State, + u32 sp, + u32 ep) +{ + std::vector out; + if (sp > ep) return out; + u32 last = smoothed_contig_id_1d(Map_State, sp, sp, ep); + out.push_back(last); + for (u32 i = sp + 1; i <= ep; i++) + { + u32 cur = smoothed_contig_id_1d(Map_State, i, sp, ep); + if (cur != last) + { + out.push_back(cur); + last = cur; + } + } + collapse_alternating_two_contig_suffix(out); + return out; +} + +} // namespace struct SelectArea { @@ -231,7 +317,13 @@ struct AutoCurationState u32 number_of_pixels_1D ) { - if (this->start_pixel > number_of_pixels_1D || this->end_pixel > number_of_pixels_1D || this->start_pixel < 0 || this->end_pixel < 0) + // Bounds: Map_State->contigIds is [0, number_of_pixels_1D - 1]. Reject >= number_of_pixels_1D + // (a strict `>` check would wrongly allow end_pixel == number_of_pixels_1D and overrun the array). + if (number_of_pixels_1D == 0 || + this->start_pixel >= (s32)number_of_pixels_1D || + this->end_pixel >= (s32)number_of_pixels_1D || + this->start_pixel < 0 || + this->end_pixel < 0) { return 0; } @@ -240,15 +332,8 @@ struct AutoCurationState std::swap(this->start_pixel, this->end_pixel); } - u32 num_selected_frags = 1; - for (u32 i = this->start_pixel+1; i <= this->end_pixel; i++) - { - if (Map_State->contigIds[i] != Map_State->contigIds[i-1]) - { - num_selected_frags ++; - } - } - return num_selected_frags; + u32 sp = (u32)this->start_pixel, ep = (u32)this->end_pixel; + return (u32)build_smoothed_selected_run_ids(Map_State, sp, ep).size(); } std::vector get_selected_fragments_id( @@ -257,7 +342,12 @@ struct AutoCurationState ) { std::vector selected_frag_ids={}; - if (this->start_pixel > number_of_pixels_1D || this->end_pixel > number_of_pixels_1D || this->start_pixel < 0 || this->end_pixel < 0) + // Same bounds as get_selected_fragments_num / get_selected_fragments. + if (number_of_pixels_1D == 0 || + this->start_pixel >= (s32)number_of_pixels_1D || + this->end_pixel >= (s32)number_of_pixels_1D || + this->start_pixel < 0 || + this->end_pixel < 0) { return selected_frag_ids; } @@ -266,18 +356,8 @@ struct AutoCurationState std::swap(this->start_pixel, this->end_pixel); } - u32 last_contig_id = Map_State->contigIds[this->start_pixel]; - selected_frag_ids.push_back(last_contig_id); - for (u32 i = this->start_pixel+1; i <= this->end_pixel; i++) - { - if (last_contig_id == Map_State->contigIds[i]) continue; - else - { - last_contig_id = Map_State->contigIds[i]; - selected_frag_ids.push_back(last_contig_id); - } - } - return selected_frag_ids; + u32 sp = (u32)this->start_pixel, ep = (u32)this->end_pixel; + return build_smoothed_selected_run_ids(Map_State, sp, ep); } @@ -289,11 +369,17 @@ struct AutoCurationState u08 exclude_flag=false ) // return cluster_flag { - if (this->start_pixel > number_of_pixels_1D || this->end_pixel > number_of_pixels_1D || this->start_pixel < 0 || this->end_pixel < 0) + // Walk [start_pixel, end_pixel] and record one fragment id per contig run (when contigIds changes). + // - Bounds: indices must satisfy 0 <= start/end < number_of_pixels_1D (use >= below; do not use + // end_pixel > number_of_pixels_1D only, or end_pixel == number_of_pixels_1D can index past the array). + // - clearup() every time: avoids stale source_frag_id / sink_frag_id when the new selection has no + // neighbour on the left (start at 0) or right (end at last pixel); those stay -1 by design. + if (number_of_pixels_1D == 0 || + this->start_pixel >= (s32)number_of_pixels_1D || + this->end_pixel >= (s32)number_of_pixels_1D || + this->start_pixel < 0 || + this->end_pixel < 0) { - // fmt::println(stderr, - // "[AutoCurationState::get_selected_fragments]: start_pixel({}) should smaller than end_pixel({}), and they should be within [0, Number_of_Pixels_1D({})-1], file:{}, line:{}\n", - // this->start_pixel, this->end_pixel, number_of_pixels_1D, __FILE__, __LINE__); return ; } if (this->start_pixel > this->end_pixel) @@ -301,52 +387,43 @@ struct AutoCurationState std::swap(this->start_pixel, this->end_pixel); } - if (!select_area.selected_frag_ids.empty() ) - { - select_area.clearup(); - } + select_area.clearup(); - // push the selected frag id into select_area - select_area.start_pixel = this->start_pixel; - select_area.end_pixel = this->end_pixel; - u32 last_contig_id = Map_State->contigIds[this->start_pixel]; - select_area.selected_frag_ids.push_back(last_contig_id); - for (u32 i = start_pixel+1; i <= this->end_pixel; i ++ ) - { - if (last_contig_id == Map_State->contigIds[i]) continue; - else - { - last_contig_id = Map_State->contigIds[i]; - select_area.selected_frag_ids.push_back(last_contig_id); - } - } + // Copy run list into select_area (use this->start_pixel / this->end_pixel consistently for the loop). + select_area.start_pixel = (u32)this->start_pixel; + select_area.end_pixel = (u32)this->end_pixel; + u32 sp = select_area.start_pixel, ep = select_area.end_pixel; + for (u32 cid : build_smoothed_selected_run_ids(Map_State, sp, ep)) + select_area.selected_frag_ids.push_back((s32)cid); - // define source frag - if (this->start_pixel>0 ) // if selected frags <= 5, still sort them together without clustering first. + // Source = fragment on the pixel immediately before the selection (contigIds[start_pixel - 1]). + // If start_pixel == 0 there is no left neighbour; source_frag_id remains -1 (callers must handle). + select_area.source_frag_id = -1; + if (this->start_pixel > 0) { - u32 frag_id = Map_State->contigIds[this->start_pixel-1]; + u32 frag_id = Map_State->contigIds[(u32)this->start_pixel - 1]; if (Contigs->contigs_arr[frag_id].length >= this->smallest_frag_size_in_pixel) { - select_area.source_frag_id = frag_id; + select_area.source_frag_id = (s32)frag_id; } else { - select_area.source_frag_id = -1; fmt::print("The source_frag_len ({}) < smallest_length ({}), not set the source frag.\n", Contigs->contigs_arr[frag_id].length, this->smallest_frag_size_in_pixel); } } - // define sink frag - if (this->end_pixel + 1 <= number_of_pixels_1D - 1) + // Sink = fragment on the pixel immediately after the selection (contigIds[end_pixel + 1]). + // If end_pixel is the last map pixel, end_pixel + 1 is out of range; sink_frag_id remains -1. + select_area.sink_frag_id = -1; + if ((u32)this->end_pixel + 1u < number_of_pixels_1D) { - u32 frag_id = Map_State->contigIds[this->end_pixel+1]; + u32 frag_id = Map_State->contigIds[(u32)this->end_pixel + 1u]; if (Contigs->contigs_arr[frag_id].length >= this->smallest_frag_size_in_pixel) { - select_area.sink_frag_id = frag_id; + select_area.sink_frag_id = (s32)frag_id; } else { - select_area.sink_frag_id = -1; fmt::print("The tail_frag_len ({}) < smallest_length ({}), not set the tail frag.\n", Contigs->contigs_arr[frag_id].length, this->smallest_frag_size_in_pixel); } } diff --git a/src/copy_texture.cpp b/src/copy_texture.cpp index e58b6f7..d06ef0b 100644 --- a/src/copy_texture.cpp +++ b/src/copy_texture.cpp @@ -979,11 +979,18 @@ void TexturesArray4AI::cal_compressed_hic( { for (u32 j = i; j < frags->num; j++) { - (*compressed_hic)(i, j, 4) = (*compressed_hic)(j, i, 4) = cal_block_mean( - frags->startCoord[i], - frags->startCoord[j], - frags->length[i], - frags->length[j])/255.f; + if (frags->length[i] < 1u || frags->length[j] < 1u) + { + (*compressed_hic)(i, j, 4) = (*compressed_hic)(j, i, 4) = 0.f; + } + else + { + (*compressed_hic)(i, j, 4) = (*compressed_hic)(j, i, 4) = cal_block_mean( + frags->startCoord[i], + frags->startCoord[j], + frags->length[i], + frags->length[j])/255.f; + } } } @@ -1067,6 +1074,12 @@ void TexturesArray4AI::get_interaction_score( { check_copied_from_buffer(); + if (num_row < 1 || num_column < 1) + { + buffer_values_on_channel.initilize(); + return; + } + std::function score_cal = [&norm_diag_mean](const f32& mean, const int& i)->f32 { f32 dis = std::abs(mean - norm_diag_mean[i]); @@ -1402,11 +1415,9 @@ Sum_and_Number TexturesArray4AI::get_interaction_block_diag_sum_number( f32 TexturesArray4AI::cal_block_mean(const u32 &offset_row, const u32 &offset_column, const u32 &num_row, const u32 &num_column) const { + // Zero-length contigs: empty block (callers should skip when possible to avoid log noise). if (num_row < 1 || num_column < 1) - { - std::cerr << "The number of row or column is less than 1" << std::endl; - assert(0); - } + return 0.f; u64 sum = 0; for (u32 i = offset_row; i < offset_row + num_row; i++) { @@ -1452,8 +1463,11 @@ void TexturesArray4AI::cal_mass_centre(const u32 &offset_row, const u32 &offset_ { if (num_row < 1 || num_column < 1) { - std::cerr << "The number of row or column is less than 1" << std::endl; - assert(0); + std::cerr << "[cal_mass_centre] skipping empty block: num_row=" << num_row + << " num_column=" << num_column << " at offset (" << offset_row << "," << offset_column << ")\n"; + mass_centre->row = 0.5f; + mass_centre->col = 0.5f; + return; } if (num_row == 1 && num_column == 1) { diff --git a/src/frag_for_compress.h b/src/frag_for_compress.h index 40e0541..b216b16 100644 --- a/src/frag_for_compress.h +++ b/src/frag_for_compress.h @@ -102,6 +102,7 @@ struct Frag4compress { } } length[0] = Contigs->contigs_arr[frag_id[0]].length; + warn_if_zero_length_contig(Contigs, frag_id[0], 0); metaDataFlags[0] = (Contigs->contigs_arr[frag_id[0]].metaDataFlags == nullptr)? 0 : *(Contigs->contigs_arr[frag_id[0]].metaDataFlags); total_length = length[0]; @@ -112,6 +113,7 @@ struct Frag4compress { inversed[i] = false; // currently, this is not used startCoord[i] = startCoord[i-1] + length[i-1]; length[i] = Contigs->contigs_arr[frag_id[i]].length; + warn_if_zero_length_contig(Contigs, frag_id[i], i); metaDataFlags[i] = (Contigs->contigs_arr[frag_id[i]].metaDataFlags == nullptr)? 0 : *(Contigs->contigs_arr[frag_id[i]].metaDataFlags); total_length += length[i]; @@ -146,6 +148,7 @@ struct Frag4compress { frag_id[i] = selected_frag_ids_tmp[i]; inversed[i] = false; length[i] = Contigs->contigs_arr[frag_id[i]].length; + warn_if_zero_length_contig(Contigs, frag_id[i], (u32)i); total_length += length[i]; metaDataFlags[i] = (Contigs->contigs_arr[frag_id[i]].metaDataFlags == nullptr)?0:*(Contigs->contigs_arr[frag_id[i]].metaDataFlags); while (global_frag_index < frag_id[i]) @@ -165,6 +168,21 @@ struct Frag4compress { private: + static void warn_if_zero_length_contig(const map_contigs* Contigs, u32 frag_id, u32 index_in_list) + { + if (frag_id >= Contigs->numberOfContigs) + return; + u32 L = Contigs->contigs_arr[frag_id].length; + if (L != 0) + return; + fmt::print( + stderr, + "[Frag4compress] contig_id={} has length 0 in map_contigs (fragment list index {}); " + "compressed Hi-C will skip 0×N blocks for this contig.\n", + frag_id, + index_in_list); + } + void cleanup() { if (frag_id)