UMCUGenetics · mraves2 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/DIMS/preprocessing/collect_filled_functions.R b/DIMS/preprocessing/collect_filled_functions.R
@@ -129,7 +129,7 @@ order_columns_peakgrouplist <- function(peakgroup_list) {
 
   original_colnames <- colnames(peakgroup_list)
   mass_columns <- c(grep("mzm", original_colnames), grep("nrsamples", original_colnames))
-  descriptive_columns <- c(grep("assi_HMDB", original_colnames):grep("avg.int", original_colnames), grep("ppmdev", original_colnames))
+  descriptive_columns <- grep("assi_HMDB", original_colnames):grep("avg.int", original_colnames)
   intensity_columns <- c((grep("nrsamples", original_colnames) + 1):(grep("assi_HMDB", original_colnames) - 1))
   # if no Z-scores have been calculated, the following two variables will be empty without consequences for outlist_total
   control_columns <- grep ("ctrls", original_colnames)

diff --git a/DIMS/preprocessing/fill_missing_functions.R b/DIMS/preprocessing/fill_missing_functions.R
@@ -24,7 +24,7 @@ fill_missing_intensities <- function(peakgroup_list, repl_pattern, thresh, disab
       for (zero_index in seq_along(zero_intensity)) {
         peakgroup_list[zero_intensity[zero_index], names(repl_pattern)[sample_index]] <- rnorm(n = 1,
                                                                                                mean = thresh,
-                                                                                               sd = 100)
+                                                                                               sd = 80)
       }
     }
 

diff --git a/DIMS/preprocessing/peak_finding_functions.R b/DIMS/preprocessing/peak_finding_functions.R
@@ -28,26 +28,18 @@ search_regions_of_interest <- function(ints_fullrange) {
     if (regions_of_interest_gte3[roi_nr, "length"] > 11) {
       roi <- ints_fullrange[(regions_of_interest_gte3[roi_nr, "from"]:regions_of_interest_gte3[roi_nr, "to"]), ]
       roi_intrange <- as.numeric(roi$int)
+      roi_firstindex <- as.numeric(rownames(roi)[1])
       # look for local minima that separate the peaks
       local_min_positions <- which(diff(sign(diff(roi_intrange))) == 2) + 1
       if (length(local_min_positions) > 0) {
         remove_roi_index <- c(remove_roi_index, roi_nr)
         # find new indices for rois after splitting
-        start_pos <- regions_of_interest_gte3[roi_nr, "from"]
-        new_rois <- data.frame(from = 0, to = 0, length = 0)
-        new_rois_splitroi <- regions_of_interest_gte3[0, ]
-        for (local_min_index in 1:length(local_min_positions)) {
-          new_rois[, 1] <- start_pos
-          new_rois[, 2] <- start_pos + local_min_positions[local_min_index]
-          new_rois[, 3] <- new_rois[, 2] - new_rois[, 1] + 1
-          new_rois_splitroi <- rbind(new_rois_splitroi, new_rois)
-          start_pos <- new_rois[, 2]
-        }
-        # intensities after last local minimum
-        new_rois[, 1] <- start_pos
-        new_rois[, 2] <- regions_of_interest_gte3[roi_nr, "to"]
-        new_rois[, 3] <- new_rois[, 2] - new_rois[, 1] + 1
-        new_rois_splitroi <- rbind(new_rois_splitroi, new_rois)
+        new_rois_splitroi <- as.data.frame(matrix(0, ncol = 3, nrow = (length(local_min_positions) + 1)))
+        colnames(new_rois_splitroi) <- colnames(regions_of_interest_gte3)
+        # fill new rois data frame: "from" in column 1, "to" in column 2 and "length" in column 3
+        new_rois_splitroi[, 1] <- c(roi_firstindex, roi_firstindex + local_min_positions)
+        new_rois_splitroi[, 2] <- c(roi_firstindex + local_min_positions, roi_firstindex + length(roi_intrange))
+        new_rois_splitroi[, 3] <- new_rois_splitroi[, 2] - new_rois_splitroi[, 1]
         # append
         new_rois_all <- rbind(new_rois_all, new_rois_splitroi)
       } else {

diff --git a/DIMS/tests/testthat/fixtures/test_peakgroup_list.txt b/DIMS/tests/testthat/fixtures/test_peakgroup_list.txt
@@ -1,5 +1,5 @@
-"mzmed.pgrp"	"nrsamples"	"C101.1"	"C102.1"	"P2.1"	"P3.1"	"assi_HMDB"	"all_hmdb_names"	"iso_HMDB"	"HMDB_code"	"all_hmdb_ids"	"sec_hmdb_ids"	"theormz_HMDB"	"avg.int"	"avg.ctrls"	"sd.ctrls"	"C101.1_Zscore"	"C102.1_Zscore"	"P2.1_Zscore"	"P3.1_Zscore"	"ppmdev"
-"1"	300.199680958642	0.451108327135444	1000	5000	10000	50000	"A"	"A;X"	NA	"HMDB1234567"	"HMDB1234567;HMDB1234567"	NA	300.1996476	16500	3000	2828.42712474619	9000	13000	90000	130000	0.111112214857712
-"2"	300.000315890415	0.498603057814762	2000	6000	20000	60000	"B"	"B;Y"	NA	"HMDB1234567_1"	"HMDB1234567_1;HMDB1234567_1"	NA	300.00017417	22000	4000	2828.42712474619	10000	14000	1e+05	140000	0.473299680976197
-"3"	300.254185894039	0.589562055887654	3000	7000	30000	70000	"C"	"C;Z"	NA	"HMDB1234567_2"	"HMDB1234567_2;HMDB1234567_2"	NA	300.25413357	27500	5000	2828.42712474619	11000	15000	110000	150000	0.17426158930175
-"4"	300.755745105678	0.277923040557653	4000	8000	40000	80000	"D"	"D;V"	NA	"HMDB1234567_7"	"HMDB1234567_7;HMDB1234567_7"	NA	300.75568892	33000	6000	2828.42712474619	12000	16000	120000	160000	0.186787674436346
+"mzmed.pgrp"	"nrsamples"	"C101.1"	"C102.1"	"P2.1"	"P3.1"	"assi_HMDB"	"all_hmdb_names"	"iso_HMDB"	"HMDB_code"	"all_hmdb_ids"	"sec_hmdb_ids"	"theormz_HMDB"	"ppmdev"	"avg.int"	"avg.ctrls"	"sd.ctrls"	"C101.1_Zscore"	"C102.1_Zscore"	"P2.1_Zscore"	"P3.1_Zscore"
+"1"	300.199680958642	0.451108327135444	1000	5000	10000	50000	"A"	"A;X"	NA	"HMDB1234567"	"HMDB1234567;HMDB1234567"	NA	300.1996476	0.111112214857712	16500	3000	2828.42712474619	9000	13000	90000	130000
+"2"	300.000315890415	0.498603057814762	2000	6000	20000	60000	"B"	"B;Y"	NA	"HMDB1234567_1"	"HMDB1234567_1;HMDB1234567_1"	NA	300.00017417	0.473299680976197	22000	4000	2828.42712474619	10000	14000	1e+05	140000
+"3"	300.254185894039	0.589562055887654	3000	7000	30000	70000	"C"	"C;Z"	NA	"HMDB1234567_2"	"HMDB1234567_2;HMDB1234567_2"	NA	300.25413357	0.17426158930175	27500	5000	2828.42712474619	11000	15000	110000	150000
+"4"	300.755745105678	0.277923040557653	4000	8000	40000	80000	"D"	"D;V"	NA	"HMDB1234567_7"	"HMDB1234567_7;HMDB1234567_7"	NA	300.75568892	0.186787674436346	33000	6000	2828.42712474619	12000	16000	120000	160000
diff --git a/DIMS/tests/testthat/test_collect_filled.R b/DIMS/tests/testthat/test_collect_filled.R
@@ -65,7 +65,7 @@ testthat::test_that("columns in peak group list are corretly sorted", {
   # original order of columns
   original_column_order <- colnames(test_peakgroup_list)
   # after ordering, column names should be re-ordered
-  test_column_order <- original_column_order[c(1, 2, 7:14, 21, 3:6, 15:20)]
+  test_column_order <- original_column_order[c(1, 2, 7:15, 3:6, 16:21)]
 
   expect_identical(colnames(order_columns_peakgrouplist(test_peakgroup_list)), test_column_order)
 

diff --git a/DIMS/tests/testthat/test_peak_finding_functions.R b/DIMS/tests/testthat/test_peak_finding_functions.R
@@ -31,7 +31,7 @@ test_that("regions of interest are correctly found for two peaks", {
                                              int = test_ints_range))
 
   # expected output
-  expected_output <- as.data.frame(matrix(c(1, 8, 8, 20, 8, 13), nrow = 2, ncol = 3))
+  expected_output <- as.data.frame(matrix(c(1, 8, 8, 21, 7, 13), nrow = 2, ncol = 3))
   colnames(expected_output) <- c("from", "to", "length")
   rownames(expected_output) <- as.character(c(1, 2))