From 859a400056ba9d44ecb32e11c3535558eaa9f966 Mon Sep 17 00:00:00 2001
From: mraves2 <116661291+mraves2@users.noreply.github.com>
Date: Thu, 30 Apr 2026 13:39:23 +0200
Subject: [PATCH] Revert "Feature/refactor_DIMS_MakeInit"

---
NOTE(review): DIMS/MakeInit.R in this patch was hardened during review. It
now validates its command-line arguments and the layout of the sample sheet,
and builds the replication pattern with split() instead of nested loops that
grow vectors. For a sheet whose rows form consecutive runs of nr_replicates
identical group names, the saved repl_pattern is the same as before.
The line structure and indentation of this patch were reconstructed, so the
deleted-file hunks may not match the files in the tree byte-for-byte, and the
blob id on the MakeInit.R index line was not recomputed.

 DIMS/MakeInit.R                               | 55 +++++++++++++++++++
 DIMS/MakeInit.nf                              | 18 ++++++
 DIMS/ParseSamplesheet.R                       | 19 -------
 DIMS/ParseSamplesheet.nf                      | 18 ------
 .../parse_samplesheet_functions.R             | 30 ----------
 .../testthat/parse_samplesheet_functions.R    | 24 --------
 6 files changed, 73 insertions(+), 91 deletions(-)
 create mode 100644 DIMS/MakeInit.R
 create mode 100644 DIMS/MakeInit.nf
 delete mode 100644 DIMS/ParseSamplesheet.R
 delete mode 100644 DIMS/ParseSamplesheet.nf
 delete mode 100644 DIMS/preprocessing/parse_samplesheet_functions.R
 delete mode 100644 DIMS/tests/testthat/parse_samplesheet_functions.R

diff --git a/DIMS/MakeInit.R b/DIMS/MakeInit.R
new file mode 100644
index 00000000..9ff22623
--- /dev/null
+++ b/DIMS/MakeInit.R
@@ -0,0 +1,55 @@
+# Build the replication pattern (group name -> sample names of its
+# replicates) from a tab-separated sample sheet and save it as init.RData.
+#
+# Usage: Rscript MakeInit.R <samplesheet> <nr_replicates>
+#   samplesheet:   tab-separated file; column 1 is read as sample names,
+#                  column 2 as group names
+#   nr_replicates: number of consecutive rows that form one group
+
+# define parameters
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 2) {
+  stop("usage: MakeInit.R <samplesheet> <nr_replicates>", call. = FALSE)
+}
+
+sample_sheet <- read.csv(args[1], sep = "\t")
+nr_replicates <- suppressWarnings(as.numeric(args[2]))
+if (is.na(nr_replicates) || nr_replicates < 1 || nr_replicates %% 1 != 0) {
+  stop("nr_replicates must be a positive whole number, got: ", args[2],
+       call. = FALSE)
+}
+
+sample_names <- trimws(as.vector(unlist(sample_sheet[1])))
+group_names <- trimws(as.vector(unlist(sample_sheet[2])))
+# replace every character that is not a letter, digit, hyphen or period
+group_names <- gsub("[^-.[:alnum:]]", "_", group_names)
+group_names_unique <- unique(group_names)
+
+# The pattern is built positionally: every run of nr_replicates consecutive
+# rows is one group. Fail loudly when the sheet does not have that layout
+# instead of silently dropping samples or naming the wrong group.
+nr_samples <- length(sample_names)
+if (nr_samples == 0) {
+  stop("sample sheet contains no samples: ", args[1], call. = FALSE)
+}
+if (nr_samples %% nr_replicates != 0) {
+  stop(nr_samples, " samples cannot be divided into groups of ",
+       nr_replicates, " replicates", call. = FALSE)
+}
+nr_sample_groups <- nr_samples / nr_replicates
+expected_group_names <- rep(group_names_unique, each = nr_replicates)
+if (!identical(group_names, expected_group_names)) {
+  stop("group names in column 2 must form ", nr_sample_groups,
+       " consecutive runs of ", nr_replicates, " identical names",
+       call. = FALSE)
+}
+
+# generate the replication pattern
+group_index <- rep(seq_len(nr_sample_groups), each = nr_replicates)
+repl_pattern <- unname(split(sample_names, group_index))
+names(repl_pattern) <- group_names_unique
+
+# preview the replication pattern
+print(tail(repl_pattern))
+
+save(repl_pattern, file = "init.RData")
diff --git a/DIMS/MakeInit.nf b/DIMS/MakeInit.nf
new file mode 100644
index 00000000..7aae0e46
--- /dev/null
+++ b/DIMS/MakeInit.nf
@@ -0,0 +1,18 @@
+process MakeInit {
+    tag "DIMS MakeInit"
+    label 'MakeInit'
+    container = 'docker://umcugenbioinf/dims:1.3'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    input:
+        path(samplesheet)
+        val(nr_replicates)
+
+    output:
+        path('init.RData')
+
+    script:
+        """
+        Rscript ${baseDir}/CustomModules/DIMS/MakeInit.R $samplesheet $nr_replicates
+        """
+}
diff --git a/DIMS/ParseSamplesheet.R b/DIMS/ParseSamplesheet.R
deleted file mode 100644
index 806486fb..00000000
--- a/DIMS/ParseSamplesheet.R
+++ /dev/null
@@ -1,19 +0,0 @@
-# define parameters
-args <- commandArgs(trailingOnly = TRUE)
-
-sample_sheet <- as.data.frame(read.csv(args[1], sep = "\t"))
-preprocessing_scripts_dir <- args[2]
-
-# load in function script
-source(paste0(preprocessing_scripts_dir, "parse_samplesheet_functions.R"))
-
-# generate the replication pattern
-repl_pattern <- generate_repl_pattern(sample_sheet)
-
-# write the replication pattern to text file for troubleshooting purposes
-sink("replication_pattern.txt")
-print(repl_pattern)
-sink()
-
-# save replication pattern to file
-save(repl_pattern, file = "init.RData")
diff --git a/DIMS/ParseSamplesheet.nf b/DIMS/ParseSamplesheet.nf
deleted file mode 100644
index e0fc055b..00000000
--- a/DIMS/ParseSamplesheet.nf
+++ /dev/null
@@ -1,18 +0,0 @@
-process ParseSamplesheet {
-    tag "DIMS ParseSamplesheet"
-    label 'ParseSamplesheet'
-    container = 'docker://umcugenbioinf/dims:1.3'
-
-    input:
-        path(samplesheet)
-        val(preprocessing_scripts_dir)
-
-    output:
-        path('init.RData'), emit: rdata_file
-        path('replication_pattern.txt'), emit: repl_pattern_txtfile
-
-    script:
-        """
-        Rscript ${baseDir}/CustomModules/DIMS/ParseSamplesheet.R $samplesheet $preprocessing_scripts_dir
-        """
-}
diff --git a/DIMS/preprocessing/parse_samplesheet_functions.R b/DIMS/preprocessing/parse_samplesheet_functions.R
deleted file mode 100644
index 9107ba6a..00000000
--- a/DIMS/preprocessing/parse_samplesheet_functions.R
+++ /dev/null
@@ -1,30 +0,0 @@
-# function for parse_samplesheet
-
-#' Generate replication pattern list based on information in sample_sheet
-#'
-#' @param sample_sheet: matrix of file names and sample names
-#'
-#' @return ints_sorted: list of sample names with corresponding file names (technical replicates)
-generate_repl_pattern <- function(sample_sheet) {
-  # get the file name and sample name columns from the samplesheet
-  file_name_col <- grep("File_Name|File Name", colnames(sample_sheet), ignore.case = TRUE)
-  sample_name_col <- grep("Sample_Name|Sample Name", colnames(sample_sheet), ignore.case = TRUE)
-  # get the unique sample names from the samplesheet
-  sample_names <- sample_sheet[sample_name_col] |>
-    unlist() |>
-    as.vector() |>
-    trimws() |>
-    unique() |>
-    sort()
-  # remove all characters from sample_names which are not letters, numbers, hyphens and periods
-  sample_names <- gsub("[^-.[:alnum:]]", "_", sample_names)
-
-  # create replication pattern (which technical replicates belong to which sample)
-  repl_pattern <- split(
-    sample_sheet[[file_name_col]],
-    sample_sheet[[sample_name_col]]
-  )
-
-  return(repl_pattern)
-}
-
diff --git a/DIMS/tests/testthat/parse_samplesheet_functions.R b/DIMS/tests/testthat/parse_samplesheet_functions.R
deleted file mode 100644
index 8e467967..00000000
--- a/DIMS/tests/testthat/parse_samplesheet_functions.R
+++ /dev/null
@@ -1,24 +0,0 @@
-# unit tests for ParseSamplesheet
-# function: generate_repl_pattern
-
-# source all functions for ParseSamplesheet
-source("../../preprocessing/parse_samplesheet_functions.R")
-
-# test generate_repl_pattern
-testthat::test_that("replication pattern is correctly generated", {
-  # create sample sheet tot test on:
-  test_file_names <- paste0(rep("RES_20260101_", 6), sprintf("%03d", 1:6))
-  test_sample_names <- sort(rep(c("C1", "P2", "P3"), 2))
-  test_sample_sheet <- as.data.frame(cbind(File_Name = test_file_names, Sample_Name = test_sample_names))
-
-  # test that a list of length 3 is generated
-  expect_length(generate_repl_pattern(test_sample_sheet), 3)
-  # test list names
-  expect_equal(names(generate_repl_pattern(test_sample_sheet)), unique(test_sample_names), TRUE)
-
-  # test what happens if any sample name is used twice
-  test_sample_names <- gsub("P3", "P2", test_sample_names)
-  test_sample_sheet <- as.data.frame(cbind(File_Name = test_file_names, Sample_Name = test_sample_names))
-  expect_length(generate_repl_pattern(test_sample_sheet), 2)
-  expect_length(generate_repl_pattern(test_sample_sheet)$P2, 4)
-})