Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 45 additions & 8 deletions DIMS/GenerateBreaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,56 @@ suppressPackageStartupMessages(library("xcms"))
cmd_args <- commandArgs(trailingOnly = TRUE)

filepath <- cmd_args[1]
trim <- as.numeric(cmd_args[2])
resol <- as.numeric(cmd_args[3])
outdir <- cmd_args[2]
trim <- as.numeric(cmd_args[3])
resol <- as.numeric(cmd_args[4])

# initialize
trim_left_pos <- NULL
trim_right_pos <- NULL
trim_left_neg <- NULL
trim_right_neg <- NULL
breaks_fwhm <- NULL
breaks_fwhm_avg <- NULL
bins <- NULL

# read in mzML file
raw_data <- suppressMessages(xcms::xcmsRaw(filepath))

# get trim parameters and save them to file
get_trim_parameters(raw_data@scantime, raw_data@polarity, trim)
# Split the scan times by polarity: one vector of scan times for the
# positive-mode scans and one for the negative-mode scans.
pos_times <- raw_data@scantime[raw_data@polarity == "positive"]
neg_times <- raw_data@scantime[raw_data@polarity == "negative"]

# Trim (remove) scans at the start and end for positive mode.
# The trim is asymmetric: 1.5 * trim of the scans at the start and
# 0.5 * trim at the end. The computed index is generally fractional;
# R truncates it to a whole number when subsetting. The selected scan
# time is rounded to a whole number.
trim_left_pos <- round(pos_times[length(pos_times) * (trim * 1.5)]) # start cut-off (15% of scans if trim = 0.1)
trim_right_pos <- round(pos_times[length(pos_times) * (1 - (trim * 0.5))]) # end cut-off (last 5% of scans if trim = 0.1)

# create breaks of bins for intensities. Bin size is a function of fwhm which is a function of m/z
get_breaks_for_bins(raw_data$mzrange, resol)
# Trim (remove) scans at the start and end for negative mode.
# Here the trim is symmetric: a fraction `trim` of the scans on each side.
# The fractional index is truncated by R when subsetting, and the selected
# scan time is rounded to a whole number.
trim_left_neg <- round(neg_times[length(neg_times) * trim])
trim_right_neg <- round(neg_times[length(neg_times) * (1 - trim)])

# Determine maximum m/z and save to file
# Mass range m/z
low_mz <- raw_data@mzrange[1]
high_mz <- raw_data@mzrange[2]
save(high_mz, file = "highest_mz.RData")

# Determine the number of segments: two segments per m/z unit, i.e. each
# segment spans roughly 0.5 m/z between low_mz and high_mz.
nr_segments <- 2 * (high_mz - low_mz)
segment <- seq(from = low_mz, to = high_mz, length.out = nr_segments + 1)

# Indices of the segments to process. `1:nr_segments` would yield c(1, 0)
# for an empty m/z range (nr_segments == 0) and then fail inside seq(), so
# that case is mapped to an empty index vector instead.
# NOTE(review): when nr_segments is fractional, seq() rounds length.out up
# while `1:nr_segments` stops at floor(nr_segments), so the last segment gets
# no breaks. This behavior is kept unchanged here; confirm whether it is
# intended.
segment_index <- if (nr_segments > 0) 1:nr_segments else integer(0)

# Determine the breaks inside each segment. The bin width is 0.2 * FWHM,
# where FWHM = m/z divided by the resolution at that m/z. Since
# sqrt(2)^log2(x) equals sqrt(x), the resolution used for a segment is
# resol / sqrt(start_segment / 200).
breaks_per_segment <- lapply(segment_index, function(i) {
  start_segment <- segment[i]
  end_segment <- segment[i + 1]
  resol_mz <- resol * (1 / sqrt(2) ^ (log2(start_segment / 200)))
  fwhm_segment <- start_segment / resol_mz
  seq(
    from = (start_segment + fwhm_segment),
    to = end_segment,
    by = 0.2 * fwhm_segment
  )
})

# Average m/z per bin instead of the start value: shift every break of a
# segment by half the bin width of that segment.
avg_per_segment <- lapply(breaks_per_segment, function(segment_breaks) {
  delta_mz <- segment_breaks[2] - segment_breaks[1]
  segment_breaks + 0.5 * delta_mz
})

# Append once instead of growing the vectors inside a loop.
breaks_fwhm <- c(breaks_fwhm, unlist(breaks_per_segment))
breaks_fwhm_avg <- c(breaks_fwhm_avg, unlist(avg_per_segment))

# generate output file
save(breaks_fwhm, breaks_fwhm_avg, file = "breaks.fwhm.RData")
save(trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "trim_params.RData")
save(high_mz, file = "highest_mz.RData")
3 changes: 2 additions & 1 deletion DIMS/GenerateBreaks.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ process GenerateBreaks {
input:
tuple(val(file_id), path(mzML_file))


output:
path('breaks.fwhm.RData'), emit: breaks
path('trim_params.RData'), emit: trim_params
path('highest_mz.RData'), emit: highest_mz

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/GenerateBreaks.R $mzML_file $params.trim $params.resolution
Rscript ${baseDir}/CustomModules/DIMS/GenerateBreaks.R $mzML_file ./ $params.trim $params.resolution
"""
}
29 changes: 29 additions & 0 deletions DIMS/MakeInit.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Build the replication pattern: a named list with one element per sample
# group, each holding the names of the replicates that belong to that group.
# The result is saved as `repl_pattern` in init.RData.
#
# Command-line arguments:
#   1. path to a tab-separated sample sheet; column 1 holds the sample
#      (replicate) names, column 2 the group name of each row
#   2. number of replicates per sample group
args <- commandArgs(trailingOnly = TRUE)

sample_sheet <- read.csv(args[1], sep = "\t")
nr_replicates <- as.numeric(args[2])

# Fail early with a clear message instead of a cryptic error further down.
if (is.na(nr_replicates) || nr_replicates < 1 ||
      nr_replicates != round(nr_replicates)) {
  stop(
    "Number of replicates must be a positive whole number, got: ", args[2],
    call. = FALSE
  )
}

sample_names <- trimws(as.character(sample_sheet[[1]]))
group_names <- trimws(as.character(sample_sheet[[2]]))
# Replace every character that is not alphanumeric, '-' or '.' by '_'.
group_names <- gsub("[^-.[:alnum:]]", "_", group_names)
group_names_unique <- unique(group_names)

# Only complete groups are used; rows beyond the last complete group are
# dropped, which is now reported instead of happening silently.
nr_sample_groups <- length(sample_names) %/% nr_replicates
nr_leftover <- length(sample_names) - nr_sample_groups * nr_replicates
if (nr_leftover > 0) {
  warning(
    nr_leftover, " sample(s) at the end of the sample sheet do not form a ",
    "complete group of ", nr_replicates, " replicates and are ignored",
    call. = FALSE
  )
}

# Generate the replication pattern: consecutive runs of `nr_replicates`
# sample names form one group, in sample-sheet order.
repl_pattern <- lapply(seq_len(nr_sample_groups), function(sample_group) {
  first_index <- (sample_group - 1) * nr_replicates
  sample_names[first_index + seq_len(nr_replicates)]
})

# The groups are named after the unique group names in order of first
# appearance, so both counts have to agree.
if (length(group_names_unique) > nr_sample_groups) {
  stop(
    "Found ", length(group_names_unique), " unique group names for ",
    nr_sample_groups, " sample group(s); check the sample sheet and the ",
    "number of replicates",
    call. = FALSE
  )
}
if (length(group_names_unique) < nr_sample_groups) {
  warning(
    "Found only ", length(group_names_unique), " unique group names for ",
    nr_sample_groups, " sample groups; the remaining groups get NA names",
    call. = FALSE
  )
}
names(repl_pattern) <- group_names_unique

# Preview the replication pattern.
print(tail(repl_pattern))

save(repl_pattern, file = "init.RData")
18 changes: 18 additions & 0 deletions DIMS/MakeInit.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Runs MakeInit.R on the sample sheet and emits the resulting init.RData.
process MakeInit {
tag "DIMS MakeInit"
label 'MakeInit'
// Container image in which the R script is executed.
container = 'docker://umcugenbioinf/dims:1.3'
// Run the script with bash in strict mode (-e, -u and pipefail).
shell = ['/bin/bash', '-euo', 'pipefail']

input:
// Sample sheet file; passed as first argument to MakeInit.R.
path(samplesheet)
// Number of replicates per sample; passed as second argument to MakeInit.R.
val(nr_replicates)

output:
// File expected in the task work directory after the script has run.
path('init.RData')

script:
"""
Rscript ${baseDir}/CustomModules/DIMS/MakeInit.R $samplesheet $nr_replicates
"""
}
19 changes: 0 additions & 19 deletions DIMS/ParseSamplesheet.R

This file was deleted.

18 changes: 0 additions & 18 deletions DIMS/ParseSamplesheet.nf

This file was deleted.

59 changes: 0 additions & 59 deletions DIMS/preprocessing/generate_breaks_functions.R

This file was deleted.

30 changes: 0 additions & 30 deletions DIMS/preprocessing/parse_samplesheet_functions.R

This file was deleted.

Binary file removed DIMS/tests/testthat/fixtures/test_breaks.fwhm.RData
Binary file not shown.
Binary file removed DIMS/tests/testthat/fixtures/test_trim_params.RData
Binary file not shown.
24 changes: 0 additions & 24 deletions DIMS/tests/testthat/parse_samplesheet_functions.R

This file was deleted.

54 changes: 0 additions & 54 deletions DIMS/tests/testthat/test_generate_breaks.R

This file was deleted.

Loading