diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 6d042f359..04c5a79bc 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -7,6 +7,3 @@ jobs: call-build: uses: ./.github/workflows/docker-build.yaml secrets: inherit - call-test: - uses: ./.github/workflows/pytest.yaml - needs: call-build diff --git a/.github/workflows/miniwdl-check.yaml b/.github/workflows/miniwdl-check.yaml deleted file mode 100644 index 14fa817bc..000000000 --- a/.github/workflows/miniwdl-check.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: Workflows Miniwdl Check - -on: [push] - -jobs: - miniwdl_check: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: '3.10' - - name: Install miniwdl - run: | - python -m pip install --upgrade pip - pip install miniwdl - - name: Run miniwdl - run: | - EXITCODE=0 - echo "Checking WDL files using \`miniwdl check\`." - shopt -s extglob - files=$(find ./!(template) -name '*.wdl') - for file in $files; do - echo " [***] $file [***]" - miniwdl check "$file" - EXITCODE=$(($? || EXITCODE)) - done - exit $EXITCODE diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml deleted file mode 100644 index 64c0b310b..000000000 --- a/.github/workflows/pytest.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: pytest-workflow Check - -on: - workflow_call: - -jobs: - list-tags: - runs-on: ubuntu-latest - outputs: - tags: ${{ steps.set-tags.outputs.tags }} - steps: - - name: checkout - uses: actions/checkout@v4 - - name: set tags - id: set-tags - # remove the "reference" tag as it's redundant with other tags - run: echo "tags=$(find tests -name '*.yaml' -exec yq --output-format yaml '.[].tags[] ' {} \; | sort | uniq | grep -vE 'reference|slow' | jq -ncR '[inputs]')" >> $GITHUB_OUTPUT - pytest_check: - needs: list-tags - runs-on: ubuntu-latest - strategy: - matrix: - tag: ${{ fromJson(needs.list-tags.outputs.tags) }} - runner: [sprocket, miniwdl] - fail-fast: false - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Update Rust - if: matrix.runner == 'sprocket' - run: rustup update stable && rustup default stable - - name: Build Sprocket - if: matrix.runner == 'sprocket' - run: | - cargo install sprocket --locked - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install miniwdl and pytest-workflow - run: | - python -m pip install --upgrade pip - pip install -r requirements-ci.txt - - name: filter tests - # don't run slow tests in CI - run: | - find tests -name '*.yaml' -exec yq --output-format yaml -i 'del(.[] | select(.tags[] | test("slow") ) )' {} \; - - name: Update containers - run: | - ./developer_scripts/update_container_tags.sh ${GITHUB_REF##*/} - - name: Run pytest-workflow - env: - RUNNER: ${{ matrix.runner }} - run: | - pytest --basetemp /home/runner/work/pytest --tag ${{ matrix.tag }} diff --git a/.github/workflows/sprocket-check.yaml b/.github/workflows/sprocket-check.yaml deleted file mode 100644 index a2cc9ca39..000000000 --- a/.github/workflows/sprocket-check.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: Workflows Sprocket Check - -on: [push] - -jobs: - sprocket_check: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Run sprocket - uses: stjude-rust-labs/sprocket-action@main diff --git a/.github/workflows/sprocket-lint.yaml b/.github/workflows/sprocket-lint.yaml index e289f28f7..60f48a8e5 100644 --- a/.github/workflows/sprocket-lint.yaml +++ b/.github/workflows/sprocket-lint.yaml @@ -3,12 +3,17 @@ name: Workflows Sprocket Lint on: [push] jobs: - sprocket_lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Run sprocket - uses: stjude-rust-labs/sprocket-action@main - with: - lint: true - except: KnownRules + sprocket_lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Run 'sprocket lint' + uses: stjude-rust-labs/sprocket-action@main + with: + lint: true + except: KnownRules + ignore-patterns: methylation + - name: Run 'sprocket format' + uses: stjude-rust-labs/sprocket-action@feat/format + with: + action: format diff --git a/.gitignore b/.gitignore index a487fb902..ac07b2a2c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +pytest/ # Ignore common bioinformatics formats used in these workflows. # But only if they are in the root of this repo /*.fastq.gz diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 3e49ccd1d..b4247a62b 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -82,7 +82,9 @@ workflow read_group_to_string { input { ReadGroup read_group - Array[String] required_fields = [] + Array[String] required_fields = [ + "SM", + ] Boolean format_as_sam_record = false Boolean restrictive = true } @@ -147,22 +149,20 @@ task validate_read_group { } parameter_meta { - read_group: "`ReadGroup` struct to validate" required_fields: "Array of read group fields that must be defined. The ID field is always required and does not need to be specified." + read_group: "`ReadGroup` struct to validate" restrictive: "If true, run a stricter validation of field values. Otherwise, check against SAM spec-defined values." } input { + Array[String] required_fields ReadGroup read_group - Array[String] required_fields = [] - Boolean restrictive = true + Boolean restrictive } # The SAM spec allows any printable ASCII character in header fields. String sam_spec_pattern = "[\\ -~]+" # We have the opinion that is too permissive for ID and SM. - String id_pattern = "id" - String sample_pattern = "sample.?" String restrictive_pattern = "\\ " # Disallow spaces Array[String] platforms = [ "CAPILLARY", @@ -182,11 +182,9 @@ task validate_read_group { command <<< exit_code=0 if ~{restrictive}; then - if [[ ~{read_group.ID} =~ ^~{id_pattern}$ ]] \ - || [[ ~{read_group.ID} =~ ~{restrictive_pattern} ]] + if [[ "~{read_group.ID}" =~ ~{restrictive_pattern} ]] then - >&2 echo "ID (~{read_group.ID}) must not match patterns:" - >&2 echo "'~{id_pattern}' or '~{restrictive_pattern}'" + >&2 echo "ID must not contain spaces" exit_code=1 fi fi @@ -202,11 +200,9 @@ task validate_read_group { fi if ~{defined(read_group.SM)}; then if ~{restrictive}; then - if [[ "~{read_group.SM}" =~ ^~{sample_pattern}$ ]] \ - || [[ "~{read_group.SM}" =~ ~{restrictive_pattern} ]] + if [[ "~{read_group.SM}" =~ ~{restrictive_pattern} ]] then - >&2 echo "SM must not match patterns:" - >&2 echo "'~{sample_pattern}' or '~{restrictive_pattern}'" + >&2 echo "SM must not contain spaces" exit_code=1 fi fi @@ -391,7 +387,7 @@ task inner_read_group_to_string { input { ReadGroup read_group - Boolean format_as_sam_record = false + Boolean format_as_sam_record } String delimiter = if format_as_sam_record diff --git a/data_structures/test/flag_filter.yaml b/data_structures/test/flag_filter.yaml new file mode 100644 index 000000000..cd64d0cca --- /dev/null +++ b/data_structures/test/flag_filter.yaml @@ -0,0 +1,54 @@ +validate_string_is_12bit_int: + - name: valid_numbers + inputs: + number: + - "5" + - "0x900" + - "01" + - "4095" + - "0" + - "072" + assertions: + stderr: + - Input number \(.*\) is valid + - name: invalid_numbers + inputs: + number: + - "0x1000" + - "" + - string + - this is not a number + - "000000000011" + - "-1" + - "08" + assertions: + exit_code: 42 + stderr: + - Input number \(.*\) is invalid + - name: too_big_decimal_fails + inputs: + number: + - "4096" + - "9999" + assertions: + exit_code: 42 + stderr: + - Input number \(.*\) interpreted as decimal + - But number must be less than 4096! +validate_flag_filter: + - name: valid_FlagFilter_passes + inputs: + flags: + - include_if_all: "3" + exclude_if_any: "0xF04" + include_if_any: "03" + exclude_if_all: "4095" + - name: invalid_FlagFilter_fails + inputs: + flags: + - include_if_all: "3" + exclude_if_any: "0xF04" + include_if_any: "03" + exclude_if_all: "" # empty string should trigger a fail + assertions: + should_fail: true diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml new file mode 100644 index 000000000..12ef8e5ad --- /dev/null +++ b/data_structures/test/read_group.yaml @@ -0,0 +1,93 @@ +read_group_to_string: + - name: valid_read_groups + inputs: + read_group: + - ID: R1 + SM: sampleFoo + - ID: R1 + SM: sampleFoo + LB: spaces are allowed in LB + BC: barcode with a space + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + assertions: + outputs: + validated_read_group: + - Contains: R1 + - Contains: sampleFoo + - name: id_with_spaces + inputs: + read_group: + - ID: ids should not have spaces # this is a problem + SM: sample_a + LB: library + BC: barcode + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + assertions: + should_fail: true + - name: sample_with_spaces + inputs: + read_group: + - ID: R123 + SM: samples should not have spaces + LB: library can have spaces though + BC: barcode + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + assertions: + should_fail: true + - name: spaces_allowed + inputs: + read_group: + - ID: technically permissable but gross + SM: same here! + restrictive: + - false + - name: missing_sample + inputs: + read_group: + - ID: R123 + LB: library + assertions: + should_fail: true + - name: missing_sample_allowed + inputs: + read_group: + - ID: R1 + LB: lib + required_fields: + - [] + +get_read_groups: + - name: works + inputs: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam diff --git a/sprocket.toml b/sprocket.toml index b84f31f61..912ce6a3b 100644 --- a/sprocket.toml +++ b/sprocket.toml @@ -3,6 +3,9 @@ all_lint_rules = true except = ["ContainerUri"] deny_notes = true +[format] +sort_inputs = true + [run.task] cpu_limit_behavior = "try_with_max" memory_limit_behavior = "try_with_max" diff --git a/test/bin/quickcheck.sh b/test/bin/quickcheck.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/input/1scattered.interval_list b/test/fixtures/1scattered.interval_list similarity index 100% rename from tests/input/1scattered.interval_list rename to test/fixtures/1scattered.interval_list diff --git a/tests/input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv b/test/fixtures/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv similarity index 100% rename from tests/input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv rename to test/fixtures/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv diff --git a/tests/input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv b/test/fixtures/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv similarity index 100% rename from tests/input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv rename to test/fixtures/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv diff --git a/tests/input/201533520001_R03C01_Grn.idat b/test/fixtures/201533520001_R03C01_Grn.idat similarity index 100% rename from tests/input/201533520001_R03C01_Grn.idat rename to test/fixtures/201533520001_R03C01_Grn.idat diff --git a/tests/input/201533520001_R03C01_Red.idat b/test/fixtures/201533520001_R03C01_Red.idat similarity index 100% rename from tests/input/201533520001_R03C01_Red.idat rename to test/fixtures/201533520001_R03C01_Red.idat diff --git a/tests/input/README.md b/test/fixtures/README.md similarity index 100% rename from tests/input/README.md rename to test/fixtures/README.md diff --git a/tests/input/Aligned.sortedByCoord.chr9_chr22.bam b/test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam similarity index 100% rename from tests/input/Aligned.sortedByCoord.chr9_chr22.bam rename to test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam diff --git a/tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai b/test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam.bai similarity index 100% rename from tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai rename to test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam.bai diff --git a/tests/input/test.PE.2_RGs.Aligned.out.sorted.bam b/test/fixtures/bams/test.PE.2_RGs.Aligned.out.sorted.bam similarity index 100% rename from tests/input/test.PE.2_RGs.Aligned.out.sorted.bam rename to test/fixtures/bams/test.PE.2_RGs.Aligned.out.sorted.bam diff --git a/tests/input/test.bam b/test/fixtures/bams/test.bam similarity index 100% rename from tests/input/test.bam rename to test/fixtures/bams/test.bam diff --git a/tests/input/test.bam.bai b/test/fixtures/bams/test.bam.bai similarity index 100% rename from tests/input/test.bam.bai rename to test/fixtures/bams/test.bam.bai diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.bam b/test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.bam rename to test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai b/test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam.bai similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai rename to test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam.bai diff --git a/tests/input/test.extra_RG.bam b/test/fixtures/bams/test.extra_RG.bam similarity index 100% rename from tests/input/test.extra_RG.bam rename to test/fixtures/bams/test.extra_RG.bam diff --git a/tests/input/test.unaccounted_read.bam b/test/fixtures/bams/test.unaccounted_read.bam similarity index 100% rename from tests/input/test.unaccounted_read.bam rename to test/fixtures/bams/test.unaccounted_read.bam diff --git a/tests/input/test2.bam b/test/fixtures/bams/test2.bam similarity index 100% rename from tests/input/test2.bam rename to test/fixtures/bams/test2.bam diff --git a/tests/input/test_rnaseq_variant.bam b/test/fixtures/bams/test_rnaseq_variant.bam similarity index 100% rename from tests/input/test_rnaseq_variant.bam rename to test/fixtures/bams/test_rnaseq_variant.bam diff --git a/tests/input/test_rnaseq_variant.bam.bai b/test/fixtures/bams/test_rnaseq_variant.bam.bai similarity index 100% rename from tests/input/test_rnaseq_variant.bam.bai rename to test/fixtures/bams/test_rnaseq_variant.bam.bai diff --git a/tests/input/chr1_chr19.interval_list b/test/fixtures/chr1_chr19.interval_list similarity index 100% rename from tests/input/chr1_chr19.interval_list rename to test/fixtures/chr1_chr19.interval_list diff --git a/tests/input/combined_beta.csv b/test/fixtures/combined_beta.csv similarity index 100% rename from tests/input/combined_beta.csv rename to test/fixtures/combined_beta.csv diff --git a/tests/input/random10k.r1.fq.gz b/test/fixtures/fastqs/random10k.r1.fq.gz similarity index 100% rename from tests/input/random10k.r1.fq.gz rename to test/fixtures/fastqs/random10k.r1.fq.gz diff --git a/tests/input/random10k.r2.fq.gz b/test/fixtures/fastqs/random10k.r2.fq.gz similarity index 100% rename from tests/input/random10k.r2.fq.gz rename to test/fixtures/fastqs/random10k.r2.fq.gz diff --git a/tests/input/test_R1.fq.gz b/test/fixtures/fastqs/test_R1.fq.gz similarity index 100% rename from tests/input/test_R1.fq.gz rename to test/fixtures/fastqs/test_R1.fq.gz diff --git a/tests/input/test_R2.fq.gz b/test/fixtures/fastqs/test_R2.fq.gz similarity index 100% rename from tests/input/test_R2.fq.gz rename to test/fixtures/fastqs/test_R2.fq.gz diff --git a/tests/input/filtered_beta.csv b/test/fixtures/filtered_beta.csv similarity index 100% rename from tests/input/filtered_beta.csv rename to test/fixtures/filtered_beta.csv diff --git a/tests/input/fusions.BCR_ABL1.tsv b/test/fixtures/fusions.BCR_ABL1.tsv similarity index 100% rename from tests/input/fusions.BCR_ABL1.tsv rename to test/fixtures/fusions.BCR_ABL1.tsv diff --git a/tests/input/GRCh38.chr1_chr19.dict b/test/fixtures/reference/GRCh38.chr1_chr19.dict similarity index 100% rename from tests/input/GRCh38.chr1_chr19.dict rename to test/fixtures/reference/GRCh38.chr1_chr19.dict diff --git a/tests/input/GRCh38.chr1_chr19.fa b/test/fixtures/reference/GRCh38.chr1_chr19.fa similarity index 100% rename from tests/input/GRCh38.chr1_chr19.fa rename to test/fixtures/reference/GRCh38.chr1_chr19.fa diff --git a/tests/input/GRCh38.chr1_chr19.fa.fai b/test/fixtures/reference/GRCh38.chr1_chr19.fa.fai similarity index 100% rename from tests/input/GRCh38.chr1_chr19.fa.fai rename to test/fixtures/reference/GRCh38.chr1_chr19.fa.fai diff --git a/tests/input/GRCh38.chr9_chr22.fa.gz b/test/fixtures/reference/GRCh38.chr9_chr22.fa.gz similarity index 100% rename from tests/input/GRCh38.chr9_chr22.fa.gz rename to test/fixtures/reference/GRCh38.chr9_chr22.fa.gz diff --git a/tests/input/GRCh38.chrY_chrM.bwa_db.tar.gz b/test/fixtures/reference/GRCh38.chrY_chrM.bwa_db.tar.gz similarity index 100% rename from tests/input/GRCh38.chrY_chrM.bwa_db.tar.gz rename to test/fixtures/reference/GRCh38.chrY_chrM.bwa_db.tar.gz diff --git a/tests/input/GRCh38.chrY_chrM.dict b/test/fixtures/reference/GRCh38.chrY_chrM.dict similarity index 100% rename from tests/input/GRCh38.chrY_chrM.dict rename to test/fixtures/reference/GRCh38.chrY_chrM.dict diff --git a/tests/input/GRCh38.chrY_chrM.fa b/test/fixtures/reference/GRCh38.chrY_chrM.fa similarity index 100% rename from tests/input/GRCh38.chrY_chrM.fa rename to test/fixtures/reference/GRCh38.chrY_chrM.fa diff --git a/tests/input/GRCh38.chrY_chrM.fa.fai b/test/fixtures/reference/GRCh38.chrY_chrM.fa.fai similarity index 100% rename from tests/input/GRCh38.chrY_chrM.fa.fai rename to test/fixtures/reference/GRCh38.chrY_chrM.fa.fai diff --git a/tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf b/test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi diff --git a/tests/input/gencode.v31.chr9_chr22.gtf.gz b/test/fixtures/reference/gencode.v31.chr9_chr22.gtf.gz similarity index 100% rename from tests/input/gencode.v31.chr9_chr22.gtf.gz rename to test/fixtures/reference/gencode.v31.chr9_chr22.gtf.gz diff --git a/tests/input/gencode.v31.chrY_chrM.gene.bed b/test/fixtures/reference/gencode.v31.chrY_chrM.gene.bed similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.gene.bed rename to test/fixtures/reference/gencode.v31.chrY_chrM.gene.bed diff --git a/tests/input/gencode.v31.chrY_chrM.genelengths.txt b/test/fixtures/reference/gencode.v31.chrY_chrM.genelengths.txt similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.genelengths.txt rename to test/fixtures/reference/gencode.v31.chrY_chrM.genelengths.txt diff --git a/tests/input/gencode.v31.chrY_chrM.gtf.gz b/test/fixtures/reference/gencode.v31.chrY_chrM.gtf.gz similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.gtf.gz rename to test/fixtures/reference/gencode.v31.chrY_chrM.gtf.gz diff --git a/tests/input/kraken2_C_elegans_library.tar.gz b/test/fixtures/reference/kraken2_C_elegans_library.tar.gz similarity index 100% rename from tests/input/kraken2_C_elegans_library.tar.gz rename to test/fixtures/reference/kraken2_C_elegans_library.tar.gz diff --git a/tests/input/kraken2_db.mini.tar.gz b/test/fixtures/reference/kraken2_db.mini.tar.gz similarity index 100% rename from tests/input/kraken2_db.mini.tar.gz rename to test/fixtures/reference/kraken2_db.mini.tar.gz diff --git a/tests/input/kraken2_taxonomy.tar.gz b/test/fixtures/reference/kraken2_taxonomy.tar.gz similarity index 100% rename from tests/input/kraken2_taxonomy.tar.gz rename to test/fixtures/reference/kraken2_taxonomy.tar.gz diff --git a/tests/input/star_db.chrY_chrM.tar.gz b/test/fixtures/reference/star_db.chrY_chrM.tar.gz similarity index 100% rename from tests/input/star_db.chrY_chrM.tar.gz rename to test/fixtures/reference/star_db.chrY_chrM.tar.gz diff --git a/tests/input/test.fa b/test/fixtures/reference/test.fa similarity index 100% rename from tests/input/test.fa rename to test/fixtures/reference/test.fa diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.TPM.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.TPM.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.TPM.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.TPM.txt diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.feature-counts.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.feature-counts.txt diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.readlength.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.readlength.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.readlength.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.readlength.txt diff --git a/tests/input/test.tar.gz b/test/fixtures/test.tar.gz similarity index 100% rename from tests/input/test.tar.gz rename to test/fixtures/test.tar.gz diff --git a/tests/input/test_rnaseq_variant.recal.txt b/test/fixtures/test_rnaseq_variant.recal.txt similarity index 100% rename from tests/input/test_rnaseq_variant.recal.txt rename to test/fixtures/test_rnaseq_variant.recal.txt diff --git a/tests/input/umap.csv b/test/fixtures/umap.csv similarity index 100% rename from tests/input/umap.csv rename to test/fixtures/umap.csv diff --git a/tests/input/test1.vcf.gz b/test/fixtures/vcfs/test1.vcf.gz similarity index 100% rename from tests/input/test1.vcf.gz rename to test/fixtures/vcfs/test1.vcf.gz diff --git a/tests/input/test1.vcf.gz.tbi b/test/fixtures/vcfs/test1.vcf.gz.tbi similarity index 100% rename from tests/input/test1.vcf.gz.tbi rename to test/fixtures/vcfs/test1.vcf.gz.tbi diff --git a/tests/input/test2.vcf.gz b/test/fixtures/vcfs/test2.vcf.gz similarity index 100% rename from tests/input/test2.vcf.gz rename to test/fixtures/vcfs/test2.vcf.gz diff --git a/tests/input/test2.vcf.gz.tbi b/test/fixtures/vcfs/test2.vcf.gz.tbi similarity index 100% rename from tests/input/test2.vcf.gz.tbi rename to test/fixtures/vcfs/test2.vcf.gz.tbi diff --git a/tests/input/wgs_calling_regions.hg38.interval_list b/test/fixtures/wgs_calling_regions.hg38.interval_list similarity index 100% rename from tests/input/wgs_calling_regions.hg38.interval_list rename to test/fixtures/wgs_calling_regions.hg38.interval_list diff --git a/tests/data_structures/input_json/get_read_groups.json b/tests/data_structures/input_json/get_read_groups.json deleted file mode 100644 index 9b3f1bbc7..000000000 --- a/tests/data_structures/input_json/get_read_groups.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "get_read_groups.bam": "https://github.com/stjude/CICERO/raw/master/test/data/input/test.bam" -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_bad_id.json b/tests/data_structures/input_json/read_group_bad_id.json deleted file mode 100644 index 41d05152c..000000000 --- a/tests/data_structures/input_json/read_group_bad_id.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "id", - "SM": "sample_a", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_bad_sample.json b/tests/data_structures/input_json/read_group_bad_sample.json deleted file mode 100644 index 452d17b8d..000000000 --- a/tests/data_structures/input_json/read_group_bad_sample.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "R123", - "SM": "sample1", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_good.json b/tests/data_structures/input_json/read_group_good.json deleted file mode 100644 index 81753f65d..000000000 --- a/tests/data_structures/input_json/read_group_good.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "R123", - "SM": "IPSC-1", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_missing_sample.json b/tests/data_structures/input_json/read_group_missing_sample.json deleted file mode 100644 index 8a23be66a..000000000 --- a/tests/data_structures/input_json/read_group_missing_sample.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "id" - }, - "read_group_to_string.required_fields": [ - "SM" - ] -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_sample_with_space.json b/tests/data_structures/input_json/read_group_sample_with_space.json deleted file mode 100644 index f0c7d88d0..000000000 --- a/tests/data_structures/input_json/read_group_sample_with_space.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group": { - "ID": "myID", - "SM": "sample a", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/test_flag_filter.yaml b/tests/data_structures/test_flag_filter.yaml deleted file mode 100644 index 20a1fb4d5..000000000 --- a/tests/data_structures/test_flag_filter.yaml +++ /dev/null @@ -1,74 +0,0 @@ -- name: flag_filter_0x900 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="0x900" - -- name: flag_filter_5 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="5" - -- name: flag_filter_01 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="01" - -- name: flag_filter_0x1000 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="0x1000" - exit_code: 1 - stderr: - contains: - - "Input number (0x1000) is invalid" - -- name: flag_filter_neg1 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="-1" - exit_code: 1 - stderr: - contains: - - "Input number (-1) is invalid" - -- name: flag_filter_4096 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="4096" - exit_code: 1 - stderr: - contains: - - "Input number (4096) interpreted as decimal" - - "But number must be less than 4096!" - -- name: flag_filter_4095 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="4095" - -- name: flag_filter_string - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="string" - exit_code: 1 - stderr: - contains: - - "Input number (string) is invalid" - -- name: flag_filter_empty_string - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="" - exit_code: 1 - stderr: - contains: - - "Input number () is invalid" diff --git a/tests/data_structures/test_read_group.yaml b/tests/data_structures/test_read_group.yaml deleted file mode 100644 index 41d09e543..000000000 --- a/tests/data_structures/test_read_group.yaml +++ /dev/null @@ -1,51 +0,0 @@ -- name: read_group_bad_id - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_bad_id.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "ID (id) must not match pattern" - -- name: read_group_bad_sample - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_bad_sample.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM must not match pattern" - -- name: read_group_good - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_good.json data_structures/read_group.wdl - -- name: read_group_missing_sample - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_missing_sample.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM is required" - -- name: read_group_sample_with_space - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_sample_with_space.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM must not match pattern" - -- name: get_read_groups - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t get_read_groups -i tests/data_structures/input_json/get_read_groups.json data_structures/read_group.wdl \ No newline at end of file diff --git a/tests/tools/input_json/arriba.json b/tests/tools/input_json/arriba.json deleted file mode 100644 index 0721bbd93..000000000 --- a/tests/tools/input_json/arriba.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "arriba.bam": "../../input/Aligned.sortedByCoord.chr9_chr22.bam", - "arriba.gtf": "../../input/gencode.v31.chr9_chr22.gtf.gz", - "arriba.reference_fasta_gz": "../../input/GRCh38.chr9_chr22.fa.gz", - "arriba.disable_filters": [ - "blacklist" - ], - "arriba.prefix": "fusions" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_aln.json b/tests/tools/input_json/bwa_aln.json deleted file mode 100644 index 3b59db584..000000000 --- a/tests/tools/input_json/bwa_aln.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bwa_aln.fastq": "../../input/test_R1.fq.gz", - "bwa_aln.read_group": "@RG\\tID:test\\tSM:test", - "bwa_aln.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_aln_pe.json b/tests/tools/input_json/bwa_aln_pe.json deleted file mode 100644 index f878cb118..000000000 --- a/tests/tools/input_json/bwa_aln_pe.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "bwa_aln_pe.read_one_fastq_gz": "../../input/test_R1.fq.gz", - "bwa_aln_pe.read_two_fastq_gz": "../../input/test_R2.fq.gz", - "bwa_aln_pe.read_group": "@RG\\tID:test\\tSM:test", - "bwa_aln_pe.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_mem.json b/tests/tools/input_json/bwa_mem.json deleted file mode 100644 index 4563e2b8b..000000000 --- a/tests/tools/input_json/bwa_mem.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bwa_mem.read_one_fastq_gz": "../../input/test_R1.fq.gz", - "bwa_mem.read_group": "@RG\\tID:test\\tSM:test", - "bwa_mem.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/calc_tpm.json b/tests/tools/input_json/calc_tpm.json deleted file mode 100644 index d4c9fc03f..000000000 --- a/tests/tools/input_json/calc_tpm.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "calc_tpm.counts": "../../input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt", - "calc_tpm.feature_lengths": "../../input/gencode.v31.chrY_chrM.genelengths.txt" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_apply_bqsr.json b/tests/tools/input_json/gatk4_apply_bqsr.json deleted file mode 100644 index d73c2a3eb..000000000 --- a/tests/tools/input_json/gatk4_apply_bqsr.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "apply_bqsr.bam": "../../input/test_rnaseq_variant.bam", - "apply_bqsr.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "apply_bqsr.recalibration_report": "../../input/test_rnaseq_variant.recal.txt" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_base_recalibrator.json b/tests/tools/input_json/gatk4_base_recalibrator.json deleted file mode 100644 index e63e309b2..000000000 --- a/tests/tools/input_json/gatk4_base_recalibrator.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "base_recalibrator.bam": "../../input/test_rnaseq_variant.bam", - "base_recalibrator.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "base_recalibrator.fasta": "../../input/GRCh38.chr1_chr19.fa", - "base_recalibrator.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "base_recalibrator.dict": "../../input/GRCh38.chr1_chr19.dict", - "base_recalibrator.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "base_recalibrator.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx", - "base_recalibrator.known_indels_sites_vcfs": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - ], - "base_recalibrator.known_indels_sites_indices": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_haplotype_caller.json b/tests/tools/input_json/gatk4_haplotype_caller.json deleted file mode 100644 index 653fb5714..000000000 --- a/tests/tools/input_json/gatk4_haplotype_caller.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "haplotype_caller.bam": "../../input/test_rnaseq_variant.bam", - "haplotype_caller.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "haplotype_caller.fasta": "../../input/GRCh38.chr1_chr19.fa", - "haplotype_caller.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "haplotype_caller.dict": "../../input/GRCh38.chr1_chr19.dict", - "haplotype_caller.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "haplotype_caller.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx", - "haplotype_caller.interval_list": "../../input/chr1_chr19.interval_list" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_split_n_cigar_reads.json b/tests/tools/input_json/gatk4_split_n_cigar_reads.json deleted file mode 100644 index e547714cf..000000000 --- a/tests/tools/input_json/gatk4_split_n_cigar_reads.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "split_n_cigar_reads.bam": "../../input/test.bam", - "split_n_cigar_reads.bam_index": "../../input/test.bam.bai", - "split_n_cigar_reads.fasta": "../../input/GRCh38.chr1_chr19.fa", - "split_n_cigar_reads.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "split_n_cigar_reads.dict": "../../input/GRCh38.chr1_chr19.dict", - "split_n_cigar_reads.prefix": "split" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_variant_filtration.json b/tests/tools/input_json/gatk4_variant_filtration.json deleted file mode 100644 index ba6d698da..000000000 --- a/tests/tools/input_json/gatk4_variant_filtration.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "variant_filtration.vcf": "../../input/test1.vcf.gz", - "variant_filtration.vcf_index": "../../input/test1.vcf.gz.tbi", - "variant_filtration.fasta": "../../input/GRCh38.chr1_chr19.fa", - "variant_filtration.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "variant_filtration.dict": "../../input/GRCh38.chr1_chr19.dict" -} \ No newline at end of file diff --git a/tests/tools/input_json/kraken_build_db.json b/tests/tools/input_json/kraken_build_db.json deleted file mode 100644 index d3251ef77..000000000 --- a/tests/tools/input_json/kraken_build_db.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "build_db.tarballs": [ - "../../input/kraken2_taxonomy.tar.gz", - "../../input/kraken2_C_elegans_library.tar.gz" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/kraken_create_library_from_fastas.json b/tests/tools/input_json/kraken_create_library_from_fastas.json deleted file mode 100644 index 1d15046d6..000000000 --- a/tests/tools/input_json/kraken_create_library_from_fastas.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "create_library_from_fastas.fastas_gz": [ - "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Caenorhabditis_elegans/reference/GCF_000002985.6_WBcel235/GCF_000002985.6_WBcel235_genomic.fna.gz" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/multiqc.json b/tests/tools/input_json/multiqc.json deleted file mode 100644 index 556d7111f..000000000 --- a/tests/tools/input_json/multiqc.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "multiqc.files": [ - "../../input/test.bwa_aln_pe.chrY_chrM.readlength.txt" - ], - "multiqc.report_name": "test.bwa_aln_pe.chrY_chrM.multiqc" -} \ No newline at end of file diff --git a/tests/tools/input_json/multiqc_empty.json b/tests/tools/input_json/multiqc_empty.json deleted file mode 100644 index f2fbc2d6c..000000000 --- a/tests/tools/input_json/multiqc_empty.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "multiqc.files": [], - "multiqc.report_name": "empty" -} \ No newline at end of file diff --git a/tests/tools/input_json/ngsderive_encoding.json b/tests/tools/input_json/ngsderive_encoding.json deleted file mode 100644 index 3b4d25fd5..000000000 --- a/tests/tools/input_json/ngsderive_encoding.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "encoding.ngs_files": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam" - ], - "encoding.outfile_name": "test.bwa_aln_pe.chrY_chrM.encoding.tsv" -} \ No newline at end of file diff --git a/tests/tools/input_json/picard_merge_sam_files.json b/tests/tools/input_json/picard_merge_sam_files.json deleted file mode 100644 index 59448d62d..000000000 --- a/tests/tools/input_json/picard_merge_sam_files.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge_sam_files.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge_sam_files.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/picard_merge_vcfs.json b/tests/tools/input_json/picard_merge_vcfs.json deleted file mode 100644 index 729faa020..000000000 --- a/tests/tools/input_json/picard_merge_vcfs.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "merge_vcfs.vcfs": [ - "../../input/test1.vcf.gz", - "../../input/test2.vcf.gz" - ], - "merge_vcfs.vcfs_indexes": [ - "../../input/test1.vcf.gz.tbi", - "../../input/test2.vcf.gz.tbi" - ], - "merge_vcfs.output_vcf_name": "test.vcf.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/sambamba_merge.json b/tests/tools/input_json/sambamba_merge.json deleted file mode 100644 index 2b8c66b13..000000000 --- a/tests/tools/input_json/sambamba_merge.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/samtools_merge.json b/tests/tools/input_json/samtools_merge.json deleted file mode 100644 index 2b8c66b13..000000000 --- a/tests/tools/input_json/samtools_merge.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/samtools_subsample.json b/tests/tools/input_json/samtools_subsample.json deleted file mode 100644 index 05e5fe4b5..000000000 --- a/tests/tools/input_json/samtools_subsample.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "subsample.bam": "https://github.com/stjude/CICERO/raw/master/test/data/input/test.bam", - "subsample.desired_reads": 100 -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_PE_1_pair.json b/tests/tools/input_json/star_alignment_PE_1_pair.json deleted file mode 100644 index befd948bd..000000000 --- a/tests/tools/input_json/star_alignment_PE_1_pair.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "alignment.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz", - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:test SM:test PL:ILLUMINA" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_PE_2_pairs.json b/tests/tools/input_json/star_alignment_PE_2_pairs.json deleted file mode 100644 index 45e479445..000000000 --- a/tests/tools/input_json/star_alignment_PE_2_pairs.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz", - "../../input/random10k.r1.fq.gz" - ], - "alignment.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz", - "../../input/random10k.r2.fq.gz" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz", - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:test SM:test PL:ILLUMINA", - "ID:random" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_SE.json b/tests/tools/input_json/star_alignment_SE.json deleted file mode 100644 index c30d29281..000000000 --- a/tests/tools/input_json/star_alignment_SE.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:foo" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/util_add_to_bam_header.json b/tests/tools/input_json/util_add_to_bam_header.json deleted file mode 100644 index 71f5e7960..000000000 --- a/tests/tools/input_json/util_add_to_bam_header.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "add_to_bam_header.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "add_to_bam_header.additional_header": "@RG\tID:3" -} \ No newline at end of file diff --git a/tests/tools/input_json/util_download.json b/tests/tools/input_json/util_download.json deleted file mode 100644 index 16dde86b1..000000000 --- a/tests/tools/input_json/util_download.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "download.url": "https://raw.githubusercontent.com/stjudecloud/workflows/main/LICENSE.md", - "download.outfile_name": "license.txt", - "download.md5sum": "cf3575bd84ab3151c7e9700b5f1a9746", - "download.disk_size_gb": 1 -} \ No newline at end of file diff --git a/tests/tools/input_json/util_split_string.json b/tests/tools/input_json/util_split_string.json deleted file mode 100644 index 68e3d245b..000000000 --- a/tests/tools/input_json/util_split_string.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "split_string.string": "rg1 , rg2", - "split_string.delimiter": " , " -} \ No newline at end of file diff --git a/tests/tools/test_arriba.yaml b/tests/tools/test_arriba.yaml deleted file mode 100644 index 883a9a9c7..000000000 --- a/tests/tools/test_arriba.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: arriba - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba -i tests/tools/input_json/arriba.json tools/arriba.wdl - files: - - path: output/outputs.json - contains: - - fusions.tsv - - fusions.discarded.tsv - -- name: arriba_tsv_to_vcf - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_tsv_to_vcf tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" reference_fasta="tests/input/GRCh38.chr9_chr22.fa.gz" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions.vcf - -- name: arriba_extract_fusion_supporting_alignments - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_extract_fusion_supporting_alignments tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" bam="tests/input/Aligned.sortedByCoord.chr9_chr22.bam" bam_index="tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions_1.bam - -- name: arriba_annotate_exon_numbers - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_annotate_exon_numbers tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" gtf="tests/input/gencode.v31.chr9_chr22.gtf.gz" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions.annotated.tsv diff --git a/tests/tools/test_bwa.yaml b/tests/tools/test_bwa.yaml deleted file mode 100644 index 6c38821ef..000000000 --- a/tests/tools/test_bwa.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: bwa_aln - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_aln -i tests/tools/input_json/bwa_aln.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: bwa_aln_pe - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_aln_pe -i tests/tools/input_json/bwa_aln_pe.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: bwa_mem - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_mem -i tests/tools/input_json/bwa_mem.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: build_bwa_db - tags: - - bwa - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_bwa_db tools/bwa.wdl reference_fasta="tests/input/GRCh38.chrY_chrM.fa" - files: - - path: output/outputs.json - contains: - - bwa_db.tar.gz diff --git a/tests/tools/test_deeptools.yaml b/tests/tools/test_deeptools.yaml deleted file mode 100644 index 64c671edf..000000000 --- a/tests/tools/test_deeptools.yaml +++ /dev/null @@ -1,9 +0,0 @@ -- name: deeptools_bam_coverage - tags: - - deeptools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_coverage tools/deeptools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bw diff --git a/tests/tools/test_fastp.yaml b/tests/tools/test_fastp.yaml deleted file mode 100644 index 006db1087..000000000 --- a/tests/tools/test_fastp.yaml +++ /dev/null @@ -1,12 +0,0 @@ -- name: fastp - tags: - - fastp - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fastp tools/fastp.wdl read_one_fastq="tests/input/test_R1.fq.gz" read_two_fastq="tests/input/test_R2.fq.gz" - files: - - path: output/outputs.json - contains: - - test.trimmed.R1.fastq.gz - - test.trimmed.R2.fastq.gz - - test.trimmed.fastp.html - - test.trimmed.fastp.json \ No newline at end of file diff --git a/tests/tools/test_fastqc.yaml b/tests/tools/test_fastqc.yaml deleted file mode 100644 index 2be254308..000000000 --- a/tests/tools/test_fastqc.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: fastqc - tags: - - fastqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fastqc tools/fastqc.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM_fastqc.zip - - test.bwa_aln_pe.chrY_chrM.fastqc_results.tar.gz diff --git a/tests/tools/test_fq.yaml b/tests/tools/test_fq.yaml deleted file mode 100644 index dae76f1bc..000000000 --- a/tests/tools/test_fq.yaml +++ /dev/null @@ -1,27 +0,0 @@ -- name: fqlint - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fqlint tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz - -- name: subsample_fq - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz record_count=1000 - files: - - path: output/outputs.json - contains: - - test.R1.subsampled.fastq.gz - - test.R2.subsampled.fastq.gz - -- name: subsample_fq_percentage - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz probability=0.01 - files: - - path: output/outputs.json - contains: - - test.R1.subsampled.fastq.gz - - test.R2.subsampled.fastq.gz diff --git a/tests/tools/test_gatk4.yaml b/tests/tools/test_gatk4.yaml deleted file mode 100644 index 200a7f33e..000000000 --- a/tests/tools/test_gatk4.yaml +++ /dev/null @@ -1,65 +0,0 @@ -- name: gatk4_split_n_cigar_reads - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_n_cigar_reads -i tests/tools/input_json/gatk4_split_n_cigar_reads.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - split.bam - - split.bam.bai - - split.bam.md5 - -- name: gatk4_base_recalibrator - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t base_recalibrator -i tests/tools/input_json/gatk4_base_recalibrator.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.recal.txt - -- name: gatk4_apply_bqsr - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t apply_bqsr -i tests/tools/input_json/gatk4_apply_bqsr.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.bqsr.bam - - test_rnaseq_variant.bqsr.bam.bai - -- name: gatk4_haplotype_caller - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t haplotype_caller -i tests/tools/input_json/gatk4_haplotype_caller.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.vcf.gz - -- name: gatk4_variant_filtration - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t variant_filtration -i tests/tools/input_json/gatk4_variant_filtration.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test1.filtered.vcf.gz - - test1.filtered.vcf.gz.tbi - -- name: gatk4_mark_duplicates_spark - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t mark_duplicates_spark tools/gatk4.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.bai - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.metrics.txt \ No newline at end of file diff --git a/tests/tools/test_htseq.yaml b/tests/tools/test_htseq.yaml deleted file mode 100644 index f35b7adbd..000000000 --- a/tests/tools/test_htseq.yaml +++ /dev/null @@ -1,19 +0,0 @@ -- name: htseq_count - tags: - - htseq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t count tools/htseq.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" strandedness="no" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.feature-counts.txt - -- name: calc_tpm - tags: - - htseq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t calc_tpm -i tests/tools/input_json/calc_tpm.json tools/htseq.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.TPM.txt diff --git a/tests/tools/test_kraken2.yaml b/tests/tools/test_kraken2.yaml deleted file mode 100644 index 22564eaf6..000000000 --- a/tests/tools/test_kraken2.yaml +++ /dev/null @@ -1,55 +0,0 @@ -- name: download_taxonomy - tags: - - kraken - - reference - - slow - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t download_taxonomy tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_taxonomy.tar.gz - -- name: download_library - tags: - - kraken - - reference - - slow - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t download_library tools/kraken2.wdl library_name='fungi' - files: - - path: output/outputs.json - contains: - - kraken2_fungi_library.tar.gz - -- name: create_library_from_fastas - tags: - - kraken - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t create_library_from_fastas -i tests/tools/input_json/kraken_create_library_from_fastas.json tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_custom_library.tar.gz - -- name: build_db - tags: - - kraken - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_db -i tests/tools/input_json/kraken_build_db.json tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_db.tar.gz - -- name: kraken - tags: - - kraken - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t kraken tools/kraken2.wdl read_one_fastq_gz="tests/input/test_R1.fq.gz" read_two_fastq_gz="tests/input/test_R2.fq.gz" db="tests/input/kraken2_db.mini.tar.gz" - files: - - path: output/outputs.json - contains: - - test.kraken2.txt diff --git a/tests/tools/test_librarian.yaml b/tests/tools/test_librarian.yaml deleted file mode 100644 index a0033514e..000000000 --- a/tests/tools/test_librarian.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: librarian - tags: - - librarian - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t librarian tools/librarian.wdl read_one_fastq="tests/input/test_R1.fq.gz" - files: - - path: output/outputs.json - contains: - - test.librarian.tar.gz - - librarian_heatmap.txt diff --git a/tests/tools/test_md5sum.yaml b/tests/tools/test_md5sum.yaml deleted file mode 100644 index a1199b897..000000000 --- a/tests/tools/test_md5sum.yaml +++ /dev/null @@ -1,11 +0,0 @@ -- name: compute_checksum - tags: - - md5sum - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t compute_checksum tools/md5sum.wdl file="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.md5 - # contains: - # - "77fa2f59b0083202c73b0c80b60b24f6" diff --git a/tests/tools/test_mosdepth.yaml b/tests/tools/test_mosdepth.yaml deleted file mode 100644 index e0b33e21f..000000000 --- a/tests/tools/test_mosdepth.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: mosdepth_coverage - tags: - - mosdepth - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t coverage tools/mosdepth.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.mosdepth.summary.txt - - test.bwa_aln_pe.chrY_chrM.mosdepth.global.dist.txt diff --git a/tests/tools/test_multiqc.yaml b/tests/tools/test_multiqc.yaml deleted file mode 100644 index c2537df7b..000000000 --- a/tests/tools/test_multiqc.yaml +++ /dev/null @@ -1,20 +0,0 @@ -- name: multiqc - tags: - - multiqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t multiqc -i tests/tools/input_json/multiqc.json tools/multiqc.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.multiqc.html - -- name: multiqc_empty - tags: - - multiqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t multiqc -i tests/tools/input_json/multiqc_empty.json tools/multiqc.wdl - exit_code: 1 - stderr: - contains: - - "No analysis results found" - - "MultiQC didn't find any valid files" \ No newline at end of file diff --git a/tests/tools/test_ngsderive.yaml b/tests/tools/test_ngsderive.yaml deleted file mode 100644 index b94534d5e..000000000 --- a/tests/tools/test_ngsderive.yaml +++ /dev/null @@ -1,68 +0,0 @@ -- name: strandedness - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t strandedness tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" gene_model="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.strandedness.tsv - - "Unstranded" - -- name: instrument - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t instrument tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.instrument.tsv - - "multiple instruments" - # - "unknown confidence" - -- name: read_length - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t read_length tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.readlength.tsv - # contains: - # - "150=20000" - -- name: encoding - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t encoding -i tests/tools/input_json/ngsderive_encoding.json tools/ngsderive.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.encoding.tsv - # contains: - # - "ASCII range: 74-74" - # - "Illumina 1.3" - -- name: junction_annotation - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t junction_annotation tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" gene_model="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.junction_summary.tsv - - test.bwa_aln_pe.chrY_chrM.junctions.tsv.gz - -- name: endedness - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t endedness tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.endedness.tsv diff --git a/tests/tools/test_picard.yaml b/tests/tools/test_picard.yaml deleted file mode 100644 index ca99cab36..000000000 --- a/tests/tools/test_picard.yaml +++ /dev/null @@ -1,163 +0,0 @@ -- name: picard_mark_duplicates - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t mark_duplicates tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.bai - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.md5 - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.metrics.txt - -- name: picard_validate_bam - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_bam tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.ValidateSamFile.txt - -- name: picard_bam_to_fastq - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_to_fastq tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.R1.fastq.gz - - test.bwa_aln_pe.chrY_chrM.R2.fastq.gz - -- name: picard_sort - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" memory_gb=16 - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - - test.bwa_aln_pe.chrY_chrM.sorted.bam.bai - -- name: picard_sort_queryname - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" memory_gb=16 sort_order="queryname" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - must_not_contain: - - test.bwa_aln_pe.chrY_chrM.sorted.bam.bai - -- name: picard_merge_sam_files - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge_sam_files -i tests/tools/input_json/picard_merge_sam_files.json tools/picard.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: picard_clean_sam - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t clean_sam tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.cleaned.bam - -- name: picard_collect_wgs_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_wgs_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" reference_fasta="tests/input/GRCh38.chrY_chrM.fa" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectWgsMetrics.txt - -- name: picard_collect_alignment_summary_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_alignment_summary_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectAlignmentSummaryMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectAlignmentSummaryMetrics.pdf - -- name: picard_collect_gc_bias_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_gc_bias_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" reference_fasta=tests/input/GRCh38.chrY_chrM.fa - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.summary.txt - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.pdf - -- name: picard_collect_insert_size_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_insert_size_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectInsertSizeMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectInsertSizeMetrics.pdf - -- name: picard_quality_score_distribution - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t quality_score_distribution tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.QualityScoreDistribution.txt - - test.bwa_aln_pe.chrY_chrM.QualityScoreDistribution.pdf - -- name: picard_merge_vcfs - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge_vcfs -i tests/tools/input_json/picard_merge_vcfs.json tools/picard.wdl - files: - - path: output/outputs.json - contains: - - test.vcf.gz - -- name: picard_scatter_interval_list - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t scatter_interval_list tools/picard.wdl interval_list="tests/input/wgs_calling_regions.hg38.interval_list" scatter_count=3 - files: - - path: output/outputs.json - contains: - - 1scattered.interval_list - - 2scattered.interval_list - - 3scattered.interval_list - -- name: picard_create_sequence_dictionary - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t create_sequence_dictionary tools/picard.wdl fasta="tests/input/GRCh38.chrY_chrM.fa" outfile_name="GRCh38.chrY_chrM.dict" - files: - - path: output/outputs.json - contains: - - GRCh38.chrY_chrM.dict \ No newline at end of file diff --git a/tests/tools/test_qualimap.yaml b/tests/tools/test_qualimap.yaml deleted file mode 100644 index 405b31517..000000000 --- a/tests/tools/test_qualimap.yaml +++ /dev/null @@ -1,11 +0,0 @@ -- name: qualimap_rnaseq - tags: - - qualimap - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t rnaseq tools/qualimap.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.qualimap_rnaseq_results.tar.gz - - rnaseq_qc_results.txt - - coverage_profile_along_genes_(total).txt diff --git a/tests/tools/test_sambamba.yaml b/tests/tools/test_sambamba.yaml deleted file mode 100644 index 4a1464a2b..000000000 --- a/tests/tools/test_sambamba.yaml +++ /dev/null @@ -1,51 +0,0 @@ -- name: sambamba_index - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t index tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.bai - -- name: sambamba_merge - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge -i tests/tools/input_json/sambamba_merge.json tools/sambamba.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: sambamba_sort - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - -- name: sambamba_flagstat - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t flagstat tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.flagstat.txt - -- name: sambamba_markdup - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t markdup tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.markdup.bam - - test.bwa_aln_pe.chrY_chrM.markdup.bam.bai - - test.bwa_aln_pe.chrY_chrM.markdup_log.txt \ No newline at end of file diff --git a/tests/tools/test_samtools.py b/tests/tools/test_samtools.py deleted file mode 100644 index 272ed7b76..000000000 --- a/tests/tools/test_samtools.py +++ /dev/null @@ -1,53 +0,0 @@ -"""TODO: rewrite this so it is runner agnostic""" - -# import pytest -# import pathlib -# from collections import OrderedDict - -# import pysam -# import fastq - - -# @pytest.mark.workflow('samtools_split') -# def test_samtools_split(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/split_bams/0/test.1.bam') -# samfile = pysam.AlignmentFile(bam, "rb") -# bam_header = OrderedDict((k, v) for k, v in samfile.header.items()) -# read_groups = [read_group['ID'] for read_group in bam_header.get('RG', []) if 'ID' in read_group] -# assert len(read_groups) == 1 -# assert read_groups[0] == "1" - -# second_bam = pathlib.Path(workflow_dir, 'test-output/out/split_bams/1/test.2.bam') -# second_samfile = pysam.AlignmentFile(second_bam, "rb") -# second_bam_header = OrderedDict((k, v) for k, v in second_samfile.header.items()) -# second_read_groups = [read_group['ID'] for read_group in second_bam_header.get('RG', []) if 'ID' in read_group] -# assert len(second_read_groups) == 1 -# assert second_read_groups[0] == "2" - -# @pytest.mark.workflow('samtools_merge') -# def test_samtools_merge(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/merged_bam/test.bam') -# samfile = pysam.AlignmentFile(bam, "rb") -# bam_header = OrderedDict((k, v) for k, v in samfile.header.items()) -# read_groups = [read_group['ID'] for read_group in bam_header.get('RG', []) if 'ID' in read_group] -# assert len(read_groups) == 2 -# assert read_groups[0] == "test2" -# assert read_groups[1] == "test.bwa_aln_pe.chrY_chrM" - -# @pytest.mark.workflow('samtools_collate', 'samtools_collate_to_fastq') -# def test_samtools_collate(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/collated_bam/test.bwa_aln_pe.chrY_chrM.collated.bam') -# samfile = pysam.AlignmentFile(bam, "rb") - -# reads = list(samfile.fetch(until_eof=True)) -# for c in range(0, 100, 2): -# assert reads[c].query_name == reads[c+1].query_name -# assert reads[c].is_read1 != reads[c+1].is_read1 - -# @pytest.mark.workflow('samtools_bam_to_fastq', 'samtools_collate_to_fastq') -# def test_samtools_bam_to_fastq(workflow_dir): -# fq1 = fastq.read(pathlib.Path(workflow_dir, 'test-output/out/read_one_fastq_gz/test.bwa_aln_pe.chrY_chrM.R1.fastq.gz')) -# fq2 = fastq.read(pathlib.Path(workflow_dir, 'test-output/out/read_two_fastq_gz/test.bwa_aln_pe.chrY_chrM.R2.fastq.gz')) - -# for r1, r2 in zip(fq1, fq2): -# assert r1.head.removesuffix("/1") == r2.head.removesuffix("/2") diff --git a/tests/tools/test_samtools.yaml b/tests/tools/test_samtools.yaml deleted file mode 100644 index 4d08c8583..000000000 --- a/tests/tools/test_samtools.yaml +++ /dev/null @@ -1,125 +0,0 @@ -- name: samtools_quickcheck - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t quickcheck tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - -- name: samtools_split - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.bam" - files: - - path: output/outputs.json - contains: - - test.1.bam - - test.2.bam - -- name: samtools_split_unaccounted - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.unaccounted_read.bam" - exit_code: 1 - stderr: - contains: - - "There are reads present with bad or missing RG tags!" - -- name: samtools_split_extra_RG - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.extra_RG.bam" - exit_code: 1 - stderr: - contains: - - "No reads are in output BAM test.extra_RG.no_match.bam!" - - "This is likely caused by malformed RG records." - -- name: samtools_flagstat - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t flagstat tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.flagstat.txt - # contains: - # - "20000" - # - "0 + 0 secondary" - # - "20000 + 0 in total (QC-passed reads + QC-failed reads)" - # - "10000 + 0 read1" - # - "10000 + 0 read2" - -- name: samtools_index - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t index tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.bai - -- name: samtools_subsample - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample -i tests/tools/input_json/samtools_subsample.json tools/samtools.wdl - files: - - path: output/outputs.json - contains: - - test.sampled.bam - -- name: samtools_merge - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge -i tests/tools/input_json/samtools_merge.json tools/samtools.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: samtools_addreplacerg - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t addreplacerg tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" read_group_id="test" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.addreplacerg.bam - -- name: samtools_collate - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collate tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.collated.bam - -- name: samtools_bam_to_fastq - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_to_fastq tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" retain_collated_bam=true - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.collated.bam - - test.bwa_aln_pe.chrY_chrM.R1.fastq.gz - - test.bwa_aln_pe.chrY_chrM.R2.fastq.gz - -- name: samtools_faidx - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t faidx tools/samtools.wdl fasta="tests/input/test.fa" - files: - - path: output/outputs.json - contains: - - test.fa.fai diff --git a/tests/tools/test_star.yaml b/tests/tools/test_star.yaml deleted file mode 100644 index 159f217e7..000000000 --- a/tests/tools/test_star.yaml +++ /dev/null @@ -1,43 +0,0 @@ -- name: build_star_db - tags: - - star - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_star_db tools/star.wdl reference_fasta=tests/input/GRCh38.chrY_chrM.fa gtf=tests/input/gencode.v31.chrY_chrM.gtf.gz - files: - - path: output/outputs.json - contains: - - star_db.tar.gz - -- name: star_alignment_PE_1_pair - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_PE_1_pair.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam - -- name: star_alignment_PE_2_pairs - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_PE_2_pairs.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam - -- name: star_alignment_SE - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_SE.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam \ No newline at end of file diff --git a/tests/tools/test_util.yaml b/tests/tools/test_util.yaml deleted file mode 100644 index 695da9859..000000000 --- a/tests/tools/test_util.yaml +++ /dev/null @@ -1,94 +0,0 @@ -- name: download - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -i tests/tools/input_json/util_download.json -t download tools/util.wdl - files: - - path: output/outputs.json - contains: - - license.txt - # md5sum: cf3575bd84ab3151c7e9700b5f1a9746 - # contains: - # - "MIT License" - -- name: split_string - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_string -i tests/tools/input_json/util_split_string.json tools/util.wdl - stdout: - contains: - - "rg1" - - "rg2" - must_not_contain: - - "rg1 , rg2" - -- name: calc_feature_lengths - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t calc_feature_lengths tools/util.wdl gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - gencode.v31.chrY_chrM.genelengths.txt - # contains: - # - "AL954722.1" - -- name: compression_integrity - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t compression_integrity tools/util.wdl bgzipped_file="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - -# TODO: This does not test that the record was properly added to the header. -- name: add_to_bam_header - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t add_to_bam_header -i tests/tools/input_json/util_add_to_bam_header.json tools/util.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.reheader.bam - -- name: unpack_tarball - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t unpack_tarball tools/util.wdl tarball="tests/input/test.tar.gz" - files: - - path: output/outputs.json - contains: - - test_file_a - - test_file_b - -- name: make_coverage_regions_bed - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t make_coverage_regions_bed tools/util.wdl gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" feature_type="exon" - files: - - path: output/outputs.json - contains: - - gencode.v31.chrY_chrM.exon.bed - -- name: global_phred_scores - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t global_phred_scores tools/util.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.global_PHRED_scores.tsv - -- name: split_fastq - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_fastq tools/util.wdl fastq="tests/input/test_R1.fq.gz" prefix="test.R1." - files: - - path: output/outputs.json - contains: - - test.R1.000000.fastq.gz \ No newline at end of file diff --git a/tests/workflows/_test_methylation-preprocess.yaml b/tests/workflows/_test_methylation-preprocess.yaml deleted file mode 100644 index 3bfe379b8..000000000 --- a/tests/workflows/_test_methylation-preprocess.yaml +++ /dev/null @@ -1,15 +0,0 @@ -- name: process_raw_idats - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t process_raw_idats -i tests/workflows/input_json/process_raw_idats.json workflows/methylation/methylation-preprocess.wdl - files: - - path: output/outputs.json - contains: - - 201533520001_R03C01.beta_swan_norm_unfiltered.csv - - 201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv - - 201533520001_R03C01.annotation.csv - - 201533520001_R03C01.beta.csv - - 201533520001_R03C01.cn_values.csv - - 201533520001_R03C01.m_values.csv - - 201533520001_R03C01.probeNames.csv \ No newline at end of file diff --git a/tests/workflows/input_json/combine_data.json b/tests/workflows/input_json/combine_data.json deleted file mode 100644 index ec1edd324..000000000 --- a/tests/workflows/input_json/combine_data.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "combine_data.files_to_combine": [ - "../../input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv", - "../../input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv" - ], - "combine_data.combined_file_name": "combined_beta.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/dnaseq-standard-fastq.json b/tests/workflows/input_json/dnaseq-standard-fastq.json deleted file mode 100644 index f1db97b82..000000000 --- a/tests/workflows/input_json/dnaseq-standard-fastq.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "dnaseq_standard_fastq_experimental.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "dnaseq_standard_fastq_experimental.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "dnaseq_standard_fastq_experimental.read_groups": [ - { - "ID": "test", - "PI": 150, - "PL": "ILLUMINA", - "SM": "Sample", - "LB": "Sample" - } - ], - "dnaseq_standard_fastq_experimental.bwa_db": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/workflows/input_json/dnaseq-standard.json b/tests/workflows/input_json/dnaseq-standard.json deleted file mode 100644 index 06e8d8cac..000000000 --- a/tests/workflows/input_json/dnaseq-standard.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "dnaseq_standard_experimental.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "dnaseq_standard_experimental.bwa_db": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/workflows/input_json/filter_probes.json b/tests/workflows/input_json/filter_probes.json deleted file mode 100644 index 8b7d0a024..000000000 --- a/tests/workflows/input_json/filter_probes.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "filter_probes.beta_values": "../../input/combined_beta.csv", - "filter_probes.num_probes": 1000 -} \ No newline at end of file diff --git a/tests/workflows/input_json/generate_umap.json b/tests/workflows/input_json/generate_umap.json deleted file mode 100644 index 670c02298..000000000 --- a/tests/workflows/input_json/generate_umap.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "generate_umap.filtered_beta_values": "../../input/filtered_beta.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/plot_umap.json b/tests/workflows/input_json/plot_umap.json deleted file mode 100644 index 86444c490..000000000 --- a/tests/workflows/input_json/plot_umap.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "plot_umap.umap": "../../input/umap.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/process_raw_idats.json b/tests/workflows/input_json/process_raw_idats.json deleted file mode 100644 index b1dbbbc77..000000000 --- a/tests/workflows/input_json/process_raw_idats.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "process_raw_idats.idats": { - "left": "../../input/201533520001_R03C01_Grn.idat", - "right": "../../input/201533520001_R03C01_Red.idat" - } -} \ No newline at end of file diff --git a/tests/workflows/input_json/qc-standard.json b/tests/workflows/input_json/qc-standard.json deleted file mode 100644 index 0f552166e..000000000 --- a/tests/workflows/input_json/qc-standard.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "quality_check_standard.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "quality_check_standard.bam_index": "../../input/test.bwa_aln_pe.chrY_chrM.bam.bai", - "quality_check_standard.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "quality_check_standard.kraken_db": "../../input/kraken2_db.mini.tar.gz", - "quality_check_standard.rna": true, - "quality_check_standard.coverage_beds": [ - "../../input/gencode.v31.chrY_chrM.gene.bed" - ] -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-standard-fastq.json b/tests/workflows/input_json/rnaseq-standard-fastq.json deleted file mode 100644 index 452d4c932..000000000 --- a/tests/workflows/input_json/rnaseq-standard-fastq.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "rnaseq_standard_fastq.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "rnaseq_standard_fastq.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "rnaseq_standard_fastq.read_groups": [ - { - "ID": "test", - "PI": 150, - "PL": "ILLUMINA", - "SM": "Sample", - "LB": "Sample" - } - ], - "rnaseq_standard_fastq.prefix": "test", - "rnaseq_standard_fastq.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "rnaseq_standard_fastq.star_db": "../../input/star_db.chrY_chrM.tar.gz", - "rnaseq_standard_fastq.strandedness": "Unstranded" -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-standard.json b/tests/workflows/input_json/rnaseq-standard.json deleted file mode 100644 index a42cbc5a1..000000000 --- a/tests/workflows/input_json/rnaseq-standard.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "rnaseq_standard.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "rnaseq_standard.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "rnaseq_standard.star_db": "../../input/star_db.chrY_chrM.tar.gz", - "rnaseq_standard.strandedness": "Unstranded" -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-variant-calling.json b/tests/workflows/input_json/rnaseq-variant-calling.json deleted file mode 100644 index e108c5298..000000000 --- a/tests/workflows/input_json/rnaseq-variant-calling.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "rnaseq_variant_calling.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "rnaseq_variant_calling.bam_index": "../../input/test.bwa_aln_pe.chrY_chrM.bam.bai", - "rnaseq_variant_calling.fasta": "../../input/GRCh38.chrY_chrM.fa", - "rnaseq_variant_calling.fasta_index": "../../input/GRCh38.chrY_chrM.fa.fai", - "rnaseq_variant_calling.dict": "../../input/GRCh38.chrY_chrM.dict", - "rnaseq_variant_calling.calling_interval_list": "../../input/wgs_calling_regions.hg38.interval_list", - "rnaseq_variant_calling.known_vcfs": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - ], - "rnaseq_variant_calling.known_vcf_indexes": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi" - ], - "rnaseq_variant_calling.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "rnaseq_variant_calling.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx" -} \ No newline at end of file diff --git a/tests/workflows/test_methylation-cohort.yaml b/tests/workflows/test_methylation-cohort.yaml deleted file mode 100644 index f981b75a7..000000000 --- a/tests/workflows/test_methylation-cohort.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: combine_data - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t combine_data -i tests/workflows/input_json/combine_data.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - combined_beta.csv - -- name: filter_probes - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t filter_probes -i tests/workflows/input_json/filter_probes.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - filtered.beta.csv - - filtered.probes.csv - -- name: generate_umap - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t generate_umap -i tests/workflows/input_json/generate_umap.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - umap.csv - -- name: plot_umap - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t plot_umap -i tests/workflows/input_json/plot_umap.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - umap.png diff --git a/tools/arriba.wdl b/tools/arriba.wdl index 196479133..fb5d0a149 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -169,7 +169,6 @@ task arriba { "NC_*", ] Array[String] disable_filters = [] - #@ except: LineWidth String feature_name = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" String prefix = basename(bam, ".bam") + ".fusions" String strandedness = "auto" @@ -200,9 +199,9 @@ task arriba { Int modify_disk_size_gb = 0 } - Int bam_size_gb = ceil(size(bam, "GB")) - Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GB")) + ceil(size(reference_fasta_gz, - "GB")) + modify_disk_size_gb + Int bam_size_gb = ceil(size(bam, "GiB")) + Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GiB")) + ceil(size(reference_fasta_gz, + "GiB")) + modify_disk_size_gb Int memory_gb = bam_size_gb + modify_memory_gb command <<< @@ -221,18 +220,18 @@ task arriba { ~{"-d '" + wgs_svs + "'"} \ -D ~{max_genomic_breakpoint_distance} \ -s "~{strandedness}" \ - ~{if length(interesting_contigs) > 0 + ~{(if length(interesting_contigs) > 0 then "-i " + sep(",", quote(interesting_contigs)) else "" - } \ - ~{if length(viral_contigs) > 0 + )} \ + ~{(if length(viral_contigs) > 0 then "-v " + sep(",", quote(viral_contigs)) else "" - } \ - ~{if length(disable_filters) > 0 + )} \ + ~{(if length(disable_filters) > 0 then "-f " + sep(",", quote(disable_filters)) else "" - } \ + )} \ -E ~{max_e_value} \ -S ~{min_supporting_reads} \ -m ~{max_mismappers} \ @@ -302,8 +301,8 @@ task arriba_tsv_to_vcf { Int modify_disk_size_gb = 0 } - Int input_size_gb = ceil(size(fusions, "GB")) - Int disk_size_gb = ceil(input_size_gb) + (ceil(size(reference_fasta, "GB")) * 3) + modify_disk_size_gb + Int input_size_gb = ceil(size(fusions, "GiB")) + Int disk_size_gb = ceil(input_size_gb) + (ceil(size(reference_fasta, "GiB")) * 3) + modify_disk_size_gb command <<< set -euo pipefail @@ -356,7 +355,7 @@ task arriba_extract_fusion_supporting_alignments { Int modify_disk_size_gb = 0 } - Int input_size_gb = ceil(size(bam, "GB")) + Int input_size_gb = ceil(size(bam, "GiB")) Int disk_size_gb = ceil(input_size_gb) + 5 + modify_disk_size_gb command <<< @@ -401,7 +400,7 @@ task arriba_annotate_exon_numbers { Int modify_disk_size_gb = 0 } - Int input_size_gb = ceil(size(gtf, "GB")) + Int input_size_gb = ceil(size(gtf, "GiB")) Int disk_size_gb = ceil(input_size_gb) + 5 + modify_disk_size_gb command <<< diff --git a/tools/bwa.wdl b/tools/bwa.wdl index a27cb71a0..3a7b3cb0d 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -45,8 +45,8 @@ task bwa_aln { String output_bam = prefix + ".bam" - Float input_fastq_size = size(fastq, "GB") - Float reference_size = size(bwa_db_tar_gz, "GB") + Float input_fastq_size = size(fastq, "GiB") + Float reference_size = size(bwa_db_tar_gz, "GiB") Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb ) @@ -137,9 +137,9 @@ task bwa_aln_pe { String output_bam = prefix + ".bam" - Float input_fastq_size = (size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB" + Float input_fastq_size = (size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB" )) - Float reference_size = size(bwa_db_tar_gz, "GB") + Float reference_size = size(bwa_db_tar_gz, "GiB") Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb ) @@ -230,8 +230,9 @@ task bwa_mem { String output_bam = prefix + ".bam" - Float input_fastq_size = size(read_one_fastq_gz, "GB") + size(read_two_fastq_gz, "GB") - Float reference_size = size(bwa_db_tar_gz, "GB") + Float input_fastq_size = size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB" + ) + Float reference_size = size(bwa_db_tar_gz, "GiB") Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb ) @@ -257,23 +258,23 @@ task bwa_mem { -R "~{read_group}" \ bwa_db/"$PREFIX" \ "~{basename(read_one_fastq_gz)}" \ - ~{if defined(read_two_fastq_gz) + ~{(if defined(read_two_fastq_gz) then "'" + basename(select_first([ read_two_fastq_gz, ])) + "'" else "" - } \ + )} \ | samtools view --no-PG --threads "$samtools_cores" -hb - \ > "~{output_bam}" rm -r bwa_db rm "~{basename(read_one_fastq_gz)}" - ~{if defined(read_two_fastq_gz) + ~{(if defined(read_two_fastq_gz) then "rm '" + basename(select_first([ read_two_fastq_gz, ])) + "'" else "" - } + )} >>> output { @@ -312,7 +313,7 @@ task build_bwa_db { Int modify_disk_size_gb = 0 } - Float input_fasta_size = size(reference_fasta, "GB") + Float input_fasta_size = size(reference_fasta, "GiB") Int disk_size_gb = ceil(input_fasta_size * 2) + 10 + modify_disk_size_gb String bwa_db_out_name = db_name + ".tar.gz" diff --git a/tools/fastp.wdl b/tools/fastp.wdl index 2701ec7b6..64d1ea545 100644 --- a/tools/fastp.wdl +++ b/tools/fastp.wdl @@ -159,16 +159,17 @@ task fastp { fastp \ -i "~{read_one_fastq}" \ ~{"-I '" + read_two_fastq + "'"} \ - ~{if output_fastq - then "-o '" + if defined(read_two_fastq) + ~{(if output_fastq + then "-o '" + (if defined(read_two_fastq) then "~{prefix}.R1.fastq.gz" - else "~{prefix}.fastq.gz" + "'" + else "~{prefix}.fastq.gz" + ) + "'" else "" - } \ - ~{if (defined(read_two_fastq) && output_fastq) + )} \ + ~{(if (defined(read_two_fastq) && output_fastq) then "-O '" + prefix + ".R2.fastq.gz'" else "" - } \ + )} \ --reads_to_process ~{first_n_reads} \ ~{if deduplicate then "--dedup" @@ -248,9 +249,10 @@ task fastp { runtime { cpu: ncpu - memory: if disable_duplicate_eval + memory: (if disable_duplicate_eval then "4 GB" else dup_acc_to_mem[duplicate_accuracy] + ) disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/fastp:1.0.1--heae3180_0" maxRetries: 1 diff --git a/tools/fastqc.wdl b/tools/fastqc.wdl index feddc237b..7b63db9e5 100755 --- a/tools/fastqc.wdl +++ b/tools/fastqc.wdl @@ -4,6 +4,8 @@ version 1.1 task fastqc { meta { description: "Generates a FastQC quality control metrics report for the input BAM file" + warning: "**[DEPRECATED]** We prefer the analysis provided by `fastp` which computes similar metrics but is faster and more robust. Please see the `fastp` task in `fastp.wdl` instead of using FastQC!" + deprecated: true outputs: { raw_data: "A zip archive of raw FastQC data. Can be parsed by MultiQC.", results: "A gzipped tar archive of all FastQC output files", diff --git a/tools/fq.wdl b/tools/fq.wdl index 4bf2133e9..b353597f2 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -63,8 +63,8 @@ task fqlint { Int modify_disk_size_gb = 0 } - Float read1_size = size(read_one_fastq, "GB") - Float read2_size = size(read_two_fastq, "GB") + Float read1_size = size(read_one_fastq, "GiB") + Float read2_size = size(read_two_fastq, "GiB") Int memory_gb = (ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb) @@ -131,14 +131,15 @@ task subsample { Int modify_disk_size_gb = 0 } - Float read1_size = size(read_one_fastq, "GB") - Float read2_size = size(read_two_fastq, "GB") + Float read1_size = size(read_one_fastq, "GiB") + Float read2_size = size(read_two_fastq, "GiB") Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb - String probability_arg = if (probability < 1.0 && probability > 0) + String probability_arg = (if (probability < 1.0 && probability > 0) then "-p ~{probability}" else "" + ) String record_count_arg = if (record_count > 0) then "-n ~{record_count}" else "" @@ -152,7 +153,10 @@ task subsample { ~{probability_arg} \ ~{record_count_arg} \ --r1-dst "~{r1_dst}" \ - ~{"--r2-dst '" + r2_dst + "'"} \ + ~{if defined(read_two_fastq) + then "--r2-dst '" + r2_dst + "'" + else "" + } \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index a723b1d28..c54a4e9f9 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -40,16 +40,15 @@ task split_n_cigar_reads { Int java_heap_size = ceil(memory_gb * 0.9) command <<< - set -euo pipefail - - gatk \ - --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ - SplitNCigarReads \ - -R "~{fasta}" \ - -I "~{bam}" \ - -O "~{prefix}.bam" \ - -OBM true - + set -euo pipefail + + gatk \ + --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ + SplitNCigarReads \ + -R "~{fasta}" \ + -I "~{bam}" \ + -O "~{prefix}.bam" \ + -OBM true # GATK is unreasonable and uses the plain ".bai" suffix. mv "~{prefix}.bai" "~{prefix}.bam.bai" >>> @@ -117,19 +116,19 @@ task base_recalibrator { Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) - #@ except: LineWidth command <<< # shellcheck disable=SC2102 gatk \ - --java-options \ - "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{ + java_heap_size + }g" \ BaseRecalibratorSpark \ -R "~{fasta}" \ -I "~{bam}" \ - ~{if use_original_quality_scores + ~{(if use_original_quality_scores then "--use-original-qualities" else "" - } \ + )} \ -O "~{outfile_name}" \ --known-sites "~{dbSNP_vcf}" \ ~{sep(" ", prefix("--known-sites ", squote(known_indels_sites_vcfs)))} \ @@ -184,14 +183,14 @@ task apply_bqsr { Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) - #@ except: LineWidth command <<< set -euo pipefail # shellcheck disable=SC2102 gatk \ - --java-options \ - "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{ + java_heap_size + }g" \ ApplyBQSRSpark \ --spark-master local[~{ncpu}] \ -I "~{bam}" \ @@ -272,11 +271,9 @@ task haplotype_caller { Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) - #@ except: LineWidth command <<< gatk \ - --java-options \ - "-Xms6000m -Xmx~{java_heap_size}g -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10" \ + --java-options "-Xms6000m -Xmx~{java_heap_size}g -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10" \ HaplotypeCaller \ -R "~{fasta}" \ -I "~{bam}" \ @@ -457,12 +454,12 @@ task mark_duplicates_spark { Int ncpu = 4 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb - Int disk_size_gb = (if create_bam + Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10) - ) + modify_disk_size_gb + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) diff --git a/tools/htseq.wdl b/tools/htseq.wdl index 0552586b5..cb8b16014 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -33,15 +33,6 @@ task count { description: "GFF attribute to be used as feature ID", group: "Common", } - mode: { - description: "Mode to handle reads overlapping more than one feature. `union` is recommended for most use-cases.", - external_help: "https://htseq.readthedocs.io/en/latest/htseqcount.html#htseq-count-counting-reads-within-features", - choices: [ - "union", - "intersection-strict", - "intersection-nonempty", - ], - } include_custom_header: { description: "Include a custom header for the output file? If true, the first line of the output file will be `~{idattr}\t~{prefix}`.", warning: "This is not an official feature of HTSeq. This may break downstream tools that expect the typical headerless HTSeq output format.", @@ -79,7 +70,6 @@ task count { String prefix = basename(bam, ".bam") String feature_type = "exon" String idattr = "gene_name" - String mode = "union" Boolean include_custom_header = true Boolean pos_sorted = false Boolean nonunique = false @@ -92,8 +82,11 @@ task count { String outfile_name = prefix + ".feature-counts.txt" - Float bam_size = size(bam, "GB") - Float gtf_size = size(gtf, "GB") + # the docs recommend this for most use cases, so we hardcode + String mode = "union" + + Float bam_size = size(bam, "GiB") + Float gtf_size = size(gtf, "GiB") Int memory_gb = (if pos_sorted then ceil(bam_size) + 4 @@ -110,8 +103,6 @@ task count { if ~{include_custom_header}; then echo -e "~{idattr}\t~{prefix}" > "~{outfile_name}" - else - true > "~{outfile_name}" # ensure file is empty fi # 9223372036854776000 == max 64 bit Float @@ -134,10 +125,10 @@ task count { then "score" else "ignore" } \ - --supplementary-alignments ~{if supplementary_alignments + --supplementary-alignments ~{(if supplementary_alignments then "score" else "ignore" - } \ + )} \ "~{bam}" \ "~{gtf}" \ >> "~{outfile_name}" @@ -204,7 +195,7 @@ task calc_tpm { runtime { memory: "4 GB" disks: "10 GB" - container: "ghcr.io/stjudecloud/util:3.0.3" + container: "ghcr.io/stjudecloud/util:3.0.1" maxRetries: 1 } } diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index 601f8c1f6..018921bd2 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -98,14 +98,14 @@ task download_library { String db_name = "kraken2_" + library_name + "_library" - Int disk_size_gb = (if library_name == "bacteria" + Int disk_size_gb = ((if library_name == "bacteria" then 300 else if library_name == "nr" - then 600 - else if library_name == "nt" - then 2500 - else 25 - ) + modify_disk_size_gb + then 600 + else if library_name == "nt" + then 2500 + else 25 + ) + modify_disk_size_gb) command <<< set -euo pipefail @@ -165,7 +165,7 @@ task create_library_from_fastas { String db_name = "kraken2_custom_library" - Float fastas_size = size(fastas_gz, "GB") + Float fastas_size = size(fastas_gz, "GiB") Int disk_size_gb = ceil(fastas_size * 5) + 10 + modify_disk_size_gb command <<< @@ -260,12 +260,12 @@ task build_db { Int modify_disk_size_gb = 0 } - Float tarballs_size = size(tarballs, "GB") + Float tarballs_size = size(tarballs, "GiB") Int disk_size_gb = ceil(tarballs_size * 6) + 10 + modify_disk_size_gb - Int memory_gb = (if (max_db_size_gb > 0) + Int memory_gb = ((if (max_db_size_gb > 0) then ceil(max_db_size_gb * 1.2) else ceil(tarballs_size * 2) - ) + modify_memory_gb + ) + modify_memory_gb) String max_db_size_bytes = "~{max_db_size_gb}000000000" @@ -294,10 +294,10 @@ task build_db { --kmer-len ~{kmer_len} \ --minimizer-len ~{minimizer_len} \ --minimizer-spaces ~{minimizer_spaces} \ - ~{if (max_db_size_gb > 0) + ~{(if (max_db_size_gb > 0) then "--max-db-size '" + max_db_size_bytes + "'" else "" - } \ + )} \ --threads "$n_cores" \ --db "~{db_name}" @@ -383,14 +383,16 @@ task kraken { Int modify_disk_size_gb = 0 } - Float db_size = size(db, "GB") - Float read1_size = size(read_one_fastq_gz, "GB") - Float read2_size = size(read_two_fastq_gz, "GB") + Float db_size = size(db, "GiB") + Float read1_size = size(read_one_fastq_gz, "GiB") + Float read2_size = size(read_two_fastq_gz, "GiB") Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb ) - Int disk_size_gb = if store_sequences + Int disk_size_gb = (if store_sequences then disk_size_gb_calculation + ceil(read1_size + read2_size) else disk_size_gb_calculation + ) + Int memory_gb = ceil(db_size * 2) + modify_memory_gb String out_report = prefix + ".kraken2.txt" diff --git a/tools/librarian.wdl b/tools/librarian.wdl index 9fe3efac5..88b2fd078 100644 --- a/tools/librarian.wdl +++ b/tools/librarian.wdl @@ -29,7 +29,7 @@ task librarian { Int modify_disk_size_gb = 0 } - Float read1_size = size(read_one_fastq, "GB") + Float read1_size = size(read_one_fastq, "GiB") Int disk_size_gb = (ceil(read1_size) + 10 + modify_disk_size_gb) command <<< diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index f7856be9a..72cb92ced 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -46,7 +46,7 @@ task strandedness { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< @@ -119,7 +119,7 @@ task instrument { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< @@ -178,7 +178,7 @@ task read_length { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< @@ -235,7 +235,7 @@ task encoding { Int modify_disk_size_gb = 0 } - Float files_size = size(ngs_files, "GB") + Float files_size = size(ngs_files, "GiB") Int disk_size_gb = ceil(files_size) + 10 + modify_disk_size_gb command <<< @@ -303,7 +303,7 @@ task junction_annotation { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< @@ -399,10 +399,11 @@ task endedness { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") - Int memory_gb = if calc_rpt + Float bam_size = size(bam, "GiB") + Int memory_gb = (if calc_rpt then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) else 4 + ) Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< diff --git a/tools/picard.wdl b/tools/picard.wdl index 4b37db614..3c8f7d4ce 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -81,12 +81,12 @@ task mark_duplicates { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = (if create_bam + Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10) - ) + modify_disk_size_gb + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -194,17 +194,15 @@ task validate_bam { Int modify_disk_size_gb = 0 } - String outfile = if summary_mode - then outfile_name - else outfile_name + ".gz" - String mode_arg = if summary_mode + String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" - String stringency_arg = if index_validation_stringency_less_exhaustive + String stringency_arg = (if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" else "" - Float bam_size = size(bam, "GB") - Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb + ) + Float bam_size = size(bam, "GiB") + Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) command <<< @@ -220,11 +218,7 @@ task validate_bam { --VALIDATION_STRINGENCY "~{validation_stringency}" \ ~{sep(" ", prefix("--IGNORE ", squote(ignore_list)))} \ --MAX_OUTPUT ~{max_errors} \ - ~{if !summary_mode - then "| gzip" - else "" - } \ - > "~{outfile}" \ + > "~{outfile_name}" \ || rc=$? # rc = 0 = success @@ -242,16 +236,16 @@ task validate_bam { fi if ! ~{succeed_on_errors} \ - && [ "$(grep -Ec "$GREP_PATTERN" "~{outfile}")" -gt 0 ] + && [ "$(grep -Ec "$GREP_PATTERN" "~{outfile_name}")" -gt 0 ] then >&2 echo "Problems detected by Picard ValidateSamFile" - >&2 grep -E "$GREP_PATTERN" "~{outfile}" + >&2 grep -E "$GREP_PATTERN" "~{outfile_name}" exit $rc fi >>> output { - File validate_report = outfile + File validate_report = outfile_name } runtime { @@ -307,7 +301,7 @@ task sort { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 4) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -396,7 +390,7 @@ task merge_sam_files { Int modify_disk_size_gb = 0 } - Float bams_size = size(bams, "GB") + Float bams_size = size(bams, "GiB") Int disk_size_gb = ceil(bams_size * 2) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -471,7 +465,7 @@ task clean_sam { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -543,7 +537,7 @@ task collect_wgs_metrics { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -606,7 +600,7 @@ task collect_alignment_summary_metrics { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -674,7 +668,7 @@ task collect_gc_bias_metrics { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -740,7 +734,7 @@ task collect_insert_size_metrics { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -799,7 +793,7 @@ task quality_score_distribution { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -851,7 +845,7 @@ task bam_to_fastq { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 4) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -860,10 +854,10 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{if paired + ~{(if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "" - } \ + )} \ RE_REVERSE=true \ VALIDATION_STRINGENCY=SILENT @@ -911,7 +905,7 @@ task merge_vcfs { Int modify_disk_size_gb = 0 } - Int disk_size_gb = ceil(size(vcfs, "GB") * 2) + 10 + modify_disk_size_gb + Int disk_size_gb = ceil(size(vcfs, "GiB") * 2) + 10 + modify_disk_size_gb command <<< picard -Xms2000m \ @@ -1034,7 +1028,7 @@ task create_sequence_dictionary { Int modify_disk_size_gb = 0 } - Float fasta_size = size(fasta, "GB") + Float fasta_size = size(fasta, "GiB") Int disk_size_gb = ceil(fasta_size * 2) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl index 498bdc797..4aec8447d 100755 --- a/tools/qualimap.wdl +++ b/tools/qualimap.wdl @@ -40,23 +40,23 @@ task rnaseq { } String out_tar_gz = prefix + ".tar.gz" - String name_sorted_arg = if name_sorted + String name_sorted_arg = if (name_sorted) then "-s" else "" - String paired_end_arg = if paired_end + String paired_end_arg = if (paired_end) then "-pe" else "" Int java_heap_size = ceil(memory_gb * 0.9) - Float bam_size = size(bam, "GB") - Float gtf_size = size(gtf, "GB") + Float bam_size = size(bam, "GiB") + Float gtf_size = size(gtf, "GiB") # Qualimap has an inefficient name sorting algorithm and will # use an excessive amount of storage. - Int disk_size_gb = (if name_sorted + Int disk_size_gb = ((if name_sorted then ceil(bam_size + gtf_size + 15) else ceil(((bam_size + gtf_size) * 12) + 10) - ) + modify_disk_size_gb + ) + modify_disk_size_gb) command <<< set -euo pipefail @@ -68,12 +68,12 @@ task rnaseq { # '-oc qualimap_counts.txt' puts the file in '-outdir' # shellcheck disable=SC2086 qualimap rnaseq -bam "~{bam}" \ - -oc qualimap_counts.txt \ - -gtf "$gtf_name" \ - -outdir "~{prefix}" \ - ~{name_sorted_arg} \ - ~{paired_end_arg} \ - --java-mem-size=~{java_heap_size}G + -oc qualimap_counts.txt \ + -gtf "$gtf_name" \ + -outdir "~{prefix}" \ + ~{name_sorted_arg} \ + ~{paired_end_arg} \ + --java-mem-size=~{java_heap_size}G rm "$gtf_name" tar -czf "~{out_tar_gz}" "~{prefix}" @@ -124,7 +124,7 @@ task bamqc { Int java_heap_size = ceil(memory_gb * 0.9) - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< diff --git a/tools/samtools.wdl b/tools/samtools.wdl index 719b079b3..b75864496 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -19,7 +19,7 @@ task quickcheck { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< @@ -76,7 +76,7 @@ task split { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb command <<< @@ -177,7 +177,7 @@ task flagstat { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< @@ -233,7 +233,7 @@ task index { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 1.2) + 10 + modify_disk_size_gb String outfile_name = basename(bam) + ".bai" @@ -303,7 +303,7 @@ task subsample { String suffixed = prefix + ".sampled" - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb command <<< @@ -445,7 +445,7 @@ task filter { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb command <<< @@ -558,8 +558,8 @@ task merge { Int modify_disk_size_gb = 0 } - Float bams_size = size(bams, "GB") - Float header_size = size(new_header, "GB") + Float bams_size = size(bams, "GiB") + Float header_size = size(new_header, "GiB") Int disk_size_gb = ceil(bams_size * 2 + header_size) + 10 + modify_disk_size_gb command <<< @@ -671,7 +671,7 @@ task addreplacerg { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb String outfile_name = prefix + ".bam" @@ -752,7 +752,7 @@ task collate { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int memory_gb = ceil(bam_size * 0.2) + 4 + modify_memory_gb Int disk_size_gb = ceil(bam_size * 4) + 10 + modify_disk_size_gb @@ -875,15 +875,16 @@ task bam_to_fastq { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int memory_gb = (if (collated || !paired_end) then 4 else (ceil(bam_size * 0.4) + 4) ) + modify_memory_gb - Int disk_size_gb = ceil(bam_size * if (retain_collated_bam && !collated && paired_end) + Int disk_size_gb = ceil(bam_size * (if (retain_collated_bam && !collated && paired_end + ) then 5 else 2 - ) + 10 + modify_disk_size_gb + )) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -909,10 +910,10 @@ task bam_to_fastq { } \ -O \ "~{bam}" \ - | tee ~{if retain_collated_bam + | tee ~{(if retain_collated_bam then "\"" + prefix + ".collated.bam\"" else "" - } \ + )} \ > bam_pipe \ & else @@ -925,28 +926,29 @@ task bam_to_fastq { -F "~{bitwise_filter.exclude_if_any}" \ --rf "~{bitwise_filter.include_if_any}" \ -G "~{bitwise_filter.exclude_if_all}" \ - ~{if append_read_number + ~{(if append_read_number then "-N" else "-n" - } \ - -1 ~{if paired_end + )} \ + -1 ~{(if paired_end then "\"" + prefix + ".R1.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" - } \ - -2 ~{if paired_end + )} \ + -2 ~{(if paired_end then "\"" + prefix + ".R2.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" - } \ - ~{if paired_end - then if output_singletons + )} \ + ~{(if paired_end + then (if output_singletons then "-s \"" + prefix + ".singleton.fastq.gz\"" else "-s junk.singleton.fastq.gz" + ) else "" - } \ - -0 ~{if paired_end + )} \ + -0 ~{(if paired_end then "junk.unknown_bit_setting.fastq.gz" else "\"" + prefix + ".fastq.gz\"" - } \ + )} \ bam_pipe rm bam_pipe @@ -1045,7 +1047,7 @@ task fixmate { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb command <<< @@ -1154,7 +1156,7 @@ task position_sorted_fixmate { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int memory_gb = ceil(bam_size * 0.2) + 4 + modify_memory_gb Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb @@ -1314,7 +1316,7 @@ task markdup { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int memory_gb = ceil(bam_size * 3) + 4 + modify_memory_gb Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb @@ -1408,7 +1410,7 @@ task faidx { Int modify_disk_size_gb = 0 } - Float fasta_size = size(fasta, "GB") + Float fasta_size = size(fasta, "GiB") Int disk_size_gb = ceil(fasta_size * 2.5) + 10 + modify_disk_size_gb String outfile_name = basename(fasta, ".gz") + ".fai" diff --git a/tools/star.wdl b/tools/star.wdl index f22d2cbef..8b77ced1b 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -83,8 +83,8 @@ task build_star_db { String star_db_tar_gz = db_name + ".tar.gz" - Float reference_fasta_size = size(reference_fasta, "GB") - Float gtf_size = size(gtf, "GB") + Float reference_fasta_size = size(reference_fasta, "GiB") + Float gtf_size = size(gtf, "GiB") Int disk_size_gb = (ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb ) @@ -704,9 +704,9 @@ task alignment { [], ]) - Float read_one_fastqs_size = size(read_one_fastqs_gz, "GB") - Float read_two_fastqs_size = size(read_twos, "GB") - Float star_db_tar_gz_size = size(star_db_tar_gz, "GB") + Float read_one_fastqs_size = size(read_one_fastqs_gz, "GiB") + Float read_two_fastqs_size = size(read_twos, "GiB") + Float star_db_tar_gz_size = size(star_db_tar_gz, "GiB") Int disk_size_gb = ((ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size ) * 3) + 10 + modify_disk_size_gb) @@ -766,32 +766,34 @@ task alignment { align_sj_stitch_mismatch_n_max.GC_AG_and_CT_GC_motif, align_sj_stitch_mismatch_n_max.AT_AC_and_GT_AT_motif, ]))} \ - --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{if (length(read_twos) != 0) + --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{(if (length(read_twos) != 0 + ) then "'" + clip_3p_adapter_seq.right + "'" else "" - } \ - --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{if (length(read_twos) != 0) + )} \ + --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{(if (length(read_twos) != 0) then clip_3p_adapter_mmp.right else None - } \ - --alignEndsProtrude ~{align_ends_protrude.left} "~{if (length(read_twos) != 0) + )} \ + --alignEndsProtrude ~{align_ends_protrude.left} "~{(if (length(read_twos) != 0 + ) then align_ends_protrude.right else None - }" \ - --clip3pNbases ~{clip_3p_n_bases.left} ~{if (length(read_twos) != 0) + )}" \ + --clip3pNbases ~{clip_3p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_3p_n_bases.right else None - } \ - --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{if (length( + )} \ + --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{(if (length( read_twos ) != 0) then clip_3p_after_adapter_n_bases.right else None - } \ - --clip5pNbases ~{clip_5p_n_bases.left} ~{if (length(read_twos) != 0) + )} \ + --clip5pNbases ~{clip_5p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_5p_n_bases.right else None - } \ + )} \ --readNameSeparator "~{read_name_separator}" \ --clipAdapterType "~{clip_adapter_type}" \ --outSAMstrandField "~{out_sam_strand_field}" \ @@ -799,12 +801,13 @@ task alignment { --outSAMunmapped "~{out_sam_unmapped}" \ --outSAMorder "~{out_sam_order}" \ --outSAMreadID "~{out_sam_read_id}" \ - --outSAMtlen ~{if (out_sam_tlen == "left_plus") + --outSAMtlen ~{(if (out_sam_tlen == "left_plus") then "1" - else if (out_sam_tlen == "left_any") + else (if (out_sam_tlen == "left_any") then "2" else "error" - } \ + ) + )} \ --outFilterType "~{out_filter_type}" \ --outFilterIntronMotifs "~{out_filter_intron_motifs}" \ --outFilterIntronStrands "~{out_filter_intron_strands}" \ diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml new file mode 100644 index 000000000..8f51f6456 --- /dev/null +++ b/tools/test/arriba.yaml @@ -0,0 +1,44 @@ +# TODO: test advanced options +arriba: + - name: works + inputs: + bam: + - bams/Aligned.sortedByCoord.chr9_chr22.bam + gtf: + - reference/gencode.v31.chr9_chr22.gtf.gz + reference_fasta_gz: + - reference/GRCh38.chr9_chr22.fa.gz + disable_filters: + - [ blacklist ] + prefix: + - fusions +arriba_tsv_to_vcf: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + reference_fasta: + - reference/GRCh38.chr9_chr22.fa.gz + prefix: + - fusions +arriba_extract_fusion_supporting_alignments: + - name: works + inputs: + $files: + fusions: + - fusions.BCR_ABL1.tsv + bam: + - bams/Aligned.sortedByCoord.chr9_chr22.bam + bam_index: + - bams/Aligned.sortedByCoord.chr9_chr22.bam.bai + prefix: + - fusions +arriba_annotate_exon_numbers: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + gtf: + - reference/gencode.v31.chr9_chr22.gtf.gz + prefix: + - fusions \ No newline at end of file diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml new file mode 100644 index 000000000..f5e1d7c15 --- /dev/null +++ b/tools/test/bwa.yaml @@ -0,0 +1,50 @@ +bwa_aln: + - name: works + inputs: + fastq: + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz +bwa_aln_pe: + - name: works + inputs: + $files: + read_one_fastq_gz: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq_gz: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz +bwa_mem: + - name: works + inputs: + $samples: + read_one_fastq_gz: + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz + read_two_fastq_gz: + - fastqs/test_R2.fq.gz + - null + - fastqs/random10k.r2.fq.gz + - null + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz +build_bwa_db: + - name: works + tags: [ reference, slow ] + inputs: + reference_fasta: + - reference/GRCh38.chrY_chrM.fa \ No newline at end of file diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml new file mode 100644 index 000000000..e44caf285 --- /dev/null +++ b/tools/test/deeptools.yaml @@ -0,0 +1,14 @@ +bam_coverage: + - name: works + inputs: + $files: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam + bam_index: + - bams/test.bwa_aln_pe.chrY_chrM.bam.bai + - bams/Aligned.sortedByCoord.chr9_chr22.bam.bai + - bams/test_rnaseq_variant.bam.bai + - bams/test.bam.bai \ No newline at end of file diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml new file mode 100644 index 000000000..56429b143 --- /dev/null +++ b/tools/test/fastp.yaml @@ -0,0 +1,68 @@ +# TODO: test advanced options +fastp: + - name: SE_trimming + inputs: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: true + read_one_fastq_gz: + - Defined: false + read_two_fastq_gz: + - Defined: false + - name: PE_trimming + inputs: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: false + read_one_fastq_gz: + - Defined: true + read_two_fastq_gz: + - Defined: true + - name: SE_qc + inputs: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + output_fastq: + - false + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: false + read_one_fastq_gz: + - Defined: false + read_two_fastq_gz: + - Defined: false + - name: PE_qc + inputs: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + output_fastq: + - false + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: false + read_one_fastq_gz: + - Defined: false + read_two_fastq_gz: + - Defined: false \ No newline at end of file diff --git a/tools/test/fq.yaml b/tools/test/fq.yaml new file mode 100644 index 000000000..c8c48084a --- /dev/null +++ b/tools/test/fq.yaml @@ -0,0 +1,64 @@ +# TODO: add lint tests for malformed fastqs +fqlint: + - name: valid_fastqs + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + - null + - null +subsample: + - name: works + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + - null + - null + $sampling_controls: + record_count: + - 1000 + - 500 + - -1 # negative should disable + - 0 # so should zero + probability: + - 0.0 # 0 should disable + - 1.0 # 1 should also disable + - 0.5 + - 0.25 + - name: conflicting_args + inputs: + $files: + read_one_fastq: + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/random10k.r2.fq.gz + record_count: + - -1 + - 0 + probability: + - 0.0 + - 1.0 + assertions: + exit_code: 2 + - name: neither_count_nor_probability_specified + inputs: + $files: + read_one_fastq: + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/random10k.r2.fq.gz + assertions: + exit_code: 2 diff --git a/tools/test/gatk4.yaml b/tools/test/gatk4.yaml new file mode 100644 index 000000000..bfb256ae6 --- /dev/null +++ b/tools/test/gatk4.yaml @@ -0,0 +1,101 @@ +# TODO: advanced options +apply_bqsr: + - name: works + inputs: + $files: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + recalibration_report: + - test_rnaseq_variant.recal.txt +base_recalibrator: + - name: works + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + $dbsnp: + dbSNP_vcf: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf + dbSNP_vcf_index: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx + $known_indels: + known_indels_sites_vcfs: + - [ reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz ] + known_indels_sites_indices: + - [ reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi ] +haplotype_caller: + - name: works + tags: [ slow ] + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + $dbsnp: + dbSNP_vcf: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf + dbSNP_vcf_index: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx + interval_list: + - chr1_chr19.interval_list +split_n_cigar_reads: + - name: works + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + prefix: + - split +variant_filtration: + - name: works + inputs: + $sample: + vcf: + - vcfs/test1.vcf.gz + - vcfs/test2.vcf.gz + vcf_index: + - vcfs/test1.vcf.gz.tbi + - vcfs/test2.vcf.gz.tbi + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict +mark_duplicates_spark: + - name: works + inputs: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam \ No newline at end of file diff --git a/tools/test/htseq.yaml b/tools/test/htseq.yaml new file mode 100644 index 000000000..1a2356883 --- /dev/null +++ b/tools/test/htseq.yaml @@ -0,0 +1,37 @@ +count: + - name: kitchen_sink + tags: [ slow ] + inputs: + $files: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + gtf: + - reference/gencode.v31.chrY_chrM.gtf.gz + pos_sorted: + - false + strandedness: + - yes + - no + - reverse + include_custom_header: + - true + - false + nonunique: + - true + - false + secondary_alignments: + - true + - false + supplementary_alignments: + - true + - false + minaqual: + - 0 + - 10 +calc_tpm: + - name: works + inputs: + counts: + - test.bwa_aln_pe.chrY_chrM.feature-counts.txt + feature_lengths: + - reference/gencode.v31.chrY_chrM.genelengths.txt \ No newline at end of file diff --git a/tools/test/picard.yaml b/tools/test/picard.yaml new file mode 100644 index 000000000..1c9f80721 --- /dev/null +++ b/tools/test/picard.yaml @@ -0,0 +1,9 @@ +merge_sam_files: + - name: Merge works + inputs: + bams: + - [bams/test.bwa_aln_pe.chrY_chrM.bam, bams/test.PE.2_RGs.Aligned.out.sorted.bam] + prefix: + - test.merged + assertions: + custom: quickcheck.sh diff --git a/tools/test/samtools.yaml b/tools/test/samtools.yaml new file mode 100644 index 000000000..52a5de1b1 --- /dev/null +++ b/tools/test/samtools.yaml @@ -0,0 +1,39 @@ +bam_to_fastq: + - name: kitchen_sink + inputs: + bam: + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam + bitwise_filter: + - include_if_all: "0x0" + exclude_if_any: "0x900" + include_if_any: "0x0" + exclude_if_all: "0x0" + - include_if_all: "00" + exclude_if_any: "0x904" + include_if_any: "3" + exclude_if_all: "0" + paired_end: + - true + - false + retain_collated_bam: + - true + - false + append_read_number: + - true + - false + output_singletons: + - true + - false + prefix: + - kitchen_sink_test + - name: simpler + inputs: + output_singletons: + - true + - false + bam: + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam diff --git a/tools/util.wdl b/tools/util.wdl index e1511d3e7..be141df2c 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -40,7 +40,7 @@ task download { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.3" + container: "ghcr.io/stjudecloud/util:3.0.1" maxRetries: 1 } } @@ -79,7 +79,7 @@ task split_string { } runtime { - container: "ghcr.io/stjudecloud/util:3.0.3" + container: "ghcr.io/stjudecloud/util:3.0.1" maxRetries: 1 } } @@ -110,7 +110,7 @@ task calc_feature_lengths { Int modify_disk_size_gb = 0 } - Float gtf_size = size(gtf, "GB") + Float gtf_size = size(gtf, "GiB") Int disk_size_gb = ceil(gtf_size * 2) + 10 + modify_disk_size_gb command <<< @@ -127,7 +127,7 @@ task calc_feature_lengths { runtime { memory: "16 GB" disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.3" + container: "ghcr.io/stjudecloud/util:3.0.1" maxRetries: 1 } } @@ -147,7 +147,7 @@ task compression_integrity { Int modify_disk_size_gb = 0 } - Float file_size = size(bgzipped_file, "GB") + Float file_size = size(bgzipped_file, "GiB") Int disk_size_gb = ceil(file_size) + 10 + modify_disk_size_gb command <<< @@ -186,7 +186,7 @@ task add_to_bam_header { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb String outfile_name = prefix + ".bam" @@ -228,7 +228,7 @@ task unpack_tarball { Int modify_disk_size_gb = 0 } - Float tarball_size = size(tarball, "GB") + Float tarball_size = size(tarball, "GiB") Int disk_size_gb = ceil(tarball_size * 8) + modify_disk_size_gb command <<< @@ -246,7 +246,7 @@ task unpack_tarball { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.3" + container: "ghcr.io/stjudecloud/util:3.0.1" maxRetries: 1 } } @@ -286,7 +286,7 @@ task make_coverage_regions_bed { Int modify_disk_size_gb = 0 } - Float gtf_size = size(gtf, "GB") + Float gtf_size = size(gtf, "GiB") Int disk_size_gb = ceil(gtf_size * 1.2) + 10 + modify_disk_size_gb command <<< @@ -331,7 +331,7 @@ task global_phred_scores { Int modify_disk_size_gb = 0 } - Float bam_size = size(bam, "GB") + Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb String outfile_name = prefix + ".global_PHRED_scores.tsv" @@ -353,7 +353,7 @@ task global_phred_scores { runtime { memory: "4 GB" disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.3" + container: "ghcr.io/stjudecloud/util:3.0.1" maxRetries: 1 } } @@ -394,15 +394,15 @@ task check_fastq_and_rg_concordance { command <<< python3 /scripts/util/check_FQs_and_RGs.py \ --read-one-fastqs "~{sep(",", read_one_names)}" \ - ~{if length(read_twos) > 0 + ~{(if length(read_twos) > 0 then "--read-two-fastqs \"" + sep(",", squote(read_twos)) + "\"" else "" - } \ + )} \ --read-groups "~{sep(",", read_groups)}" >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.3" + container: "ghcr.io/stjudecloud/util:3.0.1" maxRetries: 1 } } @@ -437,7 +437,7 @@ task split_fastq { Int ncpu = 2 } - Float fastq_size = size(fastq, "GB") + Float fastq_size = size(fastq, "GiB") Int disk_size_gb = ceil(fastq_size * 5) + 10 + modify_disk_size_gb command <<< @@ -462,7 +462,7 @@ task split_fastq { cpu: ncpu memory: "4 GB" disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.3" + container: "ghcr.io/stjudecloud/util:3.0.1" maxRetries: 1 } } diff --git a/workflows/chipseq/chipseq-standard.wdl b/workflows/chipseq/chipseq-standard.wdl index aa30759fe..5f7296726 100755 --- a/workflows/chipseq/chipseq-standard.wdl +++ b/workflows/chipseq/chipseq-standard.wdl @@ -8,13 +8,10 @@ import "../../tools/picard.wdl" import "../../tools/samtools.wdl" import "../../tools/util.wdl" import "../general/bam-to-fastqs.wdl" as b2fq -#@ except: LineWidth import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" as seaseq_map -#@ except: LineWidth import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" as seaseq_samtools -#@ except: LineWidth import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" as seaseq_util diff --git a/workflows/general/alignment-post.wdl b/workflows/general/alignment-post.wdl index 9caa5344c..618871b9d 100644 --- a/workflows/general/alignment-post.wdl +++ b/workflows/general/alignment-post.wdl @@ -3,7 +3,6 @@ version 1.1 import "../../tools/md5sum.wdl" import "../../tools/picard.wdl" import "../../tools/samtools.wdl" -#@ except: LineWidth import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" as xenocp_wf diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl index e5bcbedc9..573db7c7d 100644 --- a/workflows/general/bam-to-fastqs.wdl +++ b/workflows/general/bam-to-fastqs.wdl @@ -67,9 +67,10 @@ workflow bam_to_fastqs { } output { - Array[File] read1s = if paired_end + Array[File] read1s = (if paired_end then select_all(bam_to_fastq.read_one_fastq_gz) else select_all(bam_to_fastq.single_end_reads_fastq_gz) + ) Array[File?] read2s = bam_to_fastq.read_two_fastq_gz } } diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl index de9053218..fe6825278 100644 --- a/workflows/general/samtools-merge.wdl +++ b/workflows/general/samtools-merge.wdl @@ -35,9 +35,10 @@ workflow samtools_merge { scatter (merge_num in range((bam_length / max_length) + 1)) { # Get the sublist of bams scatter (bam_num in range(max_length)) { - Int num = if merge_num > 0 + Int num = (if merge_num > 0 then bam_num + (merge_num * max_length) else bam_num + ) if (num < bam_length) { File bam_list = bams[num] } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 652ab9c0d..03a06cf6b 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -11,15 +11,12 @@ workflow methylation_cohort { umap_embedding: "UMAP embedding for all samples", umap_plot: "UMAP plot for all samples", probe_pvalues: "Matrix (in CSV format) containing detection p-values for every (common) probe on the array as rows and all of the input samples as columns.", - high_pval_probes: "List of probes that were filtered out due to high p-values", } allowNestedInputs: true } parameter_meta { unfiltered_normalized_beta: "Array of unfiltered normalized beta values for each sample" - sex_probe_list: "List of probes mapping to sex chromosomes to optionally filter" - additional_probes_to_exclude: "Additional probes to exclude from the analysis" p_values: "Array of detection p-value files for each sample." skip_pvalue_check: "Skip filtering based on p-values, even if `p_values` is supplied." num_probes: "Number of probes to use when filtering to the top `num_probes` probes with the highest standard deviation." @@ -27,8 +24,6 @@ workflow methylation_cohort { input { Array[File] unfiltered_normalized_beta - File? sex_probe_list - File? additional_probes_to_exclude Array[File] p_values = [] Boolean skip_pvalue_check = false Int num_probes = 10000 @@ -42,9 +37,10 @@ workflow methylation_cohort { scatter (merge_num in range((beta_length / max_length) + 1)) { # Get the sublist of beta files scatter (beta_num in range(max_length)) { - Int num = if merge_num > 0 + Int num = (if merge_num > 0 then beta_num + (merge_num * max_length) else beta_num + ) if (num < beta_length) { File bam_list = unfiltered_normalized_beta[num] } @@ -67,9 +63,10 @@ workflow methylation_cohort { scatter (merge_num in range((pval_length / max_length) + 1)) { # Get the sublist of p-value files scatter (pval_num in range(max_length)) { - Int num_p = if merge_num > 0 + Int num_p = (if merge_num > 0 then pval_num + (merge_num * max_length) else pval_num + ) if (num_p < pval_length) { File pval_list = p_values[num_p] } @@ -102,12 +99,13 @@ workflow methylation_cohort { } } - File? pval_file = if (pval_length > 0 && !skip_pvalue_check) + File? pval_file = (if (pval_length > 0 && !skip_pvalue_check) then select_first([ final_merge_pvals.combined_file, simple_merge_pval.combined_file, ]) else None + ) call filter_probes { input: beta_values = select_first([ @@ -116,10 +114,6 @@ workflow methylation_cohort { ]), p_values = pval_file, num_probes, - additional_probes_to_exclude = select_all([ - sex_probe_list, - additional_probes_to_exclude, - ]), } call generate_umap { input: @@ -140,7 +134,6 @@ workflow methylation_cohort { File umap_embedding = generate_umap.umap File umap_plot = plot_umap.umap_plot File? probe_pvalues = pval_file - File? high_pval_probes = filter_probes.high_pval_probes } } @@ -172,11 +165,11 @@ task combine_data { Int modify_memory_gb = 0 } - Int memory_gb = ceil(size(files_to_combine, "GB") * if simple_merge + Int memory_gb = ceil(size(files_to_combine, "GiB") * if simple_merge then 2 else 1 ) + modify_memory_gb + 2 - Int disk_size_gb = ceil(size(files_to_combine, "GB") * 2) + 2 + Int disk_size_gb = ceil(size(files_to_combine, "GiB") * 2) + 2 command <<< python /scripts/methylation/combine.py \ @@ -193,7 +186,7 @@ task combine_data { } runtime { - container: "ghcr.io/stjudecloud/pandas:2.2.1-7" + container: "ghcr.io/stjudecloud/pandas:2.2.1-6" memory: "~{memory_gb} GB" cpu: 1 disks: "~{disk_size_gb} GB" @@ -208,14 +201,12 @@ task filter_probes { outputs: { filtered_beta_values: "Filtered beta values for all samples", filtered_probes: "Probes that were retained after filtering.", - high_pval_probes: "Probes that were filtered out due to high p-values", } } parameter_meta { beta_values: "Beta values for all samples" p_values: "P-values for all samples" - additional_probes_to_exclude: "Additional probes to exclude from the analysis" prefix: "Prefix for the output files. The extensions `.beta.csv` and `.probes.csv` will be appended." pval_threshold: "P-value cutoff to determine poor quality probes" pval_sample_fraction: "Fraction of samples that must exceed p-value threshold to exclude probe" @@ -225,14 +216,13 @@ task filter_probes { input { File beta_values File? p_values - Array[File] additional_probes_to_exclude = [] String prefix = "filtered" Float pval_threshold = 0.01 Float pval_sample_fraction = 0.5 Int num_probes = 10000 } - Int disk_size_gb = ceil(size(beta_values, "GB") * 2) + 2 + Int disk_size_gb = ceil(size(beta_values, "GiB") * 2) + 2 command <<< python /scripts/methylation/filter.py \ @@ -242,18 +232,16 @@ task filter_probes { --pval-threshold ~{pval_threshold} \ --pval-sample-fraction ~{pval_sample_fraction} \ ~{"--pval '" + p_values + "'"} \ - ~{sep(" ", prefix("--exclude ", quote(additional_probes_to_exclude)))} \ "~{beta_values}" >>> output { File filtered_beta_values = "~{prefix}.beta.csv" File filtered_probes = "~{prefix}.probes.csv" - File? high_pval_probes = "high_pval_probes.csv" } runtime { - container: "ghcr.io/stjudecloud/pandas:2.2.1-7" + container: "ghcr.io/stjudecloud/pandas:2.2.1-6" memory: "8 GB" cpu: 1 disks: "~{disk_size_gb} GB" @@ -279,7 +267,7 @@ task generate_umap { String prefix = "umap" } - Int disk_size_gb = ceil(size(filtered_beta_values, "GB") * 2) + 2 + Int disk_size_gb = ceil(size(filtered_beta_values, "GiB") * 2) + 2 command <<< python /scripts/methylation/generate_umap.py \ @@ -292,7 +280,7 @@ task generate_umap { } runtime { - container: "ghcr.io/stjudecloud/umap:0.5.7-11" + container: "ghcr.io/stjudecloud/umap:0.5.7-10" memory: "8 GB" cpu: 1 disks: "~{disk_size_gb} GB" @@ -332,7 +320,7 @@ task plot_umap { cpu: 1 memory: "4 GB" disks: "4 GB" - container: "ghcr.io/stjudecloud/python-plotting:2.0.9" + container: "ghcr.io/stjudecloud/python-plotting:2.0.7" maxRetries: 1 } } diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 27fdb6b6b..2f7a3bff3 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -125,7 +125,6 @@ workflow quality_check_standard { File bam_index File kraken_db File? gtf - #@ except: LineWidth File multiqc_config = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" Array[File] extra_multiqc_inputs = [] Array[File] coverage_beds = [] @@ -209,9 +208,10 @@ workflow quality_check_standard { subsample_index.bam_index, bam_index, ]) - String post_subsample_prefix = if (defined(subsample.sampled_bam)) + String post_subsample_prefix = (if (defined(subsample.sampled_bam)) then prefix + ".subsampled" else prefix + ) call picard.validate_bam after quickcheck { input: bam = post_subsample_bam, @@ -592,13 +592,14 @@ task parse_input { >>> output { - Array[String] labels = if (coverage_beds_len > 0) + Array[String] labels = (if (coverage_beds_len > 0) then read_lines("labels.txt") else [] + ) } runtime { - container: "ghcr.io/stjudecloud/util:3.0.3" + container: "ghcr.io/stjudecloud/util:3.0.1" maxRetries: 1 } } diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index 5dbf385f5..afad29046 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -199,12 +199,14 @@ workflow rnaseq_core { } } - Array[File] chosen_r1s = if enable_read_trimming + Array[File] chosen_r1s = (if enable_read_trimming then select_all(trim.read_one_fastq_gz) else read_one_fastqs_gz - Array[File] chosen_r2s = if enable_read_trimming + ) + Array[File] chosen_r2s = (if enable_read_trimming then select_all(trim.read_two_fastq_gz) else read_two_fastqs_gz + ) call star.alignment after validate { input: read_one_fastqs_gz = chosen_r1s, @@ -246,9 +248,10 @@ workflow rnaseq_core { gene_model = gtf, } - String htseq_strandedness = if (provided_strandedness != "") + String htseq_strandedness = (if (provided_strandedness != "") then htseq_strandedness_mapping[provided_strandedness] else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string] + ) call htseq.count as htseq_count { input: bam = alignment_post.processed_bam,