-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathMergingMultiplesManually.R
More file actions
85 lines (72 loc) · 3.48 KB
/
MergingMultiplesManually.R
File metadata and controls
85 lines (72 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# This is a script for merging any number of 10X Genomics datasets
# from raw counts with the option of subseting each dataset to a
# predefined set of barcodes. It feeds into scRNAseqWorkflow.Rmd
library(SingleCellExperiment)
library(DropletUtils)
library(Matrix)
setwd("D:/Dropbox/GDB/MKlab_v1to2/For Brendan/")
# These are the folder names of each raw dataset.
# They can be relative or full filepaths.
# Name the folders something meaningful though
# (i.e. rename "mm10" to something useful),
# because these names (last folder in the path)
# will be used as dataset labels.
temp_data_file_paths <- c("../For Brendan/10Day/",
"D:/Dropbox/GDB/MKlab_v1to2/For Brendan/14Day/",
"L11Day/")
# The folder / dataset names are cleaned to
# remove anything that isn't a letter or number.
names(temp_data_file_paths) <- gsub("[^A-Za-z0-9]","",
sapply(strsplit(temp_data_file_paths,"/"),
function(X) X[length(X)]))
# These MUST be in the same order as the datasets!
# If you want to keep all cells from all datasets,
# skip this and the subsetting step below.
temp_barcodes_to_keep <- list(cluster_10Day_barcodes,
cluster_14Day_barcodes,
cluster_L11Day_barcodes)
# Reading raw data, creating a list of SCE objects.
temp_all_sce <- sapply(temp_data_file_paths,
read10xCounts,
col.names=T,
simplify=F)
# Subsetting each SCE object
temp_all_sce <- mapply(function(SCE,BC) SCE[,BC],
SCE=temp_all_sce,
BC=temp_barcodes_to_keep)
# Collecting all gene names across datasets
temp_all_genes <- sort(unique(unlist(lapply(temp_all_sce,rownames))))
# Making unified rowData
temp_rowdata <- do.call("rbind",
lapply(temp_all_sce,function(Y)
rowData(Y)[,Reduce("intersect",
lapply(temp_all_sce,
function(X) colnames(rowData(X))))]))
temp_rowdata <- temp_rowdata[!duplicated(rownames(temp_rowdata)),]
temp_rowdata <- temp_rowdata[temp_all_genes,]
# Adding missing genes to each count matrix and
# generating replacement SCEs with missing genes included
for (i in names(temp_all_sce)) {
temp_missing_genes <- temp_all_genes[!temp_all_genes %in% rownames(temp_all_sce[[i]])]
temp_new_counts <- rbind(
counts(temp_all_sce[[i]]),
Matrix(0,nrow=length(temp_missing_genes),ncol=ncol(temp_all_sce[[i]]),
dimnames=list(temp_missing_genes,colnames(temp_all_sce[[i]])))
)
temp_new_counts <- temp_new_counts[temp_all_genes,]
colnames(temp_new_counts) <- paste(i,colnames(temp_new_counts),sep="_")
temp_sce <- SingleCellExperiment(assays=list(counts=temp_new_counts))
rowData(temp_sce) <- temp_rowdata
colData(temp_sce) <- colData(temp_all_sce[[i]])[,Reduce("intersect",
lapply(temp_all_sce,function(X)
colnames(colData(X))))]
colData(temp_sce)$orig.ident <- i
rownames(colData(temp_sce)) <- colnames(temp_new_counts)
temp_all_sce[[i]] <- temp_sce
}
sce <- do.call("cbind",temp_all_sce)
# cleaning up
rm(list=c("i",grep("^temp",ls(),value=T)))
# Now (AND NOT BEFORE) can you launch into the scRNAseqWorkflow.Rmd
# starting at:
# sce <- sce[rowSums(counts(sce)) > 0,]