11---
22title : " Introduction to single-cell RNA-seq analysis"
33subtitle : ' Multi-sample comparisons - Exercise 1'
4- author : " Stephane Ballereau"
4+ author : " Abbi Edwards, Stephane Ballereau"
55output :
66 html_document :
77 df_print : paged
@@ -15,27 +15,6 @@ output:
1515 number_sections : true
1616---
1717
18- # Differential expression and abundance between conditions
19-
20- ``` {r multiSplComp_setup, include=FALSE, echo=FALSE}
21- # First, set some variables:
22- require(knitr)
23-
24- opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)
25- opts_chunk$set(echo = TRUE)
26- opts_chunk$set(eval = TRUE)
27- options(stringsAsFactors = FALSE)
28- opts_chunk$set(fig.width=7, fig.height=7)
29- set.seed(123) # for reproducibility
30- ```
31-
32- ``` {r}
33- splSetToGet <- "PBMMC,ETV6-RUNX1"
34- splSetVec <- unlist(strsplit(splSetToGet, ","))
35- splSetToGet2 <- gsub(",", "_", splSetToGet)
36- nbPcToComp <- 50
37- figSize <- 7
38- ```
3918
4019``` {r, message=FALSE, warning=FALSE}
4120library(scater)
@@ -52,141 +31,30 @@ Source: [Multi-sample comparisons](https://osca.bioconductor.org/multi-sample-co
5231
5332## Exercise 1 - differential expression
5433
55- Identify label-specific DE genes that are significant in 'c10' yet not DE in any other label.
34+ Identify label-specific DE genes that are significant in cluster 4 yet not DE in any other label.
5635
5736Plot the top-ranked gene for inspection.
5837
59- ## Setting up the data
60-
61- Load the SCE object (with 1200 cells per sample):
62-
63- ``` {r}
64- # Read object in:
65- merged <- readRDS("../Robjects/caron_sce_nz_postDeconv_1p2kcps_dsi_PBMMC_ETV6-RUNX1_merged.Rds")
66- # also get raw counts that were written to a separate file
67- # (to help file sharing)
68- merged_counts <- readRDS("../Robjects/caron_sce_nz_postDeconv_1p2kcps_dsi_PBMMC_ETV6-RUNX1_merged_counts.Rds")
69- # put raw counts back:
70- counts(merged) <- merged_counts
71- # tidy:
72- rm(merged_counts)
73- ```
74-
75- A brief inspection of the results shows clusters contain varying contributions from samples:
76-
77- ``` {r}
78- colLabels(merged) <- merged$clusters.mnn
79- tab <- table(colLabels(merged), merged$SampleName)
80- tab
81- ```
82-
83- On the t-SNE plots below, cells are coloured by type or sample ('batch of origin'). Cluster numbers are superimposed based on the median coordinate of cells assigned to that cluster.
84-
85- ``` {r}
86- p1 <- plotTSNE(merged, colour_by="SampleGroup", text_by="label", point_size=0.3)
87- p2 <- plotTSNE(merged, colour_by="SampleName", point_size=0.3) +
88- facet_wrap(~colData(merged)$SampleName)
89- p1
90- p2
91- ```
92-
93- ## Creating pseudo-bulk samples
94-
95- Sum counts together for all cells with the same combination of label and sample,
96- with ` aggregateAcrossCells ` .
97-
98- ``` {r}
99- # Using 'label' and 'sample' as our two factors; each column of the output
100- # corresponds to one unique combination of these two factors.
101- columnsToUse <- c("batch", "SampleName", "SampleGroup", "clusters.mnn")
102- colData(merged) <- colData(merged) %>% data.frame() %>% select(all_of(columnsToUse)) %>% DataFrame
103- summed <- aggregateAcrossCells(merged,
104- id = DataFrame(
105- label = merged$clusters.mnn,
106- sample = merged$SampleName
107- )
108- )
109- colData(summed) %>% head(3)
110- ```
111-
112- ## Performing the DE analysis
113-
114- Filter out all sample-label combinations with insufficient cells, 20 cells or less.
115-
11638``` {r}
117- summed.filt <- summed[,summed$ncells >= 20]
118- ```
119-
120- Construct a common design matrix that will be used in the analysis for each label.
121-
122- ``` {r}
123- # Pulling out a sample-level 'targets' data.frame:
124- targets <- colData(merged)[!duplicated(merged$SampleName),] %>%
125- data.frame() %>%
126- select(-clusters.mnn)
127-
128- # Constructing the design matrix:
129- design <- model.matrix(~factor(SampleGroup), data=targets)
130- rownames(design) <- targets$SampleName
131- ```
132-
133- Apply the ` pseudoBulkDGE ` function to obtain a list of DE genes for each label.
134-
135- ``` {r}
136- summed.filt$SampleGroup <- factor(summed.filt$SampleGroup)
137-
138- de.results <- pseudoBulkDGE(summed.filt,
139- label = summed.filt$label,
140- design = ~SampleGroup,
141- coef = "SampleGroupPBMMC",
142- condition = summed.filt$SampleGroup
143- )
144- ```
145-
146- Examine the numbers of DEGs at a FDR of 5% for each label using the ` decideTestsPerLabel ` function.
147-
148- ``` {r}
149- is.de <- decideTestsPerLabel(de.results, threshold=0.05)
150- summarizeTestsPerLabel(is.de)
151- ```
39+ # load RObjects until this point
15240
153- For each gene, we compute the percentage of cell types in which that gene is upregulated or downregulated. (Here, we consider a gene to be non-DE if it is not retained after filtering.).
41+ load("../Robjects/10_exercise1.RData")
15442
155- <!-- TODO: add gene names -->
156-
157- ``` {r}
158- # Upregulated across most cell types.
159- up.de <- is.de > 0 & !is.na(is.de)
160- head(sort(rowMeans(up.de), decreasing=TRUE), 10)
161- ```
162-
163- ``` {r}
164- # Downregulated across cell types.
165- down.de <- is.de < 0 & !is.na(is.de)
166- head(sort(rowMeans(down.de), decreasing=TRUE), 10)
16743```
16844
169- Identify label-specific DE genes that are significant in 'c10' yet not DE in any other label (FDR threshold of 50%).
170-
171- Plot the top-ranked gene for inspection.
172-
173- ``` {r}
174- remotely.de <- decideTestsPerLabel(de.results, threshold=0.5)
175- not.de <- remotely.de==0 | is.na(remotely.de)
176- ```
17745
17846``` {r}
179- # get c10 's 'unique.degs':
47+ # get cluster 4 's 'unique.degs':
18048
18149# 2nd cluster in is.de
182- cx <- "c10 "
50+ cx <- "4 "
18351other.labels <- setdiff(colnames(not.de), cx)
18452unique.degs <- is.de[,cx]!=0 & rowMeans(not.de[,other.labels])==1
18553unique.degs <- names(which(unique.degs))
18654head(unique.degs)
18755```
18856
189- ``` {r}
57+ ``` {r, warning=FALSE }
19058# Choosing the top-ranked gene for inspection:
19159de.inspec <- list()
19260de.inspec[[cx]] <- de.results[[cx]]
@@ -198,7 +66,7 @@ de.inspec[[cx]] <- de.inspec[[cx]][rownames(de.inspec[[cx]]) %in% unique.degs,]
19866sizeFactors(summed.filt) <- NULL
19967plotExpression(logNormCounts(summed.filt),
20068 features=rownames(de.inspec[[cx]])[1],
201- x="SampleGroup ", colour_by="SampleGroup ",
69+ x="Sample ", colour_by="Sample ",
20270 other_fields="label") +
20371 facet_wrap(~label) +
20472 ggtitle(glue::glue("{cx}: {rownames(de.inspec[[cx]])[1]}"))
0 commit comments