This is a demo of the Recla et al. dataset using functionality of R/qtl2shiny shown in http://www.stat.wisc.edu/~yandell/software/qtl2shiny. We assume that the CCmouse.Rmd and Recla.Rmd scripts have already been run to put data in the CCmouse
directory.
Here we just attach the dplyr
and ggplot2
packages, but explicitly use elements of the qtl2
, qtl2ggplot
and qtl2pattern
packages. The qtl2fst
package is used implicitly for access to genotype probabilities.
library(dplyr)
library(ggplot2)
project_info <- data.frame(project = "Recla",
taxa = "CCmouse",
directory = "../../..",
stringsAsFactors = FALSE)
(taxa_dir <- file.path(project_info$directory,
project_info$taxa))
## [1] "../../../CCmouse"
(project_dir <- file.path(taxa_dir, project_info$project))
## [1] "../../../CCmouse/Recla"
Assume local directory already created with taxa
and project
under, and with all data already installed.
if(!dir.exists(project_dir)) {
warning("project director does not exist")
knitr::knit_exit()
}
List files for the taxa and for the project.
list.files(project_info$taxa)
## character(0)
list.files(project_dir)
## [1] "analyses.rds" "covar.rds" "genoprob"
## [4] "hotspot.rds" "kinship.rds" "peaks.rds"
## [7] "pheno_data.rds" "pheno_type.rds" "pmap.rds"
## [10] "query_mrna.rds" "query_probs.rds"
list.files(file.path(project_dir, "genoprob"))
## [1] "aprobs_1.feather" "aprobs_1.fst" "aprobs_10.feather"
## [4] "aprobs_10.fst" "aprobs_11.feather" "aprobs_11.fst"
## [7] "aprobs_12.feather" "aprobs_12.fst" "aprobs_13.feather"
## [10] "aprobs_13.fst" "aprobs_14.feather" "aprobs_14.fst"
## [13] "aprobs_15.feather" "aprobs_15.fst" "aprobs_16.feather"
## [16] "aprobs_16.fst" "aprobs_17.feather" "aprobs_17.fst"
## [19] "aprobs_18.feather" "aprobs_18.fst" "aprobs_19.feather"
## [22] "aprobs_19.fst" "aprobs_2.feather" "aprobs_2.fst"
## [25] "aprobs_3.feather" "aprobs_3.fst" "aprobs_4.feather"
## [28] "aprobs_4.fst" "aprobs_5.feather" "aprobs_5.fst"
## [31] "aprobs_6.feather" "aprobs_6.fst" "aprobs_7.feather"
## [34] "aprobs_7.fst" "aprobs_8.feather" "aprobs_8.fst"
## [37] "aprobs_9.feather" "aprobs_9.fst" "aprobs_X.feather"
## [40] "aprobs_X.fst" "faprobs.rds" "feather_aprobs.rds"
## [43] "feather_probs.rds" "fprobs.rds" "fst_aprobs.rds"
## [46] "fst_probs.rds" "probs_1.feather" "probs_1.fst"
## [49] "probs_10.feather" "probs_10.fst" "probs_11.feather"
## [52] "probs_11.fst" "probs_12.feather" "probs_12.fst"
## [55] "probs_13.feather" "probs_13.fst" "probs_14.feather"
## [58] "probs_14.fst" "probs_15.feather" "probs_15.fst"
## [61] "probs_16.feather" "probs_16.fst" "probs_17.feather"
## [64] "probs_17.fst" "probs_18.feather" "probs_18.fst"
## [67] "probs_19.feather" "probs_19.fst" "probs_2.feather"
## [70] "probs_2.fst" "probs_3.feather" "probs_3.fst"
## [73] "probs_4.feather" "probs_4.fst" "probs_5.feather"
## [76] "probs_5.fst" "probs_6.feather" "probs_6.fst"
## [79] "probs_7.feather" "probs_7.fst" "probs_8.feather"
## [82] "probs_8.fst" "probs_9.feather" "probs_9.fst"
## [85] "probs_X.feather" "probs_X.fst"
# See shinyHotspot.R
hotspots <- readRDS(file.path(project_dir, "hotspot.rds"))
# See shinyHotspot.R
qtl2ggplot::ggplot_scan1(hotspots$scan, hotspots$map)
# See shinyHotspot.R
(hot_sum <- qtl2pattern::summary_scan1(hotspots$scan, hotspots$map) %>%
filter(pheno == "all") %>%
rename(count = lod) %>%
select(-marker) %>%
arrange(desc(count)))
## # A tibble: 16 x 4
## pheno chr pos count
## <chr> <fct> <int> <dbl>
## 1 all 11 97 4.
## 2 all 5 38 3.
## 3 all 7 139 3.
## 4 all 9 70 3.
## 5 all 14 22 3.
## 6 all 2 115 2.
## 7 all 3 36 2.
## 8 all 12 44 2.
## 9 all 18 32 2.
## 10 all 4 22 1.
## 11 all 6 97 1.
## 12 all 10 3 1.
## 13 all 13 11 1.
## 14 all 16 96 1.
## 15 all 17 30 1.
## 16 all 19 50 1.
chr_id <- as.character(hot_sum$chr[1])
pos_Mbp <- hot_sum$pos[1]
window_Mbp <- 5
start_val <- pos_Mbp - window_Mbp
end_val <- pos_Mbp + window_Mbp
# See shinyHotspot.R
autoplot(hotspots$scan, hotspots$map, chr = chr_id)
# See shinyMain.R
peaks <- readRDS(file.path(project_dir, "peaks.rds")) %>%
filter(chr == chr_id,
pos >= start_val,
pos <= end_val)
# See shinyPhenos.R
(peaks <- peaks %>%
select(pheno, chr, pos, lod) %>%
arrange(desc(lod)))
## pheno chr pos lod
## 1 LD_distance_light 11 97.02702 6.079138
## 2 LD_light_pct 11 97.02702 5.983598
## 3 VC_top_distance_first4 11 96.17147 5.848676
## 4 LD_transitions 11 96.68386 5.536240
## 5 OF_distance_first4 11 97.02702 4.539011
## 6 VC_bottom_distance 11 97.02702 4.487813
## 7 OF_distance 11 97.02702 4.033942
## 8 VC_bottom_transitions 11 97.29928 3.890053
## 9 VC_bottom_velocity 11 100.64162 3.160963
pheno_name <- peaks$pheno[1]
# See shinyMain.R
(analyses <- readRDS(file.path(project_dir, "analyses.rds")) %>%
filter(pheno == pheno_name))
## pheno longname output pheno_group
## 1 LD_distance_light LD_distance_light LD_distance_light Recla
## pheno_type model transf offset winsorize sex Cohort Group Subgroup
## 1 recla normal identity 0 FALSE TRUE TRUE TRUE TRUE
## ngen coat_color
## 1 TRUE TRUE
# See shinyMain.R
(pheno_data <- readRDS(file.path(project_dir, "pheno_data.rds")) %>%
qtl2pattern::pheno_trans(analyses$pheno,
analyses$transf,
analyses$offset,
analyses$winsorize)) %>%
head
## LD_distance_light
## 1 2666.51
## 4 7195.21
## 5 6111.97
## 6 2703.62
## 7 1804.32
## 8 2178.14
# See shinyMain.R
(covar <- readRDS(file.path(project_dir, "covar.rds")) %>%
qtl2pattern::get_covar(analyses)) %>%
head
## sex Cohort Group Subgroup ngen coat_color
## 1 male Fall 1 1B 4 AG
## 4 male Fall 1 1A 4 AG
## 5 male Fall 1 1A 4 AG
## 6 male Fall 1 1A 4 WH
## 7 male Fall 1 1A 4 BL
## 8 male Fall 1 1A 4 AG
# See shinyMain.R
kinship <- readRDS(file.path(project_dir, "kinship.rds"))[chr_id]
# See shinyProbs.R and Recla.Rmd
query_probs <- qtl2pattern::create_probs_query_func(project_dir)
probs_obj <- query_probs(chr_id, start_val, end_val)
names(probs_obj)
## [1] "probs" "map"
dim(probs_obj$probs)
## 11
## ind 261
## gen 8
## mar 28
str(probs_obj$map)
## List of 1
## $ 11: Named num [1:28] 92.2 92.7 93.3 93.5 93.8 ...
## ..- attr(*, "names")= chr [1:28] "UNC111437875" "JAX00318359" "JAX00318512" "UNC110155146" ...
# See shinyScanCoef.R
scan_obj <- qtl2pattern::scan1_covar(pheno_data, covar, probs_obj$probs, kinship,
analyses)
autoplot(scan_obj, probs_obj$map)
summary(scan_obj, probs_obj$map)
## # A tibble: 1 x 5
## pheno chr marker pos lod
## <chr> <fct> <fct> <dbl> <dbl>
## 1 LD_distance_light 11 UNC110158415 97.0 6.08
qtl2::find_peaks(scan_obj, probs_obj$map)
## lodindex lodcolumn chr pos lod
## 1 1 LD_distance_light 11 97.02702 6.079138
addcovar <- qtl2pattern::covar_df_mx(covar)
eff_obj <- qtl2::scan1coef(probs_obj$probs, pheno_data, kinship, addcovar)
autoplot(eff_obj, probs_obj$map) +
geom_vline(xintercept = peaks$pos[1], linetype = "dashed")
autoplot(eff_obj, probs_obj$map, scan1_output = scan_obj,
legend.position = "none")
# See shinySNPSetup.R and shinyProbs.R
query_variants <- qtl2::create_variant_query_func(
file.path(taxa_dir, "cc_variants.sqlite"))
snpinfo <- query_variants(chr_id, start_val, end_val)
snpprobs_obj <- qtl2pattern::get_snpprobs(chr_id, pos_Mbp, window_Mbp,
pheno_name,
probs_obj$probs,
probs_obj$map,
snpinfo)
snp_scan_obj <- qtl2pattern::scan1_covar(pheno_data, covar, snpprobs_obj$snpprobs, kinship, analyses)
autoplot(snp_scan_obj, snpprobs_obj$snpinfo, show_all_snps = FALSE,
drop_hilit = 1.5,)
# See shinySNPSum and shinySNPSetup
top_snps_tbl <- qtl2pattern::top_snps_all(snp_scan_obj,
snpprobs_obj$snpinfo,
1.5)
(patterns <- summary(top_snps_tbl))
## # A tibble: 14 x 8
## pattern max_lod max_pos pheno pct min_lod sdp max_snp
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <int> <chr>
## 1 ABDEH:CFG 4.34 98.1 LD_distan… 10.3 2.94 100 SV_11_98122…
## 2 BDEH:ACFG 3.86 97.0 LD_distan… 0.600 3.04 101 rs27023971
## 3 ABDH:CEFG 3.69 97.0 LD_distan… 7.99 3.46 116 rs257161856
## 4 CEFG:ABDH 3.69 97.0 LD_distan… 0.0700 3.69 139 rs27023987
## 5 ABCDEH:FG 3.27 97.1 LD_distan… 31.1 2.91 96 rs248950985
## 6 ABDE:CFGH 3.25 98.6 LD_distan… 25.5 3.20 228 rs27059423
## 7 ABDEFH:CG 3.23 98.8 LD_distan… 3.88 3.19 68 rs29429364
## 8 ABDEGH:CF 3.12 96.6 LD_distan… 0.150 3.12 36 rs29390482
## 9 BDH:ACEFG 2.98 96.7 LD_distan… 0.0700 2.98 117 rs249968596
## 10 ACEFG:BDH 2.98 96.7 LD_distan… 3.51 2.98 138 rs46752168
## 11 BCFG:ADEH 2.95 97.3 LD_distan… 13.1 2.93 153 rs48941594
## 12 ADEH:BCFG 2.93 97.6 LD_distan… 0.0700 2.93 102 rs27104296
## 13 CEFGH:ABD 2.91 98.4 LD_distan… 3.58 2.88 11 rs250091477
## 14 ABD:CEFGH 2.88 98.3 LD_distan… 0.0700 2.88 244 rs263695351
# See shinyGeneRegion.R
query_genes <- qtl2::create_gene_query_func(
file.path(taxa_dir, "mouse_genes.sqlite"))
rng <- range(top_snps_tbl$pos) + c(-.005, .005)
gene_tbl <- query_genes(chr_id, rng[1], rng[2])
gene_region_tbl <- qtl2pattern::get_genes(chr_id, rng[1], rng[2], gene_tbl)
autoplot(gene_region_tbl)
autoplot(gene_region_tbl, top_snps_tbl = top_snps_tbl)
# See shinyGeneExon.R
gene_exon_tbl <- qtl2pattern::get_gene_exon_snp(top_snps_tbl, gene_tbl)
(exon_sum <- summary(gene_exon_tbl, top_snps_tbl = top_snps_tbl))
## # A tibble: 65 x 10
## gene max_lod exons strand min_Mbp max_Mbp min.len max.len sum.len
## <chr> <dbl> <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Fbxl20 4.34 34 - 98.1 98.2 4.40e-5 7.14e-3 0.0222
## 2 1700003D… 4.22 3 - 98.4 98.4 4.60e-5 1.28e-3 0.00139
## 3 Ppp1r1b 4.22 19 + 98.3 98.4 2.20e-5 7.70e-4 0.00616
## 4 Stard3 4.22 34 + 98.4 98.4 3.30e-5 7.79e-4 0.00578
## 5 Casc3 4.00 27 + 98.8 98.8 2.50e-5 2.52e-3 0.0146
## 6 Gm12359 4.00 8 - 98.8 98.8 6.10e-5 9.24e-4 0.00272
## 7 Gm31862 4.00 6 + 98.8 98.8 7.40e-5 3.51e-3 0.00850
## 8 Gm38985 4.00 6 - 98.7 98.7 6.20e-5 1.75e-3 0.00325
## 9 Msl1 4.00 21 + 98.8 98.8 3.90e-5 1.14e-3 0.00931
## 10 Nr1d1 4.00 11 - 98.8 98.8 8.80e-5 6.59e-4 0.00415
## # ... with 55 more rows, and 1 more variable: LD_distance_light <dbl>
autoplot(gene_exon_tbl, top_snps_tbl, genes = exon_sum$gene[1])
# See shinySNPPattern.R
autoplot(snp_scan_obj, snpprobs_obj$snpinfo, patterns = "hilit", drop_hilit = 1.5)
# See shinySNPFeature.R
top_feature <- qtl2pattern::merge_feature(top_snps_tbl, snpprobs_obj$snpinfo,
snp_scan_obj, 1.5, 0, gene_exon_tbl)
autoplot(top_feature, pheno_name, "pattern")
autoplot(top_feature, pheno_name, "consequence")
This section (and package CausalMST
) is working but will likely be rearranged.
First, identify all phenotypes that map to the region. This is done using peaks
in the region to identify phenotype names and all_analyses
to construct the appropriate phenotype transformation.
# See shinyMediate.R
all_analyses <- readRDS(file.path(project_dir, "analyses.rds")) %>%
filter(pheno != pheno_name,
pheno %in% peaks$pheno)
all_pheno_data <- readRDS(file.path(project_dir, "pheno_data.rds")) %>%
qtl2pattern::pheno_trans(all_analyses$pheno,
all_analyses$transf,
all_analyses$offset,
all_analyses$winsorize)
The following works for mediators as other phenotypes. But see qtl2pattern::expr_region()
and qtl2pattern::create_mrna_query_func()
to create query_mrna()
to access expression data.
The basic idea here is to construct a list containing at least three items:
pheno
: matrix of phenotypes (columns) by individuals (rows)annot
: data frame of annotationcovar
: data frame of covariatespeaks
: data frame of peak information (optional)med_ls <- qtl2pattern::pheno_region(
chr_id, start_val, end_val, covar, probs_obj$map,
peaks, all_analyses, all_pheno_data)
peak_mar <- qtl2::find_marker(probs_obj$map, chr_id, pos_Mbp)
# waiting for fix to qtl2
#geno_max <- qtl2::pull_genoprobpos(probs_obj$probs, peak_mar)
geno_max <- subset(probs_obj$probs, mar = peak_mar)[[1]][,,1]
driver_med <- probs_obj$probs[[chr_id]][,, unique(med_ls[[2]]$driver), drop = FALSE]
Mediate the target (pheno_data
) using information in med_ls
about potential mediators. The driver for the target is geno_max
, while the driver for the mediators come from driver_med
using names from med_ls$annot$driver
.
mediate_obj <- CausalMST::mediation_test(
target = pheno_data,
mediator = med_ls[[1]],
driver = geno_max,
annotation = med_ls[[2]],
covar_tar = covar,
covar_med = med_ls$covar,
kinship = kinship[[1]],
driver_med = driver_med,
test = "wilc",
pos = pos_Mbp)
(sum_med <- summary(mediate_obj))
## # A tibble: 8 x 43
## id pos mediation triad pvalue LRmed lod chr alt A B
## <chr> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <fct> <fct> <dbl> <dbl>
## 1 LD_lig… 97.0 2.27 reac… 0.120 13.2 5.98 11 caus… 177. 865.
## 2 LD_tra… 96.7 3.10 reac… 0.0185 10.6 5.54 11 caus… 1371. 1596.
## 3 OF_dis… 97.0 3.65 reac… 0.0291 7.74 4.03 11 caus… 1212. 1279.
## 4 VC_top… 96.2 4.06 reac… 0.0842 11.3 5.85 11 caus… 1463. 1984.
## 5 OF_dis… 97.0 4.50 reac… 0.0592 7.70 4.54 11 caus… 1968. 2137.
## 6 VC_bot… 97.0 4.63 reac… 0.132 9.41 4.49 11 caus… 1725. 2616.
## 7 VC_bot… 101. 5.13 reac… 0.0354 7.59 3.16 11 caus… 3626. 4326.
## 8 VC_bot… 97.3 5.81 reac… 0.0379 8.55 3.89 11 caus… 2388. 3022.
## # ... with 32 more variables: C <dbl>, D <dbl>, E <dbl>, F <dbl>, G <dbl>,
## # H <dbl>, A_m <dbl>, B_m <dbl>, C_m <dbl>, D_m <dbl>, E_m <dbl>,
## # F_m <dbl>, G_m <dbl>, H_m <dbl>, longname <chr>, output <chr>,
## # pheno_group <chr>, biotype <chr>, model <chr>, transf <chr>,
## # offset <dbl>, winsorize <lgl>, sex <lgl>, Cohort <lgl>, Group <lgl>,
## # Subgroup <lgl>, ngen <lgl>, coat_color <lgl>, qtl_ct <int>,
## # info <chr>, local <lgl>, driver <chr>
autoplot(mediate_obj)
Now look at one of the mediators (the strongest one) in a scatter plot against the target. There are various flavors of plot for this.
scat_ls <- qtl2pattern::pull_mediator(mediate_obj, med_ls, patterns)
med_name <- sum_med$id[1]
mediator <- med_ls[[1]][, med_name, drop = FALSE]
driver_name <- (med_ls[[2]] %>%
filter(id == med_name))$driver
driver <- driver_med[,, driver_name]
sdp <- snpprobs_obj$snpinfo$sdp[which.min(abs(snpprobs_obj$snpinfo$pos - sum_med$pos[1]))]
scat_dat <- CausalMST::mediation_triad(
target = pheno_data,
mediator = mediator,
driver = driver,
covar_tar = covar,
covar_med = med_ls$covar,
kinship = kinship[[1]],
sdp = scat_ls$sdp,
allele = TRUE)
autoplot(scat_dat)
# See shinyDiplo.R and shinyProbs.R
pairprobs_obj <- query_probs(chr_id, start_val, end_val, allele = FALSE)
snppairprobs_obj <- qtl2pattern::get_snpprobs(chr_id, pos_Mbp, window_Mbp,
pheno_name,
pairprobs_obj$probs,
pairprobs_obj$map,
snpinfo)
snppair_scan_obj <- qtl2pattern::scan1_covar(pheno_data, covar,
snppairprobs_obj$snpprobs, kinship, analyses)
autoplot(snppair_scan_obj, snppairprobs_obj$snpinfo, show_all_snps = FALSE,
drop_hilit = 1.5,)
# See shinySNPPattern.R
autoplot(snppair_scan_obj, snppairprobs_obj$snpinfo, patterns = "hilit", drop_hilit = 1.5)
top_snppairs_tbl <-
qtl2pattern::top_snps_all(snppair_scan_obj,
snppairprobs_obj$snpinfo,
1.5)
(patterns <- summary(top_snppairs_tbl))
## # A tibble: 13 x 8
## pattern max_lod max_pos pheno pct min_lod sdp max_snp
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <int> <chr>
## 1 ABDEH:CFG 4.55 98.1 LD_distan… 3.68 4.37 100 SV_11_98122…
## 2 ABDH:CEFG 4.28 98.1 LD_distan… 3.03 3.20 116 rs29458114
## 3 BDEH:ACFG 4.18 97.0 LD_distan… 11.6 3.14 101 rs49048720
## 4 ABDEFH:CG 4.08 98.8 LD_distan… 1.41 4.07 68 rs29429364
## 5 CEFG:ABDH 3.98 97.0 LD_distan… 0.0500 3.21 139 rs27023987
## 6 BCDEH:AFG 3.74 96.7 LD_distan… 54.4 3.32 97 rs240529799
## 7 ABDE:CFGH 3.54 98.0 LD_distan… 9.27 3.53 228 rs29477418
## 8 ABCDEH:FG 3.50 97.1 LD_distan… 11.3 3.16 96 rs248950985
## 9 ABDEFGH:C 3.40 94.3 LD_distan… 3.36 3.09 4 rs51714934
## 10 ACFG:BDEH 3.14 93.3 LD_distan… 0.0300 3.14 154 rs27113575
## 11 ACFGH:BDE 3.14 96.6 LD_distan… 0.410 3.14 26 rs50497233
## 12 ABDEGH:CF 3.12 96.6 LD_distan… 0.0500 3.12 36 rs29390482
## 13 BCFG:ADEH 3.06 97.3 LD_distan… 1.44 3.06 153 rs48941594
# See shinyPattern.R
scan_pat <- qtl2pattern::scan_pattern(pairprobs_obj$probs,
pheno_data,
kinship, addcovar,
pairprobs_obj$map,
patterns)
pattern_choices <- qtl2pattern::sdp_to_pattern(patterns$sdp, LETTERS[1:8])
autoplot(scan_pat, pairprobs_obj$map, "lod", pattern_choices)
pheno_names <- peaks$pheno[1:2]
Assume for now covariates are the same. The routine qtl2pattern::scan1_covar
helps sort out covariates that change among phenotypes.
# See shinyMain.R
(analyses <- readRDS(file.path(project_dir, "analyses.rds")) %>%
filter(pheno %in% pheno_names))
## pheno longname output pheno_group
## 1 LD_distance_light LD_distance_light LD_distance_light Recla
## 2 LD_light_pct LD_light_pct LD_light_pct Recla
## pheno_type model transf offset winsorize sex Cohort Group Subgroup
## 1 recla normal identity 0 FALSE TRUE TRUE TRUE TRUE
## 2 recla normal identity 0 FALSE TRUE TRUE TRUE TRUE
## ngen coat_color
## 1 TRUE TRUE
## 2 TRUE TRUE
# See shinyMain.R
(pheno_data <- readRDS(file.path(project_dir, "pheno_data.rds")) %>%
qtl2pattern::pheno_trans(analyses$pheno,
analyses$transf,
analyses$offset,
analyses$winsorize)) %>%
head
## LD_distance_light LD_light_pct
## 1 2666.51 52.23
## 4 7195.21 56.50
## 5 6111.97 55.17
## 6 2703.62 40.66
## 7 1804.32 31.90
## 8 2178.14 35.24
# See shinyScanCoef.R
scan_obj <- qtl2pattern::scan1_covar(pheno_data, covar, probs_obj$probs, kinship,
analyses)
autoplot(scan_obj, probs_obj$map, 1:2)
snp_scan_obj <- qtl2pattern::scan1_covar(pheno_data, covar, snpprobs_obj$snpprobs, kinship, analyses)
autoplot(snp_scan_obj, snpprobs_obj$snpinfo, 1:2, show_all_snps = FALSE,
drop_hilit = 1.5, facet = "pheno")
# See shinySNPPattern.R
autoplot(snp_scan_obj, snpprobs_obj$snpinfo, 1:2,
patterns = "hilit", drop_hilit = 1.5, facet = "pheno")