## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
#  bulk_long_pipeline(..., do_genome_align=FALSE, do_read_realign=FALSE)
#  # OR
#  sc_long_pipeline(..., do_genome_align=FALSE, do_read_realign=FALSE)

## ----eval=TRUE, echo=TRUE-----------------------------------------------------
# download required files using BiocFileCache
temp_path <- tempdir()
bfc <- BiocFileCache::BiocFileCache(temp_path, ask=FALSE)
file_url <- 
  "https://raw.githubusercontent.com/OliverVoogd/FLAMESData/master/data"
annot <- bfc[[names(BiocFileCache::bfcadd(bfc, "Annotation", 
                                          paste(file_url, 
                                                "SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf", 
                                                sep="/")))]] # [[ notation is used to get the local file path of the downloaded file
genome_fa <- bfc[[names(BiocFileCache::bfcadd(bfc, 
                                              "Genomefa", 
                                              paste(file_url, 
                                                    "SIRV_isoforms_multi-fasta_170612a.fasta", 
                                                    sep="/")))]]

# download the two fastq files, move them to a folder to be merged together
fastq1 <- bfc[[names(BiocFileCache::bfcadd(bfc, "Fastq1", paste(file_url, "fastq/sample1.fastq.gz", sep="/")))]]
fastq2 <- bfc[[names(BiocFileCache::bfcadd(bfc, "Fastq2", paste(file_url, "fastq/sample2.fastq.gz", sep="/")))]]
fastq_dir <- paste(temp_path, "fastq_dir", sep="/") # the downloaded fastq files need to be in a directory to be merged together
dir.create(fastq_dir)
file.copy(c(fastq1, fastq2), fastq_dir)
unlink(c(fastq1, fastq2)) # the original files can be deleted

# setup other environment variables
config_file <- system.file("extdata/SIRV_config_default.json", package="FLAMES") # the configuration file is included with the FLAMES package
outdir <- tempdir() # create a temporary output directory
if (!dir.exists(outdir)) {
    dir.create(outdir)
}

## ----eval=TRUE, echo=FALSE, results='hide'------------------------------------
# download files generated using minimap2 (these files need to be generated by the user on a system with access to Minimap2. More information is in the section [Genome Alignment](#genome-alignment-using-minimap2))
# files are renamed so that FLAMES can find related .bam.bai files using the same file name. BiocFileCache automatically gives the files a random prefix which affects this.
genome_bam <- paste0(temp_path, "/align2genome.bam")
file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Align BAM", paste(file_url, "align2genome.bam", sep="/")))]], genome_bam)

genome_index <- paste0(temp_path, "/align2genome.bam.bai")
file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Align BAM Index", paste0(file_url, "/align2genome.bam.bai")))]], genome_index)

realign_bam <- paste0(temp_path, "/realign2transcript.bam")
file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Realign BAM", paste(file_url, "realign2transcript.bam", sep="/")))]], realign_bam)

realign_index <- paste0(temp_path, "/realign2transcript.bam.bai")
file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Realign BAM Index", paste(file_url, "realign2transcript.bam.bai", sep="/")))]], realign_index)

## ----eval=TRUE----------------------------------------------------------------
library(FLAMES)
# If running the FLAMES single cell pipeline:
# pipeline_variables <- sc_windows_pipeline_setup(annot=annot, fastq=fastq_dir, outdir=outdir,
#                                                 genome_fa=genome_fa, config_file=config_file)
# or, if running the bulk pipeline:
pipeline_variables <- bulk_windows_pipeline_setup(annot=annot, fastq=fastq_dir, outdir=outdir, 
                                                  genome_fa=genome_fa, config_file=config_file)


## ---- eval=TRUE---------------------------------------------------------------
gff3_to_bed12(pipeline_variables$annot, pipeline_variables$tmp_bed)

## ---- eval=FALSE--------------------------------------------------------------
#  {_prog} -ax splice -t 12 {_others} -k14 --secondary=no {_index} {_fq} -o {_out}

## ---- eval=FALSE--------------------------------------------------------------
#  samtools_as_bam(pipeline_variables$tmp_sam, pipeline_variables$tmp_bam)
#  samtools_sort_index(pipeline_variables$tmp_bam, pipeline_variables$genome_bam)
#  file.remove(pipeline_variables$tmp_sam)
#  file.remove(pipeline_variables$tmp_bam)

## ----eval=TRUE, echo=FALSE, results='hide'------------------------------------
pipeline_variables$genome_bam = genome_bam

## ----eval=FALSE---------------------------------------------------------------
#  pipeline_variables <- windows_pipeline_isoforms(pipeline_variables)

## ----eval=TRUE, echo=FALSE, results='hide'------------------------------------
assembly <- paste0(temp_path, "/transcript_assembly.fa")
file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Transcript Assembly Fasta", paste(file_url, "transcript_assembly.fa", sep="/")))]], assembly)

assembly_index <- paste0(temp_path, "/transcript_assembly.fa.fai")
file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Transcript Assembly Index", paste0(file_url, "/transcript_assembly.fa.fai")))]], assembly_index)

isoforms_annot <- paste0(temp_path, "/isoform_annotated.gff3")
file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Annotated Isoform gff3", paste(file_url, "isoform_annotated.gff3", sep="/")))]], isoforms_annot)

tss_tes <- paste0(temp_path, "/tss_tes.bedgraph")
file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "TSS TES enrichment", paste0(file_url, "/tss_tes.bedgraph")))]], tss_tes)

pipeline_variables$isoform_gff3 <- isoforms_annot
pipeline_variables$transcript_fa <- assembly
pipeline_variables$transcript_fa_idx <- assembly_index
pipeline_variables$tss_tes_stat <- tss_tes

g <- parse_gff_tree(annot)
pipeline_variables$transcript_dict <- g$transcript_dict

i <- parse_gff_tree(isoforms_annot)
pipeline_variables$transcript_dict_i <- i$transcript_dict

## ---- eval=FALSE--------------------------------------------------------------
#  {_prog} -ax map-ont -p 0.9 --end-bonus 10 -N 3 -t 12 {_index} {_fq} -o {_out}

## ---- eval=FALSE--------------------------------------------------------------
#  samtools_as_bam(pipeline_variables$tmp_sam, pipeline_variables$tmp_bam)
#  samtools_sort_index(pipeline_variables$tmp_bam, pipeline_variables$realign_bam)
#  file.remove(pipeline_variables$tmp_sam)
#  file.remove(pipeline_variables$tmp_bam)

## ----eval=TRUE, echo=FALSE, results='hide'------------------------------------
pipeline_variables$realign_bam = realign_bam

## ----eval=TRUE----------------------------------------------------------------
se <- windows_pipeline_quantification(pipeline_variables)

## ----eval=TRUE----------------------------------------------------------------
se

## ----echo=FALSE---------------------------------------------------------------
utils::sessionInfo()

