# INPUT: READS
# Description: A comma-separated list (no blank spaces) of FASTA or FASTQ read files (include the path)
# Notes:
# For paired-end reads, each end is expected to be in a separate FASTQ file, 
#   the order should be as follows: reads1_end1,reads1_end2,reads2_end1,reads2_end2...
# For two ends from the same read, the read names should be in the following format: read_base_name/1 and read_base_name/2
# -The read_base_name should be the same for two ends

reads_file = data/rank_4_probes.fa


# INPUT: REFERENCE FASTA FILES
# Description: The directory containing the sequence files corresponding to the reference genome (in FASTA format)
# -One chromosome per file
# -In the files, the header should have the chromosome name after '>' and should not contain a tab or a blank space
# -In the files, the chromosome name should be the same as the basename of the chromosome file 
# -The suffix of the chromosome file name should be 'fa' 
# -eg. If the chromosome name after '>' is 'chr1', then the file name should be 'chr1.fa'

chromosome_files_dir = scaffolds/


# INPUT: REFERENCE BOWTIE INDEX FILES
# Description: The path and basename of index to be searched by Bowtie. 
# -If the index does not exist, it will be built with bowtie-build from the reference genome given by chromosome_files_dir (command-line option -c).

Bowtieidx = genome_index/P.patens_index


# OUTPUT: OUTPUT DIRECTORY
# Description: The name of the directory in which MapSplice will write its output 

output_dir = output/


######################################################################
#Basic options
#

# INPUT READ FORMAT
# Description: Format of input reads, FASTA OR FASTQ
# -Read name after '>' or '@' should not contain a blank space or tab

reads_format = FASTA


# PAIRED or SINGLE END READS
# Description: Whether or not the input reads are paired end or single end

paired_end = no


# SEGMENT LENGTH
# Description: Length of read segments
# -Suggested to be in the range [18,25]; if the segment is too short it will be mapped everywhere
# -Segment length should not be longer than half of the read length
# -Segment length should not be longer than 25
# -If the read length can't be divided evenly, the read sequence will be truncated at the end for now 
#   (e.g. segment length of 25 for a 60 bp read will use segments of nucleotides 1-25 and 26-50)

segment_length = 25

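# READ LENGTH
# Description: Length of the input reads, in nucleotides (see the segment length notes above)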

read_length = 60

#

######################################################################
#Advanced options
#

# SEMI/NON-CANONICAL JUNCTIONS
# Description: Which junction types (canonical, semi-canonical, non-canonical) should be output
# -non-canonical: output all junctions, including non-canonical, semi-canonical, and canonical junctions
# -semi-canonical: output semi-canonical and canonical junctions
# -canonical: output canonical junctions only

junction_type = canonical


# SEMI/NON-CANONICAL FUSION JUNCTIONS 
# Description: Which fusion junction types (canonical, semi-canonical, non-canonical) should be output
# -non-canonical: output all fusion junctions, including non-canonical, semi-canonical, and canonical fusion junctions
# -semi-canonical: output semi-canonical and canonical fusion junctions
# -canonical: output canonical fusion junctions only
# It is suggested to output only canonical fusion junctions


fusion_junction_type = canonical


# REMAP
# Description: Whether or not reads should be remapped after the initial mapping that identifies junctions
#  (remapping increases the junction coverage)

full_running = yes


# ANCHOR LENGTH
# Description: The anchor length that will be used for single anchored spliced alignments
# -Decreasing this value will find more alignments, but increase running time
# -Should be >= 6

anchor_length = 8


# REMOVE TEMP FILES
# Description: Whether or not temp files should be deleted to save disk space after MapSplice is finished running

remove_temp_files = no


# SEGMENT MISMATCHES
# Description: The number of mismatches allowed in a segment during mapping
# -Should be in the range of [0,3]

segment_mismatches = 1


# SPLICE MISMATCHES
# Description: The number of mismatches allowed in a junction-spanning segment

splice_mismatches = 2


# REMAP MISMATCHES
# Description: The number of mismatches allowed in a segment during the remapping step

remap_mismatches = 3


# MIN INTRON LENGTH
# Description: The minimum intron length (default is 10 nucleotides)

min_intron_length = 1


# MAX INTRON LENGTH
# Description: The maximum intron length (default is 200,000 nucleotides)

max_intron_length = 200000


# NUMBER OF THREADS
# Description: The number of threads to be used for segment mapping (will be used by MapSplice in future)

threads = 1


# MAXIMUM NUMBER HITS
# Description: max_hits x 10 is the maximum number of repeated hits during segment mapping and read mapping (default is 4 x 10 = 40).
#  Reads mapping to more locations than this will be ignored

max_hits = 4


# MAX SMALL INDEL SIZE
# Description: The maximum small indel length (default is 3, suggested to be in [0-3])

max_insert = 3


# OUTPUT PARTIAL ALIGNMENTS
# Description: An option to output incomplete alignments.
# -The minimal number of segments contained in an alignment.
# -e.g. If the read length is 75bp and segment_length is 25, setting min_missed_seg = 1 will output 50bp alignments if there are no 75bp alignments for the corresponding reads
# -The default is to output alignments of the full read length

#min_missed_seg = 0

#(The option on the previous line is disabled by the leading hash: keep the hash to ignore the option, remove the hash to use it)


# SEARCH WHOLE CHROMOSOME
# Description: If turned on, search up to the maximum intron length away in exonic region and non-exonic region.
# exonic region: segment mapped region during segment mapping
# Normally MapSplice will only search up to the maximum intron length away in exonic region for fractions (i.e. small exons < segment length) of a spliced segment
# -This enables MapSplice to find spliced alignments in small exons (< segment length) at head and tail across the chromosome, but will increase running time

search_whole_chromosome = no


# MAP SEGMENT DIRECTLY
# Description: If yes, MapSplice will try to find both spliced and unspliced alignments of a read, and select the best alignment (this will increase running time).
# If no, MapSplice will first try to find unspliced alignments of a read; only if no unspliced alignments are found will it try to find spliced alignments for the read

map_segment_directly = no


# RUN MapPER
# Description: Whether or not MapPER (PMID 20576625) should be run to generate read mappings based on a probabilistic framework, valid for PER reads
# -More information about probabilistic framework is at bioinformatics.oxfordjournals.org/cgi/reprint/btq336v1.pdf

run_MapPER = no


# FUSION ALIGNMENT
# Description: Whether or not fusion junctions should be output
# -Reads not aligned as normal unspliced or spliced alignments are considered fusion candidates
# -The outputs are "fusion.junction" and "fusion_junction.unique" if full-running is turned on
# -The outputs are "fusion_remap_junction.unique.chr_seq.extracted" if full-running is not turned on

do_fusion = no


# CLUSTER REGIONS
# Description: Whether or not to use paired-end reads to generate cluster regions for fusion read mappings
# Paired-end reads are used to find fusion alignments with a single-anchored method
# e.g. use 2x50 paired-end reads and a 25bp segment length to find fusion alignments
# -Only valid for paired-end reads with the full running mode and fusion detection both on (set full_running = yes and do_fusion = yes)

do_cluster = no