# MapSplice run configuration.
# One "key = value" setting per line; every comment is on its own line starting with '#'.

# INPUT: READS
# Description: A comma separated (no blank space) list of FASTA or FASTQ read files (include path)
# Notes:
# For paired-end reads, each end is expected to be in a separate FASTQ file,
# the order should be as follows: reads1_end1,reads1_end2,reads2_end1,read2_end2...
# For two ends from the same read, the read names should be in the following format: read_base_name/1 and read_base_name/2
# -The read_base_name should be the same for two ends
reads_file = data/rank_4_probes.fa

# INPUT: REFERENCE FASTA FILES
# Description: The directory containing the sequence files corresponding to the reference genome (in FASTA format)
# -One chromosome per file
# -In the files, the header should have the chromosome name after '>' and should not contain a tab or a blank space
# -In the files, the chromosome name should be the same as the basename of the chromosome file
# -The suffix of the chromosome file name should be 'fa'
# -eg. If the chromosome name after '>' is 'chr1', then the file name should be 'chr1.fa'
chromosome_files_dir = scaffolds/

# INPUT: REFERENCE BOWTIE INDEX FILES
# Description: The path and basename of index to be searched by Bowtie.
# -If the index does not exist, it will be built from reference genomes indicated by option -c with bowtie-build.
Bowtieidx = genome_index/P.patens_index

# OUTPUT: OUTPUT DIRECTORY
# Description: The name of the directory in which MapSplice will write its output
output_dir = output/

######################################################################
#Basic options
#

# INPUT READ FORMAT
# Description: Format of input reads, FASTA OR FASTQ
# -Read name after '>' or '@' should not contain a blank space or tab
reads_format = FASTA

# PAIRED or SINGLE END READS
# Description: Whether or not the input reads are paired end or single end
paired_end = no

# SEGMENT LENGTH
# Description: Length of read segments
# -Suggested to be in range of [18,25], if the segment is too short it will be mapped everywhere,
# -Segment length should not be longer than half of the read length
# -Segment length should not be longer than 25
# -If the read length can't be divided evenly, the read sequence will be truncated at the end for now
# (e.g. segment length of 25 for a 60 bp read will use segments of nucleotides 1-25 and 26-50)
segment_length = 25

# NOTE(review): the original layout does not make clear whether read_length was an
# active setting or a commented-out one; it is left commented out here.
# Remove the leading hash on the next line if the read length must be supplied - TODO confirm.
# read_length = 60

#
######################################################################
#Advanced options
#

# SEMI/NON-CANONICAL JUNCTIONS
# Description: Whether or not the semi-canonical and non-canonical junctions should be outputted
# -non-canonical: output all junctions include non-canonical, semi-canonical, canonical junctions
# -semi-canonical: output semi-canonical, canonical junctions
# -canonical: output canonical junctions
junction_type = canonical

# SEMI/NON-CANONICAL FUSION JUNCTIONS
# Description: Whether or not the semi-canonical and non-canonical fusion junctions should be outputted
# -non-canonical: output all fusion junctions include non-canonical, semi-canonical, canonical fusion junctions
# -semi-canonical: output semi-canonical, canonical fusion junctions
# -canonical: output canonical fusion junctions
# Suggest output only canonical fusion junction
fusion_junction_type = canonical

# REMAP
# Description: Whether or not to remap reads after an initial mapping to identify junctions
# should be run to increase the junction coverage
full_running = yes

# ANCHOR LENGTH
# Description: The anchor length that will be used for single anchored spliced alignments
# -Decreasing this value will find more alignments, but increase running time
# -Should be >= 6
anchor_length = 8

# REMOVE TEMP FILES
# Description: Whether or not temp files should be deleted to save disk space after MapSplice is finished running
remove_temp_files = no

# SEGMENT MISMATCHES
# Description: The number of mismatches allowed in a segment during mapping
# -Should be in the range of [0,3]
segment_mismatches = 1

# SPLICE MISMATCHES
# Description: The number of mismatches allowed for in a junction spanning segment
splice_mismatches = 2

# REMAP MISMATCHES
# Description: The number of mismatches allowed in a segment during the remapping step
remap_mismatches = 3

# MIN INTRON LENGTH
# Description: The minimum intron length (default is 10 nucleotides)
min_intron_length = 1

# MAX INTRON LENGTH
# Description: The maximum intron length (default is 200,000 nucleotides)
max_intron_length = 200000

# NUMBER OF THREADS
# Description: The number of threads to be used for segment mapping (will be used by MapSplice in future)
threads = 1

# MAXIMUM NUMBER HITS
# Description: max_hits x 10 is the maximum number of repeated hits during segments mapping and reads mapping (default is 4 x 10 = 40).
# reads mapping to more locations than this will be ignored
max_hits = 4

# MAX SMALL INDEL SIZE
# Description: The maximum small indel length (default is 3, suggested to be in [0-3])
max_insert = 3

# OUTPUT PARTIAL ALIGNMENTS
# Description: An option to output incomplete alignments.
# -The minimal number of segments contained in alignment.
# -eg. If read length is 75bp, segment_length is 25, set min_missed_seg = 1 will output 50bp alignments if there are no 75bp alignments for the corresponding reads
# -The default is output alignments of full read length
# (in the option line below, keep the leading hash to ignore this option, remove the hash to use this option)
#min_missed_seg = 0

# SEARCH WHOLE CHROMOSOME
# Description: If turned on, search up to the maximum intron length away in exonic region and non-exonic region.
# exonic region: segment mapped region during segment mapping
# Normally MapSplice will only search up to the maximum intron length away in exonic region for fractions (i.e. small exons < segment length) of a spliced segment
# -This enables MapSplice to find spliced alignments in small exons (< segment length) at head and tail across the chromosome, but will increase running time
search_whole_chromosome = no

# MAP SEGMENT DIRECTLY
# Description: If yes, MapSplice will try to find spliced alignments and unspliced alignments of a read, and select best alignment. (will increase running time)
# If not, MapSplice will try to find unspliced alignments of a read, if no unspliced alignments are found, then MapSplice will try to find spliced alignments for the read
map_segment_directly = no

# RUN MapPER
# Description: Whether or not MapPER (PMID 20576625) should be run to generate read mappings based on a probabilistic framework, valid for PER reads
# -More information about probabilistic framework is at bioinformatics.oxfordjournals.org/cgi/reprint/btq336v1.pdf
run_MapPER = no

# FUSION ALIGNMENT
# Description: Whether or not fusion junctions should be output
# -Reads not aligned as normal unspliced or spliced alignments are considered as fusion candidates
# -The outputs are "fusion.junction" and "fusion_junction.unique" if full-running is turned on
# -The outputs are "fusion_remap_junction.unique.chr_seq.extracted" if full-running is not turned on
do_fusion = no

# CLUSTER REGIONS
# Description: Whether or not to use paired-end reads to generate cluster regions for fusion read mappings
# Use paired end read to find fusion alignment with a single anchored method
# e.g. use 2x50 paired read and 25bp segment length to find fusion alignment
# -Only valid for paired-end reads and the full running model and do_fusion on (set full_running = yes and do_fusion = yes)
do_cluster = no