aboutsummaryrefslogtreecommitdiff
path: root/magic/Magdir/bioinformatics
diff options
context:
space:
mode:
Diffstat (limited to 'magic/Magdir/bioinformatics')
-rw-r--r-- magic/Magdir/bioinformatics 60
1 files changed, 30 insertions, 30 deletions
diff --git a/magic/Magdir/bioinformatics b/magic/Magdir/bioinformatics
index 7de08a1e0088..0c761ed351fe 100644
--- a/magic/Magdir/bioinformatics
+++ b/magic/Magdir/bioinformatics
@@ -1,6 +1,6 @@
#------------------------------------------------------------------------------
-# $File: bioinformatics,v 1.2 2016/02/14 15:53:53 christos Exp $
+# $File: bioinformatics,v 1.4 2016/06/20 16:13:46 christos Exp $
# bioinformatics: file(1) magic for Bioinformatics file formats
###############################################################################
@@ -16,32 +16,32 @@
###############################################################################
-# Tabix index file
+# Tabix index file
# used by SAMtools bgzip/tabix (http://samtools.sourceforge.net/tabix.shtml)
###############################################################################
0 string TBI\1 SAMtools TBI (Tabix index format)
>0x04 lelong =1 \b, with %d reference sequence
>0x04 lelong >1 \b, with %d reference sequences
>0x08 lelong &0x10000 \b, using half-closed-half-open coordinates (BED style)
->0x08 lelong ^0x10000
+>0x08 lelong ^0x10000
>>0x08 lelong =0 \b, using closed and one based coordinates (GFF style)
>>0x08 lelong =1 \b, using SAM format
>>0x08 lelong =2 \b, using VCF format
>0x0c lelong x \b, sequence name column: %d
>0x10 lelong x \b, region start column: %d
->0x08 lelong =0
+>0x08 lelong =0
>>0x14 lelong x \b, region end column: %d
>0x18 byte x \b, comment character: %c
>0x1c lelong x \b, skip line count: %d
###############################################################################
-# BAM (Binary Sequence Alignment/Map format)
-# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf)
+# BAM (Binary Sequence Alignment/Map format)
+# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf)
# data is normally present only within compressed BGZF blocks (CDATA), so use file -z to examine it
###############################################################################
0 string BAM\1 SAMtools BAM (Binary Sequence Alignment/Map)
->0x04 lelong >0
+>0x04 lelong >0
>>&0x00 regex =^[@]HD\t.*VN: \b, with SAM header
>>>&0 regex =[0-9.]+ \b version %s
>>&(0x04) lelong >0 \b, with %d reference sequences
@@ -49,14 +49,14 @@
###############################################################################
# BAI (BAM indexing format)
-# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf)
+# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf)
###############################################################################
0 string BAI\1 SAMtools BAI (BAM indexing format)
>0x04 lelong >0 \b, with %d reference sequences
###############################################################################
-# CRAM (Binary Sequence Alignment/Map format)
+# CRAM (Binary Sequence Alignment/Map format)
###############################################################################
0 string CRAM CRAM
>0x04 byte >-1 version %d.
@@ -69,13 +69,13 @@
# used by SAMtools & VCFtools (http://vcftools.sourceforge.net/bcf.pdf)
# data is normally present only within compressed BGZF blocks (CDATA), so use file -z to examine it
###############################################################################
-0 string BCF\4
+0 string BCF\4
# length of seqnm data in bytes is positive
->&0x00 lelong >0
+>&0x00 lelong >0
# length of smpl data in bytes is positive
>>&(&-0x04) lelong >0 SAMtools BCF (Binary Call Format)
# length of meta in bytes
->>>&(&-0x04) lelong >0
+>>>&(&-0x04) lelong >0
# have meta text string
>>>>&0x00 search ##samtoolsVersion=
>>>>>&0x00 string x \b, generated by SAMtools version %s
@@ -88,7 +88,7 @@
###############################################################################
0 string BCF\2\1 Binary Call Format (BCF) version 2.1
# length of header text
->&0x00 lelong >0
+>&0x00 lelong >0
# have header string
>>&0x00 search ##samtoolsVersion=
>>>&0x00 string x \b, generated by SAMtools version %s
@@ -101,7 +101,7 @@
###############################################################################
0 string BCF\2\2 Binary Call Format (BCF) version 2.2
# length of header text
->&0x00 lelong >0
+>&0x00 lelong >0
# have header string
>>&0x00 search ##samtoolsVersion=
>>>&0x00 string x \b, generated by SAMtools version %s
@@ -119,11 +119,11 @@
###############################################################################
# XXX Broken?
# @<seqname>
-#0 regex =^@[A-Za-z0-9_.:-]+\?\n
+#0 regex =^@[A-Za-z0-9_.:-]+\?\n
# <seq>
#>&1 regex =^[A-Za-z\n.~]++
# +[<seqname>]
-#>>&1 regex =^[A-Za-z0-9_.:-]*\?\n
+#>>&1 regex =^[A-Za-z0-9_.:-]*\?\n
# <qual>
#>>>&1 regex =^[!-~\n]+\n FASTQ
@@ -132,7 +132,7 @@
# used by FASTA (http://fasta.bioch.virginia.edu/fasta_www2/fasta_guide.pdf)
###############################################################################
#0 byte 0x3e
-# q>0 regex =^[>][!-~\t\ ]+$
+# q>0 regex =^[>][!-~\t\ ]+$
# Amino Acid codes: [A-IK-Z*-]+
#>>1 regex !=[!-'Jj;:=?@^`|~\\] FASTA
# IUPAC codes/gaps: [ACGTURYKMSWBDHVNX-]+
@@ -141,37 +141,37 @@
#>>>1 regex =^[EFIJLOPQZefijlopqz]+$ \b, with Amino Acid codes
###############################################################################
-# SAM (Sequence Alignment/Map format)
-# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf)
+# SAM (Sequence Alignment/Map format)
+# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf)
###############################################################################
# Short-cut version to recognise SAM files with (optional) header at beginning
###############################################################################
-0 string @HD\t
+0 string @HD\t
>4 search VN: Sequence Alignment/Map (SAM), with header
>>&0 regex [0-9.]+ \b version %s
###############################################################################
# Longer version to recognise SAM alignment lines using (many) regexes
###############################################################################
# SAM Alignment QNAME
-0 regex =^[!-?A-~]{1,255}(\t[^\t]+){11}
+0 regex =^[!-?A-~]{1,255}(\t[^\t]+){11}
# SAM Alignment FLAG
->0 regex =^([^\t]+\t){1}[0-9]{1,5}\t
+>0 regex =^([^\t]+\t){1}[0-9]{1,5}\t
# SAM Alignment RNAME
->>0 regex =^([^\t]+\t){2}\\*|[^*=]*\t
+>>0 regex =^([^\t]+\t){2}\\*|[^*=]*\t
# SAM Alignment POS
->>>0 regex =^([^\t]+\t){3}[0-9]{1,9}\t
+>>>0 regex =^([^\t]+\t){3}[0-9]{1,9}\t
# SAM Alignment MAPQ
->>>>0 regex =^([^\t]+\t){4}[0-9]{1,3}\t
+>>>>0 regex =^([^\t]+\t){4}[0-9]{1,3}\t
# SAM Alignment CIGAR
->>>>>0 regex =\t\\*|([0-9]+[MIDNSHPX=])+)\t
+>>>>>0 regex =\t(\\*|([0-9]+[MIDNSHPX=])+)\t
# SAM Alignment RNEXT
->>>>>>0 regex =\t(\\*|=|[!-()+->?-~][!-~]*)\t
+>>>>>>0 regex =\t(\\*|=|[!-()+->?-~][!-~]*)\t
# SAM Alignment PNEXT
->>>>>>>0 regex =^([^\t]+\t){7}[0-9]{1,9}\t
+>>>>>>>0 regex =^([^\t]+\t){7}[0-9]{1,9}\t
# SAM Alignment TLEN
->>>>>>>>0 regex =\t[+-]{0,1}[0-9]{1,9}\t.*\t
+>>>>>>>>0 regex =\t[+-]{0,1}[0-9]{1,9}\t.*\t
# SAM Alignment SEQ
->>>>>>>>>0 regex =^([^\t]+\t){9}(\\*|[A-Za-z=.]+)\t
+>>>>>>>>>0 regex =^([^\t]+\t){9}(\\*|[A-Za-z=.]+)\t
# SAM Alignment QUAL
>>>>>>>>>>0 regex =^([^\t]+\t){10}[!-~]+ Sequence Alignment/Map (SAM)
>>>>>>>>>>>0 regex =^[@]HD\t.*VN: \b, with header