diff options
Diffstat (limited to 'magic/Magdir/bioinformatics')
-rw-r--r-- | magic/Magdir/bioinformatics | 60 |
1 files changed, 30 insertions, 30 deletions
diff --git a/magic/Magdir/bioinformatics b/magic/Magdir/bioinformatics index 7de08a1e0088..0c761ed351fe 100644 --- a/magic/Magdir/bioinformatics +++ b/magic/Magdir/bioinformatics @@ -1,6 +1,6 @@ #------------------------------------------------------------------------------ -# $File: bioinformatics,v 1.2 2016/02/14 15:53:53 christos Exp $ +# $File: bioinformatics,v 1.4 2016/06/20 16:13:46 christos Exp $ # bioinfomatics: file(1) magic for Bioinfomatics file formats ############################################################################### @@ -16,32 +16,32 @@ ############################################################################### -# Tabix index file +# Tabix index file # used by SAMtools bgzip/tabix (http://samtools.sourceforge.net/tabix.shtml) ############################################################################### 0 string TBI\1 SAMtools TBI (Tabix index format) >0x04 lelong =1 \b, with %d reference sequence >0x04 lelong >1 \b, with %d reference sequences >0x08 lelong &0x10000 \b, using half-closed-half-open coordinates (BED style) ->0x08 lelong ^0x10000 +>0x08 lelong ^0x10000 >>0x08 lelong =0 \b, using closed and one based coordinates (GFF style) >>0x08 lelong =1 \b, using SAM format >>0x08 lelong =2 \b, using VCF format >0x0c lelong x \b, sequence name column: %d >0x10 lelong x \b, region start column: %d ->0x08 lelong =0 +>0x08 lelong =0 >>0x14 lelong x \b, region end column: %d >0x18 byte x \b, comment character: %c >0x1c lelong x \b, skip line count: %d ############################################################################### -# BAM (Binary Sequence Alignment/Map format) -# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf) +# BAM (Binary Sequence Alignment/Map format) +# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf) # data is normally present only within compressed BGZF blocks (CDATA), so use file -z to examine it ############################################################################### 0 string BAM\1 SAMtools BAM (Binary Sequence Alignment/Map) ->0x04 lelong >0 +>0x04 lelong >0 >>&0x00 regex =^[@]HD\t.*VN: \b, with SAM header >>>&0 regex =[0-9.]+ \b version %s >>&(0x04) lelong >0 \b, with %d reference sequences @@ -49,14 +49,14 @@ ############################################################################### # BAI (BAM indexing format) -# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf) +# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf) ############################################################################### 0 string BAI\1 SAMtools BAI (BAM indexing format) >0x04 lelong >0 \b, with %d reference sequences ############################################################################### -# CRAM (Binary Sequence Alignment/Map format) +# CRAM (Binary Sequence Alignment/Map format) ############################################################################### 0 string CRAM CRAM >0x04 byte >-1 version %d. @@ -69,13 +69,13 @@ # used by SAMtools & VCFtools (http://vcftools.sourceforge.net/bcf.pdf) # data is normally present only within compressed BGZF blocks (CDATA), so use file -z to examine it ############################################################################### -0 string BCF\4 +0 string BCF\4 # length of seqnm data in bytes is positive ->&0x00 lelong >0 +>&0x00 lelong >0 # length of smpl data in bytes is positive >>&(&-0x04) lelong >0 SAMtools BCF (Binary Call Format) # length of meta in bytes ->>>&(&-0x04) lelong >0 +>>>&(&-0x04) lelong >0 # have meta text string >>>>&0x00 search ##samtoolsVersion= >>>>>&0x00 string x \b, generated by SAMtools version %s @@ -88,7 +88,7 @@ ############################################################################### 0 string BCF\2\1 Binary Call Format (BCF) version 2.1 # length of header text ->&0x00 lelong >0 +>&0x00 lelong >0 # have header string >>&0x00 search ##samtoolsVersion= >>>&0x00 string x \b, generated by SAMtools version %s @@ -101,7 +101,7 @@ ############################################################################### 0 string BCF\2\2 Binary Call Format (BCF) version 2.2 # length of header text ->&0x00 lelong >0 +>&0x00 lelong >0 # have header string >>&0x00 search ##samtoolsVersion= >>>&0x00 string x \b, generated by SAMtools version %s @@ -119,11 +119,11 @@ ############################################################################### # XXX Broken? # @<seqname> -#0 regex =^@[A-Za-z0-9_.:-]+\?\n +#0 regex =^@[A-Za-z0-9_.:-]+\?\n # <seq> #>&1 regex =^[A-Za-z\n.~]++ # +[<seqname>] -#>>&1 regex =^[A-Za-z0-9_.:-]*\?\n +#>>&1 regex =^[A-Za-z0-9_.:-]*\?\n # <qual> #>>>&1 regex =^[!-~\n]+\n FASTQ @@ -132,7 +132,7 @@ # used by FASTA (http://fasta.bioch.virginia.edu/fasta_www2/fasta_guide.pdf) ############################################################################### #0 byte 0x3e -# q>0 regex =^[>][!-~\t\ ]+$ +# q>0 regex =^[>][!-~\t\ ]+$ # Amino Acid codes: [A-IK-Z*-]+ #>>1 regex !=[!-'Jj;:=?@^`|~\\] FASTA # IUPAC codes/gaps: [ACGTURYKMSWBDHVNX-]+ @@ -141,37 +141,37 @@ #>>>1 regex =^[EFIJLOPQZefijlopqz]+$ \b, with Amino Acid codes ############################################################################### -# SAM (Sequence Alignment/Map format) -# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf) +# SAM (Sequence Alignment/Map format) +# used by SAMtools (http://samtools.sourceforge.net/SAM1.pdf) ############################################################################### # Short-cut version to recognise SAM files with (optional) header at beginning ############################################################################### -0 string @HD\t +0 string @HD\t >4 search VN: Sequence Alignment/Map (SAM), with header >>&0 regex [0-9.]+ \b version %s ############################################################################### # Longer version to recognise SAM alignment lines using (many) regexes ############################################################################### # SAM Alignment QNAME -0 regex =^[!-?A-~]{1,255}(\t[^\t]+){11} +0 regex =^[!-?A-~]{1,255}(\t[^\t]+){11} # SAM Alignment FLAG ->0 regex =^([^\t]+\t){1}[0-9]{1,5}\t +>0 regex =^([^\t]+\t){1}[0-9]{1,5}\t # SAM Alignment RNAME ->>0 regex =^([^\t]+\t){2}\\*|[^*=]*\t +>>0 regex =^([^\t]+\t){2}\\*|[^*=]*\t # SAM Alignment POS ->>>0 regex =^([^\t]+\t){3}[0-9]{1,9}\t +>>>0 regex =^([^\t]+\t){3}[0-9]{1,9}\t # SAM Alignment MAPQ ->>>>0 regex =^([^\t]+\t){4}[0-9]{1,3}\t +>>>>0 regex =^([^\t]+\t){4}[0-9]{1,3}\t # SAM Alignment CIGAR ->>>>>0 regex =\t\\*|([0-9]+[MIDNSHPX=])+)\t +>>>>>0 regex =\t(\\*|([0-9]+[MIDNSHPX=])+)\t # SAM Alignment RNEXT ->>>>>>0 regex =\t(\\*|=|[!-()+->?-~][!-~]*)\t +>>>>>>0 regex =\t(\\*|=|[!-()+->?-~][!-~]*)\t # SAM Alignment PNEXT ->>>>>>>0 regex =^([^\t]+\t){7}[0-9]{1,9}\t +>>>>>>>0 regex =^([^\t]+\t){7}[0-9]{1,9}\t # SAM Alignment TLEN ->>>>>>>>0 regex =\t[+-]{0,1}[0-9]{1,9}\t.*\t +>>>>>>>>0 regex =\t[+-]{0,1}[0-9]{1,9}\t.*\t # SAM Alignment SEQ ->>>>>>>>>0 regex =^([^\t]+\t){9}(\\*|[A-Za-z=.]+)\t +>>>>>>>>>0 regex =^([^\t]+\t){9}(\\*|[A-Za-z=.]+)\t # SAM Alignment QUAL >>>>>>>>>>0 regex =^([^\t]+\t){10}[!-~]+ Sequence Alignment/Map (SAM) >>>>>>>>>>>0 regex =^[@]HD\t.*VN: \b, with header |