Calculate corrected theta from resequencing data: Difference between revisions

From HPCwiki
Jump to navigation Jump to search
(Created page with "<source lang ='bash'> #!/bin/bash #SBATCH --time=10000 #SBATCH --mem=4000 #SBATCH --ntasks=1 #SBATCH --nodes=1 #SBATCH --constraint=normalmem #SBATCH --output=output_%j.txt #S...")
 
No edit summary
Line 29: Line 29:
</source>
</source>


<source lang = 'R'>
<source lang = 'rsplus'>
files=list.files(pattern="wintheta")
files=list.files(pattern="wintheta")
a <- data.frame("file" = character(), "theta_het" = numeric())
a <- data.frame("file" = character(), "theta_het" = numeric())

Revision as of 11:21, 30 March 2014

<source lang ='bash'>

  1. !/bin/bash
  2. SBATCH --time=10000
  3. SBATCH --mem=4000
  4. SBATCH --ntasks=1
  5. SBATCH --nodes=1
  6. SBATCH --constraint=normalmem
  7. SBATCH --output=output_%j.txt
  8. SBATCH --error=error_output_%j.txt
  9. SBATCH --job-name=ngstheta
  10. SBATCH --partition=ABGC_Research

module load samtools/0.1.19 VAR=`gunzip -c /lustre/nobackup/WUR/ABGC/shared/Pig/vars_hjm_newbuild10_2/vars-flt_$1-final.txt.gz | cut -f8 | head -1000000 | sort | uniq -c | sed 's/^ \+//' | sed 's/ \+/\t/' | sort -k1 -nr | head -1 | cut -f2` let MAX=2*VAR echo "$1 max_depth is $MAX" MIN=$(( $VAR / 3 )) if [ $MIN -lt 5 ]; then MIN=4; fi

echo "$1 min_depth is $MIN" samtools mpileup -uf /lustre/nobackup/WUR/ABGC/shared/Pig/Sscrofa_build10_2/FASTA/Ssc10_2_chromosomes.fa /lustre/nobackup/WUR/ABGC/shared/Pig/BAM_files_hjm_newbuild10_2/$1_rh.bam | bcftools view -bvcg - > $1.mig.bcf bcftools view $1.mig.bcf | vcfutils.pl varFilter -d$MIN -D$MAX > $1.mig.vcf awk '$6 >= 20' $1.mig.vcf > $1.miguel.vcf samtools mpileup -Bq 20 -d 50000 /lustre/nobackup/WUR/ABGC/shared/Pig/BAM_files_hjm_newbuild10_2/$1_rh.bam | perl covXwin-v3.1.pl -v $1.miguel.vcf -w 50000 -d $MIN -m $MAX -b /lustre/nobackup/WUR/ABGC/shared/Pig/BAM_files_hjm_newbuild10_2/$1_rh.bam | ./ngs_theta -d $MIN -m $MAX > $1.wintheta </source>

<source lang='bash'> INDS=`cat bams.txt | grep -v LB01` for IND in $INDS; do sbatch nucdiv_pipeline.sh $IND; done </source>

<source lang = 'rsplus'> files=list.files(pattern="wintheta") a <- data.frame("file" = character(), "theta_het" = numeric()) for (file1 in files){x <- read.table(file1,header=T); mn=mean(x$THETA_HET[x$BP>20000 & x$CHR != 'chrUN_nr']); print(paste(file1,mn,sep=" "));a<- rbind(a,data.frame("file"=file1,"theta_het"=mn))} write.table(x=a,file="theta_het_results.txt") </source>