Provean Sus scrofa: Difference between revisions

From HPCwiki
Jump to navigation Jump to search
No edit summary
No edit summary
Line 74: Line 74:
mv $TRANS.var $PROT.var;
mv $TRANS.var $PROT.var;
provean.sh -q $PROT.fa -v $PROT.var --save_supporting_set $PROT.sss >$PROT.result.txt 2>$PROT.error;
provean.sh -q $PROT.fa -v $PROT.var --save_supporting_set $PROT.sss >$PROT.result.txt 2>$PROT.error;
</source>
Individual transcripts can also be submitted using the following script:
<source lang='bash'>
#!/bin/bash
#SBATCH --time=4800
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=16000
#SBATCH --nice=1000
#SBATCH --output=output_%j.txt
#SBATCH --error=error_output_%j.txt
#SBATCH --job-name=Provean
#SBATCH --partition=ABGC_Research
#
# Run provean.sh for one transcript, unless its supporting set already exists.
#
# Argument: $1 - transcript ID to look up in the peptide FASTA and prot_alt.txt
#
# Reads prot_alt.txt from the current directory. It can be (re)generated with:
#   cat outVEP_*.txt | awk '$11~/\//' | sed 's/:/\t/' | sort -k6 >prot_alt.txt
#
# Writes to the current directory (PROT = protein ID found for the transcript):
#   PROT.var, PROT.var.info, PROT.fa, PROT.sss, PROT.result.txt, PROT.error

PEP_FASTA=/lustre/nobackup/WUR/ABGC/shared/public_data_store/genomes/pig/Ensembl74/pep/Sus_scrofa.Sscrofa10.2.74.pep.all.fa

TRANS=${1:-}
if [[ -z "$TRANS" ]]; then
  echo "error: missing transcript ID argument" >&2
  exit 2
fi

# For each line of the peptide FASTA that contains the transcript ID, take the
# first space-delimited word with the leading '>' removed.
PROT=$(grep -- "$TRANS" "$PEP_FASTA" | sed 's/ \+/\t/g' | sed 's/^>//' | cut -f1)

# Without exactly one protein ID every output file name below would be wrong.
if [[ -z "$PROT" ]]; then
  echo "error: no protein found for transcript '$TRANS' in $PEP_FASTA" >&2
  exit 1
fi
if [[ "$PROT" == *$'\n'* ]]; then
  echo "error: transcript '$TRANS' matches more than one protein:" >&2
  echo "$PROT" >&2
  exit 1
fi

# The saved supporting set doubles as the "already processed" marker.
if [[ -f "$PROT.sss" ]]; then
  echo "$PROT $TRANS already done."
else
  # Variant list: columns 11 and 12 of every matching line, additionally split
  # at the first '/', written as three comma-separated values per line.
  grep -- "$TRANS" prot_alt.txt \
    | awk '{print $11,$12}' \
    | sed 's/ \+/\t/' \
    | sed 's/\//\t/' \
    | awk '{OFS=","; print $1,$2,$3}' \
    | sed 's/\t//g' \
    | sed 's/ \+//g' >"$PROT.var"

  # Companion table: columns 1-3, 5-8, the protein ID and columns 11-15,
  # tab-separated.
  grep -- "$TRANS" prot_alt.txt \
    | awk -v prot="$PROT" '{OFS="\t"; print $1,$2,$3,$5,$6,$7,$8,prot, $11,$12,$13,$14,$15}' \
    >"$PROT.var.info"

  # The faOneRecord output for this protein is the query sequence for provean.
  faOneRecord "$PEP_FASTA" "$PROT" >"$PROT.fa"

  provean.sh -q "$PROT.fa" -v "$PROT.var" --save_supporting_set "$PROT.sss" \
    >"$PROT.result.txt" 2>"$PROT.error"
fi
</source>
</source>

Revision as of 12:00, 27 December 2013

From Variant Effect Predictor output, select only protein altering variants and sort by transcript: <source lang='bash'> cat outVEP_*.txt | awk '$11~/\//' | sed 's/:/\t/' | sort -k6 >prot_alt.txt </source>

Protein models for Sus scrofa:

 /lustre/nobackup/WUR/ABGC/shared/public_data_store/genomes/pig/Ensembl74/pep/Sus_scrofa.Sscrofa10.2.74.pep.all.fa


<source lang='bash'>
#!/bin/bash

  1. SBATCH --time=4800
  2. SBATCH --ntasks=1
  3. SBATCH --mem-per-cpu=16000
  4. SBATCH --nice=1000
  5. SBATCH --output=output_%j.txt
  6. SBATCH --error=error_output_%j.txt
  7. SBATCH --job-name=Provean
  8. SBATCH --partition=ABGC_Research
  9. cat outVEP_*.txt | awk '$11~/\//' | sed 's/:/\t/' | sort -k6 >prot_alt.txt

TELLER=100 echo $TELLER; let TELLER+=1; echo $TELLER; while [ $TELLER -gt 99 ]; do

 PROVS=`squeue | grep Provean | sed 's/^ \+//' | sed 's/ \+/\t/' | cut -f1`;
 for PROV in $PROVS; do scancel $PROV; done;
 sleep 10;
 for i in `seq 1 2`; do ssh fat00$i 'rm -rf /tmp/provean*'; done;
 for i in `seq 10 60`; do ssh node0$i 'rm -rf /tmp/provean*'; done;
 for i in `seq 1 9`; do ssh node00$i 'rm -rf /tmp/provean*'; done;
 TRANS=`cat prot_alt.txt | head -15000 | cut -f6 | sort | uniq`;
 TELLER2=0;
 for TRAN in $TRANS; do
    if [ $TELLER2 -lt 300 ]; then
      echo "transcript: $TRAN";
      echo "teller boven: $TELLER2";
      PROT=`cat /lustre/nobackup/WUR/ABGC/shared/public_data_store/genomes/pig/Ensembl74/pep/Sus_scrofa.Sscrofa10.2.74.pep.all.fa | grep $TRAN | sed 's/ \+/\t/g' | sed 's/^>//' | cut -f1`;
      echo "protein: $PROT";
      if [ -f $PROT.sss ];
       then
         echo "$PROT $TRAN already done";
       else
         echo "will do sbatch testProvean_sub.sh $TRAN'";
         sbatch runProvean_sub.sh $TRAN;
         let TELLER2+=1;
         echo "teller onder: $TELLER2";
      fi;
   fi;
 done;
 sleep 3600;

done

</source>

The 'runProvean_sub.sh' script referred to in the above script consists of the following code: <source lang='bash'>

  1. !/bin/bash
  2. SBATCH --time=4800
  3. SBATCH --ntasks=1
  4. SBATCH --mem-per-cpu=16000
  5. SBATCH --nice=1000
  6. SBATCH --output=output_%j.txt
  7. SBATCH --error=error_output_%j.txt
  8. SBATCH --job-name=Provean
  9. SBATCH --partition=ABGC_Research

TRANS=$1 PROT=`cat /lustre/nobackup/WUR/ABGC/shared/public_data_store/genomes/pig/Ensembl74/pep/Sus_scrofa.Sscrofa10.2.74.pep.all.fa | grep $TRANS | sed 's/ \+/\t/g' | sed 's/^>//' | cut -f1` cat prot_alt.txt | grep $TRANS | awk '{print $11,$12}' | sed 's/ \+/\t/' | sed 's/\//\t/' | awk '{OFS=","; print $1,$2,$3}' | sed 's/\t//g' | sed 's/ \+//g' >$TRANS.var; cat prot_alt.txt | grep $TRANS | awk -v prot=$PROT '{OFS="\t"; print $1,$2,$3,$5,$6,$7,$8,prot, $11,$12,$13,$14,$15}' >$PROT.var.info; faOneRecord /lustre/nobackup/WUR/ABGC/shared/public_data_store/genomes/pig/Ensembl74/pep/Sus_scrofa.Sscrofa10.2.74.pep.all.fa $PROT >$PROT.fa; mv $TRANS.var $PROT.var; provean.sh -q $PROT.fa -v $PROT.var --save_supporting_set $PROT.sss >$PROT.result.txt 2>$PROT.error; </source>

Individual transcripts can also be submitted using the following script: <source lang='bash'>

  1. !/bin/bash
  2. SBATCH --time=4800
  3. SBATCH --ntasks=1
  4. SBATCH --mem-per-cpu=16000
  5. SBATCH --nice=1000
  6. SBATCH --output=output_%j.txt
  7. SBATCH --error=error_output_%j.txt
  8. SBATCH --job-name=Provean
  9. SBATCH --partition=ABGC_Research
  10. cat outVEP_*.txt | awk '$11~/\//' | sed 's/:/\t/' | sort -k6 >prot_alt.txt

TRANS=$1 PROT=`cat /lustre/nobackup/WUR/ABGC/shared/public_data_store/genomes/pig/Ensembl74/pep/Sus_scrofa.Sscrofa10.2.74.pep.all.fa | grep $TRANS | sed 's/ \+/\t/g' | sed 's/^>//' | cut -f1` if [ -f $PROT.sss ];

 then
 echo "$PROT $TRANS already done.";
 else
 cat prot_alt.txt | grep $TRANS | awk '{print $11,$12}' | sed 's/ \+/\t/' | sed 's/\//\t/' | awk '{OFS=","; print $1,$2,$3}' | sed 's/\t//g' | sed 's/ \+//g' >$TRANS.var;
 cat prot_alt.txt | grep $TRANS | awk -v prot=$PROT '{OFS="\t"; print $1,$2,$3,$5,$6,$7,$8,prot, $11,$12,$13,$14,$15}' >$PROT.var.info;
 faOneRecord /lustre/nobackup/WUR/ABGC/shared/public_data_store/genomes/pig/Ensembl74/pep/Sus_scrofa.Sscrofa10.2.74.pep.all.fa $PROT >$PROT.fa;
 mv $TRANS.var $PROT.var;
 provean.sh -q $PROT.fa -v $PROT.var --save_supporting_set $PROT.sss >$PROT.result.txt 2>$PROT.error;

fi;

</source>