RNAseqQuantification HowTo

From TaejoonLab
Jump to: navigation, search

Make the database of primary transcripts

$ bwa index -p my_db_name my_db.fa
$ rsem-prepare-reference my_db.fa my_db_name

Mapping RNA-seq reads on database

for FQ1 in $(ls ../fastq/*untie_1.fastq)
do
  FQ2=${FQ1/untie_1/untie_2}
  SAM=$(basename $FQ1)
  SAM=${SAM/.untie_1.fastq/_paired}"."$DBNAME".bwa_mem.sam"

  $BWA mem -t 12 $DB $FQ1 $FQ2 > $SAM
done

Filtering hits with multiple targets

To save the memory space for filtering reads with multiple hits, I split sam files first and filtered them with 'sam-prepare_rsem.py' script.. The version I used

SPLIT="$HOME/git/HTseq-toolbox/sam/split-sam-by-reads.py"
PREP_RSEM="$HOME/git/HTseq-toolbox/sam/sam-prepare_rsem.py"

for SAM in $(ls *.sam)
do
  $SPLIT $SAM
done

for SAM_PART in $(ls *sam_part*)
do
  echo $SAM_PART
  SAM_RSEM=$SAM_PART".rsem"
  $PREP_RSEM $SAM_PART > $SAM_RSEM
done

for PART_H in $(ls *sam_part_h.rsem)
do
   RSEM_IN=${PART_H/.sam_part_h.rsem/}".rsem.sam"
   PREFIX=$(echo $PART_H | awk -F"." '{print $1"."$2}')
   cp $PART_H $RSEM_IN
   for PART in $(ls $PREFIX*sam_part_[0-9]*.rsem)
   do
      echo $PART "->" $RSEM_IN
      cat $PART >> $RSEM_IN
   done
done

Run RSEM

RSEM_EXP="$HOME/src.HTseq/rsem/rsem-1.2.19/rsem-calculate-expression --paired-end -p 12 "
CONVERT="$HOME/src.HTseq/rsem/rsem-1.2.19/convert-sam-for-rsem"
REF="$WORK/xenopus.db/rsem.idx/XENLA_JGIv17p"

for SAM in $(ls *.rsem.sam)
do
  RSEM_IN=${SAM/.sam/}
  LOG=${SAM/.sam/}".log"

  $CONVERT $SAM $RSEM_IN 2>$LOG

  RSEM_BAM=$RSEM_IN".bam"
  RSEM_OUT=$RSEM_IN
  $RSEM_EXP --bam $RSEM_BAM $REF $RSEM_OUT 2>>$LOG
done