From 3c21d7fc3c5f2b6810e9e2b267f9bfc9df4b1480 Mon Sep 17 00:00:00 2001 From: Laura Cook <l.cook2@student.unimelb.edu.au> Date: Thu, 15 Oct 2020 10:57:47 +1100 Subject: [PATCH] added links to scripts for lastz and RepeatMasker --- cross_species_comparison/README.md | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/cross_species_comparison/README.md b/cross_species_comparison/README.md index 85fefdc..73bb4e7 100644 --- a/cross_species_comparison/README.md +++ b/cross_species_comparison/README.md @@ -84,14 +84,15 @@ Run RepeatModeler to de novo find repeat regions in the dunnart genome: ``` BuildDatabase -name dunnart -engine ncbi Scras_dunnart_assem1.0_pb-ont-illsr_flyeassem_red-rd-scfitr2_pil2xwgs2_60chr.fasta -RepeatModeler -database dunnart +nohup RepeatModeler -database dunnart -pa 20 >& repeatmodeler.out ``` -Run RepeatMasker to mask repeats in dunnart genome (makes repeats lowercase): +Run RepeatMasker to mask repeats in dunnart genome (makes repeats lowercase). Run as an array for scaffolds to make it quicker. +Create commands for array slurm script: `repeatMasker.sh` ``` -RepeatMasker -xsmall Scras_dunnart_assem1.0_pb-ont-illsr_flyeassem_red-rd-scfitr2_pil2xwgs2_60chr.fasta -default_search_engine hmmer -trf_prgm /home/lecook/.conda/envs/wga/bin/trf -hmmer_dir /home/lecook/.conda/envs/wga/bin/ +RepeatMasker -q -xsmall Scras_dunnart_assem1.0_pb-ont-illsr_flyeassem_red-rd-scfitr2_pil2xwgs2_60chr.fasta -default_search_engine hmmer -trf_prgm /home/lecook/.conda/envs/wga/bin/trf -hmmer_dir /home/lecook/.conda/envs/wga/bin/ ``` #### Split into scaffolds @@ -125,17 +126,9 @@ To align placental mammals, we used previously determined lastz parameters (K = To align placental mammals, we used the lastz alignment parameters K = 2400, L = 3000, Y = 9400, H = 2000 and the lastz default scoring matrix, correspond- ing to parameter set 2 in Table 1. 
To align non-placental vertebrates, we used K = 2400, L = 3000, Y = 3400, H = 2000 and the HoxD55 scoring matrix. Citation: Increased alignment sensitivity improves the usage of genome alignments for comparative gene annotation. Nucleic Acids Res. 2017;45(14):8369–77. -``` -TRA=($(for file in *.fa; do echo $file |cut -d "." -f 1;done)) - -echo ${TRA[@]} - -for tr in ${TRA[@]}; +Create commands for running lastZ for all scaffolds: `lastz.sh` +Run as an array on slurm: `array_wrapper.slurm` -do - -echo lastz_32 /data/projects/punim0586/lecook/chipseq-pipeline/cross_species/data/genomes/mm10.fa[multi] /data/projects/punim0586/lecook/chipseq-pipeline/cross_species/data/genomes/smiCra1/${tr}.fa H=2000 K=2400 L=3000 Y=9400 --format=maf > /data/projects/punim0586/lecook/chipseq-pipeline/cross_species/data/genomes/${tr}_mm10.smiCra1.maf -``` #### Convert maf to axt-format -- GitLab