diff --git a/GC_content.R b/cross-species-analyses/GC_content.R similarity index 100% rename from GC_content.R rename to cross-species-analyses/GC_content.R diff --git a/GO_orthologous_peaks.R b/cross-species-analyses/GO_orthologous_peaks.R similarity index 100% rename from GO_orthologous_peaks.R rename to cross-species-analyses/GO_orthologous_peaks.R diff --git a/GO_plots.R b/cross-species-analyses/GO_plots.R similarity index 100% rename from GO_plots.R rename to cross-species-analyses/GO_plots.R diff --git a/README.md b/cross-species-analyses/README.md similarity index 100% rename from README.md rename to cross-species-analyses/README.md diff --git a/annotation_10M.R b/cross-species-analyses/annotation_10M.R similarity index 100% rename from annotation_10M.R rename to cross-species-analyses/annotation_10M.R diff --git a/biomart_compare_orthoG.R b/cross-species-analyses/biomart_compare_orthoG.R similarity index 100% rename from biomart_compare_orthoG.R rename to cross-species-analyses/biomart_compare_orthoG.R diff --git a/comparePeaks.sh b/cross-species-analyses/comparePeaks.sh similarity index 100% rename from comparePeaks.sh rename to cross-species-analyses/comparePeaks.sh diff --git a/compare_liftover_genes.R b/cross-species-analyses/compare_liftover_genes.R similarity index 100% rename from compare_liftover_genes.R rename to cross-species-analyses/compare_liftover_genes.R diff --git a/geneSet_upsetPlot.R b/cross-species-analyses/geneSet_upsetPlot.R similarity index 100% rename from geneSet_upsetPlot.R rename to cross-species-analyses/geneSet_upsetPlot.R diff --git a/genomicRegionsPlot.R b/cross-species-analyses/genomicRegionsPlot.R similarity index 100% rename from genomicRegionsPlot.R rename to cross-species-analyses/genomicRegionsPlot.R diff --git a/go_semantic_similarity.R b/cross-species-analyses/go_semantic_similarity.R similarity index 100% rename from go_semantic_similarity.R rename to cross-species-analyses/go_semantic_similarity.R diff --git a/go_similarity_clusters_heatmaps.R b/cross-species-analyses/go_similarity_clusters_heatmaps.R similarity index 100% rename from go_similarity_clusters_heatmaps.R rename to cross-species-analyses/go_similarity_clusters_heatmaps.R diff --git a/liftOverPeaks.R b/cross-species-analyses/liftOverPeaks.R similarity index 100% rename from liftOverPeaks.R rename to cross-species-analyses/liftOverPeaks.R diff --git a/mm9_vista.bed b/cross-species-analyses/mm9_vista.bed similarity index 100% rename from mm9_vista.bed rename to cross-species-analyses/mm9_vista.bed diff --git a/mm9_vista.fasta b/cross-species-analyses/mm9_vista.fasta similarity index 100% rename from mm9_vista.fasta rename to cross-species-analyses/mm9_vista.fasta diff --git a/mm9_vista.txt b/cross-species-analyses/mm9_vista.txt similarity index 100% rename from mm9_vista.txt rename to cross-species-analyses/mm9_vista.txt diff --git a/mouse_geneExp_clustering_TCseq.R b/cross-species-analyses/mouse_geneExp_clustering_TCseq.R similarity index 100% rename from mouse_geneExp_clustering_TCseq.R rename to cross-species-analyses/mouse_geneExp_clustering_TCseq.R diff --git a/peakFeature2.R b/cross-species-analyses/peakFeature2.R similarity index 100% rename from peakFeature2.R rename to cross-species-analyses/peakFeature2.R diff --git a/peakFeatures.R b/cross-species-analyses/peakFeatures.R similarity index 100% rename from peakFeatures.R rename to cross-species-analyses/peakFeatures.R diff --git a/peak_QC.R b/cross-species-analyses/peak_QC.R similarity index 100% rename from peak_QC.R rename to cross-species-analyses/peak_QC.R diff --git a/peak_metrics.R b/cross-species-analyses/peak_metrics.R similarity index 100% rename from peak_metrics.R rename to cross-species-analyses/peak_metrics.R diff --git a/peak_metrics_PCA.R b/cross-species-analyses/peak_metrics_PCA.R similarity index 100% rename from peak_metrics_PCA.R rename to cross-species-analyses/peak_metrics_PCA.R diff --git a/subsamplePeaks.R b/cross-species-analyses/subsamplePeaks.R similarity index 100% rename from subsamplePeaks.R rename to cross-species-analyses/subsamplePeaks.R diff --git a/dunnart/.DS_Store b/dunnart-chipseq/.DS_Store similarity index 100% rename from dunnart/.DS_Store rename to dunnart-chipseq/.DS_Store diff --git a/dunnart/README.md b/dunnart-chipseq/README.md similarity index 100% rename from dunnart/README.md rename to dunnart-chipseq/README.md diff --git a/dunnart/Snakefile b/dunnart-chipseq/Snakefile similarity index 100% rename from dunnart/Snakefile rename to dunnart-chipseq/Snakefile diff --git a/dunnart/Snakefile_10M b/dunnart-chipseq/Snakefile_10M similarity index 100% rename from dunnart/Snakefile_10M rename to dunnart-chipseq/Snakefile_10M diff --git a/dunnart/configs/.multiqc_config.yaml b/dunnart-chipseq/configs/.multiqc_config.yaml similarity index 100% rename from dunnart/configs/.multiqc_config.yaml rename to dunnart-chipseq/configs/.multiqc_config.yaml diff --git a/dunnart/configs/SRR.txt b/dunnart-chipseq/configs/SRR.txt similarity index 100% rename from dunnart/configs/SRR.txt rename to dunnart-chipseq/configs/SRR.txt diff --git a/dunnart/configs/cluster.json b/dunnart-chipseq/configs/cluster.json similarity index 100% rename from dunnart/configs/cluster.json rename to dunnart-chipseq/configs/cluster.json diff --git a/dunnart/configs/config.yaml b/dunnart-chipseq/configs/config.yaml similarity index 100% rename from dunnart/configs/config.yaml rename to dunnart-chipseq/configs/config.yaml diff --git a/dunnart/scripts/.DS_Store b/dunnart-chipseq/scripts/.DS_Store similarity index 100% rename from dunnart/scripts/.DS_Store rename to dunnart-chipseq/scripts/.DS_Store diff --git a/dunnart/scripts/GC_CpG_plots.R b/dunnart-chipseq/scripts/GC_CpG_plots.R similarity index 99% rename from dunnart/scripts/GC_CpG_plots.R rename to dunnart-chipseq/scripts/GC_CpG_plots.R index 418a54f6c50626597395888d8fbce0756810664c..8bdf5100091452139c9e765a7813c101402436bb 100644 --- a/dunnart/scripts/GC_CpG_plots.R +++ b/dunnart-chipseq/scripts/GC_CpG_plots.R @@ -1,5 +1,6 @@ # GC content and CpG % plots +# Purpose library(data.table) library(tidyverse) library(ggridges) diff --git a/dunnart/scripts/GO_plots.R b/dunnart-chipseq/scripts/GO_plots.R similarity index 100% rename from dunnart/scripts/GO_plots.R rename to dunnart-chipseq/scripts/GO_plots.R diff --git a/dunnart/scripts/array_wrapper.slurm.sh b/dunnart-chipseq/scripts/array_wrapper.slurm.sh similarity index 100% rename from dunnart/scripts/array_wrapper.slurm.sh rename to dunnart-chipseq/scripts/array_wrapper.slurm.sh diff --git a/dunnart/scripts/count_peaks.py b/dunnart-chipseq/scripts/count_peaks.py similarity index 100% rename from dunnart/scripts/count_peaks.py rename to dunnart-chipseq/scripts/count_peaks.py diff --git a/dunnart/scripts/encode_frip.py b/dunnart-chipseq/scripts/encode_frip.py similarity index 100% rename from dunnart/scripts/encode_frip.py rename to dunnart-chipseq/scripts/encode_frip.py diff --git a/dunnart/scripts/genes.peaks_perScaffold.R b/dunnart-chipseq/scripts/genes.peaks_perScaffold.R similarity index 100% rename from dunnart/scripts/genes.peaks_perScaffold.R rename to dunnart-chipseq/scripts/genes.peaks_perScaffold.R diff --git a/dunnart/scripts/genomicRegionsPlot.R b/dunnart-chipseq/scripts/genomicRegionsPlot.R similarity index 100% rename from dunnart/scripts/genomicRegionsPlot.R rename to dunnart-chipseq/scripts/genomicRegionsPlot.R diff --git a/dunnart/scripts/homerAnnot.sh b/dunnart-chipseq/scripts/homerAnnot.sh similarity index 100% rename from dunnart/scripts/homerAnnot.sh rename to dunnart-chipseq/scripts/homerAnnot.sh diff --git a/dunnart/scripts/homerMotifEnrich_plot.R b/dunnart-chipseq/scripts/homerMotifEnrich_plot.R similarity index 100% rename from dunnart/scripts/homerMotifEnrich_plot.R rename to dunnart-chipseq/scripts/homerMotifEnrich_plot.R diff --git a/dunnart/scripts/homer_motifEnrich.sh b/dunnart-chipseq/scripts/homer_motifEnrich.sh similarity index 100% rename from dunnart/scripts/homer_motifEnrich.sh rename to dunnart-chipseq/scripts/homer_motifEnrich.sh diff --git a/dunnart/scripts/overlap_dunnart_peaks_with_ARs.sh b/dunnart-chipseq/scripts/overlap_dunnart_peaks_with_ARs.sh similarity index 100% rename from dunnart/scripts/overlap_dunnart_peaks_with_ARs.sh rename to dunnart-chipseq/scripts/overlap_dunnart_peaks_with_ARs.sh diff --git a/dunnart/scripts/overlap_peaks.py b/dunnart-chipseq/scripts/overlap_peaks.py similarity index 100% rename from dunnart/scripts/overlap_peaks.py rename to dunnart-chipseq/scripts/overlap_peaks.py diff --git a/dunnart/scripts/peakAnno.R b/dunnart-chipseq/scripts/peakAnno.R similarity index 100% rename from dunnart/scripts/peakAnno.R rename to dunnart-chipseq/scripts/peakAnno.R diff --git a/dunnart/scripts/peakFeatures.R b/dunnart-chipseq/scripts/peakFeatures.R similarity index 100% rename from dunnart/scripts/peakFeatures.R rename to dunnart-chipseq/scripts/peakFeatures.R diff --git a/dunnart/scripts/subsample.sh b/dunnart-chipseq/scripts/subsample.sh similarity index 100% rename from dunnart/scripts/subsample.sh rename to dunnart-chipseq/scripts/subsample.sh diff --git a/dunnart/scripts/trimfastq.py b/dunnart-chipseq/scripts/trimfastq.py similarity index 100% rename from dunnart/scripts/trimfastq.py rename to dunnart-chipseq/scripts/trimfastq.py diff --git a/dunnart/scripts/twars_in_dunnart_peaks_hyperTest.R b/dunnart-chipseq/scripts/twars_in_dunnart_peaks_hyperTest.R similarity index 100% rename from dunnart/scripts/twars_in_dunnart_peaks_hyperTest.R rename to dunnart-chipseq/scripts/twars_in_dunnart_peaks_hyperTest.R diff --git a/dunnart/scripts/convergentTFBS_enrichment.R b/dunnart/scripts/convergentTFBS_enrichment.R deleted file mode 100755 index d5633b5adc93cc498297e4d0722bd72f508c2aee..0000000000000000000000000000000000000000 --- a/dunnart/scripts/convergentTFBS_enrichment.R +++ /dev/null @@ -1,168 +0,0 @@ - -## Author: Davide Vespasiani 2020 - -library(data.table);library(magrittr);library(dplyr) -library(tidyr);library(openxlsx) ## this is for exporting tables in excel format -library(wesanderson);library(RColorBrewer); library(viridis);library(viridisLite)## these are just for colors -library(ggthemes);library(ggplot2);library(ggpubr) -library(ggrepel) -library(readxl) - - -numb_threads=getDTthreads() -threads=setDTthreads(numb_threads-1) - -setwd('/Users/lauracook/OneDrive - The University of Melbourne/PhD (2018-2021)/5-TFBS/4-Results/1-BioMotif/') - -plot_dir='~/Desktop' -table_dir='~/Desktop/' - -input_file <- fread("motifs2.csv", sep=",", header=TRUE) - -## these lines count the number of twars per condition (i.e. lost/gained/conserved) in each cluster -## this is done with data.table package (if you dont know and you'll need R my adise is to look for it, it's very nice) -## in any case when you see object=object[,colname:=something] it basically creates a column (colname) with entries corresponding on the arguments on the right -## or if you see object=object[,c('colname1,'colname2'...)], it subset the objects retaining only the columns specified -input_file_counts=copy(input_file) -input_file_counts=input_file_counts[,c('twar','gain.lost','vierstra_cluster_no.','vierstra_cluster_name')] %>% - unique() ## ps: if you have a twar that disrupts the motif recognised by >1 tf belonging to the same cluster calling unique here will remove this duplicate and count it only once -input_file_counts=input_file_counts[,tfs_in_cluster_percondition:=.N,by=.(vierstra_cluster_no.,gain.lost)][ ## this means count all rows (i.e. .N) by= factor level in specified column - ,totTFs_percondition:=.N,by=.(gain.lost) - ][,tfs_not_cluster_percondition:=totTFs_percondition-tfs_in_cluster_percondition] ## row-wise subtraction - -## this function returns you a simplified version of the df subsetting it for the specified condition -simplify_table=function(x,condition){ - df=copy(x)[gain.lost%in%condition][ - ,c('vierstra_cluster_no.','vierstra_cluster_name','tfs_in_cluster_percondition','tfs_not_cluster_percondition') - ] %>% unique()## this unique here simplifies the resulting table -} - -input_file_counts_altered=simplify_table(input_file_counts,'Altered') -input_file_counts_lost=simplify_table(input_file_counts,'Lost') -input_file_counts_conserved=simplify_table(input_file_counts,'Conserved') - -concatenate_df=function(x){ - bgkr=input_file_counts_conserved - test=copy(x) - ## this line is an inner join, so it returns all rows of test where there is a matching value in bkgr for rows of the two columns specified in 'on' argument - ## i.e. it returns the clusters in the test set that are also in the bkgr set so that u can create the matrix for fisher test - test=test[bgkr,on=c('vierstra_cluster_no.','vierstra_cluster_name'),nomatch=0]%>% - setnames(old=c('tfs_in_cluster_percondition','tfs_not_cluster_percondition','i.tfs_in_cluster_percondition','i.tfs_not_cluster_percondition'), - new=c('test_in_cluster','test_not_cluster','bkgr_in_cluster','bkgr_not_cluster')) -} - -input_altered_vs_conserved=concatenate_df(input_file_counts_altered) - -input_gained_vs_conserved=concatenate_df(input_file_counts_gained) -input_lost_vs_conserved=concatenate_df(input_file_counts_lost) - -## this function computes the fisher.exact pvalues and fdr corrects them -fisher_pvalues=function(x){ - pvals=copy(x) - pvals=pvals[,c('vierstra_cluster_no.','vierstra_cluster_name'):=NULL] - pvals=apply(pvals, 1, - function(y) { - tbl <- matrix(as.numeric(y), ncol=2, byrow=T) - fisher.test(tbl)$p.value - }) ## this function applies to every row in your df. for each row it creates the 2x2 matrix and perform fisher test and returns just the p.value as vector of characters - ## ps if you are not sure how it works/dont believe me (i would understand it) just run this: - ## pvals=copy(your_input); pvals=pvals[,c(1:2):=NULL][1,] take first row only and then matrix(as.numeric(pvals), ncol=2, byrow=T). you'll see how it makes the 2x2 table - - pvalues_table=data.table(pval=pvals) ## then this converts the list of pvalues into a column (orders are maintained, so the first pval refers to the fisrt row in the input dataframe and so on) - pvalues_table=pvalues_table[ - ,adj_p:=p.adjust(pval,method = 'fdr')][ ## this creates another column with the adj_pvalues - ,log10_p_adjust:=-log10(adj_p) - ][ - ,significant_score:=ifelse(`adj_p`<=0.0001,'****', - ifelse(`adj_p`>0.0001 &`adj_p`<=0.001,'***', - ifelse(`adj_p`>0.001 & `adj_p`<=0.01,'**', - ifelse(`adj_p`>0.01 & `adj_p`<=0.05,'*',' ')))) - ] ## this is just for quick visualisation/subsecting of only the significant ones - - return(pvalues_table) -} - -input_altered_vs_conserved_pvals=fisher_pvalues(input_altered_vs_conserved) - -input_gained_vs_conserved_pvals=fisher_pvalues(input_gained_vs_conserved) -input_lost_vs_conserved_pvals=fisher_pvalues(input_lost_vs_conserved) - -## now it creates final df with all columns you need -altered_vs_conserved=cbind(input_altered_vs_conserved,input_altered_vs_conserved_pvals) - -gained_vs_conserved=cbind(input_gained_vs_conserved,input_gained_vs_conserved_pvals) -lost_vs_conserved=cbind(input_lost_vs_conserved,input_lost_vs_conserved_pvals) - -## this computes fold enrichment of the number of twars lost/gained for each cluster over that of conserved ones for the same cluster -## ps this is the way i have been calculating the fold enrichmnent(i.e. the ratio of your test set/bkgr set in the cluster over the mean of this ratio across all clusters) but you could change it -fold_enrichment=function(x,group){ - df=copy(x) - df=df[ - ,group:=group ## ps: i have created here a column that takes values specified in group argument (i.e. gained/lost) because this allows me at the end to combine the 2 df and use a single function to plot them together (in separate facets) -see below - ][,ratio_twar_lostgain_cluster:=test_in_cluster/bkgr_in_cluster - ][,mean_ratio:=mean(ratio_twar_lostgain_cluster) - ][,log2_fold_enrichment:=log2(ratio_twar_lostgain_cluster/mean_ratio) - ][,log10_numb_twars_incluster:=log10(test_in_cluster) - ][,c('vierstra_cluster_no.','vierstra_cluster_name','log2_fold_enrichment','pval','adj_p', - 'significant_score','log10_p_adjust','log10_numb_twars_incluster','group') - ]%>% setorderv('log10_p_adjust',-1) -} - -altered_vs_conserved_final=fold_enrichment(altered_vs_conserved,'Altered') - -gained_vs_conserved_final=fold_enrichment(gained_vs_conserved,'Gained') -lost_vs_conserved_final=fold_enrichment(lost_vs_conserved,'Lost') - -## here i make a single dt -final_table=rbind(gained_vs_conserved_final,lost_vs_conserved_final) - - -final_table=rbind(lost_vs_conserved_final) - - -## make volcano plot -tf_enrich_plot=function(x){ - df=copy(x)[,'Log10 total TFs per Cluster':=log10_numb_twars_incluster] - gradient=scale_colour_viridis(aes(`Log10 total TFs per cluster`),option="inferno",discrete = F) - #text=ifelse(!df$significant_score%in%' ',df$vierstra_cluster_name,'') ## this wont report in the plot the name of the clusters that are not significantly enriched/depleted - - ggplot(df,aes(x=log2_fold_enrichment,log10_p_adjust,label = text, col=log10_numb_twars_incluster))+ - geom_point(size=2)+ - geom_vline(xintercept=0, linetype="dashed", color = "black",size=0.2)+ - geom_text_repel(size = 5,color='black', - box.padding = unit(0.5, "lines"), - point.padding = unit(0.5, "lines") - )+ - gradient+ - xlab('\n Log2 fold enrichment \n')+ - ylab('-Log10 (P)')+ - xlim(-4,4)+ - facet_wrap(group~.,ncol = 2)+ ## so this is what i was saying above: the group column allows now to split the plots into 2 facets - theme(strip.text.x = element_text(), - strip.text.y = element_text(hjust = 0.5), - strip.background = element_rect(color = 'black', linetype = 'solid'), - strip.background.y = element_blank(), - strip.background.x =element_blank(), - panel.spacing=unit(1, "lines"), - panel.background =element_rect(fill = 'white', colour = 'black',size=1), - panel.grid.minor = element_blank(), - panel.grid.major = element_blank(), - legend.position = "bottom", - legend.key = element_rect(fill = "white", colour = "black"), - axis.line = element_blank()) -} - - -pdf(paste(plot_dir,'TFBS_enrichment_fisher_test_volcano.pdf',sep=''),width = 10,height=6) -tf_enrich_plot(final_table) -dev.off() - -## This code below produces an excel file with gained/lost enrichment results in two ≠sheets - -excel_table=copy(final_table) -excel_table=split(excel_table,as.factor(excel_table$group)) %>%lapply(function(y)y=y[ - ,c('group','vierstra_cluster_no.','vierstra_cluster_name','log2_fold_enrichment','log10_numb_twars_incluster','pval','adj_p','log10_p_adjust','significant_score')]%>% - unique()) -names(excel_table)=c('altered') ## name of the sheets - -write.xlsx(excel_table,paste(table_dir,'Table_twars_fisher_pvalues.xlsx',sep='')) \ No newline at end of file diff --git a/mouse/.DS_Store b/mouse-chipseq/.DS_Store similarity index 100% rename from mouse/.DS_Store rename to mouse-chipseq/.DS_Store diff --git a/mouse/README.md b/mouse-chipseq/README.md similarity index 100% rename from mouse/README.md rename to mouse-chipseq/README.md diff --git a/mouse/Snakefile1_H3K4me3 b/mouse-chipseq/Snakefile1_H3K4me3 similarity index 100% rename from mouse/Snakefile1_H3K4me3 rename to mouse-chipseq/Snakefile1_H3K4me3 diff --git a/mouse/Snakefile_H3K27ac b/mouse-chipseq/Snakefile_H3K27ac similarity index 100% rename from mouse/Snakefile_H3K27ac rename to mouse-chipseq/Snakefile_H3K27ac diff --git a/mouse/Snakefile_H3K4me3 b/mouse-chipseq/Snakefile_H3K4me3 similarity index 100% rename from mouse/Snakefile_H3K4me3 rename to mouse-chipseq/Snakefile_H3K4me3 diff --git a/mouse/configs/.multiqc_config.yaml b/mouse-chipseq/configs/.multiqc_config.yaml similarity index 100% rename from mouse/configs/.multiqc_config.yaml rename to mouse-chipseq/configs/.multiqc_config.yaml diff --git a/mouse/configs/SRR_H3K27ac.txt b/mouse-chipseq/configs/SRR_H3K27ac.txt similarity index 100% rename from mouse/configs/SRR_H3K27ac.txt rename to mouse-chipseq/configs/SRR_H3K27ac.txt diff --git a/mouse/configs/SRR_H3K4me3.txt b/mouse-chipseq/configs/SRR_H3K4me3.txt similarity index 100% rename from mouse/configs/SRR_H3K4me3.txt rename to mouse-chipseq/configs/SRR_H3K4me3.txt diff --git a/mouse/configs/cluster.json b/mouse-chipseq/configs/cluster.json similarity index 100% rename from mouse/configs/cluster.json rename to mouse-chipseq/configs/cluster.json diff --git a/mouse/configs/config_H3K27ac.yaml b/mouse-chipseq/configs/config_H3K27ac.yaml similarity index 100% rename from mouse/configs/config_H3K27ac.yaml rename to mouse-chipseq/configs/config_H3K27ac.yaml diff --git a/mouse/configs/config_H3K4me3.yaml b/mouse-chipseq/configs/config_H3K4me3.yaml similarity index 100% rename from mouse/configs/config_H3K4me3.yaml rename to mouse-chipseq/configs/config_H3K4me3.yaml diff --git a/mouse/envs/chip_environment.yml b/mouse-chipseq/envs/chip_environment.yml similarity index 100% rename from mouse/envs/chip_environment.yml rename to mouse-chipseq/envs/chip_environment.yml diff --git a/mouse/scripts/count_peaks.py b/mouse-chipseq/scripts/count_peaks.py similarity index 100% rename from mouse/scripts/count_peaks.py rename to mouse-chipseq/scripts/count_peaks.py diff --git a/mouse/scripts/encode_frip.py b/mouse-chipseq/scripts/encode_frip.py similarity index 100% rename from mouse/scripts/encode_frip.py rename to mouse-chipseq/scripts/encode_frip.py diff --git a/mouse/scripts/overlap_peaks.py b/mouse-chipseq/scripts/overlap_peaks.py similarity index 100% rename from mouse/scripts/overlap_peaks.py rename to mouse-chipseq/scripts/overlap_peaks.py diff --git a/mouse/scripts/subsample.sh b/mouse-chipseq/scripts/subsample.sh similarity index 100% rename from mouse/scripts/subsample.sh rename to mouse-chipseq/scripts/subsample.sh diff --git a/dunnart/scripts/create_jobs.sh b/whole-genome-alignment/create_jobs.sh similarity index 88% rename from dunnart/scripts/create_jobs.sh rename to whole-genome-alignment/create_jobs.sh index 9329754edea39e4e3df6bbced86ae42c102aa6d5..303ea68ef52666e5d4c0beed0b65db58949f386b 100755 --- a/dunnart/scripts/create_jobs.sh +++ b/whole-genome-alignment/create_jobs.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -## This script loops through all files in a directory +## Author: Laura E Cook, University of Melbourne +## Purpose: This script loops through all files in a directory ## These commands are then used in a slurm array script to run jobs in parallel TRA=($(for file in *.maf; do echo $file |cut -d "." -f 1-2;done)) # change this to whatever your file prefix is @@ -14,7 +15,6 @@ do # generate lastz commands echo 'lastz_32 /data/projects/punim0586/lecook/chipseq-pipeline/cross_species/data/genomes/mm10.fa[multiple] /data/projects/punim0586/lecook/chipseq-pipeline/cross_species/data/genomes/smiCra1_RM/'${tr}.fa 'H=2000 K=2400 L=3000 Y=3400 --scores=/data/projects/punim0586/lecook/chipseq-pipeline/cross_species/bin/GenomeAlignmentTools/HoxD55.q --format=maf > /data/projects/punim0586/lecook/chipseq-pipeline/cross_species/data/genomes/maf/'${tr}_mm10.smiCra1.maf - # generate convert maf to axt format commands echo 'maf-convert psl '${tr}.maf' > '${tr}.psl