From 56722cf29e649aec59f5df668bd7b411c07eeb96 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Fri, 23 Oct 2015 15:37:48 +0300 Subject: [PATCH 1/8] start function for better parsing of results --- primer_designer/designer.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/primer_designer/designer.py b/primer_designer/designer.py index 23399be..249988c 100644 --- a/primer_designer/designer.py +++ b/primer_designer/designer.py @@ -115,7 +115,7 @@ def __init__(self, folder=None, taxon_for_codon_usage=None, tm="55", self.clustype = clustype self.amptype = amptype self.email = email - self.designed_primers = [] + self.report = "" def design_primers(self): if os.path.exists(self.folder): @@ -176,7 +176,14 @@ def process_response(self, aln, response_body): with open("{0}.html".format(aln), "w") as handle: handle.write(response_body) - # Show primer pair to user + self.make_report_from_html_file(response_body, this_file) + + def make_report_from_html_file(self, response_body, this_file): + """Processes the results from primer4clades (a html file). + + Extracts the best possible primer pair (with highest quality and + longest amplicon). + """ html_file = response_body.split("\n") i = 1 while i < 4: @@ -193,7 +200,7 @@ def process_response(self, aln, response_body): seq_record = SeqRecord(seq) seq_record.id = this_id seq_record.description = description - self.designed_primers.append(seq_record) + self.report += seq_record i += 1 def request_primers(self, aln): From f22878530353524c53c04c601497a89613fc4bcf Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Mon, 26 Oct 2015 11:28:56 +0200 Subject: [PATCH 2/8] choosing best amplicon --- primer_designer/designer.py | 65 +++++++++++++++++++++++++++++++------------ tests/test_primer-designer.py | 4 +-- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/primer_designer/designer.py b/primer_designer/designer.py index 249988c..afef352 100644 --- a/primer_designer/designer.py +++ b/primer_designer/designer.py @@ -184,24 +184,53 @@ def make_report_from_html_file(self, response_body, this_file): Extracts the best possible primer pair (with highest quality and longest amplicon). """ - html_file = response_body.split("\n") - i = 1 - while i < 4: - for line in html_file: - if "degen_corr" in line: - seq = line.split(" ")[0].strip() - - description = line.split(" ")[2].strip() - - this_id = this_file + "_" + line.split(" ")[1].strip() - this_id += "_" + str(i) - - seq = Seq(seq, IUPAC.ambiguous_dna) - seq_record = SeqRecord(seq) - seq_record.id = this_id - seq_record.description = description - self.report += seq_record - i += 1 + html_file = response_body + import re + amplicons = re.findall("(## Amplicon.+)codon", html_file) + primers_codehop = self.group_primers(re.findall("(\w+ codeh)_corr.+\n", html_file)) + primers_relaxed = self.group_primers(re.findall("(\w+ relax)_corr.+\n", html_file)) + primers_degen = self.group_primers(re.findall("(\w+ degen)_corr.+\n", html_file)) + primer_pair_qualities = re.findall("# primer pair.+= ([0-9]+)%\n", html_file) + expected_pcr_product_lengths = re.findall("# expected PCR .+= ([0-9]+)\n", html_file) + forward_temperatures = re.findall("(# fwd: minTm.+)\n", html_file) + reverse_temperatures = re.findall("(# rev: minTm.+)\n", html_file) + + amplicon_tuples = zip(amplicons, primers_codehop, primers_relaxed, primers_degen, + primer_pair_qualities, expected_pcr_product_lengths, + forward_temperatures, reverse_temperatures) + + amplicon = self.choose_best_amplicon(amplicon_tuples) + print(amplicon) + self.report = "" + + def group_primers(self, my_list): + """Group elements in list by certain number 'n'""" + new_list = [] + n = 2 + for i in range(0, len(my_list), n): + grouped_primers = my_list[i:i + n] + forward_primer = grouped_primers[0].split(" ") + reverse_primer = grouped_primers[1].split(" ") + formated_primers = ">F_{0}\n{1}".format(forward_primer[1], forward_primer[0]) + formated_primers += "\n>R_{0}\n{1}".format(reverse_primer[1], reverse_primer[0]) + new_list.append(formated_primers) + return new_list + + def choose_best_amplicon(self, amplicon_tuples): + """Iterates over amplicon tuples and returns the one with highest quality + and amplicon length. + """ + quality = 0 + amplicon_length = 0 + best_amplicon = None + + for amplicon in amplicon_tuples: + if int(amplicon[4]) >= quality and int(amplicon[5]) >= amplicon_length: + quality = int(amplicon[4]) + amplicon_length = int(amplicon[5]) + best_amplicon = amplicon + + return best_amplicon def request_primers(self, aln): url = "http://floresta.eead.csic.es/primers4clades/primers4clades.cgi" diff --git a/tests/test_primer-designer.py b/tests/test_primer-designer.py index e9b1f0b..68bca88 100644 --- a/tests/test_primer-designer.py +++ b/tests/test_primer-designer.py @@ -47,8 +47,8 @@ def test_request_primers(self): def test_process_response(self): self.pd.process_response(ALIGNMENT, open(RESPONSE).read()) - self.assertTrue('gayaaytaygahytdaargaagaaytdggvaargghgc' in - [str(seq.seq) for seq in self.pd.designed_primers]) + self.assertEqual('gayaaytaygahytdaargaagaaytdggvaargghgc', + self.pd.report) def test_inserting_taxon_in_fasta_seq_descriptions(self): self.pd.taxon_for_codon_usage = 'Bombyx mori' From f18e9516aaa48377abe9234bd4a277f209610780 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Mon, 26 Oct 2015 11:45:08 +0200 Subject: [PATCH 3/8] formatting best amplicon --- primer_designer/designer.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/primer_designer/designer.py b/primer_designer/designer.py index afef352..758c9d3 100644 --- a/primer_designer/designer.py +++ b/primer_designer/designer.py @@ -199,8 +199,19 @@ def make_report_from_html_file(self, response_body, this_file): primer_pair_qualities, expected_pcr_product_lengths, forward_temperatures, reverse_temperatures) - amplicon = self.choose_best_amplicon(amplicon_tuples) - print(amplicon) + best_amplicon = self.choose_best_amplicon(amplicon_tuples) + best_amplicon_formatted = "" + for idx, value in enumerate(best_amplicon): + if idx in [2, 3]: + best_amplicon_formatted += "\n\n{0}".format(value) + elif idx == 4: + best_amplicon_formatted += "\n\n# primer pair quality = {0}%".format(value) + elif idx == 5: + best_amplicon_formatted += "\n# expected PCR product length (nt) = {0}".format(value) + else: + best_amplicon_formatted += "\n{0}".format(value) + + print(best_amplicon_formatted) self.report = "" def group_primers(self, my_list): @@ -211,9 +222,9 @@ def group_primers(self, my_list): grouped_primers = my_list[i:i + n] forward_primer = grouped_primers[0].split(" ") reverse_primer = grouped_primers[1].split(" ") - formated_primers = ">F_{0}\n{1}".format(forward_primer[1], forward_primer[0]) - formated_primers += "\n>R_{0}\n{1}".format(reverse_primer[1], reverse_primer[0]) - new_list.append(formated_primers) + formatted_primers = ">F_{0}\n{1}".format(forward_primer[1], forward_primer[0]) + formatted_primers += "\n>R_{0}\n{1}".format(reverse_primer[1], reverse_primer[0]) + new_list.append(formatted_primers) return new_list def choose_best_amplicon(self, amplicon_tuples): From 3a4c8e96ef31ab41cddde8d9876f206852030d9a Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Mon, 26 Oct 2015 12:07:29 +0200 Subject: [PATCH 4/8] report ready --- primer_designer/designer.py | 60 ++++++++++++++++++++++++++----------------- tests/test_primer-designer.py | 29 ++++++++++++++++++--- 2 files changed, 62 insertions(+), 27 deletions(-) diff --git a/primer_designer/designer.py b/primer_designer/designer.py index 758c9d3..a61b310 100644 --- a/primer_designer/designer.py +++ b/primer_designer/designer.py @@ -3,8 +3,6 @@ import re import requests -from Bio.Alphabet import IUPAC -from Bio.Seq import Seq from Bio.SeqIO import SeqRecord from Bio import SeqIO @@ -181,38 +179,52 @@ def process_response(self, aln, response_body): def make_report_from_html_file(self, response_body, this_file): """Processes the results from primer4clades (a html file). - Extracts the best possible primer pair (with highest quality and - longest amplicon). + Makes a report based on the best possible primer pair (with highest + quality and longest amplicon). """ - html_file = response_body - import re - amplicons = re.findall("(## Amplicon.+)codon", html_file) - primers_codehop = self.group_primers(re.findall("(\w+ codeh)_corr.+\n", html_file)) - primers_relaxed = self.group_primers(re.findall("(\w+ relax)_corr.+\n", html_file)) - primers_degen = self.group_primers(re.findall("(\w+ degen)_corr.+\n", html_file)) - primer_pair_qualities = re.findall("# primer pair.+= ([0-9]+)%\n", html_file) - expected_pcr_product_lengths = re.findall("# expected PCR .+= ([0-9]+)\n", html_file) - forward_temperatures = re.findall("(# fwd: minTm.+)\n", html_file) - reverse_temperatures = re.findall("(# rev: minTm.+)\n", html_file) - - amplicon_tuples = zip(amplicons, primers_codehop, primers_relaxed, primers_degen, - primer_pair_qualities, expected_pcr_product_lengths, - forward_temperatures, reverse_temperatures) + amplicon_tuples = self.get_amplicon_data_as_tuples(response_body) best_amplicon = self.choose_best_amplicon(amplicon_tuples) + + self.report += """\n\n\ +#################################################### +# Alignment {0} +""".format(this_file) + self.report += self.format_amplicon(best_amplicon) + + def get_amplicon_data_as_tuples(self, response_body): + amplicons = re.findall("(## Amplicon.+) codon", response_body) + primers_codehop = self.group_primers( re.findall("(\w+ codeh)_corr.+\n", response_body)) + primers_relaxed = self.group_primers( re.findall("(\w+ relax)_corr.+\n", response_body)) + primers_degen = self.group_primers( re.findall("(\w+ degen)_corr.+\n", response_body)) + primer_pair_qualities = re.findall("# primer pair.+= ([0-9]+)%\n", response_body) + expected_pcr_product_lengths = re.findall( "# expected PCR .+= ([0-9]+)\n", response_body) + forward_temperatures = re.findall("(# fwd: minTm.+)\n", response_body) + reverse_temperatures = re.findall("(# rev: minTm.+)\n", response_body) + + amplicon_tuples = zip(amplicons, primers_codehop, primers_relaxed, + primers_degen, + primer_pair_qualities, + expected_pcr_product_lengths, + forward_temperatures, reverse_temperatures) + return amplicon_tuples + + def format_amplicon(self, best_amplicon): best_amplicon_formatted = "" for idx, value in enumerate(best_amplicon): - if idx in [2, 3]: + if idx == 0: + best_amplicon_formatted += "{0}".format(value).replace("##", "# Best") + elif idx in [2, 3]: best_amplicon_formatted += "\n\n{0}".format(value) elif idx == 4: - best_amplicon_formatted += "\n\n# primer pair quality = {0}%".format(value) + best_amplicon_formatted += "\n\n# primer pair quality = {0}%".format( + value) elif idx == 5: - best_amplicon_formatted += "\n# expected PCR product length (nt) = {0}".format(value) + best_amplicon_formatted += "\n# expected PCR product length (nt) = {0}".format( + value) else: best_amplicon_formatted += "\n{0}".format(value) - - print(best_amplicon_formatted) - self.report = "" + return best_amplicon_formatted def group_primers(self, my_list): """Group elements in list by certain number 'n'""" diff --git a/tests/test_primer-designer.py b/tests/test_primer-designer.py index 68bca88..1ec25c8 100644 --- a/tests/test_primer-designer.py +++ b/tests/test_primer-designer.py @@ -25,6 +25,7 @@ def setUp(self): amptype="dna_GTRG", email="youremail@email.com", ) + self.maxDiff = None def tearDown(self): output_html_file = '{0}.html'.format(ALIGNMENT) @@ -45,10 +46,32 @@ def test_request_primers(self): resp = self.pd.request_primers(ALIGNMENT) assert resp.content.decode('ascii') == response_html_body - def test_process_response(self): + def test_report_from_html_response(self): + expected = """\n\n\ +#################################################### +# Alignment Ca2.fst +# Best Amplicon 4 +>F_codeh +GACCTGAAAGAAGAACTGggvaargghgc +>R_codeh +CCAGGTGTACCAGCGaadccraacca + +>F_relax +GACCTGAAAGAAGAACTGggvaargghgc +>R_relax +CCAGGTGTACCAgcraadccraacca + +>F_degen +gahytdaargaagaaytdggvaargghgc +>R_degen +ccnggdgtdccdgcraadccraacca + +# primer pair quality = 80% +# expected PCR product length (nt) = 471 +# fwd: minTm = 62.0 maxTm = 67.5 +# rev: minTm = 65.5 maxTm = 68.8""" self.pd.process_response(ALIGNMENT, open(RESPONSE).read()) - self.assertEqual('gayaaytaygahytdaargaagaaytdggvaargghgc', - self.pd.report) + self.assertEqual(expected, self.pd.report) def test_inserting_taxon_in_fasta_seq_descriptions(self): self.pd.taxon_for_codon_usage = 'Bombyx mori' From 33bfd8aaf6b4658f2feb1b83fc0a9bb50f79734d Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 27 Oct 2015 14:26:27 +0200 Subject: [PATCH 5/8] ready --- primer_designer/designer.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/primer_designer/designer.py b/primer_designer/designer.py index a61b310..184c19b 100644 --- a/primer_designer/designer.py +++ b/primer_designer/designer.py @@ -133,9 +133,11 @@ def design_primers(self): self.process_response(aln, r.text) # Write primers to alignment file - SeqIO.write(self.designed_primers, "primers.fasta", "fasta") - print("\nDone.\nAll primers have been saved in the file \"primers.fasta\"") - return self.designed_primers + with open("primers_report.txt", "a") as handle: + handle.write(self.report) + + print("\nDone.\nAll primers have been saved in the file \"primers_report.txt\"") + return self.report else: print("\nError! the folder {0} is empty.\n".format(self.folder)) else: @@ -186,11 +188,12 @@ def make_report_from_html_file(self, response_body, this_file): best_amplicon = self.choose_best_amplicon(amplicon_tuples) - self.report += """\n\n\ + if best_amplicon is not None: + self.report += """\n\n\ #################################################### # Alignment {0} """.format(this_file) - self.report += self.format_amplicon(best_amplicon) + self.report += self.format_amplicon(best_amplicon) def get_amplicon_data_as_tuples(self, response_body): amplicons = re.findall("(## Amplicon.+) codon", response_body) From 9742b2045b6d19ca25f370d680bb427d6cb87852 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 27 Oct 2015 14:41:57 +0200 Subject: [PATCH 6/8] some refactoring --- primer_designer/designer.py | 48 ++++++++++++++++++++++++++----------------- tests/test_primer-designer.py | 10 +++++++++ 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/primer_designer/designer.py b/primer_designer/designer.py index 184c19b..57b8d0d 100644 --- a/primer_designer/designer.py +++ b/primer_designer/designer.py @@ -1,6 +1,7 @@ import glob import os import re +import sys import requests from Bio.SeqIO import SeqRecord @@ -116,32 +117,41 @@ def __init__(self, folder=None, taxon_for_codon_usage=None, tm="55", self.report = "" def design_primers(self): - if os.path.exists(self.folder): - all_files = os.path.join(self.folder, "*") - alns = glob.glob(all_files) + alns = self.get_alignments() - if alns: - for aln in alns: - if is_fasta(aln): + if alns: + self.call_primer4clades_for_primers(alns) - if self.taxon_for_codon_usage: - aln = self.insert_taxon_in_new_fasta_file(aln) + # Write primers to alignment file + with open("primers_report.txt", "a") as handle: + handle.write(self.report) - print("\nProcessing file \"{0}\"".format(aln)) + print("\nDone.\nAll primers have been saved in the file \"primers_report.txt\"") + return self.report + else: + print("\nError! the folder {0} is empty.\n".format(self.folder)) + sys.exit(1) - r = self.request_primers(aln) - self.process_response(aln, r.text) + def call_primer4clades_for_primers(self, alns): + for aln in alns: + if is_fasta(aln): - # Write primers to alignment file - with open("primers_report.txt", "a") as handle: - handle.write(self.report) + if self.taxon_for_codon_usage: + aln = self.insert_taxon_in_new_fasta_file(aln) - print("\nDone.\nAll primers have been saved in the file \"primers_report.txt\"") - return self.report - else: - print("\nError! the folder {0} is empty.\n".format(self.folder)) + print("\nProcessing file \"{0}\"".format(aln)) + + r = self.request_primers(aln) + self.process_response(aln, r.text) + + def get_alignments(self): + if os.path.exists(self.folder): + all_files = os.path.join(self.folder, "*") + alns = glob.glob(all_files) else: - print("\nError! the folder {0} does not exist.\n".format(self.folder)) + msg = "\nError! the folder {0} does not exist.\n".format(self.folder) + raise AttributeError(msg) + return alns def insert_taxon_in_new_fasta_file(self, aln): """primer4clades infers the codon usage table from the taxon names in the diff --git a/tests/test_primer-designer.py b/tests/test_primer-designer.py index 1ec25c8..f22aace 100644 --- a/tests/test_primer-designer.py +++ b/tests/test_primer-designer.py @@ -1,3 +1,4 @@ +import copy import os import unittest @@ -32,6 +33,15 @@ def tearDown(self): if os.path.isfile(output_html_file): os.remove(output_html_file) + def test_get_alignments(self): + result = self.pd.get_alignments() + self.assertTrue(len(result) > 0) + + def test_get_alignments_error(self): + pd = copy.copy(self.pd) + pd.folder = "fake_folder" + self.assertRaises(pd.get_alignments) + @responses.activate def test_request_primers(self): url = "http://floresta.eead.csic.es/primers4clades/primers4clades.cgi" From 03d3a5dd987bd4308449339749c7a178ae9e4e6f Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 27 Oct 2015 14:43:34 +0200 Subject: [PATCH 7/8] PEP8 --- primer_designer/designer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/primer_designer/designer.py b/primer_designer/designer.py index 57b8d0d..09c534a 100644 --- a/primer_designer/designer.py +++ b/primer_designer/designer.py @@ -207,11 +207,11 @@ def make_report_from_html_file(self, response_body, this_file): def get_amplicon_data_as_tuples(self, response_body): amplicons = re.findall("(## Amplicon.+) codon", response_body) - primers_codehop = self.group_primers( re.findall("(\w+ codeh)_corr.+\n", response_body)) - primers_relaxed = self.group_primers( re.findall("(\w+ relax)_corr.+\n", response_body)) - primers_degen = self.group_primers( re.findall("(\w+ degen)_corr.+\n", response_body)) + primers_codehop = self.group_primers(re.findall("(\w+ codeh)_corr.+\n", response_body)) + primers_relaxed = self.group_primers(re.findall("(\w+ relax)_corr.+\n", response_body)) + primers_degen = self.group_primers(re.findall("(\w+ degen)_corr.+\n", response_body)) primer_pair_qualities = re.findall("# primer pair.+= ([0-9]+)%\n", response_body) - expected_pcr_product_lengths = re.findall( "# expected PCR .+= ([0-9]+)\n", response_body) + expected_pcr_product_lengths = re.findall("# expected PCR .+= ([0-9]+)\n", response_body) forward_temperatures = re.findall("(# fwd: minTm.+)\n", response_body) reverse_temperatures = re.findall("(# rev: minTm.+)\n", response_body) From 47bfd8cdd350751fc97bbbbeb090f20581b45e82 Mon Sep 17 00:00:00 2001 From: carlosp420 Date: Tue, 27 Oct 2015 14:55:46 +0200 Subject: [PATCH 8/8] test empty folder --- primer_designer/designer.py | 5 ++--- tests/test_primer-designer.py | 13 ++++++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/primer_designer/designer.py b/primer_designer/designer.py index 09c534a..2852382 100644 --- a/primer_designer/designer.py +++ b/primer_designer/designer.py @@ -1,7 +1,6 @@ import glob import os import re -import sys import requests from Bio.SeqIO import SeqRecord @@ -129,8 +128,8 @@ def design_primers(self): print("\nDone.\nAll primers have been saved in the file \"primers_report.txt\"") return self.report else: - print("\nError! the folder {0} is empty.\n".format(self.folder)) - sys.exit(1) + msg = "\nError! the folder {0} is empty.\n".format(self.folder) + raise AttributeError(msg) def call_primer4clades_for_primers(self, alns): for aln in alns: diff --git a/tests/test_primer-designer.py b/tests/test_primer-designer.py index f22aace..be99362 100644 --- a/tests/test_primer-designer.py +++ b/tests/test_primer-designer.py @@ -26,6 +26,9 @@ def setUp(self): amptype="dna_GTRG", email="youremail@email.com", ) + self.tmp_folder= os.path.join(TEST_FOLDER, '..', 'tmp_folder') + if not os.path.isdir(self.tmp_folder): + os.mkdir(self.tmp_folder) self.maxDiff = None def tearDown(self): @@ -33,6 +36,14 @@ def tearDown(self): if os.path.isfile(output_html_file): os.remove(output_html_file) + if os.path.isfile(self.tmp_folder): + os.remove(self.tmp_folder) + + def test_design_primers_from_empty_folder(self): + pd = copy.copy(self.pd) + pd.folder = self.tmp_folder + self.assertRaises(AttributeError, pd.design_primers) + def test_get_alignments(self): result = self.pd.get_alignments() self.assertTrue(len(result) > 0) @@ -40,7 +51,7 @@ def test_get_alignments(self): def test_get_alignments_error(self): pd = copy.copy(self.pd) pd.folder = "fake_folder" - self.assertRaises(pd.get_alignments) + self.assertRaises(AttributeError, pd.get_alignments) @responses.activate def test_request_primers(self):