as-cle-bert committed on
Commit
829ca47
·
verified ·
1 Parent(s): e689b36

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_client import Client
3
+ import subprocess as sp
4
+ import time
5
+ import requests
6
+
7
def upload_to_fileio(file_path, timeout=30):
    """Upload a local file to file.io and return its download link.

    Args:
        file_path: Path of the file to upload; it is opened in binary mode.
        timeout: Seconds to wait for file.io before giving up.

    Returns:
        The ``link`` field of the JSON reply when the upload succeeds,
        otherwise the placeholder ``"#"``.

    Raises:
        OSError: If ``file_path`` cannot be opened for reading.
    """
    fallback = "#"
    with open(file_path, 'rb') as f:
        try:
            # Without a timeout a stalled upload would block the caller forever.
            response = requests.post(
                'https://file.io', files={'file': f}, timeout=timeout
            )
        except requests.RequestException:
            # Network failures degrade to the placeholder instead of crashing.
            return fallback
    if response.status_code != 200:
        return fallback
    try:
        link = response.json().get('link')
    except ValueError:
        # The reply body was not valid JSON.
        return fallback
    # A reply without a usable "link" entry also maps to the placeholder.
    return link or fallback
14
+
15
# Gradio client for the hosted "eswardivi/Phi-3-mini-128k-instruct" app.
# Built once when the module is imported and reused by the chat handlers.
api_client = Client(
    "eswardivi/Phi-3-mini-128k-instruct",
)
16
+
17
def blast_search(inputfile, matchesnumber, qcovcutoff, evaluecutoff):
    """Run ``blastn`` against the 16S_ribosomal_RNA database and collect hits.

    A tab-separated header line followed by blastn's tabular output is
    written to ``results.txt`` in the current working directory, and that
    file is then passed to ``upload_to_fileio``.

    Args:
        inputfile: Path of the query file handed to ``-query``.
        matchesnumber: Value for ``-max_target_seqs``.
        qcovcutoff: Value for ``-qcov_hsp_perc``.
        evaluecutoff: Value for ``-evalue``.

    Returns:
        A ``(content, link)`` tuple: the full text of ``results.txt`` and the
        value returned by ``upload_to_fileio`` for that file.
    """
    import sys

    results_path = "results.txt"
    header = (
        "QUERY_SEQ\tTAXON\tQUERY_COVERAGE\tPERC_ID\tLENGTH\tMISMATCHES"
        "\tGAPS\tE_VALUE\tBITSCORE\n"
    )
    # NOTE(review): "qcov" does not look like one of the documented -outfmt
    # specifiers (qcovs / qcovhsp / qcovus) -- confirm against the BLAST+
    # manual; the format string is kept exactly as it was.
    out_format = (
        "6 qseqid sscinames qcov pident length mismatch gapopen evalue bitscore"
    )
    # Arguments go in a list (no shell), so user-supplied values cannot be
    # interpreted as shell syntax.
    command = [
        "blastn",
        "-query", str(inputfile),
        "-db", "16S_ribosomal_RNA",
        "-outfmt", out_format,
        "-max_target_seqs", str(matchesnumber),
        "-evalue", str(evaluecutoff),
        "-qcov_hsp_perc", str(qcovcutoff),
    ]

    with open(results_path, "w") as out:
        out.write(header)
    with open(results_path, "a") as out:
        try:
            # The exit status is deliberately not checked: a failed search
            # leaves a header-only table.
            sp.run(command, stdout=out)
        except OSError as err:
            # blastn missing or not executable: report it and keep the
            # header-only table rather than raising.
            print(f"blastn could not be started: {err}", file=sys.stderr)

    with open(results_path) as f:
        content = f.read()
    link = upload_to_fileio(results_path)
    return content, link
25
+
26
def reply(user_prompt, history, inputfile, matchesnumber, qcovcutoff, evaluecutoff):
    """Chat handler: BLAST the uploaded sequences and stream an LLM summary.

    Args:
        user_prompt: Instructions typed by the user; appended to the prompt.
        history: Chat history supplied by the caller; not used here.
        inputfile: Path of the uploaded query file, or a falsy value when
            nothing was uploaded.
        matchesnumber: Forwarded to ``blast_search``.
        qcovcutoff: Forwarded to ``blast_search``.
        evaluecutoff: Forwarded to ``blast_search``.

    Yields:
        Progressively longer prefixes of the final answer, one character at
        a time. When no file was uploaded, a single message asking for one.
    """
    if not inputfile:
        # Nothing to search: ask for a file instead of running BLAST on nothing.
        yield "Please upload a FASTA file before asking for a summary."
        return

    context, filelink = blast_search(inputfile, matchesnumber, qcovcutoff, evaluecutoff)
    instructions = "You are a helpful assistant whose job is to summarize in a straight-to-the-point but effective way the result of a BLAST search conducted on a 16S rRNA bacterial sequences database from NCBI."
    full_prompt = f"{instructions} Based on the content of this TSV file resulting from a BLAST search: \"\"\"{context}\"\"\", summarize the mentioned output complying with these user-provided instructions: {user_prompt}"
    response = api_client.predict(
        full_prompt,  # str in 'Message' Textbox component
        0.2,  # float (numeric value between 0 and 1) in 'Temperature' Slider component
        True,  # bool in 'Sampling' Checkbox component
        512,  # float (numeric value between 128 and 4096) in 'Max new tokens' Slider component
        api_name="/chat"
    )

    # "#" is the placeholder link produced when the upload did not succeed;
    # do not advertise a dead link in that case.
    if filelink and filelink != "#":
        response = response + f"\n\nDownload your BLAST results [at this link]({filelink})"
    else:
        response = response + "\n\nYour BLAST results could not be uploaded, so no download link is available."

    # Emit growing prefixes to get a typing effect in the chat window.
    for end in range(1, len(response) + 1):
        time.sleep(0.0001)
        yield response[:end]
43
+
44
def summarize_description_table(description_table_file):
    """Read a BLAST description table and cap it at its first 10 hits.

    The first line is kept in addition to the 10 hits (presumably it is the
    column header row), so at most 11 lines are retained.

    Args:
        description_table_file: Path of the table file to read.

    Returns:
        A ``(incipit, content)`` tuple. ``incipit`` is a Markdown warning
        when lines were dropped and ``""`` otherwise; ``content`` is the
        retained lines joined back into one string.
    """
    max_lines = 11  # first line + 10 hits
    with open(description_table_file) as f:
        lines = f.readlines()
    # Warn only when something is actually cut off: a table of exactly
    # 11 lines loses nothing to the slice below.
    if len(lines) > max_lines:
        incipit = "**⚠️: The number of hits was higher than 10. Only the first 10 hits were taken into account.**\n\n"
        lines = lines[:max_lines]
    else:
        incipit = ""
    content = "".join(lines)
    return incipit, content
54
+
55
def ai_summarize(user_prompt, history, inputfile):
    """Chat handler: stream an LLM summary of an uploaded description table.

    Args:
        user_prompt: Instructions typed by the user; appended to the prompt.
        history: Chat history supplied by the caller; not used here.
        inputfile: Path of the uploaded description table, or a falsy value
            when nothing was uploaded.

    Yields:
        Progressively longer prefixes of the final answer, one character at
        a time. When no file was uploaded, a single message asking for one.
    """
    if not inputfile:
        # Without this guard, opening the missing file would raise.
        yield "Please upload a BLAST description table (CSV) before asking for a summary."
        return

    incipit, context = summarize_description_table(inputfile)
    instructions = "You are a helpful assistant whose job is to summarize in a straight-to-the-point but effective way the result of a BLAST search conducted online on NCBI databases."
    full_prompt = f"{instructions} Based on the content of this CSV file resulting from a BLAST search: \"\"\"{context}\"\"\", summarize the mentioned output complying with these user-provided instructions: {user_prompt}"
    response = api_client.predict(
        full_prompt,  # str in 'Message' Textbox component
        0.2,  # float (numeric value between 0 and 1) in 'Temperature' Slider component
        True,  # bool in 'Sampling' Checkbox component
        512,  # float (numeric value between 128 and 4096) in 'Max new tokens' Slider component
        api_name="/chat"
    )
    # Put the truncation warning (if any) in front of the model's answer.
    response = incipit + response

    # Emit growing prefixes to get a typing effect in the chat window.
    for end in range(1, len(response) + 1):
        time.sleep(0.0001)
        yield response[:end]
72
+
73
+
74
# ---------------------------------------------------------------------------
# Input widgets shared with the chat interfaces below.
# ---------------------------------------------------------------------------
user_file = gr.File(
    label="Upload FASTA File",
)

user_file1 = gr.File(
    label="Upload Description Table (CSV) Downloadable From Online BLAST Results",
)

user_max_matches = gr.Slider(
    5,
    50,
    value=20,
    label="Max Hits per Sequence",
    info="Select maximum number of BLAST hits per sequence (higher number of hits will result in a longer latency)",
)

user_qcov = gr.Slider(
    0,
    100,
    value=0,
    label="Minimum Query Coverage",
    info="Minimum query coverage for a hit to be considered",
)

# The E-value is collected as free text and forwarded to the search as typed.
user_evalue = gr.Textbox(
    label="E-value threshold",
    info="All the hits below the threshold will be considered",
    value="1e-10",
)

additional_accordion = gr.Accordion(
    label="Parameters to be set before you start chatting",
    open=True,
)

# ---------------------------------------------------------------------------
# Tab 1: run a local BLAST search on an uploaded file and summarize it.
# ---------------------------------------------------------------------------
demo0 = gr.ChatInterface(
    fn=reply,
    additional_inputs=[user_file, user_max_matches, user_qcov, user_evalue],
    additional_inputs_accordion=additional_accordion,
    title="""<h2 align='center'>Bacteria 16S rRNA</h2>
<h3 align='center'>BLAST 16S rRNA bacterial sequences and get a nice summary of the results with the power of AI!</h3>
<h4 align='center'>Support this space with a ⭐ on <a href='https://github.com/AstraBert/BLAST-SummarAIzer'>GitHub</a></h4>""",
)

# ---------------------------------------------------------------------------
# Tab 2: summarize a description table downloaded from online BLAST.
# ---------------------------------------------------------------------------
demo1 = gr.ChatInterface(
    fn=ai_summarize,
    additional_inputs=[user_file1],
    additional_inputs_accordion=additional_accordion,
    title="""<h2 align='center'>Online BLAST results</h2>
<h3 align='center'>Upload a Description Table from Online BLAST results and get a nice summary with the power of AI!</h3>
<h4 align='center'>Support this space with a ⭐ on <a href='https://github.com/AstraBert/BLAST-SummarAIzer'>GitHub</a></h4>""",
)

# Both chat interfaces are exposed as tabs of a single app.
demo = gr.TabbedInterface(
    [demo0, demo1],
    ["16S rRNA", "Online BLAST results"],
    title="BLAST SummarAIzer",
)

if __name__ == "__main__":
    # Listen on every network interface; no public share link is requested.
    demo.launch(server_name="0.0.0.0", share=False)