Spaces:

tagirshin
/

VQGAE

Sleeping

App Files Files Community

tagirshin committed on Sep 26, 2023

Commit

2d13a08

•

1 Parent(s): 4a0267b

fixed progress bar

Browse files

Files changed (1) hide show

app.py +11 -13

app.py CHANGED Viewed

@@ -91,7 +91,7 @@ def on_generation_progress(ga):
     global ga_progress
     global ga_bar
     ga_progress = ga_progress + 1
-    ga_bar.progress(ga_progress // num_generations * 100, text=ga_progress_text)
 @st.cache_data
@@ -120,11 +120,6 @@ def load_data(batch_size):
 st.title('Inverse QSAR of Tubulin with VQGAE')
-batch_size = 200
-X, Y, rf_model, vqgae_model, ordering_model = load_data(batch_size)
-assert X.shape == (603, 4096)
 with st.sidebar:
     with st.form("my_form"):
         num_generations = st.slider(
@@ -195,11 +190,14 @@ with st.sidebar:
         )
         # 2/3 of num_parents_mating
         use_ordering_score = st.toggle('Use ordering score', value=True)
         random_seed = int(st.number_input("Random seed", value=42, placeholder="Type a number..."))
         submit = st.form_submit_button('Start optimisation')
 if submit:
     ga_instance = pygad.GA(
         fitness_func=fitness_func_batch,
         on_generation=on_generation_progress,
@@ -244,8 +242,8 @@ if submit:
     rescoring_progress_text = "Rescoring obtained solutions"
     rescoring_bar = st.progress(0, text=rescoring_progress_text)
     total_rescoring_steps = len(unique_solutions) // batch_size + 1
-    for i in range(total_rescoring_steps):
-        vqgae_latents = unique_solutions[i * batch_size: (i + 1) * batch_size]
         frag_counts = np.array(vqgae_latents)
         rf_scores = rf_model.predict_proba(frag_counts)[:, 1]
         similarity_scores = tanimoto_kernel(frag_counts, X).max(-1)
@@ -255,7 +253,7 @@ if submit:
             frag_inds = frag_counts_to_inds(frag_counts, max_atoms=51)
             _, ordering_scores = restore_order(frag_inds, ordering_model)
             scores["ordering_score"].extend(ordering_scores)
-        rescoring_bar.progress(i // total_rescoring_steps, text=rescoring_progress_text)
     sc_df = pd.DataFrame(scores)
     rescoring_bar.empty()
@@ -276,8 +274,8 @@ if submit:
     decoding_progress_text = "Decoding chosen solutions"
     decoding_bar = st.progress(0, text=decoding_progress_text)
     total_decoding_steps = gen_frag_inds.shape[0] // batch_size + 1
-    for i in range(total_decoding_steps):
-        inputs = gen_frag_inds[i * batch_size: (i + 1) * batch_size]
         canon_order_inds, scores = restore_order(
             frag_inds=inputs,
             ordering_model=ordering_model,
@@ -290,7 +288,7 @@ if submit:
         results["smiles"].extend([str(molecule) for molecule in molecules])
         results["ordering_score"].extend(scores)
         results["validity"].extend([1 if i else 0 for i in validity])
-        decoding_bar.progress(i // total_decoding_steps, text=rescoring_progress_text)
     gen_stats = pd.DataFrame(results)
     decoding_bar.empty()
     full_stats = pd.concat([gen_stats, chosen_gen[["similarity_score", "rf_score"]].reset_index(), ], axis=1, ignore_index=False)

     global ga_progress
     global ga_bar
     ga_progress = ga_progress + 1
+    ga_bar.progress(ga_progress / num_generations, text=ga_progress_text)
 @st.cache_data
 st.title('Inverse QSAR of Tubulin with VQGAE')
 with st.sidebar:
     with st.form("my_form"):
         num_generations = st.slider(
         )
         # 2/3 of num_parents_mating
         use_ordering_score = st.toggle('Use ordering score', value=True)
+        batch_size = int(st.number_input("Random seed", value=200, placeholder="Type a number..."))
         random_seed = int(st.number_input("Random seed", value=42, placeholder="Type a number..."))
         submit = st.form_submit_button('Start optimisation')
 if submit:
+    X, Y, rf_model, vqgae_model, ordering_model = load_data(batch_size)
+    assert X.shape == (603, 4096)
     ga_instance = pygad.GA(
         fitness_func=fitness_func_batch,
         on_generation=on_generation_progress,
     rescoring_progress_text = "Rescoring obtained solutions"
     rescoring_bar = st.progress(0, text=rescoring_progress_text)
     total_rescoring_steps = len(unique_solutions) // batch_size + 1
+    for rescoring_step in range(total_rescoring_steps):
+        vqgae_latents = unique_solutions[rescoring_step * batch_size: (rescoring_step + 1) * batch_size]
         frag_counts = np.array(vqgae_latents)
         rf_scores = rf_model.predict_proba(frag_counts)[:, 1]
         similarity_scores = tanimoto_kernel(frag_counts, X).max(-1)
             frag_inds = frag_counts_to_inds(frag_counts, max_atoms=51)
             _, ordering_scores = restore_order(frag_inds, ordering_model)
             scores["ordering_score"].extend(ordering_scores)
+        rescoring_bar.progress(rescoring_step / total_rescoring_steps, text=rescoring_progress_text)
     sc_df = pd.DataFrame(scores)
     rescoring_bar.empty()
     decoding_progress_text = "Decoding chosen solutions"
     decoding_bar = st.progress(0, text=decoding_progress_text)
     total_decoding_steps = gen_frag_inds.shape[0] // batch_size + 1
+    for decoding_step in range(total_decoding_steps):
+        inputs = gen_frag_inds[decoding_step * batch_size: (decoding_step + 1) * batch_size]
         canon_order_inds, scores = restore_order(
             frag_inds=inputs,
             ordering_model=ordering_model,
         results["smiles"].extend([str(molecule) for molecule in molecules])
         results["ordering_score"].extend(scores)
         results["validity"].extend([1 if i else 0 for i in validity])
+        decoding_bar.progress(decoding_step / total_decoding_steps, text=decoding_progress_text)
     gen_stats = pd.DataFrame(results)
     decoding_bar.empty()
     full_stats = pd.concat([gen_stats, chosen_gen[["similarity_score", "rf_score"]].reset_index(), ], axis=1, ignore_index=False)