pohjie committed on
Commit
e8e6bd3
•
1 Parent(s): d6f3762

Update app.py to run computation

Browse files
Files changed (2) hide show
  1. .vscode/settings.json +6 -0
  2. app.py +73 -1
.vscode/settings.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "[python]": {
3
+ "editor.defaultFormatter": "ms-python.black-formatter"
4
+ },
5
+ "python.formatting.provider": "none"
6
+ }
app.py CHANGED
@@ -1,7 +1,79 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def greet(name):
4
  return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
 
7
  iface.launch()
 
1
  import gradio as gr
2
+ import json
3
+ from pathlib import Path
4
+
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+
7
+ from InstructorEmbedding import INSTRUCTOR
8
+
9
# Load the INSTRUCTOR embedding model once at import time; it is reused by
# get_case_note_embedding for every request instead of being rebuilt per call.
model = INSTRUCTOR("hkunlp/instructor-large")

# JSON file expected next to this script; get_top_n_diagnoses reads its
# "diagnoses" list.
EMBED_FILE_PATH = Path(__file__).parent / "depressive_disorders_embed.json"

# Instruction text paired with each case note when it is encoded by the model.
case_note_instruction = (
    "Represent the case note for a possible DSM-5 mental health diagnosis:"
)
15
+
16
+
17
def read_json_file(file_path):
    """Load and return the parsed contents of a UTF-8 encoded JSON file.

    Args:
        file_path: Path (string or path-like) of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)
21
+
22
+
23
def get_case_note_embedding(case_note):
    """Encode a case note with the module-level INSTRUCTOR model.

    The note is paired with ``case_note_instruction`` and passed to
    ``model.encode`` as a batch containing that single pair.

    Args:
        case_note: Text of the case note to embed.

    Returns:
        Whatever ``model.encode`` returns for the one-item batch
        (presumably an array with one row per input pair -- TODO confirm).
    """
    instruction_pair = [case_note_instruction, case_note]
    return model.encode([instruction_pair])
26
+
27
+
28
def get_top_n_diagnoses(case_note, top_n=3):
    """Rank the stored diagnoses against a case note and return the best ones.

    The case note is embedded, then compared by cosine similarity with the
    embedding of every criterion of every diagnosis found under the
    ``"diagnoses"`` key of the JSON file at ``EMBED_FILE_PATH``. Each
    diagnosis entry is read for its ``"name"`` and ``"criteria"`` keys, and
    each criterion for its ``"description"`` and ``"embedding"`` keys.

    Args:
        case_note: Text of the case note to evaluate.
        top_n: Maximum number of diagnoses to return. Defaults to 3, the
            value that was previously hard-coded.

    Returns:
        A JSON string (indented by 2) holding a list of at most ``top_n``
        objects sorted by descending score. Each object has ``"name"``,
        ``"sum_score"`` (despite the key name, the mean criterion score) and
        ``"criteria_scores"`` (a list of ``{"description", "score"}``).
    """
    case_note_emb = get_case_note_embedding(case_note)
    diagnoses_embed = read_json_file(EMBED_FILE_PATH)["diagnoses"]

    diagnosis_list = []
    for diagnosis in diagnoses_embed:
        # Criterion descriptions and scores for this diagnosis.
        criteria_scores = []
        total_score = 0.0

        for criterion in diagnosis["criteria"]:
            # cosine_similarity returns a 2-D array. Pull the single value out
            # explicitly: float() on an array with ndim > 0 is deprecated in
            # recent NumPy releases.
            # NOTE(review): assumes each stored embedding is a single-row 2-D
            # list so the result is 1x1 -- confirm against the JSON file.
            similarity = cosine_similarity(criterion["embedding"], case_note_emb)
            score = float(similarity[0][0])
            total_score += score
            criteria_scores.append(
                {"description": criterion["description"], "score": score}
            )

        # A diagnosis without criteria previously raised ZeroDivisionError;
        # give it a neutral score of 0.0 instead.
        mean_score = total_score / len(criteria_scores) if criteria_scores else 0.0

        diagnosis_list.append(
            {
                "name": diagnosis["name"],
                "sum_score": mean_score,
                "criteria_scores": criteria_scores,
            }
        )

    # Highest mean score first, then keep only the requested number.
    sorted_diagnoses = sorted(
        diagnosis_list, key=lambda x: x["sum_score"], reverse=True
    )
    top_n_diagnoses = sorted_diagnoses[:top_n]

    return json.dumps(top_n_diagnoses, indent=2)
72
+
73
 
74
def greet(name):
    """Return a greeting of the form ``Hello <name>!!``.

    Kept from the earlier demo version of this app; the Gradio interface in
    this file no longer calls it.

    Args:
        name: Value to greet; it is formatted into the message.

    Returns:
        The greeting string.
    """
    return f"Hello {name}!!"
76
 
77
+
78
# Expose get_top_n_diagnoses through a text-in / text-out Gradio interface;
# the function's JSON string is shown as plain text.
iface = gr.Interface(fn=get_top_n_diagnoses, inputs="text", outputs="text")
iface.launch()