hlnicholls committed on
Commit
4b60c06
1 Parent(s): 55c4b4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -3
app.py CHANGED
@@ -1,16 +1,37 @@
1
  import streamlit as st
2
  import numpy as np
3
  import pandas as pd
4
- import pickle
5
  import sklearn
6
  import xgboost
7
 
8
  data = pd.read_csv("annotations_dataset.csv")
9
  data = data.set_index("Gene")
10
 
11
- model = pickle.load(open('fitted_model.sav', 'rb'))
 
 
 
 
12
 
13
- predictions = list(model.predict(data))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  output = pd.Series(data=predictions, index=data.index, name="XGB_Score")
16
  df_total = pd.concat([data, output], axis=1)
 
1
  import streamlit as st
2
  import numpy as np
3
  import pandas as pd
 
4
  import sklearn
5
  import xgboost
6
 
# Gene-level annotation table to be scored; rows are keyed by the "Gene" column.
data = pd.read_csv("annotations_dataset.csv")
data = data.set_index("Gene")

# XGBoost refuses feature names that contain "[", "]" or "<", so each of those
# characters is replaced with "_" before fitting or predicting.
_FORBIDDEN_NAME_CHARS = str.maketrans("[]<", "___")

# Fixed seed so that retraining at every app start yields the same model.
# NOTE(review): the committed code referenced an undefined name `seed`;
# 0 is a placeholder -- confirm the value used when the model was tuned.
SEED = 0


def _sanitize_feature_name(col):
    """Return *col* with "[", "]" and "<" replaced by "_".

    Names that contain none of those characters are returned unchanged
    (including non-string labels, which are not converted to ``str``).
    """
    name = str(col)
    if any(ch in name for ch in "[]<"):
        return name.translate(_FORBIDDEN_NAME_CHARS)
    return col


# Labelled training set used to fit the regressor in-process, replacing the
# previously pickled model.
training_data = pd.read_csv("selected_features_training_data.csv", header=0)
training_data.columns = [
    _sanitize_feature_name(col) for col in training_data.columns
]

# Encode the categorical label as the numeric regression target.
training_data["BPlabel_encoded"] = training_data["BPlabel"].map(
    {"most likely": 1, "probable": 0.75, "least likely": 0.1}
)
Y = training_data["BPlabel_encoded"]
# Drop both the raw and the encoded label so neither leaks into the features.
# `columns=` is required: a second positional argument would be read as `axis`.
X = training_data.drop(columns=["BPlabel_encoded", "BPlabel"], errors="ignore")

xgb = xgboost.XGBRegressor(
    n_estimators=40,
    learning_rate=0.2,
    max_depth=4,
    reg_alpha=1,
    reg_lambda=1,
    random_state=SEED,
    objective="reg:squarederror",
)

xgb.fit(X, Y)

# Predict on a view of `data` whose column names are sanitised the same way as
# the training columns and arranged in the training order, because XGBoost
# validates DataFrame feature names against those seen during fit.
# NOTE(review): assumes annotations_dataset.csv contains every training
# feature -- a missing column raises KeyError here; confirm against the CSVs.
features = data.rename(columns=_sanitize_feature_name)[X.columns]
predictions = list(xgb.predict(features))

# Attach the score to the original (unsanitised) annotation table.
output = pd.Series(data=predictions, index=data.index, name="XGB_Score")
df_total = pd.concat([data, output], axis=1)