victormiller committed on
Commit
e723f50
1 Parent(s): 7cc1892

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +32 -0
curated.py CHANGED
@@ -919,6 +919,30 @@ def get_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo")
919
  )
920
 
921
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
922
  def update(target: str, request):
923
  params = request.query_params
924
  if data_source := params.get(f"data_source_{target}"):
@@ -1043,6 +1067,13 @@ def curated(request):
1043
 
1044
 
1045
 
 
 
 
 
 
 
 
1046
  preprocessing_steps = pd.DataFrame(
1047
  {
1048
  "Step": [
@@ -1127,6 +1158,7 @@ def curated(request):
1127
  plotly2fasthtml(diff2_stacked_bar),
1128
  P("The figure above provides a global view of the document filtering results. ~8% of documents were removed during these three steps."),
1129
  filtering_process,
 
1130
  data_preparation_div,
1131
  #H2("Local Deduplication"), are these numbers even right?
1132
  #local_dedup_text,
 
919
  )
920
 
921
 
922
+
923
def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo"):
    """Build the raw-vs-extracted sample view for one curated Freelaw document.

    Args:
        data_source: Name of the sample set. Only ``"Freelaw"`` loads the
            sample files; any other value yields empty placeholder documents.
        doc_id: Index of the sample to display. It is coerced with ``int()``
            and clamped to the range 0-9.
        target: Forwarded unchanged to ``view_data``.

    Returns:
        The result of ``view_data`` for the selected raw/extracted pair.

    Raises:
        ValueError: If ``doc_id`` cannot be converted to an integer.
        OSError: If a Freelaw sample file cannot be opened.
    """
    doc_id = max(0, min(int(doc_id), 9))

    if data_source == "Freelaw":
        # Context managers close the sample files deterministically instead of
        # leaking the handles until garbage collection; the explicit encoding
        # keeps decoding independent of the platform's default locale.
        with open(
            "data/curated_samples/freelaw_raw.json", encoding="utf-8"
        ) as raw_file:
            raw_sample_doc = json.load(raw_file)
        with open(
            "data/curated_samples/freelaw_extract.json", encoding="utf-8"
        ) as extracted_file:
            extracted_sample_doc = json.load(extracted_file)
    else:
        # Unknown source: ten empty documents so every clamped doc_id is valid.
        raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]

    return view_data(
        raw_sample_doc[doc_id],
        extracted_sample_doc[doc_id],
        doc_id=doc_id,
        data_source=data_source,
        data_sources=data_sources,
        target=target,
    )
944
+
945
+
946
  def update(target: str, request):
947
  params = request.query_params
948
  if data_source := params.get(f"data_source_{target}"):
 
1067
 
1068
 
1069
 
1070
+ freelaw_examples = Div(
1071
+ Div(
1072
+ get_freelaw_data(target=gen_random_id()),
1073
+ style="border: 1px solid #ccc; padding: 20px;",
1074
+ ),
1075
+ )
1076
+
1077
  preprocessing_steps = pd.DataFrame(
1078
  {
1079
  "Step": [
 
1158
  plotly2fasthtml(diff2_stacked_bar),
1159
  P("The figure above provides a global view of the document filtering results. ~8% of documents were removed during these three steps."),
1160
  filtering_process,
1161
+ freelaw_examples,
1162
  data_preparation_div,
1163
  #H2("Local Deduplication"), are these numbers even right?
1164
  #local_dedup_text,