victormiller committed on
Commit
e8dab56
1 Parent(s): 67e65d7

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +7 -13
curated.py CHANGED
@@ -63,26 +63,20 @@ wikipedia_filter = pd.DataFrame(
63
  "Wikipedia",
64
  ],
65
  "Lines Downloaded": [
66
- "",
67
- ],
68
- "Lines Remaining After Language Filter": [
69
- "",
70
  ],
71
- "Percent Removed": [
72
  "0.00%",
73
  ],
74
- "Lines Remaining After Min Word Count Filter": [
75
- "",
76
- ],
77
- "Percent Removed": [
78
  "1.86%",
79
  ],
80
- "Lines Remaining After Unigram Probability Filter": [
81
- "",
82
- ],
83
- "Percent Removed": [
84
  "0.00%",
85
  ],
 
 
 
86
  "Total Percentage Remaining": [
87
  "98.14%",
88
  ],
 
63
  "Wikipedia",
64
  ],
65
  "Lines Downloaded": [
66
+ "61614907",
 
 
 
67
  ],
68
+ "Percent Removed After Language Filter": [
69
  "0.00%",
70
  ],
71
+ "Percent Removed After Min Word Count Filter": [
 
 
 
72
  "1.86%",
73
  ],
74
+ "Percent Removed After Unigram Probability Filter": [
 
 
 
75
  "0.00%",
76
  ],
77
+ "Lines Remaining After Local Dedup": [
78
+ "",
79
+ ],
80
  "Total Percentage Remaining": [
81
  "98.14%",
82
  ],