diff --git a/.DS_Store b/.DS_Store index 3f8513ed7f971a747ef150a5993a81447bc4fae3..93bddd1c6901d8f94fdf359d3a2b7aa6b7671549 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.gitattributes b/.gitattributes index 10b0bc62a84d03cc004ae4d582d4c12381ff7ce7..611266269cd11a1ccd68eea5ffc4a8ec85a1b69f 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,4 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.mov filter=lfs diff=lfs merge=lfs -text *.gif filter=lfs diff=lfs merge=lfs -text cache_dir/**/* filter=lfs diff=lfs merge=lfs -text -cache_dir/**/**/* filter=lfs diff=lfs merge=lfs -text \ No newline at end of file +cache_dir/**/**/* filter=lfs diff=lfs merge=lfs -text +*.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index c13388a6ee78eb4a9152d1e63852b51e1896c3c9..1a599814c90be0267e004a7c898b65d1b5619331 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ sdk: streamlit sdk_version: 1.0.0 app_file: app.py pinned: false -python_version: 3.9 +python_version: 3.9.6 --- # Data Measurements Tool diff --git a/cache_dir/.DS_Store b/cache_dir/.DS_Store index 54996a31d16648a1c7008898f28ac71cc86e6d8f..76ad83c58df96926933397ab95a45cbe607f9ee9 100644 --- a/cache_dir/.DS_Store +++ b/cache_dir/.DS_Store @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5f54406681cc569a9d56248a3be247afb7952fa5272ab20bb039e584f80d26c6 +oid sha256:19c39a390bcad50240ee311709c795ccc781d245bbc0177fec7b798e4aa0f86a size 14340 diff --git a/cache_dir/HuggingFaceM4/.DS_Store b/cache_dir/HuggingFaceM4/.DS_Store index 3a72a4324613d683776604e1f63d07b94a4e5225..f2e9ca8f5a45cc4c77f7b3cdede84653399102fa 100644 --- a/cache_dir/HuggingFaceM4/.DS_Store +++ b/cache_dir/HuggingFaceM4/.DS_Store @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3eda935b3db4b738eb991f2ad964f1f4f075f9161bf3df13fb43c6a7a0fda6e1 +oid sha256:3e21720a64cd789a9d8e7c6369628d2ba25057fde8d2394a45e8f798fb71f14d size 6148 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_text/.DS_Store b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3d6d77936687eb18cc729ab683e9e64ddfa9e6ec --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/.DS_Store @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7057786add5ec9a6a3fac1e38d1fd334a8e3006ed45e10564a7957bed82a627f +size 8196 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/associations/identity_terms.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/associations/identity_terms.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/associations/identity_terms.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/associations/identity_terms.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/.DS_Store b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5fb1c8b280ac9469398d13ebd725488399c532e7 --- /dev/null +++ b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/.DS_Store @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d65165279105ca6773180500688df4bdc69a2c7b771752f0a46ef120b7fd8ec3 +size 6148 diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00000-of-00002.arrow b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/data-00000-of-00002.arrow similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00000-of-00002.arrow rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/data-00000-of-00002.arrow diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00001-of-00002.arrow b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/data-00001-of-00002.arrow similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/data-00001-of-00002.arrow rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/data-00001-of-00002.arrow diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/dataset_info.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/dataset_info.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/dataset_info.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/state.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/base_dset/state.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/base_dset/state.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/general_stats_dict.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/general_stats_dict.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/general_stats_dict.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/general_stats_dict.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/length_measurements.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/lengths/length_measurements.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/length_measurements.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/lengths/length_measurements.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_fig.png b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/lengths/lengths_fig.png similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_fig.png rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/lengths/lengths_fig.png diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_table.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/lengths/lengths_table.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/lengths/lengths_table.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/lengths/lengths_table.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/sorted_top_vocab.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/sorted_top_vocab.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/sorted_top_vocab.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/sorted_top_vocab.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_dset/data-00000-of-00001.arrow similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/data-00000-of-00001.arrow rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_dset/data-00000-of-00001.arrow diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_dset/dataset_info.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/dataset_info.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_dset/dataset_info.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_dset/state.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_dset/state.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_dset/state.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.html b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_duplicates/text_duplicates.html similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.html rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_duplicates/text_duplicates.html diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_duplicates/text_duplicates.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/text_duplicates/text_duplicates.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/text_duplicates/text_duplicates.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/tokenized_df.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/tokenized_df.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/tokenized_df.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/tokenized_df.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/vocab_counts.json b/cache_dir/HuggingFaceM4/OBELICS_default_train_text/vocab_counts.json similarity index 100% rename from cache_dir/HuggingFaceM4/OBELICS_default_train_texts/vocab_counts.json rename to cache_dir/HuggingFaceM4/OBELICS_default_train_text/vocab_counts.json diff --git a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/.DS_Store b/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/.DS_Store deleted file mode 100644 index 96f2b617a406a2edba0977b91fc17cc3d0b5d8d4..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_default_train_texts/.DS_Store +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f40ba1ae0df6f456333e7b4708a4b420f29a8910a9f93dea56797202bb4b8d2 -size 6148 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/data-00000-of-00001.arrow deleted file mode 100644 index e6dbd1cc73f6f70eaf2b121354cb8808a3c6980c..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/data-00000-of-00001.arrow +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f23eb11877f1bf934ba12d0ce910ccb71b9ce3865798dc6651d12425244b529 -size 489144 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/dataset_info.json deleted file mode 100644 index 1dfdb32df891cb4bec1107d5edee9c07d2905009..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/dataset_info.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bbbef344f84ed9a7f11d0778f52ff0e2decf2ef7aacd1fbf5f059d069d2c6ba6 -size 946 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/state.json deleted file mode 100644 index a7925f467949e4a6e94ff489f13667b228614944..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/base_dset/state.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ab2c18f9e7e23bc6b501df499e16c721198a5452b993cbc3921612254438a7c3 -size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/dset_peek.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/dset_peek.json deleted file mode 100644 index 1751c969329e31c30a86204214e2d70b8a547aab..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/dset_peek.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c8be5416e6a1bf0c3d02709b3d5c13b7b9af543ea902301e5b268b4d16b026c0 -size 502887 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/general_stats_dict.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/general_stats_dict.json deleted file mode 100644 index da74a2136afa60dc095884a520f0802ca5699f46..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/general_stats_dict.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c7a45931e78871dcb39b814081ef2c49939709c3feebcddaf7dc9e221a557b24 -size 95 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/length_measurements.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/length_measurements.json deleted file mode 100644 index 7224ed2aacba79d349c91858251a64a913508a4d..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/length_measurements.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:500e2c7bd48b76556389af8630b5093b147c32affc0528305696531841df7582 -size 113 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_fig.png b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_fig.png deleted file mode 100644 index 95b17353049c9f321098b3e0a4c9f18c58011499..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_fig.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c4136b1d8653ecb9b425b162624c8adc8ca2c53ae44af90a34b65b0aaa946fb1 -size 69909 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_table.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_table.json deleted file mode 100644 index e6b794afc1a944aadf035ded539af64eeb8d6817..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/lengths/lengths_table.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d585449245038b71c834729d34b61b023b9866eec20f39fe33430a97a84be38 -size 33277 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/sorted_top_vocab.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/sorted_top_vocab.json deleted file mode 100644 index bed85900a45808e27b8ea6519d858ea9b2525553..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/sorted_top_vocab.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a64574c2dd6522190927d73577c84e293e11f179f37482e2305a159d27e7e80 -size 7785 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/cache-f6aa4a70e38b4a04.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/cache-f6aa4a70e38b4a04.arrow deleted file mode 100644 index fe7c2cbb80d16f477b51f15dc207e177c09d7a07..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/cache-f6aa4a70e38b4a04.arrow +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:07e5fe498e11eeb301bd8430d2f2c408c696d36bbc21b1d18f31c4cf957a8d47 -size 67576 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/data-00000-of-00001.arrow deleted file mode 100644 index acb327aa45186b4ba9beb2e595e0c51e81df9bc5..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/data-00000-of-00001.arrow +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d04dc98da752f00f252635eed55717c8a12dee20edb3c8347bd34e2228b16bb3 -size 29512 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/dataset_info.json deleted file mode 100644 index 1b56504c71ed317b6e9bad4d42cc4c0949e4c35b..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/dataset_info.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:362e98087a1b5bad2dc4e4d4b04dde74ed38ab64be0c12722e1f328518055d3b -size 604 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/state.json deleted file mode 100644 index 525a1826aa510bdbe54761f9230e6febc5af4659..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_dset/state.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5589b05d616fd17ead2fd469ad921b88ee2c464cb5bd21afca6a4876403c1ba7 -size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.html deleted file mode 100644 index 9b69961eedaf6813a142361e28d635099138ed1a..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.html +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b1690a778ad39a5363a94de88f59dda4fcf5ffbf4c3cc5abfbddbbc3be927fc0 -size 116 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.json deleted file mode 100644 index e228df0d5145a7052b71fa08c77f53a5a7851041..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/text_duplicates/text_duplicates.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c40970be1f0326d8ef1b689eda4ac2695d0c536d95fc69cfcd569cb361a77ad6 -size 50 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/tokenized_df.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/tokenized_df.json deleted file mode 100644 index 438a4f46b294c972d10264f18f133b5c63bbb2e3..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/tokenized_df.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d68e3413893f2173dc0abf1c0f818d51b14c5c1618622b5ba31d903423df3a21 -size 71186 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/vocab_counts.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/vocab_counts.json deleted file mode 100644 index fd0a3520f43810820603aa8920d683492b216c12..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/vocab_counts.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc2dbb4848f61dcc440ce6a940617092309b10c5dd589517772509102dc4b88f -size 121825 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_basic_stats.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_basic_stats.json deleted file mode 100644 index aac369d32ecce8762df463e967eca746eef343e8..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_basic_stats.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b56322c6815bf2be43a1d03dc8dd20c76c8ddf96e68a719e95c92d07917b06e3 -size 329 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.html deleted file mode 100644 index 4f0088b93eb7cbbf2882934356d8e6fde038137f..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.html +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0f188fe76c2135208a74b9c5bfb7e107d3473f513205489ade28cb0e727adc42 -size 3606637 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.json deleted file mode 100644 index 0abf49eba915d4cbfc27476f2c6db97708cda2ce..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_general_metadata/zipf/zipf_fig.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a49d21ab8319d8a2d9177770b511b2b31b735c70a28f89afeb140b5c6f938ff -size 20320 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/data-00000-of-00001.arrow deleted file mode 100644 index e6dbd1cc73f6f70eaf2b121354cb8808a3c6980c..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/data-00000-of-00001.arrow +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f23eb11877f1bf934ba12d0ce910ccb71b9ce3865798dc6651d12425244b529 -size 489144 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/dataset_info.json deleted file mode 100644 index 1dfdb32df891cb4bec1107d5edee9c07d2905009..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/dataset_info.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bbbef344f84ed9a7f11d0778f52ff0e2decf2ef7aacd1fbf5f059d069d2c6ba6 -size 946 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/state.json deleted file mode 100644 index a7925f467949e4a6e94ff489f13667b228614944..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/base_dset/state.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ab2c18f9e7e23bc6b501df499e16c721198a5452b993cbc3921612254438a7c3 -size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/dset_peek.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/dset_peek.json deleted file mode 100644 index 1751c969329e31c30a86204214e2d70b8a547aab..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/dset_peek.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c8be5416e6a1bf0c3d02709b3d5c13b7b9af543ea902301e5b268b4d16b026c0 -size 502887 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/general_stats_dict.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/general_stats_dict.json deleted file mode 100644 index e791015a9d3d1def46573f3d2c25f9ca31597245..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/general_stats_dict.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:51c5e8fe33ca5024e519e7d550f46e3917c9ba8dc63f43ecc107c3c31430cb00 -size 93 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/length_measurements.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/length_measurements.json deleted file mode 100644 index 3c45595d465c8658d2590b2fb1353734a3561b10..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/length_measurements.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0889a390641bd052cbf295478822db4509a1731d20f7fda753198c64f9970c88 -size 125 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_fig.png b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_fig.png deleted file mode 100644 index 281d61173e342afd86075945dc5303e8ba7f762e..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_fig.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87e075d701bdeacb58f17fab26d2e4a98e861be2a5e67282b9fa0fabb589181e -size 66291 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_table.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_table.json deleted file mode 100644 index 962855171dcdcb070bbc148adbbadf0055d6d1f7..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/lengths/lengths_table.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b8704412454924107067ae6b3d20534e8cc9917a3ae22ec8b105e7506af867cd -size 31341 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/sorted_top_vocab.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/sorted_top_vocab.json deleted file mode 100644 index 3e1d8f996b521986a347beddec9e404da98582d2..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/sorted_top_vocab.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6bc5cfc0aa78c0984d45847ffb88a666149f8ce364a66a447a08a7f445d83671 -size 7907 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/cache-630eedbf17afb681.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/cache-630eedbf17afb681.arrow deleted file mode 100644 index b040f2d20cc27ec0cbd6f8cc31ff4aadaf5f0fc1..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/cache-630eedbf17afb681.arrow +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b553cbdbee049fad4ebd08fddd52db9145c77ad3846008557226e1c0157c3b93 -size 57920 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/data-00000-of-00001.arrow deleted file mode 100644 index e676e4a964f3ed452053b995f1bab0ccbc9322a9..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/data-00000-of-00001.arrow +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19726400fd8623d601b7cfbb85e9cf1aa063c8f5c00b7baa0f1de46eacff0d80 -size 24208 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/dataset_info.json deleted file mode 100644 index 1b56504c71ed317b6e9bad4d42cc4c0949e4c35b..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/dataset_info.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:362e98087a1b5bad2dc4e4d4b04dde74ed38ab64be0c12722e1f328518055d3b -size 604 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/state.json deleted file mode 100644 index f3de28d8dc1042eea52d147862261de0be686acb..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_dset/state.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e52676a9496280a7002faad9facf941ac9ca075b8bb0fed7aa655e546e4e9d6 -size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.html deleted file mode 100644 index 9b69961eedaf6813a142361e28d635099138ed1a..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.html +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b1690a778ad39a5363a94de88f59dda4fcf5ffbf4c3cc5abfbddbbc3be927fc0 -size 116 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.json deleted file mode 100644 index e228df0d5145a7052b71fa08c77f53a5a7851041..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/text_duplicates/text_duplicates.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c40970be1f0326d8ef1b689eda4ac2695d0c536d95fc69cfcd569cb361a77ad6 -size 50 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/tokenized_df.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/tokenized_df.json deleted file mode 100644 index 2ca5d7c5f1ccf663f5eaa3a0572a1d09ee07f84f..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/tokenized_df.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4c247100203577651e9438449aa807aefa89e6f3f435fa86a72e1962a34620e3 -size 65220 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/vocab_counts.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/vocab_counts.json deleted file mode 100644 index 99b1f399e9e28d3dc620b9ab338f887d546721de..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/vocab_counts.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:432408d6694ad4a306b8cfc88b7c4bbe81b9520ff569d6bd40a6c4f976283b8a -size 85002 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_basic_stats.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_basic_stats.json deleted file mode 100644 index e6649f3cf69d596cb42c2a12b911e5626fee7426..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_basic_stats.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f08d9b8167bd637b20b42930ab92d77aa3e654ba3b9adfd38511925db033737 -size 450 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.html deleted file mode 100644 index 67d8c970987f9fdf3b221edff0f4d13ca9dfa104..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.html +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a40f7cd29896713f3d18e0b7f5eb94f940145960917a221e0826826ea602193d -size 3603675 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.json deleted file mode 100644 index dd37cc120961848e8be8e2542f31d35b38cc0889..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_images/zipf/zipf_fig.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf29753dd02d79f51129508dbe3b95d6b7f00020b8ca5a0a8a5e23d98582b47a -size 17420 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/identity_terms.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/identity_terms.json deleted file mode 100644 index 3aad02b4402f43554bcace344e426bcb34cf369c..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/identity_terms.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f63abf99273843d82dfb120f6b5424186b36192a77131b993f920ffa8f69d495 -size 66 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-her.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-her.json deleted file mode 100644 index 6d20e8227c0de9100629646a63d87aa2798ced59..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-her.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:74810d05acea27865e51072a54bbeccfeeaf25fb43f916c1e000142e848ab5aa -size 206518 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-him.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-him.json deleted file mode 100644 index 850d2ae940fec999c026c8f586b30ec6b145d0e9..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-him.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eed3cdbfb67411b61c50ed8fc7ff8671535b5093993adc725af99a2121627779 -size 336288 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-his.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-his.json deleted file mode 100644 index 0846a7602fa3249124a7bf60db90b675d247ade5..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-his.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eadc8cbd0178d1a8ac567aeb29f460e46048b260e2e5f0e3314eafddd9d1d8d6 -size 555707 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-man.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-man.json deleted file mode 100644 index d4f8425389c39bee4de0a8ce67860529d76ae3ab..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-man.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ae639cd319d22742bb019e0c37616ef7abe6342f43cb23f8829738cc72178803 -size 327867 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-she.json deleted file mode 100644 index 18a99715f5ed1c4179dd7f4335ef058974da0bae..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-she.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e91383fc3bf336adbc3b6350b1f5c37c312b8bbd4821432f61a4647884402ea1 -size 187297 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-their.json deleted file mode 100644 index 56ed89234b80e1fcf420748adf280eb6e05bd6e6..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-their.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f52da51e47fddc9655c0a98bc5bc4e72a954f90b25019b1cec2030d33d0dcd44 -size 494217 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-them.json deleted file mode 100644 index 161e7538a6b56cff65d0ca8f09f18ec9966b71fe..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-them.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6d058e8b1d68e4beb2349a00a37875c15b80abe3e5ae68603111c11c04bd3b93 -size 387062 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-they.json deleted file mode 100644 index e331bf3d64f647d095059ba0415aa1d8c2f875eb..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-he-they.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1800a112cda7eddc6e2dc5369c274ba7f25ff69277928dbdf55c506c541046bc -size 492544 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-him.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-him.json deleted file mode 100644 index efea2659663171c7a5bfddb08ac66ccca89028fd..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-him.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f300687ddb8b0288dfbd2fab15b1e055159317877a9719400a6b99b7495894fb -size 173867 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-his.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-his.json deleted file mode 100644 index f19c6c2460c677a35d107b0313ab6db4192c6498..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-his.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e48e0b376be3b9913c173841c2297cefe24d30236b2118801884a4c1cf6cf2de -size 231600 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-man.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-man.json deleted file mode 100644 index 853c8aeb728fb614bdaf1877293edeac73409d1c..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-man.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:67495752ef7cc424172f541728bd467b5a5f9c3949035becb8620560cb617ea9 -size 146058 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-she.json deleted file mode 100644 index 48ecb51ee20b6c340907b4b8bf55e1aff39d6249..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-she.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5ce6cfa0a718838bbef5e19618c6cb0df3036af3fde4f5b7f1bd26e17cdf481c -size 208749 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-their.json deleted file mode 100644 index e1fc291c18c17a48139c57e12c94f6ce47512878..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-their.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a79d6f4af19e958d6cac923afb85913fd9ebfd519903c2247d62475d1ee38f0a -size 239132 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-them.json deleted file mode 100644 index 103bdb67896d5a3bc9d59ba27f2cb342c2765dc9..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-them.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:041e3197b2ee659930e23a4de987be064e0fcac6b6143ececa96dbd0777e1f6f -size 209045 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-they.json deleted file mode 100644 index dc43b26660544f648a7363ab34af01fef1a2a1ee..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-her-they.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4010a4090b0a141c00ab8ed769145b9bb48264318230f509af4ef8e232474a3e -size 226944 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-his.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-his.json deleted file mode 100644 index f22169f4e36c6adb107d6ac4280900093eb85155..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-his.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:49a7a33d6c4972d184e7bd7f0b8938801f1a5b35bc28abf6642ddbc6d9ab57e7 -size 348370 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-man.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-man.json deleted file mode 100644 index d76abfc78d5aeed3cc7c18bec1708a566dc7e8d1..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-man.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dcfa94884fed8e7d095c2428e3d07991119c7d6e1b96ca7bfcc46bca46d76252 -size 237802 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-she.json deleted file mode 100644 index 23a8b1c88c1a019608a4ee54d72c9a803b6069b7..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-she.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39cf30e9852fec52c2d74d29a96aef444c9a446d139f21f33d0686a081561761 -size 169186 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-their.json deleted file mode 100644 index 9c2a0a433835eaf41666c795047020d097afc899..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-their.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dee47111d0ab159b8e4b417900aa31c56ebe3753df77435210b53235dd8c2808 -size 319010 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-them.json deleted file mode 100644 index c0007346aaa87b92b9927d29567bb1d1e8317be1..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-them.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:abd082fcc2b35a9d59804c43d420437e08d756a30ed680e1b924e467e50a07db -size 276215 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-they.json deleted file mode 100644 index 6827845cfa4e978f5244a79fd742e713d32932b3..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-him-they.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e94f078b1e532fe1cac49a2266d650054a1bd4499fd8832ca9861da0efa23eda -size 306427 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-man.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-man.json deleted file mode 100644 index 63e2df694384542255414fc5d5836ae3c83d8676..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-man.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3369908c36ebf52efd4d404dcd94abb3511e726f1b69a5de8da7022f27ef9285 -size 346447 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-she.json deleted file mode 100644 index 6a9ce777fc49256acc070c8a69e71842cd3f9eca..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-she.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d57652a80447511077634120337aa92884516c7026f04c46ce158fae6c7c51a1 -size 206318 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-their.json deleted file mode 100644 index f110c55d711a7e7bac7c71a33a198b381ff17eb3..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-their.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:24cefd39e27af2b8606dfe4607f9dc20b088cfc24e2abbd918754813b83fac28 -size 533027 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-them.json deleted file mode 100644 index fe50138f79b72df296248a40d35a6fe2990ec807..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-them.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a929834c3c602e7789051476649d068e6dba1b9b0b37a74824482e760ef34423 -size 409163 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-they.json deleted file mode 100644 index c7477393d4ee1160555411fbf48b1681e1db2f95..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-his-they.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:289807e00e862a7d06a124a7a4f52730c90d9276f48d719c9e9365c0791e3761 -size 495612 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-she.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-she.json deleted file mode 100644 index d7e46e65d25a5805e66b8af95a221f3e76aa9cc4..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-she.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a682be4c7c874628ca76d3013a0ef43b5f973c127292ba7a0ecd7729d21006a -size 140597 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-their.json deleted file mode 100644 index f0c6faa6ab5e48ca0c730ec4c5c3d86670433749..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-their.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f55698bbcb75adb9c230e27c11200f499c221ce2abe7bcd0d878519502f608f1 -size 316781 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-them.json deleted file mode 100644 index 9b65f7956183adf17727f51c17c1cc022ab1ce03..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-them.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:09c931b2629be08259fe3ae8e9e21afa10726172fcf0c922545ff7cf658d6616 -size 244569 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-they.json deleted file mode 100644 index c6208c507668c2e6f32ec26298198c6162a8c522..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-man-they.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff38625a0f342625440fdc25090a7a3d91319c9dd665c86a625f2c14c00c8924 -size 299200 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-their.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-their.json deleted file mode 100644 index ed8d624cf18be894eb61729a122a702372724d67..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-their.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cffea61c52510ad5e8435103cf9aa1694d0e879fb29d73ceb20a367c55407263 -size 230036 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-them.json deleted file mode 100644 index 69b45b8aa85648bc868aa1948ded27b70cec8614..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-them.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:850b46812e7067fb03cc5cfa1942a5a57d9df7901cbe2d5b19b4c258a8d96595 -size 215824 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-they.json deleted file mode 100644 index 0bd67f6c55da123d93f0a9ce21d204f6e7074393..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-she-they.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:67e80998caa6bf4ea98bd4f3a220a54ca149daf820ca23c74b58db1f01e5caae -size 228986 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-them.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-them.json deleted file mode 100644 index 9462716b227f268782439c68b382257bc576297a..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-them.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f4a58606bd342585ddf74582d5deb267562466dde26935fda8f831608747b74a -size 476151 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-they.json deleted file mode 100644 index ffb39e38929a3bfe404d35f8d9233adeed88cfdf..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-their-they.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a65ff26d962ced6bdbc7c28b4560c2a2983661bc217cec8662525fd4b9a75fb2 -size 595929 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-them-they.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-them-they.json deleted file mode 100644 index 7f7112afca122e31b93799730627c6d0f50a56e3..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/associations/npmi/combined-them-they.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed3b8f4a8554467ac921a38cd52ca793a489fbabfca149dea8da1a28cc4c37ad -size 456036 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/data-00000-of-00001.arrow deleted file mode 100644 index e6dbd1cc73f6f70eaf2b121354cb8808a3c6980c..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/data-00000-of-00001.arrow +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f23eb11877f1bf934ba12d0ce910ccb71b9ce3865798dc6651d12425244b529 -size 489144 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/dataset_info.json deleted file mode 100644 index 1dfdb32df891cb4bec1107d5edee9c07d2905009..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/dataset_info.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bbbef344f84ed9a7f11d0778f52ff0e2decf2ef7aacd1fbf5f059d069d2c6ba6 -size 946 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/state.json deleted file mode 100644 index a7925f467949e4a6e94ff489f13667b228614944..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/base_dset/state.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ab2c18f9e7e23bc6b501df499e16c721198a5452b993cbc3921612254438a7c3 -size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/dset_peek.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/dset_peek.json deleted file mode 100644 index 1751c969329e31c30a86204214e2d70b8a547aab..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/dset_peek.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c8be5416e6a1bf0c3d02709b3d5c13b7b9af543ea902301e5b268b4d16b026c0 -size 502887 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/general_stats_dict.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/general_stats_dict.json deleted file mode 100644 index bf1146bc7790a7068abf3241a5b5909a51ad32cc..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/general_stats_dict.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dddd2be628746feeccc04859d54c4133e7f1b3e1964ffa98d533ad318d6d0eb2 -size 95 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/length_measurements.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/length_measurements.json deleted file mode 100644 index 6531be1d81048cb2559b2b85c579f0bde88d9cf8..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/length_measurements.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf446327b541d83f38aff236b89cf65d85bf69bfe03b6df7605132398a2afd1b -size 127 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_fig.png b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_fig.png deleted file mode 100644 index e7d3d5e2dd1f61399cbf37a21bbc674e66828764..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_fig.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ebeb8e7a35dfc3b84bd148b60335cefbbbd1f74db88c95e1854749d6ac4b8e23 -size 63234 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_table.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_table.json deleted file mode 100644 index a5d8ee97ff8de47dd88457bfe4466d9a7d30c37f..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/lengths/lengths_table.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d0cf4f5f1fb12118cfac5d1e8a5cf4a4e2eb9584800b3e32c9277eb11b544f59 -size 326143 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/sorted_top_vocab.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/sorted_top_vocab.json deleted file mode 100644 index fc18bbb0c06aaa025f6ecacf78b672a55191c0dd..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/sorted_top_vocab.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8c7533a246364b3b3563ed57985cf7458c1129b6a24ccaae28d81c37bb9065ac -size 7798 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/cache-44c5b4fd795439b5.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/cache-44c5b4fd795439b5.arrow deleted file mode 100644 index be6649ca3a733747b34d5f764b21053f719ee649..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/cache-44c5b4fd795439b5.arrow +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb4db59fda64595da71953d940ebbb945bd66ba4b078ec81cd8682d4370761eb -size 778488 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/data-00000-of-00001.arrow b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/data-00000-of-00001.arrow deleted file mode 100644 index 71b5d57556865c53e787cc065886701739e53202..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/data-00000-of-00001.arrow +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:84f617cce2defaf83255893aa927877b864273b53dfd553a67d7f68a24a803af -size 314048 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/dataset_info.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/dataset_info.json deleted file mode 100644 index 1b56504c71ed317b6e9bad4d42cc4c0949e4c35b..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/dataset_info.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:362e98087a1b5bad2dc4e4d4b04dde74ed38ab64be0c12722e1f328518055d3b -size 604 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/state.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/state.json deleted file mode 100644 index 2e04d37fb0a4a8e9cfaf820d96cac1c5b53a8535..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_dset/state.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:75d1f2ee6f0854a390ac6a6af4958a257144af7fbebcdffca28b3ae70eab713c -size 250 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.html deleted file mode 100644 index 9b69961eedaf6813a142361e28d635099138ed1a..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.html +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b1690a778ad39a5363a94de88f59dda4fcf5ffbf4c3cc5abfbddbbc3be927fc0 -size 116 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.json deleted file mode 100644 index e228df0d5145a7052b71fa08c77f53a5a7851041..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/text_duplicates/text_duplicates.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c40970be1f0326d8ef1b689eda4ac2695d0c536d95fc69cfcd569cb361a77ad6 -size 50 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/tokenized_df.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/tokenized_df.json deleted file mode 100644 index 21ed7c556e0e8c50f85ab7ede1f2d516664bddec..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/tokenized_df.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ddb2eeb446ee8a05a3c37498d571550950b3849bebc612e32bda31303fd4320f -size 790903 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/vocab_counts.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/vocab_counts.json deleted file mode 100644 index e3a53e59ff0378f6ef65fae23b12fee61b2af2ce..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/vocab_counts.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:73615bc7ec4fb0388b01047bc947a1249694c87e5f452e365a12d327d833396b -size 757585 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_basic_stats.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_basic_stats.json deleted file mode 100644 index 2cac616cf58d047563fb01cf07596ec3c1587ea3..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_basic_stats.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b58db6c3bd0c6ccdcc60e0d726244e0632d0f14a0d639597a28d78de059328fc -size 1294 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.html b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.html deleted file mode 100644 index ee447c1d28d6e143e8412f71371f2165d0ec38ef..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.html +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e2a534847f537dd917a528ebd172e2d955eef3d543771a757d51c80e0f96147 -size 3693133 diff --git a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.json b/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.json deleted file mode 100644 index b344a3323ace3dac3cbad30446614a7aac25ca0f..0000000000000000000000000000000000000000 --- a/cache_dir/HuggingFaceM4/OBELICS_opt_out_docs_removed_2023_07_12_train_texts/zipf/zipf_fig.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d4cbf90b3bf6b84aa83603b2c42797b53b7567553e66e2e5aaec7dbbb322327 -size 107430 diff --git a/cache_dir/has_cache.json b/cache_dir/has_cache.json index caae5b186f1b274cbb8eb40382613067555f8b36..05df1bdb546565fd3e0dbb8e093c558be0f0cdf8 100644 --- a/cache_dir/has_cache.json +++ b/cache_dir/has_cache.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9e7d89146f736ca9852dd82abaa7d29225499d53ca16f7714cfa576915e0a7d7 -size 3584 +oid sha256:52a5a5ca41ba3fecb1d56bcb5eb58b4d4e9d1d6c9165d00e6008ef0cd0b25db6 +size 3738 diff --git a/data_measurements/__pycache__/__init__.cpython-311.pyc b/data_measurements/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9bee57ff6e3fbdfa027f5389b9692b61fb0d0c5e Binary files /dev/null and b/data_measurements/__pycache__/__init__.cpython-311.pyc differ diff --git a/data_measurements/__pycache__/dataset_statistics.cpython-311.pyc b/data_measurements/__pycache__/dataset_statistics.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca4412891ec3a7a12a38b57e421c4359621be1da Binary files /dev/null and b/data_measurements/__pycache__/dataset_statistics.cpython-311.pyc differ diff --git a/data_measurements/__pycache__/dataset_utils.cpython-311.pyc b/data_measurements/__pycache__/dataset_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e2b15e668130c66bc7037021f99c606a94794da6 Binary files /dev/null and b/data_measurements/__pycache__/dataset_utils.cpython-311.pyc differ diff --git a/data_measurements/__pycache__/embeddings.cpython-311.pyc b/data_measurements/__pycache__/embeddings.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..03495fd37438ca830b594589229b6d11a94fbbb7 Binary files /dev/null and b/data_measurements/__pycache__/embeddings.cpython-311.pyc differ diff --git a/data_measurements/__pycache__/npmi.cpython-311.pyc b/data_measurements/__pycache__/npmi.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..185011d4e59dbc6718031480c03ce636aec2fefd Binary files /dev/null and b/data_measurements/__pycache__/npmi.cpython-311.pyc differ diff --git a/data_measurements/__pycache__/streamlit_utils.cpython-311.pyc b/data_measurements/__pycache__/streamlit_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe6f3393858728ed9d96d67f486ad44353696c20 Binary files /dev/null and b/data_measurements/__pycache__/streamlit_utils.cpython-311.pyc differ diff --git a/data_measurements/__pycache__/zipf.cpython-311.pyc b/data_measurements/__pycache__/zipf.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b03b345f9fad6a4b46a62c13f769a03e44fb8559 Binary files /dev/null and b/data_measurements/__pycache__/zipf.cpython-311.pyc differ diff --git a/data_measurements/dataset_statistics.py b/data_measurements/dataset_statistics.py index 65a67e18616d2ab7a6e31ae0d1afcd1b5f19eddf..a90770d7054f043540f7aa344ded43da8ac818bb 100644 --- a/data_measurements/dataset_statistics.py +++ b/data_measurements/dataset_statistics.py @@ -611,6 +611,8 @@ class DatasetStatisticsCacheClass: batched=True, remove_columns=list(self.dset.features), ) + ##additon + self.text_dset = self.text_dset.filter(lambda ex: ex["text"] is not None) def do_tokenization(self): """ diff --git a/data_measurements/dataset_utils.py b/data_measurements/dataset_utils.py index 88b86684529b5e44cfc65485a8fe6f654cf2daa6..7296313f6b8bf5fcc254141580da9f6860137038 100644 --- a/data_measurements/dataset_utils.py +++ b/data_measurements/dataset_utils.py @@ -65,7 +65,7 @@ _STREAMABLE_DATASET_LIST = [ "HuggingFaceM4/OBELICS", ] -_MAX_ROWS = 200000 +_MAX_ROWS = 100 def load_truncated_dataset( @@ -231,7 +231,7 @@ def dictionarize_info(dset_info): res = { "config_name": info_dict["config_name"], "splits": { - spl: spl_info["num_examples"] + spl: 100 #spl_info["num_examples"] for spl, spl_info in info_dict["splits"].items() }, "features": { diff --git a/data_measurements/streamlit_utils.py b/data_measurements/streamlit_utils.py index e13ec2490661a351f82fb69541cc3a9390d0f25d..fbe549e959e4eb97afb42e681bb5a52532f4729a 100644 --- a/data_measurements/streamlit_utils.py +++ b/data_measurements/streamlit_utils.py @@ -18,7 +18,7 @@ import json import pandas as pd import seaborn as sns import streamlit as st -from st_aggrid import AgGrid, GridOptionsBuilder +#from st_aggrid import AgGrid, GridOptionsBuilder from .dataset_utils import HF_DESC_FIELD, HF_FEATURE_FIELD, HF_LABEL_FIELD st.set_option('deprecation.showPyplotGlobalUse', False) @@ -65,7 +65,7 @@ def sidebar_selection(ds_name_to_dict, column_id): ) # choose a subset of num_examples # TODO: Handling for multiple text features - ds_config = ds_configs[config_name] + #ds_config = ds_configs[config_name] # text_features = ds_config[HF_FEATURE_FIELD]["string"] text_features = [tuple(text_field.split('-')) for text_field in _HAS_CACHE[ds_name][config_name]] # TODO @yacine: Explain what this is doing and why eg tp[0] could = "id" diff --git a/log_files/app.log b/log_files/app.log new file mode 100644 index 0000000000000000000000000000000000000000..6809acd60e35f276d24662442f1750d31a5c5274 --- /dev/null +++ b/log_files/app.log @@ -0,0 +1,44 @@ +2023-08-23 17:29:50,194:Using Single Dataset Mode +2023-08-23 17:29:50,202:Using cache +2023-08-23 17:34:04,702:Using Single Dataset Mode +2023-08-23 17:43:38,030:Using Single Dataset Mode +2023-08-23 17:43:38,035:Using cache +2023-08-23 17:45:36,703:Using Single Dataset Mode +2023-08-23 17:48:20,572:Using Single Dataset Mode +2023-08-23 17:52:30,321:Using Single Dataset Mode +2023-08-23 17:54:35,084:Using Single Dataset Mode +2023-08-23 17:56:12,155:Using Comparison Mode +2023-08-24 07:51:23,364:Using Single Dataset Mode +2023-08-24 07:57:23,750:Using Single Dataset Mode +2023-08-24 08:01:29,502:Using Single Dataset Mode +2023-08-24 08:03:08,131:Using Single Dataset Mode +2023-08-24 08:04:51,132:Using Single Dataset Mode +2023-08-24 08:04:51,138:Using cache +2023-08-24 08:10:10,454:Using Single Dataset Mode +2023-08-24 08:15:29,052:Using Single Dataset Mode +2023-08-24 08:15:29,060:Using cache +2023-08-24 08:17:31,506:Using Single Dataset Mode +2023-08-24 08:19:49,714:Using Single Dataset Mode +2023-08-24 18:42:47,928:Using Single Dataset Mode +2023-08-24 18:46:27,220:Using Single Dataset Mode +2023-08-24 18:49:34,812:Using Single Dataset Mode +2023-08-24 18:50:59,294:Using Single Dataset Mode +2023-08-24 18:52:13,936:Using Single Dataset Mode +2023-08-24 18:52:13,942:Using cache +2023-08-24 18:53:35,540:Using Single Dataset Mode +2023-08-24 18:54:55,961:Using Single Dataset Mode +2023-08-24 18:56:59,520:Using Single Dataset Mode +2023-08-24 18:58:22,133:Using Single Dataset Mode +2023-08-24 19:00:13,836:Using Single Dataset Mode +2023-08-24 19:01:23,903:Using Single Dataset Mode +2023-08-24 20:23:51,453:Using Single Dataset Mode +2023-08-24 20:24:59,017:Using Single Dataset Mode +2023-08-24 20:26:46,678:Using Single Dataset Mode +2023-08-24 20:27:59,157:Using Single Dataset Mode +2023-08-24 20:29:31,861:Using Single Dataset Mode +2023-08-24 20:30:48,436:Using Single Dataset Mode +2023-08-24 20:33:15,450:Using Single Dataset Mode +2023-08-24 20:34:29,544:Using Single Dataset Mode +2023-08-25 08:41:31,588:Using Single Dataset Mode +2023-08-25 08:42:41,115:Using Single Dataset Mode +2023-08-25 08:44:16,584:Using Single Dataset Mode diff --git a/log_files/dataset_statistics.log b/log_files/dataset_statistics.log index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4b124eb5b2f0d5a39998765c1fc4a33e45d54c4a 100644 --- a/log_files/dataset_statistics.log +++ b/log_files/dataset_statistics.log @@ -0,0 +1,3 @@ +2023-08-23 17:29:50,216:Loaded dataset from disk +2023-08-23 17:43:38,040:Loaded dataset from disk +2023-08-24 18:52:13,955:Loaded dataset from disk diff --git a/requirements.txt b/requirements.txt index fc4c2d343f85de32a562e075d6d08524eeddff12..e52dea21f56dbcbbb844495cdef1acb9e87392f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ dataclasses==0.6 iso639==0.1.4 -python_igraph==0.9.6 +python_igraph PyYAML>=5.4.1 @@ -37,13 +37,15 @@ seaborn==0.11.2 streamlit-aggrid -numexpr==2.7.3 +numexpr scikit-learn>=0.24.2 - -scipy~=1.7.3 - tqdm~=4.62.3 -pyarrow~=6.0.1 +pyarrow altair<5 + + +scipy + +streamlit==1.24.1