| Name | Last modified | Size | Description | |
|---|---|---|---|---|
| Parent Directory | - | |||
| __init__.py | 2025-05-20 14:15 | 4.3K | ||
| __pycache__/ | 2025-05-20 14:15 | - | ||
| add_target_dataset.py | 2025-05-20 14:15 | 2.9K | ||
| append_token_dataset.py | 2025-05-20 14:15 | 1.0K | ||
| audio/ | 2025-05-20 14:15 | - | ||
| backtranslation_dataset.py | 2025-05-20 14:15 | 6.1K | ||
| base_wrapper_dataset.py | 2025-05-20 14:15 | 2.1K | ||
| bucket_pad_length_dataset.py | 2025-05-20 14:15 | 2.3K | ||
| codedataset.py | 2025-05-20 14:15 | 18K | ||
| colorize_dataset.py | 2025-05-20 14:15 | 843 | ||
| concat_dataset.py | 2025-05-20 14:15 | 4.5K | ||
| concat_sentences_dataset.py | 2025-05-20 14:15 | 1.5K | ||
| data_utils.py | 2025-05-20 14:15 | 21K | ||
| data_utils_fast.cpython-310-x86_64-linux-gnu.so | 2025-05-20 14:15 | 1.2M | ||
| denoising_dataset.py | 2025-05-20 14:15 | 15K | ||
| dictionary.py | 2025-05-20 14:15 | 13K | ||
| encoders/ | 2025-05-20 14:15 | - | ||
| fairseq_dataset.py | 2025-05-20 14:15 | 7.0K | ||
| fasta_dataset.py | 2025-05-20 14:15 | 3.3K | ||
| huffman/ | 2025-05-20 14:15 | - | ||
| id_dataset.py | 2025-05-20 14:15 | 423 | ||
| indexed_dataset.py | 2025-05-20 14:15 | 18K | ||
| iterators.py | 2025-05-20 14:15 | 31K | ||
| language_pair_dataset.py | 2025-05-20 14:15 | 19K | ||
| legacy/ | 2025-05-20 14:15 | - | ||
| list_dataset.py | 2025-05-20 14:15 | 729 | ||
| lm_context_window_dataset.py | 2025-05-20 14:15 | 3.3K | ||
| lru_cache_dataset.py | 2025-05-20 14:15 | 570 | ||
| mask_tokens_dataset.py | 2025-05-20 14:15 | 8.6K | ||
| monolingual_dataset.py | 2025-05-20 14:15 | 8.6K | ||
| multi_corpus_dataset.py | 2025-05-20 14:15 | 9.1K | ||
| multi_corpus_sampled_dataset.py | 2025-05-20 14:15 | 5.2K | ||
| multilingual/ | 2025-05-20 14:15 | - | ||
| nested_dictionary_dataset.py | 2025-05-20 14:15 | 3.9K | ||
| noising.py | 2025-05-20 14:15 | 12K | ||
| num_samples_dataset.py | 2025-05-20 14:15 | 404 | ||
| numel_dataset.py | 2025-05-20 14:15 | 786 | ||
| offset_tokens_dataset.py | 2025-05-20 14:15 | 444 | ||
| pad_dataset.py | 2025-05-20 14:15 | 941 | ||
| plasma_utils.py | 2025-05-20 14:15 | 6.1K | ||
| prepend_dataset.py | 2025-05-20 14:15 | 953 | ||
| prepend_token_dataset.py | 2025-05-20 14:15 | 1.0K | ||
| raw_label_dataset.py | 2025-05-20 14:15 | 546 | ||
| replace_dataset.py | 2025-05-20 14:15 | 1.3K | ||
| resampling_dataset.py | 2025-05-20 14:15 | 4.2K | ||
| roll_dataset.py | 2025-05-20 14:15 | 485 | ||
| round_robin_zip_datasets.py | 2025-05-20 14:15 | 6.2K | ||
| shorten_dataset.py | 2025-05-20 14:15 | 2.4K | ||
| sort_dataset.py | 2025-05-20 14:15 | 621 | ||
| strip_token_dataset.py | 2025-05-20 14:15 | 647 | ||
| subsample_dataset.py | 2025-05-20 14:15 | 2.1K | ||
| text_compressor.py | 2025-05-20 14:15 | 1.8K | ||
| token_block_dataset.py | 2025-05-20 14:15 | 7.5K | ||
| token_block_utils_fast.cpython-310-x86_64-linux-gnu.so | 2025-05-20 14:15 | 1.3M | ||
| transform_eos_concat_langpair_dataset.py | 2025-05-20 14:15 | 5.0K | ||
| transform_eos_dataset.py | 2025-05-20 14:15 | 4.5K | ||
| transform_eos_lang_pair_dataset.py | 2025-05-20 14:15 | 3.8K | ||