Instructions to use CATIE-AQ/FAT5-small with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use CATIE-AQ/FAT5-small with Transformers:
```python
# Load model directly
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("CATIE-AQ/FAT5-small", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
```json
{
  "alibi_mode": "symetric",
  "architectures": [
    "FlashT5ForConditionalGeneration"
  ],
  "attention_dropout_rate": 0.0,
  "attention_scale": 1.0,
  "attention_type": "triton",
  "auto_map": {
    "AutoConfig": "configuration_flash_t5.FlashT5Config",
    "AutoModel": "modeling_flash_t5.FlashT5EncoderModel",
    "AutoModelForQuestionAnswering": "custom_heads_flash_t5.FlashT5ForQuestionAnswering",
    "AutoModelForSeq2SeqLM": "modeling_flash_t5.FlashT5ForConditionalGeneration",
    "AutoModelForSequenceClassification": "custom_heads_flash_t5.FlashT5ForSequenceClassification",
    "AutoModelForTokenClassification": "custom_heads_flash_t5.FlashT5ForTokenClassification"
  },
  "classifier_dropout": 0.0,
  "crossentropy_inplace_backward": false,
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.0,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "fire_mlp_width": 32,
  "initializer_factor": 1.0,
  "is_encoder_decoder": false,
  "is_gated_act": false,
  "label_smoothing": 0.0,
  "layer_norm_epsilon": 1e-06,
  "max_sequence_length": 1024,
  "model_type": "flash_t5",
  "num_decoder_layers": 12,
  "num_heads": 8,
  "num_layers": 12,
  "pad_token_id": 3,
  "position_encoding_type": "t5",
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "rotary_base": 10000,
  "rotary_emb_fraction": 1.0,
  "rotary_interleaved": false,
  "rotary_scale_base": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.46.0.dev0",
  "use_cache": true,
  "use_full_bias_size": false,
  "use_gelu_act": true,
  "use_glu_mlp": true,
  "use_masking": false,
  "use_randomized_position_encoding": false,
  "use_triton_crossentropy": true,
  "use_triton_layernorm": true,
  "vocab_size": 32768,
  "z_loss": 0.0001
}
```