{
  "_name_or_path": "MIT/ast-finetuned-audioset-10-10-0.4593",
  "architectures": [
    "ASTForAudioClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "frequency_stride": 10,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "barswa",
    "1": "blakit1",
    "10": "greegr",
    "11": "hoopoe",
    "12": "litegr",
    "13": "ratcis1",
    "14": "rbsrob1",
    "15": "rerswa1",
    "16": "somgre1",
    "17": "thrnig1",
    "18": "wlwwar",
    "19": "woosan",
    "2": "cohmar1",
    "3": "colsun2",
    "4": "combul2",
    "5": "combuz1",
    "6": "comsan",
    "7": "eaywag1",
    "8": "eubeat1",
    "9": "gnbcam2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "barswa": 0,
    "blakit1": 1,
    "cohmar1": 2,
    "colsun2": 3,
    "combul2": 4,
    "combuz1": 5,
    "comsan": 6,
    "eaywag1": 7,
    "eubeat1": 8,
    "gnbcam2": 9,
    "greegr": 10,
    "hoopoe": 11,
    "litegr": 12,
    "ratcis1": 13,
    "rbsrob1": 14,
    "rerswa1": 15,
    "somgre1": 16,
    "thrnig1": 17,
    "wlwwar": 18,
    "woosan": 19
  },
  "layer_norm_eps": 1e-12,
  "max_length": 1024,
  "model_type": "audio-spectrogram-transformer",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_mel_bins": 128,
  "patch_size": 16,
  "problem_type": "single_label_classification",
  "qkv_bias": true,
  "time_stride": 10,
  "torch_dtype": "float32",
  "transformers_version": "4.31.0.dev0"
}