{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "park", "1": "2", "2": "dirt", "3": "suv", "4": "small", "5": "red", "6": "crown", "7": "2000", "8": "neon", "9": "shrimp", "10": "10", "11": "7:35", "12": "wine", "13": "man", "14": "birthday", "15": "sky", "16": "hair", "17": "forest", "18": "car", "19": "green", "20": "lg", "21": "wedding", "22": "white", "23": "8", "24": "gray", "25": "brick", "26": "cage", "27": "pink", "28": "chopsticks", "29": "shadow", "30": "tent", "31": "style", "32": "beige", "33": "out", "34": "skateboard", "35": "3", "36": "fence", "37": "7:45", "38": "curtains", "39": "windows", "40": "double", "41": "stripes", "42": "rack", "43": "crossing", "44": "chair", "45": "solid", "46": "french", "47": "blue", "48": "exit", "49": "black and white", "50": "tabby", "51": "cat", "52": "snowboarding", "53": "tower", "54": "walking", "55": "wall", "56": "giraffe", "57": "ball", "58": "snowboard", "59": "leather", "60": "calico", "61": "africa", "62": "bicycles", "63": "soccer", "64": "church", "65": "skateboarding", "66": "right", "67": "bikes", "68": "5", "69": "2013", "70": "low", "71": "clock", "72": "door", "73": "red and yellow", "74": "desert", "75": "bus", "76": "snowboarder", "77": "down", "78": "train", "79": "cup", "80": "red and blue", "81": "tan", "82": "happy", "83": "beagle", "84": "canopy", "85": "resting", "86": "person", "87": "hat", "88": "camera", "89": "trees", "90": "soccer ball", "91": "talking", "92": "air", "93": "lanyard", "94": "arrow", "95": "can't tell", "96": "0", "97": "queen", "98": "unknown", "99": "donut", "100": "plastic", "101": "name tag", "102": "skier", "103": "woman", "104": "smile", "105": "security", "106": "laying down", "107": "brown", "108": "doughnut", "109": "net", "110": "on road", "111": "cross", "112": "watching", "113": "plain", "114": "backpack", "115": "picnic table", "116": "yes", "117": "not sure", "118": "they aren't", "119": "big ben", "120": "zoo", "121": "king", "122": "roof", "123": "blue and white", "124": "bricks", "125": "many", "126": "protection", "127": "1", "128": "tired", "129": "bedroom", "130": "dog", "131": "blonde", "132": "curtain", "133": "ground", "134": "outside", "135": "hawaii", "136": "large", "137": "lady", "138": "boy", "139": "window", "140": "bike rack", "141": "station", "142": "monitor", "143": "cloudy", "144": "plate", "145": "photographer", "146": "lying down", "147": "shelter", "148": "street", "149": "talking on phone", "150": "yellow", "151": "4", "152": "purple", "153": "platform", "154": "9:35", "155": "woods", "156": "on street", "157": "shadows", "158": "snow", "159": "ice cream", "160": "don't know", "161": "no", "162": "smiling", "163": "table", "164": "tv", "165": "not there", "166": "little girl", "167": "clock tower", "168": "white and blue", "169": "clear", "170": "skiing", "171": "bicycle", "172": "in car", "173": "white and black", "174": "shade", "175": "natural", "176": "black", "177": "sleeping", "178": "7", "179": "gray and black", "180": "girl", "181": "stand", "182": "full", "183": "nothing", "184": "human", "185": "giraffes", "186": "orange", "187": "sidewalk", "188": "2010", "189": "6", "190": "fashion", "191": "wine tasting", "192": "screen", "193": "at table", "194": "jeep", "195": "8:35", "196": "sun", "197": "women", "198": "necklace" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 96, "1": 127, "10": 10, "2": 1, "2000": 7, "2010": 188, "2013": 69, "3": 35, "4": 151, "5": 68, "6": 189, "7": 178, "7:35": 11, "7:45": 37, "8": 23, "8:35": 195, "9:35": 154, "africa": 61, "air": 92, "arrow": 94, "at table": 193, "backpack": 114, "ball": 57, "beagle": 83, "bedroom": 129, "beige": 32, "bicycle": 171, "bicycles": 62, "big ben": 119, "bike rack": 140, "bikes": 67, "birthday": 14, "black": 176, "black and white": 49, "blonde": 131, "blue": 47, "blue and white": 123, "boy": 138, "brick": 25, "bricks": 124, "brown": 107, "bus": 75, "cage": 26, "calico": 60, "camera": 88, "can't tell": 95, "canopy": 84, "car": 18, "cat": 51, "chair": 44, "chopsticks": 28, "church": 64, "clear": 169, "clock": 71, "clock tower": 167, "cloudy": 143, "cross": 111, "crossing": 43, "crown": 6, "cup": 79, "curtain": 132, "curtains": 38, "desert": 74, "dirt": 2, "dog": 130, "don't know": 160, "donut": 99, "door": 72, "double": 40, "doughnut": 108, "down": 77, "exit": 48, "fashion": 190, "fence": 36, "forest": 17, "french": 46, "full": 182, "giraffe": 56, "giraffes": 185, "girl": 180, "gray": 24, "gray and black": 179, "green": 19, "ground": 133, "hair": 16, "happy": 82, "hat": 87, "hawaii": 135, "human": 184, "ice cream": 159, "in car": 172, "jeep": 194, "king": 121, "lady": 137, "lanyard": 93, "large": 136, "laying down": 106, "leather": 59, "lg": 20, "little girl": 166, "low": 70, "lying down": 146, "man": 13, "many": 125, "monitor": 142, "name tag": 101, "natural": 175, "necklace": 198, "neon": 8, "net": 109, "no": 161, "not sure": 117, "not there": 165, "nothing": 183, "on road": 110, "on street": 156, "orange": 186, "out": 33, "outside": 134, "park": 0, "person": 86, "photographer": 145, "picnic table": 115, "pink": 27, "plain": 113, "plastic": 100, "plate": 144, "platform": 153, "protection": 126, "purple": 152, "queen": 97, "rack": 42, "red": 5, "red and blue": 80, "red and yellow": 73, "resting": 85, "right": 66, "roof": 122, "screen": 192, "security": 105, "shade": 174, "shadow": 29, "shadows": 157, "shelter": 147, "shrimp": 9, "sidewalk": 187, "skateboard": 34, "skateboarding": 65, "skier": 102, "skiing": 170, "sky": 15, "sleeping": 177, "small": 4, "smile": 104, "smiling": 162, "snow": 158, "snowboard": 58, "snowboarder": 76, "snowboarding": 52, "soccer": 63, "soccer ball": 90, "solid": 45, "stand": 181, "station": 141, "street": 148, "stripes": 41, "style": 31, "sun": 196, "suv": 3, "tabby": 50, "table": 163, "talking": 91, "talking on phone": 149, "tan": 81, "tent": 30, "they aren't": 118, "tired": 128, "tower": 53, "train": 78, "trees": 89, "tv": 164, "unknown": 98, "walking": 54, "wall": 55, "watching": 112, "wedding": 21, "white": 22, "white and black": 173, "white and blue": 168, "window": 139, "windows": 39, "wine": 12, "wine tasting": 191, "woman": 103, "women": 197, "woods": 155, "yellow": 150, "yes": 116, "zoo": 120 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.44.2", "type_vocab_size": 2, "vocab_size": 30522 }