Hhuggingface-webUpdate config.json
6fa9246c创建于 2022年11月21日历史提交
{
  "architectures": [
    "ASTForAudioClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "frequency_stride": 14,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "Speech",
    "1": "Male speech, man speaking",
    "2": "Female speech, woman speaking",
    "3": "Child speech, kid speaking",
    "4": "Conversation",
    "5": "Narration, monologue",
    "6": "Babbling",
    "7": "Speech synthesizer",
    "8": "Shout",
    "9": "Bellow",
    "10": "Whoop",
    "11": "Yell",
    "12": "Battle cry",
    "13": "Children shouting",
    "14": "Screaming",
    "15": "Whispering",
    "16": "Laughter",
    "17": "Baby laughter",
    "18": "Giggle",
    "19": "Snicker",
    "20": "Belly laugh",
    "21": "Chuckle, chortle",
    "22": "Crying, sobbing",
    "23": "Baby cry, infant cry",
    "24": "Whimper",
    "25": "Wail, moan",
    "26": "Sigh",
    "27": "Singing",
    "28": "Choir",
    "29": "Yodeling",
    "30": "Chant",
    "31": "Mantra",
    "32": "Male singing",
    "33": "Female singing",
    "34": "Child singing",
    "35": "Synthetic singing",
    "36": "Rapping",
    "37": "Humming",
    "38": "Groan",
    "39": "Grunt",
    "40": "Whistling",
    "41": "Breathing",
    "42": "Wheeze",
    "43": "Snoring",
    "44": "Gasp",
    "45": "Pant",
    "46": "Snort",
    "47": "Cough",
    "48": "Throat clearing",
    "49": "Sneeze",
    "50": "Sniff",
    "51": "Run",
    "52": "Shuffle",
    "53": "Walk, footsteps",
    "54": "Chewing, mastication",
    "55": "Biting",
    "56": "Gargling",
    "57": "Stomach rumble",
    "58": "Burping, eructation",
    "59": "Hiccup",
    "60": "Fart",
    "61": "Hands",
    "62": "Finger snapping",
    "63": "Clapping",
    "64": "Heart sounds, heartbeat",
    "65": "Heart murmur",
    "66": "Cheering",
    "67": "Applause",
    "68": "Chatter",
    "69": "Crowd",
    "70": "Hubbub, speech noise, speech babble",
    "71": "Children playing",
    "72": "Animal",
    "73": "Domestic animals, pets",
    "74": "Dog",
    "75": "Bark",
    "76": "Yip",
    "77": "Howl",
    "78": "Bow-wow",
    "79": "Growling",
    "80": "Whimper (dog)",
    "81": "Cat",
    "82": "Purr",
    "83": "Meow",
    "84": "Hiss",
    "85": "Caterwaul",
    "86": "Livestock, farm animals, working animals",
    "87": "Horse",
    "88": "Clip-clop",
    "89": "Neigh, whinny",
    "90": "Cattle, bovinae",
    "91": "Moo",
    "92": "Cowbell",
    "93": "Pig",
    "94": "Oink",
    "95": "Goat",
    "96": "Bleat",
    "97": "Sheep",
    "98": "Fowl",
    "99": "Chicken, rooster",
    "100": "Cluck",
    "101": "Crowing, cock-a-doodle-doo",
    "102": "Turkey",
    "103": "Gobble",
    "104": "Duck",
    "105": "Quack",
    "106": "Goose",
    "107": "Honk",
    "108": "Wild animals",
    "109": "Roaring cats (lions, tigers)",
    "110": "Roar",
    "111": "Bird",
    "112": "Bird vocalization, bird call, bird song",
    "113": "Chirp, tweet",
    "114": "Squawk",
    "115": "Pigeon, dove",
    "116": "Coo",
    "117": "Crow",
    "118": "Caw",
    "119": "Owl",
    "120": "Hoot",
    "121": "Bird flight, flapping wings",
    "122": "Canidae, dogs, wolves",
    "123": "Rodents, rats, mice",
    "124": "Mouse",
    "125": "Patter",
    "126": "Insect",
    "127": "Cricket",
    "128": "Mosquito",
    "129": "Fly, housefly",
    "130": "Buzz",
    "131": "Bee, wasp, etc.",
    "132": "Frog",
    "133": "Croak",
    "134": "Snake",
    "135": "Rattle",
    "136": "Whale vocalization",
    "137": "Music",
    "138": "Musical instrument",
    "139": "Plucked string instrument",
    "140": "Guitar",
    "141": "Electric guitar",
    "142": "Bass guitar",
    "143": "Acoustic guitar",
    "144": "Steel guitar, slide guitar",
    "145": "Tapping (guitar technique)",
    "146": "Strum",
    "147": "Banjo",
    "148": "Sitar",
    "149": "Mandolin",
    "150": "Zither",
    "151": "Ukulele",
    "152": "Keyboard (musical)",
    "153": "Piano",
    "154": "Electric piano",
    "155": "Organ",
    "156": "Electronic organ",
    "157": "Hammond organ",
    "158": "Synthesizer",
    "159": "Sampler",
    "160": "Harpsichord",
    "161": "Percussion",
    "162": "Drum kit",
    "163": "Drum machine",
    "164": "Drum",
    "165": "Snare drum",
    "166": "Rimshot",
    "167": "Drum roll",
    "168": "Bass drum",
    "169": "Timpani",
    "170": "Tabla",
    "171": "Cymbal",
    "172": "Hi-hat",
    "173": "Wood block",
    "174": "Tambourine",
    "175": "Rattle (instrument)",
    "176": "Maraca",
    "177": "Gong",
    "178": "Tubular bells",
    "179": "Mallet percussion",
    "180": "Marimba, xylophone",
    "181": "Glockenspiel",
    "182": "Vibraphone",
    "183": "Steelpan",
    "184": "Orchestra",
    "185": "Brass instrument",
    "186": "French horn",
    "187": "Trumpet",
    "188": "Trombone",
    "189": "Bowed string instrument",
    "190": "String section",
    "191": "Violin, fiddle",
    "192": "Pizzicato",
    "193": "Cello",
    "194": "Double bass",
    "195": "Wind instrument, woodwind instrument",
    "196": "Flute",
    "197": "Saxophone",
    "198": "Clarinet",
    "199": "Harp",
    "200": "Bell",
    "201": "Church bell",
    "202": "Jingle bell",
    "203": "Bicycle bell",
    "204": "Tuning fork",
    "205": "Chime",
    "206": "Wind chime",
    "207": "Change ringing (campanology)",
    "208": "Harmonica",
    "209": "Accordion",
    "210": "Bagpipes",
    "211": "Didgeridoo",
    "212": "Shofar",
    "213": "Theremin",
    "214": "Singing bowl",
    "215": "Scratching (performance technique)",
    "216": "Pop music",
    "217": "Hip hop music",
    "218": "Beatboxing",
    "219": "Rock music",
    "220": "Heavy metal",
    "221": "Punk rock",
    "222": "Grunge",
    "223": "Progressive rock",
    "224": "Rock and roll",
    "225": "Psychedelic rock",
    "226": "Rhythm and blues",
    "227": "Soul music",
    "228": "Reggae",
    "229": "Country",
    "230": "Swing music",
    "231": "Bluegrass",
    "232": "Funk",
    "233": "Folk music",
    "234": "Middle Eastern music",
    "235": "Jazz",
    "236": "Disco",
    "237": "Classical music",
    "238": "Opera",
    "239": "Electronic music",
    "240": "House music",
    "241": "Techno",
    "242": "Dubstep",
    "243": "Drum and bass",
    "244": "Electronica",
    "245": "Electronic dance music",
    "246": "Ambient music",
    "247": "Trance music",
    "248": "Music of Latin America",
    "249": "Salsa music",
    "250": "Flamenco",
    "251": "Blues",
    "252": "Music for children",
    "253": "New-age music",
    "254": "Vocal music",
    "255": "A capella",
    "256": "Music of Africa",
    "257": "Afrobeat",
    "258": "Christian music",
    "259": "Gospel music",
    "260": "Music of Asia",
    "261": "Carnatic music",
    "262": "Music of Bollywood",
    "263": "Ska",
    "264": "Traditional music",
    "265": "Independent music",
    "266": "Song",
    "267": "Background music",
    "268": "Theme music",
    "269": "Jingle (music)",
    "270": "Soundtrack music",
    "271": "Lullaby",
    "272": "Video game music",
    "273": "Christmas music",
    "274": "Dance music",
    "275": "Wedding music",
    "276": "Happy music",
    "277": "Funny music",
    "278": "Sad music",
    "279": "Tender music",
    "280": "Exciting music",
    "281": "Angry music",
    "282": "Scary music",
    "283": "Wind",
    "284": "Rustling leaves",
    "285": "Wind noise (microphone)",
    "286": "Thunderstorm",
    "287": "Thunder",
    "288": "Water",
    "289": "Rain",
    "290": "Raindrop",
    "291": "Rain on surface",
    "292": "Stream",
    "293": "Waterfall",
    "294": "Ocean",
    "295": "Waves, surf",
    "296": "Steam",
    "297": "Gurgling",
    "298": "Fire",
    "299": "Crackle",
    "300": "Vehicle",
    "301": "Boat, Water vehicle",
    "302": "Sailboat, sailing ship",
    "303": "Rowboat, canoe, kayak",
    "304": "Motorboat, speedboat",
    "305": "Ship",
    "306": "Motor vehicle (road)",
    "307": "Car",
    "308": "Vehicle horn, car horn, honking",
    "309": "Toot",
    "310": "Car alarm",
    "311": "Power windows, electric windows",
    "312": "Skidding",
    "313": "Tire squeal",
    "314": "Car passing by",
    "315": "Race car, auto racing",
    "316": "Truck",
    "317": "Air brake",
    "318": "Air horn, truck horn",
    "319": "Reversing beeps",
    "320": "Ice cream truck, ice cream van",
    "321": "Bus",
    "322": "Emergency vehicle",
    "323": "Police car (siren)",
    "324": "Ambulance (siren)",
    "325": "Fire engine, fire truck (siren)",
    "326": "Motorcycle",
    "327": "Traffic noise, roadway noise",
    "328": "Rail transport",
    "329": "Train",
    "330": "Train whistle",
    "331": "Train horn",
    "332": "Railroad car, train wagon",
    "333": "Train wheels squealing",
    "334": "Subway, metro, underground",
    "335": "Aircraft",
    "336": "Aircraft engine",
    "337": "Jet engine",
    "338": "Propeller, airscrew",
    "339": "Helicopter",
    "340": "Fixed-wing aircraft, airplane",
    "341": "Bicycle",
    "342": "Skateboard",
    "343": "Engine",
    "344": "Light engine (high frequency)",
    "345": "Dental drill, dentist's drill",
    "346": "Lawn mower",
    "347": "Chainsaw",
    "348": "Medium engine (mid frequency)",
    "349": "Heavy engine (low frequency)",
    "350": "Engine knocking",
    "351": "Engine starting",
    "352": "Idling",
    "353": "Accelerating, revving, vroom",
    "354": "Door",
    "355": "Doorbell",
    "356": "Ding-dong",
    "357": "Sliding door",
    "358": "Slam",
    "359": "Knock",
    "360": "Tap",
    "361": "Squeak",
    "362": "Cupboard open or close",
    "363": "Drawer open or close",
    "364": "Dishes, pots, and pans",
    "365": "Cutlery, silverware",
    "366": "Chopping (food)",
    "367": "Frying (food)",
    "368": "Microwave oven",
    "369": "Blender",
    "370": "Water tap, faucet",
    "371": "Sink (filling or washing)",
    "372": "Bathtub (filling or washing)",
    "373": "Hair dryer",
    "374": "Toilet flush",
    "375": "Toothbrush",
    "376": "Electric toothbrush",
    "377": "Vacuum cleaner",
    "378": "Zipper (clothing)",
    "379": "Keys jangling",
    "380": "Coin (dropping)",
    "381": "Scissors",
    "382": "Electric shaver, electric razor",
    "383": "Shuffling cards",
    "384": "Typing",
    "385": "Typewriter",
    "386": "Computer keyboard",
    "387": "Writing",
    "388": "Alarm",
    "389": "Telephone",
    "390": "Telephone bell ringing",
    "391": "Ringtone",
    "392": "Telephone dialing, DTMF",
    "393": "Dial tone",
    "394": "Busy signal",
    "395": "Alarm clock",
    "396": "Siren",
    "397": "Civil defense siren",
    "398": "Buzzer",
    "399": "Smoke detector, smoke alarm",
    "400": "Fire alarm",
    "401": "Foghorn",
    "402": "Whistle",
    "403": "Steam whistle",
    "404": "Mechanisms",
    "405": "Ratchet, pawl",
    "406": "Clock",
    "407": "Tick",
    "408": "Tick-tock",
    "409": "Gears",
    "410": "Pulleys",
    "411": "Sewing machine",
    "412": "Mechanical fan",
    "413": "Air conditioning",
    "414": "Cash register",
    "415": "Printer",
    "416": "Camera",
    "417": "Single-lens reflex camera",
    "418": "Tools",
    "419": "Hammer",
    "420": "Jackhammer",
    "421": "Sawing",
    "422": "Filing (rasp)",
    "423": "Sanding",
    "424": "Power tool",
    "425": "Drill",
    "426": "Explosion",
    "427": "Gunshot, gunfire",
    "428": "Machine gun",
    "429": "Fusillade",
    "430": "Artillery fire",
    "431": "Cap gun",
    "432": "Fireworks",
    "433": "Firecracker",
    "434": "Burst, pop",
    "435": "Eruption",
    "436": "Boom",
    "437": "Wood",
    "438": "Chop",
    "439": "Splinter",
    "440": "Crack",
    "441": "Glass",
    "442": "Chink, clink",
    "443": "Shatter",
    "444": "Liquid",
    "445": "Splash, splatter",
    "446": "Slosh",
    "447": "Squish",
    "448": "Drip",
    "449": "Pour",
    "450": "Trickle, dribble",
    "451": "Gush",
    "452": "Fill (with liquid)",
    "453": "Spray",
    "454": "Pump (liquid)",
    "455": "Stir",
    "456": "Boiling",
    "457": "Sonar",
    "458": "Arrow",
    "459": "Whoosh, swoosh, swish",
    "460": "Thump, thud",
    "461": "Thunk",
    "462": "Electronic tuner",
    "463": "Effects unit",
    "464": "Chorus effect",
    "465": "Basketball bounce",
    "466": "Bang",
    "467": "Slap, smack",
    "468": "Whack, thwack",
    "469": "Smash, crash",
    "470": "Breaking",
    "471": "Bouncing",
    "472": "Whip",
    "473": "Flap",
    "474": "Scratch",
    "475": "Scrape",
    "476": "Rub",
    "477": "Roll",
    "478": "Crushing",
    "479": "Crumpling, crinkling",
    "480": "Tearing",
    "481": "Beep, bleep",
    "482": "Ping",
    "483": "Ding",
    "484": "Clang",
    "485": "Squeal",
    "486": "Creak",
    "487": "Rustle",
    "488": "Whir",
    "489": "Clatter",
    "490": "Sizzle",
    "491": "Clicking",
    "492": "Clickety-clack",
    "493": "Rumble",
    "494": "Plop",
    "495": "Jingle, tinkle",
    "496": "Hum",
    "497": "Zing",
    "498": "Boing",
    "499": "Crunch",
    "500": "Silence",
    "501": "Sine wave",
    "502": "Harmonic",
    "503": "Chirp tone",
    "504": "Sound effect",
    "505": "Pulse",
    "506": "Inside, small room",
    "507": "Inside, large room or hall",
    "508": "Inside, public space",
    "509": "Outside, urban or manmade",
    "510": "Outside, rural or natural",
    "511": "Reverberation",
    "512": "Echo",
    "513": "Noise",
    "514": "Environmental noise",
    "515": "Static",
    "516": "Mains hum",
    "517": "Distortion",
    "518": "Sidetone",
    "519": "Cacophony",
    "520": "White noise",
    "521": "Pink noise",
    "522": "Throbbing",
    "523": "Vibration",
    "524": "Television",
    "525": "Radio",
    "526": "Field recording"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "A capella": 255,
    "Accelerating, revving, vroom": 353,
    "Accordion": 209,
    "Acoustic guitar": 143,
    "Afrobeat": 257,
    "Air brake": 317,
    "Air conditioning": 413,
    "Air horn, truck horn": 318,
    "Aircraft": 335,
    "Aircraft engine": 336,
    "Alarm": 388,
    "Alarm clock": 395,
    "Ambient music": 246,
    "Ambulance (siren)": 324,
    "Angry music": 281,
    "Animal": 72,
    "Applause": 67,
    "Arrow": 458,
    "Artillery fire": 430,
    "Babbling": 6,
    "Baby cry, infant cry": 23,
    "Baby laughter": 17,
    "Background music": 267,
    "Bagpipes": 210,
    "Bang": 466,
    "Banjo": 147,
    "Bark": 75,
    "Basketball bounce": 465,
    "Bass drum": 168,
    "Bass guitar": 142,
    "Bathtub (filling or washing)": 372,
    "Battle cry": 12,
    "Beatboxing": 218,
    "Bee, wasp, etc.": 131,
    "Beep, bleep": 481,
    "Bell": 200,
    "Bellow": 9,
    "Belly laugh": 20,
    "Bicycle": 341,
    "Bicycle bell": 203,
    "Bird": 111,
    "Bird flight, flapping wings": 121,
    "Bird vocalization, bird call, bird song": 112,
    "Biting": 55,
    "Bleat": 96,
    "Blender": 369,
    "Bluegrass": 231,
    "Blues": 251,
    "Boat, Water vehicle": 301,
    "Boiling": 456,
    "Boing": 498,
    "Boom": 436,
    "Bouncing": 471,
    "Bow-wow": 78,
    "Bowed string instrument": 189,
    "Brass instrument": 185,
    "Breaking": 470,
    "Breathing": 41,
    "Burping, eructation": 58,
    "Burst, pop": 434,
    "Bus": 321,
    "Busy signal": 394,
    "Buzz": 130,
    "Buzzer": 398,
    "Cacophony": 519,
    "Camera": 416,
    "Canidae, dogs, wolves": 122,
    "Cap gun": 431,
    "Car": 307,
    "Car alarm": 310,
    "Car passing by": 314,
    "Carnatic music": 261,
    "Cash register": 414,
    "Cat": 81,
    "Caterwaul": 85,
    "Cattle, bovinae": 90,
    "Caw": 118,
    "Cello": 193,
    "Chainsaw": 347,
    "Change ringing (campanology)": 207,
    "Chant": 30,
    "Chatter": 68,
    "Cheering": 66,
    "Chewing, mastication": 54,
    "Chicken, rooster": 99,
    "Child singing": 34,
    "Child speech, kid speaking": 3,
    "Children playing": 71,
    "Children shouting": 13,
    "Chime": 205,
    "Chink, clink": 442,
    "Chirp tone": 503,
    "Chirp, tweet": 113,
    "Choir": 28,
    "Chop": 438,
    "Chopping (food)": 366,
    "Chorus effect": 464,
    "Christian music": 258,
    "Christmas music": 273,
    "Chuckle, chortle": 21,
    "Church bell": 201,
    "Civil defense siren": 397,
    "Clang": 484,
    "Clapping": 63,
    "Clarinet": 198,
    "Classical music": 237,
    "Clatter": 489,
    "Clickety-clack": 492,
    "Clicking": 491,
    "Clip-clop": 88,
    "Clock": 406,
    "Cluck": 100,
    "Coin (dropping)": 380,
    "Computer keyboard": 386,
    "Conversation": 4,
    "Coo": 116,
    "Cough": 47,
    "Country": 229,
    "Cowbell": 92,
    "Crack": 440,
    "Crackle": 299,
    "Creak": 486,
    "Cricket": 127,
    "Croak": 133,
    "Crow": 117,
    "Crowd": 69,
    "Crowing, cock-a-doodle-doo": 101,
    "Crumpling, crinkling": 479,
    "Crunch": 499,
    "Crushing": 478,
    "Crying, sobbing": 22,
    "Cupboard open or close": 362,
    "Cutlery, silverware": 365,
    "Cymbal": 171,
    "Dance music": 274,
    "Dental drill, dentist's drill": 345,
    "Dial tone": 393,
    "Didgeridoo": 211,
    "Ding": 483,
    "Ding-dong": 356,
    "Disco": 236,
    "Dishes, pots, and pans": 364,
    "Distortion": 517,
    "Dog": 74,
    "Domestic animals, pets": 73,
    "Door": 354,
    "Doorbell": 355,
    "Double bass": 194,
    "Drawer open or close": 363,
    "Drill": 425,
    "Drip": 448,
    "Drum": 164,
    "Drum and bass": 243,
    "Drum kit": 162,
    "Drum machine": 163,
    "Drum roll": 167,
    "Dubstep": 242,
    "Duck": 104,
    "Echo": 512,
    "Effects unit": 463,
    "Electric guitar": 141,
    "Electric piano": 154,
    "Electric shaver, electric razor": 382,
    "Electric toothbrush": 376,
    "Electronic dance music": 245,
    "Electronic music": 239,
    "Electronic organ": 156,
    "Electronic tuner": 462,
    "Electronica": 244,
    "Emergency vehicle": 322,
    "Engine": 343,
    "Engine knocking": 350,
    "Engine starting": 351,
    "Environmental noise": 514,
    "Eruption": 435,
    "Exciting music": 280,
    "Explosion": 426,
    "Fart": 60,
    "Female singing": 33,
    "Female speech, woman speaking": 2,
    "Field recording": 526,
    "Filing (rasp)": 422,
    "Fill (with liquid)": 452,
    "Finger snapping": 62,
    "Fire": 298,
    "Fire alarm": 400,
    "Fire engine, fire truck (siren)": 325,
    "Firecracker": 433,
    "Fireworks": 432,
    "Fixed-wing aircraft, airplane": 340,
    "Flamenco": 250,
    "Flap": 473,
    "Flute": 196,
    "Fly, housefly": 129,
    "Foghorn": 401,
    "Folk music": 233,
    "Fowl": 98,
    "French horn": 186,
    "Frog": 132,
    "Frying (food)": 367,
    "Funk": 232,
    "Funny music": 277,
    "Fusillade": 429,
    "Gargling": 56,
    "Gasp": 44,
    "Gears": 409,
    "Giggle": 18,
    "Glass": 441,
    "Glockenspiel": 181,
    "Goat": 95,
    "Gobble": 103,
    "Gong": 177,
    "Goose": 106,
    "Gospel music": 259,
    "Groan": 38,
    "Growling": 79,
    "Grunge": 222,
    "Grunt": 39,
    "Guitar": 140,
    "Gunshot, gunfire": 427,
    "Gurgling": 297,
    "Gush": 451,
    "Hair dryer": 373,
    "Hammer": 419,
    "Hammond organ": 157,
    "Hands": 61,
    "Happy music": 276,
    "Harmonic": 502,
    "Harmonica": 208,
    "Harp": 199,
    "Harpsichord": 160,
    "Heart murmur": 65,
    "Heart sounds, heartbeat": 64,
    "Heavy engine (low frequency)": 349,
    "Heavy metal": 220,
    "Helicopter": 339,
    "Hi-hat": 172,
    "Hiccup": 59,
    "Hip hop music": 217,
    "Hiss": 84,
    "Honk": 107,
    "Hoot": 120,
    "Horse": 87,
    "House music": 240,
    "Howl": 77,
    "Hubbub, speech noise, speech babble": 70,
    "Hum": 496,
    "Humming": 37,
    "Ice cream truck, ice cream van": 320,
    "Idling": 352,
    "Independent music": 265,
    "Insect": 126,
    "Inside, large room or hall": 507,
    "Inside, public space": 508,
    "Inside, small room": 506,
    "Jackhammer": 420,
    "Jazz": 235,
    "Jet engine": 337,
    "Jingle (music)": 269,
    "Jingle bell": 202,
    "Jingle, tinkle": 495,
    "Keyboard (musical)": 152,
    "Keys jangling": 379,
    "Knock": 359,
    "Laughter": 16,
    "Lawn mower": 346,
    "Light engine (high frequency)": 344,
    "Liquid": 444,
    "Livestock, farm animals, working animals": 86,
    "Lullaby": 271,
    "Machine gun": 428,
    "Mains hum": 516,
    "Male singing": 32,
    "Male speech, man speaking": 1,
    "Mallet percussion": 179,
    "Mandolin": 149,
    "Mantra": 31,
    "Maraca": 176,
    "Marimba, xylophone": 180,
    "Mechanical fan": 412,
    "Mechanisms": 404,
    "Medium engine (mid frequency)": 348,
    "Meow": 83,
    "Microwave oven": 368,
    "Middle Eastern music": 234,
    "Moo": 91,
    "Mosquito": 128,
    "Motor vehicle (road)": 306,
    "Motorboat, speedboat": 304,
    "Motorcycle": 326,
    "Mouse": 124,
    "Music": 137,
    "Music for children": 252,
    "Music of Africa": 256,
    "Music of Asia": 260,
    "Music of Bollywood": 262,
    "Music of Latin America": 248,
    "Musical instrument": 138,
    "Narration, monologue": 5,
    "Neigh, whinny": 89,
    "New-age music": 253,
    "Noise": 513,
    "Ocean": 294,
    "Oink": 94,
    "Opera": 238,
    "Orchestra": 184,
    "Organ": 155,
    "Outside, rural or natural": 510,
    "Outside, urban or manmade": 509,
    "Owl": 119,
    "Pant": 45,
    "Patter": 125,
    "Percussion": 161,
    "Piano": 153,
    "Pig": 93,
    "Pigeon, dove": 115,
    "Ping": 482,
    "Pink noise": 521,
    "Pizzicato": 192,
    "Plop": 494,
    "Plucked string instrument": 139,
    "Police car (siren)": 323,
    "Pop music": 216,
    "Pour": 449,
    "Power tool": 424,
    "Power windows, electric windows": 311,
    "Printer": 415,
    "Progressive rock": 223,
    "Propeller, airscrew": 338,
    "Psychedelic rock": 225,
    "Pulleys": 410,
    "Pulse": 505,
    "Pump (liquid)": 454,
    "Punk rock": 221,
    "Purr": 82,
    "Quack": 105,
    "Race car, auto racing": 315,
    "Radio": 525,
    "Rail transport": 328,
    "Railroad car, train wagon": 332,
    "Rain": 289,
    "Rain on surface": 291,
    "Raindrop": 290,
    "Rapping": 36,
    "Ratchet, pawl": 405,
    "Rattle": 135,
    "Rattle (instrument)": 175,
    "Reggae": 228,
    "Reverberation": 511,
    "Reversing beeps": 319,
    "Rhythm and blues": 226,
    "Rimshot": 166,
    "Ringtone": 391,
    "Roar": 110,
    "Roaring cats (lions, tigers)": 109,
    "Rock and roll": 224,
    "Rock music": 219,
    "Rodents, rats, mice": 123,
    "Roll": 477,
    "Rowboat, canoe, kayak": 303,
    "Rub": 476,
    "Rumble": 493,
    "Run": 51,
    "Rustle": 487,
    "Rustling leaves": 284,
    "Sad music": 278,
    "Sailboat, sailing ship": 302,
    "Salsa music": 249,
    "Sampler": 159,
    "Sanding": 423,
    "Sawing": 421,
    "Saxophone": 197,
    "Scary music": 282,
    "Scissors": 381,
    "Scrape": 475,
    "Scratch": 474,
    "Scratching (performance technique)": 215,
    "Screaming": 14,
    "Sewing machine": 411,
    "Shatter": 443,
    "Sheep": 97,
    "Ship": 305,
    "Shofar": 212,
    "Shout": 8,
    "Shuffle": 52,
    "Shuffling cards": 383,
    "Sidetone": 518,
    "Sigh": 26,
    "Silence": 500,
    "Sine wave": 501,
    "Singing": 27,
    "Singing bowl": 214,
    "Single-lens reflex camera": 417,
    "Sink (filling or washing)": 371,
    "Siren": 396,
    "Sitar": 148,
    "Sizzle": 490,
    "Ska": 263,
    "Skateboard": 342,
    "Skidding": 312,
    "Slam": 358,
    "Slap, smack": 467,
    "Sliding door": 357,
    "Slosh": 446,
    "Smash, crash": 469,
    "Smoke detector, smoke alarm": 399,
    "Snake": 134,
    "Snare drum": 165,
    "Sneeze": 49,
    "Snicker": 19,
    "Sniff": 50,
    "Snoring": 43,
    "Snort": 46,
    "Sonar": 457,
    "Song": 266,
    "Soul music": 227,
    "Sound effect": 504,
    "Soundtrack music": 270,
    "Speech": 0,
    "Speech synthesizer": 7,
    "Splash, splatter": 445,
    "Splinter": 439,
    "Spray": 453,
    "Squawk": 114,
    "Squeak": 361,
    "Squeal": 485,
    "Squish": 447,
    "Static": 515,
    "Steam": 296,
    "Steam whistle": 403,
    "Steel guitar, slide guitar": 144,
    "Steelpan": 183,
    "Stir": 455,
    "Stomach rumble": 57,
    "Stream": 292,
    "String section": 190,
    "Strum": 146,
    "Subway, metro, underground": 334,
    "Swing music": 230,
    "Synthesizer": 158,
    "Synthetic singing": 35,
    "Tabla": 170,
    "Tambourine": 174,
    "Tap": 360,
    "Tapping (guitar technique)": 145,
    "Tearing": 480,
    "Techno": 241,
    "Telephone": 389,
    "Telephone bell ringing": 390,
    "Telephone dialing, DTMF": 392,
    "Television": 524,
    "Tender music": 279,
    "Theme music": 268,
    "Theremin": 213,
    "Throat clearing": 48,
    "Throbbing": 522,
    "Thump, thud": 460,
    "Thunder": 287,
    "Thunderstorm": 286,
    "Thunk": 461,
    "Tick": 407,
    "Tick-tock": 408,
    "Timpani": 169,
    "Tire squeal": 313,
    "Toilet flush": 374,
    "Tools": 418,
    "Toot": 309,
    "Toothbrush": 375,
    "Traditional music": 264,
    "Traffic noise, roadway noise": 327,
    "Train": 329,
    "Train horn": 331,
    "Train wheels squealing": 333,
    "Train whistle": 330,
    "Trance music": 247,
    "Trickle, dribble": 450,
    "Trombone": 188,
    "Truck": 316,
    "Trumpet": 187,
    "Tubular bells": 178,
    "Tuning fork": 204,
    "Turkey": 102,
    "Typewriter": 385,
    "Typing": 384,
    "Ukulele": 151,
    "Vacuum cleaner": 377,
    "Vehicle": 300,
    "Vehicle horn, car horn, honking": 308,
    "Vibraphone": 182,
    "Vibration": 523,
    "Video game music": 272,
    "Violin, fiddle": 191,
    "Vocal music": 254,
    "Wail, moan": 25,
    "Walk, footsteps": 53,
    "Water": 288,
    "Water tap, faucet": 370,
    "Waterfall": 293,
    "Waves, surf": 295,
    "Wedding music": 275,
    "Whack, thwack": 468,
    "Whale vocalization": 136,
    "Wheeze": 42,
    "Whimper": 24,
    "Whimper (dog)": 80,
    "Whip": 472,
    "Whir": 488,
    "Whispering": 15,
    "Whistle": 402,
    "Whistling": 40,
    "White noise": 520,
    "Whoop": 10,
    "Whoosh, swoosh, swish": 459,
    "Wild animals": 108,
    "Wind": 283,
    "Wind chime": 206,
    "Wind instrument, woodwind instrument": 195,
    "Wind noise (microphone)": 285,
    "Wood": 437,
    "Wood block": 173,
    "Writing": 387,
    "Yell": 11,
    "Yip": 76,
    "Yodeling": 29,
    "Zing": 497,
    "Zipper (clothing)": 378,
    "Zither": 150
  },
  "layer_norm_eps": 1e-12,
  "max_length": 1024,
  "model_type": "audio-spectrogram-transformer",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_mel_bins": 128,
  "patch_size": 16,
  "qkv_bias": true,
  "time_stride": 14,
  "torch_dtype": "float32",
  "transformers_version": "4.25.0.dev0"
}