{"id":1207,"date":"2023-03-25T20:48:55","date_gmt":"2023-03-25T11:48:55","guid":{"rendered":"https:\/\/fukugyouhistory.tokyo\/?p=1207"},"modified":"2024-01-18T00:04:20","modified_gmt":"2024-01-17T15:04:20","slug":"whisper%e3%80%80%e9%ab%98%e9%80%9f%e5%8c%96%e3%81%97%e3%81%9ffaster-whisper%e3%82%92%e7%b0%a1%e5%8d%98%e3%81%ab%e5%8b%95%e3%81%8b%e3%81%97%e3%81%a6%e3%81%bf%e3%82%8b","status":"publish","type":"post","link":"https:\/\/fukugyouhistory.tokyo\/?p=1207","title":{"rendered":"Whisper\u3000\u9ad8\u901f\u5316\u3057\u305ffaster-whisper\u3092\u7c21\u5358\u306b\u52d5\u304b\u3057\u3066\u307f\u308b"},"content":{"rendered":"\n<p><a rel=\"noreferrer noopener\" href=\"https:\/\/fukugyouhistory.tokyo\/?p=179\" target=\"_blank\">Whisper\u3000\u97f3\u58f0\u30fb\u52d5\u753b\u306e\u81ea\u52d5\u66f8\u304d\u8d77\u3053\u3057AI\u3092\u7121\u6599\u3067\u3001\u7c21\u5358\u306b\u4f7f\u304a\u3046<\/a>\u306e\u8a18\u4e8b\u3092\u7d39\u4ecb\u3057\u3066\u3044\u307e\u3057\u305f\u304c\u3001\u9ad8\u901f\u5316\u3055\u308c\u305f\u300c<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/guillaumekln\/faster-whisper\" target=\"_blank\">Faster-Whisper<\/a>\u300d\u304c\u516c\u958b\u3055\u308c\u3066\u3044\u307e\u3057\u305f\u306e\u3067\u3001<a href=\"https:\/\/colab.research.google.com\/?hl=ja\" target=\"_blank\" rel=\"noreferrer noopener\">Google Colaboratory<\/a>\u3067\u5b9f\u88c5\u3057\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u307e\u305f\u3001\u300clarge-v2\u300d\u3068\u8a00\u3046\u30a2\u30c3\u30d7\u30c7\u30fc\u30c8\u3055\u308c\u305f\u30e2\u30c7\u30eb\u304c\u63d0\u4f9b\u3055\u308c\u3066\u3044\u307e\u3057\u305f\u3002<br>\u3053\u3061\u3089\u3082\u5408\u308f\u305b\u3066\u4f7f\u7528\u3057\u3066\u307f\u305f\u3044\u3068\u601d\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<!--more-->\n\n\n\n<p><\/p>\n\n\n\n<h2 
class=\"wp-block-heading\">Faster-Whisper<\/h2>\n\n\n\n<p>\u3053\u308c\u307e\u3067\u306eWhisper\u3068\u306e\u9055\u3044\u3092\u8a18\u8f09\u3057\u3066\u3044\u304d\u305f\u3044\u3068\u601d\u3044\u307e\u3059\u3002<br><a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/guillaumekln\/faster-whisper\" target=\"_blank\">Faster-Whisper<\/a>\u304b\u3089\u5f15\u7528\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2, which is a fast inference engine for Transformer models.\n\nThis implementation is up to 4 times faster than openai\/whisper for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU.<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>faster-whisper\u306f\u3001OpenAI\u306eWhisper\u30e2\u30c7\u30eb\u3092\u3001Transformer\u30e2\u30c7\u30eb\u306e\u9ad8\u901f\u63a8\u8ad6\u30a8\u30f3\u30b8\u30f3\u3067\u3042\u308bCTranslate2\u3092\u4f7f\u3063\u3066\u518d\u5b9f\u88c5\u3057\u305f\u3082\u306e\u3067\u3059\u3002\n\n\u3053\u306e\u5b9f\u88c5\u306f\u3001\u540c\u3058\u7cbe\u5ea6\u3067openai\/whisper\u3088\u308a\u6700\u59274\u500d\u9ad8\u901f\u3067\u3001\u304b\u3064\u5c11\u306a\u3044\u30e1\u30e2\u30ea\u3057\u304b\u4f7f\u7528\u3057\u307e\u305b\u3093\u3002CPU\u3068GPU\u306e\u4e21\u65b9\u30678\u30d3\u30c3\u30c8\u91cf\u5b50\u5316\u3092\u884c\u3046\u3053\u3068\u3067\u3001\u3055\u3089\u306b\u52b9\u7387\u3092\u5411\u4e0a\u3055\u305b\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">Large-v2 model on GPU<\/h3>\n\n\n\n<figure class=\"wp-block-table\"><table><thead><tr><th>Implementation<\/th><th>Precision<\/th><th>Beam size<\/th><th>Time<\/th><th>Max. GPU memory<\/th><th>Max. 
CPU memory<\/th><\/tr><\/thead><tbody><tr><td>openai\/whisper<\/td><td>fp16<\/td><td>5<\/td><td>4m30s<\/td><td>11325MB<\/td><td>9439MB<\/td><\/tr><tr><td>faster-whisper<\/td><td>fp16<\/td><td>5<\/td><td>54s<\/td><td>4755MB<\/td><td>3244MB<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p>CTranslate2\u3068\u8a00\u3046\u9ad8\u901f\u63a8\u8ad6\u30a8\u30f3\u30b8\u30f3\u3067\u5909\u63db\u3057\u305f\u30e2\u30c7\u30eb\u3092\u5b9f\u88c5\u3059\u308b\u3053\u3068\u3067\u3001<br>\u51e6\u7406\u901f\u5ea6\u304c\u7d044\u500d\u306b\u65e9\u304f\u306a\u3063\u3066\u3001\u30e1\u30e2\u30ea\u30fc\u306e\u4f7f\u7528\u7387\u306f\u7d041\/3\u306b\u306a\u3063\u305f\u307f\u305f\u3044\u3067\u3059\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">Faster-Whisper\u306e\u5b9f\u88c5<\/h2>\n\n\n\n<p>\u6700\u521d\u306b\u3001\u300c\u7de8\u96c6\u300d\u2192\u300c\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u306e\u8a2d\u5b9a\u300d\u304b\u3089\u3001<br>\u30cf\u30fc\u30c9\u30a6\u30a7\u30a2\u30a2\u30af\u30bb\u30e9\u30ec\u30fc\u30bf\u3092\u300cGPU\u300d\u306b\u8a2d\u5b9a\u3057\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image\"><img decoding=\"async\" width=\"880\" height=\"343\" sizes=\"(max-width: 880px) 100vw, 880px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/03\/image-35.png\" alt=\"\" class=\"wp-image-182\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/03\/image-35.png 880w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/03\/image-35-300x117.png 300w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/03\/image-35-768x299.png 768w\" 
\/><\/figure>\n\n\n\n<p>\u4ee5\u4e0b\u306e\u30b3\u30de\u30f3\u30c9\u3092\u5165\u529b\u3057\u3001Shift+Enter\u3092\u62bc\u3057\u3066\u5b9f\u884c\u3057\u307e\u3057\u3087\u3046\u3002<br>GPU\u306e\u60c5\u5831\u304c\u51fa\u3066\u304f\u308c\u3070\u3001\u6b63\u5e38\u306b\u8a2d\u5b9a\u5909\u66f4\u3067\u304d\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>!nvidia-smi<\/code><\/pre>\n\n\n\n<p>\u7d9a\u3051\u3066\u30b3\u30fc\u30c9\u3092\u8a18\u8f09\u3057\u3066\u3044\u304d\u307e\u3059\u3002<br>\u3053\u308c\u307e\u3067\u306eWhisper\u306e\u4f7f\u7528\u65b9\u6cd5\u306b\u3064\u3044\u3066\u306f\u3001<a href=\"https:\/\/fukugyouhistory.tokyo\/?p=179\">Whisper\u3000\u97f3\u58f0\u30fb\u52d5\u753b\u306e\u81ea\u52d5\u66f8\u304d\u8d77\u3053\u3057AI\u3092\u7121\u6599\u3067\u3001\u7c21\u5358\u306b\u4f7f\u304a\u3046<\/a>\u3092\u53c2\u7167\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>!pip install git+https:\/\/github.com\/guillaumekln\/faster-whisper.git\n!apt install libcublas11<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>from faster_whisper import WhisperModel\n\nmodel_size = \"large-v2\"\n\n# Run on GPU with FP16\nmodel = WhisperModel(model_size, device=\"cuda\", compute_type=\"float16\")\n\n# or run on GPU with INT8\n# model = WhisperModel(model_size, device=\"cuda\", compute_type=\"int8_float16\")\n\n# or run on CPU with INT8\n# model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")<\/code><\/pre>\n\n\n\n<p>\u4ee5\u524d\u306e<a href=\"https:\/\/fukugyouhistory.tokyo\/?p=923\">CHATGPT + SHOTCUT\u3000\u534a\u81ea\u52d5\u3067\u52d5\u753b\u3092\u4f5c\u6210\u3057\u3066\u307f\u3088\u3046<\/a>\u306e\u8a18\u4e8b\u3067\u4f7f\u7528\u3057\u305f\u52d5\u753b\u3092\u7528\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>! pip install yt-dlp\n! rm input.mp3 \n! 
yt-dlp -x --audio-format mp3 https:\/\/youtu.be\/1XQVbMz4j-0 -o \"input.mp3\"<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>segments, info = model.transcribe(\"input.mp3\", beam_size=5)\n\nfor segment in segments:\n    print(\"&#91;%.2fs -&gt; %.2fs] %s\" % (segment.start, segment.end, segment.text))<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-large is-resized\"><img decoding=\"async\" width=\"1024\" height=\"350\" sizes=\"(max-width: 1024px) 100vw, 1024px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/03\/image-306-1024x350.png\" alt=\"\" class=\"wp-image-1208\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/03\/image-306-1024x350.png 1024w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/03\/image-306-300x103.png 300w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/03\/image-306-768x263.png 768w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/03\/image-306.png 1061w\" \/><\/figure>\n\n\n\n<p>\u3046\u3093\u3001\u65e9\u3044\u305e\u3002<br>\u3053\u308c\u307e\u3067\u306eWhisper\u3068\u306f\u51fa\u529b\u7d50\u679c\u306e\u5410\u304d\u65b9\u304c\u7570\u306a\u308b\u306e\u304b\u306a\uff1f\uff08\u8981\u52c9\u5f37\u3067\u3059\u2026\uff09<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">Faster-Whisper\u3067\u5b57\u5e55\u30d5\u30a1\u30a4\u30eb\u3092\u4f5c\u6210\u3059\u308b<\/h2>\n\n\n\n<p>\u30b3\u30fc\u30c9\u306e\u307f\u5099\u5fd8\u9332\u3068\u3057\u3066\u8a18\u8f09\u3057\u3066\u304a\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>!pip install git+https:\/\/github.com\/guillaumekln\/faster-whisper.git\r\n!apt install libcublas11\r\n\r\nfrom faster_whisper import WhisperModel\r\n\r\nmodel_size = \"large-v2\"\r\n\r\n# Run on GPU with FP16\r\nmodel = WhisperModel(model_size, device=\"cuda\", compute_type=\"float16\")\r\n\r\n# or run on GPU with INT8\r\n# model = WhisperModel(model_size, 
device=\"cuda\", compute_type=\"int8_float16\")\r\n\r\n# or run on CPU with INT8\r\n# model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")\r<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>segments, info = model.transcribe(\"input.mp3\", beam_size=5)<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>! pip install srt\r\n\r\nfrom datetime import timedelta\r\nfrom srt import Subtitle\r\nimport srt\r\n\r\n\r\nsubs = &#91;]\r\n \r\nfor data in segments:\r\n    index = data.id + 1\r\n    start = data.start\r\n    end = data.end\r\n    text = data.text\r\n    sub = Subtitle(index=1, start=timedelta(\r\n                            seconds=timedelta(seconds=start).seconds,\r\n                            microseconds=timedelta(seconds=start).microseconds),\r\n                            end=timedelta(\r\n                            seconds=timedelta(seconds=end).seconds,\r\n                            microseconds=timedelta(seconds=end).microseconds),\r\n                            content=text, proprietary='')\r\n \r\n    subs.append(sub)\r\n\r\nwith open(\"test.srt\", mode=\"w\", encoding=\"utf-8\") as f:\r\n    f.write(srt.compose(subs))<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>!cat test.srt<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p>Faster-Whisper\u306f\u3001WhisperModel(&#8220;large-v2&#8221;)\u306e\u3088\u3046\u306a\u30e2\u30c7\u30eb\u3092\u8aad\u307f\u8fbc\u3080\u5834\u5408\u3001\u5bfe\u5fdc\u3059\u308bCTranslate2\u30e2\u30c7\u30eb\u306fHugging Face 
Hub\u304b\u3089\u81ea\u52d5\u7684\u306b\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u3044\u308b\u3068\u306e\u3053\u3068\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u3082\u3057\u5909\u63db\u3057\u305fCTranslate2\u30e2\u30c7\u30eb\u304c\u624b\u5143\u306b\u6b32\u3057\u3044\u5834\u5408\u306f\u3001\u4ee5\u4e0b\u306e\u30b3\u30de\u30f3\u30c9\u3067\u4f5c\u6210\u3059\u308b\u3053\u3068\u304c\u53ef\u80fd\u3067\u3059\u3002<br>\u5b9f\u884c\u5f8c\u300cwhisper-large-v2-ct2\u300d\u30d5\u30a9\u30eb\u30c0\u306b\u5909\u63db\u3055\u308c\u305f\u30e2\u30c7\u30eb\u304c\u4f5c\u6210\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>!pip install git+https:\/\/github.com\/guillaumekln\/faster-whisper.git\n!pip install transformers\n\n!ct2-transformers-converter --model openai\/whisper-large-v2 --output_dir whisper-large-v2-ct2 --quantization float16 --low_cpu_mem_usage\n# !ct2-transformers-converter --model openai\/whisper-large-v2 --output_dir whisper-large-v2-ct2 --quantization int8_float16 --low_cpu_mem_usage\n# !ct2-transformers-converter --model openai\/whisper-large-v2 --output_dir whisper-large-v2-ct2 --quantization int8 --low_cpu_mem_usage<\/code><\/pre>\n\n\n\n<p>\u3053\u306e\u5909\u63db\u3057\u305f\u30e2\u30c7\u30eb\u3092\u7528\u3044\u3066faster-whisper\u3092\u52d5\u304b\u3059\u5834\u5408\u3001\u300cmodel_size\u300d\u3092\u4f5c\u6210\u3055\u308c\u305f\u30d5\u30a9\u30eb\u30c0\u306e\u30d1\u30b9\u306b\u5909\u66f4\u3059\u308b\u3060\u3051\u3067OK\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>!pip install git+https:\/\/github.com\/guillaumekln\/faster-whisper.git<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>from faster_whisper import WhisperModel\n\n# \u5909\u63db\u3055\u308c\u305f\u30e2\u30c7\u30eb\u306e\u683c\u7d0d\u3055\u308c\u305f\u30d5\u30a9\u30eb\u30c0\u3092\u6307\u5b9a\u3059\u308b\nmodel_size = \"whisper-large-v2-ct2\/\"\n\n# Run on GPU with FP16\nmodel = WhisperModel(model_size, device=\"cuda\", compute_type=\"float16\")\n\n# or 
run on GPU with INT8\n# model = WhisperModel(model_size, device=\"cuda\", compute_type=\"int8_float16\")\n\n# or run on CPU with INT8\n# model = WhisperModel(model_size, device=\"cpu\", compute_type=\"int8\")<\/code><\/pre>\n\n\n\n<p>\u4ee5\u5f8c\u306f\u3001\u5148\u307b\u3069\u3068\u540c\u3058\u306b\u306a\u308a\u307e\u3059\u306e\u3067\u3001\u7701\u7565\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p>\u78ba\u304b\u306b\u65e9\u3044\u3067\u3059\u3057\u30e1\u30e2\u30ea\u306e\u4f7f\u7528\u91cf\u304c\u5c11\u306a\u3044\u306e\u3067\u3001\u30ed\u30fc\u30ab\u30eb\u3067\u52d5\u304b\u3059\u3053\u3068\u3092\u8003\u3048\u308b\u3068\u4fbf\u5229\u3067\u3059\u3002<br>\u3072\u3068\u307e\u305a\u3001\u3053\u3053\u307e\u3067\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Whisper\u3000\u97f3\u58f0\u30fb\u52d5\u753b\u306e\u81ea\u52d5\u66f8\u304d\u8d77\u3053\u3057AI\u3092\u7121\u6599\u3067\u3001\u7c21\u5358\u306b\u4f7f\u304a\u3046\u306e\u8a18\u4e8b\u3092\u7d39\u4ecb\u3057\u3066\u3044\u307e\u3057\u305f\u304c\u3001\u9ad8\u901f\u5316\u3055\u308c\u305f\u300cFaster-Whisper\u300d\u304c\u516c\u958b\u3055\u308c\u3066\u3044\u307e\u3057\u305f\u306e\u3067\u3001Google Colaboratory\u3067\u5b9f\u88c5\u3057\u3066\u3044\u304d\u307e 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[21],"tags":[],"class_list":{"0":"post-1207","1":"post","2":"type-post","3":"status-publish","4":"format-standard","6":"category-ai"},"_links":{"self":[{"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/posts\/1207","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1207"}],"version-history":[{"count":10,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/posts\/1207\/revisions"}],"predecessor-version":[{"id":1815,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/posts\/1207\/revisions\/1815"}],"wp:attachment":[{"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1207"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1207"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1207"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}