{"id":53775,"date":"2025-02-16T10:58:57","date_gmt":"2025-02-16T02:58:57","guid":{"rendered":"https:\/\/fwq.ai\/blog\/53775\/"},"modified":"2025-02-16T10:58:57","modified_gmt":"2025-02-16T02:58:57","slug":"%e7%94%a8kokoro-tts%e5%88%b6%e4%bd%9cai%e9%85%8d%e9%9f%b3","status":"publish","type":"post","link":"https:\/\/fwq.ai\/blog\/53775\/","title":{"rendered":"\u7528Kokoro TTS\u5236\u4f5cAI\u914d\u97f3"},"content":{"rendered":"<p>\u5982\u679c\u6211\u544a\u8bc9\u4f60\uff0c\u73b0\u5728\u53ef\u4ee5\u5b8c\u5168\u514d\u8d39\u4f7f\u7528\u4e00\u79cd\u4e0d\u4ec5\u597d\u800c\u4e14\u53ef\u80fd\u6bd4 ElevenLabs \u7b49\u884c\u4e1a\u9886\u5148\uff08\u4e14\u6602\u8d35\uff09\u670d\u52a1\u66f4\u597d\u7684\u5de5\u5177\u6765\u521b\u5efa\u4e13\u4e1a\u54c1\u8d28\u7684 AI \u914d\u97f3\uff0c\u4f60\u4f1a\u600e\u4e48\u60f3\uff1f\u542c\u8d77\u6765\u597d\u5f97\u4ee4\u4eba\u96be\u4ee5\u7f6e\u4fe1\uff1f<\/p>\n<p>\u8fd9\u5c31\u662f Kokoro TTS \u7684\u521b\u5efa\u8005\u6bcf\u5929\u90fd\u5728\u8bc1\u660e\u7684\u9519\u8bef\u3002\u8fd9\u4e0d\u4ec5\u4ec5\u662f\u53e6\u4e00\u4e2a\u6587\u672c\u8f6c\u8bed\u97f3\u6a21\u578b\uff1b\u8fd9\u662f\u4e00\u573a 8200 \u4e07\u53c2\u6570\u7684\u5f00\u6e90\u9769\u547d\uff0c\u5b83\u98a0\u8986\u4e86 AI \u8bed\u97f3\u4e16\u754c\u3002\u6211\u4eec\u8c08\u8bba\u7684\u662f\u4e00\u4e2a\u5728\u6781\u4f4e\u9884\u7b97\u4e0b\u8bad\u7ec3\u7684\u6a21\u578b\uff0c\u5728\u65e5\u5e38\u8ba1\u7b97\u673a\u4e0a\u8fd0\u884c\uff0c\u200b\u200b\u4f46\u5728\u4e3b\u8981\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u5374\u4ee5\u67d0\u79cd\u65b9\u5f0f\u8d85\u8d8a\u4e86\u5de8\u5934\u3002<\/p>\n<p>\u597d\u5947\u5417\uff1f\u4f60\u5e94\u8be5\u597d\u5947\u3002<\/p>\n<p>\u8fd9\u7bc7\u535a\u6587\u4e0d\u4ec5\u4f1a\u63ed\u5f00 Kokoro TTS 
\u80cc\u540e\u7684\u79d8\u5bc6\uff0c\u8fd8\u4f1a\u9010\u6b65\u6307\u5bfc\u4f60\u5982\u4f55\u5229\u7528\u5b83\u7684\u529b\u91cf\u6765\u521b\u5efa\u4ee4\u4eba\u60ca\u53f9\u7684\u753b\u5916\u97f3\uff0c\u5c06\u4f60\u7684\u9879\u76ee\u63d0\u5347\u5230\u65b0\u7684\u9ad8\u5ea6\u3002<\/p>\n<p>\u7cfb\u597d\u5b89\u5168\u5e26\uff0c\u56e0\u4e3a\u4eba\u5de5\u667a\u80fd\u8bed\u97f3\u7684\u672a\u6765\u5c31\u5728\u8fd9\u91cc\uff0c\u800c\u4e14\u5b83\u662f\u514d\u8d39\u7684\u3002\u8fd9\u4e0d\u4ec5\u4ec5\u662f\u4e00\u4e2a\u6559\u7a0b\uff1b\u5b83\u662f\u4f60\u8fdb\u5165\u4e00\u4e2a\u6b63\u5728\u91cd\u65b0\u5b9a\u4e49\u666f\u89c2\u7684\u793e\u533a\u7684\u5927\u95e8\u3002<\/p>\n<h2>1\u3001Kokoro TTS \u73b0\u8c61\uff1aAI\u7ade\u6280\u573a\u4e0a\u7684\u5927\u536b\u4e0e\u6b4c\u5229\u4e9a<\/h2>\n<p>\u6211\u4eec\u90fd\u559c\u6b22\u5f31\u8005\u7684\u6545\u4e8b\u3002\u800c\u5728\u4eba\u5de5\u667a\u80fd\u8bed\u97f3\u7684\u4e16\u754c\u91cc\uff0cKokoro TTS \u662f\u7ec8\u6781\u5f31\u8005\u3002\u60f3\u8c61\u4e00\u4e0b\uff1a\u4e00\u4e2a\u53ea\u6709 8200 \u4e07\u4e2a\u53c2\u6570\u7684\u5fae\u578b\u6a21\u578b\uff0c\u4ec5\u7528\u4e0d\u5230 100 \u5c0f\u65f6\u7684\u97f3\u9891\u6570\u636e\u8fdb\u884c\u8bad\u7ec3\uff0c\u4e0e ElevenLabs \u7b49\u5de8\u5934\u6b63\u9762\u4ea4\u950b\uff0c\u540e\u8005\u7684\u6a21\u578b\u662f\u5728\u5e9e\u5927\u7684\u6570\u636e\u96c6\u4e0a\u8fdb\u884c\u8bad\u7ec3\uff0c\u5e76\u83b7\u5f97\u4e86\u5927\u91cf\u8d44\u91d1\u652f\u6301\u3002\u542c\u8d77\u6765\u50cf\u662f\u6ce8\u5b9a\u8981\u5931\u8d25\u7684\uff0c\u5bf9\u5427\uff1f\u9519\u4e86\u3002<\/p>\n<p>\u4ee4\u4eba\u9707\u60ca\u7684\u662f\uff1aKokoro TTS \u4e0d\u4ec5\u53c2\u52a0\u4e86\u6bd4\u8d5b\uff0c\u8fd8\u8d62\u4e86\u3002\u5b83\u5728 Hugging Face TTS Arena 
\u4e0a\u593a\u5f97\u4e86\u7b2c\u4e00\u540d\uff0c\u8fd9\u662f\u4e00\u4e2a\u4e25\u683c\u8bc4\u4f30\u6700\u4f73\u6587\u672c\u8f6c\u8bed\u97f3\u6a21\u578b\u7684\u6392\u884c\u699c\u3002\u5b83\u662f\u5982\u4f55\u5b9e\u73b0\u8fd9\u4e00\u770b\u4f3c\u4e0d\u53ef\u80fd\u7684\u58ee\u4e3e\u7684\uff1f<\/p>\n<p>\u79d8\u8bc0\u5728\u4e8e\u5176\u5de7\u5999\u7684\u67b6\u6784\u548c\u8bbe\u8ba1\u7684\u6548\u7387\u3002\u8fd9\u662f\u4e00\u4e2a\u9ad8\u5ea6\u4f18\u5316\u7684\u6a21\u578b\uff0c\u5229\u7528\u4e86\u4ec5\u89e3\u7801\u5668\u7684\u67b6\u6784\uff0c\u5e76\u5728 A100 80GB GPU \u4e0a\u4f7f\u7528\u5927\u7ea6 500 \u4e2a GPU \u5c0f\u65f6\u8fdb\u884c\u8bad\u7ec3\u3002\u8fd9\u79cd\u6548\u7387\u8f6c\u5316\u4e3a\u51e0\u4e2a\u4ee4\u4eba\u96be\u4ee5\u7f6e\u4fe1\u7684\u4f18\u52bf\uff1a<\/p>\n<ul>\n<li>\u8d85\u5feb\u63a8\u7406\uff1a\u5373\u4f7f\u5728 CPU \u4e0a\u4e5f\u80fd\u5b9e\u65f6\u751f\u6210\u97f3\u9891\u3002\u65e0\u9700\u518d\u7b49\u5f85\u4e91\u670d\u52a1\u5668\u3002<\/li>\n<li>\u968f\u5904\u8fd0\u884c\uff1a\u4ece\u7b14\u8bb0\u672c\u7535\u8111\u5230 Raspberry Pi\uff0cKokoro TTS 
\u51e0\u4e4e\u53ef\u4ee5\u5728\u4efb\u4f55\u8bbe\u5907\u4e0a\u8fd0\u884c\u3002<\/li>\n<li>\u5f00\u6e90\u81ea\u7531\uff1a\u4fee\u6539\u3001\u6539\u8fdb\u5b83\uff0c\u5c06\u5176\u96c6\u6210\u5230\u4f60\u7684\u9879\u76ee\u4e2d\uff0c\u4e0d\u53d7\u9650\u5236\u3002\u6743\u529b\u771f\u6b63\u638c\u63e1\u5728\u4f60\u624b\u4e2d\u3002<\/li>\n<\/ul>\n<p>\u4f46\u6700\u4ee4\u4eba\u60ca\u8bb6\u7684\u90e8\u5206\u662f\u4ec0\u4e48\uff1f\u5b83\u5b8c\u5168\u514d\u8d39\uff0c\u540c\u65f6\u505a\u5230\u4e86\u8fd9\u4e00\u5207\u3002\u8fd9\u4e0d\u4ec5\u4ec5\u662f\u4e3a\u4e86\u7701\u94b1\uff1b\u8fd9\u662f\u4e3a\u4e86\u8ba9\u5c16\u7aef\u4eba\u5de5\u667a\u80fd\u7684\u666e\u53ca\u6210\u4e3a\u53ef\u80fd\u3002<\/p>\n<p>\u4f46\u95ee\u9898\u4ecd\u7136\u5b58\u5728\uff1a\u514d\u8d39\u5de5\u5177\u771f\u7684\u80fd\u4e0e\u9ad8\u7ea7\u670d\u52a1\u76f8\u5ab2\u7f8e\u5417\uff1f\u8ba9\u6211\u4eec\u6765\u6d4b\u8bd5\u4e00\u4e0b\u2026\u2026<\/p>\n<h2>2\u3001\u89e3\u9501\u9b54\u6cd5\uff1aKokoro TTS \u7684\u5206\u6b65\u6307\u5357<\/h2>\n<p>\u60ac\u5ff5\u591f\u4e86\u3002\u8ba9\u6211\u4eec\u5f00\u59cb\u5b9e\u8df5\u5427\u3002\u4ee5\u4e0b\u662f\u73b0\u5728\u5f00\u59cb\u4f7f\u7528 Kokoro TTS \u8fdb\u884c\u521b\u4f5c\u7684\u65b9\u6cd5\u3002\u6211\u4eec\u5c06\u5728\u672c\u6559\u7a0b\u4e2d\u4f7f\u7528 Google Colab\uff0c\u8ba9\u6bcf\u4e2a\u4eba\u90fd\u53ef\u4ee5\u8bbf\u95ee\u5b83\uff0c\u65e0\u8bba\u4f60\u7684\u786c\u4ef6\u5982\u4f55\u3002<\/p>\n<h3>2.1 Colab \u8fde\u63a5\uff08\u514d\u8d39 AI \u6e38\u4e50\u573a\uff09<\/h3>\n<p>\u9996\u5148\uff0c\u6253\u5f00\u8fd9\u4e2a Google Colab \u7b14\u8bb0\u672c\uff1a<\/p>\n<h3>2.2 \u542f\u52a8\u5f15\u64ce\uff08\u4e00\u952e\u5b89\u88c5\uff09<\/h3>\n<p>\u8fd0\u884c\u7b14\u8bb0\u672c\u4e2d\u7684\u7b2c\u4e00\u4e2a\u5355\u5143\u3002\u6b64\u5355\u5143\u5b89\u88c5\u6240\u6709\u5fc5\u8981\u7684\u4f9d\u8d56\u9879\uff0c\u5305\u62ec kokoro 
\u5e93\u672c\u8eab\uff0c\u4ee5\u53ca\u4e00\u4e9b\u5176\u4ed6\u4f7f\u9b54\u6cd5\u5b9e\u73b0\u7684\u5de5\u5177\u3002\u5c06\u5176\u89c6\u4e3a\u51c6\u5907\u4f60\u7684\u6570\u5b57\u8bed\u97f3\u5de5\u4f5c\u5ba4\u3002<\/p>\n<pre><code># 1\ufe0f\u20e3 Install kokoro\n!pip install -q kokoro soundfile\n\n# 2\ufe0f\u20e3 Install espeak, used for out-of-dictionary fallback\n!apt-get -qq -y install espeak-ng &gt; \/dev\/null 2&gt;&amp;1\n# You can skip espeak installation, but OOD words will be skipped unless you provide a fallback\n\n# 3\ufe0f\u20e3 Initialize a pipeline\nfrom kokoro import KPipeline\nfrom IPython.display import display, Audio\nimport soundfile as sf\n\n#  'a' =&gt; American English\n#  'b' =&gt; British English\npipeline = KPipeline(lang_code='a') # make sure lang_code matches voice\n\n# The following text is for demonstration purposes only, unseen during training\ntext = '''\nThe sky above the port was the color of television, tuned to a dead channel.\n\"It's not like I'm using,\" Case heard someone say, as he shouldered his way through the crowd around the door of the Chat. \"It's like my body's developed this massive drug deficiency.\"\nIt was a Sprawl voice and a Sprawl joke. The Chatsubo was a bar for professional expatriates; you could drink there for a week and never hear two words in Japanese.\n\nThese were to have an enormous impact, not only because they were associated with Constantine, but also because, as in so many other areas, the decisions taken by Constantine (or in his name) were to have great significance for centuries to come. One of the main issues was the shape that Christian churches were to take, since there was not, apparently, a tradition of monumental church buildings when Constantine decided to help the Christian church build a series of truly spectacular structures. 
The main form that these churches took was that of the basilica, a multipurpose rectangular structure, based ultimately on the earlier Greek stoa, which could be found in most of the great cities of the empire. Christianity, unlike classical polytheism, needed a large interior space for the celebration of its religious services, and the basilica aptly filled that need. We naturally do not know the degree to which the emperor was involved in the design of new churches, but it is tempting to connect this with the secular basilica that Constantine completed in the Roman forum (the so-called Basilica of Maxentius) and the one he probably built in Trier, in connection with his residence in the city at a time when he was still caesar.\n'''<\/code><\/pre>\n<h3>2.3 \u9009\u62e9\u4f60\u7684\u58f0\u97f3\uff08\u4e0e\u6f14\u5458\u89c1\u9762\uff09<\/h3>\n<p>Kokoro TTS \u5e26\u6709\u4e00\u7cfb\u5217\u72ec\u7279\u7684\u58f0\u97f3\uff0c\u6bcf\u4e2a\u58f0\u97f3\u90fd\u6709\u81ea\u5df1\u7684\u4e2a\u6027\u3002\u8fd0\u884c\u7b2c\u4e8c\u4e2a\u5355\u5143\u4ee5\u52a0\u8f7d\u9ed8\u8ba4\u8bed\u97f3\u5305\uff0c\u8fd9\u662f\u4e24\u4e2a\u4e0d\u540c\u58f0\u97f3\u7684\u8ff7\u4eba\u878d\u5408\uff1aBella \u548c Sarah\u3002<\/p>\n<p>\u79d8\u5bc6\u6b66\u5668\uff1a\u60f3\u5c1d\u8bd5\u5176\u4ed6\u58f0\u97f3\u5417\uff1f\u60a8\u53ef\u4ee5\u5728 Hugging Face \u4e0a\u627e\u5230\u53ef\u7528\u8bed\u97f3\u5305\u7684\u5217\u8868\u3002\u53ea\u9700\u66f4\u6539\u4ee3\u7801\u4e2d\u7684 <code>VOICE_NAME<\/code> \u5373\u53ef\u3002\u4f8b\u5982\uff0c\u8bed\u97f3\u5305\u76ee\u524d\u5305\u62ec\uff1a<\/p>\n<ul>\n<li>af\uff1a\u9ed8\u8ba4\u8bed\u97f3\uff0cBella \u548c Sarah \u5404\u5360\u4e00\u534a<\/li>\n<li>af_bella\u3001af_sarah\u3001am_adam\u3001am_michael\u3001<\/li>\n<li>bf_emma\u3001bf_isabella\u3001bm_george\u3001bm_lewis\u3001<\/li>\n<li>af_nicole\u3001af_sky<\/li>\n<\/ul>\n<h3>2.4 \u7545\u6240\u6b32\u8a00\uff08\u8ba9 AI 
\u8bf4\u8bdd\uff09<\/h3>\n<p>\u73b0\u5728\u5230\u4e86\u6709\u8da3\u7684\u90e8\u5206\uff01\u5728\u7b2c\u4e09\u4e2a\u5355\u5143\u683c\u4e2d\uff0c\u4f60\u5c06\u627e\u5230\u4e00\u4e2a\u6587\u672c\u53d8\u91cf\u3002\u5c06\u793a\u4f8b\u6587\u672c\u66ff\u6362\u4e3a\u4f60\u5e0c\u671b Kokoro TTS \u8bf4\u7684\u4efb\u4f55\u5185\u5bb9\u3002\u53d1\u6325\u521b\u610f\uff01<\/p>\n<h3>2.5 \u8046\u542c\u9b54\u6cd5\uff08\u5373\u65f6\u97f3\u9891\uff09<\/h3>\n<p>\u8fd0\u884c\u5355\u5143\u683c\u3002\u51e0\u79d2\u949f\u5185\uff0c\u4f60\u7684 Colab \u7b14\u8bb0\u672c\u4e2d\u5c31\u4f1a\u51fa\u73b0\u4e00\u4e2a\u97f3\u9891\u64ad\u653e\u5668\u3002\u70b9\u51fb\u64ad\u653e\uff0c\u8046\u542c\u4f60\u7684\u6587\u672c\u8f6c\u6362\u4e3a\u4ee4\u4eba\u60ca\u8bb6\u7684\u81ea\u7136\u8bed\u97f3\u3002<\/p>\n<blockquote><p>\n  \u884c\u52a8\u9879\u76ee\uff1a\u5c1d\u8bd5\u4f7f\u7528\u4e0d\u540c\u7684\u6587\u672c\u8f93\u5165\u751f\u6210\u97f3\u9891\u3002\u5c1d\u8bd5\u4e0d\u540c\u7684\u58f0\u97f3\uff0c\u5e76\u4ed4\u7ec6\u8046\u542c\u7ec6\u5fae\u5dee\u522b\u3002\n<\/p><\/blockquote>\n<p>\u4f46\u6211\u4eec\u624d\u521a\u521a\u5f00\u59cb\u3002\u5982\u679c\u4f60\u80fd\u521b\u9020\u81ea\u5df1\u72ec\u7279\u7684 AI \u58f0\u97f3\u4f1a\u600e\u6837\uff1f<\/p>\n<pre><code>from kokoro import KPipeline\nfrom IPython.display import display, Audio\nimport soundfile as sf\nimport torch\n\n# 1. Initialize the pipeline\npipeline = KPipeline(lang_code='a')  # 'a' for American English\n\n# 2. Define the available voices (from the model's config)\n# Though not directly used in mixing, it's good practice to keep this list\navailable_voices = [\n    'af', 'af_bella', 'af_sarah', 'am_adam', 'am_michael',\n    'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis',\n    'af_nicole', 'af_sky'\n]\n\n# 3. Specify Device\ndevice = 'cuda' if torch.cuda.is_available() else 'cpu'\n\n# 4. Load the voice embeddings (download these from Hugging Face)\nvoice_01 = 'af_bella'\nvoice_02 = 'am_michael'\n# voice_03 = 'bm_lewis'\n# 5. 
Define the text\ntext = '''\nKokoro TTS stands at the intersection of art and technology, weaving humanlike warmth into synthetic voices. \nIt\u2019s the future of audio creation, boldly forging a path where open-source innovation conquers barriers,\n granting every creator the freedom to share their story.\n'''\n\n# 6. Generate audio using individual voices (similar to the image)\nprint(\"Generating audio with individual voices...\")\n\ngenerator_01 = pipeline(text, voice=voice_01, speed=1, split_pattern=r'\\n+')\nfor i, (gs, ps, audio) in enumerate(generator_01):\n    print(i)  # i =&gt; index\n    print(gs) # gs =&gt; graphemes\/text\n    print(ps) # ps =&gt; phonemes\n    display(Audio(data=audio, rate=24000, autoplay=i==0))\n    sf.write(f'{i}.wav', audio, 24000) # save each audio file\n\ngenerator_02 = pipeline(text, voice=voice_02, speed=1, split_pattern=r'\\n+')\nfor i, (gs, ps, audio) in enumerate(generator_02):\n    print(i)  # i =&gt; index\n    print(gs) # gs =&gt; graphemes\/text\n    print(ps) # ps =&gt; phonemes\n    display(Audio(data=audio, rate=24000, autoplay=i==0))\n    sf.write(f'{i}.wav', audio, 24000) # save each audio file\n\n# generator_03 = pipeline(text, voice=voice_03, speed=1, split_pattern=r'\\n+')\n# for i, (gs, ps, audio) in enumerate(generator_03):\n#     print(f\"Voice 03 - Segment {i}:\")\n#     print(gs)\n#     display(Audio(data=audio, rate=pipeline.sample_rate, autoplay=False))\n#     sf.write(f'voice_03_segment_{i}.wav', audio, pipeline.sample_rate)\n<\/code><\/pre>\n<h2>3\u3001\u8bed\u97f3\u70bc\u91d1\u672f\uff1a\u5236\u4f5c\u4f60\u7684\u6807\u5fd7\u6027\u58f0\u97f3<\/h2>\n<p>\u8fd9\u5c31\u662f Kokoro TTS 
\u771f\u6b63\u95ea\u8000\u7684\u5730\u65b9\u3002\u5b83\u4e0d\u4ec5\u4ec5\u662f\u9009\u62e9\u4e00\u79cd\u58f0\u97f3\uff1b\u800c\u662f\u521b\u9020\u4e00\u79cd\u58f0\u97f3\u3002\u5f97\u76ca\u4e8e\u5176\u5de7\u5999\u7684\u8bed\u97f3\u6df7\u5408\u529f\u80fd\uff0c\u4f60\u53ef\u4ee5\u5c06\u4e0d\u540c\u7684\u8bed\u97f3\u5305\u6df7\u5408\u5728\u4e00\u8d77\uff0c\u5fae\u8c03\u6bd4\u4f8b\u4ee5\u5236\u4f5c\u51fa\u4f60\u72ec\u6709\u7684\u58f0\u97f3\u3002<\/p>\n<p>\u60f3\u8c61\u4e00\u4e0b\uff1a<\/p>\n<ul>\n<li>Bella \u7684\u6e29\u6696\u4e0e Sarah \u7684\u6e05\u8106\u76f8\u7ed3\u5408\uff0c\u521b\u9020\u51fa\u4e00\u79cd\u65e2\u53cb\u597d\u53c8\u6709\u6743\u5a01\u7684\u58f0\u97f3\u3002<\/li>\n<li>Michael \u7684\u6df1\u6c89\u97f3\u8c03\u4e0e Emma \u7684\u6e29\u67d4\u58f0\u97f3\u878d\u5408\u5728\u4e00\u8d77\uff0c\u975e\u5e38\u9002\u5408\u5e73\u9759\u7684\u51a5\u60f3\u5e94\u7528\u7a0b\u5e8f\u3002<\/li>\n<\/ul>\n<p>\u53ef\u80fd\u6027\u53ea\u53d7\u5230\u4f60\u7684\u60f3\u8c61\u529b\u7684\u9650\u5236\u3002<\/p>\n<p>\u5b83\u662f\u5982\u4f55\u5de5\u4f5c\u7684\uff1f<\/p>\n<p>\u5b83\u975e\u5e38\u7b80\u5355\u3002\u6bcf\u4e2a\u8bed\u97f3\u5305\u90fd\u7531\u4e00\u7ec4\u6570\u5b57\uff08\u5d4c\u5165\uff09\u8868\u793a\u3002\u901a\u8fc7\u4ee5\u4e0d\u540c\u7684\u6bd4\u4f8b\u5c06\u8fd9\u4e9b\u6570\u5b57\u76f8\u52a0\uff0c\u4f60\u53ef\u4ee5\u6709\u6548\u5730\u6df7\u5408\u8fd9\u4e9b\u58f0\u97f3\u7684\u7279\u5f81\u3002<\/p>\n<pre><code># 7. Mix the voices (e.g., a 70\/30 blend of af_bella and am_michael)\nprint(\"\\nGenerating audio with mixed voice (70\/30 blend)...\")\nmixed_voice_bf = voice_01_pt* 0.7 + voice_02_pt*0.3 \n\n# 8. 
Generate audio using the mixed voice\n# Instead of passing the tensor directly, create a new voice key and register it with the pipeline\n# Reshape the mixed voice to the expected shape (510, 1, 256)\npipeline.voices['mixed_voice'] = mixed_voice_bf.squeeze(0)  # Register the mixed voice after squeezing\n\ngenerator_mixed = pipeline(\n    text, voice='mixed_voice',  # Use the registered key\n    speed=1, split_pattern=r'\\n+'\n)\n\nfor i, (gs, ps, audio) in enumerate(generator_mixed):\n    print(f\"Mixed Voice - Segment {i}:\")\n    print(i)  # i =&gt; index\n    print(gs) # gs =&gt; graphemes\/text\n    print(ps) # ps =&gt; phonemes\n    display(Audio(data=audio, rate=24000, autoplay=i==0))\n    sf.write(f'{i}.wav', audio, 24000) # save each audio file\n\nprint(\"\\nAudio generation complete!\")<\/code><\/pre>\n<p>\u8bf4\u660e\uff1a<\/p>\n<ul>\n<li>\u52a0\u8f7d\u8bed\u97f3\u5305\uff1a\u6211\u4eec\u4f7f\u7528 torch.load() \u52a0\u8f7d\u4e24\u4e2a\u4e0d\u540c\u7684\u8bed\u97f3\u5305\u3002<\/li>\n<li>\u6df7\u5408\u8bed\u97f3\uff1a\u6211\u4eec\u5c06\u6bcf\u4e2a\u8bed\u97f3\u5305\u4e58\u4ee5\u6743\u91cd\uff08\u8868\u793a\u6df7\u5408\u6bd4\uff09\u5e76\u5c06\u5b83\u4eec\u76f8\u52a0\uff0c\u4ece\u800c\u521b\u5efa\u4e00\u4e2amixed_voice\u3002\u5728\u6b64\u793a\u4f8b\u4e2d\uff0c\u6211\u4eec\u521b\u5efa\u4e86 af_bella \u548c am_michael \u7684 70\/30 \u6df7\u5408\u3002<\/li>\n<li>\u4f7f\u7528\u6df7\u5408\u8bed\u97f3\u751f\u6210\uff1a\u6211\u4eec\u4ee5 voice='mixed_voice' \u8c03\u7528 pipeline\uff0c\u4f7f\u7528\u6df7\u5408\u8bed\u97f3\u5408\u6210\u97f3\u9891\u3002<\/li>\n<li>\u663e\u793a\u97f3\u9891\uff1a\u4f7f\u7528 IPython.display \u4e2d\u7684 display \u548c Audio \u51fd\u6570\u64ad\u653e\u97f3\u9891\u3002<\/li>\n<\/ul>\n<blockquote><p>\n  
\u64cd\u4f5c\u9879\uff1a\u82b1\u4e00\u4e9b\u65f6\u95f4\u5c1d\u8bd5\u4e0d\u540c\u7684\u8bed\u97f3\u7ec4\u5408\u548c\u6bd4\u4f8b\u3002\u521b\u5efa\u5b8c\u5168\u7b26\u5408\u4f60\u9879\u76ee\u9700\u6c42\u7684\u8bed\u97f3\u3002\u4fdd\u5b58\u4f60\u6700\u559c\u6b22\u7684\u81ea\u5b9a\u4e49\u8bed\u97f3\u7684\u8bbe\u7f6e\u4ee5\u4f9b\u65e5\u540e\u4f7f\u7528\u3002\n<\/p><\/blockquote>\n<p>\u7c7b\u4f3c\u793a\u4f8b 2\uff1a<\/p>\n<pre><code># 7. Mix the voices (e.g., average of af_bella and am_michael)\nprint(\"\\nGenerating audio with mixed voice (average)...\")\nmixed_voice_bf = (voice_01_pt + voice_02_pt) \/ 2\n\n# 8. Generate audio using the mixed voice\n# Instead of passing the tensor directly, create a new voice key and register it with the pipeline\n# Reshape the mixed voice to the expected shape (510, 1, 256)\npipeline.voices['mixed_voice'] = mixed_voice_bf.squeeze(0)  # Register the mixed voice after squeezing\n\ngenerator_mixed = pipeline(\n    text, voice='mixed_voice',  # Use the registered key\n    speed=1, split_pattern=r'\\n+'\n)\n\nfor i, (gs, ps, audio) in enumerate(generator_mixed):\n    print(f\"Mixed Voice - Segment {i}:\")\n    print(i)  # i =&gt; index\n    print(gs) # gs =&gt; graphemes\/text\n    print(ps) # ps =&gt; phonemes\n    display(Audio(data=audio, rate=24000, autoplay=i==0))\n    sf.write(f'{i}.wav', audio, 24000) # save each audio file\n\nprint(\"\\nAudio generation complete!\")<\/code><\/pre>\n<p>\u5728\u8fd9\u91cc\uff0c\u4f60\u5c06\u6210\u4e3a\u914d\u97f3\u827a\u672f\u5bb6\uff0c\u4e3a\u4f60\u7684\u9879\u76ee\u5851\u9020\u5b8c\u7f8e\u7684 AI \u58f0\u97f3\u3002<\/p>\n<h2>4\u3001\u8d85\u8d8a\u57fa\u7840\uff1a\u4e13\u4e1a\u63d0\u793a\u548c\u672a\u6765<\/h2>\n<p>\u4f60\u73b0\u5728\u5df2\u7ecf\u638c\u63e1\u4e86 Kokoro TTS 
\u7684\u57fa\u7840\u77e5\u8bc6\u3002\u4f46\u8981\u771f\u6b63\u91ca\u653e\u5176\u4e13\u4e1a\u6f5c\u529b\uff0c\u8fd9\u91cc\u6709\u4e00\u4e9b\u9ad8\u7ea7\u6280\u5de7\u548c\u5bf9\u672a\u6765\u7684\u5c55\u671b\uff1a<\/p>\n<ul>\n<li>\u638c\u63e1\u53d1\u97f3\uff1aKokoro TTS \u53ef\u4ee5\u8f7b\u677e\u5904\u7406\u5927\u591a\u6570\u5355\u8bcd\uff0c\u4f46\u5b83\u786e\u5b9e\u4f9d\u8d56 espeak-ng \u8fdb\u884c\u5b57\u7d20\u5230\u97f3\u7d20 (g2p) \u8f6c\u6362\u3002\u8fd9\u610f\u5473\u7740\u5bf9\u4e8e\u8bcd\u5178\u4e2d\u6ca1\u6709\u7684\u5355\u8bcd\uff0c\u5b83\u5c06\u4f7f\u7528\u6700\u4f73\u731c\u6d4b\uff0c\u4f46\u8fd9\u53ef\u80fd\u5e76\u4e0d\u603b\u662f\u6b63\u786e\u7684\u3002<\/li>\n<li>\u53e5\u5b50\u7ed3\u6784\u5f88\u91cd\u8981\uff1a\u8f83\u77ed\u7684\u53e5\u5b50\u901a\u5e38\u4f1a\u5bfc\u81f4\u66f4\u597d\u7684\u6d41\u7545\u5ea6\u548c\u8bed\u8c03\u3002\u5c1d\u8bd5\u5206\u89e3\u957f\u53e5\u5b50\u4ee5\u83b7\u5f97\u6700\u4f73\u6548\u679c\u3002<\/li>\n<li>\u6807\u70b9\u7b26\u53f7\u7684\u529b\u91cf\uff1a\u9017\u53f7\u3001\u53e5\u53f7\u548c\u95ee\u53f7\u4e0d\u4ec5\u4ec5\u662f\u4e3a\u4e86\u8bed\u6cd5\uff1b\u5b83\u4eec\u6307\u5bfc\u4eba\u5de5\u667a\u80fd\u7684\u505c\u987f\u548c\u8bed\u8c03\u3002\u7b56\u7565\u6027\u5730\u4f7f\u7528\u5b83\u4eec\u3002<\/li>\n<li>\u793e\u533a\u529b\u91cf\uff1a\u52a0\u5165 GitHub \u548c Hugging Face \u4e0a\u7684 Kokoro TTS \u793e\u533a\u3002\u5206\u4eab\u60a8\u7684\u521b\u4f5c\uff0c\u5411\u4ed6\u4eba\u5b66\u4e60\uff0c\u5e76\u4e3a\u9879\u76ee\u7684\u53d1\u5c55\u505a\u51fa\u8d21\u732e\u3002\u5728\u8fd9\u91cc\u60a8\u53ef\u4ee5\u627e\u5230\u6700\u65b0\u7684\u63d0\u793a\u3001\u6280\u5de7\u548c\u8bed\u97f3\u5305\u3002<\/li>\n<\/ul>\n<p>Kokoro TTS 
\u7684\u4e0b\u4e00\u6b65\u662f\u4ec0\u4e48\uff1f\u5f00\u53d1\u4eba\u5458\u6b63\u5728\u4e0d\u65ad\u6539\u8fdb\u3002\u76ee\u524d\uff0c\u8be5\u6a21\u578b\u6b63\u5728\u8fdb\u4e00\u6b65\u5f00\u53d1\u4e2d\uff0c\u6570\u636e\u96c6\u8d8a\u6765\u8d8a\u5927\uff0c\u8fd9\u5f88\u53ef\u80fd\u4f1a\u63d0\u9ad8\u8f93\u51fa\u8d28\u91cf\u3002<\/p>\n<p>\u603b\u7ed3\uff1aKokoro TTS \u4e0d\u4ec5\u4ec5\u662f\u4e00\u4e2a\u5de5\u5177\uff0c\u66f4\u662f\u4e00\u573a\u8fd0\u52a8\u3002\u5b83\u65e8\u5728\u5c06\u9ad8\u8d28\u91cf AI \u8bed\u97f3\u7684\u529b\u91cf\u4ea4\u5230\u6bcf\u4e2a\u4eba\u624b\u4e2d\u3002\u8fd9\u662f\u4e00\u573a\u6b63\u5728\u915d\u917f\u7684\u9769\u547d\u3002<\/p>\n<h2>5\u3001\u7ed3\u675f\u8bed<\/h2>\n<p>\u8ba9\u4f60\u7684\u58f0\u97f3\u3001\u4f60\u7684\u6545\u4e8b\uff0c\u901a\u8fc7 AI \u653e\u5927\u3002<\/p>\n<p>Kokoro TTS \u4e0d\u4ec5\u4ec5\u662f\u9ad8\u7ea7\u6587\u672c\u8f6c\u8bed\u97f3\u670d\u52a1\u7684\u514d\u8d39\u66ff\u4ee3\u54c1\u3002\u5b83\u8bc1\u660e\u4e86\u5f00\u6e90\u534f\u4f5c\u7684\u529b\u91cf\uff0c\u4e5f\u8ba9\u6211\u4eec\u4e00\u7aa5 AI \u8bed\u97f3\u6280\u672f\u7684\u672a\u6765\u3002\u6211\u4eec\u63a2\u7d22\u4e86\u5b83\u7684\u975e\u51e1\u529f\u80fd\uff0c\u5b66\u4f1a\u4e86\u5982\u4f55\u6709\u6548\u5730\u4f7f\u7528\u5b83\uff0c\u5e76\u53d1\u73b0\u4e86\u901a\u8fc7\u6df7\u5408\u5236\u4f5c\u72ec\u7279\u58f0\u97f3\u7684\u827a\u672f\u3002\u73b0\u5728\u8f6e\u5230\u4f60\u52a0\u5165\u8fd9\u573a\u9769\u547d\u4e86\u3002\u65e0\u8bba\u4f60\u662f\u7ecf\u9a8c\u4e30\u5bcc\u7684\u5f00\u53d1\u4eba\u5458\u3001\u521b\u610f\u5185\u5bb9\u521b\u4f5c\u8005\uff0c\u8fd8\u662f\u53ea\u662f\u5bf9 pos \u611f\u5230\u597d\u5947<\/p>\n<hr>\n","protected":false},"excerpt":{"rendered":"<p>\u5982\u679c\u6211\u544a\u8bc9\u4f60\uff0c\u73b0\u5728\u53ef\u4ee5\u5b8c\u5168\u514d\u8d39\u4f7f\u7528\u4e00\u79cd\u4e0d\u4ec5\u597d\u800c\u4e14\u53ef\u80fd\u6bd4 ElevenLabs 
\u7b49\u884c\u4e1a\u9886\u5148\uff08\u4e14\u6602\u8d35\uff09\u670d\u52a1\u66f4\u597d\u7684\u5de5\u5177\u6765\u521b\u5efa\u4e13\u4e1a\u54c1\u8d28\u7684 AI \u914d\u97f3\uff0c\u4f60\u4f1a\u600e\u4e48\u60f3\uff1f\u542c\u8d77\u6765\u597d\u5f97\u4ee4\u4eba\u96be\u4ee5\u7f6e\u4fe1\uff1f \u8fd9\u5c31\u662f Kokoro TTS \u7684\u521b\u5efa\u8005\u6bcf\u5929\u90fd\u5728\u8bc1\u660e\u7684\u9519\u8bef\u3002\u8fd9\u4e0d\u4ec5\u4ec5\u662f\u53e6\u4e00\u4e2a\u6587\u672c\u8f6c\u8bed\u97f3\u6a21\u578b\uff1b\u8fd9\u662f\u4e00\u573a 8200 \u4e07\u53c2\u6570\u7684\u5f00\u6e90\u9769\u547d\uff0c\u5b83\u98a0\u8986\u4e86 AI \u8bed\u97f3\u4e16\u754c\u3002\u6211\u4eec\u8c08\u8bba\u7684\u662f\u4e00\u4e2a\u5728\u6781\u4f4e\u9884\u7b97\u4e0b\u8bad\u7ec3\u7684\u6a21\u578b\uff0c\u5728\u65e5\u5e38\u8ba1\u7b97\u673a\u4e0a\u8fd0\u884c\uff0c\u200b\u200b\u4f46\u5728\u4e3b\u8981\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u5374\u4ee5\u67d0\u79cd\u65b9\u5f0f\u8d85\u8d8a\u4e86\u5de8\u5934\u3002 \u597d\u5947\u5417\uff1f\u4f60\u5e94\u8be5\u597d\u5947\u3002 \u8fd9\u7bc7\u535a\u6587\u4e0d\u4ec5\u4f1a\u63ed\u5f00 Kokoro TTS \u80cc\u540e\u7684\u79d8\u5bc6\uff0c\u8fd8\u4f1a\u9010\u6b65\u6307\u5bfc\u4f60\u5982\u4f55\u5229\u7528\u5b83\u7684\u529b\u91cf\u6765\u521b\u5efa\u4ee4\u4eba\u60ca\u53f9\u7684\u753b\u5916\u97f3\uff0c\u5c06\u4f60\u7684\u9879\u76ee\u63d0\u5347\u5230\u65b0\u7684\u9ad8\u5ea6\u3002 \u7cfb\u597d\u5b89\u5168\u5e26\uff0c\u56e0\u4e3a\u4eba\u5de5\u667a\u80fd\u8bed\u97f3\u7684\u672a\u6765\u5c31\u5728\u8fd9\u91cc\uff0c\u800c\u4e14\u5b83\u662f\u514d\u8d39\u7684\u3002\u8fd9\u4e0d\u4ec5\u4ec5\u662f\u4e00\u4e2a\u6559\u7a0b\uff1b\u5b83\u662f\u4f60\u8fdb\u5165\u4e00\u4e2a\u6b63\u5728\u91cd\u65b0\u5b9a\u4e49\u666f\u89c2\u7684\u793e\u533a\u7684\u5927\u95e8\u3002 1\u3001Kokoro TTS \u73b0\u8c61\uff1aAI\u7ade\u6280\u573a\u4e0a\u7684\u5927\u536b\u4e0e\u6b4c\u5229\u4e9a \u6211\u4eec\u90fd\u559c\u6b22\u5f31\u8005\u7684\u6545\u4e8b\u3002\u800c\u5728\u4eba\u5de5\u667a\u80fd\u8bed\u97f3\u7684\u4e16\u754c\u91cc\uff0cKokoro TTS 
\u662f\u7ec8\u6781\u5f31\u8005\u3002\u60f3\u8c61\u4e00\u4e0b\uff1a\u4e00\u4e2a\u53ea\u6709 8200 \u4e07\u4e2a\u53c2\u6570\u7684\u5fae\u578b\u6a21\u578b\uff0c\u4ec5\u7528\u4e0d\u5230 100 \u5c0f\u65f6\u7684\u97f3\u9891\u6570\u636e\u8fdb\u884c\u8bad\u7ec3\uff0c\u4e0e ElevenLabs \u7b49\u5de8\u5934\u6b63\u9762\u4ea4\u950b\uff0c\u540e\u8005\u7684\u6a21\u578b\u662f\u5728\u5e9e\u5927\u7684\u6570\u636e\u96c6\u4e0a\u8fdb\u884c\u8bad\u7ec3\uff0c\u5e76\u83b7\u5f97\u4e86\u5927\u91cf\u8d44\u91d1\u652f\u6301\u3002\u542c\u8d77\u6765\u50cf\u662f\u6ce8\u5b9a\u8981\u5931\u8d25\u7684\uff0c\u5bf9\u5427\uff1f\u9519\u4e86\u3002 \u4ee4\u4eba\u9707\u60ca\u7684\u662f\uff1aKokoro TTS \u4e0d\u4ec5\u53c2\u52a0\u4e86\u6bd4\u8d5b\uff0c\u8fd8\u8d62\u4e86\u3002\u5b83\u5728 Hugging Face TTS Arena \u4e0a\u593a\u5f97\u4e86\u7b2c\u4e00\u540d\uff0c\u8fd9\u662f\u4e00\u4e2a\u4e25\u683c\u8bc4\u4f30\u6700\u4f73\u6587\u672c\u8f6c\u8bed\u97f3\u6a21\u578b\u7684\u6392\u884c\u699c\u3002\u5b83\u662f\u5982\u4f55\u5b9e\u73b0\u8fd9\u4e00\u770b\u4f3c\u4e0d\u53ef\u80fd\u7684\u58ee\u4e3e\u7684\uff1f \u79d8\u8bc0\u5728\u4e8e\u5176\u5de7\u5999\u7684\u67b6\u6784\u548c\u8bbe\u8ba1\u7684\u6548\u7387\u3002\u8fd9\u662f\u4e00\u4e2a\u9ad8\u5ea6\u4f18\u5316\u7684\u6a21\u578b\uff0c\u5229\u7528\u4e86\u4ec5\u89e3\u7801\u5668\u7684\u67b6\u6784\uff0c\u5e76\u5728 A100 80GB GPU \u4e0a\u4f7f\u7528\u5927\u7ea6 500 \u4e2a GPU \u5c0f\u65f6\u8fdb\u884c\u8bad\u7ec3\u3002\u8fd9\u79cd\u6548\u7387\u8f6c\u5316\u4e3a\u51e0\u4e2a\u4ee4\u4eba\u96be\u4ee5\u7f6e\u4fe1\u7684\u4f18\u52bf\uff1a \u8d85\u5feb\u63a8\u7406\uff1a\u5373\u4f7f\u5728 CPU \u4e0a\u4e5f\u80fd\u5b9e\u65f6\u751f\u6210\u97f3\u9891\u3002\u65e0\u9700\u518d\u7b49\u5f85\u4e91\u670d\u52a1\u5668\u3002 \u968f\u5904\u8fd0\u884c\uff1a\u4ece\u7b14\u8bb0\u672c\u7535\u8111\u5230 Raspberry Pi\uff0cKokoro TTS 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[13],"tags":[],"class_list":["post-53775","post","type-post","status-publish","format-standard","hentry","category-ai"],"_links":{"self":[{"href":"https:\/\/fwq.ai\/blog\/wp-json\/wp\/v2\/posts\/53775","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/fwq.ai\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/fwq.ai\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/fwq.ai\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/fwq.ai\/blog\/wp-json\/wp\/v2\/comments?post=53775"}],"version-history":[{"count":0,"href":"https:\/\/fwq.ai\/blog\/wp-json\/wp\/v2\/posts\/53775\/revisions"}],"wp:attachment":[{"href":"https:\/\/fwq.ai\/blog\/wp-json\/wp\/v2\/media?parent=53775"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/fwq.ai\/blog\/wp-json\/wp\/v2\/categories?post=53775"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/fwq.ai\/blog\/wp-json\/wp\/v2\/tags?post=53775"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}