ginipick committed on
Commit
fd70398
·
verified ·
1 Parent(s): 1a7856c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -1
app.py CHANGED
@@ -35,6 +35,7 @@ DEFAULT_SYSTEM_PROMPT = (
35
  "Generate audio following instruction.\n\n"
36
  "<|scene_desc_start|>\n"
37
  "Audio is recorded from a quiet room.\n"
 
38
  "<|scene_desc_end|>"
39
  )
40
 
@@ -99,6 +100,19 @@ PREDEFINED_EXAMPLES = {
99
  "icon": "🇨🇳",
100
  "color": "#AA96DA"
101
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  "single-speaker-bgm": {
103
  "system_prompt": DEFAULT_SYSTEM_PROMPT,
104
  "input_text": "[music start] I will remember this, thought Ender, when I am defeated. To keep dignity, and give honor where it's due, so that defeat is not disgrace. And I hope I don't have to do it often. [music end]",
@@ -196,8 +210,18 @@ def normalize_chinese_punctuation(text):
196
 
197
 
198
  def normalize_text(transcript: str):
 
 
 
 
 
 
 
 
 
199
  transcript = normalize_chinese_punctuation(transcript)
200
- # Other normalizations (e.g., parentheses and other symbols. Will be improved in the future)
 
201
  transcript = transcript.replace("(", " ")
202
  transcript = transcript.replace(")", " ")
203
  transcript = transcript.replace("°F", " degrees Fahrenheit")
 
35
  "Generate audio following instruction.\n\n"
36
  "<|scene_desc_start|>\n"
37
  "Audio is recorded from a quiet room.\n"
38
+ "Support for multiple languages including English, Chinese, Korean, Japanese, and more.\n"
39
  "<|scene_desc_end|>"
40
  )
41
 
 
100
  "icon": "🇨🇳",
101
  "color": "#AA96DA"
102
  },
103
+ "single-speaker-kr": {
104
+ "system_prompt": "Generate audio following instruction.\n\n"
105
+ "<|scene_desc_start|>\n"
106
+ "Audio is recorded from a quiet room.\n"
107
+ "<|scene_desc_end|>",
108
+ "input_text": "์•ˆ๋…•ํ•˜์„ธ์š”, ์˜ค๋Š˜์€ ์ธ๊ณต์ง€๋Šฅ์˜ ๋ฏธ๋ž˜์— ๋Œ€ํ•ด ์ด์•ผ๊ธฐํ•ด๋ณด๊ฒ ์Šต๋‹ˆ๋‹ค.\n"
109
+ "์ตœ๊ทผ AI ๊ธฐ์ˆ ์˜ ๋ฐœ์ „์ด ์ •๋ง ๋†€๋ผ์šด๋ฐ์š”,\n"
110
+ "ํŠนํžˆ ์Œ์„ฑ ํ•ฉ์„ฑ ๊ธฐ์ˆ ์€ ์ด์ œ ์‚ฌ๋žŒ๊ณผ ๊ตฌ๋ณ„ํ•˜๊ธฐ ์–ด๋ ค์šธ ์ •๋„๋กœ ์ž์—ฐ์Šค๋Ÿฌ์›Œ์กŒ์Šต๋‹ˆ๋‹ค.\n"
111
+ "์—ฌ๋Ÿฌ๋ถ„์€ ์ œ๊ฐ€ ์‹ค์ œ ์‚ฌ๋žŒ์ธ์ง€ AI์ธ์ง€ ๊ตฌ๋ณ„ํ•  ์ˆ˜ ์žˆ์œผ์‹ ๊ฐ€์š”?",
112
+ "description": "🇰🇷 <b>Korean Speech</b> - Generate natural Korean speech",
113
+ "icon": "🇰🇷",
114
+ "color": "#FFB6C1"
115
+ },
116
  "single-speaker-bgm": {
117
  "system_prompt": DEFAULT_SYSTEM_PROMPT,
118
  "input_text": "[music start] I will remember this, thought Ender, when I am defeated. To keep dignity, and give honor where it's due, so that defeat is not disgrace. And I hope I don't have to do it often. [music end]",
 
210
 
211
 
212
  def normalize_text(transcript: str):
213
+ # Skip normalization for Korean text to preserve it properly
214
+ if any('\u3131' <= char <= '\u3163' or '\uac00' <= char <= '\ud7a3' for char in transcript):
215
+ # Korean text detected - minimal normalization
216
+ transcript = transcript.strip()
217
+ if transcript and not any([transcript.endswith(c) for c in [".", "!", "?", "。", "！", "？"]]):
218
+ transcript += "."
219
+ return transcript
220
+
221
+ # Chinese punctuation normalization
222
  transcript = normalize_chinese_punctuation(transcript)
223
+
224
+ # Other normalizations (e.g., parentheses and other symbols)
225
  transcript = transcript.replace("(", " ")
226
  transcript = transcript.replace(")", " ")
227
  transcript = transcript.replace("°F", " degrees Fahrenheit")