renhang committed on
Commit
bc3ffb2
·
1 Parent(s): 072d1f9

update space

Browse files
Files changed (3) hide show
  1. app.py +181 -20
  2. jam_infer.yaml +2 -1
  3. utils.py +184 -0
app.py CHANGED
@@ -6,9 +6,39 @@ import tempfile
6
  import requests
7
  import subprocess
8
  from pathlib import Path
 
9
 
10
  from model import Jamify
11
- from utils import json_to_text, text_to_json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def download_resources():
14
  """Download examples data from GitHub repository if not already present"""
@@ -36,10 +66,17 @@ print("Jamify model ready.")
36
  gr.set_static_paths(paths=[Path.cwd().absolute()])
37
 
38
  @spaces.GPU(duration=100)
39
- def generate_song(reference_audio, lyrics_text, style_prompt, duration):
40
  # We need to save the uploaded files to temporary paths to pass to the model
41
  reference_audio = reference_audio not in ("", None) and reference_audio or None
42
 
 
 
 
 
 
 
 
43
  # Convert text format to JSON and save to temporary file
44
  lyrics_json = text_to_json(lyrics_text)
45
 
@@ -77,6 +114,7 @@ def load_examples():
77
  audio_path = example.get('audio_path', '')
78
  lrc_path = example.get('lrc_path', '')
79
  duration = example.get('duration', 120)
 
80
 
81
  # Load lyrics and convert to text format (pre-computed/cached)
82
  lyrics_text = ""
@@ -93,26 +131,38 @@ def load_examples():
93
  'id': example_id,
94
  'audio_path': audio_path if os.path.exists(audio_path) else None,
95
  'lyrics_text': lyrics_text,
96
- 'duration': duration
 
97
  })
98
 
99
  print(f"Loaded {len(examples)} cached examples")
100
  return examples
101
 
102
- def load_example(example_idx, examples):
103
  """Load a specific example and return its data"""
104
  if 0 <= example_idx < len(examples):
105
  example = examples[example_idx]
 
 
 
 
 
 
 
 
 
 
106
  return (
107
  example['audio_path'],
108
- example['lyrics_text'],
109
- example['duration']
 
110
  )
111
- return None, "", 120
112
 
113
  def clear_form():
114
  """Clear all form inputs to allow user to create their own song"""
115
- return None, "", 120 # audio, lyrics, duration
116
 
117
  def update_button_styles(selected_idx, total_examples):
118
  """Update button styles to highlight the selected example"""
@@ -138,6 +188,10 @@ examples = load_examples()
138
  default_audio = examples[0]['audio_path'] if examples else None
139
  default_lyrics = examples[0]['lyrics_text'] if examples else ""
140
  default_duration = examples[0]['duration'] if examples else 120
 
 
 
 
141
 
142
  # Gradio interface
143
  with gr.Blocks() as demo:
@@ -147,6 +201,10 @@ with gr.Blocks() as demo:
147
  # State to track selected example (-1 means "Make Your Own" is selected, 0 is first example)
148
  selected_example = gr.State(0 if examples else -1)
149
 
 
 
 
 
150
  # Sample buttons section
151
  if examples:
152
  gr.Markdown("### Sample Examples")
@@ -178,6 +236,26 @@ with gr.Blocks() as demo:
178
  with gr.Row():
179
  with gr.Column():
180
  gr.Markdown("### Inputs")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  lyrics_text = gr.Textbox(
182
  label="Lyrics",
183
  lines=10,
@@ -186,48 +264,131 @@ with gr.Blocks() as demo:
186
  )
187
  duration_slider = gr.Slider(minimum=120, maximum=230, value=default_duration, step=1, label="Duration (seconds)")
188
 
 
 
 
189
  with gr.Tab("Style from Audio"):
190
  reference_audio = gr.File(label="Reference Audio (.mp3, .wav)", type="filepath", value=default_audio)
191
- with gr.Tab("Style from Text"):
192
- style_prompt = gr.Textbox(label="Style Prompt", lines=3, placeholder="e.g., A high-energy electronic dance track with a strong bassline and euphoric synths.")
 
 
 
193
 
194
  generate_button = gr.Button("Generate Song", variant="primary")
195
 
196
- with gr.Column():
197
  gr.Markdown("### Output")
198
  output_audio = gr.Audio(label="Generated Song")
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  generate_button.click(
201
  fn=generate_song,
202
- inputs=[reference_audio, lyrics_text, style_prompt, duration_slider],
203
  outputs=output_audio,
204
  api_name="generate_song"
205
  )
206
 
207
  # Connect example buttons to load data and update selection
208
  if examples:
209
- def load_example_and_update_selection(idx):
210
  """Load example data and update button selection state"""
211
- audio, lyrics, duration = load_example(idx, examples)
 
212
  button_updates = update_button_styles(idx, len(examples))
213
- return [audio, lyrics, duration, idx] + button_updates
 
214
 
215
  def clear_form_and_update_selection():
216
  """Clear form and update button selection state"""
217
- audio, lyrics, duration = clear_form()
218
  button_updates = update_button_styles(-1, len(examples))
219
- return [audio, lyrics, duration, -1] + button_updates
 
220
 
221
  for i, button in enumerate(example_buttons):
222
  button.click(
223
- fn=lambda idx=i: load_example_and_update_selection(idx),
224
- outputs=[reference_audio, lyrics_text, duration_slider, selected_example] + example_buttons + [make_your_own_button]
 
225
  )
226
 
227
  # Connect "Make Your Own" button to clear form and update selection
228
  make_your_own_button.click(
229
  fn=clear_form_and_update_selection,
230
- outputs=[reference_audio, lyrics_text, duration_slider, selected_example] + example_buttons + [make_your_own_button]
231
  )
232
 
233
  # Create necessary temporary directories for Gradio
 
6
  import requests
7
  import subprocess
8
  from pathlib import Path
9
+ import torchaudio
10
 
11
  from model import Jamify
12
+ from utils import json_to_text, text_to_json, convert_text_time_to_beats, convert_text_beats_to_time, convert_text_beats_to_time_with_regrouping, text_to_words, beats_to_text_with_regrouping, round_to_quarter_beats
13
+
14
def crop_audio_to_30_seconds(audio_path):
    """Crop audio to its first 30 seconds and save it as a temporary WAV file.

    Args:
        audio_path: Path to the source audio file. May be None or empty.

    Returns:
        Path to a newly created temporary ``.wav`` file containing at most the
        first 30 seconds of the input, or None when the path is missing, does
        not exist, or the audio could not be loaded or saved.
    """
    if not audio_path or not os.path.exists(audio_path):
        return None

    temp_path = None
    try:
        waveform, sample_rate = torchaudio.load(audio_path)

        # Number of samples that make up 30 seconds at this sample rate
        target_samples = sample_rate * 30

        # Slicing past the end is a no-op, so shorter clips pass through whole
        cropped_waveform = waveform[:, :target_samples]

        # Only reserve a unique file name here; the handle is closed before
        # torchaudio writes to the path.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
            temp_path = temp_file.name

        torchaudio.save(temp_path, cropped_waveform, sample_rate)
        return temp_path

    except Exception as e:
        print(f"Error processing audio: {e}")
        # The file was created with delete=False, so nothing else will clean
        # it up if saving failed part-way through.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return None
42
 
43
  def download_resources():
44
  """Download examples data from GitHub repository if not already present"""
 
66
  gr.set_static_paths(paths=[Path.cwd().absolute()])
67
 
68
  @spaces.GPU(duration=100)
69
+ def generate_song(reference_audio, lyrics_text, duration, mode="time", bpm=120, style_prompt=None):
70
  # We need to save the uploaded files to temporary paths to pass to the model
71
  reference_audio = reference_audio not in ("", None) and reference_audio or None
72
 
73
+ # Convert beats to time format if in beats mode
74
+ if mode == "beats" and lyrics_text:
75
+ try:
76
+ lyrics_text = convert_text_beats_to_time(lyrics_text, bpm)
77
+ except Exception as e:
78
+ print(f"Error converting beats to time: {e}")
79
+
80
  # Convert text format to JSON and save to temporary file
81
  lyrics_json = text_to_json(lyrics_text)
82
 
 
114
  audio_path = example.get('audio_path', '')
115
  lrc_path = example.get('lrc_path', '')
116
  duration = example.get('duration', 120)
117
+ bpm = example.get('bpm', 120.0) # Read BPM from input.json, default to 120
118
 
119
  # Load lyrics and convert to text format (pre-computed/cached)
120
  lyrics_text = ""
 
131
  'id': example_id,
132
  'audio_path': audio_path if os.path.exists(audio_path) else None,
133
  'lyrics_text': lyrics_text,
134
+ 'duration': duration,
135
+ 'bpm': bpm
136
  })
137
 
138
  print(f"Loaded {len(examples)} cached examples")
139
  return examples
140
 
141
def load_example(example_idx, examples, mode="time"):
    """Fetch one cached example as ``(audio_path, lyrics_text, duration, bpm)``.

    An index outside ``examples`` yields the empty-form defaults
    ``(None, "", 120, 120.0)``. When ``mode`` is "beats" and the example has
    lyrics, they are passed through ``beats_to_text_with_regrouping`` using the
    example's BPM (120.0 if the example has none); if that conversion raises,
    the error is printed and the stored lyrics are returned unchanged.
    """
    if not (0 <= example_idx < len(examples)):
        return None, "", 120, 120.0

    entry = examples[example_idx]
    text = entry['lyrics_text']
    tempo = entry.get('bpm', 120.0)

    # Only beats mode needs a conversion; time mode shows the stored text as-is
    wants_beats = mode == "beats"
    if wants_beats and text:
        try:
            text = beats_to_text_with_regrouping(text, tempo, round_to_quarters=True)
        except Exception as e:
            print(f"Error converting to beats format: {e}")

    return entry['audio_path'], text, entry['duration'], tempo
162
 
163
def clear_form():
    """Return blank form values so the user can start a song from scratch.

    Returns:
        Tuple of (audio, lyrics, duration, bpm) defaults.
    """
    audio, lyrics, duration, bpm = None, "", 120, 120.0
    return audio, lyrics, duration, bpm
166
 
167
  def update_button_styles(selected_idx, total_examples):
168
  """Update button styles to highlight the selected example"""
 
188
  default_audio = examples[0]['audio_path'] if examples else None
189
  default_lyrics = examples[0]['lyrics_text'] if examples else ""
190
  default_duration = examples[0]['duration'] if examples else 120
191
+ default_bpm = examples[0]['bpm'] if examples else 120.0
192
+
193
+ # Create cropped version of default audio for display
194
+ default_audio_display = crop_audio_to_30_seconds(default_audio) if default_audio else None
195
 
196
  # Gradio interface
197
  with gr.Blocks() as demo:
 
201
  # State to track selected example (-1 means "Make Your Own" is selected, 0 is first example)
202
  selected_example = gr.State(0 if examples else -1)
203
 
204
+ # States for mode and BPM
205
+ input_mode = gr.State("time")
206
+ current_bpm = gr.State(default_bpm)
207
+
208
  # Sample buttons section
209
  if examples:
210
  gr.Markdown("### Sample Examples")
 
236
  with gr.Row():
237
  with gr.Column():
238
  gr.Markdown("### Inputs")
239
+
240
+ # Mode switcher
241
+ mode_radio = gr.Radio(
242
+ choices=["Time Mode", "Beats Mode"],
243
+ value="Time Mode",
244
+ label="Input Format",
245
+ info="Choose how to specify timing: seconds or musical beats"
246
+ )
247
+
248
+ # BPM input (initially hidden)
249
+ bpm_input = gr.Number(
250
+ label="BPM (Beats Per Minute)",
251
+ value=default_bpm,
252
+ minimum=60,
253
+ maximum=200,
254
+ step=1,
255
+ visible=False,
256
+ info="Tempo for converting beats to time"
257
+ )
258
+
259
  lyrics_text = gr.Textbox(
260
  label="Lyrics",
261
  lines=10,
 
264
  )
265
  duration_slider = gr.Slider(minimum=120, maximum=230, value=default_duration, step=1, label="Duration (seconds)")
266
 
267
+ with gr.Column():
268
+ gr.Markdown("### Style & Generation")
269
+
270
  with gr.Tab("Style from Audio"):
271
  reference_audio = gr.File(label="Reference Audio (.mp3, .wav)", type="filepath", value=default_audio)
272
+ reference_audio_display = gr.Audio(
273
+ label="Reference Audio Preview (First 30 seconds)",
274
+ value=default_audio_display,
275
+ visible=default_audio_display is not None
276
+ )
277
 
278
  generate_button = gr.Button("Generate Song", variant="primary")
279
 
 
280
  gr.Markdown("### Output")
281
  output_audio = gr.Audio(label="Generated Song")
282
 
283
+ # Mode switching functions
284
def switch_mode(mode_choice, current_lyrics, current_bpm_val):
    """Handle switching between time and beats mode.

    Args:
        mode_choice: Selected radio label; "Beats Mode" selects beats mode and
            any other value selects time mode.
        current_lyrics: Text currently in the lyrics box (may be None or empty).
        current_bpm_val: BPM used to convert between seconds and beats.

    Returns:
        Tuple of (update for the BPM input's visibility, update for the lyrics
        box's placeholder/label/value, mode string "time" or "beats").
    """
    to_beats = mode_choice == "Beats Mode"
    mode = "beats" if to_beats else "time"

    if to_beats:
        placeholder = "Enter lyrics with beat timestamps: word[start_beat:end_beat] word[start_beat:end_beat]...\n\nExample: Hello[0:1] world[1.5:2.75] this[3:3.75] is[4.25:4.5] my[5:5.75] song[6.25:7]\n\nFormat: Each word followed by [start_beat:end_beat] in brackets\nBeats are in quarter notes (1 beat = quarter note, 0.25 = sixteenth note)"
        label = "Lyrics (Beats Format)"
    else:
        placeholder = "Enter lyrics with timestamps: word[start_time:end_time] word[start_time:end_time]...\n\nExample: Hello[0.0:1.2] world[1.5:2.8] this[3.0:3.8] is[4.2:4.6] my[5.0:5.8] song[6.2:7.0]\n\nFormat: Each word followed by [start_seconds:end_seconds] in brackets\nTimestamps should be in seconds with up to 2 decimal places"
        label = "Lyrics"

    # Convert whatever is in the box into the newly selected format. A failed
    # conversion keeps the text untouched rather than discarding user input.
    converted_lyrics = current_lyrics
    if current_lyrics and current_lyrics.strip():  # None-safe: the box may be empty
        try:
            if to_beats:
                # Despite its name, this helper takes time-based text and emits beats
                converted_lyrics = beats_to_text_with_regrouping(current_lyrics, current_bpm_val, round_to_quarters=True)
            else:
                converted_lyrics = convert_text_beats_to_time_with_regrouping(current_lyrics, current_bpm_val)
        except Exception as e:
            direction = "time to beats" if to_beats else "beats to time"
            print(f"Error converting {direction}: {e}")

    return (
        gr.update(visible=to_beats),  # bpm_input is only shown in beats mode
        gr.update(placeholder=placeholder, label=label, value=converted_lyrics),  # lyrics_text
        mode  # input_mode state
    )
320
+
321
def update_bpm_state(bpm_val):
    """Pass the BPM value through unchanged so it can be written to the BPM state."""
    new_state = bpm_val
    return new_state
324
+
325
def update_reference_audio_display(audio_file):
    """Build the update for the reference-audio preview component.

    The preview is shown with a cropped copy of ``audio_file`` when cropping
    succeeds; it is hidden and cleared when no file is given or cropping
    returns nothing.
    """
    cropped_path = None
    if audio_file is not None:
        cropped_path = crop_audio_to_30_seconds(audio_file)

    if not cropped_path:
        return gr.update(visible=False, value=None)
    return gr.update(visible=True, value=cropped_path)
335
+
336
+ # Connect mode switching
337
+ mode_radio.change(
338
+ fn=switch_mode,
339
+ inputs=[mode_radio, lyrics_text, current_bpm],
340
+ outputs=[bpm_input, lyrics_text, input_mode]
341
+ )
342
+
343
+ # Connect BPM changes
344
+ bpm_input.change(
345
+ fn=update_bpm_state,
346
+ inputs=[bpm_input],
347
+ outputs=[current_bpm]
348
+ )
349
+
350
+ # Connect reference audio file changes to display
351
+ reference_audio.change(
352
+ fn=update_reference_audio_display,
353
+ inputs=[reference_audio],
354
+ outputs=[reference_audio_display]
355
+ )
356
+
357
  generate_button.click(
358
  fn=generate_song,
359
+ inputs=[reference_audio, lyrics_text, duration_slider, input_mode, current_bpm],
360
  outputs=output_audio,
361
  api_name="generate_song"
362
  )
363
 
364
  # Connect example buttons to load data and update selection
365
  if examples:
366
def load_example_and_update_selection(idx, current_mode):
    """Load example ``idx`` and produce the values for every wired output.

    ``current_mode`` is the radio label; "Beats Mode" loads the lyrics in beats
    format, anything else in time format. The returned list is
    [audio, lyrics, duration, bpm, idx, preview update] followed by the
    button style updates.
    """
    mode = "time"
    if current_mode == "Beats Mode":
        mode = "beats"

    audio, lyrics, duration, bpm = load_example(idx, examples, mode)
    button_updates = update_button_styles(idx, len(examples))
    audio_display_update = update_reference_audio_display(audio)

    form_values = [audio, lyrics, duration, bpm, idx, audio_display_update]
    return form_values + button_updates
373
 
374
def clear_form_and_update_selection():
    """Reset the form and mark "Make Your Own" (index -1) as the selection.

    The returned list is [audio, lyrics, duration, bpm, -1, preview update]
    followed by the button style updates.
    """
    audio, lyrics, duration, bpm = clear_form()
    button_updates = update_button_styles(-1, len(examples))
    audio_display_update = update_reference_audio_display(audio)

    form_values = [audio, lyrics, duration, bpm, -1, audio_display_update]
    return form_values + button_updates
380
 
381
  for i, button in enumerate(example_buttons):
382
  button.click(
383
+ fn=lambda current_mode, idx=i: load_example_and_update_selection(idx, current_mode),
384
+ inputs=[mode_radio],
385
+ outputs=[reference_audio, lyrics_text, duration_slider, current_bpm, selected_example, reference_audio_display] + example_buttons + [make_your_own_button]
386
  )
387
 
388
  # Connect "Make Your Own" button to clear form and update selection
389
  make_your_own_button.click(
390
  fn=clear_form_and_update_selection,
391
+ outputs=[reference_audio, lyrics_text, duration_slider, current_bpm, selected_example, reference_audio_display] + example_buttons + [make_your_own_button]
392
  )
393
 
394
  # Create necessary temporary directories for Gradio
jam_infer.yaml CHANGED
@@ -23,9 +23,10 @@ evaluation:
23
  cfg_range:
24
  - 0.05
25
  - 1
 
26
  dual_cfg:
27
  - 4.7
28
- - 2.5
29
  steps: 50
30
 
31
  model:
 
23
  cfg_range:
24
  - 0.05
25
  - 1
26
+ fix_dual_cfg: true
27
  dual_cfg:
28
  - 4.7
29
+ - 2.6
30
  steps: 50
31
 
32
  model:
utils.py CHANGED
@@ -141,6 +141,190 @@ def json_to_text(json_data: dict) -> str:
141
  return '\n\n'.join(segment_lines)
142
 
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  def text_to_json(text: str) -> dict:
145
  """
146
  Convert text format to JSON structure expected by the model.
 
141
  return '\n\n'.join(segment_lines)
142
 
143
 
144
def round_to_quarter_beats(beat_position: float) -> float:
    """Snap a beat position to the closest multiple of 0.25 (for sample display).

    Exact ties follow Python's built-in ``round`` (round half to even).
    """
    quarter_steps = round(beat_position * 4)
    return quarter_steps / 4
147
+
148
+
149
def beats_to_seconds(beat_position: float, bpm: float) -> float:
    """Convert a beat position to a time in seconds at the given tempo.

    Args:
        beat_position: Position measured in beats.
        bpm: Tempo in beats per minute; must be greater than zero.

    Returns:
        The equivalent time in seconds.

    Raises:
        ValueError: If ``bpm`` is not greater than zero.
    """
    # Fail with a clear message instead of a bare ZeroDivisionError (bpm == 0)
    # or silently negative timestamps (bpm < 0).
    if not bpm > 0:
        raise ValueError(f"bpm must be a positive number, got {bpm!r}")
    return (beat_position * 60.0) / bpm
152
+
153
+
154
def seconds_to_beats(time_seconds: float, bpm: float) -> float:
    """Convert a time in seconds to a beat position at the given tempo.

    Args:
        time_seconds: Time measured in seconds.
        bpm: Tempo in beats per minute; must be greater than zero.

    Returns:
        The equivalent position in beats.

    Raises:
        ValueError: If ``bpm`` is not greater than zero.
    """
    # A zero tempo would silently map every timestamp to beat 0 and a negative
    # one would produce negative beats; reject both up front.
    if not bpm > 0:
        raise ValueError(f"bpm must be a positive number, got {bpm!r}")
    return (time_seconds * bpm) / 60.0
157
+
158
+
159
def convert_text_time_to_beats(text: str, bpm: float, round_to_quarters: bool = False) -> str:
    """
    Rewrite "word[start_sec:end_sec]" text with the timestamps expressed in beats.

    Args:
        text: String in format "word[start_sec:end_sec] ..."
        bpm: Beats per minute used for the conversion
        round_to_quarters: When True, snap each beat value to a quarter of a beat

    Returns:
        Single-line string in format "word[start_beat:end_beat] ...", or ""
        when the input is blank
    """
    if not text.strip():
        return ""

    def _fmt(value):
        # Two decimals at most, with trailing zeros and a dangling dot removed
        return f"{value:.2f}".rstrip('0').rstrip('.')

    pieces = []
    for entry in text_to_words(text):
        first = seconds_to_beats(entry['start'], bpm)
        last = seconds_to_beats(entry['end'], bpm)

        if round_to_quarters:
            first = round_to_quarter_beats(first)
            last = round_to_quarter_beats(last)

        first_str, last_str = _fmt(first), _fmt(last)
        pieces.append(f"{entry['word']}[{first_str}:{last_str}]")

    return " ".join(pieces)
193
+
194
+
195
def beats_to_text_with_regrouping(text: str, bpm: float, round_to_quarters: bool = False) -> str:
    """
    Turn time-based lyric text into beats-based text grouped into segments.

    Note that, despite the name, the input is in seconds and the output is in
    beats.

    Args:
        text: String in format "word[start_sec:end_sec] ..."
        bpm: Beats per minute used for the conversion
        round_to_quarters: When True, snap each beat value to a quarter of a beat

    Returns:
        One line per segment, separated by blank lines, or "" when the input
        is blank
    """
    if not text.strip():
        return ""

    # Step 1: re-express every word's start/end in beats
    beat_words = []
    for entry in text_to_words(text):
        first = seconds_to_beats(entry['start'], bpm)
        last = seconds_to_beats(entry['end'], bpm)

        if round_to_quarters:
            first = round_to_quarter_beats(first)
            last = round_to_quarter_beats(last)

        beat_words.append({'word': entry['word'], 'start': first, 'end': last})

    # Step 2: let regroup_words (defined elsewhere) split the words into
    # segments; max_len and gap are given in beats here rather than seconds.
    segments = regroup_words(beat_words, max_len=20, gap=2.0)

    # Step 3: render each segment from the words that start inside it or that
    # straddle its start.
    # NOTE(review): a word whose start equals seg['end'] and has zero length
    # after rounding matches neither clause - confirm against regroup_words.
    segment_lines = []
    for seg in segments:
        members = [
            candidate for candidate in beat_words
            if seg['start'] <= candidate['start'] < seg['end']
            or candidate['start'] <= seg['start'] < candidate['end']
        ]
        if members:
            segment_lines.append(words_to_text(members))

    return '\n\n'.join(segment_lines)
248
+
249
+
250
def convert_text_beats_to_time(text: str, bpm: float) -> str:
    """
    Rewrite "word[start_beat:end_beat]" text with the timestamps expressed in seconds.

    Args:
        text: String in format "word[start_beat:end_beat] ..."
        bpm: Beats per minute used for the conversion

    Returns:
        Single-line string in format "word[start_sec:end_sec] ...", or ""
        when the input is blank
    """
    if not text.strip():
        return ""

    def _fmt(value):
        # Two decimals at most, with trailing zeros and a dangling dot removed
        return f"{value:.2f}".rstrip('0').rstrip('.')

    pieces = []
    # The beats text uses the same bracket syntax as time text, so the same
    # parser is reused.
    for entry in text_to_words(text):
        begin = beats_to_seconds(entry['start'], bpm)
        finish = beats_to_seconds(entry['end'], bpm)
        begin_str, finish_str = _fmt(begin), _fmt(finish)
        pieces.append(f"{entry['word']}[{begin_str}:{finish_str}]")

    return " ".join(pieces)
280
+
281
+
282
def convert_text_beats_to_time_with_regrouping(text: str, bpm: float) -> str:
    """
    Convert beats-based text to time-based text one line at a time.

    Args:
        text: String in format "word[start_beat:end_beat] ..." (can be multi-line)
        bpm: Beats per minute used for the conversion

    Returns:
        String in format "word[start_sec:end_sec] ..." where blank input lines
        stay blank; a non-blank line that yields no parsed words is omitted
    """
    if not text.strip():
        return ""

    def _fmt(value):
        # Two decimals at most, with trailing zeros and a dangling dot removed
        return f"{value:.2f}".rstrip('0').rstrip('.')

    def _render(entry):
        begin = beats_to_seconds(entry['start'], bpm)
        finish = beats_to_seconds(entry['end'], bpm)
        begin_str, finish_str = _fmt(begin), _fmt(finish)
        return f"{entry['word']}[{begin_str}:{finish_str}]"

    output = []
    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            # Blank lines separate segments, so keep them in place
            output.append("")
            continue

        rendered = [_render(entry) for entry in text_to_words(stripped)]
        if rendered:
            output.append(" ".join(rendered))

    return "\n".join(output)
326
+
327
+
328
  def text_to_json(text: str) -> dict:
329
  """
330
  Convert text format to JSON structure expected by the model.