Shuwei Hou
committed on
Commit
·
dd54002
1
Parent(s):
107e251
update_pause_between_segments
Browse files
- .gitignore +2 -1
- pause.py +21 -36
.gitignore
CHANGED
@@ -52,6 +52,7 @@ ENNI_transcript.py
|
|
52 |
input/
|
53 |
performance_eval/
|
54 |
session_data/
|
55 |
-
|
|
|
56 |
# Model
|
57 |
CrisperWhisper_local/
|
|
|
52 |
input/
|
53 |
performance_eval/
|
54 |
session_data/
|
55 |
+
eval_for_SATE_v1_paper_step1.py
|
56 |
+
eval_for_SATE_v1_paper_step2.py
|
57 |
# Model
|
58 |
CrisperWhisper_local/
|
pause.py
CHANGED
@@ -35,50 +35,35 @@ def annotate_pauses(session_id, threshold, base_dir="session_data"):
|
|
35 |
pauses.append(pause_info)
|
36 |
segment["pauses"] = pauses
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
new_segments.append(segment)
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
49 |
|
50 |
-
if
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
54 |
|
55 |
-
if
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
"text": "",
|
60 |
-
"speaker": "PAUSE",
|
61 |
-
"words": [],
|
62 |
-
"pauses": [
|
63 |
-
{
|
64 |
-
"start": round(last_word_end, 3),
|
65 |
-
"end": round(next_word_start, 3),
|
66 |
-
"duration": round(next_word_start - last_word_end, 3)
|
67 |
-
}
|
68 |
-
]
|
69 |
-
}
|
70 |
-
new_segments.append(pause_segment)
|
71 |
-
|
72 |
-
data["segments"] = new_segments
|
73 |
|
74 |
with open(json_file, "w", encoding="utf-8") as f:
|
75 |
json.dump(data, f, ensure_ascii=False, indent=4)
|
76 |
|
77 |
print(f"Session {session_id} pause annotation done: {json_file}")
|
78 |
-
print(f"Total segments after processing: {len(new_segments)}")
|
79 |
-
|
80 |
-
pause_segments = [seg for seg in new_segments if seg.get("speaker") == "PAUSE"]
|
81 |
-
print(f"Added {len(pause_segments)} inter-sentence pause segments")
|
82 |
|
83 |
return data
|
84 |
|
|
|
35 |
pauses.append(pause_info)
|
36 |
segment["pauses"] = pauses
|
37 |
|
38 |
+
inter_segment_pauses = 0
|
39 |
+
for i in range(len(segments) - 1):
|
40 |
+
current_segment = segments[i]
|
41 |
+
next_segment = segments[i + 1]
|
|
|
42 |
|
43 |
+
current_words = current_segment.get("words", [])
|
44 |
+
next_words = next_segment.get("words", [])
|
45 |
+
|
46 |
+
if current_words and next_words:
|
47 |
+
last_word_end = current_words[-1]["end"]
|
48 |
+
next_word_start = next_words[0]["start"]
|
49 |
+
gap = next_word_start - last_word_end
|
50 |
|
51 |
+
if gap > threshold:
|
52 |
+
inter_pause = {
|
53 |
+
"start": round(last_word_end, 3),
|
54 |
+
"end": round(next_word_start, 3),
|
55 |
+
"duration": round(gap, 3)
|
56 |
+
}
|
57 |
|
58 |
+
if "pauses" not in next_segment:
|
59 |
+
next_segment["pauses"] = []
|
60 |
+
next_segment["pauses"].insert(0, inter_pause)
|
61 |
+
inter_segment_pauses += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
with open(json_file, "w", encoding="utf-8") as f:
|
64 |
json.dump(data, f, ensure_ascii=False, indent=4)
|
65 |
|
66 |
print(f"Session {session_id} pause annotation done: {json_file}")
|
|
|
|
|
|
|
|
|
67 |
|
68 |
return data
|
69 |
|