Upload 4 files
Browse files
TokenDurationPrediction.mlmodelc/analytics/coremldata.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d844856c854d42e6a58215dae5f75f82ea4da7cb7dbefb60db082a56c3a223dc
|
3 |
+
size 243
|
TokenDurationPrediction.mlmodelc/coremldata.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03bd0964aae75139a64e2d25090b2c25c4aabe234bc5f63ae23d5e4d616d25d3
|
3 |
+
size 424
|
TokenDurationPrediction.mlmodelc/metadata.json
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"shortDescription" : "Token and duration prediction for TDT decoder",
|
4 |
+
"metadataOutputVersion" : "3.0",
|
5 |
+
"outputSchema" : [
|
6 |
+
{
|
7 |
+
"hasShapeFlexibility" : "0",
|
8 |
+
"isOptional" : "0",
|
9 |
+
"dataType" : "Int32",
|
10 |
+
"formattedType" : "MultiArray (Int32 1)",
|
11 |
+
"shortDescription" : "",
|
12 |
+
"shape" : "[1]",
|
13 |
+
"name" : "var_17",
|
14 |
+
"type" : "MultiArray"
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"hasShapeFlexibility" : "0",
|
18 |
+
"isOptional" : "0",
|
19 |
+
"dataType" : "Float16",
|
20 |
+
"formattedType" : "MultiArray (Float16 1)",
|
21 |
+
"shortDescription" : "",
|
22 |
+
"shape" : "[1]",
|
23 |
+
"name" : "reduce_max_0",
|
24 |
+
"type" : "MultiArray"
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"hasShapeFlexibility" : "0",
|
28 |
+
"isOptional" : "0",
|
29 |
+
"dataType" : "Int32",
|
30 |
+
"formattedType" : "MultiArray (Int32 1)",
|
31 |
+
"shortDescription" : "",
|
32 |
+
"shape" : "[1]",
|
33 |
+
"name" : "var_24",
|
34 |
+
"type" : "MultiArray"
|
35 |
+
}
|
36 |
+
],
|
37 |
+
"version" : "1.0",
|
38 |
+
"modelParameters" : [
|
39 |
+
|
40 |
+
],
|
41 |
+
"author" : "FluidAudio",
|
42 |
+
"specificationVersion" : 7,
|
43 |
+
"mlProgramOperationTypeHistogram" : {
|
44 |
+
"SliceByIndex" : 2,
|
45 |
+
"Ios16.reduceArgmax" : 2,
|
46 |
+
"Ios16.reshape" : 1,
|
47 |
+
"Ios16.reduceMax" : 1
|
48 |
+
},
|
49 |
+
"computePrecision" : "Mixed (Float16, Int32)",
|
50 |
+
"stateSchema" : [
|
51 |
+
|
52 |
+
],
|
53 |
+
"isUpdatable" : "0",
|
54 |
+
"availability" : {
|
55 |
+
"macOS" : "13.0",
|
56 |
+
"tvOS" : "16.0",
|
57 |
+
"visionOS" : "1.0",
|
58 |
+
"watchOS" : "9.0",
|
59 |
+
"iOS" : "16.0",
|
60 |
+
"macCatalyst" : "16.0"
|
61 |
+
},
|
62 |
+
"modelType" : {
|
63 |
+
"name" : "MLModelType_mlProgram"
|
64 |
+
},
|
65 |
+
"inputSchema" : [
|
66 |
+
{
|
67 |
+
"hasShapeFlexibility" : "0",
|
68 |
+
"isOptional" : "0",
|
69 |
+
"dataType" : "Float16",
|
70 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 1 × 1030)",
|
71 |
+
"shortDescription" : "",
|
72 |
+
"shape" : "[1, 1, 1, 1030]",
|
73 |
+
"name" : "logits",
|
74 |
+
"type" : "MultiArray"
|
75 |
+
}
|
76 |
+
],
|
77 |
+
"userDefinedMetadata" : {
|
78 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript",
|
79 |
+
"com.github.apple.coremltools.source" : "torch==2.5.0",
|
80 |
+
"com.github.apple.coremltools.version" : "8.3.0"
|
81 |
+
},
|
82 |
+
"generatedClassName" : "TokenDurationPrediction",
|
83 |
+
"method" : "predict"
|
84 |
+
}
|
85 |
+
]
|
TokenDurationPrediction.mlmodelc/model.mil
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
program(1.0)
|
2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3404.23.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
|
3 |
+
{
|
4 |
+
func main<ios16>(tensor<fp16, [1, 1, 1, 1030]> logits) {
|
5 |
+
tensor<int32, [1]> var_3 = const()[name = tensor<string, []>("op_3"), val = tensor<int32, [1]>([-1])];
|
6 |
+
tensor<fp16, [1030]> flattened_cast_fp16 = reshape(shape = var_3, x = logits)[name = tensor<string, []>("flattened_cast_fp16")];
|
7 |
+
tensor<int32, [1]> token_logits_begin_0 = const()[name = tensor<string, []>("token_logits_begin_0"), val = tensor<int32, [1]>([0])];
|
8 |
+
tensor<int32, [1]> token_logits_end_0 = const()[name = tensor<string, []>("token_logits_end_0"), val = tensor<int32, [1]>([1025])];
|
9 |
+
tensor<bool, [1]> token_logits_end_mask_0 = const()[name = tensor<string, []>("token_logits_end_mask_0"), val = tensor<bool, [1]>([false])];
|
10 |
+
tensor<fp16, [1025]> token_logits_cast_fp16 = slice_by_index(begin = token_logits_begin_0, end = token_logits_end_0, end_mask = token_logits_end_mask_0, x = flattened_cast_fp16)[name = tensor<string, []>("token_logits_cast_fp16")];
|
11 |
+
tensor<int32, [1]> duration_logits_begin_0 = const()[name = tensor<string, []>("duration_logits_begin_0"), val = tensor<int32, [1]>([1025])];
|
12 |
+
tensor<int32, [1]> duration_logits_end_0 = const()[name = tensor<string, []>("duration_logits_end_0"), val = tensor<int32, [1]>([1])];
|
13 |
+
tensor<bool, [1]> duration_logits_end_mask_0 = const()[name = tensor<string, []>("duration_logits_end_mask_0"), val = tensor<bool, [1]>([true])];
|
14 |
+
tensor<fp16, [5]> duration_logits_cast_fp16 = slice_by_index(begin = duration_logits_begin_0, end = duration_logits_end_0, end_mask = duration_logits_end_mask_0, x = flattened_cast_fp16)[name = tensor<string, []>("duration_logits_cast_fp16")];
|
15 |
+
tensor<int32, []> var_17_axis_0 = const()[name = tensor<string, []>("op_17_axis_0"), val = tensor<int32, []>(0)];
|
16 |
+
tensor<bool, []> var_17_keep_dims_0 = const()[name = tensor<string, []>("op_17_keep_dims_0"), val = tensor<bool, []>(true)];
|
17 |
+
tensor<int32, [1]> var_17 = reduce_argmax(axis = var_17_axis_0, keep_dims = var_17_keep_dims_0, x = token_logits_cast_fp16)[name = tensor<string, []>("op_17_cast_fp16")];
|
18 |
+
tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = tensor<string, []>("reduce_max_0_axes_0"), val = tensor<int32, [1]>([0])];
|
19 |
+
tensor<bool, []> reduce_max_0_keep_dims_0 = const()[name = tensor<string, []>("reduce_max_0_keep_dims_0"), val = tensor<bool, []>(true)];
|
20 |
+
tensor<fp16, [1]> reduce_max_0 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = token_logits_cast_fp16)[name = tensor<string, []>("reduce_max_0_cast_fp16")];
|
21 |
+
tensor<int32, []> var_24_axis_0 = const()[name = tensor<string, []>("op_24_axis_0"), val = tensor<int32, []>(0)];
|
22 |
+
tensor<bool, []> var_24_keep_dims_0 = const()[name = tensor<string, []>("op_24_keep_dims_0"), val = tensor<bool, []>(true)];
|
23 |
+
tensor<int32, [1]> var_24 = reduce_argmax(axis = var_24_axis_0, keep_dims = var_24_keep_dims_0, x = duration_logits_cast_fp16)[name = tensor<string, []>("op_24_cast_fp16")];
|
24 |
+
} -> (var_17, reduce_max_0, var_24);
|
25 |
+
}
|