Commit b3d45f6
Parent(s): 8a9e9ed

feat: merged checkpoint, modified qwen, readme

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>
- README.md +5 -9
- adapters/{retrieval/adapter_config.json → adapter_config.json} +0 -0
- adapters/{text-matching/adapter_model.safetensors → adapter_model.safetensors} +2 -2
- adapters/code/adapter_config.json +0 -26
- adapters/code/adapter_model.safetensors +0 -3
- adapters/retrieval/adapter_model.safetensors +0 -3
- adapters/text-matching/adapter_config.json +0 -26
- modeling_jina_embeddings_v4.py +5 -2
- qwen2_5_vl.py +10 -9
README.md
CHANGED
@@ -22,11 +22,9 @@ image_paths = ['/<path_to_image>']
 images = [Image.open(path) for path in image_paths]

 # Example 1: Text matching task with single vector embeddings
-model.set_task(task='text-matching')
-
 # Generate embeddings with dimension truncation (256), decrease max_pixels
-img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112)
-text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512)
+img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112, task='text-matching')
+text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512, task='text-matching')

 # Example 2: Retrieval task with multi-vector embeddings
 model.set_task(task='retrieval')
@@ -36,10 +34,8 @@ img_embeddings = model.encode_images(images=images, vector_type='multi_vector')
 text_embeddings = model.encode_texts(texts=texts, vector_type='multi_vector', prompt_name='passage')

 # Example 3: Code task with single vector embeddings
-model.set_task(task='code')
-
 code = ["def hello_world():\n print('Hello, World!')"]
-code_embeddings = model.encode_texts(texts=code)
+code_embeddings = model.encode_texts(texts=code, task='code')

 ```

@@ -75,8 +71,8 @@ with torch.no_grad():

 with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
     # Get embeddings
-    text_embeddings = model.model(**text_batch).single_vec_emb
-    img_embeddings = model.model(**image_batch).single_vec_emb
+    text_embeddings = model.model(**text_batch, task_label='retrieval').single_vec_emb
+    img_embeddings = model.model(**image_batch, task_label='retrieval').single_vec_emb


 ```
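The README hunks above drop the global `model.set_task(...)` pattern in favor of a per-call `task=` argument on `encode_images`/`encode_texts`, and pass `task_label=` directly to the low-level `model.model(...)` call. The sketch below pulls the new calling convention into one minimal example; the repository id, image path, and sample text are placeholders, and loading through `AutoModel` with `trust_remote_code=True` is an assumption about how the remote-code model is used, not something stated in this commit.

```python
# Minimal sketch of the per-call task API introduced above.
# "<model_repo>" and "<path_to_image>" are placeholders; AutoModel with
# trust_remote_code=True is an assumed loading path, not part of this diff.
from PIL import Image
from transformers import AutoModel

model = AutoModel.from_pretrained("<model_repo>", trust_remote_code=True)

texts = ["A short example sentence"]
images = [Image.open("<path_to_image>")]

# The task is chosen per call instead of via model.set_task(...)
img_embeddings = model.encode_images(images=images, truncate_dim=256, task='text-matching')
text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512, task='text-matching')

# Same pattern for code embeddings
code = ["def hello_world():\n    print('Hello, World!')"]
code_embeddings = model.encode_texts(texts=code, task='code')
```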
adapters/{retrieval/adapter_config.json → adapter_config.json}
RENAMED
File without changes
adapters/{text-matching/adapter_model.safetensors → adapter_model.safetensors}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7a5cb8cc0f4e10f184ccc10f8864999098b887dbc4107221ec0e400d927f4555
+size 360095344
adapters/code/adapter_config.json
DELETED
@@ -1,26 +0,0 @@
-{
-  "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "jinaai/colqwen25-duo-base",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": false,
-  "init_lora_weights": "gaussian",
-  "layer_replication": null,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "loftq_config": {},
-  "lora_alpha": 32,
-  "lora_dropout": 0.1,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "r": 32,
-  "rank_pattern": {},
-  "revision": null,
-  "target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
-  "task_type": "FEATURE_EXTRACTION",
-  "use_dora": false,
-  "use_rslora": false
-}
adapters/code/adapter_model.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:510d017efc64c97e2db985ed1a96b17477ac97e1a5470996209041ad35beeee7
-size 119802032
adapters/retrieval/adapter_model.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0c2b1d85506d01bd29a942975cb0abbd8c4af3487fb80b5ad408ae0e55f8bb3a
-size 120138416
adapters/text-matching/adapter_config.json
DELETED
@@ -1,26 +0,0 @@
-{
-  "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "jinaai/colqwen25-duo-base",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": true,
-  "init_lora_weights": "gaussian",
-  "layer_replication": null,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "loftq_config": {},
-  "lora_alpha": 32,
-  "lora_dropout": 0.1,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "r": 32,
-  "rank_pattern": {},
-  "revision": null,
-  "target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
-  "task_type": "FEATURE_EXTRACTION",
-  "use_dora": false,
-  "use_rslora": false
-}
modeling_jina_embeddings_v4.py
CHANGED
@@ -522,6 +522,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         """
         if "torch_dtype" not in kwargs:
             kwargs["torch_dtype"] = "auto"
+
+        if torch.cuda.is_available() and "attn_implementation" not in kwargs:
+            kwargs["attn_implementation"] = "flash_attention_2"

         base_model = super().from_pretrained(
             pretrained_model_name_or_path, *args, **kwargs
@@ -536,7 +539,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         )
         adapter_dir = os.path.join(adapter_cache_path, "adapters")

-        lora_config = LoraConfig.from_pretrained(
+        lora_config = LoraConfig.from_pretrained(adapter_dir)
         lora_config._custom_modules = {
             torch.nn.modules.linear.Linear: partial(
                 MultiAdapterLinear,
@@ -545,7 +548,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         }
         peft_model = PeftModel.from_pretrained(
             model=base_model,
-            model_id=
+            model_id=adapter_dir,
             config=lora_config,
         )

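The loader changes above switch from per-task adapter directories to a single merged checkpoint under `adapters/`, loaded once with `LoraConfig.from_pretrained(adapter_dir)` and wired in by mapping `torch.nn.Linear` to `MultiAdapterLinear`. `MultiAdapterLinear` itself is defined elsewhere in the repository and is not shown in this commit; the sketch below is a hypothetical task-routed LoRA linear that illustrates the idea, reusing the `r=32`, `lora_alpha=32` values from the deleted adapter configs. It is not the actual implementation.

```python
# Hypothetical sketch of a task-routed LoRA linear in the spirit of MultiAdapterLinear.
# Names, shapes, and routing logic are illustrative assumptions; the real class is
# not part of this commit.
import torch
import torch.nn as nn


class TaskRoutedLoraLinear(nn.Module):
    def __init__(self, base: nn.Linear, tasks, r: int = 32, lora_alpha: int = 32):
        super().__init__()
        self.base = base  # shared frozen projection
        self.scaling = lora_alpha / r
        # One low-rank (A, B) pair per task, all stored in the same merged checkpoint
        self.lora_A = nn.ModuleDict({t: nn.Linear(base.in_features, r, bias=False) for t in tasks})
        self.lora_B = nn.ModuleDict({t: nn.Linear(r, base.out_features, bias=False) for t in tasks})

    def forward(self, x: torch.Tensor, task_label: str) -> torch.Tensor:
        # task_label picks which adapter branch contributes the low-rank delta
        delta = self.lora_B[task_label](self.lora_A[task_label](x))
        return self.base(x) + self.scaling * delta


# Usage: one module serves several tasks, selected at call time
proj = TaskRoutedLoraLinear(nn.Linear(16, 16), tasks=["retrieval", "text-matching", "code"])
out = proj(torch.randn(2, 4, 16), task_label="retrieval")
```

Keeping every task branch inside one wrapped module is what allows this commit to collapse the three per-task adapter directories into a single `adapter_config.json` and `adapter_model.safetensors`, consistent with the "merged checkpoint" wording in the commit message.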
qwen2_5_vl.py
CHANGED
@@ -945,6 +945,7 @@ class Qwen2_5_VLAttention(nn.Module):

     def forward(
         self,
+        task_label: Union[str, List[str]],
         hidden_states: torch.Tensor,
         attention_mask: Optional[torch.Tensor] = None,
         position_ids: Optional[torch.LongTensor] = None,
@@ -956,9 +957,9 @@ class Qwen2_5_VLAttention(nn.Module):
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         bsz, q_len, _ = hidden_states.size()

-        query_states = self.q_proj(hidden_states)
-        key_states = self.k_proj(hidden_states)
-        value_states = self.v_proj(hidden_states)
+        query_states = self.q_proj(hidden_states, task_label=task_label)
+        key_states = self.k_proj(hidden_states, task_label=task_label)
+        value_states = self.v_proj(hidden_states, task_label=task_label)

         query_states = query_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
         key_states = key_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
@@ -1002,7 +1003,7 @@ class Qwen2_5_VLAttention(nn.Module):
         attn_output = attn_output.transpose(1, 2).contiguous()
         attn_output = attn_output.reshape(bsz, q_len, -1)

-        attn_output = self.o_proj(attn_output)
+        attn_output = self.o_proj(attn_output, task_label=task_label)

         if not output_attentions:
             attn_weights = None
@@ -1021,7 +1022,6 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-
         # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
         # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignement, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
         # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
@@ -1029,6 +1029,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):

     def forward(
         self,
+        task_label: Union[str, List[str]],
         hidden_states: torch.Tensor,
         attention_mask: Optional[torch.Tensor] = None,
         position_ids: Optional[torch.LongTensor] = None,
@@ -1040,9 +1041,9 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
     ):
         bsz, q_len, _ = hidden_states.size()

-        query_states = self.q_proj(hidden_states)
-        key_states = self.k_proj(hidden_states)
-        value_states = self.v_proj(hidden_states)
+        query_states = self.q_proj(hidden_states, task_label=task_label)
+        key_states = self.k_proj(hidden_states, task_label=task_label)
+        value_states = self.v_proj(hidden_states, task_label=task_label)

         query_states = query_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
         key_states = key_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
@@ -1113,7 +1114,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
         )

         attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
-        attn_output = self.o_proj(attn_output)
+        attn_output = self.o_proj(attn_output, task_label=task_label)

         if not output_attentions:
             attn_weights = None
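With these changes both attention variants take `task_label` as an explicit argument and forward it to `q_proj`/`k_proj`/`v_proj`/`o_proj`, so adapter selection travels with each call instead of living in global model state (the `set_task` style the README moves away from). The toy block below only illustrates that threading pattern under the assumption that the wrapped projections accept a `task_label` keyword; it is not the repository's attention code.

```python
# Toy illustration of the task_label threading pattern; hypothetical, not the
# actual Qwen2_5_VLAttention implementation.
from typing import List, Union

import torch
import torch.nn as nn


class TaskAwareLinear(nn.Linear):
    """Stand-in for an adapter-wrapped projection that accepts task_label."""

    def forward(self, x: torch.Tensor, task_label=None) -> torch.Tensor:
        # A real multi-adapter linear would route on task_label here.
        return super().forward(x)


class TinyTaskAwareAttention(nn.Module):
    def __init__(self, dim: int = 16):
        super().__init__()
        self.q_proj = TaskAwareLinear(dim, dim)
        self.k_proj = TaskAwareLinear(dim, dim)
        self.v_proj = TaskAwareLinear(dim, dim)
        self.o_proj = TaskAwareLinear(dim, dim)

    def forward(self, task_label: Union[str, List[str]], hidden_states: torch.Tensor) -> torch.Tensor:
        # task_label is threaded into every projection, mirroring the diff above
        q = self.q_proj(hidden_states, task_label=task_label)
        k = self.k_proj(hidden_states, task_label=task_label)
        v = self.v_proj(hidden_states, task_label=task_label)
        attn = torch.softmax(q @ k.transpose(-2, -1) / q.shape[-1] ** 0.5, dim=-1)
        return self.o_proj(attn @ v, task_label=task_label)


block = TinyTaskAwareAttention(dim=16)
out = block("retrieval", torch.randn(2, 4, 16))
```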