jupyterjazz committed on
Commit b3d45f6 · 1 Parent(s): 8a9e9ed

feat: merged checkpoint, modified qwen, readme


Signed-off-by: jupyterjazz <saba.sturua@jina.ai>

README.md CHANGED
@@ -22,11 +22,9 @@ image_paths = ['/<path_to_image>']
 images = [Image.open(path) for path in image_paths]
 
 # Example 1: Text matching task with single vector embeddings
-model.set_task(task='text-matching')
-
 # Generate embeddings with dimension truncation (256), decrease max_pixels
-img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112)
-text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512)
+img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112, task='text-matching')
+text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512, task='text-matching')
 
 # Example 2: Retrieval task with multi-vector embeddings
 model.set_task(task='retrieval')
@@ -36,10 +34,8 @@ img_embeddings = model.encode_images(images=images, vector_type='multi_vector')
 text_embeddings = model.encode_texts(texts=texts, vector_type='multi_vector', prompt_name='passage')
 
 # Example 3: Code task with single vector embeddings
-model.set_task(task='code')
-
 code = ["def hello_world():\n print('Hello, World!')"]
-code_embeddings = model.encode_texts(texts=code)
+code_embeddings = model.encode_texts(texts=code, task='code')
 
 ```
 
@@ -75,8 +71,8 @@ with torch.no_grad():
 
     with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
         # Get embeddings
-        text_embeddings = model.model(**text_batch).single_vec_emb
-        img_embeddings = model.model(**image_batch).single_vec_emb
+        text_embeddings = model.model(**text_batch, task_label='retrieval').single_vec_emb
+        img_embeddings = model.model(**image_batch, task_label='retrieval').single_vec_emb
 
 
 ```
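The README edits above drop the separate `model.set_task(...)` call for the text-matching and code examples and instead pass a `task=` argument directly to the encode methods. A minimal end-to-end sketch of that calling convention follows; the checkpoint id, the `trust_remote_code` loading path, and the file names are illustrative assumptions, only the `encode_texts`/`encode_images` calls with `task=` come from the diff itself.

```python
# Sketch of the per-call task selection shown in the README diff above.
# The repo id and file paths are placeholders, not taken from this commit.
from PIL import Image
from transformers import AutoModel

model = AutoModel.from_pretrained("jinaai/jina-embeddings-v4", trust_remote_code=True)

texts = ["A brown cat sitting on a chair"]
images = [Image.open("/path/to/image.png")]

# Single-vector embeddings; the task is chosen per call instead of via set_task()
text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, task="text-matching")
img_embeddings = model.encode_images(images=images, truncate_dim=256, task="text-matching")

# The code task works the same way
code_embeddings = model.encode_texts(texts=["def hello():\n    pass"], task="code")
```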
adapters/{retrieval/adapter_config.json → adapter_config.json} RENAMED
File without changes
adapters/{text-matching/adapter_model.safetensors → adapter_model.safetensors} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3677815cef695c54aae2358c574c046d6d9a5787fd96ca457ee00ac656576985
-size 120138416
+oid sha256:7a5cb8cc0f4e10f184ccc10f8864999098b887dbc4107221ec0e400d927f4555
+size 360095344
adapters/code/adapter_config.json DELETED
@@ -1,26 +0,0 @@
-{
-  "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "jinaai/colqwen25-duo-base",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": false,
-  "init_lora_weights": "gaussian",
-  "layer_replication": null,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "loftq_config": {},
-  "lora_alpha": 32,
-  "lora_dropout": 0.1,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "r": 32,
-  "rank_pattern": {},
-  "revision": null,
-  "target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
-  "task_type": "FEATURE_EXTRACTION",
-  "use_dora": false,
-  "use_rslora": false
-}
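The per-task adapter folders deleted in this commit all shared the same LoRA hyperparameters, which now live only in the merged adapter. For reference, a roughly equivalent configuration built with `peft` would look like the sketch below; the values are copied from the deleted JSON above, while the variable name and the idea of constructing it in code are only illustrative.

```python
# Rough peft equivalent of the deleted adapter_config.json (values taken from the JSON above).
# This is an illustrative reconstruction, not code from the repository.
from peft import LoraConfig

lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    init_lora_weights="gaussian",
    task_type="FEATURE_EXTRACTION",
    # Regex targeting the attention/MLP projections plus the two embedding projectors
    target_modules=(
        "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$"
        "|.*(single_vector_projector|multi_vector_projector).*$)"
    ),
)
```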
adapters/code/adapter_model.safetensors DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:510d017efc64c97e2db985ed1a96b17477ac97e1a5470996209041ad35beeee7
-size 119802032
adapters/retrieval/adapter_model.safetensors DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0c2b1d85506d01bd29a942975cb0abbd8c4af3487fb80b5ad408ae0e55f8bb3a
-size 120138416
adapters/text-matching/adapter_config.json DELETED
@@ -1,26 +0,0 @@
-{
-  "alpha_pattern": {},
-  "auto_mapping": null,
-  "base_model_name_or_path": "jinaai/colqwen25-duo-base",
-  "bias": "none",
-  "fan_in_fan_out": false,
-  "inference_mode": true,
-  "init_lora_weights": "gaussian",
-  "layer_replication": null,
-  "layers_pattern": null,
-  "layers_to_transform": null,
-  "loftq_config": {},
-  "lora_alpha": 32,
-  "lora_dropout": 0.1,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
-  "modules_to_save": null,
-  "peft_type": "LORA",
-  "r": 32,
-  "rank_pattern": {},
-  "revision": null,
-  "target_modules": "(.*(model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$|.*(single_vector_projector|multi_vector_projector).*$)",
-  "task_type": "FEATURE_EXTRACTION",
-  "use_dora": false,
-  "use_rslora": false
-}
modeling_jina_embeddings_v4.py CHANGED
@@ -522,6 +522,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         """
         if "torch_dtype" not in kwargs:
             kwargs["torch_dtype"] = "auto"
+
+        if torch.cuda.is_available() and "attn_implementation" not in kwargs:
+            kwargs["attn_implementation"] = "flash_attention_2"
 
         base_model = super().from_pretrained(
             pretrained_model_name_or_path, *args, **kwargs
@@ -536,7 +539,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         )
         adapter_dir = os.path.join(adapter_cache_path, "adapters")
 
-        lora_config = LoraConfig.from_pretrained(os.path.join(adapter_dir, "test"))
+        lora_config = LoraConfig.from_pretrained(adapter_dir)
         lora_config._custom_modules = {
             torch.nn.modules.linear.Linear: partial(
                 MultiAdapterLinear,
@@ -545,7 +548,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         }
         peft_model = PeftModel.from_pretrained(
             model=base_model,
-            model_id=os.path.join(adapter_dir, "test"),
+            model_id=adapter_dir,
             config=lora_config,
         )
 
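Two things change in `from_pretrained` above: the merged adapter directory is now loaded directly (the temporary "test" subfolder is gone), and `flash_attention_2` becomes the default attention backend whenever CUDA is available and the caller has not chosen one. Because the default only applies when the key is absent from `kwargs`, an explicit value still wins; a hedged sketch of opting out follows (the repository id is illustrative).

```python
# Explicitly choosing an attention backend overrides the new flash_attention_2 default,
# which is useful on CUDA machines without flash-attn installed.
# The model id below is illustrative, not taken from this diff.
import torch
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "jinaai/jina-embeddings-v4",      # hypothetical repo id
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,       # if omitted, the code above falls back to torch_dtype="auto"
    attn_implementation="sdpa",       # skip the flash_attention_2 default
)
```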
qwen2_5_vl.py CHANGED
@@ -945,6 +945,7 @@ class Qwen2_5_VLAttention(nn.Module):
 
     def forward(
         self,
+        task_label: Union[str, List[str]],
         hidden_states: torch.Tensor,
         attention_mask: Optional[torch.Tensor] = None,
         position_ids: Optional[torch.LongTensor] = None,
@@ -956,9 +957,9 @@ class Qwen2_5_VLAttention(nn.Module):
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         bsz, q_len, _ = hidden_states.size()
 
-        query_states = self.q_proj(hidden_states)
-        key_states = self.k_proj(hidden_states)
-        value_states = self.v_proj(hidden_states)
+        query_states = self.q_proj(hidden_states, task_label=task_label)
+        key_states = self.k_proj(hidden_states, task_label=task_label)
+        value_states = self.v_proj(hidden_states, task_label=task_label)
 
         query_states = query_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
         key_states = key_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
@@ -1002,7 +1003,7 @@ class Qwen2_5_VLAttention(nn.Module):
         attn_output = attn_output.transpose(1, 2).contiguous()
         attn_output = attn_output.reshape(bsz, q_len, -1)
 
-        attn_output = self.o_proj(attn_output)
+        attn_output = self.o_proj(attn_output, task_label=task_label)
 
         if not output_attentions:
             attn_weights = None
@@ -1021,7 +1022,6 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-
         # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
         # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignement, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
         # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
@@ -1029,6 +1029,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
 
     def forward(
         self,
+        task_label: Union[str, List[str]],
         hidden_states: torch.Tensor,
         attention_mask: Optional[torch.Tensor] = None,
         position_ids: Optional[torch.LongTensor] = None,
@@ -1040,9 +1041,9 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
     ):
         bsz, q_len, _ = hidden_states.size()
 
-        query_states = self.q_proj(hidden_states)
-        key_states = self.k_proj(hidden_states)
-        value_states = self.v_proj(hidden_states)
+        query_states = self.q_proj(hidden_states, task_label=task_label)
+        key_states = self.k_proj(hidden_states, task_label=task_label)
+        value_states = self.v_proj(hidden_states, task_label=task_label)
 
         query_states = query_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
         key_states = key_states.view(bsz, q_len, -1, self.head_dim).transpose(1, 2)
@@ -1113,7 +1114,7 @@ class Qwen2_5_VLFlashAttention2(Qwen2_5_VLAttention):
         )
 
         attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
-        attn_output = self.o_proj(attn_output)
+        attn_output = self.o_proj(attn_output, task_label=task_label)
 
         if not output_attentions:
             attn_weights = None
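The signature changes above thread a `task_label` argument through every attention projection so that the single merged LoRA checkpoint can route each forward pass (or each item in a mixed batch) to the adapter for its task. The actual routing module, `MultiAdapterLinear`, is referenced from modeling_jina_embeddings_v4.py but not shown in this diff; the sketch below is only a minimal illustration of the idea, with assumed names and simplified adapter math.

```python
# Minimal sketch of a task-routed linear layer. This is NOT the repository's
# MultiAdapterLinear; names, storage layout, and adapter math are simplified
# to illustrate why the q/k/v/o projections now accept a task_label argument.
from typing import List, Union

import torch
import torch.nn as nn


class TaskRoutedLinear(nn.Module):
    """Wraps a base nn.Linear and adds one low-rank (LoRA-style) update per task."""

    def __init__(self, base: nn.Linear, task_names: List[str], rank: int = 32, alpha: int = 32):
        super().__init__()
        self.base = base
        self.scaling = alpha / rank
        self.lora_A = nn.ModuleDict(
            {t: nn.Linear(base.in_features, rank, bias=False) for t in task_names}
        )
        self.lora_B = nn.ModuleDict(
            {t: nn.Linear(rank, base.out_features, bias=False) for t in task_names}
        )

    def forward(self, x: torch.Tensor, task_label: Union[str, List[str]]) -> torch.Tensor:
        out = self.base(x)
        if isinstance(task_label, str):
            # Whole batch shares one task: a single adapter pass.
            return out + self.scaling * self.lora_B[task_label](self.lora_A[task_label](x))
        # Mixed batch: apply each row's adapter separately.
        rows = [
            o + self.scaling * self.lora_B[t](self.lora_A[t](xi))
            for xi, o, t in zip(x, out, task_label)
        ]
        return torch.stack(rows)
```

A caller would then pass, for example, `task_label='retrieval'` (or a per-row list of task names) down from the embedding model, which is exactly what the modified forward signatures above make possible.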