In [1]:
import torch.nn as nn
from sentence_transformers import models

dense = models.Dense(
 in_features=768,
 out_features=512,
 bias=False,
 activation_function= nn.Identity()
 )

pooling = models.Pooling(
 768,
 pooling_mode_cls_token=False,
 pooling_mode_mean_tokens=True,
 pooling_mode_max_tokens=False,
 pooling_mode_mean_sqrt_len_tokens=False
 )

 from .autonotebook import tqdm as notebook_tqdm


### Create a combined onnx model by applying dense to transformer model(which seems to be including a transfomer and a pooling already)

In [16]:
from sentence_transformers import SentenceTransformer
from sentence_transformers import models
from transformers import AutoTokenizer
import torch
import torch.nn as nn
import onnx
import numpy as np


class CombinedModel(nn.Module):
 def __init__(self, transformer_model, dense_model):
 super(CombinedModel, self).__init__()
 self.transformer = transformer_model
 self.dense = dense_model
 self.tokenizer = tokenizer

 def forward(self, input_ids, attention_mask):
 transformer_output = self.transformer({'input_ids': input_ids, 'attention_mask': attention_mask})
 token_embeddings = transformer_output['token_embeddings']
 dense_output = self.dense({'sentence_embedding': token_embeddings})
 dense_output_tensor = dense_output['sentence_embedding']
 mean_output = torch.mean(dense_output_tensor, dim=1)
 
 flattened_output = mean_output.squeeze(0)
 return flattened_output

# Load the transformer model
transformer_model = SentenceTransformer('clip-ViT-B-32-multilingual-v1', cache_folder='model_pytorch')
tokenizer = transformer_model.tokenizer
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/clip-ViT-B-32-multilingual-v1")

dense_model = models.Dense(
 in_features=768,
 out_features=512,
 bias=False,
 activation_function= nn.Identity()
)

# Load the state_dict into the model
state_dict = torch.load('model_pytorch/sentence-transformers_clip-ViT-B-32-multilingual-v1/2_Dense/pytorch_model.bin')
dense_model.load_state_dict(state_dict)
# Create the combined model
model = CombinedModel(transformer_model, dense_model)
model.eval()
# model.forward(torch.zeros(1, 128, dtype=torch.long), torch.zeros(1, 128, dtype=torch.long))

input_text = "This is a multi-lingual version of the OpenAI CLIP-ViT-B32 model. You can map text (in 50+ languages) and images to a common dense vector space such that images and the matching texts are close."

inputs = tokenizer(input_text, return_tensors="pt")
# inputs = tokenizer(input_text, padding='longest', truncation=True, max_length=128, return_tensors='pt')
# inputs = tokenizer(input_text, padding='longest', truncation=True, max_length=128, return_tensors='pt')
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Export the model
torch.onnx.export(model, # model being run
 (input_ids, attention_mask), # model input (or a tuple for multiple inputs)
 "combined_model.onnx", # where to save the model (can be a file or file-like object)
 export_params=True, # store the trained parameter weights inside the model file
 opset_version=17, # the ONNX version to export the model to
 do_constant_folding=True, # whether to execute constant folding for optimization
 input_names = ['input_ids', 'attention_mask'], # the model's input names
 output_names = ['embedding'], # the model's output names
 dynamic_axes={'input_ids': {0 : 'batch_size', 1: 'seq_length'}, 
 'attention_mask' : {0 : 'batch_size', 1: 'seq_length'},
 'output' : {0 : 'batch_size'}})

onnx.checker.check_model("combined_model.onnx")

comdined_model = onnx.load("combined_model.onnx")

# Get the name and shape of the input
input_name = comdined_model.graph.input[0].name
input_shape = [dim.dim_value for dim in comdined_model.graph.input[0].type.tensor_type.shape.dim]
print(f"Input name: {input_name}, shape: {input_shape}")

# Get the name and shape of the output
output_name = comdined_model.graph.output[0].name
output_shape = [dim.dim_value for dim in comdined_model.graph.output[0].type.tensor_type.shape.dim]
print(f"Output name: {output_name}, shape: {output_shape}")

Input name: input_ids, shape: [0, 0]
Output name: embedding, shape: [512]


### Create Embedding with SentenceTransformer('sentence-transformers/clip-ViT-B-32-multilingual-v1')

In [3]:
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel

model = SentenceTransformer('sentence-transformers/clip-ViT-B-32-multilingual-v1')
# model = AutoModel.from_pretrained('sentence-transformers/clip-ViT-B-32-multilingual-v1')
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/clip-ViT-B-32-multilingual-v1')

# Prepare the text
text = "This is an example sentence."
inputs = tokenizer(text, return_tensors='np')
# time this function
import time

start_time = time.time()
output = model.encode(text, convert_to_tensor=True)
end_time = time.time()

print(f"Time taken: {(end_time - start_time) * 1000} milliseconds")
print(output)

AttributeError: 'DistilBertModel' object has no attribute 'encode'

### Create embedding with combined_model.onnx

In [4]:
import onnxruntime as ort
from transformers import AutoTokenizer

# Load the ONNX model
sess = ort.InferenceSession("combined_model.onnx")

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/clip-ViT-B-32-multilingual-v1')

# Prepare the text
text = "This is an example sentence."
inputs = tokenizer(text, padding='longest', truncation=True, max_length=128, return_tensors='np')

# Run inference
input_names = sess.get_inputs()
input_dict = {input_name.name: inputs[input_name.name] for input_name in input_names}

import time

start_time = time.time()
inputs = tokenizer(text, padding='longest', truncation=True, max_length=128, return_tensors='np')

# Run inference
input_names = sess.get_inputs()
input_dict = {input_name.name: inputs[input_name.name] for input_name in input_names}

output_dict = sess.run(None, input_dict)
end_time = time.time()

print(f"Time taken: {(end_time - start_time) * 1000} milliseconds")


# The output is a list of numpy arrays, one for each output tensor
# print(output_dict)

import numpy as np

# Take the mean of the output along the sequence length dimension
# mean_output = np.mean(output_dict[0], axis=1)

print(output_dict[0])
import json

# Convert the tensor to a list
output_list = output_dict[0].tolist()

with open('embedding.json', 'w') as f:
 json.dump(output_list, f)

Time taken: 4.876136779785156 milliseconds
[-1.26431406e-01 1.03924178e-01 -9.06252712e-02 -1.16209410e-01
 -1.91176921e-01 -1.43773332e-02 -1.63840994e-01 -1.41228139e+00
 3.42575192e-01 -7.24642305e-04 2.37428080e-02 1.82638362e-01
 1.42683402e-01 -2.04959169e-01 1.83473438e-01 -1.49883740e-02
 -9.43370238e-02 2.80145258e-02 -2.11930588e-01 -7.85048008e-02
 6.16276711e-02 -1.26432091e-01 1.72459960e-01 -3.41832429e-01
 -1.47544846e-01 1.70714810e-01 -1.50460646e-01 -2.35026971e-01
 2.82157630e-01 -6.23078272e-02 1.14018589e-01 -5.09377718e-02
 1.37666613e-01 8.92706960e-02 2.19570339e-01 2.68463999e-01
 3.69729966e-01 -5.66888601e-02 1.94477022e-01 -3.64378363e-01
 1.96955115e-01 -3.16487610e-01 7.82806799e-02 4.95261475e-02
 2.04724133e-01 2.51980215e-01 2.99986303e-01 1.94996685e-01
 4.60869633e-02 -4.34494168e-02 -2.18412057e-01 -4.18978930e-01
 -4.98592481e-02 -1.20867960e-01 8.79129488e-03 8.15327391e-02
 1.77408978e-01 -4.02529180e-01 -1.06028266e-01 -7.51051903e-02
 2.78445661

###m compare both model output

In [100]:
import torch
import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer

model = SentenceTransformer('sentence-transformers/clip-ViT-B-32-multilingual-v1')
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/clip-ViT-B-32-multilingual-v1')

# Prepare the input
text = "texts are close."
inputs = tokenizer(text, padding='longest', truncation=True, max_length=128, return_tensors='pt')

# Run the PyTorch model
pytorch_output = model.encode(text, convert_to_tensor=True, device='cpu')

# Convert the inputs to numpy arrays for the ONNX model
inputs_onnx = {name: tensor.numpy() for name, tensor in inputs.items()}

# Run the ONNX model
sess = ort.InferenceSession("combined_model.onnx")
onnx_output = sess.run(None, inputs_onnx)

# Compare the outputs
print("Are the outputs close?", np.allclose(pytorch_output.detach().numpy(), onnx_output[0], atol=1e-6))


# Calculate the differences between the outputs
differences = pytorch_output.detach().numpy() - onnx_output[0]

# Print the standard deviation of the differences
print("Standard deviation of the differences:", np.std(differences))

print("pytorch_output size:", pytorch_output.size())
print("onnx_output size:", onnx_output[0].shape)

Are the outputs close? True
Standard deviation of the differences: 1.4037249e-07
pytorch_output size: torch.Size([512])
onnx_output size: (512,)


### Compare the operation counts

In [68]:
import onnx

# Load the ONNX models
transformers_onnx_model = onnx.load("model_onnx/model.onnx")
combined_model = onnx.load("combined_model.onnx")

# Function to count ReduceMean and ReduceSum operations
def count_operations(model):
 reduce_mean_count = 0
 reduce_sum_count = 0
 for node in model.graph.node:
 if node.op_type == 'ReduceMean':
 reduce_mean_count += 1
 elif node.op_type == 'ReduceSum':
 reduce_sum_count += 1
 return reduce_mean_count, reduce_sum_count

# Count operations in each model
transformers_reduce_mean_count, transformers_reduce_sum_count = count_operations(transformers_onnx_model)
combined_reduce_mean_count, combined_reduce_sum_count = count_operations(combined_model)

# Print the counts
print(f"Transformers ONNX model: {transformers_reduce_mean_count} ReduceMean operations, {transformers_reduce_sum_count} ReduceSum operations")
print(f"Combined ONNX model: {combined_reduce_mean_count} ReduceMean operations, {combined_reduce_sum_count} ReduceSum operations")

# Function to get ReduceMean operations
def get_reduce_mean_operations(model):
 reduce_mean_operations = [node for node in model.graph.node if node.op_type == 'ReduceMean']
 return reduce_mean_operations

# Get ReduceMean operations in each model
transformers_reduce_mean_operations = get_reduce_mean_operations(transformers_onnx_model)
combined_reduce_mean_operations = get_reduce_mean_operations(combined_model)
# ...

# Check if the number of ReduceMean operations is the same in both models
if len(transformers_reduce_mean_operations) != len(combined_reduce_mean_operations):
 print("The models have a different number of ReduceMean operations.")
else:
 def extract_last_part(s):
 return s.split('/')[-1]

 # Flag to track if any differences were found
 differences_found = False

 for i in range(len(transformers_reduce_mean_operations)):
 transformers_operation = transformers_reduce_mean_operations[i]
 combined_operation = combined_reduce_mean_operations[i]

 # Extract the last part of the input, output, and name strings
 transformers_input = extract_last_part(transformers_operation.input[0])
 transformers_output = extract_last_part(transformers_operation.output[0])
 transformers_name = extract_last_part(transformers_operation.name)

 combined_input = extract_last_part(combined_operation.input[0])
 combined_output = extract_last_part(combined_operation.output[0])
 combined_name = extract_last_part(combined_operation.name)

 # Compare the operations
 if (transformers_input != combined_input or
 transformers_output != combined_output or
 transformers_name != combined_name or
 transformers_operation.op_type != combined_operation.op_type or
 transformers_operation.attribute != combined_operation.attribute):
 print(f"ReduceMean operation {i} is different.")
 print("Transformers ONNX model operation:", transformers_operation)
 print("Combined ONNX model operation:", combined_operation)
 differences_found = True

 # If no differences were found, print a success message
 if not differences_found:
 print("The two models are identical in 'ReduceMean' operations.")

Transformers ONNX model: 26 ReduceMean operations, 0 ReduceSum operations
Combined ONNX model: 1 ReduceMean operations, 0 ReduceSum operations
The models have a different number of ReduceMean operations.


In [69]:
# Function to get Gemm operations
def get_gemm_operations(model):
 gemm_operations = [node for node in model.graph.node if node.op_type == 'Gemm']
 return gemm_operations

# Get Gemm operations in each model
transformers_gemm_operations = get_gemm_operations(transformers_onnx_model)
combined_gemm_operations = get_gemm_operations(combined_model)

# Check if the number of Gemm operations is the same in both models
if len(transformers_gemm_operations) != len(combined_gemm_operations):
 print("The models have a different number of Gemm operations.")
else:
 def extract_last_part(s):
 return s.split('/')[-1]

 # Flag to track if any differences were found
 differences_found = False

 for i in range(len(transformers_gemm_operations)):
 transformers_operation = transformers_gemm_operations[i]
 combined_operation = combined_gemm_operations[i]

 # Extract the last part of the input, output, and name strings
 transformers_input = extract_last_part(transformers_operation.input[0])
 transformers_output = extract_last_part(transformers_operation.output[0])
 transformers_name = extract_last_part(transformers_operation.name)

 combined_input = extract_last_part(combined_operation.input[0])
 combined_output = extract_last_part(combined_operation.output[0])
 combined_name = extract_last_part(combined_operation.name)

 # Compare the operations
 if (transformers_input != combined_input or
 transformers_output != combined_output or
 transformers_name != combined_name or
 transformers_operation.op_type != combined_operation.op_type or
 transformers_operation.attribute != combined_operation.attribute):
 print(f"Gemm operation {i} is different.")
 print("Transformers ONNX model operation:", transformers_operation)
 print("Combined ONNX model operation:", combined_operation)
 differences_found = True

 # If no differences were found, print a success message
 if not differences_found:
 print("The two models are identical in 'Gemm' operations.")

The two models are identical in 'Gemm' operations.


In [70]:
import onnx.shape_inference

# Infer the shapes of the models
transformers_onnx_model = onnx.shape_inference.infer_shapes(transformers_onnx_model)
combined_model = onnx.shape_inference.infer_shapes(combined_model)

# Get the output shapes
transformers_output_shape = [dim.dim_value for dim in transformers_onnx_model.graph.output[0].type.tensor_type.shape.dim]
combined_output_shape = [dim.dim_value for dim in combined_model.graph.output[0].type.tensor_type.shape.dim]

# Compare the output shapes
if transformers_output_shape == combined_output_shape:
 print("The output shapes of the two models are identical.")
else:
 print("The output shapes of the two models are different.")
 print("Transformers ONNX model output shape:", transformers_output_shape)
 print("Combined ONNX model output shape:", combined_output_shape)

The output shapes of the two models are different.
Transformers ONNX model output shape: [0, 0, 768]
Combined ONNX model output shape: [0]


In [92]:
# Function to get all operations
def get_all_operations(model):
 return [node for node in model.graph.node]

# Get all operations in each model
transformers_operations = get_all_operations(transformers_onnx_model)
combined_operations = get_all_operations(combined_model)

# Function to extract the last part of a string after the last '/' or '.'
def extract_last_part(s):
 if '/' in s:
 return s.split('/')[-1]
 else:
 return s.split('.')[-1]


for i in range(min(len(transformers_operations), len(combined_operations))):
 transformers_operation = transformers_operations[i]
 combined_operation = combined_operations[i]

 # Check if the operations have any inputs
 if not transformers_operation.input or not combined_operation.input:
 print(f"Operation {i} does not have any inputs.")
 # print("Transformers ONNX model operation:", transformers_operation)
 # print("Combined ONNX model operation:", combined_operation)

 # Compare the rest of the operation
 transformers_output = extract_last_part(transformers_operation.output[0])
 transformers_name = extract_last_part(transformers_operation.name)
 combined_output = extract_last_part(combined_operation.output[0])
 combined_name = extract_last_part(combined_operation.name)

 if (transformers_output != combined_output or
 transformers_name != combined_name or
 transformers_operation.op_type != combined_operation.op_type or
 transformers_operation.attribute != combined_operation.attribute):
 print(f"Unequal operation {i}.")
 else:
 print(f"Identical operation {i}.")
 continue # Skip to the next iteration

 # Extract the last part of the input, output, and name strings
 transformers_input = extract_last_part(transformers_operation.input[0])
 transformers_output = extract_last_part(transformers_operation.output[0])
 transformers_name = extract_last_part(transformers_operation.name)

 combined_input = extract_last_part(combined_operation.input[0])
 combined_output = extract_last_part(combined_operation.output[0])
 combined_name = extract_last_part(combined_operation.name)

 # Compare the operations
 if (transformers_input != combined_input or
 transformers_output != combined_output or
 transformers_name != combined_name or
 transformers_operation.op_type != combined_operation.op_type or
 transformers_operation.attribute != combined_operation.attribute):
 print(f"Unequal operation {i}.")
 print("Transformers ONNX model operation:", transformers_operation)
 print("Combined ONNX model operation:", combined_operation)
 else:
 print(f"Identical operation {i}.")

# ...

# Print the total number of operations in each model
print(f"Total operations in Transformers ONNX model: {len(transformers_operations)}")
print(f"Total operations in Combined ONNX model: {len(combined_operations)}")

# ...

# If the Combined ONNX model has more operations, print the last operation
if len(combined_operations) > len(transformers_operations):
 print("Excess operation in Combined ONNX model:", combined_operations[-1])

Identical operation 0.
Identical operation 1.
Operation 2 does not have any inputs.
Identical operation 2.
Identical operation 3.
Operation 4 does not have any inputs.
Unequal operation 4.
Operation 5 does not have any inputs.
Identical operation 5.
Operation 6 does not have any inputs.
Identical operation 6.
Operation 7 does not have any inputs.
Unequal operation 7.
Operation 8 does not have any inputs.
Unequal operation 8.
Operation 9 does not have any inputs.
Unequal operation 9.
Unequal operation 10.
Transformers ONNX model operation: input: "embeddings.position_embeddings.weight"
input: "/embeddings/Slice_output_0"
output: "/embeddings/position_embeddings/Gather_output_0"
name: "/embeddings/position_embeddings/Gather"
op_type: "Gather"

Combined ONNX model operation: input: "onnx::Slice_110"
input: "/transformer/transformer.0/auto_model/embeddings/Constant_2_output_0"
input: "/transformer/transformer.0/auto_model/embeddings/Unsqueeze_output_0"
input: "/transformer/transformer.0/au

### Fashion-Clip image 