use std::{path::PathBuf, sync::Arc}; use embed_anything::{ config::TextEmbedConfig, embed_directory_stream, embed_file, embeddings::embed::{EmbedData, Embedder}, }; use anyhow::Result; #[tokio::main] async fn main() -> Result<()> { let text_embed_config = TextEmbedConfig::default() .with_chunk_size(1000) .with_batch_size(512) .with_buffer_size(512); let cohere_model = Embedder::from_pretrained_cloud("cohere", "embed-english-v3.0", None).unwrap(); let openai_model = Embedder::from_pretrained_cloud("openai", "text-embedding-3-small", None).unwrap(); let openai_model: Arc = Arc::new(openai_model); let _openai_embeddings = embed_directory_stream( PathBuf::from("test_files"), &openai_model, Some(vec!["pdf".to_string()]), Some(&text_embed_config), None::)>, ) .await? .unwrap(); let _file_embedding = embed_file( "test_files/attention.pdf", &openai_model, Some(&text_embed_config), None::)>, ) .await? .unwrap(); let _cohere_embedding = embed_file( "test_files/attention.pdf", &cohere_model, Some(&text_embed_config), None::)>, ) .await? .unwrap(); println!("Cohere embedding: {:?}", _cohere_embedding); Ok(()) }