from sentence_transformers import SentenceTransformer
from opensearchpy import OpenSearch
import ironcore_alloy as alloy
import json
from urllib.request import urlopen
import asyncio


def pretty_response(response):
    """Print the hits of a search response using the plaintext field names.

    Args:
        response: Search response mapping; ``response["hits"]["hits"]`` must be
            a sized iterable of hits, each with an ``"_id"`` and a ``"_source"``
            holding ``publish_date``, ``title``, ``summary`` and ``publisher``.

    Raises:
        KeyError: If any of the keys above is missing.
    """
    hits = response["hits"]["hits"]

    # Guard clause: nothing to list.
    if len(hits) == 0:
        print("\nYour search returned no results.")
        return

    print("\n#### Cloaked Search Response: ####")
    for entry in hits:
        doc_id = entry["_id"]
        # Fields are looked up in a fixed order: publish_date, title, summary, publisher.
        published, title, summary, publisher = (
            entry["_source"][field]
            for field in ("publish_date", "title", "summary", "publisher")
        )
        print(
            f"\nID: {doc_id}\nPublication date: {published}\nTitle: {title}\nPublisher: {publisher}\nSummary: {summary}"
        )


def pretty_encrypted_response(response):
    """Print the hits of a search response using the ``_icl_p_``-prefixed fields.

    Args:
        response: Search response mapping; ``response["hits"]["hits"]`` must be
            a sized iterable of hits, each with an ``"_id"`` and a ``"_source"``
            holding ``publish_date``, ``_icl_p_title``, ``_icl_p_summary`` and
            ``_icl_p_publisher``.

    Raises:
        KeyError: If any of the keys above is missing.
    """
    hits = response["hits"]["hits"]

    # Guard clause: nothing to list.
    if len(hits) == 0:
        print("\nYour search returned no results.")
        return

    print("\n#### OpenSearch Direct Response: ####")
    for entry in hits:
        doc_id = entry["_id"]
        # Only publish_date is read under its plain name; the other three
        # fields are read under their `_icl_p_` names.
        published, title, summary, publisher = (
            entry["_source"][field]
            for field in (
                "publish_date",
                "_icl_p_title",
                "_icl_p_summary",
                "_icl_p_publisher",
            )
        )
        print(
            f"\nID: {doc_id}\nPublication date: {published}\nTitle: {title}\nPublisher: {publisher}\nSummary: {summary}"
        )


def _new_opensearch_client(port):
    """Return an OpenSearch client for ``localhost:<port>`` with TLS options disabled."""
    return OpenSearch(
        hosts=[{"host": "localhost", "port": port}],
        use_ssl=False,
        verify_certs=False,
        ssl_assert_hostname=False,
        ssl_show_warn=False,
    )


def _build_standalone_sdk():
    """Return an IronCore Alloy standalone SDK with one vector secret keyed ``"book_index"``."""
    # Note: in practice this must be 32 cryptographically-secure bytes
    key_bytes_1 = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    approximation_factor = 2.5
    vector_secrets = {
        "book_index": alloy.VectorSecret(
            approximation_factor,
            alloy.RotatableSecret(
                alloy.StandaloneSecret(1, alloy.Secret(key_bytes_1)), None
            ),
        )
    }
    # No standard or deterministic secrets are configured; only vectors are encrypted here.
    standard_secrets = alloy.StandardSecrets(None, [])
    deterministic_secrets = {}
    config = alloy.StandaloneConfiguration(
        standard_secrets, deterministic_secrets, vector_secrets
    )
    return alloy.Standalone(config)


async def main():
    """Index books with encrypted title embeddings, run a hybrid search, and print two views.

    Steps, in order:
      1. Load the sentence-embedding model and build the clients/SDK.
      2. Recreate ``book_index`` with a 384-dimension ``knn_vector`` field.
      3. Download the sample books, encrypt each title embedding, and bulk index.
      4. Run a bool query combining a ``match`` on ``summary`` with kNN clauses
         built from the encrypted query vectors, filtered by tenant.
      5. Print the hits from the client on port 8675, then fetch the same
         document ids through the client on port 9200 and print those.

    All network and model operations are performed as written; any exception
    they raise propagates to the caller.
    """
    # Setup the embedding model
    model = SentenceTransformer("all-MiniLM-L6-v2")

    # Initialize the OpenSearch client (port 8675; results from this client are
    # printed as the "Cloaked Search Response").
    client = _new_opensearch_client(8675)

    # Initialize the IronCore Cloaked AI standalone client
    tenant_id = "tenant-one"
    sdk = _build_standalone_sdk()

    # First make sure `book_index` doesn't exist.
    client.indices.delete(index="book_index", ignore_unavailable=True)

    # Define a mapping with a title embedding
    index_definition = {
        "settings": {"index": {"knn": True, "knn.algo_param.ef_search": 100}},
        "mappings": {
            "properties": {
                "title_vector": {
                    "type": "knn_vector",
                    "dimension": 384,
                    "method": {"name": "hnsw"},
                }
            }
        },
    }

    # Create the book index
    client.indices.create(index="book_index", body=index_definition)

    # Index book data
    url = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/notebooks/search/data.json"
    # Use a context manager so the HTTP connection is closed once the body is read.
    with urlopen(url) as http_response:
        books = json.loads(http_response.read())

    operations = []
    metadata = alloy.AlloyMetadata.new_simple(tenant_id)
    for book in books:
        # Transforming the title into an embedding using the model
        title_embedding = model.encode(book["title"]).tolist()
        # Encrypt the title embedding with IronCore Labs' Cloaked AI
        encrypted_title_embedding = await sdk.vector().encrypt(
            alloy.PlaintextVector(title_embedding, "book_index", ""), metadata
        )
        # Bulk format: one action line followed by the document itself.
        operations.append({"index": {"_index": "book_index"}})
        book["title_vector"] = encrypted_title_embedding.encrypted_vector
        book["tenant_id"] = tenant_id
        operations.append(book)
    # Pass the payload by keyword, matching the other client calls in this file.
    bulk_resp = client.bulk(
        body="\n".join(map(json.dumps, operations)), refresh=True
    )
    # A bulk request can succeed overall while individual items fail; surface
    # that instead of silently continuing to a search over partial data.
    if isinstance(bulk_resp, dict) and bulk_resp.get("errors"):
        print("\nWarning: the bulk request reported errors; some books may not be indexed.")

    # Run a hybrid query
    title_query_embedding = model.encode("python programming").tolist()
    # `generate_query_vectors` returns a list because the secret involved may be in rotation.
    encrypted_title_query_embeddings = await sdk.vector().generate_query_vectors(
        {"title": alloy.PlaintextVector(title_query_embedding, "book_index", "")},
        metadata,
    )
    # One kNN clause per returned query vector.
    embedding_queries = [
        {"knn": {"title_vector": {"vector": query_vector.encrypted_vector, "k": 5}}}
        for query_vector in encrypted_title_query_embeddings["title"]
    ]
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "filter": {"term": {"tenant_id.keyword": tenant_id}},
                "should": [
                    {"match": {"summary": "python programming"}},
                ]
                + embedding_queries,
            }
        },
    }
    search_response = client.search(index="book_index", body=search_query)
    # Response through Cloaked Search with all results decrypted
    pretty_response(search_response)

    # Take a look at the OpenSearch index directly to see what an over-curious admin or someone who exfiltrated
    # the index would see.
    document_ids = [hit["_id"] for hit in search_response["hits"]["hits"]]
    bypass_client = _new_opensearch_client(9200)
    bypass_response = bypass_client.search(
        index="book_index", body={"size": 5, "query": {"terms": {"_id": document_ids}}}
    )

    pretty_encrypted_response(bypass_response)


# Script entry point: this statement is unguarded, so it executes whenever the
# module is loaded and runs the `main()` coroutine to completion.
asyncio.run(main())