// # Deleting and Updating (?) documents // // This example explains how to delete and update documents. // In fact there is actually no such thing as an update in tantivy. // // To update a document, you need to delete a document and then reinsert // its new version. // // --- // Importing tantivy... use tantivy::collector::TopDocs; use tantivy::query::TermQuery; use tantivy::schema::*; use tantivy::{doc, Index, IndexReader}; // A simple helper function to fetch a single document // given its id from our index. // It will be helpful to check our work. fn extract_doc_given_isbn( reader: &IndexReader, isbn_term: &Term, ) -> tantivy::Result> { let searcher = reader.searcher(); // This is the simplest query you can think of. // It matches all of the documents containing a specific term. // // The second argument is here to tell we don't care about decoding positions, // or term frequencies. let term_query = TermQuery::new(isbn_term.clone(), IndexRecordOption::Basic); let top_docs = searcher.search(&term_query, &TopDocs::with_limit(1))?; if let Some((_score, doc_address)) = top_docs.first() { let doc = searcher.doc(*doc_address)?; Ok(Some(doc)) } else { // no doc matching this ID. Ok(None) } } fn main() -> tantivy::Result<()> { // # Defining the schema // // Check out the *basic_search* example if this makes // small sense to you. let mut schema_builder = Schema::builder(); // Tantivy does not really have a notion of primary id. // This may change in the future. // // Still, we can create a `isbn` field and use it as an id. This // field can be `u64` or a `text`, depending on your use case. // It just needs to be indexed. // // If it is `text`, let's make sure to keep it `raw` and let's avoid // running any text processing on it. // This is done by associating this field to the tokenizer named `raw`. // Rather than building our // [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually, We // use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions) // and untokenized. // // Because we also want to be able to see this `id` in our returned documents, // we also mark the field as stored. let isbn = schema_builder.add_text_field("isbn", STRING | STORED); let title = schema_builder.add_text_field("title", TEXT | STORED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema.clone()); let mut index_writer = index.writer(50_000_000)?; // Let's add a couple of documents, for the sake of the example. let mut old_man_doc = Document::default(); old_man_doc.add_text(title, "The Old Man and the Sea"); index_writer.add_document(doc!( isbn => "978-0099908401", title => "The old Man and the see" ))?; index_writer.add_document(doc!( isbn => "978-0140177398", title => "Of Mice and Men", ))?; index_writer.add_document(doc!( title => "Frankentein", //< Oops there is a typo here. isbn => "978-9176370711", ))?; index_writer.commit()?; let reader = index.reader()?; let frankenstein_isbn = Term::from_field_text(isbn, "978-9176370711"); // Oops our frankenstein doc seems misspelled let frankenstein_doc_misspelled = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap(); assert_eq!( schema.to_json(&frankenstein_doc_misspelled), r#"{"isbn":["978-9176370711"],"title":["Frankentein"]}"#, ); // # Update = Delete + Insert // // Here we will want to update the typo in the `Frankenstein` book. // // Tantivy does not handle updates directly, we need to delete // and reinsert the document. // // This can be complicated as it means you need to have access // to the entire document. It is good practise to integrate tantivy // with a key value store for this reason. // // To remove one of the document, we just call `delete_term` // on its id. // // Note that `tantivy` does nothing to enforce the idea that // there is only one document associated with this id. // // Also you might have noticed that we apply the delete before // having committed. This does not matter really... index_writer.delete_term(frankenstein_isbn.clone()); // We now need to reinsert our document without the typo. index_writer.add_document(doc!( title => "Frankenstein", isbn => "978-9176370711", ))?; // You are guaranteed that your clients will only observe your index in // the state it was in after a commit. // In this example, your search engine will at no point be missing the *Frankenstein* document. // Everything happened as if the document was updated. index_writer.commit()?; // We reload our searcher to make our change available to clients. reader.reload()?; // No more typo! let frankenstein_new_doc = extract_doc_given_isbn(&reader, &frankenstein_isbn)?.unwrap(); assert_eq!( schema.to_json(&frankenstein_new_doc), r#"{"isbn":["978-9176370711"],"title":["Frankenstein"]}"#, ); Ok(()) }