syntax = "proto3";

// An index stored in CIFF is a single file comprised of exactly the following:
//  - A Header protobuf message,
//  - Exactly the number of PostingsList messages specified in the
//    num_postings_lists field of the Header,
//  - Exactly the number of DocRecord messages specified in the num_docs field
//    of the Header.
// The protobuf messages are defined below.

// This is the CIFF header. It always comes first.
message Header {
  // Version.
  int32 version = 1;

  // Exactly the number of PostingsList messages that follow the Header.
  int32 num_postings_lists = 2;

  // Exactly the number of DocRecord messages that follow the PostingsList
  // messages.
  int32 num_docs = 3;

  // The total number of postings lists in the collection; the vocabulary size.
  // This might differ from num_postings_lists, for example, because we only
  // export the postings lists of query terms.
  int32 total_postings_lists = 4;

  // The total number of documents in the collection; might differ from
  // num_docs for a similar reason as above.
  int32 total_docs = 5;

  // The total number of terms in the entire collection. This is the sum of all
  // document lengths of all documents in the collection.
  int64 total_terms_in_collection = 6;

  // The average document length. We store this value explicitly in case the
  // exporting application wants a particular level of precision.
  double average_doclength = 7;

  // Description of this index, meant for human consumption. Describing, for
  // example, the exporting application, document processing and tokenization
  // pipeline, etc.
  string description = 8;
}

// An individual posting.
message Posting {
  // Document id for this posting. NOTE(review): the exact encoding (absolute
  // id vs. gap) is not stated in this file — confirm against the CIFF spec.
  int32 docid = 1;

  // NOTE(review): presumably the term frequency of the term in this
  // document — confirm against the CIFF spec.
  int32 tf = 2;
}

// A postings list, comprised of one or more postings.
message PostingsList {
  // The term.
  string term = 1;

  // The document frequency.
  int64 df = 2;

  // The collection frequency.
  int64 cf = 3;

  // The postings that make up this list.
  repeated Posting postings = 4;
}

// A record containing metadata about an individual document.
message DocRecord {
  // Refers to the docid in the postings lists.
  int32 docid = 1;

  // Refers to a docid in the external collection.
  string collection_docid = 2;

  // Length of this document.
  int32 doclength = 3;
}