# Index configuration for the "hdfs-logs" index.
# Root-level keys identify the index and where its data is stored.
version = 0
index_id = "hdfs-logs"
index_uri = "s3://quickwit-indexes/hdfs-logs"

[doc_mapping]
# One entry per document field. `tenant_id` and `timestamp` are marked
# `fast = true`; both are referenced again under [indexing_settings].
field_mappings = [
    { name = "tenant_id", type = "u64", fast = true },
    { name = "timestamp", type = "i64", fast = true },
    { name = "severity_text", type = "text", tokenizer = "raw" },
    { name = "body", type = "text", tokenizer = "default", record = "position" },
    # Kept on one line: TOML 1.0 inline tables may not span lines.
    { name = "resource", type = "object", field_mappings = [ { name = "service", type = "text", tokenizer = "raw" } ] },
]
tag_fields = ["tenant_id"]
store_source = true

[indexing_settings]
# These three keys refer to fields declared in doc_mapping.field_mappings.
demux_field = "tenant_id"
timestamp_field = "timestamp"
sort_field = "timestamp"
sort_order = "asc"
# NOTE(review): 61 and 10_000_001 (like the merge/resource values below)
# look deliberately non-round — confirm they are intended before "tidying".
commit_timeout_secs = 61
split_num_docs_target = 10_000_001

[indexing_settings.merge_policy]
demux_factor = 7
merge_factor = 9
max_merge_factor = 11

[indexing_settings.resources]
num_threads = 3
heap_size = "3G"

[search_settings]
# Both entries are text fields declared in doc_mapping.field_mappings.
default_search_fields = ["severity_text", "body"]

[[sources]]
source_id = "hdfs-logs-kafka-source"
source_type = "kafka"
# "bootstrap.servers" must stay quoted: it is a single key containing a dot,
# not a nested `bootstrap` -> `servers` path.
params = { topic = "cloudera-cluster-logs", client_params = { "bootstrap.servers" = "host:9092" } }

[[sources]]
source_id = "hdfs-logs-kinesis-source"
source_type = "kinesis"
params = { stream_name = "emr-cluster-logs" }