syntax="proto3";

package minknow_api.statistics;

option java_package = "com.nanoporetech.minknow_api";
option objc_class_prefix = "MKAPI";

import "minknow_api/acquisition.proto";
import "minknow_api/rpc_options.proto";

// Statistics about an acquisition: duty time, acquisition and writer output, encountered output
// keys, temperature, bias voltage, read length histograms and basecall boxplots.
//
// Every rpc in this service is declared with `idempotency_level = NO_SIDE_EFFECTS`.
service StatisticsService {
    // Tracks how much time has been spent in each channel state, aggregated across all the channels.
    //
    // Since 4.0
    rpc stream_duty_time (StreamDutyTimeRequest) returns (stream StreamDutyTimeResponse) {
        option idempotency_level = NO_SIDE_EFFECTS;
    }

    // Tracks experiment output across various filters over time.
    //
    // The first response will give you all the data it can, and continue to provide updates
    // if the acquisition period remains live.
    //
    // The stream will end once the current acquisition period ends, and a caller will need to
    // reinvoke the rpc in order to get new data.
    //
    // Since 1.14
    rpc stream_acquisition_output (StreamAcquisitionOutputRequest) returns (stream StreamAcquisitionOutputResponse) {
        option idempotency_level = NO_SIDE_EFFECTS;
    }

    // Tracks experiment writes across all channels over time.
    //
    // The first response will give you all the data it can.
    //
    // The stream will end once the current acquisition period ends, and a caller will need to
    // reinvoke the rpc in order to get new data.
    //
    // Since 4.0
    rpc stream_writer_output (StreamWriterOutputRequest) returns (stream StreamWriterOutputResponse) {
        option idempotency_level = NO_SIDE_EFFECTS;
    }

    // Tracks which barcode names have been encountered.
    //
    // When a new barcode name is encountered, a list of all encountered barcode names is returned.
    //
    // NOTE(review): the response carries full `AcquisitionOutputKey` entries, which hold more
    // than a barcode name (alignment reference, bed region, lamp ids, ...) - confirm whether
    // keys other than barcode names are also tracked here.
    //
    // Since 4.0
    rpc stream_encountered_acquisition_output_keys (StreamEncounteredAcquisitionOutputKeysRequest) returns (stream StreamEncounteredAcquisitionOutputKeysResponse) {
        option idempotency_level = NO_SIDE_EFFECTS;
    }

    // Streams device temperature for a device. The first message will contain all of the temperatures up
    // until the current live point, and then messages after that point will just be updates.
    //
    // Temperatures are averaged over a 1 minute period, and the value of each bucket is given in
    // minute intervals.
    //
    // Since 3.0
    rpc stream_temperature (StreamTemperatureRequest) returns (stream StreamTemperatureResponse) {
        option idempotency_level = NO_SIDE_EFFECTS;
    }

    // Streams when bias voltage changes occur, where the response given will be the acquisition
    // index that the voltage changed at, and the voltage itself. The first message will contain
    // all of the bias voltage changes up until the current live point, and then messages after
    // that period will just be updates.
    //
    // Will fail with INVALID_ARGUMENT if an unknown acquisition id is given.
    //
    // Since 3.2
    rpc stream_bias_voltages (StreamBiasVoltagesRequest) returns (stream StreamBiasVoltagesResponse) {
        option idempotency_level = NO_SIDE_EFFECTS;
    }

    // A histogram of read lengths.
    //
    // If the experiment is in-progress, then the latest histogram is streamed on a regular basis.
    // If the experiment is complete, then the final histogram is returned.
    //
    // Since 4.0
    rpc stream_read_length_histogram (StreamReadLengthHistogramRequest) returns (stream StreamReadLengthHistogramResponse) {
        option idempotency_level = NO_SIDE_EFFECTS;
    }

    // Gets a list of the types of read-length values for which a histogram is available.
    //
    // Since 3.2
    rpc get_read_length_types (GetReadLengthTypesRequest) returns (GetReadLengthTypesResponse) {
        option idempotency_level = NO_SIDE_EFFECTS;
    }


    // Returns a basecall metric over time, represented as datasets (i.e. boxplots).
    //
    // The metric is selected by `data_type` in the request; qscore (`QSCORE`, the zero value) is
    // what is returned when `data_type` is left unset.
    //
    // These metrics apply to all the successfully called reads.
    //
    // A dataset is a collection of quantiles (min, max, q50 etc - please see BoxplotDataset) recorded for
    // a fixed period of time, say dt. dt is specified in the configs, and it defaults to 10 min.
    //
    // When acquisition starts, MinKNOW accumulates these stats for each dt interval. Each dt generates
    // a dataset streamed by this rpc. The stream can request aggregated stats by averaging the stats from
    // consecutive dt periods.
    //
    // Notes:
    //
    // Each streamed message will return ALL the datasets (i.e. boxplots) from the start of the experiment.
    //
    // When using this rpc, basecalling needs to be enabled.
    //
    // Since 4.0
    rpc stream_basecall_boxplots (StreamBoxplotRequest) returns (stream BoxplotResponse) {
        option idempotency_level = NO_SIDE_EFFECTS;
    }
}

// Specify a desired data selection.
//
// Units for values are as specified in the corresponding Request
//
// The actual data selection used may differ from the desired one.  They are adjusted in the
// following sequence:
//
//  1. All values are set equal to the corresponding desired value.
//  2. Negative `start` or `end` values are fixed up by adding the current maximum value to the
//     specified value
//      - Negative start and end values are only supported for certain requests; typically they are
//        supported for time-series data
//      - If, after adding the current bucket count, the `start` value is still negative, then the
//        start value is adjusted to `0`
//      - If, after adding the current bucket count, the `end` value is still negative, or is zero,
//        then the data selection is empty
//      - If data collection is still ongoing, then the current bucket count may change between rpc
//        calls as more data is collected.
//  3. Values which are not set, or which are set at `0`, are then adjusted to a default value:
//      - `start` and `step` will be set to the minimum valid value
//      - `end` will be set to the maximum valid value
//  4. Values which are outside of the valid range are clamped to the nearest valid value:
//      - Values less than minimum valid value will be set equal to the minimum valid value
//      - Values greater than the maximum valid value will be set equal to the maximum valid value
//  5. Finally, all values are 'rounded' to a nearby valid value
//      - `start` and `step` will be rounded down to the first valid value less than or equal to
//        their current values
//      - `end` will be rounded up to the first valid value that is greater than or equal to its
//        current value
//      - This means that the range that is specified after rounding includes the range that was
//        specified prior to rounding
//
// If (`end` - `start`) is not an exact integer multiple of `step`, then the final bucket will cover
// a smaller range than that specified by `step`.
//
// Note also that the maximum valid start and end value may not be known if data collection is still
// ongoing -- for example, the maximum valid time for time series data.  If this is the case, then
// the maximum valid value will be determined when the experiment ends, and values in use will be
// adjusted accordingly.
//
message DataSelection {
    // Desired start of the selection.
    //
    // Signed: for requests that support it, a negative value is fixed up relative to the
    // current maximum value (step 2 above).  Unset or `0` selects the minimum valid value.
    int64 start = 1;
    // Desired range covered by each bucket.  Unset or `0` selects the minimum valid value.
    uint64 step = 2;
    // Desired end of the selection.
    //
    // Signed: for requests that support it, a negative value is fixed up relative to the
    // current maximum value (step 2 above).  Unset or `0` selects the maximum valid value.
    int64 end = 3;
}

// Request message for `StatisticsService.stream_duty_time`.
message StreamDutyTimeRequest {
    // The acquisition id of the experiment (marked `rpc_required`).
    string acquisition_run_id = 1 [(rpc_required) = true];

    // The desired data selection.
    //
    // The units for all values are `seconds since the start of the experiment`.
    //
    DataSelection data_selection = 2;
}

// Response message streamed by `StatisticsService.stream_duty_time`.
//
// Duty time is reported per channel state, bucketed over time; entry `i` of each
// `state_times` list is described by entry `i` of `bucket_ranges`.
message StreamDutyTimeResponse {

    // The time range covered by a single bucket.
    message BucketRange {
        // The range covered by a bucket
        // Values are in seconds
        //
        // The range [start, end) is half open (i.e. the start value is included, the end value is
        // not).
        uint32 start = 1;
        uint32 end = 2;
    }

    // The range covered by each entry in state_times
    repeated BucketRange bucket_ranges = 1;

    // The bucketed duty time data for a single channel state.
    message ChannelStateData {
        // How much time (in samples) was spent in this channel state, for each bucket.
        //
        // Note the unit differs from `BucketRange`, which is expressed in seconds.
        repeated uint64 state_times = 1;
    }

    // Map between channel state names, and a list of bucketed duty time data
    map<string, ChannelStateData> channel_states = 2;
}

// The source of the read lengths used in read length histograms; this also determines the
// units in which those lengths are expressed.
enum ReadLengthType {

    // MinKNOW events are measured as reads are being read, and are available very soon after
    // the read finishes
    //
    // As the zero value, this is the type used when a `ReadLengthType` field is left unset.
    //
    MinknowEvents = 0;

    // Estimated bases are calculated from MinKNOW events, based on an approximate
    // events-to-bases relationship.  Estimated bases become available at the same time as
    // MinknowEvents become available.
    //
    EstimatedBases = 1;

    // Basecalled bases are the number of bases reported by live base-calling.  This requires
    // that live basecalling is enabled.  As basecalling takes time to complete, these values
    // will update after the MinknowEvents or EstimatedBases histograms update.
    //
    BasecalledBases = 2;
}


// The kind of data accumulated in read length histogram buckets.
enum BucketValueType {

    // Histogram buckets contain the total number of reads with lengths that fall within the
    // histogram bucket range
    //
    // As the zero value, this is the type used when a `BucketValueType` field is left unset.
    ReadCounts = 0;

    // Histogram buckets contain the total number of events or basecalled bases contained
    // within reads that fall within the histogram bucket range
    //
    // The units (events, estimated bases, basecalled bases) are determined by the
    // `read_length_type` setting.
    ReadLengths = 1;
}

// The reason a read ended; used by the key and split messages in this file to filter and split
// statistics.
//
// NOTE(review): only `All` is described elsewhere in this file; the meaning of the remaining
// values is not documented here - confirm against the read classification documentation before
// relying on them.
enum ReadEndReason {
    // Special value: selects data for all read end reasons.
    //
    // As the zero value, this is also what an unset `read_end_reason` field holds.
    All = 0;
    Unknown = 1;
    Partial = 2;
    MuxChange = 3;
    UnblockMuxChange = 4;
    SignalPositive = 5;
    SignalNegative = 6;
    DataServiceUnblockMuxChange = 7;
}

// A set of filtering parameters for read length histogram data.
//
// Used as the `filtering` entries of `StreamReadLengthHistogramRequest`, and echoed back in
// `StreamReadLengthHistogramResponse.ReadLengthHistogramData`.
message ReadLengthHistogramKey {
    // Only return data for the given ReadEndReason.
    //
    // Special values:
    //   - Specify `ReadEndReason::All` to return data for all read end reasons
    //
    // If unspecified all read end reasons are returned.
    ReadEndReason read_end_reason = 1;
}

// Selects the dimensions along which returned read length histogram data is split.
message ReadLengthHistogramSplit {
    // Split returned data by read_end_reason
    bool read_end_reason = 1;
}


// Request message for `StatisticsService.stream_read_length_histogram`.
message StreamReadLengthHistogramRequest {
    // The `acquisition_run_id` of the acquisition to obtain data for
    //
    // If this is set to the `acquisition_run_id` of an acquisition which is in-progress, then
    // updates containing the latest histogram data for that acquisition will be streamed regularly
    // until that acquisition finishes (see `poll_time_seconds` below)
    //
    // Otherwise, if this is set to the `acquisition_run_id` of an acquisition which is finished,
    // and for which final histogram data is available, then the final histogram data for that
    // acquisition will be returned.  Final histogram data is available until it is cleared.
    //
    // Otherwise, if this parameter is not set, or is set to a value which is neither the
    // `acquisition_run_id` of an acquisition which is in-progress, nor the `acquisition_run_id` of
    // an acquisition for which final histogram data is available, then this call will fail with the
    // status `INVALID_ARGUMENT`.
    //
    string acquisition_run_id = 1 [(rpc_required) = true];

    // How often to return new histogram data, in seconds
    //
    // If not specified, or set to `0`, then the poll time will be set to 60 seconds
    //
    // If data is being returned for an acquisition which is in progress, then one update will be
    // sent when the call is first performed, then subsequently every `poll_time_seconds` after
    // that, and then finally once again when the acquisition finishes.
    //
    // Otherwise, if final histogram data is being returned for an acquisition that has already
    // finished, this parameter has no effect.  The final histogram data will be returned, and the
    // call will complete.
    //
    uint32 poll_time_seconds = 2;

    // The source of the read lengths in the histogram
    //
    // If MinKNOW is unable to supply data from the requested source (e.g. if the user requests
    // BasecalledBases, but basecalling is not enabled), then this call will fail with the status
    // `FAILED_PRECONDITION`.
    //
    // See `ReadLengthType` for further information about the available options.
    //
    ReadLengthType read_length_type = 3;

    // The desired read length range which histograms should cover.
    // Units are as set in `read_length_type`, above.
    //
    DataSelection data_selection = 4;

    // What data to accumulate in the read length histogram buckets
    //
    // See `BucketValueType` for further information about the available options.
    //
    BucketValueType bucket_value_type = 5;

    // If set greater than zero then discard some percent of data at the upper end of the source
    // data before producing histograms and N50 values.
    //
    // This is intended to assist in the case where a small number of outliers with very long read
    // lengths cause the histogram axes and N50 to be skewed.
    //
    // Defaults to 0 - no data discarded.
    // Values should be specified in percent - a value of 0.05 will cause the top 5% of the data
    // to be discarded before producing outputs.
    //
    // NOTE(review): the example above treats 0.05 as 5%, i.e. a fraction in [0, 1] rather than
    // a percentage in [0, 100] - confirm the intended scale against the implementation.
    //
    // For histograms, the data discarded depends on the bucket_value_type.  If `ReadCounts`, then
    // a percentage of the total number of reads will be discarded; if `ReadLengths` then a
    // percentage of the total read lengths will be discarded.
    //
    // For the N50 value, `discard_outlier_percent` always causes a percentage of the total
    // read lengths to be discarded (since it is always calculated from read length data)
    //
    float discard_outlier_percent = 6;

    // Define filtering parameters for streamed data.
    repeated ReadLengthHistogramKey filtering = 7;

    // Define how results are split for returned data.
    ReadLengthHistogramSplit split = 8;
}

// Response message streamed by `StatisticsService.stream_read_length_histogram`.
//
// Note that fields are not declared in field-number order (`source_data_end` is field 5 and
// `bucket_value_type` is field 3); the numbers, not the declaration order, are the wire contract.
message StreamReadLengthHistogramResponse {
    // The data source for the histograms
    //
    // Also specifies the units for `data_selection` (in the request) and `n50`
    //
    // See `ReadLengthType` for further information about the possible options.
    //
    ReadLengthType read_length_type = 1;

    // The read length range covered by a single histogram bucket.
    message BucketRange {
        // The range covered by a bucket
        // Units are as set in `read_length_type`, above
        //
        // The range [start, end) is half open (i.e. the start value is included, the end value is
        // not).
        uint64 start = 1;
        uint64 end = 2;
    }

    // The range covered by each bucket in the histogram data
    repeated BucketRange bucket_ranges = 2;

    // The right hand edge of the last source bucket which contains data
    //
    // Measured across all source data, after excluding the reads specified by
    // `discard_outlier_percent` in the stream request.
    //
    uint64 source_data_end = 5;

    // The data accumulated in the read length histogram buckets
    //
    // See `BucketValueType` for further information about the possible options.
    //
    BucketValueType bucket_value_type = 3;

    // One histogram, together with the filtering parameters it was produced for.
    message ReadLengthHistogramData {
        // The filtering parameters which contributed to this bucket.
        repeated ReadLengthHistogramKey filtering = 3;

        // Counts for each histogram bucket
        //
        // Units are as specified in `read_length_type`
        // The range covered by each bucket is as in `bucket_ranges`
        // The type of data accumulated in each bucket is given by `bucket_value_type`
        //
        repeated uint64 bucket_values = 1;

        // The N50 value for the read length data for the selected `read_length_type` and
        // read end reasons (see `filtering`).
        //
        // Units are as specified by `read_length_type`.
        //
        // Measured across all source data, after excluding the reads specified by
        // `discard_outlier_percent` in the stream request.
        //
        float n50 = 2;
    }

    // The histogram data
    repeated ReadLengthHistogramData histogram_data = 4;
}

// Request message for `StatisticsService.get_read_length_types`.
message GetReadLengthTypesRequest {
    // The acquisition id of the experiment (marked `rpc_required`).
    string acquisition_run_id = 1 [(rpc_required) = true];
}

// Response message for `StatisticsService.get_read_length_types`.
message GetReadLengthTypesResponse {

    // Array of the read length types for which a histogram is currently available
    repeated ReadLengthType available_types = 1;
}

// A set of criteria identifying a subset of acquisition output.
//
// Used as filtering parameters in `StreamAcquisitionOutputRequest`, echoed back alongside the
// matching snapshots in `StreamAcquisitionOutputResponse`, and returned by
// `stream_encountered_acquisition_output_keys`.
//
// Note that fields are not declared in field-number order (`alignment_bed_file_region_name` is
// field 8); the numbers, not the declaration order, are the wire contract.
message AcquisitionOutputKey {
    // Only return data for the given barcode.
    //
    // Special values:
    //   - Specify "unclassified" for data which does not have a barcode.
    //   - Specify "classified" for all data which has a barcode.
    //
    // If unspecified all barcodes are returned.
    string barcode_name = 1;

    // Only return data for the given alignment reference.
    //
    // Special values:
    //   - Specify "unaligned" for data which does not align to a reference
    //   - Specify "aligned" for all data which aligns to a reference
    //
    // If unspecified all alignment targets are returned.
    string alignment_reference = 2;

    // Only return data for the given target region.
    //
    // Target regions are defined in bed files.
    //
    // The region is a string which identifies an entry in the bed file.
    //
    // Special values:
    //   - Specify "offtarget" for data which does not have a bed region.
    //   - Specify "ontarget" for all data which has a bed region.
    //
    // If unspecified all alignment regions are returned.
    string alignment_bed_file_region = 3;

    // An alias to `alignment_bed_file_region`
    //
    // An optional name can be used to identify a target region in the bed file
    string alignment_bed_file_region_name = 8;

    // Only return data for the given lamp barcode.
    //
    // Special values:
    //   - Specify "unclassified" for data which does not have a lamp barcode.
    //   - Specify "classified" for all data which has a lamp barcode.
    //
    // If unspecified all lamp barcodes are returned.
    string lamp_barcode_id = 4;

    // Only return data for the given lamp target.
    //
    // Special values (these use the barcode terms, as lamp is part of the barcoding pipeline):
    //   - Specify "unclassified" for data which does not have a lamp target.
    //   - Specify "classified" for all data which has a lamp target.
    //
    // If unspecified all lamp targets are returned.
    string lamp_target_id = 5;

    // The barcode alias corresponding to the `barcode_name` and `lamp_barcode_id`
    string barcode_alias = 6;

    // Only return data for the given ReadEndReason.
    //
    // Special values:
    //   - Specify `ReadEndReason::All` to return data for all read end reasons
    //
    // If unspecified all read end reasons are returned.
    ReadEndReason read_end_reason = 7;
}

// Selects the dimensions along which returned acquisition output data is split.
//
// Each flag corresponds to the `AcquisitionOutputKey` field of the same name.
message AcquisitionOutputSplit {
    // Split data for every individual barcode.
    bool barcode_name = 1;

    // Split data for each individual alignment reference.
    //
    // References are defined in alignment references.
    bool alignment_reference = 2;

    // Split data for each target region.
    //
    // Target regions are defined in bed files.
    bool alignment_bed_file_region = 3;

    // Split data for each lamp barcode id.
    //
    // Lamp barcodes are defined by the active lamp kit.
    bool lamp_barcode_id = 4;

    // Split data for each lamp target id.
    //
    // Lamp targets are defined by the active lamp kit.
    bool lamp_target_id = 5;

    // Split returned data by read_end_reason
    bool read_end_reason = 6;
}

// Request message for `StatisticsService.stream_acquisition_output`.
message StreamAcquisitionOutputRequest {
    // The acquisition id of the experiment (marked `rpc_required`).
    string acquisition_run_id = 1 [(rpc_required) = true];

    // The desired data selection.
    //
    // The units for all values are `seconds since the start of the experiment`.
    //
    DataSelection data_selection = 2;

    // Define filtering parameters for streamed data.
    repeated AcquisitionOutputKey filtering = 3;

    // Define how results are split for returned data.
    AcquisitionOutputSplit split = 4;
}

// A snapshot of acquisition output data, for a given set of filtering criteria.
message AcquisitionOutputSnapshot {
    // The time the snapshot was collected, in seconds.
    //
    // Represents the number of seconds since the start of the experiment.
    // Snapshots will usually stream in minute chunks, so a caller will first see 60, then 120 etc.
    uint32 seconds = 1;

    // The yield summary data (type defined in `minknow_api/acquisition.proto`).
    acquisition.AcquisitionYieldSummary yield_summary = 2;
}

// Response message streamed by `StatisticsService.stream_acquisition_output`.
message StreamAcquisitionOutputResponse {
    // The snapshots collected for one set of filtering parameters.
    message FilteredSnapshots {
        // The filtering parameters which contributed to this bucket.
        repeated AcquisitionOutputKey filtering = 1;

        // The snapshots collected for the `filtering` parameters above.
        repeated AcquisitionOutputSnapshot snapshots = 2;
    }

    // Snapshots split by requested filtering parameters.
    repeated FilteredSnapshots snapshots = 1;
}

// Request message for `StatisticsService.stream_writer_output`.
message StreamWriterOutputRequest {
    // The acquisition id of the experiment (marked `rpc_required`).
    string acquisition_run_id = 1 [(rpc_required) = true];

    // The desired data selection.
    //
    // The units for all values are `seconds since the start of the experiment`.
    //
    DataSelection data_selection = 2;
}

// A snapshot of writer data.
message WriterOutputSnapshot {
    // The time the snapshot was collected, in seconds.
    //
    // Represents the number of seconds since the start of the experiment.
    // Snapshots will usually stream in minute chunks, so a caller will first see 60, then 120 etc.
    uint32 seconds = 1;

    // The writer data for this bucket (type defined in `minknow_api/acquisition.proto`).
    acquisition.AcquisitionWriterSummary writer_output = 2;
}

// Response message streamed by `StatisticsService.stream_writer_output`.
message StreamWriterOutputResponse {
    // The writer snapshots carried by this message.
    repeated WriterOutputSnapshot snapshots = 1;
}

// Request message for `StatisticsService.stream_encountered_acquisition_output_keys`.
message StreamEncounteredAcquisitionOutputKeysRequest {
    // The acquisition id of the experiment (marked `rpc_required`).
    string acquisition_run_id = 1 [(rpc_required) = true];
}

// Response message streamed by `StatisticsService.stream_encountered_acquisition_output_keys`.
message StreamEncounteredAcquisitionOutputKeysResponse {
    // The output keys encountered so far.
    //
    // Per the rpc description, the list of everything encountered is returned each time a new
    // entry is encountered, rather than only the new entry.
    repeated AcquisitionOutputKey acquisition_output_keys = 1;
}

// Request message for `StatisticsService.stream_temperature`.
message StreamTemperatureRequest {

    // The acquisition id of the experiment (marked `rpc_required`).
    string acquisition_run_id = 1 [(rpc_required) = true];

    // The desired data selection.
    //
    // The units for all values are `seconds since the start of the experiment`.
    //
    DataSelection data_selection = 2;
}

// A single temperature reading, holding the set of temperatures appropriate for the device type.
//
// NOTE(review): the temperature unit is not stated in this file (presumably degrees Celsius) -
// confirm against the device documentation.
message TemperaturePacket {
    // Packet of temperatures appropriate for a MinION.
    message MinIONTemperature {
        // Temperature as measured by the probe inside the asic.
        double asic_temperature = 1;

        // Temperature as measured by the probe in the minion heatsink.
        double heatsink_temperature = 2;
    }

    // Packet of temperatures appropriate for a PromethION.
    message PromethIONTemperature {
        // Temperature as measured by thermistor TH2 on the P-Chip.
        double flowcell_temperature = 1;

        // Mean of 12 pixel-blocks temperatures measured with sensors in the ASIC.
        double chamber_temperature = 2;
    }

    // The device-specific temperatures; at most one of these members is set.
    oneof temperature {
        MinIONTemperature minion = 1;
        PromethIONTemperature promethion = 2;
    }
}

// Response message streamed by `StatisticsService.stream_temperature`.
message StreamTemperatureResponse {
    // The temperature readings carried by this message.
    //
    // Per the rpc description, the first message holds every reading up to the current live
    // point, and later messages hold only updates.
    repeated TemperaturePacket temperatures = 1;
}


// A single bias voltage change, as streamed by `StatisticsService.stream_bias_voltages`.
message BiasVoltagePacket {
    // The acquisition index at which the bias voltage changed.
    uint64 acquisition_index = 1;
    // The bias voltage that was set.
    //
    // NOTE(review): the unit is not stated in this file - confirm (presumably millivolts).
    double bias_voltage = 2;
    // Time of the change, in seconds.
    //
    // NOTE(review): the reference point is not stated in this file - presumably seconds since
    // the start of the experiment, as for the other time values here; confirm.
    uint64 time_seconds = 3;
}

// Request message for `StatisticsService.stream_bias_voltages`.
message StreamBiasVoltagesRequest {
    // The acquisition id of the experiment (marked `rpc_required`).
    //
    // Per the rpc description, an unknown acquisition id fails the call with `INVALID_ARGUMENT`.
    string acquisition_run_id = 1 [(rpc_required) = true];
}

// Response message streamed by `StatisticsService.stream_bias_voltages`.
message StreamBiasVoltagesResponse {
    // The bias voltage changes carried by this message.
    //
    // Per the rpc description, the first message holds every change up to the current live
    // point, and later messages hold only updates.
    repeated BiasVoltagePacket bias_voltages = 1;
}

// Request message for `StatisticsService.stream_basecall_boxplots`.
message StreamBoxplotRequest {
    // The acquisition id of the experiment (marked `rpc_required`).
    string acquisition_run_id = 1 [(rpc_required) = true];

    // The metric for which boxplot data is returned.
    enum BoxplotType {
        // Qscore of reads from the basecaller.
        //
        // Only available if basecalling is enabled.
        //
        // As the zero value, this is the type used when `data_type` is left unset.
        QSCORE = 0;

        // Number of bases per second the data is moving through the sequencer.
        //
        // Only available if basecalling is enabled.
        BASES_PER_SECOND = 1;

        // Accuracy of reads aligned to provided reference.
        //
        // Accuracy is measured in percent 0-100.
        //
        // Only available if alignment is enabled.
        ACCURACY = 2;
    }

    // Type of boxplot data to return.
    BoxplotType data_type = 2;

    // Defines, in minutes, the width of each dataset.
    // This is how much time each dataset (boxplot) should cover. Note that MinKNOW stores
    // all stats at a default granularity (specified in the config file, i.e. 10 min in MinKNOW 3.2).
    // This dataset_width HAS to be a multiple of the default granularity!
    //
    // Note:
    // When multiple buckets are aggregated into a single dataset, the resulting dataset will
    // contain the average of the aggregated quantiles (with the exception of min/max)! This is not the
    // same as using a larger granularity in MinKNOW configs - the values that MinKNOW stores
    // are the true quantiles. Averaging quantiles will give a rough approximation, but not a quantile.
    // If the finest granularity is not required, we strongly suggest changing the time coverage in the config,
    // not the dataset_width in the rpc.
    uint32 dataset_width = 3;

    // How often to return messages in this stream, specified in seconds. Note that this stream will
    // return results regardless of the stats updates (because it always returns all the datasets).
    // poll_time should be larger than the basecalled stats update rate in MinKNOW -
    // please see basecalled_stats_refresh_rate_seconds in the configs
    // (set to 1 second in MinKNOW 3.2).
    //
    // If unspecified, defaults to 1 minute.
    uint32 poll_time = 4;
}

// Response message streamed by `StatisticsService.stream_basecall_boxplots`.
message BoxplotResponse {
    // Result boxplots are stored in this array. This is an overview of the stored data
    // from the START of the acquisition period. This includes ALL the basecalled stats
    // from MinKNOW, not just updates since previous calls!
    repeated BoxplotDataset datasets = 1;

    // Summary statistics (quantiles, count and mode estimates) for a single dataset.
    message BoxplotDataset {
        // Minimum value for any point in the dataset.
        float min = 1;
        // 25th percentile (first quartile) value for all points in the dataset.
        float q25 = 2;
        // 50th percentile (median) value for all points in the dataset.
        float q50 = 3;
        // 75th percentile (third quartile) value for all points in the dataset.
        float q75 = 4;
        // Maximum value for any point in the dataset.
        float max = 5;

        // Number of items in this box plot's stats.
        uint64 count = 6;

        // Estimated lower value where there is half the data compared to the mode.
        // Provides some estimate of the sharpness of the mode peak.
        float lower_full_width_half_maximum = 7;
        // Estimated mode for the dataset.
        float mode = 8;
        // Estimated upper value where there is half the data compared to the mode.
        // Provides some estimate of the sharpness of the mode peak.
        float upper_full_width_half_maximum = 9;
    }
}