# Capricorn ### Parse HTML according to configuration. ### Capricorn is an HTML parsing library that supports recursion and custom execution order. [![Version info](https://img.shields.io/crates/v/capricorn.svg)](https://crates.io/crates/capricorn) [![Downloads](https://img.shields.io/crates/d/capricorn.svg?style=flat-square)](https://crates.io/crates/capricorn) [![docs](https://img.shields.io/badge/docs-latest-blue.svg?style=flat-square)](https://docs.rs/capricorn) [![example branch parameter](https://github.com/ptechen/capricorn/workflows/CI/badge.svg?branch=main)]() [![dependency status](https://deps.rs/crate/capricorn/0.1.91/status.svg)](https://deps.rs/crate/capricorn) ### Default execution order vec![String::from("selects"), String::from("each"), String::from("select_params"), String::from("nodes"), String::from("has"), String::from("contains")]; selects > each > (one or all or fields) > ... text_attr_html > (text or attr or html); selects > select_params > selects > ... text_attr_html > (text or attr or html); selects > nodes > has > contains > text_attr_html > (text or attr or html); ### Support: | Capricorn | support | example |val type| | :----: | :----: | :----- |:----:| | selects element | ✔ | field_name:
  selects:
      - element_name | String | | selects class | ✔ | field_name:
  selects:
      - .class_name | String | | selects class element | ✔ | field_name:
  selects:
      - .class_name
      - element_name | String | | first | ✔ | field_name:
  selects:
      - element_name
  nodes:
      first: true | String | | last | ✔ | field_name:
  selects:
      - element_name
  nodes:
      last: true | String | | eq | ✔ | field_name:
  selects:
      - element_name
  nodes:
      eq: 0 | String | | parent | ✔ | field_name:
  selects:
      - element_name
  nodes:
      parent: true | String | | children | ✔ | field_name:
  selects:
      - element_name
  nodes:
      children: true | String | | prev_sibling | ✔ | field_name:
  selects:
      - element_name
  nodes:
      prev_sibling: true | String | | next_sibling | ✔ | field_name:
  selects:
      - element_name
  nodes:
      next_sibling: true | String | | has_class | ✔ | field_name:
  selects:
      - element_name
  has:
      class: class_name | String | | has_attr | ✔ | field_name:
  selects:
      - element_name
  has:
      attr: attr_name | String | | each one | ✔ | field_name:
  selects:
      - element_name
  each:
      one:
          selects:
              - .class_name
          ... | String | | each all | ✔ | field_name:
  selects:
      - element_name
  each:
      all:
          selects:
              - .class_name
          ... | Array | | each fields | ✔ | field_name:
  selects:
      - element_name
  each:
      fields:
        field_name:
          selects:
              - .class_name
          ...
        field_name1:
          selects:
              - .class_name
          ... | Map | | select_params | ✔ | field_name:
  selects:
      - element_name
  select_params:
      selects:
          - .class_name
      ... | ... | | text | ✔ | field_name:
  selects:
      - element_name
  text_attr_html:
      text: true | String | | attr | ✔ | field_name:
  selects:
      - element_name
  text_attr_html:
      attr: true | String | | html | ✔ | field_name:
  selects:
      - element_name
  text_attr_html:
      html: true | String | | text contains | ✔ | field_name:
  selects:
      - element_name
  contains:
      contains:
          text:
              - test | String | | text not contains | ✔ | field_name:
  selects:
      - element_name
  contains:
      not_contains:
          text:
              - test | String | | html contains | ✔ | field_name:
  selects:
      - element_name
  contains:
      contains:
          html:
              - test | String | | html not contains | ✔ | field_name:
  selects:
      - element_name
  contains:
      not_contains:
          html:
              - test | String | | exec order | ✔ | field_name:
  exec_order:
      - selects
      - has
      - nodes
  selects:
      - element_name
  has:
      class: class_name
  nodes:
      first: true | String | | data format splits | ✔ | field_name:
  selects:
      - element_name
  data_format:
      splits:
          - { key: str } | Array | | data format splits with index | ✔ | field_name:
  selects:
      - element_name
  data_format:
      splits:
          - { key: str, index: 0 } | String | | data format replaces | ✔ | field_name:
  selects:
      - element_name
  data_format:
      replaces:
          - str | String | | data format deletes | ✔ | field_name:
  selects:
      - element_name
  data_format:
      deletes:
          - str | String | | data format find | ✔ | field_name:
  selects:
      - element_name
  data_format:
      find:
        - regex | String | | data format find_iter | ✔ | field_name:
  selects:
      - element_name
  data_format:
      find_iter:
        - regex | Array | | Multi-version regular matching err | ✔ |regexes_match_parse_html:
    - regex: regex
      version: 1
      err: err_msg | Err | | Multi-version regular matching fields | ✔ |regexes_match_parse_html:
    - regex: regex
      version: 1
      fields:
        field_name:
          selects:
          ...
        field_name1:
          selects:
          ... | Map | #### [Parse html code, more...](https://github.com/ptechen/Capricorn/blob/main/src/lib.rs) let yml = read_file("./test_html/test.yml").unwrap(); let params: parse::HashMapSelectParams = serde_yaml::from_str(&yml).unwrap(); let html = read_file("./test_html/test.html").unwrap(); let r = parse::parse_html(&params, &html); #### [Multi-version regular matching parsing html code, more...](https://github.com/ptechen/Capricorn/blob/main/src/lib.rs) let yml = read_file("./test_html/regexes_match_parse_html.yml").unwrap(); let v: match_html::MatchHtmlVec = serde_yaml::from_str(&yml).unwrap(); let html = read_file("./test_html/test.html").unwrap(); let r = v.regexes_match_parse_html(html)?;