Crates.io | matcher_c |
lib.rs | matcher_c |
version | |
source | src |
created_at | 2024-06-15 12:05:53.201536 |
updated_at | 2024-11-18 09:53:52.052336 |
description | A high-performance matcher designed to solve LOGICAL and TEXT VARIATIONS problems in word matching, implemented in Rust. |
homepage | https://github.com/Lips7/Matcher |
repository | https://github.com/Lips7/Matcher |
max_upload_size | |
id | 1272771 |
Cargo.toml error: | TOML parse error at line 19, column 1 | 19 | autolib = false | ^^^^^^^ unknown field `autolib`, expected one of `name`, `version`, `edition`, `authors`, `description`, `readme`, `license`, `repository`, `homepage`, `documentation`, `build`, `resolver`, `links`, `default-run`, `default_dash_run`, `rust-version`, `rust_dash_version`, `rust_version`, `license-file`, `license_dash_file`, `license_file`, `licenseFile`, `license_capital_file`, `forced-target`, `forced_dash_target`, `autobins`, `autotests`, `autoexamples`, `autobenches`, `publish`, `metadata`, `keywords`, `categories`, `exclude`, `include` |
size | 0 |
A high-performance matcher designed to solve LOGICAL and TEXT VARIATIONS problems in word matching, implemented in Rust.
git clone https://github.com/Lips7/Matcher.git
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain nightly -y
cargo build --release
Then you should find `libmatcher_c.so`, `libmatcher_c.dylib`, or `matcher_c.dll` (depending on your platform) in the `target/release` directory.
Visit the release page to download the pre-built binary.
import json
from cffi import FFI
from extension_types import MatchTableType, ProcessType, MatchTable
## define ffi
# Declare the C interface from ./matcher_c.h, then load the shared library;
# the library's functions are called through `lib` below.
ffi = FFI()
# Read the header inside a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
with open("./matcher_c.h", "r", encoding="utf-8") as header_file:
    ffi.cdef(header_file.read())
lib = ffi.dlopen("./matcher_c.so")
# init matcher
# The match-table configuration is serialized to JSON and handed to the
# library as encoded bytes.
matcher = lib.init_matcher(
    json.dumps({
        1: [
            MatchTable(
                table_id=1,
                match_table_type=MatchTableType.Simple(
                    process_type=ProcessType.MatchNone
                ),
                word_list=["hello,world", "hello", "world"],
                exemption_process_type=ProcessType.MatchNone,
                exemption_word_list=[],
            )
        ]
    }).encode()
)
# Everything that uses the matcher runs under try/finally so that
# drop_matcher is called even if one of the steps below raises.
try:
    # check is match
    lib.matcher_is_match(matcher, "hello".encode("utf-8"))  # True

    # match as list
    res = lib.matcher_process_as_string(matcher, "hello,world".encode("utf-8"))
    try:
        print(ffi.string(res).decode("utf-8"))
        # [{"match_id":1,"table_id":1,"word_id":0,"word":"hello,world","similarity":1.0},{"match_id":1,"table_id":1,"word_id":1,"word":"hello","similarity":1.0},{"match_id":1,"table_id":1,"word_id":2,"word":"world","similarity":1.0}]
    finally:
        # Release the result string even when decoding/printing fails.
        lib.drop_string(res)

    # match as dict
    res = lib.matcher_word_match_as_string(matcher, "hello,world".encode("utf-8"))
    try:
        print(ffi.string(res).decode("utf-8"))
        # {"1":[{"match_id":1,"table_id":1,"word_id":0,"word":"hello,world","similarity":1.0},{"match_id":1,"table_id":1,"word_id":1,"word":"hello","similarity":1.0},{"match_id":1,"table_id":1,"word_id":2,"word":"world","similarity":1.0}]}
    finally:
        # Release the result string even when decoding/printing fails.
        lib.drop_string(res)
finally:
    # drop matcher
    lib.drop_matcher(matcher)
# init simple matcher
# The configuration maps a combined ProcessType flag to a {word_id: word}
# table; it is serialized to JSON and handed to the library as encoded bytes.
simple_matcher = lib.init_simple_matcher(
    json.dumps({
        ProcessType.MatchFanjianDeleteNormalize | ProcessType.MatchPinYinChar: {
            1: "妳好&世界",
            2: "hello",
        }
    }).encode()
)
# Everything that uses the simple matcher runs under try/finally so that
# drop_simple_matcher is called even if one of the steps below raises.
try:
    # check is match
    lib.simple_matcher_is_match(simple_matcher, "你好世界".encode("utf-8"))  # True

    # match as list
    res = lib.simple_matcher_process_as_string(
        simple_matcher, "nihaoshijie!hello!world!".encode("utf-8")
    )
    try:
        print(ffi.string(res).decode("utf-8"))
        # [{"word_id":1,"word":"妳好&世界"},{"word_id":2,"word":"hello"}]
    finally:
        # Release the result string even when decoding/printing fails.
        lib.drop_string(res)
finally:
    # drop simple matcher
    lib.drop_simple_matcher(simple_matcher)
Always call `drop_matcher`, `drop_simple_matcher`, and `drop_string` after initializing and processing to avoid memory leaks.