Bumped vendor, corrected vendor sources
This commit is contained in:
parent
bc7c72d80c
commit
8aa6ff9c3a
18561 changed files with 5650955 additions and 3529757 deletions
|
|
@ -1 +1 @@
|
||||||
{"files":{".cargo_vcs_info.json":"bb2a79143e8a86d09bc6eb3522b5abb8e9cf6f9f3be955e683efa7a3202d0c94","CHANGELOG.md":"b278b1b797ac494c3653c85ede8a0f423a866300137117d2e914e4a52a87c08d","Cargo.lock":"ded666c16c5232c89b807781768ee73a4f8ed51d3b05a86850856d1a482dd5e6","Cargo.toml":"af4c88d2c7e685f9adf0f473fc9a0c33a2c8f54e3497f8eb92c8ec5d1dcfb1b5","Cargo.toml.orig":"8d11a89cafe93e86ce8d849907d07182a31b3b4fd30fab1dc5a762f72a528031","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"e99d88d232bf57d70f0fb87f6b496d44b6653f99f8a63d250a54c61ea4bcde40","README.md":"76d28502bd2e83f6a9e3576bd45e9a7fe5308448c4b5384b0d249515b5f67a5c","bench.plot.r":"6a5d7a4d36ed6b3d9919be703a479bef47698bf947818b483ff03951df2d4e01","benchmark.sh":"b35f89b1ca2c1dc0476cdd07f0284b72d41920d1c7b6054072f50ffba296d78d","coverage.sh":"4677e81922d08a82e83068a911717a247c66af12e559f37b78b6be3337ac9f07","examples/addr2line.rs":"0ffdd45eeac7a2a5ddb71895009ea5e41306e22404bce375600b8371728e36ed","rustfmt.toml":"01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b","src/builtin_split_dwarf_loader.rs":"dc6979de81b35f82e97275e6be27ec61f3c4225ea10574a9e031813e00185174","src/function.rs":"68f047e0c78afe18ad165db255c8254ee74c35cd6df0cc07e400252981f661ed","src/lazy.rs":"0bf23f7098f1902f181e43c2ffa82a3f86df2c0dbcb9bc0ebce6a0168dd8b060","src/lib.rs":"ebfdd66a4b553e6dcbb937903c4f30bfe3f4a0b6ae9fec810ec9eb9e6bf0ec2f","tests/correctness.rs":"4081f8019535305e3aa254c6a4e1436272dd873f9717c687ca0e66ea8d5871ed","tests/output_equivalence.rs":"b2cd7c59fa55808a2e66e9fe7f160d846867e3ecefe22c22a818f822c3c41f23","tests/parse.rs":"c2f7362e4679c1b4803b12ec6e8dca6da96aed7273fd210a857524a4182c30e7"},"package":"f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"}
|
{"files":{".cargo_vcs_info.json":"cac8a6c406d0953ab9f3bdf7a7518acc5e0a71c3ea20787cb98731737df9af9d","CHANGELOG.md":"0d04c7dddcffba3d83d4a05466c9b79cf13360c33f081fc9800596716bf954ce","Cargo.lock":"783a3adbf5047e30e0aa52f28dc6a0134534de715e5a36fd402c17e2eba43b1a","Cargo.toml":"940373297f67456852d5236a1eb7a3856bc672c69529cbbe6052ce5f85221170","Cargo.toml.orig":"d8ed954f7923d197568cf269d7c148dbd6b84c74bfe55418a89b1e57b03e9cd8","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"e99d88d232bf57d70f0fb87f6b496d44b6653f99f8a63d250a54c61ea4bcde40","README.md":"c635ed91d7b0c87ff2f0f311cd1a31336d2cbc4d011965d3b58afaca073538d9","src/bin/addr2line.rs":"3da8c7657604578961f2bf89052b94e6c59c55abe27a2707913f98875d666124","src/frame.rs":"de3b23388c36a0874db5569d1f49ce6cc52ef2006b9ae9b9a3eba7654b201e2b","src/function.rs":"7fc741622c44c24fdf54c8a44cbdee058d49da33974bc86cd45b70c143071e68","src/lazy.rs":"ec230b69a0d194fe253227a41903231ca70a88f896af7a6f8a5a7d9ac63bf618","src/lib.rs":"8bf9fe3f3ced8ff84d60fdd456a8ff6e73170825cd696b0291b4644c01e620d2","src/line.rs":"049e9b1526ae3433a6001e8377245131e9cbd056d17e67a9b34898598a4f1c28","src/loader.rs":"9ad08da02599b9742a9821742fd84dbe0294838b6faa5f5753eacddc6101ffc1","src/lookup.rs":"0d28a2fd00f0696f8fb50cdc88cb7d55a910df8bf3052b7c74ae50a387346e67","src/unit.rs":"f4399c401759e14db5d596cfddfe2c8a0591a81c18d9adaedba7d243cc3bd192"},"package":"dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"}
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"git": {
|
"git": {
|
||||||
"sha1": "2edc700412f2a17b0b997c4b89e0f04886bfb0c0"
|
"sha1": "621a3abe985b32f43dd1e8c10e003abe902c68e2"
|
||||||
},
|
},
|
||||||
"path_in_vcs": ""
|
"path_in_vcs": ""
|
||||||
}
|
}
|
||||||
|
|
@ -2,6 +2,129 @@
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
## 0.24.2 (2024/10/04)
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
* Enabled caching of DWARF abbreviations.
|
||||||
|
[#318](https://github.com/gimli-rs/addr2line/pull/318)
|
||||||
|
|
||||||
|
* Changed the `addr2line` binary to prefer symbol names over DWARF names.
|
||||||
|
[#332](https://github.com/gimli-rs/addr2line/pull/332)
|
||||||
|
|
||||||
|
* Updated `gimli` dependency.
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
* Added `Context::from_arc_dwarf`.
|
||||||
|
[#327](https://github.com/gimli-rs/addr2line/pull/327)
|
||||||
|
|
||||||
|
* Added benchmark comparison.
|
||||||
|
[#315](https://github.com/gimli-rs/addr2line/pull/315)
|
||||||
|
[#321](https://github.com/gimli-rs/addr2line/pull/321)
|
||||||
|
[#322](https://github.com/gimli-rs/addr2line/pull/322)
|
||||||
|
[#325](https://github.com/gimli-rs/addr2line/pull/325)
|
||||||
|
|
||||||
|
* Added more tests.
|
||||||
|
[#328](https://github.com/gimli-rs/addr2line/pull/328)
|
||||||
|
[#330](https://github.com/gimli-rs/addr2line/pull/330)
|
||||||
|
[#331](https://github.com/gimli-rs/addr2line/pull/331)
|
||||||
|
[#333](https://github.com/gimli-rs/addr2line/pull/333)
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
## 0.24.1 (2024/07/26)
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
* Fixed parsing of partial units, which are found in supplementary object files.
|
||||||
|
[#313](https://github.com/gimli-rs/addr2line/pull/313)
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
## 0.24.0 (2024/07/16)
|
||||||
|
|
||||||
|
### Breaking changes
|
||||||
|
|
||||||
|
* Updated `gimli` dependency.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
* Changed the order of ranges returned by `Context::find_location_range`, and
|
||||||
|
fixed handling in rare situations.
|
||||||
|
[#303](https://github.com/gimli-rs/addr2line/pull/303)
|
||||||
|
[#304](https://github.com/gimli-rs/addr2line/pull/304)
|
||||||
|
[#306](https://github.com/gimli-rs/addr2line/pull/306)
|
||||||
|
|
||||||
|
* Improved the performance of `Context::find_location`.
|
||||||
|
[#305](https://github.com/gimli-rs/addr2line/pull/305)
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
* Added `LoaderReader`.
|
||||||
|
[#307](https://github.com/gimli-rs/addr2line/pull/307)
|
||||||
|
|
||||||
|
* Added `--all` option to `addr2line`.
|
||||||
|
[#307](https://github.com/gimli-rs/addr2line/pull/307)
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
## 0.23.0 (2024/05/26)
|
||||||
|
|
||||||
|
### Breaking changes
|
||||||
|
|
||||||
|
* Updated `gimli` dependency.
|
||||||
|
|
||||||
|
* Deleted `Context::new`, `Context::new_with_sup`, and `builtin_split_dwarf_loader`.
|
||||||
|
Use `Context::from_dwarf` or `Loader::new` instead.
|
||||||
|
This removes `object` from the public API.
|
||||||
|
[#296](https://github.com/gimli-rs/addr2line/pull/296)
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
* Fixed handling of column 0 in the line table.
|
||||||
|
[#290](https://github.com/gimli-rs/addr2line/pull/290)
|
||||||
|
|
||||||
|
* Moved `addr2line` from `examples` to `bin`. Requires the `bin` feature.
|
||||||
|
[#291](https://github.com/gimli-rs/addr2line/pull/291)
|
||||||
|
|
||||||
|
* Split up `lib.rs` into smaller modules.
|
||||||
|
[#292](https://github.com/gimli-rs/addr2line/pull/292)
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
* Added `Loader`. Requires the `loader` feature.
|
||||||
|
[#296](https://github.com/gimli-rs/addr2line/pull/296)
|
||||||
|
[#297](https://github.com/gimli-rs/addr2line/pull/297)
|
||||||
|
|
||||||
|
* Added unpacked Mach-O support to `Loader`.
|
||||||
|
[#298](https://github.com/gimli-rs/addr2line/pull/298)
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
## 0.22.0 (2024/04/11)
|
||||||
|
|
||||||
|
### Breaking changes
|
||||||
|
|
||||||
|
* Updated `gimli` and `object` dependencies.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
## 0.21.0 (2023/08/12)
|
||||||
|
|
||||||
|
### Breaking changes
|
||||||
|
|
||||||
|
* Updated `gimli`, `object`, and `fallible-iterator` dependencies.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
* The minimum supported rust version is 1.65.0.
|
||||||
|
|
||||||
|
* Store boxed slices instead of `Vec` objects in `Context`.
|
||||||
|
[#278](https://github.com/gimli-rs/addr2line/pull/278)
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
## 0.20.0 (2023/04/15)
|
## 0.20.0 (2023/04/15)
|
||||||
|
|
||||||
### Breaking changes
|
### Breaking changes
|
||||||
|
|
|
||||||
493
.gear/predownloaded-development/vendor/addr2line/Cargo.lock
generated
vendored
493
.gear/predownloaded-development/vendor/addr2line/Cargo.lock
generated
vendored
|
|
@ -4,16 +4,16 @@ version = 3
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "addr2line"
|
name = "addr2line"
|
||||||
version = "0.19.0"
|
version = "0.24.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97"
|
checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"gimli",
|
"gimli",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "addr2line"
|
name = "addr2line"
|
||||||
version = "0.20.0"
|
version = "0.24.2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"backtrace",
|
"backtrace",
|
||||||
"clap",
|
"clap",
|
||||||
|
|
@ -24,7 +24,7 @@ dependencies = [
|
||||||
"gimli",
|
"gimli",
|
||||||
"libtest-mimic",
|
"libtest-mimic",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
"object 0.31.0",
|
"object",
|
||||||
"rustc-demangle",
|
"rustc-demangle",
|
||||||
"rustc-std-workspace-alloc",
|
"rustc-std-workspace-alloc",
|
||||||
"rustc-std-workspace-core",
|
"rustc-std-workspace-core",
|
||||||
|
|
@ -33,60 +33,89 @@ dependencies = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "adler"
|
name = "adler2"
|
||||||
version = "1.0.2"
|
version = "2.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "atty"
|
name = "anstream"
|
||||||
version = "0.2.14"
|
version = "0.6.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hermit-abi 0.1.19",
|
"anstyle",
|
||||||
"libc",
|
"anstyle-parse",
|
||||||
"winapi",
|
"anstyle-query",
|
||||||
|
"anstyle-wincon",
|
||||||
|
"colorchoice",
|
||||||
|
"is_terminal_polyfill",
|
||||||
|
"utf8parse",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "anstyle"
|
||||||
version = "1.1.0"
|
version = "1.0.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-parse"
|
||||||
|
version = "0.2.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
|
||||||
|
dependencies = [
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-query"
|
||||||
|
version = "1.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys 0.52.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-wincon"
|
||||||
|
version = "3.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"windows-sys 0.52.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "backtrace"
|
name = "backtrace"
|
||||||
version = "0.3.67"
|
version = "0.3.74"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca"
|
checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"addr2line 0.19.0",
|
"addr2line 0.24.1",
|
||||||
"cc",
|
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"libc",
|
"libc",
|
||||||
"miniz_oxide",
|
"miniz_oxide",
|
||||||
"object 0.30.3",
|
"object",
|
||||||
"rustc-demangle",
|
"rustc-demangle",
|
||||||
|
"windows-targets",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bitflags"
|
name = "bitflags"
|
||||||
version = "1.3.2"
|
version = "2.6.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "byteorder"
|
|
||||||
version = "1.4.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
version = "1.0.79"
|
version = "1.1.24"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
checksum = "812acba72f0a070b003d3697490d2b55b837230ae7c6c6497f05cc2ddbb8d938"
|
||||||
|
dependencies = [
|
||||||
|
"shlex",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
|
|
@ -96,72 +125,96 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "3.2.23"
|
version = "4.5.19"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5"
|
checksum = "7be5744db7978a28d9df86a214130d106a89ce49644cbc4e3f0c22c3fba30615"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"atty",
|
"clap_builder",
|
||||||
"bitflags",
|
|
||||||
"clap_derive",
|
"clap_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_builder"
|
||||||
|
version = "4.5.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a5fbc17d3ef8278f55b282b2a2e75ae6f6c7d4bb70ed3d0382375104bfafdb4b"
|
||||||
|
dependencies = [
|
||||||
|
"anstream",
|
||||||
|
"anstyle",
|
||||||
"clap_lex",
|
"clap_lex",
|
||||||
"indexmap",
|
|
||||||
"once_cell",
|
|
||||||
"strsim",
|
"strsim",
|
||||||
"termcolor",
|
"terminal_size",
|
||||||
"textwrap",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap_derive"
|
name = "clap_derive"
|
||||||
version = "3.2.18"
|
version = "4.5.18"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
|
checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"heck",
|
"heck",
|
||||||
"proc-macro-error",
|
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 1.0.109",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap_lex"
|
name = "clap_lex"
|
||||||
version = "0.2.4"
|
version = "0.7.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
|
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
|
||||||
dependencies = [
|
|
||||||
"os_str_bytes",
|
[[package]]
|
||||||
]
|
name = "colorchoice"
|
||||||
|
version = "1.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "compiler_builtins"
|
name = "compiler_builtins"
|
||||||
version = "0.1.91"
|
version = "0.1.131"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "571298a3cce7e2afbd3d61abb91a18667d5ab25993ec577a88ee8ac45f00cc3a"
|
checksum = "d18d2ba094b78965890b2912f45dc8cb6bb3aff315ef54755ec33223b6454502"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cpp_demangle"
|
name = "cpp_demangle"
|
||||||
version = "0.4.1"
|
version = "0.4.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2c76f98bdfc7f66172e6c7065f981ebb576ffc903fe4c0561d9f0c2509226dc6"
|
checksum = "96e58d342ad113c2b878f16d5d034c03be492ae460cdbc02b7f0f2284d310c7d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crc32fast"
|
name = "crc32fast"
|
||||||
version = "1.3.2"
|
version = "1.4.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
|
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fallible-iterator"
|
name = "errno"
|
||||||
version = "0.2.0"
|
version = "0.3.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
|
checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"windows-sys 0.52.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "escape8259"
|
||||||
|
version = "0.5.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fallible-iterator"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "findshlibs"
|
name = "findshlibs"
|
||||||
|
|
@ -177,9 +230,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flate2"
|
name = "flate2"
|
||||||
version = "1.0.25"
|
version = "1.0.34"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841"
|
checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"crc32fast",
|
"crc32fast",
|
||||||
"miniz_oxide",
|
"miniz_oxide",
|
||||||
|
|
@ -187,9 +240,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "gimli"
|
name = "gimli"
|
||||||
version = "0.27.2"
|
version = "0.31.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
|
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"compiler_builtins",
|
"compiler_builtins",
|
||||||
"fallible-iterator",
|
"fallible-iterator",
|
||||||
|
|
@ -198,182 +251,122 @@ dependencies = [
|
||||||
"stable_deref_trait",
|
"stable_deref_trait",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hashbrown"
|
|
||||||
version = "0.12.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
version = "0.4.1"
|
version = "0.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.1.19"
|
version = "0.3.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "is_terminal_polyfill"
|
||||||
version = "0.2.6"
|
version = "1.70.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
|
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "indexmap"
|
|
||||||
version = "1.9.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
|
|
||||||
dependencies = [
|
|
||||||
"autocfg",
|
|
||||||
"hashbrown",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
version = "1.4.0"
|
version = "1.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.141"
|
version = "0.2.159"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5"
|
checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libtest-mimic"
|
name = "libtest-mimic"
|
||||||
version = "0.5.2"
|
version = "0.7.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "79529479c298f5af41375b0c1a77ef670d450b4c9cd7949d2b43af08121b20ec"
|
checksum = "cc0bda45ed5b3a2904262c1bb91e526127aa70e7ef3758aba2ef93cf896b9b58"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
|
"escape8259",
|
||||||
"termcolor",
|
"termcolor",
|
||||||
"threadpool",
|
"threadpool",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memchr"
|
name = "linux-raw-sys"
|
||||||
version = "2.5.0"
|
version = "0.4.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.7.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memmap2"
|
name = "memmap2"
|
||||||
version = "0.5.10"
|
version = "0.9.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327"
|
checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "miniz_oxide"
|
name = "miniz_oxide"
|
||||||
version = "0.6.2"
|
version = "0.8.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
|
checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"adler",
|
"adler2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num_cpus"
|
name = "num_cpus"
|
||||||
version = "1.15.0"
|
version = "1.16.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
|
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hermit-abi 0.2.6",
|
"hermit-abi",
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "object"
|
name = "object"
|
||||||
version = "0.30.3"
|
version = "0.36.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439"
|
checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e"
|
||||||
dependencies = [
|
|
||||||
"memchr",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "object"
|
|
||||||
version = "0.31.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d08090140cfee2e09897d6be320b47a45b79eb68b414de87130f9532966e2f1d"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"flate2",
|
"flate2",
|
||||||
"memchr",
|
"memchr",
|
||||||
"ruzstd",
|
"ruzstd",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "once_cell"
|
|
||||||
version = "1.17.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "os_str_bytes"
|
|
||||||
version = "6.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "proc-macro-error"
|
|
||||||
version = "1.0.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro-error-attr",
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn 1.0.109",
|
|
||||||
"version_check",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "proc-macro-error-attr"
|
|
||||||
version = "1.0.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"version_check",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.56"
|
version = "1.0.86"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435"
|
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quote"
|
name = "quote"
|
||||||
version = "1.0.26"
|
version = "1.0.37"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
|
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc-demangle"
|
name = "rustc-demangle"
|
||||||
version = "0.1.22"
|
version = "0.1.24"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d4a36c42d1873f9a77c53bde094f9664d9891bc604a45b4798fd2c389ed12e5b"
|
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc-std-workspace-alloc"
|
name = "rustc-std-workspace-alloc"
|
||||||
|
|
@ -388,21 +381,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1956f5517128a2b6f23ab2dadf1a976f4f5b27962e7724c2bf3d45e539ec098c"
|
checksum = "1956f5517128a2b6f23ab2dadf1a976f4f5b27962e7724c2bf3d45e539ec098c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ruzstd"
|
name = "rustix"
|
||||||
version = "0.3.1"
|
version = "0.38.37"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9a15e661f0f9dac21f3494fe5d23a6338c0ac116a2d22c2b63010acd89467ffe"
|
checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"errno",
|
||||||
|
"libc",
|
||||||
|
"linux-raw-sys",
|
||||||
|
"windows-sys 0.52.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ruzstd"
|
||||||
|
version = "0.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "99c3938e133aac070997ddc684d4b393777d293ba170f2988c8fd5ea2ad4ce21"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"byteorder",
|
|
||||||
"thiserror",
|
|
||||||
"twox-hash",
|
"twox-hash",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "smallvec"
|
name = "shlex"
|
||||||
version = "1.10.0"
|
version = "1.3.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "smallvec"
|
||||||
|
version = "1.13.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stable_deref_trait"
|
name = "stable_deref_trait"
|
||||||
|
|
@ -418,26 +428,15 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.10.0"
|
version = "0.11.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.109"
|
version = "2.0.79"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590"
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "syn"
|
|
||||||
version = "2.0.15"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
|
@ -446,37 +445,21 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "termcolor"
|
name = "termcolor"
|
||||||
version = "1.2.0"
|
version = "1.4.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
|
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"winapi-util",
|
"winapi-util",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "textwrap"
|
name = "terminal_size"
|
||||||
version = "0.16.0"
|
version = "0.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
|
checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thiserror"
|
|
||||||
version = "1.0.40"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"thiserror-impl",
|
"rustix",
|
||||||
]
|
"windows-sys 0.59.0",
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thiserror-impl"
|
|
||||||
version = "1.0.40"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn 2.0.15",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -506,15 +489,15 @@ checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-ident"
|
name = "unicode-ident"
|
||||||
version = "1.0.8"
|
version = "1.0.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
|
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "version_check"
|
name = "utf8parse"
|
||||||
version = "0.9.4"
|
version = "0.2.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi"
|
name = "winapi"
|
||||||
|
|
@ -534,11 +517,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi-util"
|
name = "winapi-util"
|
||||||
version = "0.1.5"
|
version = "0.1.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
|
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"winapi",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -546,3 +529,85 @@ name = "winapi-x86_64-pc-windows-gnu"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.52.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm",
|
||||||
|
"windows_aarch64_msvc",
|
||||||
|
"windows_i686_gnu",
|
||||||
|
"windows_i686_gnullvm",
|
||||||
|
"windows_i686_msvc",
|
||||||
|
"windows_x86_64_gnu",
|
||||||
|
"windows_x86_64_gnullvm",
|
||||||
|
"windows_x86_64_msvc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
|
|
|
||||||
|
|
@ -11,16 +11,26 @@
|
||||||
|
|
||||||
[package]
|
[package]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
rust-version = "1.65"
|
||||||
name = "addr2line"
|
name = "addr2line"
|
||||||
version = "0.20.0"
|
version = "0.24.2"
|
||||||
exclude = [
|
build = false
|
||||||
"/benches/*",
|
include = [
|
||||||
"/fixtures/*",
|
"/CHANGELOG.md",
|
||||||
".github",
|
"/Cargo.lock",
|
||||||
|
"/Cargo.toml",
|
||||||
|
"/LICENSE-APACHE",
|
||||||
|
"/LICENSE-MIT",
|
||||||
|
"/README.md",
|
||||||
|
"/src",
|
||||||
]
|
]
|
||||||
|
autobins = false
|
||||||
|
autoexamples = false
|
||||||
|
autotests = false
|
||||||
|
autobenches = false
|
||||||
description = "A cross-platform symbolication library written in Rust, using `gimli`"
|
description = "A cross-platform symbolication library written in Rust, using `gimli`"
|
||||||
documentation = "https://docs.rs/addr2line"
|
documentation = "https://docs.rs/addr2line"
|
||||||
readme = "./README.md"
|
readme = "README.md"
|
||||||
keywords = [
|
keywords = [
|
||||||
"DWARF",
|
"DWARF",
|
||||||
"debug",
|
"debug",
|
||||||
|
|
@ -34,33 +44,30 @@ repository = "https://github.com/gimli-rs/addr2line"
|
||||||
|
|
||||||
[profile.bench]
|
[profile.bench]
|
||||||
codegen-units = 1
|
codegen-units = 1
|
||||||
debug = true
|
debug = 2
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
debug = true
|
debug = 2
|
||||||
|
|
||||||
[[example]]
|
[lib]
|
||||||
name = "addr2line"
|
name = "addr2line"
|
||||||
required-features = ["default"]
|
path = "src/lib.rs"
|
||||||
|
|
||||||
[[test]]
|
[[bin]]
|
||||||
name = "output_equivalence"
|
name = "addr2line"
|
||||||
harness = false
|
path = "src/bin/addr2line.rs"
|
||||||
required-features = ["default"]
|
required-features = ["bin"]
|
||||||
|
|
||||||
[[test]]
|
|
||||||
name = "correctness"
|
|
||||||
required-features = ["default"]
|
|
||||||
|
|
||||||
[[test]]
|
|
||||||
name = "parse"
|
|
||||||
required-features = ["std-object"]
|
|
||||||
|
|
||||||
[dependencies.alloc]
|
[dependencies.alloc]
|
||||||
version = "1.0.0"
|
version = "1.0.0"
|
||||||
optional = true
|
optional = true
|
||||||
package = "rustc-std-workspace-alloc"
|
package = "rustc-std-workspace-alloc"
|
||||||
|
|
||||||
|
[dependencies.clap]
|
||||||
|
version = "4.3.21"
|
||||||
|
features = ["wrap_help"]
|
||||||
|
optional = true
|
||||||
|
|
||||||
[dependencies.compiler_builtins]
|
[dependencies.compiler_builtins]
|
||||||
version = "0.1.2"
|
version = "0.1.2"
|
||||||
optional = true
|
optional = true
|
||||||
|
|
@ -77,22 +84,25 @@ optional = true
|
||||||
default-features = false
|
default-features = false
|
||||||
|
|
||||||
[dependencies.fallible-iterator]
|
[dependencies.fallible-iterator]
|
||||||
version = "0.2"
|
version = "0.3.0"
|
||||||
optional = true
|
optional = true
|
||||||
default-features = false
|
default-features = false
|
||||||
|
|
||||||
[dependencies.gimli]
|
[dependencies.gimli]
|
||||||
version = "0.27.2"
|
version = "0.31.1"
|
||||||
features = ["read"]
|
features = ["read"]
|
||||||
default-features = false
|
default-features = false
|
||||||
|
|
||||||
[dependencies.memmap2]
|
[dependencies.memmap2]
|
||||||
version = "0.5.5"
|
version = "0.9.4"
|
||||||
optional = true
|
optional = true
|
||||||
|
|
||||||
[dependencies.object]
|
[dependencies.object]
|
||||||
version = "0.31.0"
|
version = "0.36.0"
|
||||||
features = ["read"]
|
features = [
|
||||||
|
"read",
|
||||||
|
"compression",
|
||||||
|
]
|
||||||
optional = true
|
optional = true
|
||||||
default-features = false
|
default-features = false
|
||||||
|
|
||||||
|
|
@ -105,29 +115,42 @@ version = "1"
|
||||||
optional = true
|
optional = true
|
||||||
default-features = false
|
default-features = false
|
||||||
|
|
||||||
|
[dependencies.typed-arena]
|
||||||
|
version = "2"
|
||||||
|
optional = true
|
||||||
|
|
||||||
[dev-dependencies.backtrace]
|
[dev-dependencies.backtrace]
|
||||||
version = "0.3.13"
|
version = "0.3.13"
|
||||||
|
|
||||||
[dev-dependencies.clap]
|
|
||||||
version = "3.1.6"
|
|
||||||
|
|
||||||
[dev-dependencies.findshlibs]
|
[dev-dependencies.findshlibs]
|
||||||
version = "0.10"
|
version = "0.10"
|
||||||
|
|
||||||
[dev-dependencies.libtest-mimic]
|
[dev-dependencies.libtest-mimic]
|
||||||
version = "0.5.2"
|
version = "0.7.2"
|
||||||
|
|
||||||
[dev-dependencies.typed-arena]
|
|
||||||
version = "2"
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
all = ["bin"]
|
||||||
|
bin = [
|
||||||
|
"loader",
|
||||||
|
"rustc-demangle",
|
||||||
|
"cpp_demangle",
|
||||||
|
"fallible-iterator",
|
||||||
|
"smallvec",
|
||||||
|
"dep:clap",
|
||||||
|
]
|
||||||
|
cargo-all = []
|
||||||
default = [
|
default = [
|
||||||
"rustc-demangle",
|
"rustc-demangle",
|
||||||
"cpp_demangle",
|
"cpp_demangle",
|
||||||
"std-object",
|
"loader",
|
||||||
"fallible-iterator",
|
"fallible-iterator",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
"memmap2",
|
]
|
||||||
|
loader = [
|
||||||
|
"std",
|
||||||
|
"dep:object",
|
||||||
|
"dep:memmap2",
|
||||||
|
"dep:typed-arena",
|
||||||
]
|
]
|
||||||
rustc-dep-of-std = [
|
rustc-dep-of-std = [
|
||||||
"core",
|
"core",
|
||||||
|
|
@ -136,10 +159,3 @@ rustc-dep-of-std = [
|
||||||
"gimli/rustc-dep-of-std",
|
"gimli/rustc-dep-of-std",
|
||||||
]
|
]
|
||||||
std = ["gimli/std"]
|
std = ["gimli/std"]
|
||||||
std-object = [
|
|
||||||
"std",
|
|
||||||
"object",
|
|
||||||
"object/std",
|
|
||||||
"object/compression",
|
|
||||||
"gimli/endian-reader",
|
|
||||||
]
|
|
||||||
|
|
|
||||||
53
.gear/predownloaded-development/vendor/addr2line/Cargo.toml.orig
generated
vendored
53
.gear/predownloaded-development/vendor/addr2line/Cargo.toml.orig
generated
vendored
|
|
@ -1,25 +1,40 @@
|
||||||
[package]
|
[package]
|
||||||
name = "addr2line"
|
name = "addr2line"
|
||||||
version = "0.20.0"
|
version = "0.24.2"
|
||||||
description = "A cross-platform symbolication library written in Rust, using `gimli`"
|
description = "A cross-platform symbolication library written in Rust, using `gimli`"
|
||||||
documentation = "https://docs.rs/addr2line"
|
documentation = "https://docs.rs/addr2line"
|
||||||
exclude = ["/benches/*", "/fixtures/*", ".github"]
|
|
||||||
keywords = ["DWARF", "debug", "elf", "symbolicate", "atos"]
|
keywords = ["DWARF", "debug", "elf", "symbolicate", "atos"]
|
||||||
categories = ["development-tools::debugging"]
|
categories = ["development-tools::debugging"]
|
||||||
license = "Apache-2.0 OR MIT"
|
license = "Apache-2.0 OR MIT"
|
||||||
readme = "./README.md"
|
readme = "./README.md"
|
||||||
repository = "https://github.com/gimli-rs/addr2line"
|
repository = "https://github.com/gimli-rs/addr2line"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
rust-version = "1.65"
|
||||||
|
include = [
|
||||||
|
"/CHANGELOG.md",
|
||||||
|
"/Cargo.lock",
|
||||||
|
"/Cargo.toml",
|
||||||
|
"/LICENSE-APACHE",
|
||||||
|
"/LICENSE-MIT",
|
||||||
|
"/README.md",
|
||||||
|
"/src",
|
||||||
|
]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
gimli = { version = "0.27.2", default-features = false, features = ["read"] }
|
gimli = { version = "0.31.1", default-features = false, features = ["read"] }
|
||||||
fallible-iterator = { version = "0.2", default-features = false, optional = true }
|
fallible-iterator = { version = "0.3.0", default-features = false, optional = true }
|
||||||
memmap2 = { version = "0.5.5", optional = true }
|
|
||||||
object = { version = "0.31.0", default-features = false, features = ["read"], optional = true }
|
|
||||||
smallvec = { version = "1", default-features = false, optional = true }
|
smallvec = { version = "1", default-features = false, optional = true }
|
||||||
rustc-demangle = { version = "0.1", optional = true }
|
rustc-demangle = { version = "0.1", optional = true }
|
||||||
cpp_demangle = { version = "0.4", default-features = false, features = ["alloc"], optional = true }
|
cpp_demangle = { version = "0.4", default-features = false, features = ["alloc"], optional = true }
|
||||||
|
|
||||||
|
# loader dependencies
|
||||||
|
object = { version = "0.36.0", default-features = false, features = ["read", "compression"], optional = true }
|
||||||
|
memmap2 = { version = "0.9.4", optional = true }
|
||||||
|
typed-arena = { version = "2", optional = true }
|
||||||
|
|
||||||
|
# bin dependencies
|
||||||
|
clap = { version = "4.3.21", features = ["wrap_help"], optional = true }
|
||||||
|
|
||||||
# Internal feature, only used when building as part of libstd, not part of the
|
# Internal feature, only used when building as part of libstd, not part of the
|
||||||
# stable interface of this crate.
|
# stable interface of this crate.
|
||||||
core = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-core' }
|
core = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-core' }
|
||||||
|
|
@ -27,12 +42,10 @@ alloc = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-all
|
||||||
compiler_builtins = { version = '0.1.2', optional = true }
|
compiler_builtins = { version = '0.1.2', optional = true }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
clap = "3.1.6"
|
|
||||||
backtrace = "0.3.13"
|
backtrace = "0.3.13"
|
||||||
findshlibs = "0.10"
|
findshlibs = "0.10"
|
||||||
libtest-mimic = "0.5.2"
|
libtest-mimic = "0.7.2"
|
||||||
auxiliary = { path = "tests/auxiliary" }
|
auxiliary = { path = "tests/auxiliary" }
|
||||||
typed-arena = "2"
|
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
debug = true
|
debug = true
|
||||||
|
|
@ -42,27 +55,33 @@ debug = true
|
||||||
codegen-units = 1
|
codegen-units = 1
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["rustc-demangle", "cpp_demangle", "std-object", "fallible-iterator", "smallvec", "memmap2"]
|
default = ["rustc-demangle", "cpp_demangle", "loader", "fallible-iterator", "smallvec"]
|
||||||
std = ["gimli/std"]
|
std = ["gimli/std"]
|
||||||
std-object = ["std", "object", "object/std", "object/compression", "gimli/endian-reader"]
|
loader = ["std", "dep:object", "dep:memmap2", "dep:typed-arena"]
|
||||||
|
bin = ["loader", "rustc-demangle", "cpp_demangle", "fallible-iterator", "smallvec", "dep:clap"]
|
||||||
|
all = ["bin"]
|
||||||
|
|
||||||
|
# Use of --all-features is not supported.
|
||||||
|
# This is a dummy feature to detect when --all-features is used.
|
||||||
|
cargo-all = []
|
||||||
|
|
||||||
# Internal feature, only used when building as part of libstd, not part of the
|
# Internal feature, only used when building as part of libstd, not part of the
|
||||||
# stable interface of this crate.
|
# stable interface of this crate.
|
||||||
rustc-dep-of-std = ['core', 'alloc', 'compiler_builtins', 'gimli/rustc-dep-of-std']
|
rustc-dep-of-std = ['core', 'alloc', 'compiler_builtins', 'gimli/rustc-dep-of-std']
|
||||||
|
|
||||||
[[test]]
|
[[test]]
|
||||||
name = "output_equivalence"
|
name = "testinput"
|
||||||
harness = false
|
harness = false
|
||||||
required-features = ["default"]
|
required-features = ["bin"]
|
||||||
|
|
||||||
[[test]]
|
[[test]]
|
||||||
name = "correctness"
|
name = "correctness"
|
||||||
required-features = ["default"]
|
required-features = ["loader", "fallible-iterator"]
|
||||||
|
|
||||||
[[test]]
|
[[test]]
|
||||||
name = "parse"
|
name = "parse"
|
||||||
required-features = ["std-object"]
|
required-features = ["loader"]
|
||||||
|
|
||||||
[[example]]
|
[[bin]]
|
||||||
name = "addr2line"
|
name = "addr2line"
|
||||||
required-features = ["default"]
|
required-features = ["bin"]
|
||||||
|
|
|
||||||
|
|
@ -4,25 +4,25 @@
|
||||||
[](https://docs.rs/addr2line)
|
[](https://docs.rs/addr2line)
|
||||||
[](https://coveralls.io/github/gimli-rs/addr2line?branch=master)
|
[](https://coveralls.io/github/gimli-rs/addr2line?branch=master)
|
||||||
|
|
||||||
A cross-platform library for retrieving per-address debug information
|
`addr2line` provides a cross-platform library for retrieving per-address debug information
|
||||||
from files with DWARF debug information.
|
from files with DWARF debug information. Given an address, it can return the file name,
|
||||||
|
line number, and function name associated with that address, as well as the inline call
|
||||||
|
stack leading to that address.
|
||||||
|
|
||||||
`addr2line` uses [`gimli`](https://github.com/gimli-rs/gimli) to parse
|
The crate has a CLI wrapper around the library which provides some of
|
||||||
the debug information, and exposes an interface for finding
|
the functionality of the `addr2line` command line tool distributed with
|
||||||
the source file, line number, and wrapping function for instruction
|
[GNU binutils](https://sourceware.org/binutils/docs/binutils/addr2line.html).
|
||||||
addresses within the target program. These lookups can either be
|
|
||||||
performed programmatically through `Context::find_location` and
|
|
||||||
`Context::find_frames`, or via the included example binary,
|
|
||||||
`addr2line` (named and modelled after the equivalent utility from
|
|
||||||
[GNU binutils](https://sourceware.org/binutils/docs/binutils/addr2line.html)).
|
|
||||||
|
|
||||||
# Quickstart
|
# Quickstart
|
||||||
- Add the [`addr2line` crate](https://crates.io/crates/addr2line) to your `Cargo.toml`
|
- Add the [`addr2line` crate](https://crates.io/crates/addr2line) to your `Cargo.toml`.
|
||||||
- Load the file and parse it with [`addr2line::object::read::File::parse`](https://docs.rs/object/*/object/read/struct.File.html#method.parse)
|
- Call [`addr2line::Loader::new`](https://docs.rs/addr2line/*/addr2line/struct.Loader.html#method.new) with the file path.
|
||||||
- Pass the parsed file to [`addr2line::Context::new` ](https://docs.rs/addr2line/*/addr2line/struct.Context.html#method.new)
|
- Use [`addr2line::Loader::find_location`](https://docs.rs/addr2line/*/addr2line/struct.Loader.html#method.find_location)
|
||||||
- Use [`addr2line::Context::find_location`](https://docs.rs/addr2line/*/addr2line/struct.Context.html#method.find_location)
|
or [`addr2line::Loader::find_frames`](https://docs.rs/addr2line/*/addr2line/struct.Loader.html#method.find_frames)
|
||||||
or [`addr2line::Context::find_frames`](https://docs.rs/addr2line/*/addr2line/struct.Context.html#method.find_frames)
|
to look up debug information for an address.
|
||||||
to look up debug information for an address
|
|
||||||
|
If you want to provide your own file loading and memory management, use
|
||||||
|
[`addr2line::Context`](https://docs.rs/addr2line/*/addr2line/struct.Context.html)
|
||||||
|
instead of `addr2line::Loader`.
|
||||||
|
|
||||||
# Performance
|
# Performance
|
||||||
|
|
||||||
|
|
@ -31,8 +31,10 @@ The DWARF information is parsed lazily where possible.
|
||||||
|
|
||||||
The library aims to perform similarly to equivalent existing tools such
|
The library aims to perform similarly to equivalent existing tools such
|
||||||
as `addr2line` from binutils, `eu-addr2line` from elfutils, and
|
as `addr2line` from binutils, `eu-addr2line` from elfutils, and
|
||||||
`llvm-symbolize` from the llvm project, and in the past some benchmarking
|
`llvm-addr2line` from the llvm project. Current benchmarks show a performance
|
||||||
was done that indicates a comparable performance.
|
improvement in all cases:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,23 +0,0 @@
|
||||||
v <- read.table(file("stdin"))
|
|
||||||
t <- data.frame(prog=v[,1], funcs=(v[,2]=="func"), time=v[,3], mem=v[,4], stringsAsFactors=FALSE)
|
|
||||||
|
|
||||||
t$prog <- as.character(t$prog)
|
|
||||||
t$prog[t$prog == "master"] <- "gimli-rs/addr2line"
|
|
||||||
t$funcs[t$funcs == TRUE] <- "With functions"
|
|
||||||
t$funcs[t$funcs == FALSE] <- "File/line only"
|
|
||||||
t$mem = t$mem / 1024.0
|
|
||||||
|
|
||||||
library(ggplot2)
|
|
||||||
p <- ggplot(data=t, aes(x=prog, y=time, fill=prog))
|
|
||||||
p <- p + geom_bar(stat = "identity")
|
|
||||||
p <- p + facet_wrap(~ funcs)
|
|
||||||
p <- p + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank())
|
|
||||||
p <- p + ylab("time (s)") + ggtitle("addr2line runtime")
|
|
||||||
ggsave('time.png',plot=p,width=10,height=6)
|
|
||||||
|
|
||||||
p <- ggplot(data=t, aes(x=prog, y=mem, fill=prog))
|
|
||||||
p <- p + geom_bar(stat = "identity")
|
|
||||||
p <- p + facet_wrap(~ funcs)
|
|
||||||
p <- p + theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank())
|
|
||||||
p <- p + ylab("memory (kB)") + ggtitle("addr2line memory usage")
|
|
||||||
ggsave('memory.png',plot=p,width=10,height=6)
|
|
||||||
|
|
@ -1,112 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
if [[ $# -le 1 ]]; then
|
|
||||||
echo "Usage: $0 <executable> [<addresses>] REFS..."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
target="$1"
|
|
||||||
shift
|
|
||||||
|
|
||||||
addresses=""
|
|
||||||
if [[ -e "$1" ]]; then
|
|
||||||
addresses="$1"
|
|
||||||
shift
|
|
||||||
fi
|
|
||||||
|
|
||||||
# path to "us"
|
|
||||||
# readlink -f, but more portable:
|
|
||||||
dirname=$(perl -e 'use Cwd "abs_path";print abs_path(shift)' "$(dirname "$0")")
|
|
||||||
|
|
||||||
# https://stackoverflow.com/a/2358432/472927
|
|
||||||
{
|
|
||||||
# compile all refs
|
|
||||||
pushd "$dirname" > /dev/null
|
|
||||||
# if the user has some local changes, preserve them
|
|
||||||
nstashed=$(git stash list | wc -l)
|
|
||||||
echo "==> Stashing any local modifications"
|
|
||||||
git stash --keep-index > /dev/null
|
|
||||||
popstash() {
|
|
||||||
# https://stackoverflow.com/q/24520791/472927
|
|
||||||
if [[ "$(git stash list | wc -l)" -ne "$nstashed" ]]; then
|
|
||||||
echo "==> Restoring stashed state"
|
|
||||||
git stash pop > /dev/null
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
# if the user has added stuff to the index, abort
|
|
||||||
if ! git diff-index --quiet HEAD --; then
|
|
||||||
echo "Refusing to overwrite outstanding git changes"
|
|
||||||
popstash
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
current=$(git symbolic-ref --short HEAD)
|
|
||||||
for ref in "$@"; do
|
|
||||||
echo "==> Compiling $ref"
|
|
||||||
git checkout -q "$ref"
|
|
||||||
commit=$(git rev-parse HEAD)
|
|
||||||
fn="target/release/addr2line-$commit"
|
|
||||||
if [[ ! -e "$fn" ]]; then
|
|
||||||
cargo build --release --example addr2line
|
|
||||||
cp target/release/examples/addr2line "$fn"
|
|
||||||
fi
|
|
||||||
if [[ "$ref" != "$commit" ]]; then
|
|
||||||
ln -sfn "addr2line-$commit" target/release/addr2line-"$ref"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
git checkout -q "$current"
|
|
||||||
popstash
|
|
||||||
popd > /dev/null
|
|
||||||
|
|
||||||
# get us some addresses to look up
|
|
||||||
if [[ -z "$addresses" ]]; then
|
|
||||||
echo "==> Looking for benchmarking addresses (this may take a while)"
|
|
||||||
addresses=$(mktemp tmp.XXXXXXXXXX)
|
|
||||||
objdump -C -x --disassemble -l "$target" \
|
|
||||||
| grep -P '0[048]:' \
|
|
||||||
| awk '{print $1}' \
|
|
||||||
| sed 's/:$//' \
|
|
||||||
> "$addresses"
|
|
||||||
echo " -> Addresses stored in $addresses; you should re-use it next time"
|
|
||||||
fi
|
|
||||||
|
|
||||||
run() {
|
|
||||||
func="$1"
|
|
||||||
name="$2"
|
|
||||||
cmd="$3"
|
|
||||||
args="$4"
|
|
||||||
printf "%s\t%s\t" "$name" "$func"
|
|
||||||
if [[ "$cmd" =~ llvm-symbolizer ]]; then
|
|
||||||
/usr/bin/time -f '%e\t%M' "$cmd" $args -obj="$target" < "$addresses" 2>&1 >/dev/null
|
|
||||||
else
|
|
||||||
/usr/bin/time -f '%e\t%M' "$cmd" $args -e "$target" < "$addresses" 2>&1 >/dev/null
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# run without functions
|
|
||||||
log1=$(mktemp tmp.XXXXXXXXXX)
|
|
||||||
echo "==> Benchmarking"
|
|
||||||
run nofunc binutils addr2line >> "$log1"
|
|
||||||
#run nofunc elfutils eu-addr2line >> "$log1"
|
|
||||||
run nofunc llvm-sym llvm-symbolizer -functions=none >> "$log1"
|
|
||||||
for ref in "$@"; do
|
|
||||||
run nofunc "$ref" "$dirname/target/release/addr2line-$ref" >> "$log1"
|
|
||||||
done
|
|
||||||
cat "$log1" | column -t
|
|
||||||
|
|
||||||
# run with functions
|
|
||||||
log2=$(mktemp tmp.XXXXXXXXXX)
|
|
||||||
echo "==> Benchmarking with -f"
|
|
||||||
run func binutils addr2line "-f -i" >> "$log2"
|
|
||||||
#run func elfutils eu-addr2line "-f -i" >> "$log2"
|
|
||||||
run func llvm-sym llvm-symbolizer "-functions=linkage -demangle=0" >> "$log2"
|
|
||||||
for ref in "$@"; do
|
|
||||||
run func "$ref" "$dirname/target/release/addr2line-$ref" "-f -i" >> "$log2"
|
|
||||||
done
|
|
||||||
cat "$log2" | column -t
|
|
||||||
cat "$log2" >> "$log1"; rm "$log2"
|
|
||||||
|
|
||||||
echo "==> Plotting"
|
|
||||||
Rscript --no-readline --no-restore --no-save "$dirname/bench.plot.r" < "$log1"
|
|
||||||
|
|
||||||
echo "==> Cleaning up"
|
|
||||||
rm "$log1"
|
|
||||||
exit 0
|
|
||||||
}
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
#!/bin/sh
|
|
||||||
# Run tarpaulin and pycobertura to generate coverage.html.
|
|
||||||
|
|
||||||
cargo tarpaulin --skip-clean --out Xml
|
|
||||||
pycobertura show --format html --output coverage.html cobertura.xml
|
|
||||||
|
|
@ -1,306 +0,0 @@
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::fs::File;
|
|
||||||
use std::io::{BufRead, Lines, StdinLock, Write};
|
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
|
|
||||||
use clap::{Arg, Command, Values};
|
|
||||||
use fallible_iterator::FallibleIterator;
|
|
||||||
use object::{Object, ObjectSection, SymbolMap, SymbolMapName};
|
|
||||||
use typed_arena::Arena;
|
|
||||||
|
|
||||||
use addr2line::{Context, Location};
|
|
||||||
|
|
||||||
fn parse_uint_from_hex_string(string: &str) -> Option<u64> {
|
|
||||||
if string.len() > 2 && string.starts_with("0x") {
|
|
||||||
u64::from_str_radix(&string[2..], 16).ok()
|
|
||||||
} else {
|
|
||||||
u64::from_str_radix(string, 16).ok()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
enum Addrs<'a> {
|
|
||||||
Args(Values<'a>),
|
|
||||||
Stdin(Lines<StdinLock<'a>>),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Iterator for Addrs<'a> {
|
|
||||||
type Item = Option<u64>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Option<u64>> {
|
|
||||||
let text = match *self {
|
|
||||||
Addrs::Args(ref mut vals) => vals.next().map(Cow::from),
|
|
||||||
Addrs::Stdin(ref mut lines) => lines.next().map(Result::unwrap).map(Cow::from),
|
|
||||||
};
|
|
||||||
text.as_ref()
|
|
||||||
.map(Cow::as_ref)
|
|
||||||
.map(parse_uint_from_hex_string)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn print_loc(loc: Option<&Location<'_>>, basenames: bool, llvm: bool) {
|
|
||||||
if let Some(loc) = loc {
|
|
||||||
if let Some(ref file) = loc.file.as_ref() {
|
|
||||||
let path = if basenames {
|
|
||||||
Path::new(Path::new(file).file_name().unwrap())
|
|
||||||
} else {
|
|
||||||
Path::new(file)
|
|
||||||
};
|
|
||||||
print!("{}:", path.display());
|
|
||||||
} else {
|
|
||||||
print!("??:");
|
|
||||||
}
|
|
||||||
if llvm {
|
|
||||||
print!("{}:{}", loc.line.unwrap_or(0), loc.column.unwrap_or(0));
|
|
||||||
} else if let Some(line) = loc.line {
|
|
||||||
print!("{}", line);
|
|
||||||
} else {
|
|
||||||
print!("?");
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
} else if llvm {
|
|
||||||
println!("??:0:0");
|
|
||||||
} else {
|
|
||||||
println!("??:0");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn print_function(name: Option<&str>, language: Option<gimli::DwLang>, demangle: bool) {
|
|
||||||
if let Some(name) = name {
|
|
||||||
if demangle {
|
|
||||||
print!("{}", addr2line::demangle_auto(Cow::from(name), language));
|
|
||||||
} else {
|
|
||||||
print!("{}", name);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
print!("??");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn load_file_section<'input, 'arena, Endian: gimli::Endianity>(
|
|
||||||
id: gimli::SectionId,
|
|
||||||
file: &object::File<'input>,
|
|
||||||
endian: Endian,
|
|
||||||
arena_data: &'arena Arena<Cow<'input, [u8]>>,
|
|
||||||
) -> Result<gimli::EndianSlice<'arena, Endian>, ()> {
|
|
||||||
// TODO: Unify with dwarfdump.rs in gimli.
|
|
||||||
let name = id.name();
|
|
||||||
match file.section_by_name(name) {
|
|
||||||
Some(section) => match section.uncompressed_data().unwrap() {
|
|
||||||
Cow::Borrowed(b) => Ok(gimli::EndianSlice::new(b, endian)),
|
|
||||||
Cow::Owned(b) => Ok(gimli::EndianSlice::new(arena_data.alloc(b.into()), endian)),
|
|
||||||
},
|
|
||||||
None => Ok(gimli::EndianSlice::new(&[][..], endian)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn find_name_from_symbols<'a>(
|
|
||||||
symbols: &'a SymbolMap<SymbolMapName<'_>>,
|
|
||||||
probe: u64,
|
|
||||||
) -> Option<&'a str> {
|
|
||||||
symbols.get(probe).map(|x| x.name())
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Options<'a> {
|
|
||||||
do_functions: bool,
|
|
||||||
do_inlines: bool,
|
|
||||||
pretty: bool,
|
|
||||||
print_addrs: bool,
|
|
||||||
basenames: bool,
|
|
||||||
demangle: bool,
|
|
||||||
llvm: bool,
|
|
||||||
exe: &'a PathBuf,
|
|
||||||
sup: Option<&'a PathBuf>,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let matches = Command::new("addr2line")
|
|
||||||
.version(env!("CARGO_PKG_VERSION"))
|
|
||||||
.about("A fast addr2line Rust port")
|
|
||||||
.args(&[
|
|
||||||
Arg::new("exe")
|
|
||||||
.short('e')
|
|
||||||
.long("exe")
|
|
||||||
.value_name("filename")
|
|
||||||
.value_parser(clap::value_parser!(PathBuf))
|
|
||||||
.help(
|
|
||||||
"Specify the name of the executable for which addresses should be translated.",
|
|
||||||
)
|
|
||||||
.required(true),
|
|
||||||
Arg::new("sup")
|
|
||||||
.long("sup")
|
|
||||||
.value_name("filename")
|
|
||||||
.value_parser(clap::value_parser!(PathBuf))
|
|
||||||
.help("Path to supplementary object file."),
|
|
||||||
Arg::new("functions")
|
|
||||||
.short('f')
|
|
||||||
.long("functions")
|
|
||||||
.help("Display function names as well as file and line number information."),
|
|
||||||
Arg::new("pretty").short('p').long("pretty-print").help(
|
|
||||||
"Make the output more human friendly: each location are printed on one line.",
|
|
||||||
),
|
|
||||||
Arg::new("inlines").short('i').long("inlines").help(
|
|
||||||
"If the address belongs to a function that was inlined, the source information for \
|
|
||||||
all enclosing scopes back to the first non-inlined function will also be printed.",
|
|
||||||
),
|
|
||||||
Arg::new("addresses").short('a').long("addresses").help(
|
|
||||||
"Display the address before the function name, file and line number information.",
|
|
||||||
),
|
|
||||||
Arg::new("basenames")
|
|
||||||
.short('s')
|
|
||||||
.long("basenames")
|
|
||||||
.help("Display only the base of each file name."),
|
|
||||||
Arg::new("demangle").short('C').long("demangle").help(
|
|
||||||
"Demangle function names. \
|
|
||||||
Specifying a specific demangling style (like GNU addr2line) is not supported. \
|
|
||||||
(TODO)"
|
|
||||||
),
|
|
||||||
Arg::new("llvm")
|
|
||||||
.long("llvm")
|
|
||||||
.help("Display output in the same format as llvm-symbolizer."),
|
|
||||||
Arg::new("addrs")
|
|
||||||
.takes_value(true)
|
|
||||||
.multiple_occurrences(true)
|
|
||||||
.help("Addresses to use instead of reading from stdin."),
|
|
||||||
])
|
|
||||||
.get_matches();
|
|
||||||
|
|
||||||
let arena_data = Arena::new();
|
|
||||||
|
|
||||||
let opts = Options {
|
|
||||||
do_functions: matches.is_present("functions"),
|
|
||||||
do_inlines: matches.is_present("inlines"),
|
|
||||||
pretty: matches.is_present("pretty"),
|
|
||||||
print_addrs: matches.is_present("addresses"),
|
|
||||||
basenames: matches.is_present("basenames"),
|
|
||||||
demangle: matches.is_present("demangle"),
|
|
||||||
llvm: matches.is_present("llvm"),
|
|
||||||
exe: matches.get_one::<PathBuf>("exe").unwrap(),
|
|
||||||
sup: matches.get_one::<PathBuf>("sup"),
|
|
||||||
};
|
|
||||||
|
|
||||||
let file = File::open(opts.exe).unwrap();
|
|
||||||
let map = unsafe { memmap2::Mmap::map(&file).unwrap() };
|
|
||||||
let object = &object::File::parse(&*map).unwrap();
|
|
||||||
|
|
||||||
let endian = if object.is_little_endian() {
|
|
||||||
gimli::RunTimeEndian::Little
|
|
||||||
} else {
|
|
||||||
gimli::RunTimeEndian::Big
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
|
|
||||||
load_file_section(id, object, endian, &arena_data)
|
|
||||||
};
|
|
||||||
|
|
||||||
let sup_map;
|
|
||||||
let sup_object = if let Some(sup_path) = opts.sup {
|
|
||||||
let sup_file = File::open(sup_path).unwrap();
|
|
||||||
sup_map = unsafe { memmap2::Mmap::map(&sup_file).unwrap() };
|
|
||||||
Some(object::File::parse(&*sup_map).unwrap())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
let symbols = object.symbol_map();
|
|
||||||
let mut dwarf = gimli::Dwarf::load(&mut load_section).unwrap();
|
|
||||||
if let Some(ref sup_object) = sup_object {
|
|
||||||
let mut load_sup_section = |id: gimli::SectionId| -> Result<_, _> {
|
|
||||||
load_file_section(id, sup_object, endian, &arena_data)
|
|
||||||
};
|
|
||||||
dwarf.load_sup(&mut load_sup_section).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut split_dwarf_loader = addr2line::builtin_split_dwarf_loader::SplitDwarfLoader::new(
|
|
||||||
|data, endian| {
|
|
||||||
gimli::EndianSlice::new(arena_data.alloc(Cow::Owned(data.into_owned())), endian)
|
|
||||||
},
|
|
||||||
Some(opts.exe.clone()),
|
|
||||||
);
|
|
||||||
let ctx = Context::from_dwarf(dwarf).unwrap();
|
|
||||||
|
|
||||||
let stdin = std::io::stdin();
|
|
||||||
let addrs = matches
|
|
||||||
.values_of("addrs")
|
|
||||||
.map(Addrs::Args)
|
|
||||||
.unwrap_or_else(|| Addrs::Stdin(stdin.lock().lines()));
|
|
||||||
|
|
||||||
for probe in addrs {
|
|
||||||
if opts.print_addrs {
|
|
||||||
let addr = probe.unwrap_or(0);
|
|
||||||
if opts.llvm {
|
|
||||||
print!("0x{:x}", addr);
|
|
||||||
} else {
|
|
||||||
print!("0x{:016x}", addr);
|
|
||||||
}
|
|
||||||
if opts.pretty {
|
|
||||||
print!(": ");
|
|
||||||
} else {
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.do_functions || opts.do_inlines {
|
|
||||||
let mut printed_anything = false;
|
|
||||||
if let Some(probe) = probe {
|
|
||||||
let frames = ctx.find_frames(probe);
|
|
||||||
let frames = split_dwarf_loader.run(frames).unwrap();
|
|
||||||
let mut frames = frames.enumerate();
|
|
||||||
while let Some((i, frame)) = frames.next().unwrap() {
|
|
||||||
if opts.pretty && i != 0 {
|
|
||||||
print!(" (inlined by) ");
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.do_functions {
|
|
||||||
if let Some(func) = frame.function {
|
|
||||||
print_function(
|
|
||||||
func.raw_name().ok().as_ref().map(AsRef::as_ref),
|
|
||||||
func.language,
|
|
||||||
opts.demangle,
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
let name = find_name_from_symbols(&symbols, probe);
|
|
||||||
print_function(name, None, opts.demangle);
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.pretty {
|
|
||||||
print!(" at ");
|
|
||||||
} else {
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
print_loc(frame.location.as_ref(), opts.basenames, opts.llvm);
|
|
||||||
|
|
||||||
printed_anything = true;
|
|
||||||
|
|
||||||
if !opts.do_inlines {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !printed_anything {
|
|
||||||
if opts.do_functions {
|
|
||||||
let name = probe.and_then(|probe| find_name_from_symbols(&symbols, probe));
|
|
||||||
print_function(name, None, opts.demangle);
|
|
||||||
|
|
||||||
if opts.pretty {
|
|
||||||
print!(" at ");
|
|
||||||
} else {
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
print_loc(None, opts.basenames, opts.llvm);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let loc = probe.and_then(|probe| ctx.find_location(probe).unwrap());
|
|
||||||
print_loc(loc.as_ref(), opts.basenames, opts.llvm);
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.llvm {
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
std::io::stdout().flush().unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
285
.gear/predownloaded-development/vendor/addr2line/src/bin/addr2line.rs
vendored
Normal file
285
.gear/predownloaded-development/vendor/addr2line/src/bin/addr2line.rs
vendored
Normal file
|
|
@ -0,0 +1,285 @@
|
||||||
|
use fallible_iterator::FallibleIterator;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::io::{BufRead, Lines, StdinLock, Write};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use clap::{Arg, ArgAction, Command};
|
||||||
|
|
||||||
|
use addr2line::{Loader, LoaderReader, Location};
|
||||||
|
|
||||||
|
fn parse_uint_from_hex_string(string: &str) -> Option<u64> {
|
||||||
|
if string.len() > 2 && string.starts_with("0x") {
|
||||||
|
u64::from_str_radix(&string[2..], 16).ok()
|
||||||
|
} else {
|
||||||
|
u64::from_str_radix(string, 16).ok()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
enum Addrs<'a> {
|
||||||
|
Args(clap::parser::ValuesRef<'a, String>),
|
||||||
|
Stdin(Lines<StdinLock<'a>>),
|
||||||
|
All {
|
||||||
|
iter: addr2line::LocationRangeIter<'a, LoaderReader<'a>>,
|
||||||
|
max: u64,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for Addrs<'a> {
|
||||||
|
type Item = Option<u64>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Option<u64>> {
|
||||||
|
let text = match self {
|
||||||
|
Addrs::Args(vals) => vals.next().map(Cow::from),
|
||||||
|
Addrs::Stdin(lines) => lines.next().map(Result::unwrap).map(Cow::from),
|
||||||
|
Addrs::All { iter, max } => {
|
||||||
|
for (addr, _len, _loc) in iter {
|
||||||
|
if addr >= *max {
|
||||||
|
*max = addr + 1;
|
||||||
|
return Some(Some(addr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
text.as_ref()
|
||||||
|
.map(Cow::as_ref)
|
||||||
|
.map(parse_uint_from_hex_string)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_loc(loc: Option<&Location<'_>>, basenames: bool, llvm: bool) {
|
||||||
|
if let Some(loc) = loc {
|
||||||
|
if let Some(ref file) = loc.file.as_ref() {
|
||||||
|
let path = if basenames {
|
||||||
|
Path::new(Path::new(file).file_name().unwrap())
|
||||||
|
} else {
|
||||||
|
Path::new(file)
|
||||||
|
};
|
||||||
|
print!("{}:", path.display());
|
||||||
|
} else {
|
||||||
|
print!("??:");
|
||||||
|
}
|
||||||
|
if llvm {
|
||||||
|
print!("{}:{}", loc.line.unwrap_or(0), loc.column.unwrap_or(0));
|
||||||
|
} else if let Some(line) = loc.line {
|
||||||
|
print!("{}", line);
|
||||||
|
} else {
|
||||||
|
print!("?");
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
} else if llvm {
|
||||||
|
println!("??:0:0");
|
||||||
|
} else {
|
||||||
|
println!("??:0");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_function(name: Option<&str>, language: Option<gimli::DwLang>, demangle: bool) {
|
||||||
|
if let Some(name) = name {
|
||||||
|
if demangle {
|
||||||
|
print!("{}", addr2line::demangle_auto(Cow::from(name), language));
|
||||||
|
} else {
|
||||||
|
print!("{}", name);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print!("??");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Options<'a> {
|
||||||
|
do_functions: bool,
|
||||||
|
do_inlines: bool,
|
||||||
|
pretty: bool,
|
||||||
|
print_addrs: bool,
|
||||||
|
basenames: bool,
|
||||||
|
demangle: bool,
|
||||||
|
llvm: bool,
|
||||||
|
exe: &'a PathBuf,
|
||||||
|
sup: Option<&'a PathBuf>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let matches = Command::new("addr2line")
|
||||||
|
.version(env!("CARGO_PKG_VERSION"))
|
||||||
|
.about("A fast addr2line Rust port")
|
||||||
|
.max_term_width(100)
|
||||||
|
.args(&[
|
||||||
|
Arg::new("exe")
|
||||||
|
.short('e')
|
||||||
|
.long("exe")
|
||||||
|
.value_name("filename")
|
||||||
|
.value_parser(clap::value_parser!(PathBuf))
|
||||||
|
.help(
|
||||||
|
"Specify the name of the executable for which addresses should be translated.",
|
||||||
|
)
|
||||||
|
.required(true),
|
||||||
|
Arg::new("sup")
|
||||||
|
.long("sup")
|
||||||
|
.value_name("filename")
|
||||||
|
.value_parser(clap::value_parser!(PathBuf))
|
||||||
|
.help("Path to supplementary object file."),
|
||||||
|
Arg::new("all")
|
||||||
|
.long("all")
|
||||||
|
.action(ArgAction::SetTrue)
|
||||||
|
.conflicts_with("addrs")
|
||||||
|
.help("Display all addresses that have line number information."),
|
||||||
|
Arg::new("functions")
|
||||||
|
.short('f')
|
||||||
|
.long("functions")
|
||||||
|
.action(ArgAction::SetTrue)
|
||||||
|
.help("Display function names as well as file and line number information."),
|
||||||
|
Arg::new("pretty").short('p').long("pretty-print")
|
||||||
|
.action(ArgAction::SetTrue)
|
||||||
|
.help(
|
||||||
|
"Make the output more human friendly: each location are printed on one line.",
|
||||||
|
),
|
||||||
|
Arg::new("inlines").short('i').long("inlines")
|
||||||
|
.action(ArgAction::SetTrue)
|
||||||
|
.help(
|
||||||
|
"If the address belongs to a function that was inlined, the source information for \
|
||||||
|
all enclosing scopes back to the first non-inlined function will also be printed.",
|
||||||
|
),
|
||||||
|
Arg::new("addresses").short('a').long("addresses")
|
||||||
|
.action(ArgAction::SetTrue)
|
||||||
|
.help(
|
||||||
|
"Display the address before the function name, file and line number information.",
|
||||||
|
),
|
||||||
|
Arg::new("basenames")
|
||||||
|
.short('s')
|
||||||
|
.long("basenames")
|
||||||
|
.action(ArgAction::SetTrue)
|
||||||
|
.help("Display only the base of each file name."),
|
||||||
|
Arg::new("demangle").short('C').long("demangle")
|
||||||
|
.action(ArgAction::SetTrue)
|
||||||
|
.help(
|
||||||
|
"Demangle function names. \
|
||||||
|
Specifying a specific demangling style (like GNU addr2line) is not supported. \
|
||||||
|
(TODO)"
|
||||||
|
),
|
||||||
|
Arg::new("llvm")
|
||||||
|
.long("llvm")
|
||||||
|
.action(ArgAction::SetTrue)
|
||||||
|
.help("Display output in the same format as llvm-symbolizer."),
|
||||||
|
Arg::new("addrs")
|
||||||
|
.action(ArgAction::Append)
|
||||||
|
.help("Addresses to use instead of reading from stdin."),
|
||||||
|
])
|
||||||
|
.get_matches();
|
||||||
|
|
||||||
|
let opts = Options {
|
||||||
|
do_functions: matches.get_flag("functions"),
|
||||||
|
do_inlines: matches.get_flag("inlines"),
|
||||||
|
pretty: matches.get_flag("pretty"),
|
||||||
|
print_addrs: matches.get_flag("addresses"),
|
||||||
|
basenames: matches.get_flag("basenames"),
|
||||||
|
demangle: matches.get_flag("demangle"),
|
||||||
|
llvm: matches.get_flag("llvm"),
|
||||||
|
exe: matches.get_one::<PathBuf>("exe").unwrap(),
|
||||||
|
sup: matches.get_one::<PathBuf>("sup"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let ctx = Loader::new_with_sup(opts.exe, opts.sup).unwrap();
|
||||||
|
|
||||||
|
let stdin = std::io::stdin();
|
||||||
|
let addrs = if matches.get_flag("all") {
|
||||||
|
Addrs::All {
|
||||||
|
iter: ctx.find_location_range(0, !0).unwrap(),
|
||||||
|
max: 0,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
matches
|
||||||
|
.get_many::<String>("addrs")
|
||||||
|
.map(Addrs::Args)
|
||||||
|
.unwrap_or_else(|| Addrs::Stdin(stdin.lock().lines()))
|
||||||
|
};
|
||||||
|
|
||||||
|
for probe in addrs {
|
||||||
|
if opts.print_addrs {
|
||||||
|
let addr = probe.unwrap_or(0);
|
||||||
|
if opts.llvm {
|
||||||
|
print!("0x{:x}", addr);
|
||||||
|
} else {
|
||||||
|
print!("0x{:016x}", addr);
|
||||||
|
}
|
||||||
|
if opts.pretty {
|
||||||
|
print!(": ");
|
||||||
|
} else {
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.do_functions || opts.do_inlines {
|
||||||
|
let mut printed_anything = false;
|
||||||
|
if let Some(probe) = probe {
|
||||||
|
let mut frames = ctx.find_frames(probe).unwrap().peekable();
|
||||||
|
let mut first = true;
|
||||||
|
while let Some(frame) = frames.next().unwrap() {
|
||||||
|
if opts.pretty && !first {
|
||||||
|
print!(" (inlined by) ");
|
||||||
|
}
|
||||||
|
first = false;
|
||||||
|
|
||||||
|
if opts.do_functions {
|
||||||
|
// Only use the symbol table if this isn't an inlined function.
|
||||||
|
let symbol = if matches!(frames.peek(), Ok(None)) {
|
||||||
|
ctx.find_symbol(probe)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
if symbol.is_some() {
|
||||||
|
// Prefer the symbol table over the DWARF name because:
|
||||||
|
// - the symbol can include a clone suffix
|
||||||
|
// - llvm may omit the linkage name in the DWARF with -g1
|
||||||
|
print_function(symbol, None, opts.demangle);
|
||||||
|
} else if let Some(func) = frame.function {
|
||||||
|
print_function(
|
||||||
|
func.raw_name().ok().as_deref(),
|
||||||
|
func.language,
|
||||||
|
opts.demangle,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
print_function(None, None, opts.demangle);
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.pretty {
|
||||||
|
print!(" at ");
|
||||||
|
} else {
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print_loc(frame.location.as_ref(), opts.basenames, opts.llvm);
|
||||||
|
|
||||||
|
printed_anything = true;
|
||||||
|
|
||||||
|
if !opts.do_inlines {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !printed_anything {
|
||||||
|
if opts.do_functions {
|
||||||
|
let name = probe.and_then(|probe| ctx.find_symbol(probe));
|
||||||
|
print_function(name, None, opts.demangle);
|
||||||
|
|
||||||
|
if opts.pretty {
|
||||||
|
print!(" at ");
|
||||||
|
} else {
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print_loc(None, opts.basenames, opts.llvm);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let loc = probe.and_then(|probe| ctx.find_location(probe).unwrap());
|
||||||
|
print_loc(loc.as_ref(), opts.basenames, opts.llvm);
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.llvm {
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
std::io::stdout().flush().unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,164 +0,0 @@
|
||||||
use alloc::borrow::Cow;
|
|
||||||
use alloc::sync::Arc;
|
|
||||||
use std::fs::File;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
use object::Object;
|
|
||||||
|
|
||||||
use crate::{LookupContinuation, LookupResult};
|
|
||||||
|
|
||||||
#[cfg(unix)]
|
|
||||||
fn convert_path<R: gimli::Reader<Endian = gimli::RunTimeEndian>>(
|
|
||||||
r: &R,
|
|
||||||
) -> Result<PathBuf, gimli::Error> {
|
|
||||||
use std::ffi::OsStr;
|
|
||||||
use std::os::unix::ffi::OsStrExt;
|
|
||||||
let bytes = r.to_slice()?;
|
|
||||||
let s = OsStr::from_bytes(&bytes);
|
|
||||||
Ok(PathBuf::from(s))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(not(unix))]
|
|
||||||
fn convert_path<R: gimli::Reader<Endian = gimli::RunTimeEndian>>(
|
|
||||||
r: &R,
|
|
||||||
) -> Result<PathBuf, gimli::Error> {
|
|
||||||
let bytes = r.to_slice()?;
|
|
||||||
let s = std::str::from_utf8(&bytes).map_err(|_| gimli::Error::BadUtf8)?;
|
|
||||||
Ok(PathBuf::from(s))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn load_section<'data: 'file, 'file, O, R, F>(
|
|
||||||
id: gimli::SectionId,
|
|
||||||
file: &'file O,
|
|
||||||
endian: R::Endian,
|
|
||||||
loader: &mut F,
|
|
||||||
) -> Result<R, gimli::Error>
|
|
||||||
where
|
|
||||||
O: object::Object<'data, 'file>,
|
|
||||||
R: gimli::Reader<Endian = gimli::RunTimeEndian>,
|
|
||||||
F: FnMut(Cow<'data, [u8]>, R::Endian) -> R,
|
|
||||||
{
|
|
||||||
use object::ObjectSection;
|
|
||||||
|
|
||||||
let data = id
|
|
||||||
.dwo_name()
|
|
||||||
.and_then(|dwo_name| {
|
|
||||||
file.section_by_name(dwo_name)
|
|
||||||
.and_then(|section| section.uncompressed_data().ok())
|
|
||||||
})
|
|
||||||
.unwrap_or(Cow::Borrowed(&[]));
|
|
||||||
Ok(loader(data, endian))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A simple builtin split DWARF loader.
|
|
||||||
pub struct SplitDwarfLoader<R, F>
|
|
||||||
where
|
|
||||||
R: gimli::Reader<Endian = gimli::RunTimeEndian>,
|
|
||||||
F: FnMut(Cow<'_, [u8]>, R::Endian) -> R,
|
|
||||||
{
|
|
||||||
loader: F,
|
|
||||||
dwarf_package: Option<gimli::DwarfPackage<R>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<R, F> SplitDwarfLoader<R, F>
|
|
||||||
where
|
|
||||||
R: gimli::Reader<Endian = gimli::RunTimeEndian>,
|
|
||||||
F: FnMut(Cow<'_, [u8]>, R::Endian) -> R,
|
|
||||||
{
|
|
||||||
fn load_dwarf_package(loader: &mut F, path: Option<PathBuf>) -> Option<gimli::DwarfPackage<R>> {
|
|
||||||
let mut path = path.map(Ok).unwrap_or_else(std::env::current_exe).ok()?;
|
|
||||||
let dwp_extension = path
|
|
||||||
.extension()
|
|
||||||
.map(|previous_extension| {
|
|
||||||
let mut previous_extension = previous_extension.to_os_string();
|
|
||||||
previous_extension.push(".dwp");
|
|
||||||
previous_extension
|
|
||||||
})
|
|
||||||
.unwrap_or_else(|| "dwp".into());
|
|
||||||
path.set_extension(dwp_extension);
|
|
||||||
let file = File::open(&path).ok()?;
|
|
||||||
let map = unsafe { memmap2::Mmap::map(&file).ok()? };
|
|
||||||
let dwp = object::File::parse(&*map).ok()?;
|
|
||||||
|
|
||||||
let endian = if dwp.is_little_endian() {
|
|
||||||
gimli::RunTimeEndian::Little
|
|
||||||
} else {
|
|
||||||
gimli::RunTimeEndian::Big
|
|
||||||
};
|
|
||||||
|
|
||||||
let empty = loader(Cow::Borrowed(&[]), endian);
|
|
||||||
gimli::DwarfPackage::load(
|
|
||||||
|section_id| load_section(section_id, &dwp, endian, loader),
|
|
||||||
empty,
|
|
||||||
)
|
|
||||||
.ok()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a new split DWARF loader.
|
|
||||||
pub fn new(mut loader: F, path: Option<PathBuf>) -> SplitDwarfLoader<R, F> {
|
|
||||||
let dwarf_package = SplitDwarfLoader::load_dwarf_package(&mut loader, path);
|
|
||||||
SplitDwarfLoader {
|
|
||||||
loader,
|
|
||||||
dwarf_package,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run the provided `LookupResult` to completion, loading any necessary
|
|
||||||
/// split DWARF along the way.
|
|
||||||
pub fn run<L>(&mut self, mut l: LookupResult<L>) -> L::Output
|
|
||||||
where
|
|
||||||
L: LookupContinuation<Buf = R>,
|
|
||||||
{
|
|
||||||
loop {
|
|
||||||
let (load, continuation) = match l {
|
|
||||||
LookupResult::Output(output) => break output,
|
|
||||||
LookupResult::Load { load, continuation } => (load, continuation),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut r: Option<Arc<gimli::Dwarf<_>>> = None;
|
|
||||||
if let Some(dwp) = self.dwarf_package.as_ref() {
|
|
||||||
if let Ok(Some(cu)) = dwp.find_cu(load.dwo_id, &load.parent) {
|
|
||||||
r = Some(Arc::new(cu));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if r.is_none() {
|
|
||||||
let mut path = PathBuf::new();
|
|
||||||
if let Some(p) = load.comp_dir.as_ref() {
|
|
||||||
if let Ok(p) = convert_path(p) {
|
|
||||||
path.push(p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(p) = load.path.as_ref() {
|
|
||||||
if let Ok(p) = convert_path(p) {
|
|
||||||
path.push(p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Ok(file) = File::open(&path) {
|
|
||||||
if let Ok(map) = unsafe { memmap2::Mmap::map(&file) } {
|
|
||||||
if let Ok(file) = object::File::parse(&*map) {
|
|
||||||
let endian = if file.is_little_endian() {
|
|
||||||
gimli::RunTimeEndian::Little
|
|
||||||
} else {
|
|
||||||
gimli::RunTimeEndian::Big
|
|
||||||
};
|
|
||||||
|
|
||||||
r = gimli::Dwarf::load(|id| {
|
|
||||||
load_section(id, &file, endian, &mut self.loader)
|
|
||||||
})
|
|
||||||
.ok()
|
|
||||||
.map(|mut dwo_dwarf| {
|
|
||||||
dwo_dwarf.make_dwo(&load.parent);
|
|
||||||
Arc::new(dwo_dwarf)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
l = continuation.resume(r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
221
.gear/predownloaded-development/vendor/addr2line/src/frame.rs
vendored
Normal file
221
.gear/predownloaded-development/vendor/addr2line/src/frame.rs
vendored
Normal file
|
|
@ -0,0 +1,221 @@
|
||||||
|
use alloc::borrow::Cow;
|
||||||
|
use alloc::string::String;
|
||||||
|
use core::iter;
|
||||||
|
|
||||||
|
use crate::{maybe_small, Error, Function, InlinedFunction, ResUnit};
|
||||||
|
|
||||||
|
/// A source location.
|
||||||
|
pub struct Location<'a> {
|
||||||
|
/// The file name.
|
||||||
|
pub file: Option<&'a str>,
|
||||||
|
/// The line number.
|
||||||
|
pub line: Option<u32>,
|
||||||
|
/// The column number.
|
||||||
|
///
|
||||||
|
/// A value of `Some(0)` indicates the left edge.
|
||||||
|
pub column: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A function frame.
|
||||||
|
pub struct Frame<'ctx, R: gimli::Reader> {
|
||||||
|
/// The DWARF unit offset corresponding to the DIE of the function.
|
||||||
|
pub dw_die_offset: Option<gimli::UnitOffset<R::Offset>>,
|
||||||
|
/// The name of the function.
|
||||||
|
pub function: Option<FunctionName<R>>,
|
||||||
|
/// The source location corresponding to this frame.
|
||||||
|
pub location: Option<Location<'ctx>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over function frames.
|
||||||
|
pub struct FrameIter<'ctx, R>(FrameIterState<'ctx, R>)
|
||||||
|
where
|
||||||
|
R: gimli::Reader;
|
||||||
|
|
||||||
|
enum FrameIterState<'ctx, R>
|
||||||
|
where
|
||||||
|
R: gimli::Reader,
|
||||||
|
{
|
||||||
|
Empty,
|
||||||
|
Location(Option<Location<'ctx>>),
|
||||||
|
Frames(FrameIterFrames<'ctx, R>),
|
||||||
|
}
|
||||||
|
|
||||||
|
struct FrameIterFrames<'ctx, R>
|
||||||
|
where
|
||||||
|
R: gimli::Reader,
|
||||||
|
{
|
||||||
|
unit: &'ctx ResUnit<R>,
|
||||||
|
sections: &'ctx gimli::Dwarf<R>,
|
||||||
|
function: &'ctx Function<R>,
|
||||||
|
inlined_functions: iter::Rev<maybe_small::IntoIter<&'ctx InlinedFunction<R>>>,
|
||||||
|
next: Option<Location<'ctx>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'ctx, R> FrameIter<'ctx, R>
|
||||||
|
where
|
||||||
|
R: gimli::Reader + 'ctx,
|
||||||
|
{
|
||||||
|
pub(crate) fn new_empty() -> Self {
|
||||||
|
FrameIter(FrameIterState::Empty)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn new_location(location: Location<'ctx>) -> Self {
|
||||||
|
FrameIter(FrameIterState::Location(Some(location)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn new_frames(
|
||||||
|
unit: &'ctx ResUnit<R>,
|
||||||
|
sections: &'ctx gimli::Dwarf<R>,
|
||||||
|
function: &'ctx Function<R>,
|
||||||
|
inlined_functions: maybe_small::Vec<&'ctx InlinedFunction<R>>,
|
||||||
|
location: Option<Location<'ctx>>,
|
||||||
|
) -> Self {
|
||||||
|
FrameIter(FrameIterState::Frames(FrameIterFrames {
|
||||||
|
unit,
|
||||||
|
sections,
|
||||||
|
function,
|
||||||
|
inlined_functions: inlined_functions.into_iter().rev(),
|
||||||
|
next: location,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Advances the iterator and returns the next frame.
|
||||||
|
#[allow(clippy::should_implement_trait)]
|
||||||
|
pub fn next(&mut self) -> Result<Option<Frame<'ctx, R>>, Error> {
|
||||||
|
let frames = match &mut self.0 {
|
||||||
|
FrameIterState::Empty => return Ok(None),
|
||||||
|
FrameIterState::Location(location) => {
|
||||||
|
// We can't move out of a mutable reference, so use `take` instead.
|
||||||
|
let location = location.take();
|
||||||
|
self.0 = FrameIterState::Empty;
|
||||||
|
return Ok(Some(Frame {
|
||||||
|
dw_die_offset: None,
|
||||||
|
function: None,
|
||||||
|
location,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
FrameIterState::Frames(frames) => frames,
|
||||||
|
};
|
||||||
|
|
||||||
|
let loc = frames.next.take();
|
||||||
|
let func = match frames.inlined_functions.next() {
|
||||||
|
Some(func) => func,
|
||||||
|
None => {
|
||||||
|
let frame = Frame {
|
||||||
|
dw_die_offset: Some(frames.function.dw_die_offset),
|
||||||
|
function: frames.function.name.clone().map(|name| FunctionName {
|
||||||
|
name,
|
||||||
|
language: frames.unit.lang,
|
||||||
|
}),
|
||||||
|
location: loc,
|
||||||
|
};
|
||||||
|
self.0 = FrameIterState::Empty;
|
||||||
|
return Ok(Some(frame));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut next = Location {
|
||||||
|
file: None,
|
||||||
|
line: if func.call_line != 0 {
|
||||||
|
Some(func.call_line)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
|
column: if func.call_column != 0 {
|
||||||
|
Some(func.call_column)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
|
};
|
||||||
|
if let Some(call_file) = func.call_file {
|
||||||
|
if let Some(lines) = frames.unit.parse_lines(frames.sections)? {
|
||||||
|
next.file = lines.file(call_file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
frames.next = Some(next);
|
||||||
|
|
||||||
|
Ok(Some(Frame {
|
||||||
|
dw_die_offset: Some(func.dw_die_offset),
|
||||||
|
function: func.name.clone().map(|name| FunctionName {
|
||||||
|
name,
|
||||||
|
language: frames.unit.lang,
|
||||||
|
}),
|
||||||
|
location: loc,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "fallible-iterator")]
|
||||||
|
impl<'ctx, R> fallible_iterator::FallibleIterator for FrameIter<'ctx, R>
|
||||||
|
where
|
||||||
|
R: gimli::Reader + 'ctx,
|
||||||
|
{
|
||||||
|
type Item = Frame<'ctx, R>;
|
||||||
|
type Error = Error;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next(&mut self) -> Result<Option<Frame<'ctx, R>>, Error> {
|
||||||
|
self.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A function name.
|
||||||
|
pub struct FunctionName<R: gimli::Reader> {
|
||||||
|
/// The name of the function.
|
||||||
|
pub name: R,
|
||||||
|
/// The language of the compilation unit containing this function.
|
||||||
|
pub language: Option<gimli::DwLang>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R: gimli::Reader> FunctionName<R> {
|
||||||
|
/// The raw name of this function before demangling.
|
||||||
|
pub fn raw_name(&self) -> Result<Cow<'_, str>, Error> {
|
||||||
|
self.name.to_string_lossy()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The name of this function after demangling (if applicable).
|
||||||
|
pub fn demangle(&self) -> Result<Cow<'_, str>, Error> {
|
||||||
|
self.raw_name().map(|x| demangle_auto(x, self.language))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Demangle a symbol name using the demangling scheme for the given language.
|
||||||
|
///
|
||||||
|
/// Returns `None` if demangling failed or is not required.
|
||||||
|
#[allow(unused_variables)]
|
||||||
|
pub fn demangle(name: &str, language: gimli::DwLang) -> Option<String> {
|
||||||
|
match language {
|
||||||
|
#[cfg(feature = "rustc-demangle")]
|
||||||
|
gimli::DW_LANG_Rust => rustc_demangle::try_demangle(name)
|
||||||
|
.ok()
|
||||||
|
.as_ref()
|
||||||
|
.map(|x| format!("{:#}", x)),
|
||||||
|
#[cfg(feature = "cpp_demangle")]
|
||||||
|
gimli::DW_LANG_C_plus_plus
|
||||||
|
| gimli::DW_LANG_C_plus_plus_03
|
||||||
|
| gimli::DW_LANG_C_plus_plus_11
|
||||||
|
| gimli::DW_LANG_C_plus_plus_14 => cpp_demangle::Symbol::new(name)
|
||||||
|
.ok()
|
||||||
|
.and_then(|x| x.demangle(&Default::default()).ok()),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Apply 'best effort' demangling of a symbol name.
|
||||||
|
///
|
||||||
|
/// If `language` is given, then only the demangling scheme for that language
|
||||||
|
/// is used.
|
||||||
|
///
|
||||||
|
/// If `language` is `None`, then heuristics are used to determine how to
|
||||||
|
/// demangle the name. Currently, these heuristics are very basic.
|
||||||
|
///
|
||||||
|
/// If demangling fails or is not required, then `name` is returned unchanged.
|
||||||
|
pub fn demangle_auto(name: Cow<'_, str>, language: Option<gimli::DwLang>) -> Cow<'_, str> {
|
||||||
|
match language {
|
||||||
|
Some(language) => demangle(name.as_ref(), language),
|
||||||
|
None => demangle(name.as_ref(), gimli::DW_LANG_Rust)
|
||||||
|
.or_else(|| demangle(name.as_ref(), gimli::DW_LANG_C_plus_plus)),
|
||||||
|
}
|
||||||
|
.map(Cow::from)
|
||||||
|
.unwrap_or(name)
|
||||||
|
}
|
||||||
|
|
@ -1,20 +1,29 @@
|
||||||
use alloc::boxed::Box;
|
use alloc::boxed::Box;
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
use core::cmp::Ordering;
|
use core::cmp::Ordering;
|
||||||
use core::iter;
|
|
||||||
|
|
||||||
use crate::lazy::LazyCell;
|
use crate::lazy::LazyResult;
|
||||||
use crate::maybe_small;
|
use crate::maybe_small;
|
||||||
use crate::{Context, DebugFile, Error, RangeAttributes};
|
use crate::{Context, DebugFile, Error, RangeAttributes};
|
||||||
|
|
||||||
|
pub(crate) struct LazyFunctions<R: gimli::Reader>(LazyResult<Functions<R>>);
|
||||||
|
|
||||||
|
impl<R: gimli::Reader> LazyFunctions<R> {
|
||||||
|
pub(crate) fn new() -> Self {
|
||||||
|
LazyFunctions(LazyResult::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn borrow(&self, unit: gimli::UnitRef<R>) -> Result<&Functions<R>, Error> {
|
||||||
|
self.0
|
||||||
|
.borrow_with(|| Functions::parse(unit))
|
||||||
|
.as_ref()
|
||||||
|
.map_err(Error::clone)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) struct Functions<R: gimli::Reader> {
|
pub(crate) struct Functions<R: gimli::Reader> {
|
||||||
/// List of all `DW_TAG_subprogram` details in the unit.
|
/// List of all `DW_TAG_subprogram` details in the unit.
|
||||||
pub(crate) functions: Box<
|
pub(crate) functions: Box<[LazyFunction<R>]>,
|
||||||
[(
|
|
||||||
gimli::UnitOffset<R::Offset>,
|
|
||||||
LazyCell<Result<Function<R>, Error>>,
|
|
||||||
)],
|
|
||||||
>,
|
|
||||||
/// List of `DW_TAG_subprogram` address ranges in the unit.
|
/// List of `DW_TAG_subprogram` address ranges in the unit.
|
||||||
pub(crate) addresses: Box<[FunctionAddress]>,
|
pub(crate) addresses: Box<[FunctionAddress]>,
|
||||||
}
|
}
|
||||||
|
|
@ -30,6 +39,32 @@ pub(crate) struct FunctionAddress {
|
||||||
pub(crate) function: usize,
|
pub(crate) function: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) struct LazyFunction<R: gimli::Reader> {
|
||||||
|
dw_die_offset: gimli::UnitOffset<R::Offset>,
|
||||||
|
lazy: LazyResult<Function<R>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<R: gimli::Reader> LazyFunction<R> {
|
||||||
|
fn new(dw_die_offset: gimli::UnitOffset<R::Offset>) -> Self {
|
||||||
|
LazyFunction {
|
||||||
|
dw_die_offset,
|
||||||
|
lazy: LazyResult::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn borrow(
|
||||||
|
&self,
|
||||||
|
file: DebugFile,
|
||||||
|
unit: gimli::UnitRef<R>,
|
||||||
|
ctx: &Context<R>,
|
||||||
|
) -> Result<&Function<R>, Error> {
|
||||||
|
self.lazy
|
||||||
|
.borrow_with(|| Function::parse(self.dw_die_offset, file, unit, ctx))
|
||||||
|
.as_ref()
|
||||||
|
.map_err(Error::clone)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) struct Function<R: gimli::Reader> {
|
pub(crate) struct Function<R: gimli::Reader> {
|
||||||
pub(crate) dw_die_offset: gimli::UnitOffset<R::Offset>,
|
pub(crate) dw_die_offset: gimli::UnitOffset<R::Offset>,
|
||||||
pub(crate) name: Option<R>,
|
pub(crate) name: Option<R>,
|
||||||
|
|
@ -55,10 +90,7 @@ pub(crate) struct InlinedFunction<R: gimli::Reader> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: gimli::Reader> Functions<R> {
|
impl<R: gimli::Reader> Functions<R> {
|
||||||
pub(crate) fn parse(
|
fn parse(unit: gimli::UnitRef<R>) -> Result<Functions<R>, Error> {
|
||||||
unit: &gimli::Unit<R>,
|
|
||||||
sections: &gimli::Dwarf<R>,
|
|
||||||
) -> Result<Functions<R>, Error> {
|
|
||||||
let mut functions = Vec::new();
|
let mut functions = Vec::new();
|
||||||
let mut addresses = Vec::new();
|
let mut addresses = Vec::new();
|
||||||
let mut entries = unit.entries_raw(None)?;
|
let mut entries = unit.entries_raw(None)?;
|
||||||
|
|
@ -76,7 +108,7 @@ impl<R: gimli::Reader> Functions<R> {
|
||||||
ranges.low_pc = Some(val)
|
ranges.low_pc = Some(val)
|
||||||
}
|
}
|
||||||
gimli::AttributeValue::DebugAddrIndex(index) => {
|
gimli::AttributeValue::DebugAddrIndex(index) => {
|
||||||
ranges.low_pc = Some(sections.address(unit, index)?);
|
ranges.low_pc = Some(unit.address(index)?);
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
},
|
},
|
||||||
|
|
@ -85,7 +117,7 @@ impl<R: gimli::Reader> Functions<R> {
|
||||||
ranges.high_pc = Some(val)
|
ranges.high_pc = Some(val)
|
||||||
}
|
}
|
||||||
gimli::AttributeValue::DebugAddrIndex(index) => {
|
gimli::AttributeValue::DebugAddrIndex(index) => {
|
||||||
ranges.high_pc = Some(sections.address(unit, index)?);
|
ranges.high_pc = Some(unit.address(index)?);
|
||||||
}
|
}
|
||||||
gimli::AttributeValue::Udata(val) => {
|
gimli::AttributeValue::Udata(val) => {
|
||||||
ranges.size = Some(val)
|
ranges.size = Some(val)
|
||||||
|
|
@ -94,7 +126,7 @@ impl<R: gimli::Reader> Functions<R> {
|
||||||
},
|
},
|
||||||
gimli::DW_AT_ranges => {
|
gimli::DW_AT_ranges => {
|
||||||
ranges.ranges_offset =
|
ranges.ranges_offset =
|
||||||
sections.attr_ranges_offset(unit, attr.value())?;
|
unit.attr_ranges_offset(attr.value())?;
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
};
|
};
|
||||||
|
|
@ -104,13 +136,14 @@ impl<R: gimli::Reader> Functions<R> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let function_index = functions.len();
|
let function_index = functions.len();
|
||||||
if ranges.for_each_range(sections, unit, |range| {
|
let has_address = ranges.for_each_range(unit, |range| {
|
||||||
addresses.push(FunctionAddress {
|
addresses.push(FunctionAddress {
|
||||||
range,
|
range,
|
||||||
function: function_index,
|
function: function_index,
|
||||||
});
|
});
|
||||||
})? {
|
})?;
|
||||||
functions.push((dw_die_offset, LazyCell::new()));
|
if has_address {
|
||||||
|
functions.push(LazyFunction::new(dw_die_offset));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
entries.skip_attributes(abbrev.attributes())?;
|
entries.skip_attributes(abbrev.attributes())?;
|
||||||
|
|
@ -151,28 +184,22 @@ impl<R: gimli::Reader> Functions<R> {
|
||||||
pub(crate) fn parse_inlined_functions(
|
pub(crate) fn parse_inlined_functions(
|
||||||
&self,
|
&self,
|
||||||
file: DebugFile,
|
file: DebugFile,
|
||||||
unit: &gimli::Unit<R>,
|
unit: gimli::UnitRef<R>,
|
||||||
ctx: &Context<R>,
|
ctx: &Context<R>,
|
||||||
sections: &gimli::Dwarf<R>,
|
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
for function in &*self.functions {
|
for function in &*self.functions {
|
||||||
function
|
function.borrow(file, unit, ctx)?;
|
||||||
.1
|
|
||||||
.borrow_with(|| Function::parse(function.0, file, unit, ctx, sections))
|
|
||||||
.as_ref()
|
|
||||||
.map_err(Error::clone)?;
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: gimli::Reader> Function<R> {
|
impl<R: gimli::Reader> Function<R> {
|
||||||
pub(crate) fn parse(
|
fn parse(
|
||||||
dw_die_offset: gimli::UnitOffset<R::Offset>,
|
dw_die_offset: gimli::UnitOffset<R::Offset>,
|
||||||
file: DebugFile,
|
file: DebugFile,
|
||||||
unit: &gimli::Unit<R>,
|
unit: gimli::UnitRef<R>,
|
||||||
ctx: &Context<R>,
|
ctx: &Context<R>,
|
||||||
sections: &gimli::Dwarf<R>,
|
|
||||||
) -> Result<Self, Error> {
|
) -> Result<Self, Error> {
|
||||||
let mut entries = unit.entries_raw(Some(dw_die_offset))?;
|
let mut entries = unit.entries_raw(Some(dw_die_offset))?;
|
||||||
let depth = entries.next_depth();
|
let depth = entries.next_depth();
|
||||||
|
|
@ -185,18 +212,18 @@ impl<R: gimli::Reader> Function<R> {
|
||||||
Ok(ref attr) => {
|
Ok(ref attr) => {
|
||||||
match attr.name() {
|
match attr.name() {
|
||||||
gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => {
|
gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => {
|
||||||
if let Ok(val) = sections.attr_string(unit, attr.value()) {
|
if let Ok(val) = unit.attr_string(attr.value()) {
|
||||||
name = Some(val);
|
name = Some(val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gimli::DW_AT_name => {
|
gimli::DW_AT_name => {
|
||||||
if name.is_none() {
|
if name.is_none() {
|
||||||
name = sections.attr_string(unit, attr.value()).ok();
|
name = unit.attr_string(attr.value()).ok();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gimli::DW_AT_abstract_origin | gimli::DW_AT_specification => {
|
gimli::DW_AT_abstract_origin | gimli::DW_AT_specification => {
|
||||||
if name.is_none() {
|
if name.is_none() {
|
||||||
name = name_attr(attr.value(), file, unit, ctx, sections, 16)?;
|
name = name_attr(attr.value(), file, unit, ctx, 16)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
|
|
@ -206,19 +233,15 @@ impl<R: gimli::Reader> Function<R> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut inlined_functions = Vec::new();
|
let mut state = InlinedState {
|
||||||
let mut inlined_addresses = Vec::new();
|
entries,
|
||||||
Function::parse_children(
|
functions: Vec::new(),
|
||||||
&mut entries,
|
addresses: Vec::new(),
|
||||||
depth,
|
|
||||||
file,
|
file,
|
||||||
unit,
|
unit,
|
||||||
ctx,
|
ctx,
|
||||||
sections,
|
};
|
||||||
&mut inlined_functions,
|
Function::parse_children(&mut state, depth, 0)?;
|
||||||
&mut inlined_addresses,
|
|
||||||
0,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// Sort ranges in "breadth-first traversal order", i.e. first by call_depth
|
// Sort ranges in "breadth-first traversal order", i.e. first by call_depth
|
||||||
// and then by range.begin. This allows finding the range containing an
|
// and then by range.begin. This allows finding the range containing an
|
||||||
|
|
@ -230,7 +253,7 @@ impl<R: gimli::Reader> Function<R> {
|
||||||
// In this example, if you want to look up address 7 at depth 0, and you
|
// In this example, if you want to look up address 7 at depth 0, and you
|
||||||
// encounter [0..2 at depth 1], are you before or after the target range?
|
// encounter [0..2 at depth 1], are you before or after the target range?
|
||||||
// You don't know.
|
// You don't know.
|
||||||
inlined_addresses.sort_by(|r1, r2| {
|
state.addresses.sort_by(|r1, r2| {
|
||||||
if r1.call_depth < r2.call_depth {
|
if r1.call_depth < r2.call_depth {
|
||||||
Ordering::Less
|
Ordering::Less
|
||||||
} else if r1.call_depth > r2.call_depth {
|
} else if r1.call_depth > r2.call_depth {
|
||||||
|
|
@ -247,50 +270,38 @@ impl<R: gimli::Reader> Function<R> {
|
||||||
Ok(Function {
|
Ok(Function {
|
||||||
dw_die_offset,
|
dw_die_offset,
|
||||||
name,
|
name,
|
||||||
inlined_functions: inlined_functions.into_boxed_slice(),
|
inlined_functions: state.functions.into_boxed_slice(),
|
||||||
inlined_addresses: inlined_addresses.into_boxed_slice(),
|
inlined_addresses: state.addresses.into_boxed_slice(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_children(
|
fn parse_children(
|
||||||
entries: &mut gimli::EntriesRaw<'_, '_, R>,
|
state: &mut InlinedState<R>,
|
||||||
depth: isize,
|
depth: isize,
|
||||||
file: DebugFile,
|
|
||||||
unit: &gimli::Unit<R>,
|
|
||||||
ctx: &Context<R>,
|
|
||||||
sections: &gimli::Dwarf<R>,
|
|
||||||
inlined_functions: &mut Vec<InlinedFunction<R>>,
|
|
||||||
inlined_addresses: &mut Vec<InlinedFunctionAddress>,
|
|
||||||
inlined_depth: usize,
|
inlined_depth: usize,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
loop {
|
loop {
|
||||||
let dw_die_offset = entries.next_offset();
|
let dw_die_offset = state.entries.next_offset();
|
||||||
let next_depth = entries.next_depth();
|
let next_depth = state.entries.next_depth();
|
||||||
if next_depth <= depth {
|
if next_depth <= depth {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
if let Some(abbrev) = entries.read_abbreviation()? {
|
if let Some(abbrev) = state.entries.read_abbreviation()? {
|
||||||
match abbrev.tag() {
|
match abbrev.tag() {
|
||||||
gimli::DW_TAG_subprogram => {
|
gimli::DW_TAG_subprogram => {
|
||||||
Function::skip(entries, abbrev, next_depth)?;
|
Function::skip(&mut state.entries, abbrev, next_depth)?;
|
||||||
}
|
}
|
||||||
gimli::DW_TAG_inlined_subroutine => {
|
gimli::DW_TAG_inlined_subroutine => {
|
||||||
InlinedFunction::parse(
|
InlinedFunction::parse(
|
||||||
|
state,
|
||||||
dw_die_offset,
|
dw_die_offset,
|
||||||
entries,
|
|
||||||
abbrev,
|
abbrev,
|
||||||
next_depth,
|
next_depth,
|
||||||
file,
|
|
||||||
unit,
|
|
||||||
ctx,
|
|
||||||
sections,
|
|
||||||
inlined_functions,
|
|
||||||
inlined_addresses,
|
|
||||||
inlined_depth,
|
inlined_depth,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
entries.skip_attributes(abbrev.attributes())?;
|
state.entries.skip_attributes(abbrev.attributes())?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -316,7 +327,7 @@ impl<R: gimli::Reader> Function<R> {
|
||||||
pub(crate) fn find_inlined_functions(
|
pub(crate) fn find_inlined_functions(
|
||||||
&self,
|
&self,
|
||||||
probe: u64,
|
probe: u64,
|
||||||
) -> iter::Rev<maybe_small::IntoIter<&InlinedFunction<R>>> {
|
) -> maybe_small::Vec<&InlinedFunction<R>> {
|
||||||
// `inlined_functions` is ordered from outside to inside.
|
// `inlined_functions` is ordered from outside to inside.
|
||||||
let mut inlined_functions = maybe_small::Vec::new();
|
let mut inlined_functions = maybe_small::Vec::new();
|
||||||
let mut inlined_addresses = &self.inlined_addresses[..];
|
let mut inlined_addresses = &self.inlined_addresses[..];
|
||||||
|
|
@ -347,63 +358,58 @@ impl<R: gimli::Reader> Function<R> {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inlined_functions.into_iter().rev()
|
inlined_functions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: gimli::Reader> InlinedFunction<R> {
|
impl<R: gimli::Reader> InlinedFunction<R> {
|
||||||
fn parse(
|
fn parse(
|
||||||
|
state: &mut InlinedState<R>,
|
||||||
dw_die_offset: gimli::UnitOffset<R::Offset>,
|
dw_die_offset: gimli::UnitOffset<R::Offset>,
|
||||||
entries: &mut gimli::EntriesRaw<'_, '_, R>,
|
|
||||||
abbrev: &gimli::Abbreviation,
|
abbrev: &gimli::Abbreviation,
|
||||||
depth: isize,
|
depth: isize,
|
||||||
file: DebugFile,
|
|
||||||
unit: &gimli::Unit<R>,
|
|
||||||
ctx: &Context<R>,
|
|
||||||
sections: &gimli::Dwarf<R>,
|
|
||||||
inlined_functions: &mut Vec<InlinedFunction<R>>,
|
|
||||||
inlined_addresses: &mut Vec<InlinedFunctionAddress>,
|
|
||||||
inlined_depth: usize,
|
inlined_depth: usize,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
|
let unit = state.unit;
|
||||||
let mut ranges = RangeAttributes::default();
|
let mut ranges = RangeAttributes::default();
|
||||||
let mut name = None;
|
let mut name = None;
|
||||||
let mut call_file = None;
|
let mut call_file = None;
|
||||||
let mut call_line = 0;
|
let mut call_line = 0;
|
||||||
let mut call_column = 0;
|
let mut call_column = 0;
|
||||||
for spec in abbrev.attributes() {
|
for spec in abbrev.attributes() {
|
||||||
match entries.read_attribute(*spec) {
|
match state.entries.read_attribute(*spec) {
|
||||||
Ok(ref attr) => match attr.name() {
|
Ok(ref attr) => match attr.name() {
|
||||||
gimli::DW_AT_low_pc => match attr.value() {
|
gimli::DW_AT_low_pc => match attr.value() {
|
||||||
gimli::AttributeValue::Addr(val) => ranges.low_pc = Some(val),
|
gimli::AttributeValue::Addr(val) => ranges.low_pc = Some(val),
|
||||||
gimli::AttributeValue::DebugAddrIndex(index) => {
|
gimli::AttributeValue::DebugAddrIndex(index) => {
|
||||||
ranges.low_pc = Some(sections.address(unit, index)?);
|
ranges.low_pc = Some(unit.address(index)?);
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
},
|
},
|
||||||
gimli::DW_AT_high_pc => match attr.value() {
|
gimli::DW_AT_high_pc => match attr.value() {
|
||||||
gimli::AttributeValue::Addr(val) => ranges.high_pc = Some(val),
|
gimli::AttributeValue::Addr(val) => ranges.high_pc = Some(val),
|
||||||
gimli::AttributeValue::DebugAddrIndex(index) => {
|
gimli::AttributeValue::DebugAddrIndex(index) => {
|
||||||
ranges.high_pc = Some(sections.address(unit, index)?);
|
ranges.high_pc = Some(unit.address(index)?);
|
||||||
}
|
}
|
||||||
gimli::AttributeValue::Udata(val) => ranges.size = Some(val),
|
gimli::AttributeValue::Udata(val) => ranges.size = Some(val),
|
||||||
_ => {}
|
_ => {}
|
||||||
},
|
},
|
||||||
gimli::DW_AT_ranges => {
|
gimli::DW_AT_ranges => {
|
||||||
ranges.ranges_offset = sections.attr_ranges_offset(unit, attr.value())?;
|
ranges.ranges_offset = unit.attr_ranges_offset(attr.value())?;
|
||||||
}
|
}
|
||||||
gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => {
|
gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => {
|
||||||
if let Ok(val) = sections.attr_string(unit, attr.value()) {
|
if let Ok(val) = unit.attr_string(attr.value()) {
|
||||||
name = Some(val);
|
name = Some(val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gimli::DW_AT_name => {
|
gimli::DW_AT_name => {
|
||||||
if name.is_none() {
|
if name.is_none() {
|
||||||
name = sections.attr_string(unit, attr.value()).ok();
|
name = unit.attr_string(attr.value()).ok();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gimli::DW_AT_abstract_origin | gimli::DW_AT_specification => {
|
gimli::DW_AT_abstract_origin | gimli::DW_AT_specification => {
|
||||||
if name.is_none() {
|
if name.is_none() {
|
||||||
name = name_attr(attr.value(), file, unit, ctx, sections, 16)?;
|
name = name_attr(attr.value(), state.file, unit, state.ctx, 16)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gimli::DW_AT_call_file => {
|
gimli::DW_AT_call_file => {
|
||||||
|
|
@ -434,8 +440,8 @@ impl<R: gimli::Reader> InlinedFunction<R> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let function_index = inlined_functions.len();
|
let function_index = state.functions.len();
|
||||||
inlined_functions.push(InlinedFunction {
|
state.functions.push(InlinedFunction {
|
||||||
dw_die_offset,
|
dw_die_offset,
|
||||||
name,
|
name,
|
||||||
call_file,
|
call_file,
|
||||||
|
|
@ -443,34 +449,35 @@ impl<R: gimli::Reader> InlinedFunction<R> {
|
||||||
call_column,
|
call_column,
|
||||||
});
|
});
|
||||||
|
|
||||||
ranges.for_each_range(sections, unit, |range| {
|
ranges.for_each_range(unit, |range| {
|
||||||
inlined_addresses.push(InlinedFunctionAddress {
|
state.addresses.push(InlinedFunctionAddress {
|
||||||
range,
|
range,
|
||||||
call_depth: inlined_depth,
|
call_depth: inlined_depth,
|
||||||
function: function_index,
|
function: function_index,
|
||||||
});
|
});
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
Function::parse_children(
|
Function::parse_children(state, depth, inlined_depth + 1)
|
||||||
entries,
|
|
||||||
depth,
|
|
||||||
file,
|
|
||||||
unit,
|
|
||||||
ctx,
|
|
||||||
sections,
|
|
||||||
inlined_functions,
|
|
||||||
inlined_addresses,
|
|
||||||
inlined_depth + 1,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct InlinedState<'a, R: gimli::Reader> {
|
||||||
|
// Mutable fields.
|
||||||
|
entries: gimli::EntriesRaw<'a, 'a, R>,
|
||||||
|
functions: Vec<InlinedFunction<R>>,
|
||||||
|
addresses: Vec<InlinedFunctionAddress>,
|
||||||
|
|
||||||
|
// Constant fields.
|
||||||
|
file: DebugFile,
|
||||||
|
unit: gimli::UnitRef<'a, R>,
|
||||||
|
ctx: &'a Context<R>,
|
||||||
|
}
|
||||||
|
|
||||||
fn name_attr<R>(
|
fn name_attr<R>(
|
||||||
attr: gimli::AttributeValue<R>,
|
attr: gimli::AttributeValue<R>,
|
||||||
mut file: DebugFile,
|
mut file: DebugFile,
|
||||||
unit: &gimli::Unit<R>,
|
unit: gimli::UnitRef<R>,
|
||||||
ctx: &Context<R>,
|
ctx: &Context<R>,
|
||||||
sections: &gimli::Dwarf<R>,
|
|
||||||
recursion_limit: usize,
|
recursion_limit: usize,
|
||||||
) -> Result<Option<R>, Error>
|
) -> Result<Option<R>, Error>
|
||||||
where
|
where
|
||||||
|
|
@ -482,17 +489,20 @@ where
|
||||||
|
|
||||||
match attr {
|
match attr {
|
||||||
gimli::AttributeValue::UnitRef(offset) => {
|
gimli::AttributeValue::UnitRef(offset) => {
|
||||||
name_entry(file, unit, offset, ctx, sections, recursion_limit)
|
name_entry(file, unit, offset, ctx, recursion_limit)
|
||||||
}
|
}
|
||||||
gimli::AttributeValue::DebugInfoRef(dr) => {
|
gimli::AttributeValue::DebugInfoRef(dr) => {
|
||||||
|
let sections = unit.dwarf;
|
||||||
let (unit, offset) = ctx.find_unit(dr, file)?;
|
let (unit, offset) = ctx.find_unit(dr, file)?;
|
||||||
name_entry(file, unit, offset, ctx, sections, recursion_limit)
|
let unit = gimli::UnitRef::new(sections, unit);
|
||||||
|
name_entry(file, unit, offset, ctx, recursion_limit)
|
||||||
}
|
}
|
||||||
gimli::AttributeValue::DebugInfoRefSup(dr) => {
|
gimli::AttributeValue::DebugInfoRefSup(dr) => {
|
||||||
if let Some(sup_sections) = sections.sup.as_ref() {
|
if let Some(sup_sections) = unit.dwarf.sup.as_ref() {
|
||||||
file = DebugFile::Supplementary;
|
file = DebugFile::Supplementary;
|
||||||
let (unit, offset) = ctx.find_unit(dr, file)?;
|
let (unit, offset) = ctx.find_unit(dr, file)?;
|
||||||
name_entry(file, unit, offset, ctx, sup_sections, recursion_limit)
|
let unit = gimli::UnitRef::new(sup_sections, unit);
|
||||||
|
name_entry(file, unit, offset, ctx, recursion_limit)
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
@ -503,10 +513,9 @@ where
|
||||||
|
|
||||||
fn name_entry<R>(
|
fn name_entry<R>(
|
||||||
file: DebugFile,
|
file: DebugFile,
|
||||||
unit: &gimli::Unit<R>,
|
unit: gimli::UnitRef<R>,
|
||||||
offset: gimli::UnitOffset<R::Offset>,
|
offset: gimli::UnitOffset<R::Offset>,
|
||||||
ctx: &Context<R>,
|
ctx: &Context<R>,
|
||||||
sections: &gimli::Dwarf<R>,
|
|
||||||
recursion_limit: usize,
|
recursion_limit: usize,
|
||||||
) -> Result<Option<R>, Error>
|
) -> Result<Option<R>, Error>
|
||||||
where
|
where
|
||||||
|
|
@ -525,12 +534,12 @@ where
|
||||||
match entries.read_attribute(*spec) {
|
match entries.read_attribute(*spec) {
|
||||||
Ok(ref attr) => match attr.name() {
|
Ok(ref attr) => match attr.name() {
|
||||||
gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => {
|
gimli::DW_AT_linkage_name | gimli::DW_AT_MIPS_linkage_name => {
|
||||||
if let Ok(val) = sections.attr_string(unit, attr.value()) {
|
if let Ok(val) = unit.attr_string(attr.value()) {
|
||||||
return Ok(Some(val));
|
return Ok(Some(val));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gimli::DW_AT_name => {
|
gimli::DW_AT_name => {
|
||||||
if let Ok(val) = sections.attr_string(unit, attr.value()) {
|
if let Ok(val) = unit.attr_string(attr.value()) {
|
||||||
name = Some(val);
|
name = Some(val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -548,7 +557,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(next) = next {
|
if let Some(next) = next {
|
||||||
return name_attr(next, file, unit, ctx, sections, recursion_limit - 1);
|
return name_attr(next, file, unit, ctx, recursion_limit - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(None)
|
Ok(None)
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,23 @@
|
||||||
use core::cell::UnsafeCell;
|
use core::cell::UnsafeCell;
|
||||||
|
|
||||||
pub struct LazyCell<T> {
|
pub(crate) type LazyResult<T> = LazyCell<Result<T, crate::Error>>;
|
||||||
|
|
||||||
|
pub(crate) struct LazyCell<T> {
|
||||||
contents: UnsafeCell<Option<T>>,
|
contents: UnsafeCell<Option<T>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> LazyCell<T> {
|
impl<T> LazyCell<T> {
|
||||||
pub fn new() -> LazyCell<T> {
|
pub(crate) fn new() -> LazyCell<T> {
|
||||||
LazyCell {
|
LazyCell {
|
||||||
contents: UnsafeCell::new(None),
|
contents: UnsafeCell::new(None),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn borrow(&self) -> Option<&T> {
|
pub(crate) fn borrow(&self) -> Option<&T> {
|
||||||
unsafe { &*self.contents.get() }.as_ref()
|
unsafe { &*self.contents.get() }.as_ref()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn borrow_with(&self, closure: impl FnOnce() -> T) -> &T {
|
pub(crate) fn borrow_with(&self, closure: impl FnOnce() -> T) -> &T {
|
||||||
// First check if we're already initialized...
|
// First check if we're already initialized...
|
||||||
let ptr = self.contents.get();
|
let ptr = self.contents.get();
|
||||||
if let Some(val) = unsafe { &*ptr } {
|
if let Some(val) = unsafe { &*ptr } {
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
314
.gear/predownloaded-development/vendor/addr2line/src/line.rs
vendored
Normal file
314
.gear/predownloaded-development/vendor/addr2line/src/line.rs
vendored
Normal file
|
|
@ -0,0 +1,314 @@
|
||||||
|
use alloc::boxed::Box;
|
||||||
|
use alloc::string::{String, ToString};
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
use core::cmp::Ordering;
|
||||||
|
use core::mem;
|
||||||
|
use core::num::NonZeroU64;
|
||||||
|
|
||||||
|
use crate::lazy::LazyResult;
|
||||||
|
use crate::{Error, Location};
|
||||||
|
|
||||||
|
pub(crate) struct LazyLines(LazyResult<Lines>);
|
||||||
|
|
||||||
|
impl LazyLines {
|
||||||
|
pub(crate) fn new() -> Self {
|
||||||
|
LazyLines(LazyResult::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn borrow<R: gimli::Reader>(
|
||||||
|
&self,
|
||||||
|
dw_unit: gimli::UnitRef<R>,
|
||||||
|
ilnp: &gimli::IncompleteLineProgram<R, R::Offset>,
|
||||||
|
) -> Result<&Lines, Error> {
|
||||||
|
self.0
|
||||||
|
.borrow_with(|| Lines::parse(dw_unit, ilnp.clone()))
|
||||||
|
.as_ref()
|
||||||
|
.map_err(Error::clone)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct LineSequence {
|
||||||
|
start: u64,
|
||||||
|
end: u64,
|
||||||
|
rows: Box<[LineRow]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct LineRow {
|
||||||
|
address: u64,
|
||||||
|
file_index: u64,
|
||||||
|
line: u32,
|
||||||
|
column: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct Lines {
|
||||||
|
files: Box<[String]>,
|
||||||
|
sequences: Box<[LineSequence]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Lines {
|
||||||
|
fn parse<R: gimli::Reader>(
|
||||||
|
dw_unit: gimli::UnitRef<R>,
|
||||||
|
ilnp: gimli::IncompleteLineProgram<R, R::Offset>,
|
||||||
|
) -> Result<Self, Error> {
|
||||||
|
let mut sequences = Vec::new();
|
||||||
|
let mut sequence_rows = Vec::<LineRow>::new();
|
||||||
|
let mut rows = ilnp.rows();
|
||||||
|
while let Some((_, row)) = rows.next_row()? {
|
||||||
|
if row.end_sequence() {
|
||||||
|
if let Some(start) = sequence_rows.first().map(|x| x.address) {
|
||||||
|
let end = row.address();
|
||||||
|
let mut rows = Vec::new();
|
||||||
|
mem::swap(&mut rows, &mut sequence_rows);
|
||||||
|
sequences.push(LineSequence {
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
rows: rows.into_boxed_slice(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let address = row.address();
|
||||||
|
let file_index = row.file_index();
|
||||||
|
// Convert line and column to u32 to save a little memory.
|
||||||
|
// We'll handle the special case of line 0 later,
|
||||||
|
// and return left edge as column 0 in the public API.
|
||||||
|
let line = row.line().map(NonZeroU64::get).unwrap_or(0) as u32;
|
||||||
|
let column = match row.column() {
|
||||||
|
gimli::ColumnType::LeftEdge => 0,
|
||||||
|
gimli::ColumnType::Column(x) => x.get() as u32,
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(last_row) = sequence_rows.last_mut() {
|
||||||
|
if last_row.address == address {
|
||||||
|
last_row.file_index = file_index;
|
||||||
|
last_row.line = line;
|
||||||
|
last_row.column = column;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sequence_rows.push(LineRow {
|
||||||
|
address,
|
||||||
|
file_index,
|
||||||
|
line,
|
||||||
|
column,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
sequences.sort_by_key(|x| x.start);
|
||||||
|
|
||||||
|
let mut files = Vec::new();
|
||||||
|
let header = rows.header();
|
||||||
|
match header.file(0) {
|
||||||
|
Some(file) => files.push(render_file(dw_unit, file, header)?),
|
||||||
|
None => files.push(String::from("")), // DWARF version <= 4 may not have 0th index
|
||||||
|
}
|
||||||
|
let mut index = 1;
|
||||||
|
while let Some(file) = header.file(index) {
|
||||||
|
files.push(render_file(dw_unit, file, header)?);
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
files: files.into_boxed_slice(),
|
||||||
|
sequences: sequences.into_boxed_slice(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn file(&self, index: u64) -> Option<&str> {
|
||||||
|
self.files.get(index as usize).map(String::as_str)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn ranges(&self) -> impl Iterator<Item = gimli::Range> + '_ {
|
||||||
|
self.sequences.iter().map(|sequence| gimli::Range {
|
||||||
|
begin: sequence.start,
|
||||||
|
end: sequence.end,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn row_location(&self, row: &LineRow) -> Location<'_> {
|
||||||
|
let file = self.files.get(row.file_index as usize).map(String::as_str);
|
||||||
|
Location {
|
||||||
|
file,
|
||||||
|
line: if row.line != 0 { Some(row.line) } else { None },
|
||||||
|
// If row.line is specified then row.column always has meaning.
|
||||||
|
column: if row.line != 0 {
|
||||||
|
Some(row.column)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn find_location(&self, probe: u64) -> Result<Option<Location<'_>>, Error> {
|
||||||
|
let seq_idx = self.sequences.binary_search_by(|sequence| {
|
||||||
|
if probe < sequence.start {
|
||||||
|
Ordering::Greater
|
||||||
|
} else if probe >= sequence.end {
|
||||||
|
Ordering::Less
|
||||||
|
} else {
|
||||||
|
Ordering::Equal
|
||||||
|
}
|
||||||
|
});
|
||||||
|
let seq_idx = match seq_idx {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(_) => return Ok(None),
|
||||||
|
};
|
||||||
|
let sequence = &self.sequences[seq_idx];
|
||||||
|
|
||||||
|
let idx = sequence
|
||||||
|
.rows
|
||||||
|
.binary_search_by(|row| row.address.cmp(&probe));
|
||||||
|
let idx = match idx {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(0) => return Ok(None),
|
||||||
|
Err(x) => x - 1,
|
||||||
|
};
|
||||||
|
Ok(Some(self.row_location(&sequence.rows[idx])))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn find_location_range(
|
||||||
|
&self,
|
||||||
|
probe_low: u64,
|
||||||
|
probe_high: u64,
|
||||||
|
) -> Result<LineLocationRangeIter<'_>, Error> {
|
||||||
|
// Find index for probe_low.
|
||||||
|
let seq_idx = self.sequences.binary_search_by(|sequence| {
|
||||||
|
if probe_low < sequence.start {
|
||||||
|
Ordering::Greater
|
||||||
|
} else if probe_low >= sequence.end {
|
||||||
|
Ordering::Less
|
||||||
|
} else {
|
||||||
|
Ordering::Equal
|
||||||
|
}
|
||||||
|
});
|
||||||
|
let seq_idx = match seq_idx {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(x) => x, // probe below sequence, but range could overlap
|
||||||
|
};
|
||||||
|
|
||||||
|
let row_idx = if let Some(seq) = self.sequences.get(seq_idx) {
|
||||||
|
let idx = seq.rows.binary_search_by(|row| row.address.cmp(&probe_low));
|
||||||
|
match idx {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(0) => 0, // probe below sequence, but range could overlap
|
||||||
|
Err(x) => x - 1,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(LineLocationRangeIter {
|
||||||
|
lines: self,
|
||||||
|
seq_idx,
|
||||||
|
row_idx,
|
||||||
|
probe_high,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterator over `(address, length, Location)` tuples for a range of
/// addresses, produced by `Lines::find_location_range`.
pub(crate) struct LineLocationRangeIter<'ctx> {
    // The line table being iterated.
    lines: &'ctx Lines,
    // Index of the current sequence in `lines.sequences`.
    seq_idx: usize,
    // Index of the current row within the current sequence.
    row_idx: usize,
    // Exclusive upper bound of the queried address range.
    probe_high: u64,
}
|
||||||
|
|
||||||
|
impl<'ctx> Iterator for LineLocationRangeIter<'ctx> {
|
||||||
|
type Item = (u64, u64, Location<'ctx>);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<(u64, u64, Location<'ctx>)> {
|
||||||
|
while let Some(seq) = self.lines.sequences.get(self.seq_idx) {
|
||||||
|
if seq.start >= self.probe_high {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
match seq.rows.get(self.row_idx) {
|
||||||
|
Some(row) => {
|
||||||
|
if row.address >= self.probe_high {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let nextaddr = seq
|
||||||
|
.rows
|
||||||
|
.get(self.row_idx + 1)
|
||||||
|
.map(|row| row.address)
|
||||||
|
.unwrap_or(seq.end);
|
||||||
|
|
||||||
|
let item = (
|
||||||
|
row.address,
|
||||||
|
nextaddr - row.address,
|
||||||
|
self.lines.row_location(row),
|
||||||
|
);
|
||||||
|
self.row_idx += 1;
|
||||||
|
|
||||||
|
return Some(item);
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
self.seq_idx += 1;
|
||||||
|
self.row_idx = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_file<R: gimli::Reader>(
|
||||||
|
dw_unit: gimli::UnitRef<R>,
|
||||||
|
file: &gimli::FileEntry<R, R::Offset>,
|
||||||
|
header: &gimli::LineProgramHeader<R, R::Offset>,
|
||||||
|
) -> Result<String, gimli::Error> {
|
||||||
|
let mut path = if let Some(ref comp_dir) = dw_unit.comp_dir {
|
||||||
|
comp_dir.to_string_lossy()?.into_owned()
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
// The directory index 0 is defined to correspond to the compilation unit directory.
|
||||||
|
if file.directory_index() != 0 {
|
||||||
|
if let Some(directory) = file.directory(header) {
|
||||||
|
path_push(
|
||||||
|
&mut path,
|
||||||
|
dw_unit.attr_string(directory)?.to_string_lossy()?.as_ref(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
path_push(
|
||||||
|
&mut path,
|
||||||
|
dw_unit
|
||||||
|
.attr_string(file.path_name())?
|
||||||
|
.to_string_lossy()?
|
||||||
|
.as_ref(),
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn path_push(path: &mut String, p: &str) {
|
||||||
|
if has_unix_root(p) || has_windows_root(p) {
|
||||||
|
*path = p.to_string();
|
||||||
|
} else {
|
||||||
|
let dir_separator = if has_windows_root(path.as_str()) {
|
||||||
|
'\\'
|
||||||
|
} else {
|
||||||
|
'/'
|
||||||
|
};
|
||||||
|
|
||||||
|
if !path.is_empty() && !path.ends_with(dir_separator) {
|
||||||
|
path.push(dir_separator);
|
||||||
|
}
|
||||||
|
*path += p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the path in the given string is absolute in the Unix style
/// (i.e. begins with `/`).
fn has_unix_root(p: &str) -> bool {
    matches!(p.as_bytes().first(), Some(b'/'))
}
|
||||||
|
|
||||||
|
/// Check if the path in the given string is absolute in the Windows style:
/// either a leading backslash or a drive prefix such as `C:\`.
fn has_windows_root(p: &str) -> bool {
    if p.starts_with('\\') {
        return true;
    }
    // `get` (rather than slicing) avoids panicking on short strings or
    // non-char-boundary indices.
    matches!(p.get(1..3), Some(":\\"))
}
|
||||||
451
.gear/predownloaded-development/vendor/addr2line/src/loader.rs
vendored
Normal file
451
.gear/predownloaded-development/vendor/addr2line/src/loader.rs
vendored
Normal file
|
|
@ -0,0 +1,451 @@
|
||||||
|
use alloc::borrow::Cow;
|
||||||
|
use alloc::boxed::Box;
|
||||||
|
use alloc::sync::Arc;
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
use std::ffi::OsStr;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use memmap2::Mmap;
|
||||||
|
use object::{Object, ObjectMapFile, ObjectSection, SymbolMap, SymbolMapName};
|
||||||
|
use typed_arena::Arena;
|
||||||
|
|
||||||
|
use crate::lazy::LazyCell;
|
||||||
|
use crate::{
|
||||||
|
Context, FrameIter, Location, LocationRangeIter, LookupContinuation, LookupResult,
|
||||||
|
SplitDwarfLoad,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// The type used by [`Loader`] for reading DWARF data.
///
/// This is used in the return types of the methods of [`Loader`].
// TODO: use impl Trait when stable
pub type LoaderReader<'a> = gimli::EndianSlice<'a, gimli::RunTimeEndian>;

// Boxed error and result aliases used throughout this module.
type Error = Box<dyn std::error::Error>;
type Result<T> = std::result::Result<T, Error>;
|
||||||
|
|
||||||
|
/// A loader for the DWARF data required for a `Context`.
///
/// For performance reasons, a [`Context`] normally borrows the input data.
/// However, that means the input data must outlive the `Context`, which
/// is inconvenient for long-lived `Context`s.
/// This loader uses an arena to store the input data, together with the
/// `Context` itself. This ensures that the input data lives as long as
/// the `Context`.
///
/// The loader performs some additional tasks:
/// - Loads the symbol table from the executable file (see [`Self::find_symbol`]).
/// - Loads Mach-O dSYM files that are located next to the executable file.
/// - Locates and loads split DWARF files (DWO and DWP).
pub struct Loader {
    // Self-referential: borrows from the arenas below. The 'static
    // lifetime here is a fiction that is never exposed; all access goes
    // through `borrow_internal`, which re-borrows at the caller's lifetime.
    internal: LoaderInternal<'static>,
    // Owned section data (e.g. sections that had to be decompressed by
    // `load_section`), kept alive for as long as the `Context`.
    arena_data: Arena<Vec<u8>>,
    // Memory-mapped input files, kept alive for as long as the `Context`.
    arena_mmap: Arena<Mmap>,
}
|
||||||
|
|
||||||
|
impl Loader {
    /// Load the DWARF data for an executable file and create a `Context`.
    #[inline]
    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
        Self::new_with_sup(path, None::<&Path>)
    }

    /// Load the DWARF data for an executable file and create a `Context`.
    ///
    /// Optionally also use a supplementary object file.
    pub fn new_with_sup(
        path: impl AsRef<Path>,
        sup_path: Option<impl AsRef<Path>>,
    ) -> Result<Self> {
        let arena_data = Arena::new();
        let arena_mmap = Arena::new();

        // `internal` borrows from the two arenas created above; both are
        // moved into the returned `Loader` alongside it.
        let internal = LoaderInternal::new(
            path.as_ref(),
            sup_path.as_ref().map(AsRef::as_ref),
            &arena_data,
            &arena_mmap,
        )?;
        Ok(Loader {
            // Convert to static lifetime to allow self-reference by `internal`.
            // `internal` is only accessed through `borrow_internal`, which ensures
            // that the static lifetime does not leak.
            internal: unsafe {
                core::mem::transmute::<LoaderInternal<'_>, LoaderInternal<'static>>(internal)
            },
            arena_data,
            arena_mmap,
        })
    }

    // Run `f` with the internal state re-borrowed at the caller's
    // lifetime `'a`, so the fabricated 'static lifetime never escapes.
    fn borrow_internal<'a, F, T>(&'a self, f: F) -> T
    where
        F: FnOnce(&'a LoaderInternal<'a>, &'a Arena<Vec<u8>>, &'a Arena<Mmap>) -> T,
    {
        // Do not leak the static lifetime.
        let internal = unsafe {
            core::mem::transmute::<&LoaderInternal<'static>, &'a LoaderInternal<'a>>(&self.internal)
        };
        f(internal, &self.arena_data, &self.arena_mmap)
    }

    /// Get the base address used for relative virtual addresses.
    ///
    /// Currently this is only non-zero for PE.
    pub fn relative_address_base(&self) -> u64 {
        self.borrow_internal(|i, _data, _mmap| i.relative_address_base)
    }

    /// Find the source file and line corresponding to the given virtual memory address.
    ///
    /// This calls [`Context::find_location`] with the given address.
    pub fn find_location(&self, probe: u64) -> Result<Option<Location<'_>>> {
        self.borrow_internal(|i, data, mmap| i.find_location(probe, data, mmap))
    }

    /// Return source file and lines for a range of addresses.
    ///
    /// This calls [`Context::find_location_range`] with the given range.
    pub fn find_location_range(
        &self,
        probe_low: u64,
        probe_high: u64,
    ) -> Result<LocationRangeIter<'_, LoaderReader>> {
        self.borrow_internal(|i, data, mmap| {
            i.find_location_range(probe_low, probe_high, data, mmap)
        })
    }

    /// Return an iterator for the function frames corresponding to the given virtual
    /// memory address.
    ///
    /// This calls [`Context::find_frames`] with the given address.
    pub fn find_frames(&self, probe: u64) -> Result<FrameIter<'_, LoaderReader<'_>>> {
        self.borrow_internal(|i, data, mmap| i.find_frames(probe, data, mmap))
    }

    /// Find the symbol table entry corresponding to the given virtual memory address.
    pub fn find_symbol(&self, probe: u64) -> Option<&str> {
        self.borrow_internal(|i, _data, _mmap| i.find_symbol(probe))
    }
}
|
||||||
|
|
||||||
|
// The borrowing half of `Loader`: everything here references data owned
// by the arenas stored alongside it in `Loader`.
struct LoaderInternal<'a> {
    // DWARF context for the main object file (or its dSYM, when found).
    ctx: Context<LoaderReader<'a>>,
    // Base address for relative virtual addresses (non-zero only for PE).
    relative_address_base: u64,
    // Symbol table of the main object file.
    symbols: SymbolMap<SymbolMapName<'a>>,
    // DWARF package (.dwp) found next to the executable, if any.
    dwarf_package: Option<gimli::DwarfPackage<LoaderReader<'a>>>,
    // Map from address to Mach-O object file path.
    object_map: object::ObjectMap<'a>,
    // A context for each Mach-O object file.
    objects: Vec<LazyCell<Option<ObjectContext<'a>>>>,
}
|
||||||
|
|
||||||
|
impl<'a> LoaderInternal<'a> {
    // Open and parse the executable, its optional supplementary file, a
    // Mach-O dSYM (when one matches by UUID), and a DWARF package (.dwp)
    // next to the executable. dSYM and DWP failures are silently ignored
    // (best effort); failures on the main and supplementary files are
    // returned as errors.
    fn new(
        path: &Path,
        sup_path: Option<&Path>,
        arena_data: &'a Arena<Vec<u8>>,
        arena_mmap: &'a Arena<Mmap>,
    ) -> Result<Self> {
        let file = File::open(path)?;
        let map = arena_mmap.alloc(unsafe { Mmap::map(&file)? });
        let mut object = object::File::parse(&**map)?;

        let relative_address_base = object.relative_address_base();
        let symbols = object.symbol_map();
        let object_map = object.object_map();
        let mut objects = Vec::new();
        // One lazily-initialized context slot per object map entry.
        objects.resize_with(object_map.objects().len(), LazyCell::new);

        // Load supplementary object file.
        // TODO: use debuglink and debugaltlink
        let sup_map;
        let sup_object = if let Some(sup_path) = sup_path {
            let sup_file = File::open(sup_path)?;
            sup_map = arena_mmap.alloc(unsafe { Mmap::map(&sup_file)? });
            Some(object::File::parse(&**sup_map)?)
        } else {
            None
        };

        // Load Mach-O dSYM file, ignoring errors.
        // Scans sibling "*.dSYM" bundles for a DWARF file whose Mach-O
        // UUID matches the executable's; on success that file replaces
        // `object` as the source of DWARF sections.
        if let Some(map) = (|| {
            let uuid = object.mach_uuid().ok()??;
            path.parent()?.read_dir().ok()?.find_map(|candidate| {
                let candidate = candidate.ok()?;
                let path = candidate.path();
                if path.extension().and_then(OsStr::to_str) != Some("dSYM") {
                    return None;
                }
                let path = path.join("Contents/Resources/DWARF");
                path.read_dir().ok()?.find_map(|candidate| {
                    let candidate = candidate.ok()?;
                    let path = candidate.path();
                    let file = File::open(path).ok()?;
                    let map = unsafe { Mmap::map(&file) }.ok()?;
                    let object = object::File::parse(&*map).ok()?;
                    if object.mach_uuid() == Ok(Some(uuid)) {
                        Some(map)
                    } else {
                        None
                    }
                })
            })
        })() {
            let map = arena_mmap.alloc(map);
            object = object::File::parse(&**map)?;
        }

        // Load the DWARF sections.
        let endian = if object.is_little_endian() {
            gimli::RunTimeEndian::Little
        } else {
            gimli::RunTimeEndian::Big
        };
        let mut dwarf =
            gimli::Dwarf::load(|id| load_section(Some(id.name()), &object, endian, arena_data))?;
        if let Some(sup_object) = &sup_object {
            dwarf.load_sup(|id| load_section(Some(id.name()), sup_object, endian, arena_data))?;
        }
        dwarf.populate_abbreviations_cache(gimli::AbbreviationsCacheStrategy::Duplicates);

        let ctx = Context::from_dwarf(dwarf)?;

        // Load the DWP file, ignoring errors.
        // The DWP path is the executable path with ".dwp" appended to
        // (not replacing) any existing extension.
        let dwarf_package = (|| {
            let mut dwp_path = path.to_path_buf();
            let dwp_extension = path
                .extension()
                .map(|previous_extension| {
                    let mut previous_extension = previous_extension.to_os_string();
                    previous_extension.push(".dwp");
                    previous_extension
                })
                .unwrap_or_else(|| "dwp".into());
            dwp_path.set_extension(dwp_extension);
            let dwp_file = File::open(&dwp_path).ok()?;
            let map = arena_mmap.alloc(unsafe { Mmap::map(&dwp_file) }.ok()?);
            let dwp_object = object::File::parse(&**map).ok()?;

            let endian = if dwp_object.is_little_endian() {
                gimli::RunTimeEndian::Little
            } else {
                gimli::RunTimeEndian::Big
            };
            let empty = gimli::EndianSlice::new(&[][..], endian);
            gimli::DwarfPackage::load(
                |id| load_section(id.dwo_name(), &dwp_object, endian, arena_data),
                empty,
            )
            .ok()
        })();

        Ok(LoaderInternal {
            ctx,
            relative_address_base,
            symbols,
            dwarf_package,
            object_map,
            objects,
        })
    }

    // Select the context to query for `probe`: a Mach-O object file's
    // context (with the probe translated into that object's address
    // space) when the object map covers the address, otherwise the main
    // context with the probe unchanged.
    fn ctx(
        &self,
        probe: u64,
        arena_data: &'a Arena<Vec<u8>>,
        arena_mmap: &'a Arena<Mmap>,
    ) -> (&Context<LoaderReader<'a>>, u64) {
        self.object_ctx(probe, arena_data, arena_mmap)
            .unwrap_or((&self.ctx, probe))
    }

    // Look up `probe` in the Mach-O object map, lazily creating the
    // context for the referenced object file on first use.
    fn object_ctx(
        &self,
        probe: u64,
        arena_data: &'a Arena<Vec<u8>>,
        arena_mmap: &'a Arena<Mmap>,
    ) -> Option<(&Context<LoaderReader<'a>>, u64)> {
        let symbol = self.object_map.get(probe)?;
        let object_context = self.objects[symbol.object_index()]
            .borrow_with(|| {
                ObjectContext::new(symbol.object(&self.object_map), arena_data, arena_mmap)
            })
            .as_ref()?;
        // Translate the probe to be relative to the symbol before
        // re-basing it inside the object file.
        object_context.ctx(symbol.name(), probe - symbol.address())
    }

    // Symbol table lookup by address in the main object file.
    fn find_symbol(&self, probe: u64) -> Option<&str> {
        self.symbols.get(probe).map(|x| x.name())
    }

    // See `Loader::find_location`.
    fn find_location(
        &'a self,
        probe: u64,
        arena_data: &'a Arena<Vec<u8>>,
        arena_mmap: &'a Arena<Mmap>,
    ) -> Result<Option<Location<'a>>> {
        let (ctx, probe) = self.ctx(probe, arena_data, arena_mmap);
        Ok(ctx.find_location(probe)?)
    }

    // See `Loader::find_location_range`.
    fn find_location_range(
        &self,
        probe_low: u64,
        probe_high: u64,
        arena_data: &'a Arena<Vec<u8>>,
        arena_mmap: &'a Arena<Mmap>,
    ) -> Result<LocationRangeIter<'a, LoaderReader>> {
        let (ctx, probe) = self.ctx(probe_low, arena_data, arena_mmap);
        // TODO: handle ranges that cover multiple objects
        let probe_high = probe + (probe_high - probe_low);
        Ok(ctx.find_location_range(probe, probe_high)?)
    }

    // See `Loader::find_frames`. Drives the split-DWARF lookup state
    // machine to completion, loading DWO files on demand.
    fn find_frames(
        &self,
        probe: u64,
        arena_data: &'a Arena<Vec<u8>>,
        arena_mmap: &'a Arena<Mmap>,
    ) -> Result<FrameIter<'a, LoaderReader>> {
        let (ctx, probe) = self.ctx(probe, arena_data, arena_mmap);
        let mut frames = ctx.find_frames(probe);
        loop {
            let (load, continuation) = match frames {
                LookupResult::Output(output) => return Ok(output?),
                LookupResult::Load { load, continuation } => (load, continuation),
            };

            let r = self.load_dwo(load, arena_data, arena_mmap)?;
            frames = continuation.resume(r);
        }
    }

    // Load the split DWARF (DWO) data described by `load`: first try the
    // DWARF package, then fall back to opening the DWO file from the
    // filesystem. Filesystem failures yield `Ok(None)` rather than errors.
    fn load_dwo(
        &self,
        load: SplitDwarfLoad<LoaderReader<'a>>,
        arena_data: &'a Arena<Vec<u8>>,
        arena_mmap: &'a Arena<Mmap>,
    ) -> Result<Option<Arc<gimli::Dwarf<LoaderReader<'a>>>>> {
        // Load the DWO file from the DWARF package, if available.
        if let Some(dwp) = self.dwarf_package.as_ref() {
            if let Some(cu) = dwp.find_cu(load.dwo_id, &load.parent)? {
                return Ok(Some(Arc::new(cu)));
            }
        }

        // Determine the path to the DWO file.
        let mut path = PathBuf::new();
        if let Some(p) = load.comp_dir.as_ref() {
            path.push(convert_path(p.slice())?);
        }
        let Some(p) = load.path.as_ref() else {
            return Ok(None);
        };
        path.push(convert_path(p.slice())?);

        // Load the DWO file, ignoring errors.
        let dwo = (|| {
            let file = File::open(&path).ok()?;
            let map = arena_mmap.alloc(unsafe { Mmap::map(&file) }.ok()?);
            let object = object::File::parse(&**map).ok()?;
            let endian = if object.is_little_endian() {
                gimli::RunTimeEndian::Little
            } else {
                gimli::RunTimeEndian::Big
            };
            let mut dwo_dwarf =
                gimli::Dwarf::load(|id| load_section(id.dwo_name(), &object, endian, arena_data))
                    .ok()?;
            let dwo_unit_header = dwo_dwarf.units().next().ok()??;
            let dwo_unit = dwo_dwarf.unit(dwo_unit_header).ok()?;
            // Reject a DWO whose id does not match the one we were asked for.
            if dwo_unit.dwo_id != Some(load.dwo_id) {
                return None;
            }
            dwo_dwarf.make_dwo(&load.parent);
            Some(Arc::new(dwo_dwarf))
        })();
        Ok(dwo)
    }
}
|
||||||
|
|
||||||
|
// DWARF context and symbol table for a single Mach-O object file that is
// referenced from an executable's object map.
struct ObjectContext<'a> {
    // DWARF context parsed from the object file.
    ctx: Context<LoaderReader<'a>>,
    // Symbol table of the object file, used to re-base probe addresses.
    symbols: SymbolMap<SymbolMapName<'a>>,
}
|
||||||
|
|
||||||
|
impl<'a> ObjectContext<'a> {
    // Open and parse the object file (or a named archive member),
    // returning `None` on any failure — object-map lookups are best
    // effort.
    fn new(
        object: &ObjectMapFile<'a>,
        arena_data: &'a Arena<Vec<u8>>,
        arena_mmap: &'a Arena<Mmap>,
    ) -> Option<Self> {
        let file = File::open(convert_path(object.path()).ok()?).ok()?;
        let map = &**arena_mmap.alloc(unsafe { Mmap::map(&file) }.ok()?);
        let data = if let Some(member_name) = object.member() {
            // The object lives inside a static archive; find the member
            // whose name matches.
            let archive = object::read::archive::ArchiveFile::parse(map).ok()?;
            let member = archive.members().find_map(|member| {
                let member = member.ok()?;
                if member.name() == member_name {
                    Some(member)
                } else {
                    None
                }
            })?;
            member.data(map).ok()?
        } else {
            map
        };
        let object = object::File::parse(data).ok()?;
        let endian = if object.is_little_endian() {
            gimli::RunTimeEndian::Little
        } else {
            gimli::RunTimeEndian::Big
        };
        let dwarf =
            gimli::Dwarf::load(|id| load_section(Some(id.name()), &object, endian, arena_data))
                .ok()?;
        let ctx = Context::from_dwarf(dwarf).ok()?;
        let symbols = object.symbol_map();
        Some(ObjectContext { ctx, symbols })
    }

    // Translate `probe` (an offset relative to `symbol_name` in the
    // executable) into an address inside this object file by finding the
    // same-named symbol here and adding its address.
    fn ctx(&self, symbol_name: &[u8], probe: u64) -> Option<(&Context<LoaderReader<'a>>, u64)> {
        self.symbols
            .symbols()
            .iter()
            .find(|symbol| symbol.name().as_bytes() == symbol_name)
            .map(|symbol| (&self.ctx, probe + symbol.address()))
    }
}
|
||||||
|
|
||||||
|
fn load_section<'input, Endian: gimli::Endianity>(
|
||||||
|
name: Option<&'static str>,
|
||||||
|
file: &object::File<'input>,
|
||||||
|
endian: Endian,
|
||||||
|
arena_data: &'input Arena<Vec<u8>>,
|
||||||
|
) -> Result<gimli::EndianSlice<'input, Endian>> {
|
||||||
|
let data = match name.and_then(|name| file.section_by_name(name)) {
|
||||||
|
Some(section) => match section.uncompressed_data()? {
|
||||||
|
Cow::Borrowed(b) => b,
|
||||||
|
Cow::Owned(b) => arena_data.alloc(b),
|
||||||
|
},
|
||||||
|
None => &[],
|
||||||
|
};
|
||||||
|
Ok(gimli::EndianSlice::new(data, endian))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
fn convert_path(bytes: &[u8]) -> Result<PathBuf> {
|
||||||
|
use std::os::unix::ffi::OsStrExt;
|
||||||
|
let s = OsStr::from_bytes(bytes);
|
||||||
|
Ok(PathBuf::from(s))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(unix))]
fn convert_path(bytes: &[u8]) -> Result<PathBuf> {
    // On non-Unix platforms the bytes must be valid UTF-8 to form a path.
    let s = std::str::from_utf8(bytes)?;
    Ok(PathBuf::from(s))
}
|
||||||
261
.gear/predownloaded-development/vendor/addr2line/src/lookup.rs
vendored
Normal file
261
.gear/predownloaded-development/vendor/addr2line/src/lookup.rs
vendored
Normal file
|
|
@ -0,0 +1,261 @@
|
||||||
|
use alloc::sync::Arc;
|
||||||
|
use core::marker::PhantomData;
|
||||||
|
use core::ops::ControlFlow;
|
||||||
|
|
||||||
|
/// This struct contains the information needed to find split DWARF data
/// and to produce a `gimli::Dwarf<R>` for it.
///
/// Produced by the `Load` variant of [`LookupResult`] and consumed by the
/// caller's split DWARF loader.
pub struct SplitDwarfLoad<R> {
    /// The dwo id, for looking up in a DWARF package, or for
    /// verifying an unpacked dwo found on the file system
    pub dwo_id: gimli::DwoId,
    /// The compilation directory `path` is relative to.
    pub comp_dir: Option<R>,
    /// A path on the filesystem, relative to `comp_dir` to find this dwo.
    pub path: Option<R>,
    /// Once the split DWARF data is loaded, the loader is expected
    /// to call [make_dwo(parent)](gimli::read::Dwarf::make_dwo) before
    /// returning the data.
    pub parent: Arc<gimli::Dwarf<R>>,
}
|
||||||
|
|
||||||
|
/// Operations that consult debug information may require additional files
/// to be loaded if split DWARF is being used. This enum returns the result
/// of the operation in the `Output` variant, or information about the split
/// DWARF that is required and a continuation to invoke once it is available
/// in the `Load` variant.
///
/// Callers that do not wish to support split DWARF can use
/// [`LookupResult::skip_all_loads`] instead of the loop below.
///
/// This enum is intended to be used in a loop like so:
/// ```no_run
/// # use addr2line::*;
/// # use std::sync::Arc;
/// # let ctx: Context<gimli::EndianSlice<gimli::RunTimeEndian>> = todo!();
/// # let do_split_dwarf_load = |load: SplitDwarfLoad<gimli::EndianSlice<gimli::RunTimeEndian>>| -> Option<Arc<gimli::Dwarf<gimli::EndianSlice<gimli::RunTimeEndian>>>> { None };
/// const ADDRESS: u64 = 0xdeadbeef;
/// let mut r = ctx.find_frames(ADDRESS);
/// let result = loop {
///     match r {
///         LookupResult::Output(result) => break result,
///         LookupResult::Load { load, continuation } => {
///             let dwo = do_split_dwarf_load(load);
///             r = continuation.resume(dwo);
///         }
///     }
/// };
/// ```
pub enum LookupResult<L: LookupContinuation> {
    /// The lookup requires split DWARF data to be loaded.
    Load {
        /// The information needed to find the split DWARF data.
        load: SplitDwarfLoad<<L as LookupContinuation>::Buf>,
        /// The continuation to resume with the loaded split DWARF data.
        continuation: L,
    },
    /// The lookup has completed and produced an output.
    Output(<L as LookupContinuation>::Output),
}
|
||||||
|
|
||||||
|
/// This trait represents a partially complete operation that can be resumed
/// once a load of needed split DWARF data is completed or abandoned by the
/// API consumer.
///
/// Implementations are internal to this crate; callers only drive them
/// via [`LookupResult`].
pub trait LookupContinuation: Sized {
    /// The final output of this operation.
    type Output;
    /// The type of reader used.
    type Buf: gimli::Reader;

    /// Resumes the operation with the provided data.
    ///
    /// After the caller loads the split DWARF data required, call this
    /// method to resume the operation. The return value of this method
    /// indicates if the computation has completed or if further data is
    /// required.
    ///
    /// If the additional data cannot be located, or the caller does not
    /// support split DWARF, `resume(None)` can be used to continue the
    /// operation with the data that is available.
    fn resume(self, input: Option<Arc<gimli::Dwarf<Self::Buf>>>) -> LookupResult<Self>;
}
|
||||||
|
|
||||||
|
impl<L: LookupContinuation> LookupResult<L> {
|
||||||
|
/// Callers that do not handle split DWARF can call `skip_all_loads`
|
||||||
|
/// to fast-forward to the end result. This result is produced with
|
||||||
|
/// the data that is available and may be less accurate than the
|
||||||
|
/// the results that would be produced if the caller did properly
|
||||||
|
/// support split DWARF.
|
||||||
|
pub fn skip_all_loads(mut self) -> L::Output {
|
||||||
|
loop {
|
||||||
|
self = match self {
|
||||||
|
LookupResult::Output(t) => return t,
|
||||||
|
LookupResult::Load { continuation, .. } => continuation.resume(None),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn map<T, F: FnOnce(L::Output) -> T>(
|
||||||
|
self,
|
||||||
|
f: F,
|
||||||
|
) -> LookupResult<MappedLookup<T, L, F>> {
|
||||||
|
match self {
|
||||||
|
LookupResult::Output(t) => LookupResult::Output(f(t)),
|
||||||
|
LookupResult::Load { load, continuation } => LookupResult::Load {
|
||||||
|
load,
|
||||||
|
continuation: MappedLookup {
|
||||||
|
original: continuation,
|
||||||
|
mutator: f,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn unwrap(self) -> L::Output {
|
||||||
|
match self {
|
||||||
|
LookupResult::Output(t) => t,
|
||||||
|
LookupResult::Load { .. } => unreachable!("Internal API misuse"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A continuation that completes after at most one split DWARF load, by
// invoking the stored closure with the (possibly absent) loaded data.
pub(crate) struct SimpleLookup<T, R, F>
where
    F: FnOnce(Option<Arc<gimli::Dwarf<R>>>) -> T,
    R: gimli::Reader,
{
    // Closure invoked by `resume` with the loaded split DWARF data.
    f: F,
    // Carries the otherwise-unused `T` and `R` type parameters.
    phantom: PhantomData<(T, R)>,
}
|
||||||
|
|
||||||
|
impl<T, R, F> SimpleLookup<T, R, F>
|
||||||
|
where
|
||||||
|
F: FnOnce(Option<Arc<gimli::Dwarf<R>>>) -> T,
|
||||||
|
R: gimli::Reader,
|
||||||
|
{
|
||||||
|
pub(crate) fn new_complete(t: F::Output) -> LookupResult<SimpleLookup<T, R, F>> {
|
||||||
|
LookupResult::Output(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn new_needs_load(
|
||||||
|
load: SplitDwarfLoad<R>,
|
||||||
|
f: F,
|
||||||
|
) -> LookupResult<SimpleLookup<T, R, F>> {
|
||||||
|
LookupResult::Load {
|
||||||
|
load,
|
||||||
|
continuation: SimpleLookup {
|
||||||
|
f,
|
||||||
|
phantom: PhantomData,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T, R, F> LookupContinuation for SimpleLookup<T, R, F>
where
    F: FnOnce(Option<Arc<gimli::Dwarf<R>>>) -> T,
    R: gimli::Reader,
{
    type Output = T;
    type Buf = R;

    fn resume(self, v: Option<Arc<gimli::Dwarf<Self::Buf>>>) -> LookupResult<Self> {
        // A simple lookup always completes after a single load attempt.
        LookupResult::Output((self.f)(v))
    }
}
|
||||||
|
|
||||||
|
// A continuation that applies `mutator` to the output of `original` once
// it completes; created by `LookupResult::map`.
pub(crate) struct MappedLookup<T, L, F>
where
    L: LookupContinuation,
    F: FnOnce(L::Output) -> T,
{
    // The continuation whose output is being transformed.
    original: L,
    // Applied to the output when `original` finally completes.
    mutator: F,
}
|
||||||
|
|
||||||
|
impl<T, L, F> LookupContinuation for MappedLookup<T, L, F>
where
    L: LookupContinuation,
    F: FnOnce(L::Output) -> T,
{
    type Output = T;
    type Buf = L::Buf;

    fn resume(self, v: Option<Arc<gimli::Dwarf<Self::Buf>>>) -> LookupResult<Self> {
        match self.original.resume(v) {
            // The inner lookup finished: apply the mapping function.
            LookupResult::Output(t) => LookupResult::Output((self.mutator)(t)),
            // Still loading: re-wrap so the mapping survives the next resume.
            LookupResult::Load { load, continuation } => LookupResult::Load {
                load,
                continuation: MappedLookup {
                    original: continuation,
                    mutator: self.mutator,
                },
            },
        }
    }
}
|
||||||
|
|
||||||
|
/// Some functions (e.g. `find_frames`) require considering multiple
/// compilation units, each of which might require their own split DWARF
/// lookup (and thus produce a continuation).
///
/// We store the underlying continuation here as well as a mutator function
/// that will either a) decide that the result of this continuation is
/// what is needed and mutate it to the final result or b) produce another
/// `LookupResult`. `new_lookup` will in turn eagerly drive any non-continuation
/// `LookupResult` with successive invocations of the mutator, until a new
/// continuation or a final result is produced. And finally, the impl of
/// `LookupContinuation::resume` will call `new_lookup` each time the
/// computation is resumed.
pub(crate) struct LoopingLookup<T, L, F>
where
    L: LookupContinuation,
    F: FnMut(L::Output) -> ControlFlow<T, LookupResult<L>>,
{
    // The continuation currently awaiting split DWARF data.
    continuation: L,
    // Decides whether each intermediate output is final (`Break`) or
    // leads to another lookup (`Continue`).
    mutator: F,
}
|
||||||
|
|
||||||
|
impl<T, L, F> LoopingLookup<T, L, F>
where
    L: LookupContinuation,
    F: FnMut(L::Output) -> ControlFlow<T, LookupResult<L>>,
{
    // Wrap an already-final result.
    pub(crate) fn new_complete(t: T) -> LookupResult<Self> {
        LookupResult::Output(t)
    }

    // Drive `r` through `mutator` until either a final output or a load
    // request is produced.
    pub(crate) fn new_lookup(mut r: LookupResult<L>, mut mutator: F) -> LookupResult<Self> {
        // Drive the loop eagerly so that we only ever have to represent one state
        // (the r == ControlFlow::Continue state) in LoopingLookup.
        loop {
            match r {
                LookupResult::Output(l) => match mutator(l) {
                    ControlFlow::Break(t) => return LookupResult::Output(t),
                    ControlFlow::Continue(r2) => {
                        r = r2;
                    }
                },
                LookupResult::Load { load, continuation } => {
                    return LookupResult::Load {
                        load,
                        continuation: LoopingLookup {
                            continuation,
                            mutator,
                        },
                    };
                }
            }
        }
    }
}
|
||||||
|
|
||||||
|
impl<T, L, F> LookupContinuation for LoopingLookup<T, L, F>
where
    L: LookupContinuation,
    F: FnMut(L::Output) -> ControlFlow<T, LookupResult<L>>,
{
    type Output = T;
    type Buf = L::Buf;

    /// Resume the suspended inner lookup with the (possibly absent) loaded
    /// DWARF, then re-enter the eager driving loop in `new_lookup`.
    fn resume(self, v: Option<Arc<gimli::Dwarf<Self::Buf>>>) -> LookupResult<Self> {
        let r = self.continuation.resume(v);
        LoopingLookup::new_lookup(r, self.mutator)
    }
}
|
||||||
589
.gear/predownloaded-development/vendor/addr2line/src/unit.rs
vendored
Normal file
589
.gear/predownloaded-development/vendor/addr2line/src/unit.rs
vendored
Normal file
|
|
@ -0,0 +1,589 @@
|
||||||
|
use alloc::boxed::Box;
|
||||||
|
use alloc::sync::Arc;
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
use core::cmp;
|
||||||
|
|
||||||
|
use crate::lazy::LazyResult;
|
||||||
|
use crate::{
|
||||||
|
Context, DebugFile, Error, Function, Functions, LazyFunctions, LazyLines,
|
||||||
|
LineLocationRangeIter, Lines, Location, LookupContinuation, LookupResult, RangeAttributes,
|
||||||
|
SimpleLookup, SplitDwarfLoad,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// One address range belonging to a compilation unit, used by
/// `ResUnits::find_range` for sorted lookups.
pub(crate) struct UnitRange {
    // Index of the owning unit in `ResUnits::units`.
    unit_id: usize,
    // Minimum `range.begin` over this entry and all later entries in the
    // array sorted by `range.end`; lets lookups terminate early.
    min_begin: u64,
    range: gimli::Range,
}
|
||||||
|
|
||||||
|
/// A compilation unit from the main debug file, with lazily parsed
/// line information, functions, and (optionally) its split DWARF unit.
pub(crate) struct ResUnit<R: gimli::Reader> {
    offset: gimli::DebugInfoOffset<R::Offset>,
    dw_unit: gimli::Unit<R>,
    // Source language from DW_AT_language, if present.
    pub(crate) lang: Option<gimli::DwLang>,
    lines: LazyLines,
    functions: LazyFunctions<R>,
    // Lazily loaded split DWARF (DWO) unit; `None` once resolved means the
    // unit has no DWO or it could not be found.
    dwo: LazyResult<Option<Box<DwoUnit<R>>>>,
}

/// The debug file a unit came from, paired with a reference to that unit.
type UnitRef<'unit, R> = (DebugFile, gimli::UnitRef<'unit, R>);
|
||||||
|
|
||||||
|
impl<R: gimli::Reader> ResUnit<R> {
    /// Pair this unit with the given DWARF sections.
    pub(crate) fn unit_ref<'a>(&'a self, sections: &'a gimli::Dwarf<R>) -> gimli::UnitRef<'a, R> {
        gimli::UnitRef::new(sections, &self.dw_unit)
    }

    /// Returns the DWARF sections and the unit.
    ///
    /// Loads the DWO unit if necessary.
    pub(crate) fn dwarf_and_unit<'unit, 'ctx: 'unit>(
        &'unit self,
        ctx: &'ctx Context<R>,
    ) -> LookupResult<
        SimpleLookup<
            Result<UnitRef<'unit, R>, Error>,
            R,
            impl FnOnce(Option<Arc<gimli::Dwarf<R>>>) -> Result<UnitRef<'unit, R>, Error>,
        >,
    > {
        // Convert the cached DWO lookup result into the (file, unit) pair:
        // a loaded DWO wins, otherwise fall back to the primary file's unit.
        let map_dwo = move |dwo: &'unit Result<Option<Box<DwoUnit<R>>>, Error>| match dwo {
            Ok(Some(dwo)) => Ok((DebugFile::Dwo, dwo.unit_ref())),
            Ok(None) => Ok((DebugFile::Primary, self.unit_ref(&*ctx.sections))),
            Err(e) => Err(*e),
        };
        let complete = |dwo| SimpleLookup::new_complete(map_dwo(dwo));

        // Fast path: the DWO state has already been resolved (or errored).
        if let Some(dwo) = self.dwo.borrow() {
            return complete(dwo);
        }

        // No DWO id means this unit has no split DWARF; cache that fact.
        let dwo_id = match self.dw_unit.dwo_id {
            None => {
                return complete(self.dwo.borrow_with(|| Ok(None)));
            }
            Some(dwo_id) => dwo_id,
        };

        let comp_dir = self.dw_unit.comp_dir.clone();

        let dwo_name = self.dw_unit.dwo_name().and_then(|s| {
            if let Some(s) = s {
                Ok(Some(ctx.sections.attr_string(&self.dw_unit, s)?))
            } else {
                Ok(None)
            }
        });

        let path = match dwo_name {
            Ok(v) => v,
            Err(e) => {
                // Cache the error so future calls short-circuit.
                return complete(self.dwo.borrow_with(|| Err(e)));
            }
        };

        // Runs once the caller has (maybe) loaded the DWO file: take its
        // first unit and copy over attributes relocated in the skeleton.
        let process_dwo = move |dwo_dwarf: Option<Arc<gimli::Dwarf<R>>>| {
            let dwo_dwarf = match dwo_dwarf {
                None => return Ok(None),
                Some(dwo_dwarf) => dwo_dwarf,
            };
            let mut dwo_units = dwo_dwarf.units();
            let dwo_header = match dwo_units.next()? {
                Some(dwo_header) => dwo_header,
                None => return Ok(None),
            };

            let mut dwo_unit = dwo_dwarf.unit(dwo_header)?;
            dwo_unit.copy_relocated_attributes(&self.dw_unit);
            Ok(Some(Box::new(DwoUnit {
                sections: dwo_dwarf,
                dw_unit: dwo_unit,
            })))
        };

        // Suspend and ask the caller to locate/load the split DWARF file.
        SimpleLookup::new_needs_load(
            SplitDwarfLoad {
                dwo_id,
                comp_dir,
                path,
                parent: ctx.sections.clone(),
            },
            move |dwo_dwarf| map_dwo(self.dwo.borrow_with(|| process_dwo(dwo_dwarf))),
        )
    }

    /// Lazily parse this unit's line program, if it has one.
    pub(crate) fn parse_lines(&self, sections: &gimli::Dwarf<R>) -> Result<Option<&Lines>, Error> {
        // NB: line information is always stored in the main debug file so this does not need
        // to handle DWOs.
        let ilnp = match self.dw_unit.line_program {
            Some(ref ilnp) => ilnp,
            None => return Ok(None),
        };
        self.lines.borrow(self.unit_ref(sections), ilnp).map(Some)
    }

    /// Lazily parse this unit's functions, loading the DWO first if needed.
    pub(crate) fn parse_functions<'unit, 'ctx: 'unit>(
        &'unit self,
        ctx: &'ctx Context<R>,
    ) -> LookupResult<impl LookupContinuation<Output = Result<&'unit Functions<R>, Error>, Buf = R>>
    {
        self.dwarf_and_unit(ctx).map(move |r| {
            let (_file, unit) = r?;
            self.functions.borrow(unit)
        })
    }

    /// Ensure inlined-function data for this unit has been parsed.
    pub(crate) fn parse_inlined_functions<'unit, 'ctx: 'unit>(
        &'unit self,
        ctx: &'ctx Context<R>,
    ) -> LookupResult<impl LookupContinuation<Output = Result<(), Error>, Buf = R> + 'unit> {
        self.dwarf_and_unit(ctx).map(move |r| {
            let (file, unit) = r?;
            self.functions
                .borrow(unit)?
                .parse_inlined_functions(file, unit, ctx)
        })
    }

    /// Find the source location for `probe` using this unit's line program.
    pub(crate) fn find_location(
        &self,
        probe: u64,
        sections: &gimli::Dwarf<R>,
    ) -> Result<Option<Location<'_>>, Error> {
        let Some(lines) = self.parse_lines(sections)? else {
            return Ok(None);
        };
        lines.find_location(probe)
    }

    /// Iterate source locations for the address range [probe_low, probe_high).
    #[inline]
    pub(crate) fn find_location_range(
        &self,
        probe_low: u64,
        probe_high: u64,
        sections: &gimli::Dwarf<R>,
    ) -> Result<Option<LineLocationRangeIter<'_>>, Error> {
        let Some(lines) = self.parse_lines(sections)? else {
            return Ok(None);
        };
        lines.find_location_range(probe_low, probe_high).map(Some)
    }

    /// Find both the function and source location covering `probe`,
    /// loading the DWO unit first if necessary.
    pub(crate) fn find_function_or_location<'unit, 'ctx: 'unit>(
        &'unit self,
        probe: u64,
        ctx: &'ctx Context<R>,
    ) -> LookupResult<
        impl LookupContinuation<
            Output = Result<(Option<&'unit Function<R>>, Option<Location<'unit>>), Error>,
            Buf = R,
        >,
    > {
        self.dwarf_and_unit(ctx).map(move |r| {
            let (file, unit) = r?;
            let functions = self.functions.borrow(unit)?;
            let function = match functions.find_address(probe) {
                Some(address) => {
                    let function_index = functions.addresses[address].function;
                    let function = &functions.functions[function_index];
                    Some(function.borrow(file, unit, ctx)?)
                }
                None => None,
            };
            let location = self.find_location(probe, unit.dwarf)?;
            Ok((function, location))
        })
    }
}
|
||||||
|
|
||||||
|
/// All compilation units in the main debug file, plus a flattened,
/// end-address-sorted list of their ranges for address lookups.
pub(crate) struct ResUnits<R: gimli::Reader> {
    // Sorted by `range.end`; see `find_range`.
    ranges: Box<[UnitRange]>,
    // Sorted by `offset` (units are visited in .debug_info order).
    units: Box<[ResUnit<R>]>,
}
|
||||||
|
|
||||||
|
impl<R: gimli::Reader> ResUnits<R> {
    /// Parse all compilation units and collect their address ranges.
    pub(crate) fn parse(sections: &gimli::Dwarf<R>) -> Result<Self, Error> {
        // Find all the references to compilation units in .debug_aranges.
        // Note that we always also iterate through all of .debug_info to
        // find compilation units, because .debug_aranges may be missing some.
        let mut aranges = Vec::new();
        let mut headers = sections.debug_aranges.headers();
        while let Some(header) = headers.next()? {
            aranges.push((header.debug_info_offset(), header.offset()));
        }
        aranges.sort_by_key(|i| i.0);

        let mut unit_ranges = Vec::new();
        let mut res_units = Vec::new();
        let mut units = sections.units();
        while let Some(header) = units.next()? {
            let unit_id = res_units.len();
            let offset = match header.offset().as_debug_info_offset() {
                Some(offset) => offset,
                None => continue,
            };
            // We mainly want compile units, but we may need to follow references to entries
            // within other units for function names. We don't need anything from type units.
            let mut need_unit_range = match header.type_() {
                gimli::UnitType::Type { .. } | gimli::UnitType::SplitType { .. } => continue,
                gimli::UnitType::Partial => {
                    // Partial units are only needed for references from other units.
                    // They shouldn't have any address ranges.
                    false
                }
                _ => true,
            };
            let dw_unit = match sections.unit(header) {
                Ok(dw_unit) => dw_unit,
                // Skip malformed units instead of failing the whole parse.
                Err(_) => continue,
            };
            let dw_unit_ref = gimli::UnitRef::new(sections, &dw_unit);

            let mut lang = None;
            if need_unit_range {
                let mut entries = dw_unit_ref.entries_raw(None)?;

                let abbrev = match entries.read_abbreviation()? {
                    Some(abbrev) => abbrev,
                    None => continue,
                };

                // Read the root DIE's range-related attributes and language.
                let mut ranges = RangeAttributes::default();
                for spec in abbrev.attributes() {
                    let attr = entries.read_attribute(*spec)?;
                    match attr.name() {
                        gimli::DW_AT_low_pc => match attr.value() {
                            gimli::AttributeValue::Addr(val) => ranges.low_pc = Some(val),
                            gimli::AttributeValue::DebugAddrIndex(index) => {
                                ranges.low_pc = Some(dw_unit_ref.address(index)?);
                            }
                            _ => {}
                        },
                        gimli::DW_AT_high_pc => match attr.value() {
                            gimli::AttributeValue::Addr(val) => ranges.high_pc = Some(val),
                            gimli::AttributeValue::DebugAddrIndex(index) => {
                                ranges.high_pc = Some(dw_unit_ref.address(index)?);
                            }
                            gimli::AttributeValue::Udata(val) => ranges.size = Some(val),
                            _ => {}
                        },
                        gimli::DW_AT_ranges => {
                            ranges.ranges_offset = dw_unit_ref.attr_ranges_offset(attr.value())?;
                        }
                        gimli::DW_AT_language => {
                            if let gimli::AttributeValue::Language(val) = attr.value() {
                                lang = Some(val);
                            }
                        }
                        _ => {}
                    }
                }

                // Find the address ranges for the CU, using in order of preference:
                // - DW_AT_ranges
                // - .debug_aranges
                // - DW_AT_low_pc/DW_AT_high_pc
                //
                // Using DW_AT_ranges before .debug_aranges is possibly an arbitrary choice,
                // but the feeling is that DW_AT_ranges is more likely to be reliable or complete
                // if it is present.
                //
                // .debug_aranges must be used before DW_AT_low_pc/DW_AT_high_pc because
                // it has been observed on macOS that DW_AT_ranges was not emitted even for
                // discontiguous CUs.
                let i = match ranges.ranges_offset {
                    Some(_) => None,
                    None => aranges.binary_search_by_key(&offset, |x| x.0).ok(),
                };
                if let Some(mut i) = i {
                    // There should be only one set per CU, but in practice multiple
                    // sets have been observed. This is probably a compiler bug, but
                    // either way we need to handle it.
                    while i > 0 && aranges[i - 1].0 == offset {
                        i -= 1;
                    }
                    for (_, aranges_offset) in aranges[i..].iter().take_while(|x| x.0 == offset) {
                        let aranges_header = sections.debug_aranges.header(*aranges_offset)?;
                        let mut aranges = aranges_header.entries();
                        while let Some(arange) = aranges.next()? {
                            if arange.length() != 0 {
                                unit_ranges.push(UnitRange {
                                    range: arange.range(),
                                    unit_id,
                                    min_begin: 0,
                                });
                                need_unit_range = false;
                            }
                        }
                    }
                } else {
                    need_unit_range &= !ranges.for_each_range(dw_unit_ref, |range| {
                        unit_ranges.push(UnitRange {
                            range,
                            unit_id,
                            min_begin: 0,
                        });
                    })?;
                }
            }

            let lines = LazyLines::new();
            if need_unit_range {
                // The unit did not declare any ranges.
                // Try to get some ranges from the line program sequences.
                if let Some(ref ilnp) = dw_unit_ref.line_program {
                    if let Ok(lines) = lines.borrow(dw_unit_ref, ilnp) {
                        for range in lines.ranges() {
                            unit_ranges.push(UnitRange {
                                range,
                                unit_id,
                                min_begin: 0,
                            })
                        }
                    }
                }
            }

            res_units.push(ResUnit {
                offset,
                dw_unit,
                lang,
                lines,
                functions: LazyFunctions::new(),
                dwo: LazyResult::new(),
            });
        }

        // Sort this for faster lookup in `Self::find_range`.
        unit_ranges.sort_by_key(|i| i.range.end);

        // Calculate the `min_begin` field now that we've determined the order of
        // CUs.
        let mut min = !0;
        for i in unit_ranges.iter_mut().rev() {
            min = min.min(i.range.begin);
            i.min_begin = min;
        }

        Ok(ResUnits {
            ranges: unit_ranges.into_boxed_slice(),
            units: res_units.into_boxed_slice(),
        })
    }

    /// Iterate over all parsed units in .debug_info order.
    pub(crate) fn iter(&self) -> impl Iterator<Item = &ResUnit<R>> {
        self.units.iter()
    }

    /// Find the unit containing the DIE at the given .debug_info offset.
    pub(crate) fn find_offset(
        &self,
        offset: gimli::DebugInfoOffset<R::Offset>,
    ) -> Result<&gimli::Unit<R>, Error> {
        match self
            .units
            .binary_search_by_key(&offset.0, |unit| unit.offset.0)
        {
            // There is never a DIE at the unit offset or before the first unit.
            Ok(_) | Err(0) => Err(gimli::Error::NoEntryAtGivenOffset),
            Err(i) => Ok(&self.units[i - 1].dw_unit),
        }
    }

    /// Finds the CUs for the function address given.
    ///
    /// There might be multiple CUs whose range contains this address.
    /// Weak symbols have shown up in the wild which cause this to happen
    /// but otherwise this can happen if the CU has non-contiguous functions
    /// but only reports a single range.
    ///
    /// Consequently we return an iterator for all CUs which may contain the
    /// address, and the caller must check if there is actually a function or
    /// location in the CU for that address.
    pub(crate) fn find(&self, probe: u64) -> impl Iterator<Item = &ResUnit<R>> {
        self.find_range(probe, probe + 1).map(|(unit, _range)| unit)
    }

    /// Finds the CUs covering the range of addresses given.
    ///
    /// The range is [low, high) (ie, the upper bound is exclusive). This can return multiple
    /// ranges for the same unit.
    #[inline]
    pub(crate) fn find_range(
        &self,
        probe_low: u64,
        probe_high: u64,
    ) -> impl Iterator<Item = (&ResUnit<R>, &gimli::Range)> {
        // Find the position of the next range after a range which
        // ends at `probe_low` or lower.
        let pos = match self
            .ranges
            .binary_search_by_key(&probe_low, |i| i.range.end)
        {
            Ok(i) => i + 1, // Range `i` ends at exactly `probe_low`.
            Err(i) => i,    // Range `i - 1` ends at a lower address.
        };

        // Iterate from that position to find matching CUs.
        self.ranges[pos..]
            .iter()
            .take_while(move |i| {
                // We know that this CU's end is at least `probe_low` because
                // of our sorted array.
                debug_assert!(i.range.end >= probe_low);

                // Each entry keeps track of the minimum begin address for the
                // remainder of the array of unit ranges. If our probe is before
                // the minimum range begin of this entry, then it's guaranteed
                // to not fit in any subsequent entries, so we break out.
                probe_high > i.min_begin
            })
            .filter_map(move |i| {
                // If this CU doesn't actually contain this address, move to the
                // next CU.
                if probe_low >= i.range.end || probe_high <= i.range.begin {
                    return None;
                }
                Some((&self.units[i.unit_id], &i.range))
            })
    }

    /// Build a lazy iterator over all locations in [probe_low, probe_high).
    pub(crate) fn find_location_range<'a>(
        &'a self,
        probe_low: u64,
        probe_high: u64,
        sections: &'a gimli::Dwarf<R>,
    ) -> Result<LocationRangeIter<'a, R>, Error> {
        let unit_iter = Box::new(self.find_range(probe_low, probe_high));
        Ok(LocationRangeIter {
            unit_iter,
            iter: None,
            probe_low,
            probe_high,
            sections,
        })
    }
}
|
||||||
|
|
||||||
|
/// A DWO unit has its own DWARF sections.
struct DwoUnit<R: gimli::Reader> {
    // The split DWARF file's sections, shared with the loader.
    sections: Arc<gimli::Dwarf<R>>,
    dw_unit: gimli::Unit<R>,
}
|
||||||
|
|
||||||
|
impl<R: gimli::Reader> DwoUnit<R> {
    /// Pair this DWO unit with its own sections.
    fn unit_ref(&self) -> gimli::UnitRef<R> {
        gimli::UnitRef::new(&self.sections, &self.dw_unit)
    }
}
|
||||||
|
|
||||||
|
/// A compilation unit from a supplementary object file, keyed by its
/// .debug_info offset for DIE reference resolution.
pub(crate) struct SupUnit<R: gimli::Reader> {
    offset: gimli::DebugInfoOffset<R::Offset>,
    dw_unit: gimli::Unit<R>,
}
|
||||||
|
|
||||||
|
/// All units from the supplementary object file, sorted by offset.
pub(crate) struct SupUnits<R: gimli::Reader> {
    units: Box<[SupUnit<R>]>,
}
|
||||||
|
|
||||||
|
impl<R: gimli::Reader> Default for SupUnits<R> {
    /// An empty set of supplementary units (no supplementary file loaded).
    fn default() -> Self {
        Self {
            units: Box::default(),
        }
    }
}
|
||||||
|
|
||||||
|
impl<R: gimli::Reader> SupUnits<R> {
    /// Parse all units from the supplementary object file.
    pub(crate) fn parse(sections: &gimli::Dwarf<R>) -> Result<Self, Error> {
        let mut sup_units = Vec::new();
        let mut units = sections.units();
        while let Some(header) = units.next()? {
            let offset = match header.offset().as_debug_info_offset() {
                Some(offset) => offset,
                None => continue,
            };
            let dw_unit = match sections.unit(header) {
                Ok(dw_unit) => dw_unit,
                // Skip malformed units instead of failing the whole parse.
                Err(_) => continue,
            };
            sup_units.push(SupUnit { dw_unit, offset });
        }
        Ok(SupUnits {
            units: sup_units.into_boxed_slice(),
        })
    }

    /// Find the supplementary unit containing the DIE at the given offset.
    pub(crate) fn find_offset(
        &self,
        offset: gimli::DebugInfoOffset<R::Offset>,
    ) -> Result<&gimli::Unit<R>, Error> {
        match self
            .units
            .binary_search_by_key(&offset.0, |unit| unit.offset.0)
        {
            // There is never a DIE at the unit offset or before the first unit.
            Ok(_) | Err(0) => Err(gimli::Error::NoEntryAtGivenOffset),
            Err(i) => Ok(&self.units[i - 1].dw_unit),
        }
    }
}
|
||||||
|
|
||||||
|
/// Iterator over `Location`s in a range of addresses, returned by `Context::find_location_range`.
pub struct LocationRangeIter<'ctx, R: gimli::Reader> {
    // Outer iterator over (unit, range) pairs covering the probe window.
    unit_iter: Box<dyn Iterator<Item = (&'ctx ResUnit<R>, &'ctx gimli::Range)> + 'ctx>,
    // Inner per-unit line iterator currently being drained, if any.
    iter: Option<LineLocationRangeIter<'ctx>>,

    probe_low: u64,
    probe_high: u64,
    sections: &'ctx gimli::Dwarf<R>,
}
|
||||||
|
|
||||||
|
impl<'ctx, R: gimli::Reader> LocationRangeIter<'ctx, R> {
    /// Advance to the next `(begin, length, location)` triple, moving on to
    /// the next matching unit when the current unit's iterator is exhausted.
    fn next_loc(&mut self) -> Result<Option<(u64, u64, Location<'ctx>)>, Error> {
        loop {
            let iter = self.iter.take();
            match iter {
                // No inner iterator: start one for the next unit, clamping
                // the unit's range to the probe window.
                None => match self.unit_iter.next() {
                    Some((unit, range)) => {
                        self.iter = unit.find_location_range(
                            cmp::max(self.probe_low, range.begin),
                            cmp::min(self.probe_high, range.end),
                            self.sections,
                        )?;
                    }
                    None => return Ok(None),
                },
                // Drain the current inner iterator; if empty, loop to the
                // next unit (iter stays None because of `take`).
                Some(mut iter) => {
                    if let item @ Some(_) = iter.next() {
                        self.iter = Some(iter);
                        return Ok(item);
                    }
                }
            }
        }
    }
}
|
||||||
|
|
||||||
|
impl<'ctx, R> Iterator for LocationRangeIter<'ctx, R>
where
    R: gimli::Reader + 'ctx,
{
    type Item = (u64, u64, Location<'ctx>);

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // Errors end iteration silently (Err -> None); use the
        // FallibleIterator impl to observe them instead.
        self.next_loc().unwrap_or_default()
    }
}
|
||||||
|
|
||||||
|
#[cfg(feature = "fallible-iterator")]
impl<'ctx, R> fallible_iterator::FallibleIterator for LocationRangeIter<'ctx, R>
where
    R: gimli::Reader + 'ctx,
{
    type Item = (u64, u64, Location<'ctx>);
    type Error = Error;

    /// Error-propagating variant of `Iterator::next`.
    #[inline]
    fn next(&mut self) -> Result<Option<Self::Item>, Self::Error> {
        self.next_loc()
    }
}
|
||||||
|
|
@ -1,126 +0,0 @@
|
||||||
use addr2line::Context;
|
|
||||||
use fallible_iterator::FallibleIterator;
|
|
||||||
use findshlibs::{IterationControl, SharedLibrary, TargetSharedLibrary};
|
|
||||||
use object::Object;
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::fs::File;
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
/// Memory-map the debug info for the currently running test binary.
///
/// On macOS (detected via a Mach-O UUID), searches sibling `.dSYM` bundles
/// for the file whose UUID matches; elsewhere the executable itself is
/// returned. Panics on any I/O or parse failure — acceptable in a test.
fn find_debuginfo() -> memmap2::Mmap {
    let path = std::env::current_exe().unwrap();
    let file = File::open(&path).unwrap();
    let map = unsafe { memmap2::Mmap::map(&file).unwrap() };
    let file = &object::File::parse(&*map).unwrap();
    if let Ok(uuid) = file.mach_uuid() {
        for candidate in path.parent().unwrap().read_dir().unwrap() {
            let path = candidate.unwrap().path();
            if !path.to_str().unwrap().ends_with(".dSYM") {
                continue;
            }
            for candidate in path.join("Contents/Resources/DWARF").read_dir().unwrap() {
                let path = candidate.unwrap().path();
                let file = File::open(&path).unwrap();
                let map = unsafe { memmap2::Mmap::map(&file).unwrap() };
                let file = &object::File::parse(&*map).unwrap();
                if file.mach_uuid().unwrap() == uuid {
                    return map;
                }
            }
        }
    }

    // Fall back to the executable's own mapping.
    return map;
}
|
|
||||||
|
|
||||||
/// End-to-end check that symbolizing known function addresses in the test
/// binary itself yields the expected (mangled-prefix) names.
#[test]
fn correctness() {
    let map = find_debuginfo();
    let file = &object::File::parse(&*map).unwrap();
    let module_base = file.relative_address_base();

    let endian = if file.is_little_endian() {
        gimli::RunTimeEndian::Little
    } else {
        gimli::RunTimeEndian::Big
    };

    // Load a DWARF section (or an empty slice if absent) into an Arc buffer.
    fn load_section<'data: 'file, 'file, O, Endian>(
        id: gimli::SectionId,
        file: &'file O,
        endian: Endian,
    ) -> Result<gimli::EndianArcSlice<Endian>, gimli::Error>
    where
        O: object::Object<'data, 'file>,
        Endian: gimli::Endianity,
    {
        use object::ObjectSection;

        let data = file
            .section_by_name(id.name())
            .and_then(|section| section.uncompressed_data().ok())
            .unwrap_or(Cow::Borrowed(&[]));
        Ok(gimli::EndianArcSlice::new(Arc::from(&*data), endian))
    }

    let dwarf = gimli::Dwarf::load(|id| load_section(id, file, endian)).unwrap();
    let ctx = Context::from_dwarf(dwarf).unwrap();
    let mut split_dwarf_loader = addr2line::builtin_split_dwarf_loader::SplitDwarfLoader::new(
        |data, endian| gimli::EndianArcSlice::new(Arc::from(&*data), endian),
        None,
    );

    // Runtime load bias: difference between mapped and linked addresses.
    let mut bias = None;
    TargetSharedLibrary::each(|lib| {
        bias = Some((lib.virtual_memory_bias().0 as u64).wrapping_sub(module_base));
        IterationControl::Break
    });

    #[allow(unused_mut)]
    let mut test = |sym: u64, expected_prefix: &str| {
        let ip = sym.wrapping_sub(bias.unwrap());

        let frames = ctx.find_frames(ip);
        let frames = split_dwarf_loader.run(frames).unwrap();
        // The last frame is the outermost (non-inlined) function.
        let frame = frames.last().unwrap().unwrap();
        let name = frame.function.as_ref().unwrap().demangle().unwrap();
        // Old rust versions generate DWARF with wrong linkage name,
        // so only check the start.
        if !name.starts_with(expected_prefix) {
            panic!("incorrect name '{}', expected {:?}", name, expected_prefix);
        }
    };

    test(test_function as u64, "correctness::test_function");
    test(
        small::test_function as u64,
        "correctness::small::test_function",
    );
    test(auxiliary::foo as u64, "auxiliary::foo");
}
|
|
||||||
|
|
||||||
// A nested module so the test covers symbolizing a namespaced function.
mod small {
    pub fn test_function() {
        println!("y");
    }
}
|
|
||||||
|
|
||||||
// A top-level function whose address the `correctness` test symbolizes.
fn test_function() {
    println!("x");
}
|
|
||||||
|
|
||||||
/// Probing near-zero addresses must not blow up or return an absurd number
/// of frames (guards against regressions on unmapped/low addresses).
#[test]
fn zero_function() {
    let map = find_debuginfo();
    let file = &object::File::parse(&*map).unwrap();
    let ctx = Context::new(file).unwrap();
    for probe in 0..10 {
        assert!(
            ctx.find_frames(probe)
                .skip_all_loads()
                .unwrap()
                .count()
                .unwrap()
                < 10
        );
    }
}
|
|
||||||
|
|
@ -1,135 +0,0 @@
|
||||||
use std::env;
|
|
||||||
use std::ffi::OsStr;
|
|
||||||
use std::path::Path;
|
|
||||||
use std::process::Command;
|
|
||||||
|
|
||||||
use backtrace::Backtrace;
|
|
||||||
use findshlibs::{IterationControl, SharedLibrary, TargetSharedLibrary};
|
|
||||||
use libtest_mimic::{Arguments, Failed, Trial};
|
|
||||||
|
|
||||||
/// Capture a short backtrace through a known foo->bar->baz call chain and
/// return the first few frame addresses, rebased to link-time addresses,
/// formatted as hex pointer strings.
#[inline(never)]
fn make_trace() -> Vec<String> {
    fn foo() -> Backtrace {
        bar()
    }
    #[inline(never)]
    fn bar() -> Backtrace {
        baz()
    }
    // baz is inlined into bar so the trace exercises inline frames.
    #[inline(always)]
    fn baz() -> Backtrace {
        Backtrace::new_unresolved()
    }

    // Determine the load bias of the main module so runtime addresses can
    // be converted back to the addresses addr2line expects.
    let mut base_addr = None;
    TargetSharedLibrary::each(|lib| {
        base_addr = Some(lib.virtual_memory_bias().0 as isize);
        IterationControl::Break
    });
    let addrfix = -base_addr.unwrap();

    let trace = foo();
    trace
        .frames()
        .iter()
        .take(5)
        .map(|x| format!("{:p}", (x.ip() as *const u8).wrapping_offset(addrfix)))
        .collect()
}
|
|
||||||
|
|
||||||
/// Run an addr2line binary (`exe`) against this test binary (`me`) for one
/// address (`trace`), with optional flags, and return its stdout.
/// Panics if the child fails — acceptable in a test.
fn run_cmd<P: AsRef<OsStr>>(exe: P, me: &Path, flags: Option<&str>, trace: &str) -> String {
    let mut cmd = Command::new(exe);
    cmd.env("LC_ALL", "C"); // GNU addr2line is localized, we aren't
    cmd.env("RUST_BACKTRACE", "1"); // if a child crashes, we want to know why

    if let Some(flags) = flags {
        cmd.arg(flags);
    }
    cmd.arg("--exe").arg(me).arg(trace);

    let output = cmd.output().unwrap();

    assert!(output.status.success());
    String::from_utf8(output.stdout).unwrap()
}
|
|
||||||
|
|
||||||
/// Compare the output of this crate's addr2line example binary against the
/// system GNU addr2line for each address of a fresh backtrace, with the
/// given flag combination.
fn run_test(flags: Option<&str>) -> Result<(), Failed> {
    let me = env::current_exe().unwrap();
    // Locate the example binary: it lives in target/<profile>/examples,
    // while the test binary may be one level deeper in `deps`.
    let mut exe = me.clone();
    assert!(exe.pop());
    if exe.file_name().unwrap().to_str().unwrap() == "deps" {
        assert!(exe.pop());
    }
    exe.push("examples");
    exe.push("addr2line");

    assert!(exe.is_file());

    let trace = make_trace();

    // HACK: GNU addr2line has a bug where looking up multiple addresses can cause the second
    // lookup to fail. Workaround by doing one address at a time.
    for addr in &trace {
        let theirs = run_cmd("addr2line", &me, flags, addr);
        let ours = run_cmd(&exe, &me, flags, addr);

        // HACK: GNU addr2line does not tidy up paths properly, causing double slashes to be printed.
        // We consider our behavior to be correct, so we fix their output to match ours.
        let theirs = theirs.replace("//", "/");

        assert!(
            theirs == ours,
            "Output not equivalent:

$ addr2line {0} --exe {1} {2}
{4}
$ {3} {0} --exe {1} {2}
{5}

",
            flags.unwrap_or(""),
            me.display(),
            trace.join(" "),
            exe.display(),
            theirs,
            ours
        );
    }
    Ok(())
}
|
|
||||||
|
|
||||||
// The single-letter flags whose combinations are tested against GNU addr2line.
static FLAGS: &str = "aipsf";

/// Build one trial per subset of FLAGS (bitmask enumeration); bit pattern 0
/// means "no flags at all".
fn make_tests() -> Vec<Trial> {
    (0..(1 << FLAGS.len()))
        .map(|bits| {
            if bits == 0 {
                None
            } else {
                // Assemble e.g. "-ai" from the set bits.
                let mut param = String::new();
                param.push('-');
                for (i, flag) in FLAGS.chars().enumerate() {
                    if (bits & (1 << i)) != 0 {
                        param.push(flag);
                    }
                }
                Some(param)
            }
        })
        .map(|param| {
            Trial::test(
                format!("addr2line {}", param.as_ref().map_or("", String::as_str)),
                move || run_test(param.as_ref().map(String::as_str)),
            )
        })
        .collect()
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
if !cfg!(target_os = "linux") {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
let args = Arguments::from_args();
|
|
||||||
libtest_mimic::run(&args, make_tests()).exit();
|
|
||||||
}
|
|
||||||
|
|
@ -1,114 +0,0 @@
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::env;
|
|
||||||
use std::fs::File;
|
|
||||||
use std::path::{self, PathBuf};
|
|
||||||
|
|
||||||
use object::Object;
|
|
||||||
|
|
||||||
fn release_fixture_path() -> PathBuf {
|
|
||||||
if let Ok(p) = env::var("ADDR2LINE_FIXTURE_PATH") {
|
|
||||||
return p.into();
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut path = PathBuf::new();
|
|
||||||
if let Ok(dir) = env::var("CARGO_MANIFEST_DIR") {
|
|
||||||
path.push(dir);
|
|
||||||
}
|
|
||||||
path.push("fixtures");
|
|
||||||
path.push("addr2line-release");
|
|
||||||
path
|
|
||||||
}
|
|
||||||
|
|
||||||
fn with_file<F: FnOnce(&object::File<'_>)>(target: &path::Path, f: F) {
|
|
||||||
let file = File::open(target).unwrap();
|
|
||||||
let map = unsafe { memmap2::Mmap::map(&file).unwrap() };
|
|
||||||
let file = object::File::parse(&*map).unwrap();
|
|
||||||
f(&file)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn dwarf_load<'a>(object: &object::File<'a>) -> gimli::Dwarf<Cow<'a, [u8]>> {
|
|
||||||
let load_section = |id: gimli::SectionId| -> Result<Cow<'a, [u8]>, gimli::Error> {
|
|
||||||
use object::ObjectSection;
|
|
||||||
|
|
||||||
let data = object
|
|
||||||
.section_by_name(id.name())
|
|
||||||
.and_then(|section| section.data().ok())
|
|
||||||
.unwrap_or(&[][..]);
|
|
||||||
Ok(Cow::Borrowed(data))
|
|
||||||
};
|
|
||||||
gimli::Dwarf::load(&load_section).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn dwarf_borrow<'a>(
|
|
||||||
dwarf: &'a gimli::Dwarf<Cow<'_, [u8]>>,
|
|
||||||
) -> gimli::Dwarf<gimli::EndianSlice<'a, gimli::LittleEndian>> {
|
|
||||||
let borrow_section: &dyn for<'b> Fn(
|
|
||||||
&'b Cow<'_, [u8]>,
|
|
||||||
) -> gimli::EndianSlice<'b, gimli::LittleEndian> =
|
|
||||||
&|section| gimli::EndianSlice::new(section, gimli::LittleEndian);
|
|
||||||
dwarf.borrow(&borrow_section)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_base_rc() {
|
|
||||||
let target = release_fixture_path();
|
|
||||||
|
|
||||||
with_file(&target, |file| {
|
|
||||||
addr2line::ObjectContext::new(file).unwrap();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_base_slice() {
|
|
||||||
let target = release_fixture_path();
|
|
||||||
|
|
||||||
with_file(&target, |file| {
|
|
||||||
let dwarf = dwarf_load(file);
|
|
||||||
let dwarf = dwarf_borrow(&dwarf);
|
|
||||||
addr2line::Context::from_dwarf(dwarf).unwrap();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_lines_rc() {
|
|
||||||
let target = release_fixture_path();
|
|
||||||
|
|
||||||
with_file(&target, |file| {
|
|
||||||
let context = addr2line::ObjectContext::new(file).unwrap();
|
|
||||||
context.parse_lines().unwrap();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_lines_slice() {
|
|
||||||
let target = release_fixture_path();
|
|
||||||
|
|
||||||
with_file(&target, |file| {
|
|
||||||
let dwarf = dwarf_load(file);
|
|
||||||
let dwarf = dwarf_borrow(&dwarf);
|
|
||||||
let context = addr2line::Context::from_dwarf(dwarf).unwrap();
|
|
||||||
context.parse_lines().unwrap();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_functions_rc() {
|
|
||||||
let target = release_fixture_path();
|
|
||||||
|
|
||||||
with_file(&target, |file| {
|
|
||||||
let context = addr2line::ObjectContext::new(file).unwrap();
|
|
||||||
context.parse_functions().unwrap();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_functions_slice() {
|
|
||||||
let target = release_fixture_path();
|
|
||||||
|
|
||||||
with_file(&target, |file| {
|
|
||||||
let dwarf = dwarf_load(file);
|
|
||||||
let dwarf = dwarf_borrow(&dwarf);
|
|
||||||
let context = addr2line::Context::from_dwarf(dwarf).unwrap();
|
|
||||||
context.parse_functions().unwrap();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
{"files":{".cargo_vcs_info.json":"7ef3fd450a17facfa60d2fcc3225c6e5b9d020d299a31c50f64f6a1a0ba9a29e",".github/workflows/ci.yml":"a96e0ab5bf676757640fb93fefb0410221bf5d37b049c66101033e2710739f89","CHANGELOG.md":"737088e45fdf27fe2cfedce163332d8ce08c58fd86ca287de2de34c0fbaf63e7","Cargo.toml":"f410869f0f1a5697f65a8a77be03da7aeecc0be26e7cf3a1feb1acaa4f518770","Cargo.toml.orig":"67c7da0e25a1fb4e74a8f818af03acabaa73fdf31c362dc41db56b74524b6c6f","LICENSE-0BSD":"861399f8c21c042b110517e76dc6b63a2b334276c8cf17412fc3c8908ca8dc17","LICENSE-APACHE":"8ada45cd9f843acf64e4722ae262c622a2b3b3007c7310ef36ac1061a30f6adb","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"308c50cdb42b9573743068158339570b45ca3f895015ca3b87ba983edb0a21e6","RELEASE_PROCESS.md":"a86cd10fc70f167f8d00e9e4ce0c6b4ebdfa1865058390dffd1e0ad4d3e68d9d","benches/bench.rs":"c07ce370e3680c602e415f8d1ec4e543ea2163ab22a09b6b82d93e8a30adca82","src/algo.rs":"b664b131f724a809591394a10b9023f40ab5963e32a83fa3163c2668e59c8b66","src/lib.rs":"b55ba9c629b30360d08168b2ca0c96275432856a539737a105a6d6ae6bf7e88f"},"package":"f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"}
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
{
|
|
||||||
"git": {
|
|
||||||
"sha1": "a94f525f62698d699d1fb3cc9112db8c35662b16"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,81 +0,0 @@
|
||||||
name: CI
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
- staging
|
|
||||||
- trying
|
|
||||||
pull_request:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
|
|
||||||
env:
|
|
||||||
CARGO_TERM_COLOR: always
|
|
||||||
RUSTFLAGS: "--deny warnings"
|
|
||||||
MSRV: 1.31.0
|
|
||||||
NO_STD_TARGET: thumbv6m-none-eabi
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
test:
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
rust:
|
|
||||||
- stable
|
|
||||||
- nightly
|
|
||||||
os:
|
|
||||||
- ubuntu-latest
|
|
||||||
- macOS-latest
|
|
||||||
- windows-latest
|
|
||||||
runs-on: ${{ matrix.os }}
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
- uses: actions-rs/toolchain@v1
|
|
||||||
with:
|
|
||||||
profile: minimal
|
|
||||||
toolchain: ${{ matrix.rust }}
|
|
||||||
override: true
|
|
||||||
- name: Build
|
|
||||||
run: cargo build --all --all-targets
|
|
||||||
- name: Run tests
|
|
||||||
run: |
|
|
||||||
cargo test --all --all-targets
|
|
||||||
cargo test --all --no-default-features
|
|
||||||
|
|
||||||
no-std:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
- uses: actions-rs/toolchain@v1
|
|
||||||
with:
|
|
||||||
profile: minimal
|
|
||||||
toolchain: stable
|
|
||||||
override: true
|
|
||||||
target: ${{ env.NO_STD_TARGET }}
|
|
||||||
- name: Build
|
|
||||||
run: cargo build --verbose --no-default-features --target ${{ env.NO_STD_TARGET }}
|
|
||||||
|
|
||||||
msrv:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
- uses: actions-rs/toolchain@v1
|
|
||||||
with:
|
|
||||||
profile: minimal
|
|
||||||
toolchain: ${{ env.MSRV }}
|
|
||||||
override: true
|
|
||||||
- name: Build
|
|
||||||
run: cargo build --verbose
|
|
||||||
|
|
||||||
lint:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
- uses: actions-rs/toolchain@v1
|
|
||||||
with:
|
|
||||||
profile: minimal
|
|
||||||
toolchain: stable
|
|
||||||
override: true
|
|
||||||
components: rustfmt
|
|
||||||
- name: Check code formatting
|
|
||||||
run: cargo fmt -- --check
|
|
||||||
|
|
@ -1,63 +0,0 @@
|
||||||
# Changelog
|
|
||||||
|
|
||||||
## Unreleased
|
|
||||||
|
|
||||||
No changes.
|
|
||||||
|
|
||||||
## [1.0.2 - 2021-02-26](https://github.com/jonas-schievink/adler/releases/tag/v1.0.2)
|
|
||||||
|
|
||||||
- Fix doctest on big-endian systems ([#9]).
|
|
||||||
|
|
||||||
[#9]: https://github.com/jonas-schievink/adler/pull/9
|
|
||||||
|
|
||||||
## [1.0.1 - 2020-11-08](https://github.com/jonas-schievink/adler/releases/tag/v1.0.1)
|
|
||||||
|
|
||||||
### Fixes
|
|
||||||
|
|
||||||
- Fix documentation on docs.rs.
|
|
||||||
|
|
||||||
## [1.0.0 - 2020-11-08](https://github.com/jonas-schievink/adler/releases/tag/v1.0.0)
|
|
||||||
|
|
||||||
### Fixes
|
|
||||||
|
|
||||||
- Fix `cargo test --no-default-features` ([#5]).
|
|
||||||
|
|
||||||
### Improvements
|
|
||||||
|
|
||||||
- Extended and clarified documentation.
|
|
||||||
- Added more rustdoc examples.
|
|
||||||
- Extended CI to test the crate with `--no-default-features`.
|
|
||||||
|
|
||||||
### Breaking Changes
|
|
||||||
|
|
||||||
- `adler32_reader` now takes its generic argument by value instead of as a `&mut`.
|
|
||||||
- Renamed `adler32_reader` to `adler32`.
|
|
||||||
|
|
||||||
## [0.2.3 - 2020-07-11](https://github.com/jonas-schievink/adler/releases/tag/v0.2.3)
|
|
||||||
|
|
||||||
- Process 4 Bytes at a time, improving performance by up to 50% ([#2]).
|
|
||||||
|
|
||||||
## [0.2.2 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.2)
|
|
||||||
|
|
||||||
- Bump MSRV to 1.31.0.
|
|
||||||
|
|
||||||
## [0.2.1 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.1)
|
|
||||||
|
|
||||||
- Add a few `#[inline]` annotations to small functions.
|
|
||||||
- Fix CI badge.
|
|
||||||
- Allow integration into libstd.
|
|
||||||
|
|
||||||
## [0.2.0 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.0)
|
|
||||||
|
|
||||||
- Support `#![no_std]` when using `default-features = false`.
|
|
||||||
- Improve performance by around 7x.
|
|
||||||
- Support Rust 1.8.0.
|
|
||||||
- Improve API naming.
|
|
||||||
|
|
||||||
## [0.1.0 - 2020-06-26](https://github.com/jonas-schievink/adler/releases/tag/v0.1.0)
|
|
||||||
|
|
||||||
Initial release.
|
|
||||||
|
|
||||||
|
|
||||||
[#2]: https://github.com/jonas-schievink/adler/pull/2
|
|
||||||
[#5]: https://github.com/jonas-schievink/adler/pull/5
|
|
||||||
|
|
@ -1,64 +0,0 @@
|
||||||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
|
||||||
#
|
|
||||||
# When uploading crates to the registry Cargo will automatically
|
|
||||||
# "normalize" Cargo.toml files for maximal compatibility
|
|
||||||
# with all versions of Cargo and also rewrite `path` dependencies
|
|
||||||
# to registry (e.g., crates.io) dependencies
|
|
||||||
#
|
|
||||||
# If you believe there's an error in this file please file an
|
|
||||||
# issue against the rust-lang/cargo repository. If you're
|
|
||||||
# editing this file be aware that the upstream Cargo.toml
|
|
||||||
# will likely look very different (and much more reasonable)
|
|
||||||
|
|
||||||
[package]
|
|
||||||
name = "adler"
|
|
||||||
version = "1.0.2"
|
|
||||||
authors = ["Jonas Schievink <jonasschievink@gmail.com>"]
|
|
||||||
description = "A simple clean-room implementation of the Adler-32 checksum"
|
|
||||||
documentation = "https://docs.rs/adler/"
|
|
||||||
readme = "README.md"
|
|
||||||
keywords = ["checksum", "integrity", "hash", "adler32", "zlib"]
|
|
||||||
categories = ["algorithms"]
|
|
||||||
license = "0BSD OR MIT OR Apache-2.0"
|
|
||||||
repository = "https://github.com/jonas-schievink/adler.git"
|
|
||||||
[package.metadata.docs.rs]
|
|
||||||
rustdoc-args = ["--cfg=docsrs"]
|
|
||||||
|
|
||||||
[package.metadata.release]
|
|
||||||
no-dev-version = true
|
|
||||||
pre-release-commit-message = "Release {{version}}"
|
|
||||||
tag-message = "{{version}}"
|
|
||||||
|
|
||||||
[[package.metadata.release.pre-release-replacements]]
|
|
||||||
file = "CHANGELOG.md"
|
|
||||||
replace = "## Unreleased\n\nNo changes.\n\n## [{{version}} - {{date}}](https://github.com/jonas-schievink/adler/releases/tag/v{{version}})\n"
|
|
||||||
search = "## Unreleased\n"
|
|
||||||
|
|
||||||
[[package.metadata.release.pre-release-replacements]]
|
|
||||||
file = "README.md"
|
|
||||||
replace = "adler = \"{{version}}\""
|
|
||||||
search = "adler = \"[a-z0-9\\\\.-]+\""
|
|
||||||
|
|
||||||
[[package.metadata.release.pre-release-replacements]]
|
|
||||||
file = "src/lib.rs"
|
|
||||||
replace = "https://docs.rs/adler/{{version}}"
|
|
||||||
search = "https://docs.rs/adler/[a-z0-9\\.-]+"
|
|
||||||
|
|
||||||
[[bench]]
|
|
||||||
name = "bench"
|
|
||||||
harness = false
|
|
||||||
[dependencies.compiler_builtins]
|
|
||||||
version = "0.1.2"
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[dependencies.core]
|
|
||||||
version = "1.0.0"
|
|
||||||
optional = true
|
|
||||||
package = "rustc-std-workspace-core"
|
|
||||||
[dev-dependencies.criterion]
|
|
||||||
version = "0.3.2"
|
|
||||||
|
|
||||||
[features]
|
|
||||||
default = ["std"]
|
|
||||||
rustc-dep-of-std = ["core", "compiler_builtins"]
|
|
||||||
std = []
|
|
||||||
68
.gear/predownloaded-development/vendor/adler/Cargo.toml.orig
generated
vendored
68
.gear/predownloaded-development/vendor/adler/Cargo.toml.orig
generated
vendored
|
|
@ -1,68 +0,0 @@
|
||||||
[package]
|
|
||||||
name = "adler"
|
|
||||||
version = "1.0.2"
|
|
||||||
authors = ["Jonas Schievink <jonasschievink@gmail.com>"]
|
|
||||||
description = "A simple clean-room implementation of the Adler-32 checksum"
|
|
||||||
documentation = "https://docs.rs/adler/"
|
|
||||||
repository = "https://github.com/jonas-schievink/adler.git"
|
|
||||||
keywords = ["checksum", "integrity", "hash", "adler32", "zlib"]
|
|
||||||
categories = ["algorithms"]
|
|
||||||
readme = "README.md"
|
|
||||||
license = "0BSD OR MIT OR Apache-2.0"
|
|
||||||
|
|
||||||
[[bench]]
|
|
||||||
name = "bench"
|
|
||||||
harness = false
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
# Internal features, only used when building as part of libstd, not part of the
|
|
||||||
# stable interface of this crate.
|
|
||||||
core = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-core' }
|
|
||||||
compiler_builtins = { version = '0.1.2', optional = true }
|
|
||||||
|
|
||||||
[dev-dependencies]
|
|
||||||
criterion = "0.3.2"
|
|
||||||
|
|
||||||
[features]
|
|
||||||
# Disable default features to enable `#![no_std]` support.
|
|
||||||
default = ["std"]
|
|
||||||
std = []
|
|
||||||
|
|
||||||
# Internal feature, only used when building as part of libstd, not part of the
|
|
||||||
# stable interface of this crate.
|
|
||||||
rustc-dep-of-std = ['core', 'compiler_builtins']
|
|
||||||
|
|
||||||
|
|
||||||
[package.metadata.docs.rs]
|
|
||||||
rustdoc-args = [ "--cfg=docsrs" ]
|
|
||||||
|
|
||||||
# cargo-release configuration
|
|
||||||
[package.metadata.release]
|
|
||||||
tag-message = "{{version}}"
|
|
||||||
no-dev-version = true
|
|
||||||
pre-release-commit-message = "Release {{version}}"
|
|
||||||
|
|
||||||
# Change the changelog's `Unreleased` section to refer to this release and
|
|
||||||
# prepend a new `Unreleased` section
|
|
||||||
[[package.metadata.release.pre-release-replacements]]
|
|
||||||
file = "CHANGELOG.md"
|
|
||||||
search = "## Unreleased\n"
|
|
||||||
replace = """
|
|
||||||
## Unreleased
|
|
||||||
|
|
||||||
No changes.
|
|
||||||
|
|
||||||
## [{{version}} - {{date}}](https://github.com/jonas-schievink/adler/releases/tag/v{{version}})
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Bump the version inside the example manifest in `README.md`
|
|
||||||
[[package.metadata.release.pre-release-replacements]]
|
|
||||||
file = "README.md"
|
|
||||||
search = 'adler = "[a-z0-9\\.-]+"'
|
|
||||||
replace = 'adler = "{{version}}"'
|
|
||||||
|
|
||||||
# Bump the version referenced by the `html_root_url` attribute in `lib.rs`
|
|
||||||
[[package.metadata.release.pre-release-replacements]]
|
|
||||||
file = "src/lib.rs"
|
|
||||||
search = "https://docs.rs/adler/[a-z0-9\\.-]+"
|
|
||||||
replace = "https://docs.rs/adler/{{version}}"
|
|
||||||
|
|
@ -1,39 +0,0 @@
|
||||||
# Adler-32 checksums for Rust
|
|
||||||
|
|
||||||
[](https://crates.io/crates/adler)
|
|
||||||
[](https://docs.rs/adler/)
|
|
||||||

|
|
||||||
|
|
||||||
This crate provides a simple implementation of the Adler-32 checksum, used in
|
|
||||||
the zlib compression format.
|
|
||||||
|
|
||||||
Please refer to the [changelog](CHANGELOG.md) to see what changed in the last
|
|
||||||
releases.
|
|
||||||
|
|
||||||
## Features
|
|
||||||
|
|
||||||
- Permissively licensed (0BSD) clean-room implementation.
|
|
||||||
- Zero dependencies.
|
|
||||||
- Zero `unsafe`.
|
|
||||||
- Decent performance (3-4 GB/s).
|
|
||||||
- Supports `#![no_std]` (with `default-features = false`).
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
Add an entry to your `Cargo.toml`:
|
|
||||||
|
|
||||||
```toml
|
|
||||||
[dependencies]
|
|
||||||
adler = "1.0.2"
|
|
||||||
```
|
|
||||||
|
|
||||||
Check the [API Documentation](https://docs.rs/adler/) for how to use the
|
|
||||||
crate's functionality.
|
|
||||||
|
|
||||||
## Rust version support
|
|
||||||
|
|
||||||
Currently, this crate supports all Rust versions starting at Rust 1.31.0.
|
|
||||||
|
|
||||||
Bumping the Minimum Supported Rust Version (MSRV) is *not* considered a breaking
|
|
||||||
change, but will not be done without good reasons. The latest 3 stable Rust
|
|
||||||
versions will always be supported no matter what.
|
|
||||||
|
|
@ -1,109 +0,0 @@
|
||||||
extern crate adler;
|
|
||||||
extern crate criterion;
|
|
||||||
|
|
||||||
use adler::{adler32_slice, Adler32};
|
|
||||||
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
|
|
||||||
|
|
||||||
fn simple(c: &mut Criterion) {
|
|
||||||
{
|
|
||||||
const SIZE: usize = 100;
|
|
||||||
|
|
||||||
let mut group = c.benchmark_group("simple-100b");
|
|
||||||
group.throughput(Throughput::Bytes(SIZE as u64));
|
|
||||||
group.bench_function("zeroes-100", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
adler32_slice(&[0; SIZE]);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
group.bench_function("ones-100", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
adler32_slice(&[0xff; SIZE]);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const SIZE: usize = 1024;
|
|
||||||
|
|
||||||
let mut group = c.benchmark_group("simple-1k");
|
|
||||||
group.throughput(Throughput::Bytes(SIZE as u64));
|
|
||||||
|
|
||||||
group.bench_function("zeroes-1k", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
adler32_slice(&[0; SIZE]);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
group.bench_function("ones-1k", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
adler32_slice(&[0xff; SIZE]);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const SIZE: usize = 1024 * 1024;
|
|
||||||
|
|
||||||
let mut group = c.benchmark_group("simple-1m");
|
|
||||||
group.throughput(Throughput::Bytes(SIZE as u64));
|
|
||||||
group.bench_function("zeroes-1m", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
adler32_slice(&[0; SIZE]);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
group.bench_function("ones-1m", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
adler32_slice(&[0xff; SIZE]);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chunked(c: &mut Criterion) {
|
|
||||||
const SIZE: usize = 16 * 1024 * 1024;
|
|
||||||
|
|
||||||
let data = vec![0xAB; SIZE];
|
|
||||||
|
|
||||||
let mut group = c.benchmark_group("chunked-16m");
|
|
||||||
group.throughput(Throughput::Bytes(SIZE as u64));
|
|
||||||
group.bench_function("5552", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
let mut h = Adler32::new();
|
|
||||||
for chunk in data.chunks(5552) {
|
|
||||||
h.write_slice(chunk);
|
|
||||||
}
|
|
||||||
h.checksum()
|
|
||||||
});
|
|
||||||
});
|
|
||||||
group.bench_function("8k", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
let mut h = Adler32::new();
|
|
||||||
for chunk in data.chunks(8 * 1024) {
|
|
||||||
h.write_slice(chunk);
|
|
||||||
}
|
|
||||||
h.checksum()
|
|
||||||
});
|
|
||||||
});
|
|
||||||
group.bench_function("64k", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
let mut h = Adler32::new();
|
|
||||||
for chunk in data.chunks(64 * 1024) {
|
|
||||||
h.write_slice(chunk);
|
|
||||||
}
|
|
||||||
h.checksum()
|
|
||||||
});
|
|
||||||
});
|
|
||||||
group.bench_function("1m", |bencher| {
|
|
||||||
bencher.iter(|| {
|
|
||||||
let mut h = Adler32::new();
|
|
||||||
for chunk in data.chunks(1024 * 1024) {
|
|
||||||
h.write_slice(chunk);
|
|
||||||
}
|
|
||||||
h.checksum()
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
criterion_group!(benches, simple, chunked);
|
|
||||||
criterion_main!(benches);
|
|
||||||
|
|
@ -1,287 +0,0 @@
|
||||||
//! Adler-32 checksum implementation.
|
|
||||||
//!
|
|
||||||
//! This implementation features:
|
|
||||||
//!
|
|
||||||
//! - Permissively licensed (0BSD) clean-room implementation.
|
|
||||||
//! - Zero dependencies.
|
|
||||||
//! - Zero `unsafe`.
|
|
||||||
//! - Decent performance (3-4 GB/s).
|
|
||||||
//! - `#![no_std]` support (with `default-features = false`).
|
|
||||||
|
|
||||||
#![doc(html_root_url = "https://docs.rs/adler/1.0.2")]
|
|
||||||
// Deny a few warnings in doctests, since rustdoc `allow`s many warnings by default
|
|
||||||
#![doc(test(attr(deny(unused_imports, unused_must_use))))]
|
|
||||||
#![cfg_attr(docsrs, feature(doc_cfg))]
|
|
||||||
#![warn(missing_debug_implementations)]
|
|
||||||
#![forbid(unsafe_code)]
|
|
||||||
#![cfg_attr(not(feature = "std"), no_std)]
|
|
||||||
|
|
||||||
#[cfg(not(feature = "std"))]
|
|
||||||
extern crate core as std;
|
|
||||||
|
|
||||||
mod algo;
|
|
||||||
|
|
||||||
use std::hash::Hasher;
|
|
||||||
|
|
||||||
#[cfg(feature = "std")]
|
|
||||||
use std::io::{self, BufRead};
|
|
||||||
|
|
||||||
/// Adler-32 checksum calculator.
|
|
||||||
///
|
|
||||||
/// An instance of this type is equivalent to an Adler-32 checksum: It can be created in the default
|
|
||||||
/// state via [`new`] (or the provided `Default` impl), or from a precalculated checksum via
|
|
||||||
/// [`from_checksum`], and the currently stored checksum can be fetched via [`checksum`].
|
|
||||||
///
|
|
||||||
/// This type also implements `Hasher`, which makes it easy to calculate Adler-32 checksums of any
|
|
||||||
/// type that implements or derives `Hash`. This also allows using Adler-32 in a `HashMap`, although
|
|
||||||
/// that is not recommended (while every checksum is a hash function, they are not necessarily a
|
|
||||||
/// good one).
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// Basic, piecewise checksum calculation:
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use adler::Adler32;
|
|
||||||
///
|
|
||||||
/// let mut adler = Adler32::new();
|
|
||||||
///
|
|
||||||
/// adler.write_slice(&[0, 1, 2]);
|
|
||||||
/// adler.write_slice(&[3, 4, 5]);
|
|
||||||
///
|
|
||||||
/// assert_eq!(adler.checksum(), 0x00290010);
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// Using `Hash` to process structures:
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use std::hash::Hash;
|
|
||||||
/// use adler::Adler32;
|
|
||||||
///
|
|
||||||
/// #[derive(Hash)]
|
|
||||||
/// struct Data {
|
|
||||||
/// byte: u8,
|
|
||||||
/// word: u16,
|
|
||||||
/// big: u64,
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
/// let mut adler = Adler32::new();
|
|
||||||
///
|
|
||||||
/// let data = Data { byte: 0x1F, word: 0xABCD, big: !0 };
|
|
||||||
/// data.hash(&mut adler);
|
|
||||||
///
|
|
||||||
/// // hash value depends on architecture endianness
|
|
||||||
/// if cfg!(target_endian = "little") {
|
|
||||||
/// assert_eq!(adler.checksum(), 0x33410990);
|
|
||||||
/// }
|
|
||||||
/// if cfg!(target_endian = "big") {
|
|
||||||
/// assert_eq!(adler.checksum(), 0x331F0990);
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// [`new`]: #method.new
|
|
||||||
/// [`from_checksum`]: #method.from_checksum
|
|
||||||
/// [`checksum`]: #method.checksum
|
|
||||||
#[derive(Debug, Copy, Clone)]
|
|
||||||
pub struct Adler32 {
|
|
||||||
a: u16,
|
|
||||||
b: u16,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Adler32 {
|
|
||||||
/// Creates a new Adler-32 instance with default state.
|
|
||||||
#[inline]
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self::default()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Creates an `Adler32` instance from a precomputed Adler-32 checksum.
|
|
||||||
///
|
|
||||||
/// This allows resuming checksum calculation without having to keep the `Adler32` instance
|
|
||||||
/// around.
|
|
||||||
///
|
|
||||||
/// # Example
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// # use adler::Adler32;
|
|
||||||
/// let parts = [
|
|
||||||
/// "rust",
|
|
||||||
/// "acean",
|
|
||||||
/// ];
|
|
||||||
/// let whole = adler::adler32_slice(b"rustacean");
|
|
||||||
///
|
|
||||||
/// let mut sum = Adler32::new();
|
|
||||||
/// sum.write_slice(parts[0].as_bytes());
|
|
||||||
/// let partial = sum.checksum();
|
|
||||||
///
|
|
||||||
/// // ...later
|
|
||||||
///
|
|
||||||
/// let mut sum = Adler32::from_checksum(partial);
|
|
||||||
/// sum.write_slice(parts[1].as_bytes());
|
|
||||||
/// assert_eq!(sum.checksum(), whole);
|
|
||||||
/// ```
|
|
||||||
#[inline]
|
|
||||||
pub fn from_checksum(sum: u32) -> Self {
|
|
||||||
Adler32 {
|
|
||||||
a: sum as u16,
|
|
||||||
b: (sum >> 16) as u16,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the calculated checksum at this point in time.
|
|
||||||
#[inline]
|
|
||||||
pub fn checksum(&self) -> u32 {
|
|
||||||
(u32::from(self.b) << 16) | u32::from(self.a)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Adds `bytes` to the checksum calculation.
|
|
||||||
///
|
|
||||||
/// If efficiency matters, this should be called with Byte slices that contain at least a few
|
|
||||||
/// thousand Bytes.
|
|
||||||
pub fn write_slice(&mut self, bytes: &[u8]) {
|
|
||||||
self.compute(bytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for Adler32 {
|
|
||||||
#[inline]
|
|
||||||
fn default() -> Self {
|
|
||||||
Adler32 { a: 1, b: 0 }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Hasher for Adler32 {
|
|
||||||
#[inline]
|
|
||||||
fn finish(&self) -> u64 {
|
|
||||||
u64::from(self.checksum())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write(&mut self, bytes: &[u8]) {
|
|
||||||
self.write_slice(bytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Calculates the Adler-32 checksum of a byte slice.
|
|
||||||
///
|
|
||||||
/// This is a convenience function around the [`Adler32`] type.
|
|
||||||
///
|
|
||||||
/// [`Adler32`]: struct.Adler32.html
|
|
||||||
pub fn adler32_slice(data: &[u8]) -> u32 {
|
|
||||||
let mut h = Adler32::new();
|
|
||||||
h.write_slice(data);
|
|
||||||
h.checksum()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Calculates the Adler-32 checksum of a `BufRead`'s contents.
|
|
||||||
///
|
|
||||||
/// The passed `BufRead` implementor will be read until it reaches EOF (or until it reports an
|
|
||||||
/// error).
|
|
||||||
///
|
|
||||||
/// If you only have a `Read` implementor, you can wrap it in `std::io::BufReader` before calling
|
|
||||||
/// this function.
|
|
||||||
///
|
|
||||||
/// # Errors
|
|
||||||
///
|
|
||||||
/// Any error returned by the reader are bubbled up by this function.
|
|
||||||
///
|
|
||||||
/// # Examples
|
|
||||||
///
|
|
||||||
/// ```no_run
|
|
||||||
/// # fn run() -> Result<(), Box<dyn std::error::Error>> {
|
|
||||||
/// use adler::adler32;
|
|
||||||
///
|
|
||||||
/// use std::fs::File;
|
|
||||||
/// use std::io::BufReader;
|
|
||||||
///
|
|
||||||
/// let file = File::open("input.txt")?;
|
|
||||||
/// let mut file = BufReader::new(file);
|
|
||||||
///
|
|
||||||
/// adler32(&mut file)?;
|
|
||||||
/// # Ok(()) }
|
|
||||||
/// # fn main() { run().unwrap() }
|
|
||||||
/// ```
|
|
||||||
#[cfg(feature = "std")]
|
|
||||||
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
|
|
||||||
pub fn adler32<R: BufRead>(mut reader: R) -> io::Result<u32> {
|
|
||||||
let mut h = Adler32::new();
|
|
||||||
loop {
|
|
||||||
let len = {
|
|
||||||
let buf = reader.fill_buf()?;
|
|
||||||
if buf.is_empty() {
|
|
||||||
return Ok(h.checksum());
|
|
||||||
}
|
|
||||||
|
|
||||||
h.write_slice(buf);
|
|
||||||
buf.len()
|
|
||||||
};
|
|
||||||
reader.consume(len);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn zeroes() {
|
|
||||||
assert_eq!(adler32_slice(&[]), 1);
|
|
||||||
assert_eq!(adler32_slice(&[0]), 1 | 1 << 16);
|
|
||||||
assert_eq!(adler32_slice(&[0, 0]), 1 | 2 << 16);
|
|
||||||
assert_eq!(adler32_slice(&[0; 100]), 0x00640001);
|
|
||||||
assert_eq!(adler32_slice(&[0; 1024]), 0x04000001);
|
|
||||||
assert_eq!(adler32_slice(&[0; 1024 * 1024]), 0x00f00001);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn ones() {
|
|
||||||
assert_eq!(adler32_slice(&[0xff; 1024]), 0x79a6fc2e);
|
|
||||||
assert_eq!(adler32_slice(&[0xff; 1024 * 1024]), 0x8e88ef11);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn mixed() {
|
|
||||||
assert_eq!(adler32_slice(&[1]), 2 | 2 << 16);
|
|
||||||
assert_eq!(adler32_slice(&[40]), 41 | 41 << 16);
|
|
||||||
|
|
||||||
assert_eq!(adler32_slice(&[0xA5; 1024 * 1024]), 0xd5009ab1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
|
|
||||||
#[test]
|
|
||||||
fn wiki() {
|
|
||||||
assert_eq!(adler32_slice(b"Wikipedia"), 0x11E60398);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn resume() {
|
|
||||||
let mut adler = Adler32::new();
|
|
||||||
adler.write_slice(&[0xff; 1024]);
|
|
||||||
let partial = adler.checksum();
|
|
||||||
assert_eq!(partial, 0x79a6fc2e); // from above
|
|
||||||
adler.write_slice(&[0xff; 1024 * 1024 - 1024]);
|
|
||||||
assert_eq!(adler.checksum(), 0x8e88ef11); // from above
|
|
||||||
|
|
||||||
// Make sure that we can resume computing from the partial checksum via `from_checksum`.
|
|
||||||
let mut adler = Adler32::from_checksum(partial);
|
|
||||||
adler.write_slice(&[0xff; 1024 * 1024 - 1024]);
|
|
||||||
assert_eq!(adler.checksum(), 0x8e88ef11); // from above
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(feature = "std")]
|
|
||||||
#[test]
|
|
||||||
fn bufread() {
|
|
||||||
use std::io::BufReader;
|
|
||||||
fn test(data: &[u8], checksum: u32) {
|
|
||||||
// `BufReader` uses an 8 KB buffer, so this will test buffer refilling.
|
|
||||||
let mut buf = BufReader::new(data);
|
|
||||||
let real_sum = adler32(&mut buf).unwrap();
|
|
||||||
assert_eq!(checksum, real_sum);
|
|
||||||
}
|
|
||||||
|
|
||||||
test(&[], 1);
|
|
||||||
test(&[0; 1024], 0x04000001);
|
|
||||||
test(&[0; 1024 * 1024], 0x00f00001);
|
|
||||||
test(&[0xA5; 1024 * 1024], 0xd5009ab1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
1
.gear/predownloaded-development/vendor/adler2/.cargo-checksum.json
vendored
Normal file
1
.gear/predownloaded-development/vendor/adler2/.cargo-checksum.json
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"files":{".cargo_vcs_info.json":"08071a308400282ad18ca2de0df6d1f2dc270475f69aa483740ce311af3f63e0","CHANGELOG.md":"04fa29ec6eb6b05b706247ecac2cbc7075792dbfcea0bf52715782cf42132e94","Cargo.lock":"3e44ced212a9e7ddc0a5450bcebb48ec67f32a058529856458efa36415554e53","Cargo.toml":"8f30dbd092f3acc475b9d339736cd7b64c6489bc47cd234a7f2232fc52e2d490","Cargo.toml.orig":"077df9094ac86443a4d05305f74782bd237c1f15fa39640463e4c62e9e4a310a","LICENSE-0BSD":"861399f8c21c042b110517e76dc6b63a2b334276c8cf17412fc3c8908ca8dc17","LICENSE-APACHE":"8ada45cd9f843acf64e4722ae262c622a2b3b3007c7310ef36ac1061a30f6adb","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"cd955d5d6a49161e6f7a04df4a5963581b66ed43fd5096b2dedca8e295efe4f9","RELEASE_PROCESS.md":"a86cd10fc70f167f8d00e9e4ce0c6b4ebdfa1865058390dffd1e0ad4d3e68d9d","benches/bench.rs":"d67bef1c7f36ed300a8fbcf9d50b9dfdead1fd340bf87a4d47d99a0c1c042c04","src/algo.rs":"932c2bc591d13fe4470185125617b5aaa660a3898f23b553acc85df0bf49dded","src/lib.rs":"4acd41668fe30daffa37084e7e223f268957b816afc1864ffb3f5d6d7adf0890"},"package":"320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"}
|
||||||
6
.gear/predownloaded-development/vendor/adler2/.cargo_vcs_info.json
vendored
Normal file
6
.gear/predownloaded-development/vendor/adler2/.cargo_vcs_info.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
{
|
||||||
|
"git": {
|
||||||
|
"sha1": "89a031a0f42eeff31c70dc598b398cbf31f1680f"
|
||||||
|
},
|
||||||
|
"path_in_vcs": ""
|
||||||
|
}
|
||||||
84
.gear/predownloaded-development/vendor/adler2/CHANGELOG.md
vendored
Normal file
84
.gear/predownloaded-development/vendor/adler2/CHANGELOG.md
vendored
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
# Changelog
|
||||||
|
|
||||||
|
All notable changes to this project will be documented in this file.
|
||||||
|
|
||||||
|
---
|
||||||
|
## [2.0.1](https://github.com/Frommi/miniz_oxide/compare/2.0.0..2.0.1) - 2025-06-09
|
||||||
|
|
||||||
|
### Other
|
||||||
|
|
||||||
|
- Remove `compiler-builtins` from `rustc-dep-of-std` dependencies - ([7cdbd39](https://github.com/Frommi/miniz_oxide/commit/7cdbd3925a7f61cc075f44367b5d383861571b0a)) - Trevor Gross
|
||||||
|
|
||||||
|
---
|
||||||
|
## [2.0.0](https://github.com/Frommi/miniz_oxide/compare/1.0.2..2.0.0) - 2024-08-04
|
||||||
|
|
||||||
|
First release of adler2 - fork of adler crate as the original is unmaintained and archived
|
||||||
|
|
||||||
|
##### Changes since last version of Adler:
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
|
||||||
|
- **(core)** change to rust 2021 edition, update repository info and links, update author info - ([867b115](https://github.com/Frommi/miniz_oxide/commit/867b115bad79bf62098f2acccc81bf53ec5a125d)) - oyvindln
|
||||||
|
- **(core)** simplify some code and fix benches - ([128fb9c](https://github.com/Frommi/miniz_oxide/commit/128fb9cb6cad5c3a54fb0b6c68549d80b79a1fe0)) - oyvindln
|
||||||
|
|
||||||
|
### Changelog of original adler crate
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## [1.0.2 - 2021-02-26](https://github.com/jonas-schievink/adler/releases/tag/v1.0.2)
|
||||||
|
|
||||||
|
- Fix doctest on big-endian systems ([#9]).
|
||||||
|
|
||||||
|
[#9]: https://github.com/jonas-schievink/adler/pull/9
|
||||||
|
|
||||||
|
## [1.0.1 - 2020-11-08](https://github.com/jonas-schievink/adler/releases/tag/v1.0.1)
|
||||||
|
|
||||||
|
### Fixes
|
||||||
|
|
||||||
|
- Fix documentation on docs.rs.
|
||||||
|
|
||||||
|
## [1.0.0 - 2020-11-08](https://github.com/jonas-schievink/adler/releases/tag/v1.0.0)
|
||||||
|
|
||||||
|
### Fixes
|
||||||
|
|
||||||
|
- Fix `cargo test --no-default-features` ([#5]).
|
||||||
|
|
||||||
|
### Improvements
|
||||||
|
|
||||||
|
- Extended and clarified documentation.
|
||||||
|
- Added more rustdoc examples.
|
||||||
|
- Extended CI to test the crate with `--no-default-features`.
|
||||||
|
|
||||||
|
### Breaking Changes
|
||||||
|
|
||||||
|
- `adler32_reader` now takes its generic argument by value instead of as a `&mut`.
|
||||||
|
- Renamed `adler32_reader` to `adler32`.
|
||||||
|
|
||||||
|
## [0.2.3 - 2020-07-11](https://github.com/jonas-schievink/adler/releases/tag/v0.2.3)
|
||||||
|
|
||||||
|
- Process 4 Bytes at a time, improving performance by up to 50% ([#2]).
|
||||||
|
|
||||||
|
## [0.2.2 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.2)
|
||||||
|
|
||||||
|
- Bump MSRV to 1.31.0.
|
||||||
|
|
||||||
|
## [0.2.1 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.1)
|
||||||
|
|
||||||
|
- Add a few `#[inline]` annotations to small functions.
|
||||||
|
- Fix CI badge.
|
||||||
|
- Allow integration into libstd.
|
||||||
|
|
||||||
|
## [0.2.0 - 2020-06-27](https://github.com/jonas-schievink/adler/releases/tag/v0.2.0)
|
||||||
|
|
||||||
|
- Support `#![no_std]` when using `default-features = false`.
|
||||||
|
- Improve performance by around 7x.
|
||||||
|
- Support Rust 1.8.0.
|
||||||
|
- Improve API naming.
|
||||||
|
|
||||||
|
## [0.1.0 - 2020-06-26](https://github.com/jonas-schievink/adler/releases/tag/v0.1.0)
|
||||||
|
|
||||||
|
Initial release.
|
||||||
|
|
||||||
|
|
||||||
|
[#2]: https://github.com/jonas-schievink/adler/pull/2
|
||||||
|
[#5]: https://github.com/jonas-schievink/adler/pull/5
|
||||||
16
.gear/predownloaded-development/vendor/adler2/Cargo.lock
generated
vendored
Normal file
16
.gear/predownloaded-development/vendor/adler2/Cargo.lock
generated
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "adler2"
|
||||||
|
version = "2.0.1"
|
||||||
|
dependencies = [
|
||||||
|
"rustc-std-workspace-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-std-workspace-core"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1956f5517128a2b6f23ab2dadf1a976f4f5b27962e7724c2bf3d45e539ec098c"
|
||||||
91
.gear/predownloaded-development/vendor/adler2/Cargo.toml
vendored
Normal file
91
.gear/predownloaded-development/vendor/adler2/Cargo.toml
vendored
Normal file
|
|
@ -0,0 +1,91 @@
|
||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||||
|
#
|
||||||
|
# When uploading crates to the registry Cargo will automatically
|
||||||
|
# "normalize" Cargo.toml files for maximal compatibility
|
||||||
|
# with all versions of Cargo and also rewrite `path` dependencies
|
||||||
|
# to registry (e.g., crates.io) dependencies.
|
||||||
|
#
|
||||||
|
# If you are reading this file be aware that the original Cargo.toml
|
||||||
|
# will likely look very different (and much more reasonable).
|
||||||
|
# See Cargo.toml.orig for the original contents.
|
||||||
|
|
||||||
|
[package]
|
||||||
|
edition = "2021"
|
||||||
|
name = "adler2"
|
||||||
|
version = "2.0.1"
|
||||||
|
authors = [
|
||||||
|
"Jonas Schievink <jonasschievink@gmail.com>",
|
||||||
|
"oyvindln <oyvindln@users.noreply.github.com>",
|
||||||
|
]
|
||||||
|
build = false
|
||||||
|
exclude = [".*"]
|
||||||
|
autolib = false
|
||||||
|
autobins = false
|
||||||
|
autoexamples = false
|
||||||
|
autotests = false
|
||||||
|
autobenches = false
|
||||||
|
description = "A simple clean-room implementation of the Adler-32 checksum"
|
||||||
|
documentation = "https://docs.rs/adler2/"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = [
|
||||||
|
"checksum",
|
||||||
|
"integrity",
|
||||||
|
"hash",
|
||||||
|
"adler32",
|
||||||
|
"zlib",
|
||||||
|
]
|
||||||
|
categories = ["algorithms"]
|
||||||
|
license = "0BSD OR MIT OR Apache-2.0"
|
||||||
|
repository = "https://github.com/oyvindln/adler2"
|
||||||
|
|
||||||
|
[package.metadata.docs.rs]
|
||||||
|
rustdoc-args = ["--cfg=docsrs"]
|
||||||
|
|
||||||
|
[package.metadata.release]
|
||||||
|
no-dev-version = true
|
||||||
|
pre-release-commit-message = "Release {{version}}"
|
||||||
|
tag-message = "{{version}}"
|
||||||
|
|
||||||
|
[[package.metadata.release.pre-release-replacements]]
|
||||||
|
file = "CHANGELOG.md"
|
||||||
|
replace = """
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
No changes.
|
||||||
|
|
||||||
|
## [{{version}} - {{date}}](https://github.com/jonas-schievink/adler/releases/tag/v{{version}})
|
||||||
|
"""
|
||||||
|
search = """
|
||||||
|
## Unreleased
|
||||||
|
"""
|
||||||
|
|
||||||
|
[[package.metadata.release.pre-release-replacements]]
|
||||||
|
file = "README.md"
|
||||||
|
replace = 'adler = "{{version}}"'
|
||||||
|
search = 'adler = "[a-z0-9\\.-]+"'
|
||||||
|
|
||||||
|
[[package.metadata.release.pre-release-replacements]]
|
||||||
|
file = "src/lib.rs"
|
||||||
|
replace = "https://docs.rs/adler/{{version}}"
|
||||||
|
search = 'https://docs.rs/adler/[a-z0-9\.-]+'
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["std"]
|
||||||
|
rustc-dep-of-std = ["core"]
|
||||||
|
std = []
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "adler2"
|
||||||
|
path = "src/lib.rs"
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "bench"
|
||||||
|
path = "benches/bench.rs"
|
||||||
|
harness = false
|
||||||
|
|
||||||
|
[dependencies.core]
|
||||||
|
version = "1.0.0"
|
||||||
|
optional = true
|
||||||
|
package = "rustc-std-workspace-core"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
71
.gear/predownloaded-development/vendor/adler2/Cargo.toml.orig
generated
vendored
Normal file
71
.gear/predownloaded-development/vendor/adler2/Cargo.toml.orig
generated
vendored
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
[package]
|
||||||
|
name = "adler2"
|
||||||
|
version = "2.0.1"
|
||||||
|
authors = ["Jonas Schievink <jonasschievink@gmail.com>", "oyvindln <oyvindln@users.noreply.github.com>"]
|
||||||
|
description = "A simple clean-room implementation of the Adler-32 checksum"
|
||||||
|
documentation = "https://docs.rs/adler2/"
|
||||||
|
repository = "https://github.com/oyvindln/adler2"
|
||||||
|
keywords = ["checksum", "integrity", "hash", "adler32", "zlib"]
|
||||||
|
categories = ["algorithms"]
|
||||||
|
readme = "README.md"
|
||||||
|
license = "0BSD OR MIT OR Apache-2.0"
|
||||||
|
edition = "2021"
|
||||||
|
exclude = [".*"]
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "bench"
|
||||||
|
harness = false
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
# Internal features, only used when building as part of libstd, not part of the
|
||||||
|
# stable interface of this crate.
|
||||||
|
core = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-core' }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
## Messes with minimum rust version and drags in deps just for running tests
|
||||||
|
## so just comment out for now and enable manually when needed for enabling benches
|
||||||
|
## criterion = "0.3.2"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
# Disable default features to enable `#![no_std]` support.
|
||||||
|
default = ["std"]
|
||||||
|
std = []
|
||||||
|
|
||||||
|
# Internal feature, only used when building as part of libstd, not part of the
|
||||||
|
# stable interface of this crate.
|
||||||
|
rustc-dep-of-std = ['core']
|
||||||
|
|
||||||
|
|
||||||
|
[package.metadata.docs.rs]
|
||||||
|
rustdoc-args = [ "--cfg=docsrs" ]
|
||||||
|
|
||||||
|
# cargo-release configuration
|
||||||
|
[package.metadata.release]
|
||||||
|
tag-message = "{{version}}"
|
||||||
|
no-dev-version = true
|
||||||
|
pre-release-commit-message = "Release {{version}}"
|
||||||
|
|
||||||
|
# Change the changelog's `Unreleased` section to refer to this release and
|
||||||
|
# prepend a new `Unreleased` section
|
||||||
|
[[package.metadata.release.pre-release-replacements]]
|
||||||
|
file = "CHANGELOG.md"
|
||||||
|
search = "## Unreleased\n"
|
||||||
|
replace = """
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
No changes.
|
||||||
|
|
||||||
|
## [{{version}} - {{date}}](https://github.com/jonas-schievink/adler/releases/tag/v{{version}})
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Bump the version inside the example manifest in `README.md`
|
||||||
|
[[package.metadata.release.pre-release-replacements]]
|
||||||
|
file = "README.md"
|
||||||
|
search = 'adler = "[a-z0-9\\.-]+"'
|
||||||
|
replace = 'adler = "{{version}}"'
|
||||||
|
|
||||||
|
# Bump the version referenced by the `html_root_url` attribute in `lib.rs`
|
||||||
|
[[package.metadata.release.pre-release-replacements]]
|
||||||
|
file = "src/lib.rs"
|
||||||
|
search = "https://docs.rs/adler/[a-z0-9\\.-]+"
|
||||||
|
replace = "https://docs.rs/adler/{{version}}"
|
||||||
46
.gear/predownloaded-development/vendor/adler2/README.md
vendored
Normal file
46
.gear/predownloaded-development/vendor/adler2/README.md
vendored
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
# Adler-32 checksums for Rust
|
||||||
|
|
||||||
|
This is a fork of the adler crate as the [original](https://github.com/jonas-schievink/adler) has been archived and is no longer updated by it's author
|
||||||
|
|
||||||
|
[](https://crates.io/crates/adler)
|
||||||
|
[](https://docs.rs/adler/)
|
||||||
|

|
||||||
|
|
||||||
|
This crate provides a simple implementation of the Adler-32 checksum, used in
|
||||||
|
the zlib compression format.
|
||||||
|
|
||||||
|
Please refer to the [changelog](CHANGELOG.md) to see what changed in the last
|
||||||
|
releases.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- Permissively licensed (0BSD) clean-room implementation.
|
||||||
|
- Zero dependencies.
|
||||||
|
- Zero `unsafe`.
|
||||||
|
- Decent performance (3-4 GB/s) (see note).
|
||||||
|
- Supports `#![no_std]` (with `default-features = false`).
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Add an entry to your `Cargo.toml`:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[dependencies]
|
||||||
|
adler2 = "2.0.0"
|
||||||
|
```
|
||||||
|
|
||||||
|
Check the [API Documentation](https://docs.rs/adler/) for how to use the
|
||||||
|
crate's functionality.
|
||||||
|
|
||||||
|
## Rust version support
|
||||||
|
|
||||||
|
Currently, this crate supports all Rust versions starting at Rust 1.56.0.
|
||||||
|
|
||||||
|
Bumping the Minimum Supported Rust Version (MSRV) is *not* considered a breaking
|
||||||
|
change, but will not be done without good reasons. The latest 3 stable Rust
|
||||||
|
versions will always be supported no matter what.
|
||||||
|
|
||||||
|
## Performance
|
||||||
|
|
||||||
|
Due to the way the algorithm works this crate and the fact that it's not possible to use explicit simd in safe rust currently, this crate benefits drastically from being compiled with newer cpu instructions enabled (using e.g ```RUSTFLAGS=-C target-feature'+sse4.1``` or ```-C target-cpu=x86-64-v2```/```-C target-cpu=x86-64-v3``` arguments depending on what cpu support is being targeted.)
|
||||||
|
Judging by the crate benchmarks, on a Ryzen 5600, compiling with SSE 4.1 (enabled in x86-64-v2 feature level) enabled can give a ~50-150% speedup, enabling the LZCNT instruction (enabled in x86-64-v3 feature level) can give a further ~50% speedup,
|
||||||
109
.gear/predownloaded-development/vendor/adler2/benches/bench.rs
vendored
Normal file
109
.gear/predownloaded-development/vendor/adler2/benches/bench.rs
vendored
Normal file
|
|
@ -0,0 +1,109 @@
|
||||||
|
extern crate adler2;
|
||||||
|
extern crate criterion;
|
||||||
|
|
||||||
|
use adler2::{adler32_slice, Adler32};
|
||||||
|
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
|
||||||
|
|
||||||
|
fn simple(c: &mut Criterion) {
|
||||||
|
{
|
||||||
|
const SIZE: usize = 100;
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group("simple-100b");
|
||||||
|
group.throughput(Throughput::Bytes(SIZE as u64));
|
||||||
|
group.bench_function("zeroes-100", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
group.bench_function("ones-100", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0xff; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const SIZE: usize = 1024;
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group("simple-1k");
|
||||||
|
group.throughput(Throughput::Bytes(SIZE as u64));
|
||||||
|
|
||||||
|
group.bench_function("zeroes-1k", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
group.bench_function("ones-1k", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0xff; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const SIZE: usize = 1024 * 1024;
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group("simple-1m");
|
||||||
|
group.throughput(Throughput::Bytes(SIZE as u64));
|
||||||
|
group.bench_function("zeroes-1m", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
group.bench_function("ones-1m", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
adler32_slice(&[0xff; SIZE]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn chunked(c: &mut Criterion) {
|
||||||
|
const SIZE: usize = 16 * 1024 * 1024;
|
||||||
|
|
||||||
|
let data = vec![0xAB; SIZE];
|
||||||
|
|
||||||
|
let mut group = c.benchmark_group("chunked-16m");
|
||||||
|
group.throughput(Throughput::Bytes(SIZE as u64));
|
||||||
|
group.bench_function("5552", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
for chunk in data.chunks(5552) {
|
||||||
|
h.write_slice(chunk);
|
||||||
|
}
|
||||||
|
h.checksum()
|
||||||
|
});
|
||||||
|
});
|
||||||
|
group.bench_function("8k", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
for chunk in data.chunks(8 * 1024) {
|
||||||
|
h.write_slice(chunk);
|
||||||
|
}
|
||||||
|
h.checksum()
|
||||||
|
});
|
||||||
|
});
|
||||||
|
group.bench_function("64k", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
for chunk in data.chunks(64 * 1024) {
|
||||||
|
h.write_slice(chunk);
|
||||||
|
}
|
||||||
|
h.checksum()
|
||||||
|
});
|
||||||
|
});
|
||||||
|
group.bench_function("1m", |bencher| {
|
||||||
|
bencher.iter(|| {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
for chunk in data.chunks(1024 * 1024) {
|
||||||
|
h.write_slice(chunk);
|
||||||
|
}
|
||||||
|
h.checksum()
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, simple, chunked);
|
||||||
|
criterion_main!(benches);
|
||||||
|
|
@ -68,6 +68,7 @@ impl Adler32 {
|
||||||
a_vec += val;
|
a_vec += val;
|
||||||
b_vec += a_vec;
|
b_vec += a_vec;
|
||||||
}
|
}
|
||||||
|
|
||||||
b += CHUNK_SIZE as u32 * a;
|
b += CHUNK_SIZE as u32 * a;
|
||||||
a_vec %= MOD;
|
a_vec %= MOD;
|
||||||
b_vec %= MOD;
|
b_vec %= MOD;
|
||||||
|
|
@ -111,6 +112,7 @@ impl Adler32 {
|
||||||
struct U32X4([u32; 4]);
|
struct U32X4([u32; 4]);
|
||||||
|
|
||||||
impl U32X4 {
|
impl U32X4 {
|
||||||
|
#[inline]
|
||||||
fn from(bytes: &[u8]) -> Self {
|
fn from(bytes: &[u8]) -> Self {
|
||||||
U32X4([
|
U32X4([
|
||||||
u32::from(bytes[0]),
|
u32::from(bytes[0]),
|
||||||
|
|
@ -122,25 +124,32 @@ impl U32X4 {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AddAssign<Self> for U32X4 {
|
impl AddAssign<Self> for U32X4 {
|
||||||
|
#[inline]
|
||||||
fn add_assign(&mut self, other: Self) {
|
fn add_assign(&mut self, other: Self) {
|
||||||
for (s, o) in self.0.iter_mut().zip(other.0.iter()) {
|
// Implement this in a primitive manner to help out the compiler a bit.
|
||||||
*s += o;
|
self.0[0] += other.0[0];
|
||||||
}
|
self.0[1] += other.0[1];
|
||||||
|
self.0[2] += other.0[2];
|
||||||
|
self.0[3] += other.0[3];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RemAssign<u32> for U32X4 {
|
impl RemAssign<u32> for U32X4 {
|
||||||
|
#[inline]
|
||||||
fn rem_assign(&mut self, quotient: u32) {
|
fn rem_assign(&mut self, quotient: u32) {
|
||||||
for s in self.0.iter_mut() {
|
self.0[0] %= quotient;
|
||||||
*s %= quotient;
|
self.0[1] %= quotient;
|
||||||
}
|
self.0[2] %= quotient;
|
||||||
|
self.0[3] %= quotient;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MulAssign<u32> for U32X4 {
|
impl MulAssign<u32> for U32X4 {
|
||||||
|
#[inline]
|
||||||
fn mul_assign(&mut self, rhs: u32) {
|
fn mul_assign(&mut self, rhs: u32) {
|
||||||
for s in self.0.iter_mut() {
|
self.0[0] *= rhs;
|
||||||
*s *= rhs;
|
self.0[1] *= rhs;
|
||||||
}
|
self.0[2] *= rhs;
|
||||||
|
self.0[3] *= rhs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
287
.gear/predownloaded-development/vendor/adler2/src/lib.rs
vendored
Normal file
287
.gear/predownloaded-development/vendor/adler2/src/lib.rs
vendored
Normal file
|
|
@ -0,0 +1,287 @@
|
||||||
|
//! Adler-32 checksum implementation.
|
||||||
|
//!
|
||||||
|
//! This implementation features:
|
||||||
|
//!
|
||||||
|
//! - Permissively licensed (0BSD) clean-room implementation.
|
||||||
|
//! - Zero dependencies.
|
||||||
|
//! - Zero `unsafe`.
|
||||||
|
//! - Decent performance (3-4 GB/s).
|
||||||
|
//! - `#![no_std]` support (with `default-features = false`).
|
||||||
|
|
||||||
|
#![doc(html_root_url = "https://docs.rs/adler2/2.0.0")]
|
||||||
|
// Deny a few warnings in doctests, since rustdoc `allow`s many warnings by default
|
||||||
|
#![doc(test(attr(deny(unused_imports, unused_must_use))))]
|
||||||
|
#![cfg_attr(docsrs, feature(doc_cfg))]
|
||||||
|
#![warn(missing_debug_implementations)]
|
||||||
|
#![forbid(unsafe_code)]
|
||||||
|
#![cfg_attr(not(feature = "std"), no_std)]
|
||||||
|
|
||||||
|
#[cfg(not(feature = "std"))]
|
||||||
|
extern crate core as std;
|
||||||
|
|
||||||
|
mod algo;
|
||||||
|
|
||||||
|
use std::hash::Hasher;
|
||||||
|
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
use std::io::{self, BufRead};
|
||||||
|
|
||||||
|
/// Adler-32 checksum calculator.
|
||||||
|
///
|
||||||
|
/// An instance of this type is equivalent to an Adler-32 checksum: It can be created in the default
|
||||||
|
/// state via [`new`] (or the provided `Default` impl), or from a precalculated checksum via
|
||||||
|
/// [`from_checksum`], and the currently stored checksum can be fetched via [`checksum`].
|
||||||
|
///
|
||||||
|
/// This type also implements `Hasher`, which makes it easy to calculate Adler-32 checksums of any
|
||||||
|
/// type that implements or derives `Hash`. This also allows using Adler-32 in a `HashMap`, although
|
||||||
|
/// that is not recommended (while every checksum is a hash function, they are not necessarily a
|
||||||
|
/// good one).
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Basic, piecewise checksum calculation:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use adler2::Adler32;
|
||||||
|
///
|
||||||
|
/// let mut adler = Adler32::new();
|
||||||
|
///
|
||||||
|
/// adler.write_slice(&[0, 1, 2]);
|
||||||
|
/// adler.write_slice(&[3, 4, 5]);
|
||||||
|
///
|
||||||
|
/// assert_eq!(adler.checksum(), 0x00290010);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Using `Hash` to process structures:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use std::hash::Hash;
|
||||||
|
/// use adler2::Adler32;
|
||||||
|
///
|
||||||
|
/// #[derive(Hash)]
|
||||||
|
/// struct Data {
|
||||||
|
/// byte: u8,
|
||||||
|
/// word: u16,
|
||||||
|
/// big: u64,
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// let mut adler = Adler32::new();
|
||||||
|
///
|
||||||
|
/// let data = Data { byte: 0x1F, word: 0xABCD, big: !0 };
|
||||||
|
/// data.hash(&mut adler);
|
||||||
|
///
|
||||||
|
/// // hash value depends on architecture endianness
|
||||||
|
/// if cfg!(target_endian = "little") {
|
||||||
|
/// assert_eq!(adler.checksum(), 0x33410990);
|
||||||
|
/// }
|
||||||
|
/// if cfg!(target_endian = "big") {
|
||||||
|
/// assert_eq!(adler.checksum(), 0x331F0990);
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// [`new`]: #method.new
|
||||||
|
/// [`from_checksum`]: #method.from_checksum
|
||||||
|
/// [`checksum`]: #method.checksum
|
||||||
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
pub struct Adler32 {
|
||||||
|
a: u16,
|
||||||
|
b: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Adler32 {
|
||||||
|
/// Creates a new Adler-32 instance with default state.
|
||||||
|
#[inline]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates an `Adler32` instance from a precomputed Adler-32 checksum.
|
||||||
|
///
|
||||||
|
/// This allows resuming checksum calculation without having to keep the `Adler32` instance
|
||||||
|
/// around.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use adler2::Adler32;
|
||||||
|
/// let parts = [
|
||||||
|
/// "rust",
|
||||||
|
/// "acean",
|
||||||
|
/// ];
|
||||||
|
/// let whole = adler2::adler32_slice(b"rustacean");
|
||||||
|
///
|
||||||
|
/// let mut sum = Adler32::new();
|
||||||
|
/// sum.write_slice(parts[0].as_bytes());
|
||||||
|
/// let partial = sum.checksum();
|
||||||
|
///
|
||||||
|
/// // ...later
|
||||||
|
///
|
||||||
|
/// let mut sum = Adler32::from_checksum(partial);
|
||||||
|
/// sum.write_slice(parts[1].as_bytes());
|
||||||
|
/// assert_eq!(sum.checksum(), whole);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub const fn from_checksum(sum: u32) -> Self {
|
||||||
|
Adler32 {
|
||||||
|
a: sum as u16,
|
||||||
|
b: (sum >> 16) as u16,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the calculated checksum at this point in time.
|
||||||
|
#[inline]
|
||||||
|
pub fn checksum(&self) -> u32 {
|
||||||
|
(u32::from(self.b) << 16) | u32::from(self.a)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds `bytes` to the checksum calculation.
|
||||||
|
///
|
||||||
|
/// If efficiency matters, this should be called with Byte slices that contain at least a few
|
||||||
|
/// thousand Bytes.
|
||||||
|
pub fn write_slice(&mut self, bytes: &[u8]) {
|
||||||
|
self.compute(bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Adler32 {
|
||||||
|
#[inline]
|
||||||
|
fn default() -> Self {
|
||||||
|
Adler32 { a: 1, b: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Hasher for Adler32 {
|
||||||
|
#[inline]
|
||||||
|
fn finish(&self) -> u64 {
|
||||||
|
u64::from(self.checksum())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write(&mut self, bytes: &[u8]) {
|
||||||
|
self.write_slice(bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculates the Adler-32 checksum of a byte slice.
|
||||||
|
///
|
||||||
|
/// This is a convenience function around the [`Adler32`] type.
|
||||||
|
///
|
||||||
|
/// [`Adler32`]: struct.Adler32.html
|
||||||
|
pub fn adler32_slice(data: &[u8]) -> u32 {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
h.write_slice(data);
|
||||||
|
h.checksum()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculates the Adler-32 checksum of a `BufRead`'s contents.
|
||||||
|
///
|
||||||
|
/// The passed `BufRead` implementor will be read until it reaches EOF (or until it reports an
|
||||||
|
/// error).
|
||||||
|
///
|
||||||
|
/// If you only have a `Read` implementor, you can wrap it in `std::io::BufReader` before calling
|
||||||
|
/// this function.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// Any error returned by the reader are bubbled up by this function.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```no_run
|
||||||
|
/// # fn run() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
/// use adler2::adler32;
|
||||||
|
///
|
||||||
|
/// use std::fs::File;
|
||||||
|
/// use std::io::BufReader;
|
||||||
|
///
|
||||||
|
/// let file = File::open("input.txt")?;
|
||||||
|
/// let mut file = BufReader::new(file);
|
||||||
|
///
|
||||||
|
/// adler32(&mut file)?;
|
||||||
|
/// # Ok(()) }
|
||||||
|
/// # fn main() { run().unwrap() }
|
||||||
|
/// ```
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
|
||||||
|
pub fn adler32<R: BufRead>(mut reader: R) -> io::Result<u32> {
|
||||||
|
let mut h = Adler32::new();
|
||||||
|
loop {
|
||||||
|
let len = {
|
||||||
|
let buf = reader.fill_buf()?;
|
||||||
|
if buf.is_empty() {
|
||||||
|
return Ok(h.checksum());
|
||||||
|
}
|
||||||
|
|
||||||
|
h.write_slice(buf);
|
||||||
|
buf.len()
|
||||||
|
};
|
||||||
|
reader.consume(len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn zeroes() {
|
||||||
|
assert_eq!(adler32_slice(&[]), 1);
|
||||||
|
assert_eq!(adler32_slice(&[0]), 1 | 1 << 16);
|
||||||
|
assert_eq!(adler32_slice(&[0, 0]), 1 | 2 << 16);
|
||||||
|
assert_eq!(adler32_slice(&[0; 100]), 0x00640001);
|
||||||
|
assert_eq!(adler32_slice(&[0; 1024]), 0x04000001);
|
||||||
|
assert_eq!(adler32_slice(&[0; 1024 * 1024]), 0x00f00001);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn ones() {
|
||||||
|
assert_eq!(adler32_slice(&[0xff; 1024]), 0x79a6fc2e);
|
||||||
|
assert_eq!(adler32_slice(&[0xff; 1024 * 1024]), 0x8e88ef11);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn mixed() {
|
||||||
|
assert_eq!(adler32_slice(&[1]), 2 | 2 << 16);
|
||||||
|
assert_eq!(adler32_slice(&[40]), 41 | 41 << 16);
|
||||||
|
|
||||||
|
assert_eq!(adler32_slice(&[0xA5; 1024 * 1024]), 0xd5009ab1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
|
||||||
|
#[test]
|
||||||
|
fn wiki() {
|
||||||
|
assert_eq!(adler32_slice(b"Wikipedia"), 0x11E60398);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn resume() {
|
||||||
|
let mut adler = Adler32::new();
|
||||||
|
adler.write_slice(&[0xff; 1024]);
|
||||||
|
let partial = adler.checksum();
|
||||||
|
assert_eq!(partial, 0x79a6fc2e); // from above
|
||||||
|
adler.write_slice(&[0xff; 1024 * 1024 - 1024]);
|
||||||
|
assert_eq!(adler.checksum(), 0x8e88ef11); // from above
|
||||||
|
|
||||||
|
// Make sure that we can resume computing from the partial checksum via `from_checksum`.
|
||||||
|
let mut adler = Adler32::from_checksum(partial);
|
||||||
|
adler.write_slice(&[0xff; 1024 * 1024 - 1024]);
|
||||||
|
assert_eq!(adler.checksum(), 0x8e88ef11); // from above
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
#[test]
|
||||||
|
fn bufread() {
|
||||||
|
use std::io::BufReader;
|
||||||
|
fn test(data: &[u8], checksum: u32) {
|
||||||
|
// `BufReader` uses an 8 KB buffer, so this will test buffer refilling.
|
||||||
|
let mut buf = BufReader::new(data);
|
||||||
|
let real_sum = adler32(&mut buf).unwrap();
|
||||||
|
assert_eq!(checksum, real_sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
test(&[], 1);
|
||||||
|
test(&[0; 1024], 0x04000001);
|
||||||
|
test(&[0; 1024 * 1024], 0x00f00001);
|
||||||
|
test(&[0xA5; 1024 * 1024], 0xd5009ab1);
|
||||||
|
}
|
||||||
|
}
|
||||||
1
.gear/predownloaded-development/vendor/aho-corasick/.cargo-checksum.json
vendored
Normal file
1
.gear/predownloaded-development/vendor/aho-corasick/.cargo-checksum.json
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"files":{".cargo_vcs_info.json":"69f4ef9c995781116d3f30c158ef6b558916fa67130ede33f22564251b580a1f",".github/workflows/ci.yml":"0605d9327a4633916dc789008d5686c692656bb3e1ee57f821f8537e9ad7d7b4",".vim/coc-settings.json":"8237c8f41db352b0d83f1bb10a60bc2f60f56f3234afbf696b4075c8d4d62d9b","COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"88c12a803c6c06c47cd9dabc8bcdba81f35d3bab637221d2106a86a543532731","Cargo.toml.orig":"9526bde160fb3ea38179bd295cd94ddf1e3d8cd8cc1ed3a153664e3c90d3c970","DESIGN.md":"59c960e1b73b1d7fb41e4df6c0c1b1fcf44dd2ebc8a349597a7d0595f8cb5130","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"afc4d559a98cf190029af0bf320fc0022725e349cd2a303aac860254e28f3c53","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"c699c07df70be45c666e128509ad571a7649d2073e4ae16ac1efd6793c9c6890","src/automaton.rs":"22258a3e118672413119f8f543a9b912cce954e63524575c0ebfdf9011f9c2dd","src/dfa.rs":"bfef1a94c5e7410584b1beb4e857b40d1ae2031b881cbc06fb1300409bbd555f","src/lib.rs":"2a92d5c5e930f2d306508802e8a929135e1f41c9f5f8deda8f7eb98947179dd2","src/macros.rs":"c6c52ae05b24433cffaca7b78b3645d797862c5d5feffddf9f54909095ed6e05","src/nfa/contiguous.rs":"aeb6ee5fd80eea04decbc4b46aa27d1ab270b78d416a644da25b7934f009ee66","src/nfa/mod.rs":"ee7b3109774d14bbad5239c16bb980dd6b8185ec136d94fbaf2f0dc27d5ffa15","src/nfa/noncontiguous.rs":"de94f02b04efd8744fb096759a8897c22012b0e0ca3ace161fd87c71befefe04","src/packed/api.rs":"160d3b10823316f7b0924e13c3afd222c8a7db5c0a00432401f311ef27d6a1b7","src/packed/ext.rs":"66be06fde8558429da23a290584d4b9fae665bf64c2578db4fe5f5f3ee864869","src/packed/mod.rs":"0020cd6f07ba5c8955923a9516d7f758864260eda53a6b6f629131c45ddeec62","src/packed/pattern.rs":"1e3a289a730c141fc30b295811e372d046c6619c7fd670308299b889a06c7673","src/packed/rabinkarp.rs":"403146e
b1d838a84601d171393542340513cd1ee7ff750f2372161dd47746586","src/packed/teddy/README.md":"3a43194b64e221543d885176aba3beb1224a927385a20eca842daf6b0ea2f342","src/packed/teddy/builder.rs":"08ec116a4a842a2bb1221d296a2515ef3672c54906bed588fb733364c07855d3","src/packed/teddy/generic.rs":"ea252ab05b32cea7dd9d71e332071d243db7dd0362e049252a27e5881ba2bf39","src/packed/teddy/mod.rs":"17d741f7e2fb9dbac5ba7d1bd4542cf1e35e9f146ace728e23fe6bbed20028b2","src/packed/tests.rs":"8e2f56eb3890ed3876ecb47d3121996e416563127b6430110d7b516df3f83b4b","src/packed/vector.rs":"70c325cfa6f7c5c4c9a6af7b133b75a29e65990a7fe0b9a4c4ce3c3d5a0fe587","src/tests.rs":"c68192ab97b6161d0d6ee96fefd80cc7d14e4486ddcd8d1f82b5c92432c24ed5","src/transducer.rs":"02daa33a5d6dac41dcfd67f51df7c0d4a91c5131c781fb54c4de3520c585a6e1","src/util/alphabet.rs":"6dc22658a38deddc0279892035b18870d4585069e35ba7c7e649a24509acfbcc","src/util/buffer.rs":"f9e37f662c46c6ecd734458dedbe76c3bb0e84a93b6b0117c0d4ad3042413891","src/util/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/util/debug.rs":"ab301ad59aa912529cb97233a54a05914dd3cb2ec43e6fec7334170b97ac5998","src/util/error.rs":"ecccd60e7406305023efcc6adcc826eeeb083ab8f7fbfe3d97469438cd4c4e5c","src/util/int.rs":"e264e6abebf5622b59f6500210773db36048371c4e509c930263334095959a52","src/util/mod.rs":"7ab28d11323ecdbd982087f32eb8bceeee84f1a2583f3aae27039c36d58cf12c","src/util/prefilter.rs":"9fa4498f18bf70478b1996c1a013698b626d15f119aa81dbc536673c9f045718","src/util/primitives.rs":"f89f3fa1d8db4e37de9ca767c6d05e346404837cade6d063bba68972fafa610b","src/util/remapper.rs":"9f12d911583a325c11806eeceb46d0dfec863cfcfa241aed84d31af73da746e5","src/util/search.rs":"6af803e08b8b8c8a33db100623f1621b0d741616524ce40893d8316897f27ffe","src/util/special.rs":"7d2f9cb9dd9771f59816e829b2d96b1239996f32939ba98764e121696c52b146"},"package":"8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"}
|
||||||
6
.gear/predownloaded-development/vendor/aho-corasick/.cargo_vcs_info.json
vendored
Normal file
6
.gear/predownloaded-development/vendor/aho-corasick/.cargo_vcs_info.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
{
|
||||||
|
"git": {
|
||||||
|
"sha1": "56256dca1bcd2365fd1dc987c1c06195429a2e2c"
|
||||||
|
},
|
||||||
|
"path_in_vcs": ""
|
||||||
|
}
|
||||||
148
.gear/predownloaded-development/vendor/aho-corasick/.github/workflows/ci.yml
vendored
Normal file
148
.gear/predownloaded-development/vendor/aho-corasick/.github/workflows/ci.yml
vendored
Normal file
|
|
@ -0,0 +1,148 @@
|
||||||
|
name: ci
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
schedule:
|
||||||
|
- cron: '00 01 * * *'
|
||||||
|
|
||||||
|
# The section is needed to drop write-all permissions that are granted on
|
||||||
|
# `schedule` event. By specifying any permission explicitly all others are set
|
||||||
|
# to none. By using the principle of least privilege the damage a compromised
|
||||||
|
# workflow can do (because of an injection or compromised third party tool or
|
||||||
|
# action) is restricted. Currently the worklow doesn't need any additional
|
||||||
|
# permission except for pulling the code. Adding labels to issues, commenting
|
||||||
|
# on pull-requests, etc. may need additional permissions:
|
||||||
|
#
|
||||||
|
# Syntax for this section:
|
||||||
|
# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#permissions
|
||||||
|
#
|
||||||
|
# Reference for how to assign permissions on a job-by-job basis:
|
||||||
|
# https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
|
||||||
|
#
|
||||||
|
# Reference for available permissions that we can enable if needed:
|
||||||
|
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication#permissions-for-the-github_token
|
||||||
|
permissions:
|
||||||
|
# to fetch code (actions/checkout)
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
name: test
|
||||||
|
env:
|
||||||
|
# For some builds, we use cross to test on 32-bit and big-endian
|
||||||
|
# systems.
|
||||||
|
CARGO: cargo
|
||||||
|
# When CARGO is set to CROSS, TARGET is set to `--target matrix.target`.
|
||||||
|
# Note that we only use cross on Linux, so setting a target on a
|
||||||
|
# different OS will just use normal cargo.
|
||||||
|
TARGET:
|
||||||
|
# Bump this as appropriate. We pin to a version to make sure CI
|
||||||
|
# continues to work as cross releases in the past have broken things
|
||||||
|
# in subtle ways.
|
||||||
|
CROSS_VERSION: v0.2.5
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build: pinned
|
||||||
|
os: ubuntu-latest
|
||||||
|
rust: 1.60.0
|
||||||
|
- build: stable
|
||||||
|
os: ubuntu-latest
|
||||||
|
rust: stable
|
||||||
|
- build: stable-x86
|
||||||
|
os: ubuntu-latest
|
||||||
|
rust: stable
|
||||||
|
target: i686-unknown-linux-gnu
|
||||||
|
- build: stable-aarch64
|
||||||
|
os: ubuntu-latest
|
||||||
|
rust: stable
|
||||||
|
target: aarch64-unknown-linux-gnu
|
||||||
|
- build: stable-powerpc64
|
||||||
|
os: ubuntu-latest
|
||||||
|
rust: stable
|
||||||
|
target: powerpc64-unknown-linux-gnu
|
||||||
|
- build: stable-s390x
|
||||||
|
os: ubuntu-latest
|
||||||
|
rust: stable
|
||||||
|
target: s390x-unknown-linux-gnu
|
||||||
|
- build: beta
|
||||||
|
os: ubuntu-latest
|
||||||
|
rust: beta
|
||||||
|
- build: nightly
|
||||||
|
os: ubuntu-latest
|
||||||
|
rust: nightly
|
||||||
|
- build: macos
|
||||||
|
os: macos-latest
|
||||||
|
rust: stable
|
||||||
|
- build: win-msvc
|
||||||
|
os: windows-latest
|
||||||
|
rust: stable
|
||||||
|
- build: win-gnu
|
||||||
|
os: windows-latest
|
||||||
|
rust: stable-x86_64-gnu
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
- name: Install Rust
|
||||||
|
uses: dtolnay/rust-toolchain@master
|
||||||
|
with:
|
||||||
|
toolchain: ${{ matrix.rust }}
|
||||||
|
- name: Install and configure Cross
|
||||||
|
if: matrix.os == 'ubuntu-latest' && matrix.target != ''
|
||||||
|
run: |
|
||||||
|
# In the past, new releases of 'cross' have broken CI. So for now, we
|
||||||
|
# pin it. We also use their pre-compiled binary releases because cross
|
||||||
|
# has over 100 dependencies and takes a bit to compile.
|
||||||
|
dir="$RUNNER_TEMP/cross-download"
|
||||||
|
mkdir "$dir"
|
||||||
|
echo "$dir" >> $GITHUB_PATH
|
||||||
|
cd "$dir"
|
||||||
|
curl -LO "https://github.com/cross-rs/cross/releases/download/$CROSS_VERSION/cross-x86_64-unknown-linux-musl.tar.gz"
|
||||||
|
tar xf cross-x86_64-unknown-linux-musl.tar.gz
|
||||||
|
|
||||||
|
# We used to install 'cross' from master, but it kept failing. So now
|
||||||
|
# we build from a known-good version until 'cross' becomes more stable
|
||||||
|
# or we find an alternative. Notably, between v0.2.1 and current
|
||||||
|
# master (2022-06-14), the number of Cross's dependencies has doubled.
|
||||||
|
echo "CARGO=cross" >> $GITHUB_ENV
|
||||||
|
echo "TARGET=--target ${{ matrix.target }}" >> $GITHUB_ENV
|
||||||
|
- name: Show command used for Cargo
|
||||||
|
run: |
|
||||||
|
echo "cargo command is: ${{ env.CARGO }}"
|
||||||
|
echo "target flag is: ${{ env.TARGET }}"
|
||||||
|
- name: Show CPU info for debugging
|
||||||
|
if: matrix.os == 'ubuntu-latest'
|
||||||
|
run: lscpu
|
||||||
|
# See: https://github.com/rust-lang/regex/blob/a2887636930156023172e4b376a6febad4e49120/.github/workflows/ci.yml#L145-L163
|
||||||
|
- name: Pin memchr to 2.6.2
|
||||||
|
if: matrix.build == 'pinned'
|
||||||
|
run: cargo update -p memchr --precise 2.6.2
|
||||||
|
- run: ${{ env.CARGO }} build --verbose $TARGET
|
||||||
|
- run: ${{ env.CARGO }} doc --verbose $TARGET
|
||||||
|
- run: ${{ env.CARGO }} test --verbose $TARGET
|
||||||
|
- run: ${{ env.CARGO }} test --lib --verbose --no-default-features --features std,perf-literal $TARGET
|
||||||
|
- run: ${{ env.CARGO }} test --lib --verbose --no-default-features $TARGET
|
||||||
|
- run: ${{ env.CARGO }} test --lib --verbose --no-default-features --features std $TARGET
|
||||||
|
- run: ${{ env.CARGO }} test --lib --verbose --no-default-features --features perf-literal $TARGET
|
||||||
|
- run: ${{ env.CARGO }} test --lib --verbose --no-default-features --features std,perf-literal,logging $TARGET
|
||||||
|
- if: matrix.build == 'nightly'
|
||||||
|
run: ${{ env.CARGO }} build --manifest-path aho-corasick-debug/Cargo.toml $TARGET
|
||||||
|
|
||||||
|
rustfmt:
|
||||||
|
name: rustfmt
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
- name: Install Rust
|
||||||
|
uses: dtolnay/rust-toolchain@master
|
||||||
|
with:
|
||||||
|
toolchain: stable
|
||||||
|
components: rustfmt
|
||||||
|
- name: Check formatting
|
||||||
|
run: |
|
||||||
|
cargo fmt --all -- --check
|
||||||
12
.gear/predownloaded-development/vendor/aho-corasick/.vim/coc-settings.json
vendored
Normal file
12
.gear/predownloaded-development/vendor/aho-corasick/.vim/coc-settings.json
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
{
|
||||||
|
"rust-analyzer.linkedProjects": [
|
||||||
|
"aho-corasick-debug/Cargo.toml",
|
||||||
|
"benchmarks/engines/rust-aho-corasick/Cargo.toml",
|
||||||
|
"benchmarks/engines/rust-daachorse/Cargo.toml",
|
||||||
|
"benchmarks/engines/rust-jetscii/Cargo.toml",
|
||||||
|
"benchmarks/engines/naive/Cargo.toml",
|
||||||
|
"benchmarks/shared/Cargo.toml",
|
||||||
|
"fuzz/Cargo.toml",
|
||||||
|
"Cargo.toml"
|
||||||
|
]
|
||||||
|
}
|
||||||
3
.gear/predownloaded-development/vendor/aho-corasick/COPYING
vendored
Normal file
3
.gear/predownloaded-development/vendor/aho-corasick/COPYING
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
This project is dual-licensed under the Unlicense and MIT licenses.
|
||||||
|
|
||||||
|
You may use this code under the terms of either license.
|
||||||
74
.gear/predownloaded-development/vendor/aho-corasick/Cargo.toml
vendored
Normal file
74
.gear/predownloaded-development/vendor/aho-corasick/Cargo.toml
vendored
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||||
|
#
|
||||||
|
# When uploading crates to the registry Cargo will automatically
|
||||||
|
# "normalize" Cargo.toml files for maximal compatibility
|
||||||
|
# with all versions of Cargo and also rewrite `path` dependencies
|
||||||
|
# to registry (e.g., crates.io) dependencies.
|
||||||
|
#
|
||||||
|
# If you are reading this file be aware that the original Cargo.toml
|
||||||
|
# will likely look very different (and much more reasonable).
|
||||||
|
# See Cargo.toml.orig for the original contents.
|
||||||
|
|
||||||
|
[package]
|
||||||
|
edition = "2021"
|
||||||
|
rust-version = "1.60.0"
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.3"
|
||||||
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
|
exclude = [
|
||||||
|
"/aho-corasick-debug",
|
||||||
|
"/benchmarks",
|
||||||
|
"/tmp",
|
||||||
|
]
|
||||||
|
autotests = false
|
||||||
|
description = "Fast multiple substring searching."
|
||||||
|
homepage = "https://github.com/BurntSushi/aho-corasick"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = [
|
||||||
|
"string",
|
||||||
|
"search",
|
||||||
|
"text",
|
||||||
|
"pattern",
|
||||||
|
"multi",
|
||||||
|
]
|
||||||
|
categories = ["text-processing"]
|
||||||
|
license = "Unlicense OR MIT"
|
||||||
|
repository = "https://github.com/BurntSushi/aho-corasick"
|
||||||
|
|
||||||
|
[package.metadata.docs.rs]
|
||||||
|
all-features = true
|
||||||
|
rustdoc-args = [
|
||||||
|
"--cfg",
|
||||||
|
"docsrs",
|
||||||
|
"--generate-link-to-definition",
|
||||||
|
]
|
||||||
|
|
||||||
|
[profile.bench]
|
||||||
|
debug = 2
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
debug = 2
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "aho_corasick"
|
||||||
|
|
||||||
|
[dependencies.log]
|
||||||
|
version = "0.4.17"
|
||||||
|
optional = true
|
||||||
|
|
||||||
|
[dependencies.memchr]
|
||||||
|
version = "2.4.0"
|
||||||
|
optional = true
|
||||||
|
default-features = false
|
||||||
|
|
||||||
|
[dev-dependencies.doc-comment]
|
||||||
|
version = "0.3.3"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = [
|
||||||
|
"std",
|
||||||
|
"perf-literal",
|
||||||
|
]
|
||||||
|
logging = ["dep:log"]
|
||||||
|
perf-literal = ["dep:memchr"]
|
||||||
|
std = ["memchr?/std"]
|
||||||
74
.gear/predownloaded-development/vendor/aho-corasick/Cargo.toml.orig
generated
vendored
Normal file
74
.gear/predownloaded-development/vendor/aho-corasick/Cargo.toml.orig
generated
vendored
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
[package]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.3" #:version
|
||||||
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
|
description = "Fast multiple substring searching."
|
||||||
|
homepage = "https://github.com/BurntSushi/aho-corasick"
|
||||||
|
repository = "https://github.com/BurntSushi/aho-corasick"
|
||||||
|
readme = "README.md"
|
||||||
|
keywords = ["string", "search", "text", "pattern", "multi"]
|
||||||
|
license = "Unlicense OR MIT"
|
||||||
|
categories = ["text-processing"]
|
||||||
|
autotests = false
|
||||||
|
exclude = ["/aho-corasick-debug", "/benchmarks", "/tmp"]
|
||||||
|
edition = "2021"
|
||||||
|
rust-version = "1.60.0"
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "aho_corasick"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["std", "perf-literal"]
|
||||||
|
std = ["memchr?/std"]
|
||||||
|
|
||||||
|
# Enables prefilter optimizations that depend on external crates.
|
||||||
|
perf-literal = ["dep:memchr"]
|
||||||
|
|
||||||
|
# Enable logging via the 'log' crate. This is useful for seeing messages about
|
||||||
|
# internal decisions and metrics. For example, how the choice of the internal
|
||||||
|
# Aho-Corasick implementation is used or the heap usage of an automaton.
|
||||||
|
logging = ["dep:log"]
|
||||||
|
|
||||||
|
# Provides a trait impl for fst::Automaton for nfa::noncontiguous::NFA,
|
||||||
|
# nfa::contiguous::NFA and dfa::DFA. This is useful for searching an
|
||||||
|
# FST with an Aho-Corasick automaton. Note that this does not apply
|
||||||
|
# to the top-level 'AhoCorasick' type, as it does not implement the
|
||||||
|
# aho_corasick::automaton::Automaton trait, and thus enabling this feature does
|
||||||
|
# not cause it to implement fst::Automaton either.
|
||||||
|
#
|
||||||
|
# NOTE: Currently this feature is not available as `fst` is not at 1.0 yet,
|
||||||
|
# and this would make `fst` a public dependency. If you absolutely need this,
|
||||||
|
# you can copy the (very small) src/transducer.rs file to your tree. It
|
||||||
|
# specifically does not use any private APIs and should work after replacing
|
||||||
|
# 'crate::' with 'aho_corasick::'.
|
||||||
|
#
|
||||||
|
# NOTE: I think my current plan is to flip this around an add an optional
|
||||||
|
# dependency on 'aho-corasick' to the 'fst' crate and move the trait impls
|
||||||
|
# there. But I haven't gotten around to it yet.
|
||||||
|
# transducer = ["fst"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
log = { version = "0.4.17", optional = true }
|
||||||
|
memchr = { version = "2.4.0", default-features = false, optional = true }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
doc-comment = "0.3.3"
|
||||||
|
# fst = "0.4.5"
|
||||||
|
|
||||||
|
[package.metadata.docs.rs]
|
||||||
|
# We want to document all features.
|
||||||
|
all-features = true
|
||||||
|
# This opts into a nightly unstable option to show the features that need to be
|
||||||
|
# enabled for public API items. To do that, we set 'docsrs', and when that's
|
||||||
|
# enabled, we enable the 'doc_auto_cfg' feature.
|
||||||
|
#
|
||||||
|
# To test this locally, run:
|
||||||
|
#
|
||||||
|
# RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features
|
||||||
|
rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"]
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
debug = true
|
||||||
|
|
||||||
|
[profile.bench]
|
||||||
|
debug = true
|
||||||
481
.gear/predownloaded-development/vendor/aho-corasick/DESIGN.md
vendored
Normal file
481
.gear/predownloaded-development/vendor/aho-corasick/DESIGN.md
vendored
Normal file
|
|
@ -0,0 +1,481 @@
|
||||||
|
This document describes the internal design of this crate, which is an object
|
||||||
|
lesson in what happens when you take a fairly simple old algorithm like
|
||||||
|
Aho-Corasick and make it fast and production ready.
|
||||||
|
|
||||||
|
The target audience of this document is Rust programmers that have some
|
||||||
|
familiarity with string searching, however, one does not need to know the
|
||||||
|
Aho-Corasick algorithm in order to read this (it is explained below). One
|
||||||
|
should, however, know what a trie is. (If you don't, go read its Wikipedia
|
||||||
|
article.)
|
||||||
|
|
||||||
|
The center-piece of this crate is an implementation of Aho-Corasick. On its
|
||||||
|
own, Aho-Corasick isn't that complicated. The complex pieces come from the
|
||||||
|
different variants of Aho-Corasick implemented in this crate. Specifically,
|
||||||
|
they are:
|
||||||
|
|
||||||
|
* Aho-Corasick as a noncontiguous NFA. States have their transitions
|
||||||
|
represented sparsely, and each state puts its transitions in its own separate
|
||||||
|
allocation. Hence the same "noncontiguous."
|
||||||
|
* Aho-Corasick as a contiguous NFA. This NFA uses a single allocation to
|
||||||
|
represent the transitions of all states. That is, transitions are laid out
|
||||||
|
contiguously in memory. Moreover, states near the starting state are
|
||||||
|
represented densely, such that finding the next state ID takes a constant
|
||||||
|
number of instructions.
|
||||||
|
* Aho-Corasick as a DFA. In this case, all states are represented densely in
|
||||||
|
a transition table that uses one allocation.
|
||||||
|
* Supporting "standard" match semantics, along with its overlapping variant,
|
||||||
|
in addition to leftmost-first and leftmost-longest semantics. The "standard"
|
||||||
|
semantics are typically what you see in a textbook description of
|
||||||
|
Aho-Corasick. However, Aho-Corasick is also useful as an optimization in
|
||||||
|
regex engines, which often use leftmost-first or leftmost-longest semantics.
|
||||||
|
Thus, it is useful to implement those semantics here. The "standard" and
|
||||||
|
"leftmost" search algorithms are subtly different, and also require slightly
|
||||||
|
different construction algorithms.
|
||||||
|
* Support for ASCII case insensitive matching.
|
||||||
|
* Support for accelerating searches when the patterns all start with a small
|
||||||
|
number of fixed bytes. Or alternatively, when the patterns all contain a
|
||||||
|
small number of rare bytes. (Searching for these bytes uses SIMD vectorized
|
||||||
|
code courtesy of `memchr`.)
|
||||||
|
* Transparent support for alternative SIMD vectorized search routines for
|
||||||
|
smaller number of literals, such as the Teddy algorithm. We called these
|
||||||
|
"packed" search routines because they use SIMD. They can often be an order of
|
||||||
|
magnitude faster than just Aho-Corasick, but don't scale as well.
|
||||||
|
* Support for searching streams. This can reuse most of the underlying code,
|
||||||
|
but does require careful buffering support.
|
||||||
|
* Support for anchored searches, which permit efficient "is prefix" checks for
|
||||||
|
a large number of patterns.
|
||||||
|
|
||||||
|
When you combine all of this together along with trying to make everything as
|
||||||
|
fast as possible, what you end up with is enitrely too much code with too much
|
||||||
|
`unsafe`. Alas, I was not smart enough to figure out how to reduce it. Instead,
|
||||||
|
we will explain it.
|
||||||
|
|
||||||
|
|
||||||
|
# Basics
|
||||||
|
|
||||||
|
The fundamental problem this crate is trying to solve is to determine the
|
||||||
|
occurrences of possibly many patterns in a haystack. The naive way to solve
|
||||||
|
this is to look for a match for each pattern at each position in the haystack:
|
||||||
|
|
||||||
|
for i in 0..haystack.len():
|
||||||
|
for p in patterns.iter():
|
||||||
|
if haystack[i..].starts_with(p.bytes()):
|
||||||
|
return Match(p.id(), i, i + p.bytes().len())
|
||||||
|
|
||||||
|
Those four lines are effectively all this crate does. The problem with those
|
||||||
|
four lines is that they are very slow, especially when you're searching for a
|
||||||
|
large number of patterns.
|
||||||
|
|
||||||
|
While there are many different algorithms available to solve this, a popular
|
||||||
|
one is Aho-Corasick. It's a common solution because it's not too hard to
|
||||||
|
implement, scales quite well even when searching for thousands of patterns and
|
||||||
|
is generally pretty fast. Aho-Corasick does well here because, regardless of
|
||||||
|
the number of patterns you're searching for, it always visits each byte in the
|
||||||
|
haystack exactly once. This means, generally speaking, adding more patterns to
|
||||||
|
an Aho-Corasick automaton does not make it slower. (Strictly speaking, however,
|
||||||
|
this is not true, since a larger automaton will make less effective use of the
|
||||||
|
CPU's cache.)
|
||||||
|
|
||||||
|
Aho-Corasick can be succinctly described as a trie with state transitions
|
||||||
|
between some of the nodes that efficiently instruct the search algorithm to
|
||||||
|
try matching alternative keys in the trie. The trick is that these state
|
||||||
|
transitions are arranged such that each byte of input needs to be inspected
|
||||||
|
only once. These state transitions are typically called "failure transitions,"
|
||||||
|
because they instruct the searcher (the thing traversing the automaton while
|
||||||
|
reading from the haystack) what to do when a byte in the haystack does not
|
||||||
|
correspond to a valid transition in the current state of the trie.
|
||||||
|
|
||||||
|
More formally, a failure transition points to a state in the automaton that may
|
||||||
|
lead to a match whose prefix is a proper suffix of the path traversed through
|
||||||
|
the trie so far. (If no such proper suffix exists, then the failure transition
|
||||||
|
points back to the start state of the trie, effectively restarting the search.)
|
||||||
|
This is perhaps simpler to explain pictorally. For example, let's say we built
|
||||||
|
an Aho-Corasick automaton with the following patterns: 'abcd' and 'cef'. The
|
||||||
|
trie looks like this:
|
||||||
|
|
||||||
|
a - S1 - b - S2 - c - S3 - d - S4*
|
||||||
|
/
|
||||||
|
S0 - c - S5 - e - S6 - f - S7*
|
||||||
|
|
||||||
|
where states marked with a `*` are match states (meaning, the search algorithm
|
||||||
|
should stop and report a match to the caller).
|
||||||
|
|
||||||
|
So given this trie, it should be somewhat straight-forward to see how it can
|
||||||
|
be used to determine whether any particular haystack *starts* with either
|
||||||
|
`abcd` or `cef`. It's easy to express this in code:
|
||||||
|
|
||||||
|
fn has_prefix(trie: &Trie, haystack: &[u8]) -> bool {
|
||||||
|
let mut state_id = trie.start();
|
||||||
|
// If the empty pattern is in trie, then state_id is a match state.
|
||||||
|
if trie.is_match(state_id) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
for (i, &b) in haystack.iter().enumerate() {
|
||||||
|
state_id = match trie.next_state(state_id, b) {
|
||||||
|
Some(id) => id,
|
||||||
|
// If there was no transition for this state and byte, then we know
|
||||||
|
// the haystack does not start with one of the patterns in our trie.
|
||||||
|
None => return false,
|
||||||
|
};
|
||||||
|
if trie.is_match(state_id) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
And that's pretty much it. All we do is move through the trie starting with the
|
||||||
|
bytes at the beginning of the haystack. If we find ourselves in a position
|
||||||
|
where we can't move, or if we've looked through the entire haystack without
|
||||||
|
seeing a match state, then we know the haystack does not start with any of the
|
||||||
|
patterns in the trie.
|
||||||
|
|
||||||
|
The meat of the Aho-Corasick algorithm is in how we add failure transitions to
|
||||||
|
our trie to keep searching efficient. Specifically, it permits us to not only
|
||||||
|
check whether a haystack *starts* with any one of a number of patterns, but
|
||||||
|
rather, whether the haystack contains any of a number of patterns *anywhere* in
|
||||||
|
the haystack.
|
||||||
|
|
||||||
|
As mentioned before, failure transitions connect a proper suffix of the path
|
||||||
|
traversed through the trie before, with a path that leads to a match that has a
|
||||||
|
prefix corresponding to that proper suffix. So in our case, for patterns `abcd`
|
||||||
|
and `cef`, with a haystack `abcef`, we want to transition to state `S5` (from
|
||||||
|
the diagram above) from `S3` upon seeing that the byte following `c` is not
|
||||||
|
`d`. Namely, the proper suffix in this example is `c`, which is a prefix of
|
||||||
|
`cef`. So the modified diagram looks like this:
|
||||||
|
|
||||||
|
|
||||||
|
a - S1 - b - S2 - c - S3 - d - S4*
|
||||||
|
/ /
|
||||||
|
/ ----------------
|
||||||
|
/ /
|
||||||
|
S0 - c - S5 - e - S6 - f - S7*
|
||||||
|
|
||||||
|
One thing that isn't shown in this diagram is that *all* states have a failure
|
||||||
|
transition, but only `S3` has a *non-trivial* failure transition. That is, all
|
||||||
|
other states have a failure transition back to the start state. So if our
|
||||||
|
haystack was `abzabcd`, then the searcher would transition back to `S0` after
|
||||||
|
seeing `z`, which effectively restarts the search. (Because there is no pattern
|
||||||
|
in our trie that has a prefix of `bz` or `z`.)
|
||||||
|
|
||||||
|
The code for traversing this *automaton* or *finite state machine* (it is no
|
||||||
|
longer just a trie) is not that much different from the `has_prefix` code
|
||||||
|
above:
|
||||||
|
|
||||||
|
fn contains(fsm: &FiniteStateMachine, haystack: &[u8]) -> bool {
|
||||||
|
let mut state_id = fsm.start();
|
||||||
|
// If the empty pattern is in fsm, then state_id is a match state.
|
||||||
|
if fsm.is_match(state_id) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
for (i, &b) in haystack.iter().enumerate() {
|
||||||
|
// While the diagram above doesn't show this, we may wind up needing
|
||||||
|
// to follow multiple failure transitions before we land on a state
|
||||||
|
// in which we can advance. Therefore, when searching for the next
|
||||||
|
// state, we need to loop until we don't see a failure transition.
|
||||||
|
//
|
||||||
|
// This loop terminates because the start state has no empty
|
||||||
|
// transitions. Every transition from the start state either points to
|
||||||
|
// another state, or loops back to the start state.
|
||||||
|
loop {
|
||||||
|
match fsm.next_state(state_id, b) {
|
||||||
|
Some(id) => {
|
||||||
|
state_id = id;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Unlike our code above, if there was no transition for this
|
||||||
|
// state, then we don't quit. Instead, we look for this state's
|
||||||
|
// failure transition and follow that instead.
|
||||||
|
None => {
|
||||||
|
state_id = fsm.next_fail_state(state_id);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if fsm.is_match(state_id) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
Other than the complication around traversing failure transitions, this code
|
||||||
|
is still roughly "traverse the automaton with bytes from the haystack, and quit
|
||||||
|
when a match is seen."
|
||||||
|
|
||||||
|
And that concludes our section on the basics. While we didn't go deep into how
|
||||||
|
the automaton is built (see `src/nfa/noncontiguous.rs`, which has detailed
|
||||||
|
comments about that), the basic structure of Aho-Corasick should be reasonably
|
||||||
|
clear.
|
||||||
|
|
||||||
|
|
||||||
|
# NFAs and DFAs
|
||||||
|
|
||||||
|
There are generally two types of finite automata: non-deterministic finite
|
||||||
|
automata (NFA) and deterministic finite automata (DFA). The difference between
|
||||||
|
them is, principally, that an NFA can be in multiple states at once. This is
|
||||||
|
typically accomplished by things called _epsilon_ transitions, where one could
|
||||||
|
move to a new state without consuming any bytes from the input. (The other
|
||||||
|
mechanism by which NFAs can be in more than one state is where the same byte in
|
||||||
|
a particular state transitions to multiple distinct states.) In contrast, a DFA
|
||||||
|
can only ever be in one state at a time. A DFA has no epsilon transitions, and
|
||||||
|
for any given state, a byte transitions to at most one other state.
|
||||||
|
|
||||||
|
By this formulation, the Aho-Corasick automaton described in the previous
|
||||||
|
section is an NFA. This is because failure transitions are, effectively,
|
||||||
|
epsilon transitions. That is, whenever the automaton is in state `S`, it is
|
||||||
|
actually in the set of states that are reachable by recursively following
|
||||||
|
failure transitions from `S` until you reach the start state. (This means
|
||||||
|
that, for example, the start state is always active since the start state is
|
||||||
|
reachable via failure transitions from any state in the automaton.)
|
||||||
|
|
||||||
|
NFAs have a lot of nice properties. They tend to be easier to construct, and
|
||||||
|
also tend to use less memory. However, their primary downside is that they are
|
||||||
|
typically slower to execute a search with. For example, the code above showing
|
||||||
|
how to search with an Aho-Corasick automaton needs to potentially iterate
|
||||||
|
through many failure transitions for every byte of input. While this is a
|
||||||
|
fairly small amount of overhead, this can add up, especially if the automaton
|
||||||
|
has a lot of overlapping patterns with a lot of failure transitions.
|
||||||
|
|
||||||
|
A DFA's search code, by contrast, looks like this:
|
||||||
|
|
||||||
|
fn contains(dfa: &DFA, haystack: &[u8]) -> bool {
|
||||||
|
let mut state_id = dfa.start();
|
||||||
|
// If the empty pattern is in dfa, then state_id is a match state.
|
||||||
|
if dfa.is_match(state_id) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
for (i, &b) in haystack.iter().enumerate() {
|
||||||
|
// An Aho-Corasick DFA *never* has a missing state that requires
|
||||||
|
// failure transitions to be followed. One byte of input advances the
|
||||||
|
// automaton by one state. Always.
|
||||||
|
state_id = dfa.next_state(state_id, b);
|
||||||
|
if dfa.is_match(state_id) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
The search logic here is much simpler than for the NFA, and this tends to
|
||||||
|
translate into significant performance benefits as well, since there's a lot
|
||||||
|
less work being done for each byte in the haystack. How is this accomplished?
|
||||||
|
It's done by pre-following all failure transitions for all states for all bytes
|
||||||
|
in the alphabet, and then building a single state transition table. Building
|
||||||
|
this DFA can be much more costly than building the NFA, and use much more
|
||||||
|
memory, but the better performance can be worth it.
|
||||||
|
|
||||||
|
Users of this crate can actually choose between using one of two possible NFAs
|
||||||
|
(noncontiguous or contiguous) or a DFA. By default, a contiguous NFA is used,
|
||||||
|
in most circumstances, but if the number of patterns is small enough a DFA will
|
||||||
|
be used. A contiguous NFA is chosen because it uses orders of magnitude less
|
||||||
|
memory than a DFA, takes only a little longer to build than a noncontiguous
|
||||||
|
NFA and usually gets pretty close to the search speed of a DFA. (Callers can
|
||||||
|
override this automatic selection via the `AhoCorasickBuilder::start_kind`
|
||||||
|
configuration.)
|
||||||
|
|
||||||
|
|
||||||
|
# More DFA tricks
|
||||||
|
|
||||||
|
As described in the previous section, one of the downsides of using a DFA
|
||||||
|
is that it uses more memory and can take longer to build. One small way of
|
||||||
|
mitigating these concerns is to map the alphabet used by the automaton into
|
||||||
|
a smaller space. Typically, the alphabet of a DFA has 256 elements in it:
|
||||||
|
one element for each possible value that fits into a byte. However, in many
|
||||||
|
cases, one does not need the full alphabet. For example, if all patterns in an
|
||||||
|
Aho-Corasick automaton are ASCII letters, then this only uses up 52 distinct
|
||||||
|
bytes. As far as the automaton is concerned, the other 204 bytes are
|
||||||
|
indistinguishable from one another: they will never discriminate between a
|
||||||
|
match or a non-match. Therefore, in cases like that, the alphabet can be shrunk
|
||||||
|
to just 53 elements. One for each ASCII letter, and then another to serve as a
|
||||||
|
placeholder for every other unused byte.
|
||||||
|
|
||||||
|
In practice, this library doesn't quite compute the optimal set of equivalence
|
||||||
|
classes, but it's close enough in most cases. The key idea is that this then
|
||||||
|
allows the transition table for the DFA to be potentially much smaller. The
|
||||||
|
downside of doing this, however, is that since the transition table is defined
|
||||||
|
in terms of this smaller alphabet space, every byte in the haystack must be
|
||||||
|
re-mapped to this smaller space. This requires an additional 256-byte table.
|
||||||
|
In practice, this can lead to a small search time hit, but it can be difficult
|
||||||
|
to measure. Moreover, it can sometimes lead to faster search times for bigger
|
||||||
|
automata, since it could be the difference between more parts of the automaton
|
||||||
|
staying in the CPU cache or not.
|
||||||
|
|
||||||
|
One other trick for DFAs employed by this crate is the notion of premultiplying
|
||||||
|
state identifiers. Specifically, the normal way to compute the next transition
|
||||||
|
in a DFA is via the following (assuming that the transition table is laid out
|
||||||
|
sequentially in memory, in row-major order, where the rows are states):
|
||||||
|
|
||||||
|
next_state_id = dfa.transitions[current_state_id * 256 + current_byte]
|
||||||
|
|
||||||
|
However, since the value `256` is a fixed constant, we can actually premultiply
|
||||||
|
the state identifiers in the table when we build the table initially. Then, the
|
||||||
|
next transition computation simply becomes:
|
||||||
|
|
||||||
|
next_state_id = dfa.transitions[current_state_id + current_byte]
|
||||||
|
|
||||||
|
This doesn't seem like much, but when this is being executed for every byte of
|
||||||
|
input that you're searching, saving that extra multiplication instruction can
|
||||||
|
add up.
|
||||||
|
|
||||||
|
The same optimization works even when equivalence classes are enabled, as
|
||||||
|
described above. The only difference is that the premultiplication is by the
|
||||||
|
total number of equivalence classes instead of 256.
|
||||||
|
|
||||||
|
There isn't much downside to premultiplying state identifiers, other than it
|
||||||
|
imposes a smaller limit on the total number of states in the DFA. Namely, with
|
||||||
|
premultiplied state identifiers, you run out of room in your state identifier
|
||||||
|
representation more rapidly than if the identifiers are just state indices.
|
||||||
|
|
||||||
|
Both equivalence classes and premultiplication are always enabled. There is a
|
||||||
|
`AhoCorasickBuilder::byte_classes` configuration, but disabling this just makes
|
||||||
|
it so there are always 256 equivalence classes, i.e., every class corresponds
|
||||||
|
to precisely one byte. When it's disabled, the equivalence class map itself is
|
||||||
|
still used. The purpose of disabling it is when one is debugging the underlying
|
||||||
|
automaton. It can be easier to comprehend when it uses actual byte values for
|
||||||
|
its transitions instead of equivalence classes.
|
||||||
|
|
||||||
|
|
||||||
|
# Match semantics
|
||||||
|
|
||||||
|
One of the more interesting things about this implementation of Aho-Corasick
|
||||||
|
that (as far as this author knows) separates it from other implementations, is
|
||||||
|
that it natively supports leftmost-first and leftmost-longest match semantics.
|
||||||
|
Briefly, match semantics refer to the decision procedure by which searching
|
||||||
|
will disambiguate matches when there are multiple to choose from:
|
||||||
|
|
||||||
|
* **standard** match semantics emits matches as soon as they are detected by
|
||||||
|
the automaton. This is typically equivalent to the textbook non-overlapping
|
||||||
|
formulation of Aho-Corasick.
|
||||||
|
* **leftmost-first** match semantics means that 1) the next match is the match
|
||||||
|
starting at the leftmost position and 2) among multiple matches starting at
|
||||||
|
the same leftmost position, the match corresponding to the pattern provided
|
||||||
|
first by the caller is reported.
|
||||||
|
* **leftmost-longest** is like leftmost-first, except when there are multiple
|
||||||
|
matches starting at the same leftmost position, the pattern corresponding to
|
||||||
|
the longest match is returned.
|
||||||
|
|
||||||
|
(The crate API documentation discusses these differences, with examples, in
|
||||||
|
more depth on the `MatchKind` type.)
|
||||||
|
|
||||||
|
The reason why supporting these match semantics is important is because it
|
||||||
|
gives the user more control over the match procedure. For example,
|
||||||
|
leftmost-first permits users to implement match priority by simply putting the
|
||||||
|
higher priority patterns first. Leftmost-longest, on the other hand, permits
|
||||||
|
finding the longest possible match, which might be useful when trying to find
|
||||||
|
words matching a dictionary. Additionally, regex engines often want to use
|
||||||
|
Aho-Corasick as an optimization when searching for an alternation of literals.
|
||||||
|
In order to preserve correct match semantics, regex engines typically can't use
|
||||||
|
the standard textbook definition directly, since regex engines will implement
|
||||||
|
either leftmost-first (Perl-like) or leftmost-longest (POSIX) match semantics.
|
||||||
|
|
||||||
|
Supporting leftmost semantics requires a couple key changes:
|
||||||
|
|
||||||
|
* Constructing the Aho-Corasick automaton changes a bit in both how the trie is
|
||||||
|
constructed and how failure transitions are found. Namely, only a subset
|
||||||
|
of the failure transitions are added. Specifically, only the failure
|
||||||
|
transitions that either do not occur after a match or do occur after a match
|
||||||
|
but preserve that match are kept. (More details on this can be found in
|
||||||
|
`src/nfa/noncontiguous.rs`.)
|
||||||
|
* The search algorithm changes slightly. Since we are looking for the leftmost
|
||||||
|
match, we cannot quit as soon as a match is detected. Instead, after a match
|
||||||
|
is detected, we must keep searching until either the end of the input or
|
||||||
|
until a dead state is seen. (Dead states are not used for standard match
|
||||||
|
semantics. Dead states mean that searching should stop after a match has been
|
||||||
|
found.)
|
||||||
|
|
||||||
|
Most other implementations of Aho-Corasick do support leftmost match semantics,
|
||||||
|
but they do it with more overhead at search time, or even worse, with a queue
|
||||||
|
of matches and sophisticated hijinks to disambiguate the matches. While our
|
||||||
|
construction algorithm becomes a bit more complicated, the correct match
|
||||||
|
semantics fall out from the structure of the automaton itself.
|
||||||
|
|
||||||
|
|
||||||
|
# Overlapping matches
|
||||||
|
|
||||||
|
One of the nice properties of an Aho-Corasick automaton is that it can report
|
||||||
|
all possible matches, even when they overlap with one another. In this mode,
|
||||||
|
the match semantics don't matter, since all possible matches are reported.
|
||||||
|
Overlapping searches work just like regular searches, except the state
|
||||||
|
identifier at which the previous search left off is carried over to the next
|
||||||
|
search, so that it can pick up where it left off. If there are additional
|
||||||
|
matches at that state, then they are reported before resuming the search.
|
||||||
|
|
||||||
|
Enabling leftmost-first or leftmost-longest match semantics causes the
|
||||||
|
automaton to use a subset of all failure transitions, which means that
|
||||||
|
overlapping searches cannot be used. Therefore, if leftmost match semantics are
|
||||||
|
used, attempting to do an overlapping search will return an error (or panic
|
||||||
|
when using the infallible APIs). Thus, to get overlapping searches, the caller
|
||||||
|
must use the default standard match semantics. This behavior was chosen because
|
||||||
|
there are only two alternatives, which were deemed worse:
|
||||||
|
|
||||||
|
* Compile two automatons internally, one for standard semantics and one for
|
||||||
|
the semantics requested by the caller (if not standard).
|
||||||
|
* Create a new type, distinct from the `AhoCorasick` type, which has different
|
||||||
|
capabilities based on the configuration options.
|
||||||
|
|
||||||
|
The first is untenable because of the amount of memory used by the automaton.
|
||||||
|
The second increases the complexity of the API too much by adding too many
|
||||||
|
types that do similar things. It is conceptually much simpler to keep all
|
||||||
|
searching isolated to a single type.
|
||||||
|
|
||||||
|
|
||||||
|
# Stream searching
|
||||||
|
|
||||||
|
Since Aho-Corasick is an automaton, it is possible to do partial searches on
|
||||||
|
separate parts of the haystack, and then resume that search on subsequent pieces
|
||||||
|
of the haystack. This is useful when the haystack you're trying to search is
|
||||||
|
not stored contiguously in memory, or if one does not want to read the entire
|
||||||
|
haystack into memory at once.
|
||||||
|
|
||||||
|
Currently, only standard semantics are supported for stream searching. This is
|
||||||
|
some of the more complicated code in this crate, and is something I would very
|
||||||
|
much like to improve. In particular, it currently has the restriction that it
|
||||||
|
must buffer at least enough of the haystack in memory in order to fit the
|
||||||
|
longest possible match. The difficulty in getting stream searching right is
|
||||||
|
that the implementation choices (such as the buffer size) often impact what the
|
||||||
|
API looks like and what it's allowed to do.
|
||||||
|
|
||||||
|
|
||||||
|
# Prefilters
|
||||||
|
|
||||||
|
In some cases, Aho-Corasick is not the fastest way to find matches containing
|
||||||
|
multiple patterns. Sometimes, the search can be accelerated using highly
|
||||||
|
optimized SIMD routines. For example, consider searching the following
|
||||||
|
patterns:
|
||||||
|
|
||||||
|
Sherlock
|
||||||
|
Moriarty
|
||||||
|
Watson
|
||||||
|
|
||||||
|
It is plausible that it would be much faster to quickly look for occurrences of
|
||||||
|
the leading bytes, `S`, `M` or `W`, before trying to start searching via the
|
||||||
|
automaton. Indeed, this is exactly what this crate will do.
|
||||||
|
|
||||||
|
When there are more than three distinct starting bytes, then this crate will
|
||||||
|
look for three distinct bytes occurring at any position in the patterns, while
|
||||||
|
preferring bytes that are heuristically determined to be rare over others. For
|
||||||
|
example:
|
||||||
|
|
||||||
|
Abuzz
|
||||||
|
Sanchez
|
||||||
|
Vasquez
|
||||||
|
Topaz
|
||||||
|
Waltz
|
||||||
|
|
||||||
|
Here, we have more than 3 distinct starting bytes, but all of the patterns
|
||||||
|
contain `z`, which is typically a rare byte. In this case, the prefilter will
|
||||||
|
scan for `z`, back up a bit, and then execute the Aho-Corasick automaton.
|
||||||
|
|
||||||
|
If all of that fails, then a packed multiple substring algorithm will be
|
||||||
|
attempted. Currently, the only algorithm available for this is Teddy, but more
|
||||||
|
may be added in the future. Teddy is unlike the above prefilters in that it
|
||||||
|
confirms its own matches, so when Teddy is active, it might not be necessary
|
||||||
|
for Aho-Corasick to run at all. However, the current Teddy implementation
|
||||||
|
only works in `x86_64` when SSSE3 or AVX2 are available or in `aarch64`
|
||||||
|
(using NEON), and moreover, only works _well_ when there are a small number
|
||||||
|
of patterns (say, less than 100). Teddy also requires the haystack to be of a
|
||||||
|
certain length (more than 16-34 bytes). When the haystack is shorter than that,
|
||||||
|
Rabin-Karp is used instead. (See `src/packed/rabinkarp.rs`.)
|
||||||
|
|
||||||
|
There is a more thorough description of Teddy at
|
||||||
|
[`src/packed/teddy/README.md`](src/packed/teddy/README.md).
|
||||||
21
.gear/predownloaded-development/vendor/aho-corasick/LICENSE-MIT
vendored
Normal file
21
.gear/predownloaded-development/vendor/aho-corasick/LICENSE-MIT
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2015 Andrew Gallant
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
174
.gear/predownloaded-development/vendor/aho-corasick/README.md
vendored
Normal file
174
.gear/predownloaded-development/vendor/aho-corasick/README.md
vendored
Normal file
|
|
@ -0,0 +1,174 @@
|
||||||
|
aho-corasick
|
||||||
|
============
|
||||||
|
A library for finding occurrences of many patterns at once with SIMD
|
||||||
|
acceleration in some cases. This library provides multiple pattern
|
||||||
|
search principally through an implementation of the
|
||||||
|
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
|
||||||
|
which builds a finite state machine for executing searches in linear time.
|
||||||
|
Features include case insensitive matching, overlapping matches, fast searching
|
||||||
|
via SIMD and optional full DFA construction and search & replace in streams.
|
||||||
|
|
||||||
|
[](https://github.com/BurntSushi/aho-corasick/actions)
|
||||||
|
[](https://crates.io/crates/aho-corasick)
|
||||||
|
|
||||||
|
Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/).
|
||||||
|
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
https://docs.rs/aho-corasick
|
||||||
|
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
Run `cargo add aho-corasick` to automatically add this crate as a dependency
|
||||||
|
in your `Cargo.toml` file.
|
||||||
|
|
||||||
|
|
||||||
|
### Example: basic searching
|
||||||
|
|
||||||
|
This example shows how to search for occurrences of multiple patterns
|
||||||
|
simultaneously. Each match includes the pattern that matched along with the
|
||||||
|
byte offsets of the match.
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use aho_corasick::{AhoCorasick, PatternID};
|
||||||
|
|
||||||
|
let patterns = &["apple", "maple", "Snapple"];
|
||||||
|
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns).unwrap();
|
||||||
|
let mut matches = vec![];
|
||||||
|
for mat in ac.find_iter(haystack) {
|
||||||
|
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||||
|
}
|
||||||
|
assert_eq!(matches, vec![
|
||||||
|
(PatternID::must(1), 13, 18),
|
||||||
|
(PatternID::must(0), 28, 33),
|
||||||
|
(PatternID::must(2), 43, 50),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Example: ASCII case insensitivity
|
||||||
|
|
||||||
|
This is like the previous example, but matches `Snapple` case insensitively
|
||||||
|
using `AhoCorasickBuilder`:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use aho_corasick::{AhoCorasick, PatternID};
|
||||||
|
|
||||||
|
let patterns = &["apple", "maple", "snapple"];
|
||||||
|
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::builder()
|
||||||
|
.ascii_case_insensitive(true)
|
||||||
|
.build(patterns)
|
||||||
|
.unwrap();
|
||||||
|
let mut matches = vec![];
|
||||||
|
for mat in ac.find_iter(haystack) {
|
||||||
|
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||||
|
}
|
||||||
|
assert_eq!(matches, vec![
|
||||||
|
(PatternID::must(1), 13, 18),
|
||||||
|
(PatternID::must(0), 28, 33),
|
||||||
|
(PatternID::must(2), 43, 50),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Example: replacing matches in a stream
|
||||||
|
|
||||||
|
This example shows how to execute a search and replace on a stream without
|
||||||
|
loading the entire stream into memory first.
|
||||||
|
|
||||||
|
```rust,ignore
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
let patterns = &["fox", "brown", "quick"];
|
||||||
|
let replace_with = &["sloth", "grey", "slow"];
|
||||||
|
|
||||||
|
// In a real example, these might be `std::fs::File`s instead. All you need to
|
||||||
|
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
|
||||||
|
let rdr = "The quick brown fox.";
|
||||||
|
let mut wtr = vec![];
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns).unwrap();
|
||||||
|
ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)
|
||||||
|
.expect("stream_replace_all failed");
|
||||||
|
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Example: finding the leftmost first match
|
||||||
|
|
||||||
|
In the textbook description of Aho-Corasick, its formulation is typically
|
||||||
|
structured such that it reports all possible matches, even when they overlap
|
||||||
|
with another. In many cases, overlapping matches may not be desired, such as
|
||||||
|
the case of finding all successive non-overlapping matches like you might with
|
||||||
|
a standard regular expression.
|
||||||
|
|
||||||
|
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
|
||||||
|
this doesn't always work in the expected way, since it will report matches as
|
||||||
|
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
|
||||||
|
against the text `Samwise`. Most regex engines (that are Perl-like, or
|
||||||
|
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
|
||||||
|
algorithm modified for reporting non-overlapping matches will report `Sam`.
|
||||||
|
|
||||||
|
A novel contribution of this library is the ability to change the match
|
||||||
|
semantics of Aho-Corasick (without additional search time overhead) such that
|
||||||
|
`Samwise` is reported instead. For example, here's the standard approach:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
let patterns = &["Samwise", "Sam"];
|
||||||
|
let haystack = "Samwise";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns).unwrap();
|
||||||
|
let mat = ac.find(haystack).expect("should have a match");
|
||||||
|
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
|
||||||
|
```
|
||||||
|
|
||||||
|
And now here's the leftmost-first version, which matches how a Perl-like
|
||||||
|
regex will work:
|
||||||
|
|
||||||
|
```rust
|
||||||
|
use aho_corasick::{AhoCorasick, MatchKind};
|
||||||
|
|
||||||
|
let patterns = &["Samwise", "Sam"];
|
||||||
|
let haystack = "Samwise";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::builder()
|
||||||
|
.match_kind(MatchKind::LeftmostFirst)
|
||||||
|
.build(patterns)
|
||||||
|
.unwrap();
|
||||||
|
let mat = ac.find(haystack).expect("should have a match");
|
||||||
|
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
|
||||||
|
```
|
||||||
|
|
||||||
|
In addition to leftmost-first semantics, this library also supports
|
||||||
|
leftmost-longest semantics, which match the POSIX behavior of a regular
|
||||||
|
expression alternation. See `MatchKind` in the docs for more details.
|
||||||
|
|
||||||
|
|
||||||
|
### Minimum Rust version policy
|
||||||
|
|
||||||
|
This crate's minimum supported `rustc` version is `1.60.0`.
|
||||||
|
|
||||||
|
The current policy is that the minimum Rust version required to use this crate
|
||||||
|
can be increased in minor version updates. For example, if `crate 1.0` requires
|
||||||
|
Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust
|
||||||
|
1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum
|
||||||
|
version of Rust.
|
||||||
|
|
||||||
|
In general, this crate will be conservative with respect to the minimum
|
||||||
|
supported version of Rust.
|
||||||
|
|
||||||
|
|
||||||
|
### FFI bindings
|
||||||
|
|
||||||
|
* [G-Research/ahocorasick_rs](https://github.com/G-Research/ahocorasick_rs/)
|
||||||
|
is a Python wrapper for this library.
|
||||||
|
* [tmikus/ahocorasick_rs](https://github.com/tmikus/ahocorasick_rs) is a Go
|
||||||
|
wrapper for this library.
|
||||||
24
.gear/predownloaded-development/vendor/aho-corasick/UNLICENSE
vendored
Normal file
24
.gear/predownloaded-development/vendor/aho-corasick/UNLICENSE
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
This is free and unencumbered software released into the public domain.
|
||||||
|
|
||||||
|
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||||
|
distribute this software, either in source code form or as a compiled
|
||||||
|
binary, for any purpose, commercial or non-commercial, and by any
|
||||||
|
means.
|
||||||
|
|
||||||
|
In jurisdictions that recognize copyright laws, the author or authors
|
||||||
|
of this software dedicate any and all copyright interest in the
|
||||||
|
software to the public domain. We make this dedication for the benefit
|
||||||
|
of the public at large and to the detriment of our heirs and
|
||||||
|
successors. We intend this dedication to be an overt act of
|
||||||
|
relinquishment in perpetuity of all present and future rights to this
|
||||||
|
software under copyright law.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||||
|
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
For more information, please refer to <http://unlicense.org/>
|
||||||
2
.gear/predownloaded-development/vendor/aho-corasick/rustfmt.toml
vendored
Normal file
2
.gear/predownloaded-development/vendor/aho-corasick/rustfmt.toml
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
max_width = 79
|
||||||
|
use_small_heuristics = "max"
|
||||||
2789
.gear/predownloaded-development/vendor/aho-corasick/src/ahocorasick.rs
vendored
Normal file
2789
.gear/predownloaded-development/vendor/aho-corasick/src/ahocorasick.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
1608
.gear/predownloaded-development/vendor/aho-corasick/src/automaton.rs
vendored
Normal file
1608
.gear/predownloaded-development/vendor/aho-corasick/src/automaton.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
835
.gear/predownloaded-development/vendor/aho-corasick/src/dfa.rs
vendored
Normal file
835
.gear/predownloaded-development/vendor/aho-corasick/src/dfa.rs
vendored
Normal file
|
|
@ -0,0 +1,835 @@
|
||||||
|
/*!
|
||||||
|
Provides direct access to a DFA implementation of Aho-Corasick.
|
||||||
|
|
||||||
|
This is a low-level API that generally only needs to be used in niche
|
||||||
|
circumstances. When possible, prefer using [`AhoCorasick`](crate::AhoCorasick)
|
||||||
|
instead of a DFA directly. Using a `DFA` directly is typically only necessary
|
||||||
|
when one needs access to the [`Automaton`] trait implementation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use alloc::{vec, vec::Vec};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
automaton::Automaton,
|
||||||
|
nfa::noncontiguous,
|
||||||
|
util::{
|
||||||
|
alphabet::ByteClasses,
|
||||||
|
error::{BuildError, MatchError},
|
||||||
|
int::{Usize, U32},
|
||||||
|
prefilter::Prefilter,
|
||||||
|
primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID},
|
||||||
|
search::{Anchored, MatchKind, StartKind},
|
||||||
|
special::Special,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A DFA implementation of Aho-Corasick.
|
||||||
|
///
|
||||||
|
/// When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) instead of
|
||||||
|
/// this type directly. Using a `DFA` directly is typically only necessary when
|
||||||
|
/// one needs access to the [`Automaton`] trait implementation.
|
||||||
|
///
|
||||||
|
/// This DFA can only be built by first constructing a [`noncontiguous::NFA`].
|
||||||
|
/// Both [`DFA::new`] and [`Builder::build`] do this for you automatically, but
|
||||||
|
/// [`Builder::build_from_noncontiguous`] permits doing it explicitly.
|
||||||
|
///
|
||||||
|
/// A DFA provides the best possible search performance (in this crate) via two
|
||||||
|
/// mechanisms:
|
||||||
|
///
|
||||||
|
/// * All states use a dense representation for their transitions.
|
||||||
|
/// * All failure transitions are pre-computed such that they are never
|
||||||
|
/// explicitly handled at search time.
|
||||||
|
///
|
||||||
|
/// These two facts combined mean that every state transition is performed
|
||||||
|
/// using a constant number of instructions. However, this comes at
|
||||||
|
/// great cost. The memory usage of a DFA can be quite exorbitant.
|
||||||
|
/// It is potentially multiple orders of magnitude greater than a
|
||||||
|
/// [`contiguous::NFA`](crate::nfa::contiguous::NFA) for example. In exchange,
|
||||||
|
/// a DFA will typically have better search speed than a `contiguous::NFA`, but
|
||||||
|
/// not by orders of magnitude.
|
||||||
|
///
|
||||||
|
/// Unless you have a small number of patterns or memory usage is not a concern
|
||||||
|
/// and search performance is critical, a DFA is usually not the best choice.
|
||||||
|
///
|
||||||
|
/// Moreover, unlike the NFAs in this crate, it is costly for a DFA to
|
||||||
|
/// support both anchored and unanchored search configurations. Namely,
|
||||||
|
/// since failure transitions are pre-computed, supporting both anchored
|
||||||
|
/// and unanchored searches requires a duplication of the transition table,
|
||||||
|
/// making the memory usage of such a DFA ever bigger. (The NFAs in this crate
|
||||||
|
/// unconditionally support both anchored and unanchored searches because there
|
||||||
|
/// is essentially no added cost for doing so.) It is for this reason that
|
||||||
|
/// a DFA's support for anchored and unanchored searches can be configured
|
||||||
|
/// via [`Builder::start_kind`]. By default, a DFA only supports unanchored
|
||||||
|
/// searches.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// This example shows how to build an `DFA` directly and use it to execute
|
||||||
|
/// [`Automaton::try_find`]:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{
|
||||||
|
/// automaton::Automaton,
|
||||||
|
/// dfa::DFA,
|
||||||
|
/// Input, Match,
|
||||||
|
/// };
|
||||||
|
///
|
||||||
|
/// let patterns = &["b", "abc", "abcd"];
|
||||||
|
/// let haystack = "abcd";
|
||||||
|
///
|
||||||
|
/// let nfa = DFA::new(patterns).unwrap();
|
||||||
|
/// assert_eq!(
|
||||||
|
/// Some(Match::must(0, 1..2)),
|
||||||
|
/// nfa.try_find(&Input::new(haystack))?,
|
||||||
|
/// );
|
||||||
|
/// # Ok::<(), Box<dyn std::error::Error>>(())
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// It is also possible to implement your own version of `try_find`. See the
|
||||||
|
/// [`Automaton`] documentation for an example.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct DFA {
|
||||||
|
/// The DFA transition table. IDs in this table are pre-multiplied. So
|
||||||
|
/// instead of the IDs being 0, 1, 2, 3, ..., they are 0*stride, 1*stride,
|
||||||
|
/// 2*stride, 3*stride, ...
|
||||||
|
trans: Vec<StateID>,
|
||||||
|
/// The matches for every match state in this DFA. This is first indexed by
|
||||||
|
/// state index (so that's `sid >> stride2`) and then by order in which the
|
||||||
|
/// matches are meant to occur.
|
||||||
|
matches: Vec<Vec<PatternID>>,
|
||||||
|
/// The amount of heap memory used, in bytes, by the inner Vecs of
|
||||||
|
/// 'matches'.
|
||||||
|
matches_memory_usage: usize,
|
||||||
|
/// The length of each pattern. This is used to compute the start offset
|
||||||
|
/// of a match.
|
||||||
|
pattern_lens: Vec<SmallIndex>,
|
||||||
|
/// A prefilter for accelerating searches, if one exists.
|
||||||
|
prefilter: Option<Prefilter>,
|
||||||
|
/// The match semantics built into this DFA.
|
||||||
|
match_kind: MatchKind,
|
||||||
|
/// The total number of states in this DFA.
|
||||||
|
state_len: usize,
|
||||||
|
/// The alphabet size, or total number of equivalence classes, for this
|
||||||
|
/// DFA. Note that the actual number of transitions in each state is
|
||||||
|
/// stride=2^stride2, where stride is the smallest power of 2 greater than
|
||||||
|
/// or equal to alphabet_len. We do things this way so that we can use
|
||||||
|
/// bitshifting to go from a state ID to an index into 'matches'.
|
||||||
|
alphabet_len: usize,
|
||||||
|
/// The exponent with a base 2, such that stride=2^stride2. Given a state
|
||||||
|
/// index 'i', its state identifier is 'i << stride2'. Given a state
|
||||||
|
/// identifier 'sid', its state index is 'sid >> stride2'.
|
||||||
|
stride2: usize,
|
||||||
|
/// The equivalence classes for this DFA. All transitions are defined on
|
||||||
|
/// equivalence classes and not on the 256 distinct byte values.
|
||||||
|
byte_classes: ByteClasses,
|
||||||
|
/// The length of the shortest pattern in this automaton.
|
||||||
|
min_pattern_len: usize,
|
||||||
|
/// The length of the longest pattern in this automaton.
|
||||||
|
max_pattern_len: usize,
|
||||||
|
/// The information required to deduce which states are "special" in this
|
||||||
|
/// DFA.
|
||||||
|
special: Special,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DFA {
|
||||||
|
/// Create a new Aho-Corasick DFA using the default configuration.
|
||||||
|
///
|
||||||
|
/// Use a [`Builder`] if you want to change the configuration.
|
||||||
|
pub fn new<I, P>(patterns: I) -> Result<DFA, BuildError>
|
||||||
|
where
|
||||||
|
I: IntoIterator<Item = P>,
|
||||||
|
P: AsRef<[u8]>,
|
||||||
|
{
|
||||||
|
DFA::builder().build(patterns)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A convenience method for returning a new Aho-Corasick DFA builder.
|
||||||
|
///
|
||||||
|
/// This usually permits one to just import the `DFA` type.
|
||||||
|
pub fn builder() -> Builder {
|
||||||
|
Builder::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DFA {
|
||||||
|
/// A sentinel state ID indicating that a search should stop once it has
|
||||||
|
/// entered this state. When a search stops, it returns a match if one has
|
||||||
|
/// been found, otherwise no match. A DFA always has an actual dead state
|
||||||
|
/// at this ID.
|
||||||
|
///
|
||||||
|
/// N.B. DFAs, unlike NFAs, do not have any notion of a FAIL state.
|
||||||
|
/// Namely, the whole point of a DFA is that the FAIL state is completely
|
||||||
|
/// compiled away. That is, DFA construction involves pre-computing the
|
||||||
|
/// failure transitions everywhere, such that failure transitions are no
|
||||||
|
/// longer used at search time. This, combined with its uniformly dense
|
||||||
|
/// representation, are the two most important factors in why it's faster
|
||||||
|
/// than the NFAs in this crate.
|
||||||
|
const DEAD: StateID = StateID::new_unchecked(0);
|
||||||
|
|
||||||
|
/// Adds the given pattern IDs as matches to the given state and also
|
||||||
|
/// records the added memory usage.
|
||||||
|
fn set_matches(
|
||||||
|
&mut self,
|
||||||
|
sid: StateID,
|
||||||
|
pids: impl Iterator<Item = PatternID>,
|
||||||
|
) {
|
||||||
|
let index = (sid.as_usize() >> self.stride2).checked_sub(2).unwrap();
|
||||||
|
let mut at_least_one = false;
|
||||||
|
for pid in pids {
|
||||||
|
self.matches[index].push(pid);
|
||||||
|
self.matches_memory_usage += PatternID::SIZE;
|
||||||
|
at_least_one = true;
|
||||||
|
}
|
||||||
|
assert!(at_least_one, "match state must have non-empty pids");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SAFETY: 'start_state' always returns a valid state ID, 'next_state' always
|
||||||
|
// returns a valid state ID given a valid state ID. We otherwise claim that
|
||||||
|
// all other methods are correct as well.
|
||||||
|
unsafe impl Automaton for DFA {
|
||||||
|
#[inline(always)]
|
||||||
|
fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> {
|
||||||
|
// Either of the start state IDs can be DEAD, in which case, support
|
||||||
|
// for that type of search is not provided by this DFA. Which start
|
||||||
|
// state IDs are inactive depends on the 'StartKind' configuration at
|
||||||
|
// DFA construction time.
|
||||||
|
match anchored {
|
||||||
|
Anchored::No => {
|
||||||
|
let start = self.special.start_unanchored_id;
|
||||||
|
if start == DFA::DEAD {
|
||||||
|
Err(MatchError::invalid_input_unanchored())
|
||||||
|
} else {
|
||||||
|
Ok(start)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Anchored::Yes => {
|
||||||
|
let start = self.special.start_anchored_id;
|
||||||
|
if start == DFA::DEAD {
|
||||||
|
Err(MatchError::invalid_input_anchored())
|
||||||
|
} else {
|
||||||
|
Ok(start)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn next_state(
|
||||||
|
&self,
|
||||||
|
_anchored: Anchored,
|
||||||
|
sid: StateID,
|
||||||
|
byte: u8,
|
||||||
|
) -> StateID {
|
||||||
|
let class = self.byte_classes.get(byte);
|
||||||
|
self.trans[(sid.as_u32() + u32::from(class)).as_usize()]
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn is_special(&self, sid: StateID) -> bool {
|
||||||
|
sid <= self.special.max_special_id
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn is_dead(&self, sid: StateID) -> bool {
|
||||||
|
sid == DFA::DEAD
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn is_match(&self, sid: StateID) -> bool {
|
||||||
|
!self.is_dead(sid) && sid <= self.special.max_match_id
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn is_start(&self, sid: StateID) -> bool {
|
||||||
|
sid == self.special.start_unanchored_id
|
||||||
|
|| sid == self.special.start_anchored_id
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn match_kind(&self) -> MatchKind {
|
||||||
|
self.match_kind
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn patterns_len(&self) -> usize {
|
||||||
|
self.pattern_lens.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn pattern_len(&self, pid: PatternID) -> usize {
|
||||||
|
self.pattern_lens[pid].as_usize()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn min_pattern_len(&self) -> usize {
|
||||||
|
self.min_pattern_len
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn max_pattern_len(&self) -> usize {
|
||||||
|
self.max_pattern_len
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn match_len(&self, sid: StateID) -> usize {
|
||||||
|
debug_assert!(self.is_match(sid));
|
||||||
|
let offset = (sid.as_usize() >> self.stride2) - 2;
|
||||||
|
self.matches[offset].len()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn match_pattern(&self, sid: StateID, index: usize) -> PatternID {
|
||||||
|
debug_assert!(self.is_match(sid));
|
||||||
|
let offset = (sid.as_usize() >> self.stride2) - 2;
|
||||||
|
self.matches[offset][index]
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn memory_usage(&self) -> usize {
|
||||||
|
use core::mem::size_of;
|
||||||
|
|
||||||
|
(self.trans.len() * size_of::<u32>())
|
||||||
|
+ (self.matches.len() * size_of::<Vec<PatternID>>())
|
||||||
|
+ self.matches_memory_usage
|
||||||
|
+ (self.pattern_lens.len() * size_of::<SmallIndex>())
|
||||||
|
+ self.prefilter.as_ref().map_or(0, |p| p.memory_usage())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn prefilter(&self) -> Option<&Prefilter> {
|
||||||
|
self.prefilter.as_ref()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::fmt::Debug for DFA {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||||
|
use crate::{
|
||||||
|
automaton::{fmt_state_indicator, sparse_transitions},
|
||||||
|
util::debug::DebugByte,
|
||||||
|
};
|
||||||
|
|
||||||
|
writeln!(f, "dfa::DFA(")?;
|
||||||
|
for index in 0..self.state_len {
|
||||||
|
let sid = StateID::new_unchecked(index << self.stride2);
|
||||||
|
// While we do currently include the FAIL state in the transition
|
||||||
|
// table (to simplify construction), it is never actually used. It
|
||||||
|
// poses problems with the code below because it gets treated as
|
||||||
|
// a match state incidentally when it is, of course, not. So we
|
||||||
|
// special case it. The fail state is always the first state after
|
||||||
|
// the dead state.
|
||||||
|
//
|
||||||
|
// If the construction is changed to remove the fail state (it
|
||||||
|
// probably should be), then this special case should be updated.
|
||||||
|
if index == 1 {
|
||||||
|
writeln!(f, "F {:06}:", sid.as_usize())?;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
fmt_state_indicator(f, self, sid)?;
|
||||||
|
write!(f, "{:06}: ", sid.as_usize())?;
|
||||||
|
|
||||||
|
let it = (0..self.byte_classes.alphabet_len()).map(|class| {
|
||||||
|
(class.as_u8(), self.trans[sid.as_usize() + class])
|
||||||
|
});
|
||||||
|
for (i, (start, end, next)) in sparse_transitions(it).enumerate() {
|
||||||
|
if i > 0 {
|
||||||
|
write!(f, ", ")?;
|
||||||
|
}
|
||||||
|
if start == end {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{:?} => {:?}",
|
||||||
|
DebugByte(start),
|
||||||
|
next.as_usize()
|
||||||
|
)?;
|
||||||
|
} else {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{:?}-{:?} => {:?}",
|
||||||
|
DebugByte(start),
|
||||||
|
DebugByte(end),
|
||||||
|
next.as_usize()
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
write!(f, "\n")?;
|
||||||
|
if self.is_match(sid) {
|
||||||
|
write!(f, " matches: ")?;
|
||||||
|
for i in 0..self.match_len(sid) {
|
||||||
|
if i > 0 {
|
||||||
|
write!(f, ", ")?;
|
||||||
|
}
|
||||||
|
let pid = self.match_pattern(sid, i);
|
||||||
|
write!(f, "{}", pid.as_usize())?;
|
||||||
|
}
|
||||||
|
write!(f, "\n")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writeln!(f, "match kind: {:?}", self.match_kind)?;
|
||||||
|
writeln!(f, "prefilter: {:?}", self.prefilter.is_some())?;
|
||||||
|
writeln!(f, "state length: {:?}", self.state_len)?;
|
||||||
|
writeln!(f, "pattern length: {:?}", self.patterns_len())?;
|
||||||
|
writeln!(f, "shortest pattern length: {:?}", self.min_pattern_len)?;
|
||||||
|
writeln!(f, "longest pattern length: {:?}", self.max_pattern_len)?;
|
||||||
|
writeln!(f, "alphabet length: {:?}", self.alphabet_len)?;
|
||||||
|
writeln!(f, "stride: {:?}", 1 << self.stride2)?;
|
||||||
|
writeln!(f, "byte classes: {:?}", self.byte_classes)?;
|
||||||
|
writeln!(f, "memory usage: {:?}", self.memory_usage())?;
|
||||||
|
writeln!(f, ")")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A builder for configuring an Aho-Corasick DFA.
|
||||||
|
///
|
||||||
|
/// This builder has a subset of the options available to a
|
||||||
|
/// [`AhoCorasickBuilder`](crate::AhoCorasickBuilder). Of the shared options,
|
||||||
|
/// their behavior is identical.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Builder {
|
||||||
|
noncontiguous: noncontiguous::Builder,
|
||||||
|
start_kind: StartKind,
|
||||||
|
byte_classes: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Builder {
|
||||||
|
fn default() -> Builder {
|
||||||
|
Builder {
|
||||||
|
noncontiguous: noncontiguous::Builder::new(),
|
||||||
|
start_kind: StartKind::Unanchored,
|
||||||
|
byte_classes: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Builder {
|
||||||
|
/// Create a new builder for configuring an Aho-Corasick DFA.
|
||||||
|
pub fn new() -> Builder {
|
||||||
|
Builder::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build an Aho-Corasick DFA from the given iterator of patterns.
|
||||||
|
///
|
||||||
|
/// A builder may be reused to create more DFAs.
|
||||||
|
pub fn build<I, P>(&self, patterns: I) -> Result<DFA, BuildError>
|
||||||
|
where
|
||||||
|
I: IntoIterator<Item = P>,
|
||||||
|
P: AsRef<[u8]>,
|
||||||
|
{
|
||||||
|
let nnfa = self.noncontiguous.build(patterns)?;
|
||||||
|
self.build_from_noncontiguous(&nnfa)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build an Aho-Corasick DFA from the given noncontiguous NFA.
|
||||||
|
///
|
||||||
|
/// Note that when this method is used, only the `start_kind` and
|
||||||
|
/// `byte_classes` settings on this builder are respected. The other
|
||||||
|
/// settings only apply to the initial construction of the Aho-Corasick
|
||||||
|
/// automaton. Since using this method requires that initial construction
|
||||||
|
/// has already completed, all settings impacting only initial construction
|
||||||
|
/// are no longer relevant.
|
||||||
|
pub fn build_from_noncontiguous(
|
||||||
|
&self,
|
||||||
|
nnfa: &noncontiguous::NFA,
|
||||||
|
) -> Result<DFA, BuildError> {
|
||||||
|
debug!("building DFA");
|
||||||
|
let byte_classes = if self.byte_classes {
|
||||||
|
nnfa.byte_classes().clone()
|
||||||
|
} else {
|
||||||
|
ByteClasses::singletons()
|
||||||
|
};
|
||||||
|
let state_len = match self.start_kind {
|
||||||
|
StartKind::Unanchored | StartKind::Anchored => nnfa.states().len(),
|
||||||
|
StartKind::Both => {
|
||||||
|
// These unwraps are OK because we know that the number of
|
||||||
|
// NFA states is < StateID::LIMIT which is in turn less than
|
||||||
|
// i32::MAX. Thus, there is always room to multiply by 2.
|
||||||
|
// Finally, the number of states is always at least 4 in the
|
||||||
|
// NFA (DEAD, FAIL, START-UNANCHORED, START-ANCHORED), so the
|
||||||
|
// subtraction of 4 is okay.
|
||||||
|
//
|
||||||
|
// Note that we subtract 4 because the "anchored" part of
|
||||||
|
// the DFA duplicates the unanchored part (without failure
|
||||||
|
// transitions), but reuses the DEAD, FAIL and START states.
|
||||||
|
nnfa.states()
|
||||||
|
.len()
|
||||||
|
.checked_mul(2)
|
||||||
|
.unwrap()
|
||||||
|
.checked_sub(4)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let trans_len =
|
||||||
|
match state_len.checked_shl(byte_classes.stride2().as_u32()) {
|
||||||
|
Some(trans_len) => trans_len,
|
||||||
|
None => {
|
||||||
|
return Err(BuildError::state_id_overflow(
|
||||||
|
StateID::MAX.as_u64(),
|
||||||
|
usize::MAX.as_u64(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
StateID::new(trans_len.checked_sub(byte_classes.stride()).unwrap())
|
||||||
|
.map_err(|e| {
|
||||||
|
BuildError::state_id_overflow(
|
||||||
|
StateID::MAX.as_u64(),
|
||||||
|
e.attempted(),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let num_match_states = match self.start_kind {
|
||||||
|
StartKind::Unanchored | StartKind::Anchored => {
|
||||||
|
nnfa.special().max_match_id.as_usize().checked_sub(1).unwrap()
|
||||||
|
}
|
||||||
|
StartKind::Both => nnfa
|
||||||
|
.special()
|
||||||
|
.max_match_id
|
||||||
|
.as_usize()
|
||||||
|
.checked_sub(1)
|
||||||
|
.unwrap()
|
||||||
|
.checked_mul(2)
|
||||||
|
.unwrap(),
|
||||||
|
};
|
||||||
|
let mut dfa = DFA {
|
||||||
|
trans: vec![DFA::DEAD; trans_len],
|
||||||
|
matches: vec![vec![]; num_match_states],
|
||||||
|
matches_memory_usage: 0,
|
||||||
|
pattern_lens: nnfa.pattern_lens_raw().to_vec(),
|
||||||
|
prefilter: nnfa.prefilter().map(|p| p.clone()),
|
||||||
|
match_kind: nnfa.match_kind(),
|
||||||
|
state_len,
|
||||||
|
alphabet_len: byte_classes.alphabet_len(),
|
||||||
|
stride2: byte_classes.stride2(),
|
||||||
|
byte_classes,
|
||||||
|
min_pattern_len: nnfa.min_pattern_len(),
|
||||||
|
max_pattern_len: nnfa.max_pattern_len(),
|
||||||
|
// The special state IDs are set later.
|
||||||
|
special: Special::zero(),
|
||||||
|
};
|
||||||
|
match self.start_kind {
|
||||||
|
StartKind::Both => {
|
||||||
|
self.finish_build_both_starts(nnfa, &mut dfa);
|
||||||
|
}
|
||||||
|
StartKind::Unanchored => {
|
||||||
|
self.finish_build_one_start(Anchored::No, nnfa, &mut dfa);
|
||||||
|
}
|
||||||
|
StartKind::Anchored => {
|
||||||
|
self.finish_build_one_start(Anchored::Yes, nnfa, &mut dfa)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
debug!(
|
||||||
|
"DFA built, <states: {:?}, size: {:?}, \
|
||||||
|
alphabet len: {:?}, stride: {:?}>",
|
||||||
|
dfa.state_len,
|
||||||
|
dfa.memory_usage(),
|
||||||
|
dfa.byte_classes.alphabet_len(),
|
||||||
|
dfa.byte_classes.stride(),
|
||||||
|
);
|
||||||
|
// The vectors can grow ~twice as big during construction because a
|
||||||
|
// Vec amortizes growth. But here, let's shrink things back down to
|
||||||
|
// what we actually need since we're never going to add more to it.
|
||||||
|
dfa.trans.shrink_to_fit();
|
||||||
|
dfa.pattern_lens.shrink_to_fit();
|
||||||
|
dfa.matches.shrink_to_fit();
|
||||||
|
// TODO: We might also want to shrink each Vec inside of `dfa.matches`,
|
||||||
|
// or even better, convert it to one contiguous allocation. But I think
|
||||||
|
// I went with nested allocs for good reason (can't remember), so this
|
||||||
|
// may be tricky to do. I decided not to shrink them here because it
|
||||||
|
// might require a fair bit of work to do. It's unclear whether it's
|
||||||
|
// worth it.
|
||||||
|
Ok(dfa)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finishes building a DFA for either unanchored or anchored searches,
|
||||||
|
/// but NOT both.
|
||||||
|
fn finish_build_one_start(
|
||||||
|
&self,
|
||||||
|
anchored: Anchored,
|
||||||
|
nnfa: &noncontiguous::NFA,
|
||||||
|
dfa: &mut DFA,
|
||||||
|
) {
|
||||||
|
// This function always succeeds because we check above that all of the
|
||||||
|
// states in the NFA can be mapped to DFA state IDs.
|
||||||
|
let stride2 = dfa.stride2;
|
||||||
|
let old2new = |oldsid: StateID| {
|
||||||
|
StateID::new_unchecked(oldsid.as_usize() << stride2)
|
||||||
|
};
|
||||||
|
for (oldsid, state) in nnfa.states().iter().with_state_ids() {
|
||||||
|
let newsid = old2new(oldsid);
|
||||||
|
if state.is_match() {
|
||||||
|
dfa.set_matches(newsid, nnfa.iter_matches(oldsid));
|
||||||
|
}
|
||||||
|
sparse_iter(
|
||||||
|
nnfa,
|
||||||
|
oldsid,
|
||||||
|
&dfa.byte_classes,
|
||||||
|
|byte, class, mut oldnextsid| {
|
||||||
|
if oldnextsid == noncontiguous::NFA::FAIL {
|
||||||
|
if anchored.is_anchored() {
|
||||||
|
oldnextsid = noncontiguous::NFA::DEAD;
|
||||||
|
} else if state.fail() == noncontiguous::NFA::DEAD {
|
||||||
|
// This is a special case that avoids following
|
||||||
|
// DEAD transitions in a non-contiguous NFA.
|
||||||
|
// Following these transitions is pretty slow
|
||||||
|
// because the non-contiguous NFA will always use
|
||||||
|
// a sparse representation for it (because the
|
||||||
|
// DEAD state is usually treated as a sentinel).
|
||||||
|
// The *vast* majority of failure states are DEAD
|
||||||
|
// states, so this winds up being pretty slow if
|
||||||
|
// we go through the non-contiguous NFA state
|
||||||
|
// transition logic. Instead, just do it ourselves.
|
||||||
|
oldnextsid = noncontiguous::NFA::DEAD;
|
||||||
|
} else {
|
||||||
|
oldnextsid = nnfa.next_state(
|
||||||
|
Anchored::No,
|
||||||
|
state.fail(),
|
||||||
|
byte,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dfa.trans[newsid.as_usize() + usize::from(class)] =
|
||||||
|
old2new(oldnextsid);
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// Now that we've remapped all the IDs in our states, all that's left
|
||||||
|
// is remapping the special state IDs.
|
||||||
|
let old = nnfa.special();
|
||||||
|
let new = &mut dfa.special;
|
||||||
|
new.max_special_id = old2new(old.max_special_id);
|
||||||
|
new.max_match_id = old2new(old.max_match_id);
|
||||||
|
if anchored.is_anchored() {
|
||||||
|
new.start_unanchored_id = DFA::DEAD;
|
||||||
|
new.start_anchored_id = old2new(old.start_anchored_id);
|
||||||
|
} else {
|
||||||
|
new.start_unanchored_id = old2new(old.start_unanchored_id);
|
||||||
|
new.start_anchored_id = DFA::DEAD;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finishes building a DFA that supports BOTH unanchored and anchored
|
||||||
|
/// searches. It works by inter-leaving unanchored states with anchored
|
||||||
|
/// states in the same transition table. This way, we avoid needing to
|
||||||
|
/// re-shuffle states afterward to ensure that our states still look like
|
||||||
|
/// DEAD, MATCH, ..., START-UNANCHORED, START-ANCHORED, NON-MATCH, ...
|
||||||
|
///
|
||||||
|
/// Honestly this is pretty inscrutable... Simplifications are most
|
||||||
|
/// welcome.
|
||||||
|
fn finish_build_both_starts(
|
||||||
|
&self,
|
||||||
|
nnfa: &noncontiguous::NFA,
|
||||||
|
dfa: &mut DFA,
|
||||||
|
) {
|
||||||
|
let stride2 = dfa.stride2;
|
||||||
|
let stride = 1 << stride2;
|
||||||
|
let mut remap_unanchored = vec![DFA::DEAD; nnfa.states().len()];
|
||||||
|
let mut remap_anchored = vec![DFA::DEAD; nnfa.states().len()];
|
||||||
|
let mut is_anchored = vec![false; dfa.state_len];
|
||||||
|
let mut newsid = DFA::DEAD;
|
||||||
|
let next_dfa_id =
|
||||||
|
|sid: StateID| StateID::new_unchecked(sid.as_usize() + stride);
|
||||||
|
for (oldsid, state) in nnfa.states().iter().with_state_ids() {
|
||||||
|
if oldsid == noncontiguous::NFA::DEAD
|
||||||
|
|| oldsid == noncontiguous::NFA::FAIL
|
||||||
|
{
|
||||||
|
remap_unanchored[oldsid] = newsid;
|
||||||
|
remap_anchored[oldsid] = newsid;
|
||||||
|
newsid = next_dfa_id(newsid);
|
||||||
|
} else if oldsid == nnfa.special().start_unanchored_id
|
||||||
|
|| oldsid == nnfa.special().start_anchored_id
|
||||||
|
{
|
||||||
|
if oldsid == nnfa.special().start_unanchored_id {
|
||||||
|
remap_unanchored[oldsid] = newsid;
|
||||||
|
remap_anchored[oldsid] = DFA::DEAD;
|
||||||
|
} else {
|
||||||
|
remap_unanchored[oldsid] = DFA::DEAD;
|
||||||
|
remap_anchored[oldsid] = newsid;
|
||||||
|
is_anchored[newsid.as_usize() >> stride2] = true;
|
||||||
|
}
|
||||||
|
if state.is_match() {
|
||||||
|
dfa.set_matches(newsid, nnfa.iter_matches(oldsid));
|
||||||
|
}
|
||||||
|
sparse_iter(
|
||||||
|
nnfa,
|
||||||
|
oldsid,
|
||||||
|
&dfa.byte_classes,
|
||||||
|
|_, class, oldnextsid| {
|
||||||
|
let class = usize::from(class);
|
||||||
|
if oldnextsid == noncontiguous::NFA::FAIL {
|
||||||
|
dfa.trans[newsid.as_usize() + class] = DFA::DEAD;
|
||||||
|
} else {
|
||||||
|
dfa.trans[newsid.as_usize() + class] = oldnextsid;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
);
|
||||||
|
newsid = next_dfa_id(newsid);
|
||||||
|
} else {
|
||||||
|
let unewsid = newsid;
|
||||||
|
newsid = next_dfa_id(newsid);
|
||||||
|
let anewsid = newsid;
|
||||||
|
newsid = next_dfa_id(newsid);
|
||||||
|
|
||||||
|
remap_unanchored[oldsid] = unewsid;
|
||||||
|
remap_anchored[oldsid] = anewsid;
|
||||||
|
is_anchored[anewsid.as_usize() >> stride2] = true;
|
||||||
|
if state.is_match() {
|
||||||
|
dfa.set_matches(unewsid, nnfa.iter_matches(oldsid));
|
||||||
|
dfa.set_matches(anewsid, nnfa.iter_matches(oldsid));
|
||||||
|
}
|
||||||
|
sparse_iter(
|
||||||
|
nnfa,
|
||||||
|
oldsid,
|
||||||
|
&dfa.byte_classes,
|
||||||
|
|byte, class, oldnextsid| {
|
||||||
|
let class = usize::from(class);
|
||||||
|
if oldnextsid == noncontiguous::NFA::FAIL {
|
||||||
|
let oldnextsid =
|
||||||
|
if state.fail() == noncontiguous::NFA::DEAD {
|
||||||
|
noncontiguous::NFA::DEAD
|
||||||
|
} else {
|
||||||
|
nnfa.next_state(
|
||||||
|
Anchored::No,
|
||||||
|
state.fail(),
|
||||||
|
byte,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
dfa.trans[unewsid.as_usize() + class] = oldnextsid;
|
||||||
|
} else {
|
||||||
|
dfa.trans[unewsid.as_usize() + class] = oldnextsid;
|
||||||
|
dfa.trans[anewsid.as_usize() + class] = oldnextsid;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i in 0..dfa.state_len {
|
||||||
|
let sid = i << stride2;
|
||||||
|
if is_anchored[i] {
|
||||||
|
for next in dfa.trans[sid..][..stride].iter_mut() {
|
||||||
|
*next = remap_anchored[*next];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for next in dfa.trans[sid..][..stride].iter_mut() {
|
||||||
|
*next = remap_unanchored[*next];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Now that we've remapped all the IDs in our states, all that's left
|
||||||
|
// is remapping the special state IDs.
|
||||||
|
let old = nnfa.special();
|
||||||
|
let new = &mut dfa.special;
|
||||||
|
new.max_special_id = remap_anchored[old.max_special_id];
|
||||||
|
new.max_match_id = remap_anchored[old.max_match_id];
|
||||||
|
new.start_unanchored_id = remap_unanchored[old.start_unanchored_id];
|
||||||
|
new.start_anchored_id = remap_anchored[old.start_anchored_id];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the desired match semantics.
|
||||||
|
///
|
||||||
|
/// This only applies when using [`Builder::build`] and not
|
||||||
|
/// [`Builder::build_from_noncontiguous`].
|
||||||
|
///
|
||||||
|
/// See
|
||||||
|
/// [`AhoCorasickBuilder::match_kind`](crate::AhoCorasickBuilder::match_kind)
|
||||||
|
/// for more documentation and examples.
|
||||||
|
pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder {
|
||||||
|
self.noncontiguous.match_kind(kind);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Enable ASCII-aware case insensitive matching.
|
||||||
|
///
|
||||||
|
/// This only applies when using [`Builder::build`] and not
|
||||||
|
/// [`Builder::build_from_noncontiguous`].
|
||||||
|
///
|
||||||
|
/// See
|
||||||
|
/// [`AhoCorasickBuilder::ascii_case_insensitive`](crate::AhoCorasickBuilder::ascii_case_insensitive)
|
||||||
|
/// for more documentation and examples.
|
||||||
|
pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder {
|
||||||
|
self.noncontiguous.ascii_case_insensitive(yes);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Enable heuristic prefilter optimizations.
|
||||||
|
///
|
||||||
|
/// This only applies when using [`Builder::build`] and not
|
||||||
|
/// [`Builder::build_from_noncontiguous`].
|
||||||
|
///
|
||||||
|
/// See
|
||||||
|
/// [`AhoCorasickBuilder::prefilter`](crate::AhoCorasickBuilder::prefilter)
|
||||||
|
/// for more documentation and examples.
|
||||||
|
pub fn prefilter(&mut self, yes: bool) -> &mut Builder {
|
||||||
|
self.noncontiguous.prefilter(yes);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sets the starting state configuration for the automaton.
|
||||||
|
///
|
||||||
|
/// See
|
||||||
|
/// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind)
|
||||||
|
/// for more documentation and examples.
|
||||||
|
pub fn start_kind(&mut self, kind: StartKind) -> &mut Builder {
|
||||||
|
self.start_kind = kind;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A debug setting for whether to attempt to shrink the size of the
|
||||||
|
/// automaton's alphabet or not.
|
||||||
|
///
|
||||||
|
/// This should never be enabled unless you're debugging an automaton.
|
||||||
|
/// Namely, disabling byte classes makes transitions easier to reason
|
||||||
|
/// about, since they use the actual bytes instead of equivalence classes.
|
||||||
|
/// Disabling this confers no performance benefit at search time.
|
||||||
|
///
|
||||||
|
/// See
|
||||||
|
/// [`AhoCorasickBuilder::byte_classes`](crate::AhoCorasickBuilder::byte_classes)
|
||||||
|
/// for more documentation and examples.
|
||||||
|
pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
|
||||||
|
self.byte_classes = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterate over all possible equivalence class transitions in this state.
|
||||||
|
/// The closure is called for all transitions with a distinct equivalence
|
||||||
|
/// class, even those not explicitly represented in this sparse state. For
|
||||||
|
/// any implicitly defined transitions, the given closure is called with
|
||||||
|
/// the fail state ID.
|
||||||
|
///
|
||||||
|
/// The closure is guaranteed to be called precisely
|
||||||
|
/// `byte_classes.alphabet_len()` times, once for every possible class in
|
||||||
|
/// ascending order.
|
||||||
|
fn sparse_iter<F: FnMut(u8, u8, StateID)>(
|
||||||
|
nnfa: &noncontiguous::NFA,
|
||||||
|
oldsid: StateID,
|
||||||
|
classes: &ByteClasses,
|
||||||
|
mut f: F,
|
||||||
|
) {
|
||||||
|
let mut prev_class = None;
|
||||||
|
let mut byte = 0usize;
|
||||||
|
for t in nnfa.iter_trans(oldsid) {
|
||||||
|
while byte < usize::from(t.byte()) {
|
||||||
|
let rep = byte.as_u8();
|
||||||
|
let class = classes.get(rep);
|
||||||
|
byte += 1;
|
||||||
|
if prev_class != Some(class) {
|
||||||
|
f(rep, class, noncontiguous::NFA::FAIL);
|
||||||
|
prev_class = Some(class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let rep = t.byte();
|
||||||
|
let class = classes.get(rep);
|
||||||
|
byte += 1;
|
||||||
|
if prev_class != Some(class) {
|
||||||
|
f(rep, class, t.next());
|
||||||
|
prev_class = Some(class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for b in byte..=255 {
|
||||||
|
let rep = b.as_u8();
|
||||||
|
let class = classes.get(rep);
|
||||||
|
if prev_class != Some(class) {
|
||||||
|
f(rep, class, noncontiguous::NFA::FAIL);
|
||||||
|
prev_class = Some(class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
326
.gear/predownloaded-development/vendor/aho-corasick/src/lib.rs
vendored
Normal file
326
.gear/predownloaded-development/vendor/aho-corasick/src/lib.rs
vendored
Normal file
|
|
@ -0,0 +1,326 @@
|
||||||
|
/*!
|
||||||
|
A library for finding occurrences of many patterns at once. This library
|
||||||
|
provides multiple pattern search principally through an implementation of the
|
||||||
|
[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
|
||||||
|
which builds a fast finite state machine for executing searches in linear time.
|
||||||
|
|
||||||
|
Additionally, this library provides a number of configuration options for
|
||||||
|
building the automaton that permit controlling the space versus time trade
|
||||||
|
off. Other features include simple ASCII case insensitive matching, finding
|
||||||
|
overlapping matches, replacements, searching streams and even searching and
|
||||||
|
replacing text in streams.
|
||||||
|
|
||||||
|
Finally, unlike most other Aho-Corasick implementations, this one
|
||||||
|
supports enabling [leftmost-first](MatchKind::LeftmostFirst) or
|
||||||
|
[leftmost-longest](MatchKind::LeftmostLongest) match semantics, using a
|
||||||
|
(seemingly) novel alternative construction algorithm. For more details on what
|
||||||
|
match semantics means, see the [`MatchKind`] type.
|
||||||
|
|
||||||
|
# Overview
|
||||||
|
|
||||||
|
This section gives a brief overview of the primary types in this crate:
|
||||||
|
|
||||||
|
* [`AhoCorasick`] is the primary type and represents an Aho-Corasick automaton.
|
||||||
|
This is the type you use to execute searches.
|
||||||
|
* [`AhoCorasickBuilder`] can be used to build an Aho-Corasick automaton, and
|
||||||
|
supports configuring a number of options.
|
||||||
|
* [`Match`] represents a single match reported by an Aho-Corasick automaton.
|
||||||
|
Each match has two pieces of information: the pattern that matched and the
|
||||||
|
start and end byte offsets corresponding to the position in the haystack at
|
||||||
|
which it matched.
|
||||||
|
|
||||||
|
# Example: basic searching
|
||||||
|
|
||||||
|
This example shows how to search for occurrences of multiple patterns
|
||||||
|
simultaneously. Each match includes the pattern that matched along with the
|
||||||
|
byte offsets of the match.
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::{AhoCorasick, PatternID};
|
||||||
|
|
||||||
|
let patterns = &["apple", "maple", "Snapple"];
|
||||||
|
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns).unwrap();
|
||||||
|
let mut matches = vec![];
|
||||||
|
for mat in ac.find_iter(haystack) {
|
||||||
|
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||||
|
}
|
||||||
|
assert_eq!(matches, vec![
|
||||||
|
(PatternID::must(1), 13, 18),
|
||||||
|
(PatternID::must(0), 28, 33),
|
||||||
|
(PatternID::must(2), 43, 50),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
# Example: case insensitivity
|
||||||
|
|
||||||
|
This is like the previous example, but matches `Snapple` case insensitively
|
||||||
|
using `AhoCorasickBuilder`:
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::{AhoCorasick, PatternID};
|
||||||
|
|
||||||
|
let patterns = &["apple", "maple", "snapple"];
|
||||||
|
let haystack = "Nobody likes maple in their apple flavored Snapple.";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::builder()
|
||||||
|
.ascii_case_insensitive(true)
|
||||||
|
.build(patterns)
|
||||||
|
.unwrap();
|
||||||
|
let mut matches = vec![];
|
||||||
|
for mat in ac.find_iter(haystack) {
|
||||||
|
matches.push((mat.pattern(), mat.start(), mat.end()));
|
||||||
|
}
|
||||||
|
assert_eq!(matches, vec![
|
||||||
|
(PatternID::must(1), 13, 18),
|
||||||
|
(PatternID::must(0), 28, 33),
|
||||||
|
(PatternID::must(2), 43, 50),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
# Example: replacing matches in a stream
|
||||||
|
|
||||||
|
This example shows how to execute a search and replace on a stream without
|
||||||
|
loading the entire stream into memory first.
|
||||||
|
|
||||||
|
```
|
||||||
|
# #[cfg(feature = "std")] {
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
# fn example() -> Result<(), std::io::Error> {
|
||||||
|
let patterns = &["fox", "brown", "quick"];
|
||||||
|
let replace_with = &["sloth", "grey", "slow"];
|
||||||
|
|
||||||
|
// In a real example, these might be `std::fs::File`s instead. All you need to
|
||||||
|
// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
|
||||||
|
let rdr = "The quick brown fox.";
|
||||||
|
let mut wtr = vec![];
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns).unwrap();
|
||||||
|
ac.try_stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
|
||||||
|
assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
|
||||||
|
# Ok(()) }; example().unwrap()
|
||||||
|
# }
|
||||||
|
```
|
||||||
|
|
||||||
|
# Example: finding the leftmost first match
|
||||||
|
|
||||||
|
In the textbook description of Aho-Corasick, its formulation is typically
|
||||||
|
structured such that it reports all possible matches, even when they overlap
|
||||||
|
with another. In many cases, overlapping matches may not be desired, such as
|
||||||
|
the case of finding all successive non-overlapping matches like you might with
|
||||||
|
a standard regular expression.
|
||||||
|
|
||||||
|
Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
|
||||||
|
this doesn't always work in the expected way, since it will report matches as
|
||||||
|
soon as they are seen. For example, consider matching the regex `Samwise|Sam`
|
||||||
|
against the text `Samwise`. Most regex engines (that are Perl-like, or
|
||||||
|
non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
|
||||||
|
algorithm modified for reporting non-overlapping matches will report `Sam`.
|
||||||
|
|
||||||
|
A novel contribution of this library is the ability to change the match
|
||||||
|
semantics of Aho-Corasick (without additional search time overhead) such that
|
||||||
|
`Samwise` is reported instead. For example, here's the standard approach:
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::AhoCorasick;
|
||||||
|
|
||||||
|
let patterns = &["Samwise", "Sam"];
|
||||||
|
let haystack = "Samwise";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::new(patterns).unwrap();
|
||||||
|
let mat = ac.find(haystack).expect("should have a match");
|
||||||
|
assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
|
||||||
|
```
|
||||||
|
|
||||||
|
And now here's the leftmost-first version, which matches how a Perl-like
|
||||||
|
regex will work:
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::{AhoCorasick, MatchKind};
|
||||||
|
|
||||||
|
let patterns = &["Samwise", "Sam"];
|
||||||
|
let haystack = "Samwise";
|
||||||
|
|
||||||
|
let ac = AhoCorasick::builder()
|
||||||
|
.match_kind(MatchKind::LeftmostFirst)
|
||||||
|
.build(patterns)
|
||||||
|
.unwrap();
|
||||||
|
let mat = ac.find(haystack).expect("should have a match");
|
||||||
|
assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
|
||||||
|
```
|
||||||
|
|
||||||
|
In addition to leftmost-first semantics, this library also supports
|
||||||
|
leftmost-longest semantics, which match the POSIX behavior of a regular
|
||||||
|
expression alternation. See [`MatchKind`] for more details.
|
||||||
|
|
||||||
|
# Prefilters
|
||||||
|
|
||||||
|
While an Aho-Corasick automaton can perform admirably when compared to more
|
||||||
|
naive solutions, it is generally slower than more specialized algorithms that
|
||||||
|
are accelerated using vector instructions such as SIMD.
|
||||||
|
|
||||||
|
For that reason, this library will internally use a "prefilter" to attempt
|
||||||
|
to accelerate searches when possible. Currently, this library has several
|
||||||
|
different algorithms it might use depending on the patterns provided. Once the
|
||||||
|
number of patterns gets too big, prefilters are no longer used.
|
||||||
|
|
||||||
|
While a prefilter is generally good to have on by default since it works
|
||||||
|
well in the common case, it can lead to less predictable or even sub-optimal
|
||||||
|
performance in some cases. For that reason, prefilters can be explicitly
|
||||||
|
disabled via [`AhoCorasickBuilder::prefilter`].
|
||||||
|
|
||||||
|
# Lower level APIs
|
||||||
|
|
||||||
|
This crate also provides several sub-modules that collectively expose many of
|
||||||
|
the implementation details of the main [`AhoCorasick`] type. Most users of this
|
||||||
|
library can completely ignore the submodules and their contents, but if you
|
||||||
|
needed finer grained control, some parts of them may be useful to you. Here is
|
||||||
|
a brief overview of each and why you might want to use them:
|
||||||
|
|
||||||
|
* The [`packed`] sub-module contains a lower level API for using fast
|
||||||
|
vectorized routines for finding a small number of patterns in a haystack.
|
||||||
|
You might want to use this API when you want to completely side-step using
|
||||||
|
Aho-Corasick automata. Otherwise, the fast vectorized routines are used
|
||||||
|
automatically as prefilters for `AhoCorasick` searches whenever possible.
|
||||||
|
* The [`automaton`] sub-module provides a lower level finite state
|
||||||
|
machine interface that the various Aho-Corasick implementations in
|
||||||
|
this crate implement. This sub-module's main contribution is the
|
||||||
|
[`Automaton`](automaton::Automaton) trait, which permits manually walking the
|
||||||
|
state transitions of an Aho-Corasick automaton.
|
||||||
|
* The [`dfa`] and [`nfa`] sub-modules provide DFA and NFA implementations of
|
||||||
|
the aforementioned `Automaton` trait. The main reason one might want to use
|
||||||
|
these sub-modules is to get access to a type that implements the `Automaton`
|
||||||
|
trait. (The top-level `AhoCorasick` type does not implement the `Automaton`
|
||||||
|
trait.)
|
||||||
|
|
||||||
|
As mentioned above, if you aren't sure whether you need these sub-modules,
|
||||||
|
you should be able to safely ignore them and just focus on the [`AhoCorasick`]
|
||||||
|
type.
|
||||||
|
|
||||||
|
# Crate features
|
||||||
|
|
||||||
|
This crate exposes a few features for controlling dependency usage and whether
|
||||||
|
this crate can be used without the standard library.
|
||||||
|
|
||||||
|
* **std** -
|
||||||
|
Enables support for the standard library. This feature is enabled by
|
||||||
|
default. When disabled, only `core` and `alloc` are used. At an API
|
||||||
|
level, enabling `std` enables `std::error::Error` trait impls for the
|
||||||
|
various error types, and higher level stream search routines such as
|
||||||
|
[`AhoCorasick::try_stream_find_iter`]. But the `std` feature is also required
|
||||||
|
to enable vectorized prefilters. Prefilters can greatly accelerate searches,
|
||||||
|
but generally only apply when the number of patterns is small (less than
|
||||||
|
~100).
|
||||||
|
* **perf-literal** -
|
||||||
|
Enables support for literal prefilters that use vectorized routines from
|
||||||
|
external crates. This feature is enabled by default. If you're only using
|
||||||
|
Aho-Corasick for large numbers of patterns or otherwise can abide lower
|
||||||
|
throughput when searching with a small number of patterns, then it is
|
||||||
|
reasonable to disable this feature.
|
||||||
|
* **logging** -
|
||||||
|
Enables a dependency on the `log` crate and emits messages to aide in
|
||||||
|
diagnostics. This feature is disabled by default.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#![no_std]
|
||||||
|
#![deny(missing_docs)]
|
||||||
|
#![deny(rustdoc::broken_intra_doc_links)]
|
||||||
|
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
|
||||||
|
|
||||||
|
extern crate alloc;
|
||||||
|
#[cfg(any(test, feature = "std"))]
|
||||||
|
extern crate std;
|
||||||
|
|
||||||
|
#[cfg(doctest)]
|
||||||
|
doc_comment::doctest!("../README.md");
|
||||||
|
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
pub use crate::ahocorasick::StreamFindIter;
|
||||||
|
pub use crate::{
|
||||||
|
ahocorasick::{
|
||||||
|
AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, FindIter,
|
||||||
|
FindOverlappingIter,
|
||||||
|
},
|
||||||
|
util::{
|
||||||
|
error::{BuildError, MatchError, MatchErrorKind},
|
||||||
|
primitives::{PatternID, PatternIDError},
|
||||||
|
search::{Anchored, Input, Match, MatchKind, Span, StartKind},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
#[macro_use]
|
||||||
|
mod macros;
|
||||||
|
|
||||||
|
mod ahocorasick;
|
||||||
|
pub mod automaton;
|
||||||
|
pub mod dfa;
|
||||||
|
pub mod nfa;
|
||||||
|
pub mod packed;
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests;
|
||||||
|
// I wrote out the module for implementing fst::Automaton only to later realize
|
||||||
|
// that this would make fst a public dependency and fst is not at 1.0 yet. I
|
||||||
|
// decided to just keep the code in tree, but build it only during tests.
|
||||||
|
//
|
||||||
|
// TODO: I think I've changed my mind again. I'm considering pushing it out
|
||||||
|
// into either a separate crate or into 'fst' directly as an optional feature.
|
||||||
|
// #[cfg(test)]
|
||||||
|
// #[allow(dead_code)]
|
||||||
|
// mod transducer;
|
||||||
|
pub(crate) mod util;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod testoibits {
|
||||||
|
use std::panic::{RefUnwindSafe, UnwindSafe};
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn assert_all<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn oibits_main() {
|
||||||
|
assert_all::<AhoCorasick>();
|
||||||
|
assert_all::<AhoCorasickBuilder>();
|
||||||
|
assert_all::<AhoCorasickKind>();
|
||||||
|
assert_all::<FindIter>();
|
||||||
|
assert_all::<FindOverlappingIter>();
|
||||||
|
|
||||||
|
assert_all::<BuildError>();
|
||||||
|
assert_all::<MatchError>();
|
||||||
|
assert_all::<MatchErrorKind>();
|
||||||
|
|
||||||
|
assert_all::<Anchored>();
|
||||||
|
assert_all::<Input>();
|
||||||
|
assert_all::<Match>();
|
||||||
|
assert_all::<MatchKind>();
|
||||||
|
assert_all::<Span>();
|
||||||
|
assert_all::<StartKind>();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn oibits_automaton() {
|
||||||
|
use crate::{automaton, dfa::DFA};
|
||||||
|
|
||||||
|
assert_all::<automaton::FindIter<DFA>>();
|
||||||
|
assert_all::<automaton::FindOverlappingIter<DFA>>();
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
assert_all::<automaton::StreamFindIter<DFA, std::io::Stdin>>();
|
||||||
|
assert_all::<automaton::OverlappingState>();
|
||||||
|
|
||||||
|
assert_all::<automaton::Prefilter>();
|
||||||
|
assert_all::<automaton::Candidate>();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn oibits_packed() {
|
||||||
|
use crate::packed;
|
||||||
|
|
||||||
|
assert_all::<packed::Config>();
|
||||||
|
assert_all::<packed::Builder>();
|
||||||
|
assert_all::<packed::Searcher>();
|
||||||
|
assert_all::<packed::FindIter>();
|
||||||
|
assert_all::<packed::MatchKind>();
|
||||||
|
}
|
||||||
|
}
|
||||||
18
.gear/predownloaded-development/vendor/aho-corasick/src/macros.rs
vendored
Normal file
18
.gear/predownloaded-development/vendor/aho-corasick/src/macros.rs
vendored
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
#![allow(unused_macros)]
|
||||||
|
|
||||||
|
macro_rules! log {
|
||||||
|
($($tt:tt)*) => {
|
||||||
|
#[cfg(feature = "logging")]
|
||||||
|
{
|
||||||
|
$($tt)*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! debug {
|
||||||
|
($($tt:tt)*) => { log!(log::debug!($($tt)*)) }
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! trace {
|
||||||
|
($($tt:tt)*) => { log!(log::trace!($($tt)*)) }
|
||||||
|
}
|
||||||
1141
.gear/predownloaded-development/vendor/aho-corasick/src/nfa/contiguous.rs
vendored
Normal file
1141
.gear/predownloaded-development/vendor/aho-corasick/src/nfa/contiguous.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
40
.gear/predownloaded-development/vendor/aho-corasick/src/nfa/mod.rs
vendored
Normal file
40
.gear/predownloaded-development/vendor/aho-corasick/src/nfa/mod.rs
vendored
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
/*!
|
||||||
|
Provides direct access to NFA implementations of Aho-Corasick.
|
||||||
|
|
||||||
|
The principle characteristic of an NFA in this crate is that it may
|
||||||
|
transition through multiple states per byte of haystack. In Aho-Corasick
|
||||||
|
parlance, NFAs follow failure transitions during a search. In contrast,
|
||||||
|
a [`DFA`](crate::dfa::DFA) pre-computes all failure transitions during
|
||||||
|
compilation at the expense of a much bigger memory footprint.
|
||||||
|
|
||||||
|
Currently, there are two NFA implementations provided: noncontiguous and
|
||||||
|
contiguous. The names reflect their internal representation, and consequently,
|
||||||
|
the trade offs associated with them:
|
||||||
|
|
||||||
|
* A [`noncontiguous::NFA`] uses a separate allocation for every NFA state to
|
||||||
|
represent its transitions in a sparse format. This is ideal for building an
|
||||||
|
NFA, since it cheaply permits different states to have a different number of
|
||||||
|
transitions. A noncontiguous NFA is where the main Aho-Corasick construction
|
||||||
|
algorithm is implemented. All other Aho-Corasick implementations are built by
|
||||||
|
first constructing a noncontiguous NFA.
|
||||||
|
* A [`contiguous::NFA`] is uses a single allocation to represent all states,
|
||||||
|
while still encoding most states as sparse states but permitting states near
|
||||||
|
the starting state to have a dense representation. The dense representation
|
||||||
|
uses more memory, but permits computing transitions during a search more
|
||||||
|
quickly. By only making the most active states dense (the states near the
|
||||||
|
starting state), a contiguous NFA better balances memory usage with search
|
||||||
|
speed. The single contiguous allocation also uses less overhead per state and
|
||||||
|
enables compression tricks where most states only use 8 bytes of heap memory.
|
||||||
|
|
||||||
|
When given the choice between these two, you almost always want to pick a
|
||||||
|
contiguous NFA. It takes only a little longer to build, but both its memory
|
||||||
|
usage and search speed are typically much better than a noncontiguous NFA. A
|
||||||
|
noncontiguous NFA is useful when prioritizing build times, or when there are
|
||||||
|
so many patterns that a contiguous NFA could not be built. (Currently, because
|
||||||
|
of both memory and search speed improvements, a contiguous NFA has a smaller
|
||||||
|
internal limit on the total number of NFA states it can represent. But you
|
||||||
|
would likely need to have hundreds of thousands or even millions of patterns
|
||||||
|
before you hit this limit.)
|
||||||
|
*/
|
||||||
|
pub mod contiguous;
|
||||||
|
pub mod noncontiguous;
|
||||||
1762
.gear/predownloaded-development/vendor/aho-corasick/src/nfa/noncontiguous.rs
vendored
Normal file
1762
.gear/predownloaded-development/vendor/aho-corasick/src/nfa/noncontiguous.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
687
.gear/predownloaded-development/vendor/aho-corasick/src/packed/api.rs
vendored
Normal file
687
.gear/predownloaded-development/vendor/aho-corasick/src/packed/api.rs
vendored
Normal file
|
|
@ -0,0 +1,687 @@
|
||||||
|
use alloc::sync::Arc;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
packed::{pattern::Patterns, rabinkarp::RabinKarp, teddy},
|
||||||
|
util::search::{Match, Span},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// This is a limit placed on the total number of patterns we're willing to try
|
||||||
|
/// and match at once. As more sophisticated algorithms are added, this number
|
||||||
|
/// may be increased.
|
||||||
|
const PATTERN_LIMIT: usize = 128;
|
||||||
|
|
||||||
|
/// A knob for controlling the match semantics of a packed multiple string
|
||||||
|
/// searcher.
|
||||||
|
///
|
||||||
|
/// This differs from the [`MatchKind`](crate::MatchKind) type in the top-level
|
||||||
|
/// crate module in that it doesn't support "standard" match semantics,
|
||||||
|
/// and instead only supports leftmost-first or leftmost-longest. Namely,
|
||||||
|
/// "standard" semantics cannot be easily supported by packed searchers.
|
||||||
|
///
|
||||||
|
/// For more information on the distinction between leftmost-first and
|
||||||
|
/// leftmost-longest, see the docs on the top-level `MatchKind` type.
|
||||||
|
///
|
||||||
|
/// Unlike the top-level `MatchKind` type, the default match semantics for this
|
||||||
|
/// type are leftmost-first.
|
||||||
|
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub enum MatchKind {
|
||||||
|
/// Use leftmost-first match semantics, which reports leftmost matches.
|
||||||
|
/// When there are multiple possible leftmost matches, the match
|
||||||
|
/// corresponding to the pattern that appeared earlier when constructing
|
||||||
|
/// the automaton is reported.
|
||||||
|
///
|
||||||
|
/// This is the default.
|
||||||
|
LeftmostFirst,
|
||||||
|
/// Use leftmost-longest match semantics, which reports leftmost matches.
|
||||||
|
/// When there are multiple possible leftmost matches, the longest match
|
||||||
|
/// is chosen.
|
||||||
|
LeftmostLongest,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for MatchKind {
|
||||||
|
fn default() -> MatchKind {
|
||||||
|
MatchKind::LeftmostFirst
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The configuration for a packed multiple pattern searcher.
|
||||||
|
///
|
||||||
|
/// The configuration is currently limited only to being able to select the
|
||||||
|
/// match semantics (leftmost-first or leftmost-longest) of a searcher. In the
|
||||||
|
/// future, more knobs may be made available.
|
||||||
|
///
|
||||||
|
/// A configuration produces a [`packed::Builder`](Builder), which in turn can
|
||||||
|
/// be used to construct a [`packed::Searcher`](Searcher) for searching.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// This example shows how to use leftmost-longest semantics instead of the
|
||||||
|
/// default (leftmost-first).
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{packed::{Config, MatchKind}, PatternID};
|
||||||
|
///
|
||||||
|
/// # fn example() -> Option<()> {
|
||||||
|
/// let searcher = Config::new()
|
||||||
|
/// .match_kind(MatchKind::LeftmostLongest)
|
||||||
|
/// .builder()
|
||||||
|
/// .add("foo")
|
||||||
|
/// .add("foobar")
|
||||||
|
/// .build()?;
|
||||||
|
/// let matches: Vec<PatternID> = searcher
|
||||||
|
/// .find_iter("foobar")
|
||||||
|
/// .map(|mat| mat.pattern())
|
||||||
|
/// .collect();
|
||||||
|
/// assert_eq!(vec![PatternID::must(1)], matches);
|
||||||
|
/// # Some(()) }
|
||||||
|
/// # if cfg!(all(feature = "std", any(
|
||||||
|
/// # target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
/// # ))) {
|
||||||
|
/// # example().unwrap()
|
||||||
|
/// # } else {
|
||||||
|
/// # assert!(example().is_none());
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Config {
|
||||||
|
kind: MatchKind,
|
||||||
|
force: Option<ForceAlgorithm>,
|
||||||
|
only_teddy_fat: Option<bool>,
|
||||||
|
only_teddy_256bit: Option<bool>,
|
||||||
|
heuristic_pattern_limits: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An internal option for forcing the use of a particular packed algorithm.
|
||||||
|
///
|
||||||
|
/// When an algorithm is forced, if a searcher could not be constructed for it,
|
||||||
|
/// then no searcher will be returned even if an alternative algorithm would
|
||||||
|
/// work.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
enum ForceAlgorithm {
|
||||||
|
Teddy,
|
||||||
|
RabinKarp,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Config {
|
||||||
|
fn default() -> Config {
|
||||||
|
Config::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Config {
|
||||||
|
/// Create a new default configuration. A default configuration uses
|
||||||
|
/// leftmost-first match semantics.
|
||||||
|
pub fn new() -> Config {
|
||||||
|
Config {
|
||||||
|
kind: MatchKind::LeftmostFirst,
|
||||||
|
force: None,
|
||||||
|
only_teddy_fat: None,
|
||||||
|
only_teddy_256bit: None,
|
||||||
|
heuristic_pattern_limits: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a packed builder from this configuration. The builder can be
|
||||||
|
/// used to accumulate patterns and create a [`Searcher`] from them.
|
||||||
|
pub fn builder(&self) -> Builder {
|
||||||
|
Builder::from_config(self.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the match semantics for this configuration.
|
||||||
|
pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config {
|
||||||
|
self.kind = kind;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An undocumented method for forcing the use of the Teddy algorithm.
|
||||||
|
///
|
||||||
|
/// This is only exposed for more precise testing and benchmarks. Callers
|
||||||
|
/// should not use it as it is not part of the API stability guarantees of
|
||||||
|
/// this crate.
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub fn only_teddy(&mut self, yes: bool) -> &mut Config {
|
||||||
|
if yes {
|
||||||
|
self.force = Some(ForceAlgorithm::Teddy);
|
||||||
|
} else {
|
||||||
|
self.force = None;
|
||||||
|
}
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An undocumented method for forcing the use of the Fat Teddy algorithm.
|
||||||
|
///
|
||||||
|
/// This is only exposed for more precise testing and benchmarks. Callers
|
||||||
|
/// should not use it as it is not part of the API stability guarantees of
|
||||||
|
/// this crate.
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub fn only_teddy_fat(&mut self, yes: Option<bool>) -> &mut Config {
|
||||||
|
self.only_teddy_fat = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An undocumented method for forcing the use of SSE (`Some(false)`) or
|
||||||
|
/// AVX (`Some(true)`) algorithms.
|
||||||
|
///
|
||||||
|
/// This is only exposed for more precise testing and benchmarks. Callers
|
||||||
|
/// should not use it as it is not part of the API stability guarantees of
|
||||||
|
/// this crate.
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub fn only_teddy_256bit(&mut self, yes: Option<bool>) -> &mut Config {
|
||||||
|
self.only_teddy_256bit = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An undocumented method for forcing the use of the Rabin-Karp algorithm.
|
||||||
|
///
|
||||||
|
/// This is only exposed for more precise testing and benchmarks. Callers
|
||||||
|
/// should not use it as it is not part of the API stability guarantees of
|
||||||
|
/// this crate.
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub fn only_rabin_karp(&mut self, yes: bool) -> &mut Config {
|
||||||
|
if yes {
|
||||||
|
self.force = Some(ForceAlgorithm::RabinKarp);
|
||||||
|
} else {
|
||||||
|
self.force = None;
|
||||||
|
}
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Request that heuristic limitations on the number of patterns be
|
||||||
|
/// employed. This useful to disable for benchmarking where one wants to
|
||||||
|
/// explore how Teddy performs on large number of patterns even if the
|
||||||
|
/// heuristics would otherwise refuse construction.
|
||||||
|
///
|
||||||
|
/// This is enabled by default.
|
||||||
|
pub fn heuristic_pattern_limits(&mut self, yes: bool) -> &mut Config {
|
||||||
|
self.heuristic_pattern_limits = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A builder for constructing a packed searcher from a collection of patterns.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// This example shows how to use a builder to construct a searcher. By
|
||||||
|
/// default, leftmost-first match semantics are used.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{packed::{Builder, MatchKind}, PatternID};
|
||||||
|
///
|
||||||
|
/// # fn example() -> Option<()> {
|
||||||
|
/// let searcher = Builder::new()
|
||||||
|
/// .add("foobar")
|
||||||
|
/// .add("foo")
|
||||||
|
/// .build()?;
|
||||||
|
/// let matches: Vec<PatternID> = searcher
|
||||||
|
/// .find_iter("foobar")
|
||||||
|
/// .map(|mat| mat.pattern())
|
||||||
|
/// .collect();
|
||||||
|
/// assert_eq!(vec![PatternID::ZERO], matches);
|
||||||
|
/// # Some(()) }
|
||||||
|
/// # if cfg!(all(feature = "std", any(
|
||||||
|
/// # target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
/// # ))) {
|
||||||
|
/// # example().unwrap()
|
||||||
|
/// # } else {
|
||||||
|
/// # assert!(example().is_none());
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Builder {
|
||||||
|
/// The configuration of this builder and subsequent matcher.
|
||||||
|
config: Config,
|
||||||
|
/// Set to true if the builder detects that a matcher cannot be built.
|
||||||
|
inert: bool,
|
||||||
|
/// The patterns provided by the caller.
|
||||||
|
patterns: Patterns,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Builder {
|
||||||
|
/// Create a new builder for constructing a multi-pattern searcher. This
|
||||||
|
/// constructor uses the default configuration.
|
||||||
|
pub fn new() -> Builder {
|
||||||
|
Builder::from_config(Config::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_config(config: Config) -> Builder {
|
||||||
|
Builder { config, inert: false, patterns: Patterns::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a searcher from the patterns added to this builder so far.
|
||||||
|
pub fn build(&self) -> Option<Searcher> {
|
||||||
|
if self.inert || self.patterns.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let mut patterns = self.patterns.clone();
|
||||||
|
patterns.set_match_kind(self.config.kind);
|
||||||
|
let patterns = Arc::new(patterns);
|
||||||
|
let rabinkarp = RabinKarp::new(&patterns);
|
||||||
|
// Effectively, we only want to return a searcher if we can use Teddy,
|
||||||
|
// since Teddy is our only fast packed searcher at the moment.
|
||||||
|
// Rabin-Karp is only used when searching haystacks smaller than what
|
||||||
|
// Teddy can support. Thus, the only way to get a Rabin-Karp searcher
|
||||||
|
// is to force it using undocumented APIs (for tests/benchmarks).
|
||||||
|
let (search_kind, minimum_len) = match self.config.force {
|
||||||
|
None | Some(ForceAlgorithm::Teddy) => {
|
||||||
|
debug!("trying to build Teddy packed matcher");
|
||||||
|
let teddy = match self.build_teddy(Arc::clone(&patterns)) {
|
||||||
|
None => return None,
|
||||||
|
Some(teddy) => teddy,
|
||||||
|
};
|
||||||
|
let minimum_len = teddy.minimum_len();
|
||||||
|
(SearchKind::Teddy(teddy), minimum_len)
|
||||||
|
}
|
||||||
|
Some(ForceAlgorithm::RabinKarp) => {
|
||||||
|
debug!("using Rabin-Karp packed matcher");
|
||||||
|
(SearchKind::RabinKarp, 0)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Some(Searcher { patterns, rabinkarp, search_kind, minimum_len })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_teddy(&self, patterns: Arc<Patterns>) -> Option<teddy::Searcher> {
|
||||||
|
teddy::Builder::new()
|
||||||
|
.only_256bit(self.config.only_teddy_256bit)
|
||||||
|
.only_fat(self.config.only_teddy_fat)
|
||||||
|
.heuristic_pattern_limits(self.config.heuristic_pattern_limits)
|
||||||
|
.build(patterns)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add the given pattern to this set to match.
|
||||||
|
///
|
||||||
|
/// The order in which patterns are added is significant. Namely, when
|
||||||
|
/// using leftmost-first match semantics, then when multiple patterns can
|
||||||
|
/// match at a particular location, the pattern that was added first is
|
||||||
|
/// used as the match.
|
||||||
|
///
|
||||||
|
/// If the number of patterns added exceeds the amount supported by packed
|
||||||
|
/// searchers, then the builder will stop accumulating patterns and render
|
||||||
|
/// itself inert. At this point, constructing a searcher will always return
|
||||||
|
/// `None`.
|
||||||
|
pub fn add<P: AsRef<[u8]>>(&mut self, pattern: P) -> &mut Builder {
|
||||||
|
if self.inert {
|
||||||
|
return self;
|
||||||
|
} else if self.patterns.len() >= PATTERN_LIMIT {
|
||||||
|
self.inert = true;
|
||||||
|
self.patterns.reset();
|
||||||
|
return self;
|
||||||
|
}
|
||||||
|
// Just in case PATTERN_LIMIT increases beyond u16::MAX.
|
||||||
|
assert!(self.patterns.len() <= core::u16::MAX as usize);
|
||||||
|
|
||||||
|
let pattern = pattern.as_ref();
|
||||||
|
if pattern.is_empty() {
|
||||||
|
self.inert = true;
|
||||||
|
self.patterns.reset();
|
||||||
|
return self;
|
||||||
|
}
|
||||||
|
self.patterns.add(pattern);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add the given iterator of patterns to this set to match.
|
||||||
|
///
|
||||||
|
/// The iterator must yield elements that can be converted into a `&[u8]`.
|
||||||
|
///
|
||||||
|
/// The order in which patterns are added is significant. Namely, when
|
||||||
|
/// using leftmost-first match semantics, then when multiple patterns can
|
||||||
|
/// match at a particular location, the pattern that was added first is
|
||||||
|
/// used as the match.
|
||||||
|
///
|
||||||
|
/// If the number of patterns added exceeds the amount supported by packed
|
||||||
|
/// searchers, then the builder will stop accumulating patterns and render
|
||||||
|
/// itself inert. At this point, constructing a searcher will always return
|
||||||
|
/// `None`.
|
||||||
|
pub fn extend<I, P>(&mut self, patterns: I) -> &mut Builder
|
||||||
|
where
|
||||||
|
I: IntoIterator<Item = P>,
|
||||||
|
P: AsRef<[u8]>,
|
||||||
|
{
|
||||||
|
for p in patterns {
|
||||||
|
self.add(p);
|
||||||
|
}
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of patterns added to this builder.
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.patterns.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the length, in bytes, of the shortest pattern added.
|
||||||
|
pub fn minimum_len(&self) -> usize {
|
||||||
|
self.patterns.minimum_len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Builder {
|
||||||
|
fn default() -> Builder {
|
||||||
|
Builder::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A packed searcher for quickly finding occurrences of multiple patterns.
|
||||||
|
///
|
||||||
|
/// If callers need more flexible construction, or if one wants to change the
|
||||||
|
/// match semantics (either leftmost-first or leftmost-longest), then one can
|
||||||
|
/// use the [`Config`] and/or [`Builder`] types for more fine grained control.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// This example shows how to create a searcher from an iterator of patterns.
|
||||||
|
/// By default, leftmost-first match semantics are used.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
|
||||||
|
///
|
||||||
|
/// # fn example() -> Option<()> {
|
||||||
|
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||||
|
/// let matches: Vec<PatternID> = searcher
|
||||||
|
/// .find_iter("foobar")
|
||||||
|
/// .map(|mat| mat.pattern())
|
||||||
|
/// .collect();
|
||||||
|
/// assert_eq!(vec![PatternID::ZERO], matches);
|
||||||
|
/// # Some(()) }
|
||||||
|
/// # if cfg!(all(feature = "std", any(
|
||||||
|
/// # target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
/// # ))) {
|
||||||
|
/// # example().unwrap()
|
||||||
|
/// # } else {
|
||||||
|
/// # assert!(example().is_none());
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Searcher {
|
||||||
|
patterns: Arc<Patterns>,
|
||||||
|
rabinkarp: RabinKarp,
|
||||||
|
search_kind: SearchKind,
|
||||||
|
minimum_len: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
enum SearchKind {
|
||||||
|
Teddy(teddy::Searcher),
|
||||||
|
RabinKarp,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Searcher {
|
||||||
|
/// A convenience function for constructing a searcher from an iterator
|
||||||
|
/// of things that can be converted to a `&[u8]`.
|
||||||
|
///
|
||||||
|
/// If a searcher could not be constructed (either because of an
|
||||||
|
/// unsupported CPU or because there are too many patterns), then `None`
|
||||||
|
/// is returned.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
|
||||||
|
///
|
||||||
|
/// # fn example() -> Option<()> {
|
||||||
|
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||||
|
/// let matches: Vec<PatternID> = searcher
|
||||||
|
/// .find_iter("foobar")
|
||||||
|
/// .map(|mat| mat.pattern())
|
||||||
|
/// .collect();
|
||||||
|
/// assert_eq!(vec![PatternID::ZERO], matches);
|
||||||
|
/// # Some(()) }
|
||||||
|
/// # if cfg!(all(feature = "std", any(
|
||||||
|
/// # target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
/// # ))) {
|
||||||
|
/// # example().unwrap()
|
||||||
|
/// # } else {
|
||||||
|
/// # assert!(example().is_none());
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
pub fn new<I, P>(patterns: I) -> Option<Searcher>
|
||||||
|
where
|
||||||
|
I: IntoIterator<Item = P>,
|
||||||
|
P: AsRef<[u8]>,
|
||||||
|
{
|
||||||
|
Builder::new().extend(patterns).build()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A convenience function for calling `Config::new()`.
|
||||||
|
///
|
||||||
|
/// This is useful for avoiding an additional import.
|
||||||
|
pub fn config() -> Config {
|
||||||
|
Config::new()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A convenience function for calling `Builder::new()`.
|
||||||
|
///
|
||||||
|
/// This is useful for avoiding an additional import.
|
||||||
|
pub fn builder() -> Builder {
|
||||||
|
Builder::new()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the first occurrence of any of the patterns in this searcher,
|
||||||
|
/// according to its match semantics, in the given haystack. The `Match`
|
||||||
|
/// returned will include the identifier of the pattern that matched, which
|
||||||
|
/// corresponds to the index of the pattern (starting from `0`) in which it
|
||||||
|
/// was added.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
|
||||||
|
///
|
||||||
|
/// # fn example() -> Option<()> {
|
||||||
|
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||||
|
/// let mat = searcher.find("foobar")?;
|
||||||
|
/// assert_eq!(PatternID::ZERO, mat.pattern());
|
||||||
|
/// assert_eq!(0, mat.start());
|
||||||
|
/// assert_eq!(6, mat.end());
|
||||||
|
/// # Some(()) }
|
||||||
|
/// # if cfg!(all(feature = "std", any(
|
||||||
|
/// # target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
/// # ))) {
|
||||||
|
/// # example().unwrap()
|
||||||
|
/// # } else {
|
||||||
|
/// # assert!(example().is_none());
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> {
|
||||||
|
let haystack = haystack.as_ref();
|
||||||
|
self.find_in(haystack, Span::from(0..haystack.len()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the first occurrence of any of the patterns in this searcher,
|
||||||
|
/// according to its match semantics, in the given haystack starting from
|
||||||
|
/// the given position.
|
||||||
|
///
|
||||||
|
/// The `Match` returned will include the identifier of the pattern that
|
||||||
|
/// matched, which corresponds to the index of the pattern (starting from
|
||||||
|
/// `0`) in which it was added. The offsets in the `Match` will be relative
|
||||||
|
/// to the start of `haystack` (and not `at`).
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID, Span};
|
||||||
|
///
|
||||||
|
/// # fn example() -> Option<()> {
|
||||||
|
/// let haystack = "foofoobar";
|
||||||
|
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||||
|
/// let mat = searcher.find_in(haystack, Span::from(3..haystack.len()))?;
|
||||||
|
/// assert_eq!(PatternID::ZERO, mat.pattern());
|
||||||
|
/// assert_eq!(3, mat.start());
|
||||||
|
/// assert_eq!(9, mat.end());
|
||||||
|
/// # Some(()) }
|
||||||
|
/// # if cfg!(all(feature = "std", any(
|
||||||
|
/// # target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
/// # ))) {
|
||||||
|
/// # example().unwrap()
|
||||||
|
/// # } else {
|
||||||
|
/// # assert!(example().is_none());
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn find_in<B: AsRef<[u8]>>(
|
||||||
|
&self,
|
||||||
|
haystack: B,
|
||||||
|
span: Span,
|
||||||
|
) -> Option<Match> {
|
||||||
|
let haystack = haystack.as_ref();
|
||||||
|
match self.search_kind {
|
||||||
|
SearchKind::Teddy(ref teddy) => {
|
||||||
|
if haystack[span].len() < teddy.minimum_len() {
|
||||||
|
return self.find_in_slow(haystack, span);
|
||||||
|
}
|
||||||
|
teddy.find(&haystack[..span.end], span.start)
|
||||||
|
}
|
||||||
|
SearchKind::RabinKarp => {
|
||||||
|
self.rabinkarp.find_at(&haystack[..span.end], span.start)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return an iterator of non-overlapping occurrences of the patterns in
|
||||||
|
/// this searcher, according to its match semantics, in the given haystack.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
|
||||||
|
///
|
||||||
|
/// # fn example() -> Option<()> {
|
||||||
|
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||||
|
/// let matches: Vec<PatternID> = searcher
|
||||||
|
/// .find_iter("foobar fooba foofoo")
|
||||||
|
/// .map(|mat| mat.pattern())
|
||||||
|
/// .collect();
|
||||||
|
/// assert_eq!(vec![
|
||||||
|
/// PatternID::must(0),
|
||||||
|
/// PatternID::must(1),
|
||||||
|
/// PatternID::must(1),
|
||||||
|
/// PatternID::must(1),
|
||||||
|
/// ], matches);
|
||||||
|
/// # Some(()) }
|
||||||
|
/// # if cfg!(all(feature = "std", any(
|
||||||
|
/// # target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
/// # ))) {
|
||||||
|
/// # example().unwrap()
|
||||||
|
/// # } else {
|
||||||
|
/// # assert!(example().is_none());
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>(
|
||||||
|
&'a self,
|
||||||
|
haystack: &'b B,
|
||||||
|
) -> FindIter<'a, 'b> {
|
||||||
|
let haystack = haystack.as_ref();
|
||||||
|
let span = Span::from(0..haystack.len());
|
||||||
|
FindIter { searcher: self, haystack, span }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the match kind used by this packed searcher.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Basic usage:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::packed::{MatchKind, Searcher};
|
||||||
|
///
|
||||||
|
/// # fn example() -> Option<()> {
|
||||||
|
/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||||
|
/// // leftmost-first is the default.
|
||||||
|
/// assert_eq!(&MatchKind::LeftmostFirst, searcher.match_kind());
|
||||||
|
/// # Some(()) }
|
||||||
|
/// # if cfg!(all(feature = "std", any(
|
||||||
|
/// # target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
/// # ))) {
|
||||||
|
/// # example().unwrap()
|
||||||
|
/// # } else {
|
||||||
|
/// # assert!(example().is_none());
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn match_kind(&self) -> &MatchKind {
|
||||||
|
self.patterns.match_kind()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the minimum length of a haystack that is required in order for
|
||||||
|
/// packed searching to be effective.
|
||||||
|
///
|
||||||
|
/// In some cases, the underlying packed searcher may not be able to search
|
||||||
|
/// very short haystacks. When that occurs, the implementation will defer
|
||||||
|
/// to a slower non-packed searcher (which is still generally faster than
|
||||||
|
/// Aho-Corasick for a small number of patterns). However, callers may
|
||||||
|
/// want to avoid ever using the slower variant, which one can do by
|
||||||
|
/// never passing a haystack shorter than the minimum length returned by
|
||||||
|
/// this method.
|
||||||
|
#[inline]
|
||||||
|
pub fn minimum_len(&self) -> usize {
|
||||||
|
self.minimum_len
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the approximate total amount of heap used by this searcher, in
|
||||||
|
/// units of bytes.
|
||||||
|
#[inline]
|
||||||
|
pub fn memory_usage(&self) -> usize {
|
||||||
|
self.patterns.memory_usage()
|
||||||
|
+ self.rabinkarp.memory_usage()
|
||||||
|
+ self.search_kind.memory_usage()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Use a slow (non-packed) searcher.
|
||||||
|
///
|
||||||
|
/// This is useful when a packed searcher could be constructed, but could
|
||||||
|
/// not be used to search a specific haystack. For example, if Teddy was
|
||||||
|
/// built but the haystack is smaller than ~34 bytes, then Teddy might not
|
||||||
|
/// be able to run.
|
||||||
|
fn find_in_slow(&self, haystack: &[u8], span: Span) -> Option<Match> {
|
||||||
|
self.rabinkarp.find_at(&haystack[..span.end], span.start)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SearchKind {
|
||||||
|
fn memory_usage(&self) -> usize {
|
||||||
|
match *self {
|
||||||
|
SearchKind::Teddy(ref ted) => ted.memory_usage(),
|
||||||
|
SearchKind::RabinKarp => 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over non-overlapping matches from a packed searcher.
|
||||||
|
///
|
||||||
|
/// The lifetime `'s` refers to the lifetime of the underlying [`Searcher`],
|
||||||
|
/// while the lifetime `'h` refers to the lifetime of the haystack being
|
||||||
|
/// searched.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct FindIter<'s, 'h> {
|
||||||
|
searcher: &'s Searcher,
|
||||||
|
haystack: &'h [u8],
|
||||||
|
span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s, 'h> Iterator for FindIter<'s, 'h> {
|
||||||
|
type Item = Match;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Match> {
|
||||||
|
if self.span.start > self.span.end {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
match self.searcher.find_in(&self.haystack, self.span) {
|
||||||
|
None => None,
|
||||||
|
Some(m) => {
|
||||||
|
self.span.start = m.end();
|
||||||
|
Some(m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
39
.gear/predownloaded-development/vendor/aho-corasick/src/packed/ext.rs
vendored
Normal file
39
.gear/predownloaded-development/vendor/aho-corasick/src/packed/ext.rs
vendored
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
/// A trait for adding some helper routines to pointers.
|
||||||
|
pub(crate) trait Pointer {
|
||||||
|
/// Returns the distance, in units of `T`, between `self` and `origin`.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// Same as `ptr::offset_from` in addition to `self >= origin`.
|
||||||
|
unsafe fn distance(self, origin: Self) -> usize;
|
||||||
|
|
||||||
|
/// Casts this pointer to `usize`.
|
||||||
|
///
|
||||||
|
/// Callers should not convert the `usize` back to a pointer if at all
|
||||||
|
/// possible. (And if you believe it's necessary, open an issue to discuss
|
||||||
|
/// why. Otherwise, it has the potential to violate pointer provenance.)
|
||||||
|
/// The purpose of this function is just to be able to do arithmetic, i.e.,
|
||||||
|
/// computing offsets or alignments.
|
||||||
|
fn as_usize(self) -> usize;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Pointer for *const T {
|
||||||
|
unsafe fn distance(self, origin: *const T) -> usize {
|
||||||
|
// TODO: Replace with `ptr::sub_ptr` once stabilized.
|
||||||
|
usize::try_from(self.offset_from(origin)).unwrap_unchecked()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Pointer for *mut T {
|
||||||
|
unsafe fn distance(self, origin: *mut T) -> usize {
|
||||||
|
(self as *const T).distance(origin as *const T)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
(self as *const T).as_usize()
|
||||||
|
}
|
||||||
|
}
|
||||||
120
.gear/predownloaded-development/vendor/aho-corasick/src/packed/mod.rs
vendored
Normal file
120
.gear/predownloaded-development/vendor/aho-corasick/src/packed/mod.rs
vendored
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
/*!
|
||||||
|
Provides packed multiple substring search, principally for a small number of
|
||||||
|
patterns.
|
||||||
|
|
||||||
|
This sub-module provides vectorized routines for quickly finding
|
||||||
|
matches of a small number of patterns. In general, users of this crate
|
||||||
|
shouldn't need to interface with this module directly, as the primary
|
||||||
|
[`AhoCorasick`](crate::AhoCorasick) searcher will use these routines
|
||||||
|
automatically as a prefilter when applicable. However, in some cases, callers
|
||||||
|
may want to bypass the Aho-Corasick machinery entirely and use this vectorized
|
||||||
|
searcher directly.
|
||||||
|
|
||||||
|
# Overview
|
||||||
|
|
||||||
|
The primary types in this sub-module are:
|
||||||
|
|
||||||
|
* [`Searcher`] executes the actual search algorithm to report matches in a
|
||||||
|
haystack.
|
||||||
|
* [`Builder`] accumulates patterns incrementally and can construct a
|
||||||
|
`Searcher`.
|
||||||
|
* [`Config`] permits tuning the searcher, and itself will produce a `Builder`
|
||||||
|
(which can then be used to build a `Searcher`). Currently, the only tuneable
|
||||||
|
knob are the match semantics, but this may be expanded in the future.
|
||||||
|
|
||||||
|
# Examples
|
||||||
|
|
||||||
|
This example shows how to create a searcher from an iterator of patterns.
|
||||||
|
By default, leftmost-first match semantics are used. (See the top-level
|
||||||
|
[`MatchKind`] type for more details about match semantics, which apply
|
||||||
|
similarly to packed substring search.)
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
|
||||||
|
|
||||||
|
# fn example() -> Option<()> {
|
||||||
|
let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
|
||||||
|
let matches: Vec<PatternID> = searcher
|
||||||
|
.find_iter("foobar")
|
||||||
|
.map(|mat| mat.pattern())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(vec![PatternID::ZERO], matches);
|
||||||
|
# Some(()) }
|
||||||
|
# if cfg!(all(feature = "std", any(
|
||||||
|
# target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
# ))) {
|
||||||
|
# example().unwrap()
|
||||||
|
# } else {
|
||||||
|
# assert!(example().is_none());
|
||||||
|
# }
|
||||||
|
```
|
||||||
|
|
||||||
|
This example shows how to use [`Config`] to change the match semantics to
|
||||||
|
leftmost-longest:
|
||||||
|
|
||||||
|
```
|
||||||
|
use aho_corasick::{packed::{Config, MatchKind}, PatternID};
|
||||||
|
|
||||||
|
# fn example() -> Option<()> {
|
||||||
|
let searcher = Config::new()
|
||||||
|
.match_kind(MatchKind::LeftmostLongest)
|
||||||
|
.builder()
|
||||||
|
.add("foo")
|
||||||
|
.add("foobar")
|
||||||
|
.build()?;
|
||||||
|
let matches: Vec<PatternID> = searcher
|
||||||
|
.find_iter("foobar")
|
||||||
|
.map(|mat| mat.pattern())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(vec![PatternID::must(1)], matches);
|
||||||
|
# Some(()) }
|
||||||
|
# if cfg!(all(feature = "std", any(
|
||||||
|
# target_arch = "x86_64", target_arch = "aarch64",
|
||||||
|
# ))) {
|
||||||
|
# example().unwrap()
|
||||||
|
# } else {
|
||||||
|
# assert!(example().is_none());
|
||||||
|
# }
|
||||||
|
```
|
||||||
|
|
||||||
|
# Packed substring searching
|
||||||
|
|
||||||
|
Packed substring searching refers to the use of SIMD (Single Instruction,
|
||||||
|
Multiple Data) to accelerate the detection of matches in a haystack. Unlike
|
||||||
|
conventional algorithms, such as Aho-Corasick, SIMD algorithms for substring
|
||||||
|
search tend to do better with a small number of patterns, where as Aho-Corasick
|
||||||
|
generally maintains reasonably consistent performance regardless of the number
|
||||||
|
of patterns you give it. Because of this, the vectorized searcher in this
|
||||||
|
sub-module cannot be used as a general purpose searcher, since building the
|
||||||
|
searcher may fail even when given a small number of patterns. However, in
|
||||||
|
exchange, when searching for a small number of patterns, searching can be quite
|
||||||
|
a bit faster than Aho-Corasick (sometimes by an order of magnitude).
|
||||||
|
|
||||||
|
The key take away here is that constructing a searcher from a list of patterns
|
||||||
|
is a fallible operation with no clear rules for when it will fail. While the
|
||||||
|
precise conditions under which building a searcher can fail is specifically an
|
||||||
|
implementation detail, here are some common reasons:
|
||||||
|
|
||||||
|
* Too many patterns were given. Typically, the limit is on the order of 100 or
|
||||||
|
so, but this limit may fluctuate based on available CPU features.
|
||||||
|
* The available packed algorithms require CPU features that aren't available.
|
||||||
|
For example, currently, this crate only provides packed algorithms for
|
||||||
|
`x86_64` and `aarch64`. Therefore, constructing a packed searcher on any
|
||||||
|
other target will always fail.
|
||||||
|
* Zero patterns were given, or one of the patterns given was empty. Packed
|
||||||
|
searchers require at least one pattern and that all patterns are non-empty.
|
||||||
|
* Something else about the nature of the patterns (typically based on
|
||||||
|
heuristics) suggests that a packed searcher would perform very poorly, so
|
||||||
|
no searcher is built.
|
||||||
|
*/
|
||||||
|
|
||||||
|
pub use crate::packed::api::{Builder, Config, FindIter, MatchKind, Searcher};
|
||||||
|
|
||||||
|
mod api;
|
||||||
|
mod ext;
|
||||||
|
mod pattern;
|
||||||
|
mod rabinkarp;
|
||||||
|
mod teddy;
|
||||||
|
#[cfg(all(feature = "std", test))]
|
||||||
|
mod tests;
|
||||||
|
mod vector;
|
||||||
480
.gear/predownloaded-development/vendor/aho-corasick/src/packed/pattern.rs
vendored
Normal file
480
.gear/predownloaded-development/vendor/aho-corasick/src/packed/pattern.rs
vendored
Normal file
|
|
@ -0,0 +1,480 @@
|
||||||
|
use core::{cmp, fmt, mem, u16, usize};
|
||||||
|
|
||||||
|
use alloc::{boxed::Box, string::String, vec, vec::Vec};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
packed::{api::MatchKind, ext::Pointer},
|
||||||
|
PatternID,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A non-empty collection of non-empty patterns to search for.
|
||||||
|
///
|
||||||
|
/// This collection of patterns is what is passed around to both execute
|
||||||
|
/// searches and to construct the searchers themselves. Namely, this permits
|
||||||
|
/// searches to avoid copying all of the patterns, and allows us to keep only
|
||||||
|
/// one copy throughout all packed searchers.
|
||||||
|
///
|
||||||
|
/// Note that this collection is not a set. The same pattern can appear more
|
||||||
|
/// than once.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct Patterns {
|
||||||
|
/// The match semantics supported by this collection of patterns.
|
||||||
|
///
|
||||||
|
/// The match semantics determines the order of the iterator over patterns.
|
||||||
|
/// For leftmost-first, patterns are provided in the same order as were
|
||||||
|
/// provided by the caller. For leftmost-longest, patterns are provided in
|
||||||
|
/// descending order of length, with ties broken by the order in which they
|
||||||
|
/// were provided by the caller.
|
||||||
|
kind: MatchKind,
|
||||||
|
/// The collection of patterns, indexed by their identifier.
|
||||||
|
by_id: Vec<Vec<u8>>,
|
||||||
|
/// The order of patterns defined for iteration, given by pattern
|
||||||
|
/// identifiers. The order of `by_id` and `order` is always the same for
|
||||||
|
/// leftmost-first semantics, but may be different for leftmost-longest
|
||||||
|
/// semantics.
|
||||||
|
order: Vec<PatternID>,
|
||||||
|
/// The length of the smallest pattern, in bytes.
|
||||||
|
minimum_len: usize,
|
||||||
|
/// The total number of pattern bytes across the entire collection. This
|
||||||
|
/// is used for reporting total heap usage in constant time.
|
||||||
|
total_pattern_bytes: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
// BREADCRUMBS: I think we want to experiment with a different bucket
|
||||||
|
// representation. Basically, each bucket is just a Range<usize> to a single
|
||||||
|
// contiguous allocation? Maybe length-prefixed patterns or something? The
|
||||||
|
// idea is to try to get rid of the pointer chasing in verification. I don't
|
||||||
|
// know that that is the issue, but I suspect it is.
|
||||||
|
|
||||||
|
impl Patterns {
|
||||||
|
/// Create a new collection of patterns for the given match semantics. The
|
||||||
|
/// ID of each pattern is the index of the pattern at which it occurs in
|
||||||
|
/// the `by_id` slice.
|
||||||
|
///
|
||||||
|
/// If any of the patterns in the slice given are empty, then this panics.
|
||||||
|
/// Similarly, if the number of patterns given is zero, then this also
|
||||||
|
/// panics.
|
||||||
|
pub(crate) fn new() -> Patterns {
|
||||||
|
Patterns {
|
||||||
|
kind: MatchKind::default(),
|
||||||
|
by_id: vec![],
|
||||||
|
order: vec![],
|
||||||
|
minimum_len: usize::MAX,
|
||||||
|
total_pattern_bytes: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a pattern to this collection.
|
||||||
|
///
|
||||||
|
/// This panics if the pattern given is empty.
|
||||||
|
pub(crate) fn add(&mut self, bytes: &[u8]) {
|
||||||
|
assert!(!bytes.is_empty());
|
||||||
|
assert!(self.by_id.len() <= u16::MAX as usize);
|
||||||
|
|
||||||
|
let id = PatternID::new(self.by_id.len()).unwrap();
|
||||||
|
self.order.push(id);
|
||||||
|
self.by_id.push(bytes.to_vec());
|
||||||
|
self.minimum_len = cmp::min(self.minimum_len, bytes.len());
|
||||||
|
self.total_pattern_bytes += bytes.len();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the match kind semantics for this collection of patterns.
|
||||||
|
///
|
||||||
|
/// If the kind is not set, then the default is leftmost-first.
|
||||||
|
pub(crate) fn set_match_kind(&mut self, kind: MatchKind) {
|
||||||
|
self.kind = kind;
|
||||||
|
match self.kind {
|
||||||
|
MatchKind::LeftmostFirst => {
|
||||||
|
self.order.sort();
|
||||||
|
}
|
||||||
|
MatchKind::LeftmostLongest => {
|
||||||
|
let (order, by_id) = (&mut self.order, &mut self.by_id);
|
||||||
|
order.sort_by(|&id1, &id2| {
|
||||||
|
by_id[id1].len().cmp(&by_id[id2].len()).reverse()
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the number of patterns in this collection.
|
||||||
|
///
|
||||||
|
/// This is guaranteed to be greater than zero.
|
||||||
|
pub(crate) fn len(&self) -> usize {
|
||||||
|
self.by_id.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if this collection of patterns is empty.
|
||||||
|
pub(crate) fn is_empty(&self) -> bool {
|
||||||
|
self.len() == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the approximate total amount of heap used by these patterns, in
|
||||||
|
/// units of bytes.
|
||||||
|
pub(crate) fn memory_usage(&self) -> usize {
|
||||||
|
self.order.len() * mem::size_of::<PatternID>()
|
||||||
|
+ self.by_id.len() * mem::size_of::<Vec<u8>>()
|
||||||
|
+ self.total_pattern_bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clears all heap memory associated with this collection of patterns and
|
||||||
|
/// resets all state such that it is a valid empty collection.
|
||||||
|
pub(crate) fn reset(&mut self) {
|
||||||
|
self.kind = MatchKind::default();
|
||||||
|
self.by_id.clear();
|
||||||
|
self.order.clear();
|
||||||
|
self.minimum_len = usize::MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the length, in bytes, of the smallest pattern.
|
||||||
|
///
|
||||||
|
/// This is guaranteed to be at least one.
|
||||||
|
pub(crate) fn minimum_len(&self) -> usize {
|
||||||
|
self.minimum_len
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the match semantics used by these patterns.
|
||||||
|
pub(crate) fn match_kind(&self) -> &MatchKind {
|
||||||
|
&self.kind
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the pattern with the given identifier. If such a pattern does
|
||||||
|
/// not exist, then this panics.
|
||||||
|
pub(crate) fn get(&self, id: PatternID) -> Pattern<'_> {
|
||||||
|
Pattern(&self.by_id[id])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the pattern with the given identifier without performing bounds
|
||||||
|
/// checks.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// Callers must ensure that a pattern with the given identifier exists
|
||||||
|
/// before using this method.
|
||||||
|
pub(crate) unsafe fn get_unchecked(&self, id: PatternID) -> Pattern<'_> {
|
||||||
|
Pattern(self.by_id.get_unchecked(id.as_usize()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return an iterator over all the patterns in this collection, in the
|
||||||
|
/// order in which they should be matched.
|
||||||
|
///
|
||||||
|
/// Specifically, in a naive multi-pattern matcher, the following is
|
||||||
|
/// guaranteed to satisfy the match semantics of this collection of
|
||||||
|
/// patterns:
|
||||||
|
///
|
||||||
|
/// ```ignore
|
||||||
|
/// for i in 0..haystack.len():
|
||||||
|
/// for p in patterns.iter():
|
||||||
|
/// if haystack[i..].starts_with(p.bytes()):
|
||||||
|
/// return Match(p.id(), i, i + p.bytes().len())
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Namely, among the patterns in a collection, if they are matched in
|
||||||
|
/// the order provided by this iterator, then the result is guaranteed
|
||||||
|
/// to satisfy the correct match semantics. (Either leftmost-first or
|
||||||
|
/// leftmost-longest.)
|
||||||
|
pub(crate) fn iter(&self) -> PatternIter<'_> {
|
||||||
|
PatternIter { patterns: self, i: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over the patterns in the `Patterns` collection.
|
||||||
|
///
|
||||||
|
/// The order of the patterns provided by this iterator is consistent with the
|
||||||
|
/// match semantics of the originating collection of patterns.
|
||||||
|
///
|
||||||
|
/// The lifetime `'p` corresponds to the lifetime of the collection of patterns
|
||||||
|
/// this is iterating over.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct PatternIter<'p> {
|
||||||
|
patterns: &'p Patterns,
|
||||||
|
i: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'p> Iterator for PatternIter<'p> {
|
||||||
|
type Item = (PatternID, Pattern<'p>);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<(PatternID, Pattern<'p>)> {
|
||||||
|
if self.i >= self.patterns.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let id = self.patterns.order[self.i];
|
||||||
|
let p = self.patterns.get(id);
|
||||||
|
self.i += 1;
|
||||||
|
Some((id, p))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A pattern that is used in packed searching.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub(crate) struct Pattern<'a>(&'a [u8]);
|
||||||
|
|
||||||
|
impl<'a> fmt::Debug for Pattern<'a> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
f.debug_struct("Pattern")
|
||||||
|
.field("lit", &String::from_utf8_lossy(&self.0))
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'p> Pattern<'p> {
|
||||||
|
/// Returns the length of this pattern, in bytes.
|
||||||
|
pub(crate) fn len(&self) -> usize {
|
||||||
|
self.0.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the bytes of this pattern.
|
||||||
|
pub(crate) fn bytes(&self) -> &[u8] {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the first `len` low nybbles from this pattern. If this pattern
|
||||||
|
/// is shorter than `len`, then this panics.
|
||||||
|
pub(crate) fn low_nybbles(&self, len: usize) -> Box<[u8]> {
|
||||||
|
let mut nybs = vec![0; len].into_boxed_slice();
|
||||||
|
for (i, byte) in self.bytes().iter().take(len).enumerate() {
|
||||||
|
nybs[i] = byte & 0xF;
|
||||||
|
}
|
||||||
|
nybs
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if this pattern is a prefix of the given bytes.
|
||||||
|
#[inline(always)]
|
||||||
|
pub(crate) fn is_prefix(&self, bytes: &[u8]) -> bool {
|
||||||
|
is_prefix(bytes, self.bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if this pattern is a prefix of the haystack given by the
|
||||||
|
/// raw `start` and `end` pointers.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// * It must be the case that `start < end` and that the distance between
|
||||||
|
/// them is at least equal to `V::BYTES`. That is, it must always be valid
|
||||||
|
/// to do at least an unaligned load of `V` at `start`.
|
||||||
|
/// * Both `start` and `end` must be valid for reads.
|
||||||
|
/// * Both `start` and `end` must point to an initialized value.
|
||||||
|
/// * Both `start` and `end` must point to the same allocated object and
|
||||||
|
/// must either be in bounds or at most one byte past the end of the
|
||||||
|
/// allocated object.
|
||||||
|
/// * Both `start` and `end` must be _derived from_ a pointer to the same
|
||||||
|
/// object.
|
||||||
|
/// * The distance between `start` and `end` must not overflow `isize`.
|
||||||
|
/// * The distance being in bounds must not rely on "wrapping around" the
|
||||||
|
/// address space.
|
||||||
|
#[inline(always)]
|
||||||
|
pub(crate) unsafe fn is_prefix_raw(
|
||||||
|
&self,
|
||||||
|
start: *const u8,
|
||||||
|
end: *const u8,
|
||||||
|
) -> bool {
|
||||||
|
let patlen = self.bytes().len();
|
||||||
|
let haylen = end.distance(start);
|
||||||
|
if patlen > haylen {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// SAFETY: We've checked that the haystack has length at least equal
|
||||||
|
// to this pattern. All other safety concerns are the responsibility
|
||||||
|
// of the caller.
|
||||||
|
is_equal_raw(start, self.bytes().as_ptr(), patlen)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if `needle` is a prefix of `haystack`.
|
||||||
|
///
|
||||||
|
/// This uses a latency optimized variant of `memcmp` internally which *might*
|
||||||
|
/// make this faster for very short strings.
|
||||||
|
///
|
||||||
|
/// # Inlining
|
||||||
|
///
|
||||||
|
/// This routine is marked `inline(always)`. If you want to call this function
|
||||||
|
/// in a way that is not always inlined, you'll need to wrap a call to it in
|
||||||
|
/// another function that is marked as `inline(never)` or just `inline`.
|
||||||
|
#[inline(always)]
|
||||||
|
fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool {
|
||||||
|
if needle.len() > haystack.len() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// SAFETY: Our pointers are derived directly from borrowed slices which
|
||||||
|
// uphold all of our safety guarantees except for length. We account for
|
||||||
|
// length with the check above.
|
||||||
|
unsafe { is_equal_raw(haystack.as_ptr(), needle.as_ptr(), needle.len()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compare corresponding bytes in `x` and `y` for equality.
|
||||||
|
///
|
||||||
|
/// That is, this returns true if and only if `x.len() == y.len()` and
|
||||||
|
/// `x[i] == y[i]` for all `0 <= i < x.len()`.
|
||||||
|
///
|
||||||
|
/// Note that this isn't used. We only use it in tests as a convenient way
|
||||||
|
/// of testing `is_equal_raw`.
|
||||||
|
///
|
||||||
|
/// # Inlining
|
||||||
|
///
|
||||||
|
/// This routine is marked `inline(always)`. If you want to call this function
|
||||||
|
/// in a way that is not always inlined, you'll need to wrap a call to it in
|
||||||
|
/// another function that is marked as `inline(never)` or just `inline`.
|
||||||
|
///
|
||||||
|
/// # Motivation
|
||||||
|
///
|
||||||
|
/// Why not use slice equality instead? Well, slice equality usually results in
|
||||||
|
/// a call out to the current platform's `libc` which might not be inlineable
|
||||||
|
/// or have other overhead. This routine isn't guaranteed to be a win, but it
|
||||||
|
/// might be in some cases.
|
||||||
|
#[cfg(test)]
|
||||||
|
#[inline(always)]
|
||||||
|
fn is_equal(x: &[u8], y: &[u8]) -> bool {
|
||||||
|
if x.len() != y.len() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// SAFETY: Our pointers are derived directly from borrowed slices which
|
||||||
|
// uphold all of our safety guarantees except for length. We account for
|
||||||
|
// length with the check above.
|
||||||
|
unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compare `n` bytes at the given pointers for equality.
|
||||||
|
///
|
||||||
|
/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
|
||||||
|
/// `0 <= i < n`.
|
||||||
|
///
|
||||||
|
/// # Inlining
|
||||||
|
///
|
||||||
|
/// This routine is marked `inline(always)`. If you want to call this function
|
||||||
|
/// in a way that is not always inlined, you'll need to wrap a call to it in
|
||||||
|
/// another function that is marked as `inline(never)` or just `inline`.
|
||||||
|
///
|
||||||
|
/// # Motivation
|
||||||
|
///
|
||||||
|
/// Why not use slice equality instead? Well, slice equality usually results in
|
||||||
|
/// a call out to the current platform's `libc` which might not be inlineable
|
||||||
|
/// or have other overhead. This routine isn't guaranteed to be a win, but it
|
||||||
|
/// might be in some cases.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
|
||||||
|
/// * Both `x` and `y` must point to an initialized value.
|
||||||
|
/// * Both `x` and `y` must each point to an allocated object and
|
||||||
|
/// must either be in bounds or at most one byte past the end of the
|
||||||
|
/// allocated object. `x` and `y` do not need to point to the same allocated
|
||||||
|
/// object, but they may.
|
||||||
|
/// * Both `x` and `y` must be _derived from_ a pointer to their respective
|
||||||
|
/// allocated objects.
|
||||||
|
/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
|
||||||
|
/// for `y` and `y+n`.
|
||||||
|
/// * The distance being in bounds must not rely on "wrapping around" the
|
||||||
|
/// address space.
|
||||||
|
#[inline(always)]
|
||||||
|
unsafe fn is_equal_raw(mut x: *const u8, mut y: *const u8, n: usize) -> bool {
|
||||||
|
// If we don't have enough bytes to do 4-byte at a time loads, then
|
||||||
|
// handle each possible length specially. Note that I used to have a
|
||||||
|
// byte-at-a-time loop here and that turned out to be quite a bit slower
|
||||||
|
// for the memmem/pathological/defeat-simple-vector-alphabet benchmark.
|
||||||
|
if n < 4 {
|
||||||
|
return match n {
|
||||||
|
0 => true,
|
||||||
|
1 => x.read() == y.read(),
|
||||||
|
2 => {
|
||||||
|
x.cast::<u16>().read_unaligned()
|
||||||
|
== y.cast::<u16>().read_unaligned()
|
||||||
|
}
|
||||||
|
// I also tried copy_nonoverlapping here and it looks like the
|
||||||
|
// codegen is the same.
|
||||||
|
3 => x.cast::<[u8; 3]>().read() == y.cast::<[u8; 3]>().read(),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// When we have 4 or more bytes to compare, then proceed in chunks of 4 at
|
||||||
|
// a time using unaligned loads.
|
||||||
|
//
|
||||||
|
// Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is
|
||||||
|
// that this particular version of memcmp is likely to be called with tiny
|
||||||
|
// needles. That means that if we do 8 byte loads, then a higher proportion
|
||||||
|
// of memcmp calls will use the slower variant above. With that said, this
|
||||||
|
// is a hypothesis and is only loosely supported by benchmarks. There's
|
||||||
|
// likely some improvement that could be made here. The main thing here
|
||||||
|
// though is to optimize for latency, not throughput.
|
||||||
|
|
||||||
|
// SAFETY: The caller is responsible for ensuring the pointers we get are
|
||||||
|
// valid and readable for at least `n` bytes. We also do unaligned loads,
|
||||||
|
// so there's no need to ensure we're aligned. (This is justified by this
|
||||||
|
// routine being specifically for short strings.)
|
||||||
|
let xend = x.add(n.wrapping_sub(4));
|
||||||
|
let yend = y.add(n.wrapping_sub(4));
|
||||||
|
while x < xend {
|
||||||
|
let vx = x.cast::<u32>().read_unaligned();
|
||||||
|
let vy = y.cast::<u32>().read_unaligned();
|
||||||
|
if vx != vy {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
x = x.add(4);
|
||||||
|
y = y.add(4);
|
||||||
|
}
|
||||||
|
let vx = xend.cast::<u32>().read_unaligned();
|
||||||
|
let vy = yend.cast::<u32>().read_unaligned();
|
||||||
|
vx == vy
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn equals_different_lengths() {
|
||||||
|
assert!(!is_equal(b"", b"a"));
|
||||||
|
assert!(!is_equal(b"a", b""));
|
||||||
|
assert!(!is_equal(b"ab", b"a"));
|
||||||
|
assert!(!is_equal(b"a", b"ab"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn equals_mismatch() {
|
||||||
|
let one_mismatch = [
|
||||||
|
(&b"a"[..], &b"x"[..]),
|
||||||
|
(&b"ab"[..], &b"ax"[..]),
|
||||||
|
(&b"abc"[..], &b"abx"[..]),
|
||||||
|
(&b"abcd"[..], &b"abcx"[..]),
|
||||||
|
(&b"abcde"[..], &b"abcdx"[..]),
|
||||||
|
(&b"abcdef"[..], &b"abcdex"[..]),
|
||||||
|
(&b"abcdefg"[..], &b"abcdefx"[..]),
|
||||||
|
(&b"abcdefgh"[..], &b"abcdefgx"[..]),
|
||||||
|
(&b"abcdefghi"[..], &b"abcdefghx"[..]),
|
||||||
|
(&b"abcdefghij"[..], &b"abcdefghix"[..]),
|
||||||
|
(&b"abcdefghijk"[..], &b"abcdefghijx"[..]),
|
||||||
|
(&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]),
|
||||||
|
(&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]),
|
||||||
|
(&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]),
|
||||||
|
];
|
||||||
|
for (x, y) in one_mismatch {
|
||||||
|
assert_eq!(x.len(), y.len(), "lengths should match");
|
||||||
|
assert!(!is_equal(x, y));
|
||||||
|
assert!(!is_equal(y, x));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn equals_yes() {
|
||||||
|
assert!(is_equal(b"", b""));
|
||||||
|
assert!(is_equal(b"a", b"a"));
|
||||||
|
assert!(is_equal(b"ab", b"ab"));
|
||||||
|
assert!(is_equal(b"abc", b"abc"));
|
||||||
|
assert!(is_equal(b"abcd", b"abcd"));
|
||||||
|
assert!(is_equal(b"abcde", b"abcde"));
|
||||||
|
assert!(is_equal(b"abcdef", b"abcdef"));
|
||||||
|
assert!(is_equal(b"abcdefg", b"abcdefg"));
|
||||||
|
assert!(is_equal(b"abcdefgh", b"abcdefgh"));
|
||||||
|
assert!(is_equal(b"abcdefghi", b"abcdefghi"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn prefix() {
|
||||||
|
assert!(is_prefix(b"", b""));
|
||||||
|
assert!(is_prefix(b"a", b""));
|
||||||
|
assert!(is_prefix(b"ab", b""));
|
||||||
|
assert!(is_prefix(b"foo", b"foo"));
|
||||||
|
assert!(is_prefix(b"foobar", b"foo"));
|
||||||
|
|
||||||
|
assert!(!is_prefix(b"foo", b"fob"));
|
||||||
|
assert!(!is_prefix(b"foobar", b"fob"));
|
||||||
|
}
|
||||||
|
}
|
||||||
168
.gear/predownloaded-development/vendor/aho-corasick/src/packed/rabinkarp.rs
vendored
Normal file
168
.gear/predownloaded-development/vendor/aho-corasick/src/packed/rabinkarp.rs
vendored
Normal file
|
|
@ -0,0 +1,168 @@
|
||||||
|
use alloc::{sync::Arc, vec, vec::Vec};
|
||||||
|
|
||||||
|
use crate::{packed::pattern::Patterns, util::search::Match, PatternID};
|
||||||
|
|
||||||
|
/// The type of the rolling hash used in the Rabin-Karp algorithm.
|
||||||
|
type Hash = usize;
|
||||||
|
|
||||||
|
/// The number of buckets to store our patterns in. We don't want this to be
|
||||||
|
/// too big in order to avoid wasting memory, but we don't want it to be too
|
||||||
|
/// small either to avoid spending too much time confirming literals.
|
||||||
|
///
|
||||||
|
/// The number of buckets MUST be a power of two. Otherwise, determining the
|
||||||
|
/// bucket from a hash will slow down the code considerably. Using a power
|
||||||
|
/// of two means `hash % NUM_BUCKETS` can compile down to a simple `and`
|
||||||
|
/// instruction.
|
||||||
|
const NUM_BUCKETS: usize = 64;
|
||||||
|
|
||||||
|
/// An implementation of the Rabin-Karp algorithm. The main idea of this
|
||||||
|
/// algorithm is to maintain a rolling hash as it moves through the input, and
|
||||||
|
/// then check whether that hash corresponds to the same hash for any of the
|
||||||
|
/// patterns we're looking for.
|
||||||
|
///
|
||||||
|
/// A draw back of naively scaling Rabin-Karp to multiple patterns is that
|
||||||
|
/// it requires all of the patterns to be the same length, which in turn
|
||||||
|
/// corresponds to the number of bytes to hash. We adapt this to work for
|
||||||
|
/// multiple patterns of varying size by fixing the number of bytes to hash
|
||||||
|
/// to be the length of the smallest pattern. We also split the patterns into
|
||||||
|
/// several buckets to hopefully make the confirmation step faster.
|
||||||
|
///
|
||||||
|
/// Wikipedia has a decent explanation, if a bit heavy on the theory:
|
||||||
|
/// https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
|
||||||
|
///
|
||||||
|
/// But ESMAJ provides something a bit more concrete:
|
||||||
|
/// https://www-igm.univ-mlv.fr/~lecroq/string/node5.html
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct RabinKarp {
|
||||||
|
/// The patterns we're searching for.
|
||||||
|
patterns: Arc<Patterns>,
|
||||||
|
/// The order of patterns in each bucket is significant. Namely, they are
|
||||||
|
/// arranged such that the first one to match is the correct match. This
|
||||||
|
/// may not necessarily correspond to the order provided by the caller.
|
||||||
|
/// For example, if leftmost-longest semantics are used, then the patterns
|
||||||
|
/// are sorted by their length in descending order. If leftmost-first
|
||||||
|
/// semantics are used, then the patterns are sorted by their pattern ID
|
||||||
|
/// in ascending order (which corresponds to the caller's order).
|
||||||
|
buckets: Vec<Vec<(Hash, PatternID)>>,
|
||||||
|
/// The length of the hashing window. Generally, this corresponds to the
|
||||||
|
/// length of the smallest pattern.
|
||||||
|
hash_len: usize,
|
||||||
|
/// The factor to subtract out of a hash before updating it with a new
|
||||||
|
/// byte.
|
||||||
|
hash_2pow: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RabinKarp {
|
||||||
|
/// Compile a new Rabin-Karp matcher from the patterns given.
|
||||||
|
///
|
||||||
|
/// This panics if any of the patterns in the collection are empty, or if
|
||||||
|
/// the collection is itself empty.
|
||||||
|
pub(crate) fn new(patterns: &Arc<Patterns>) -> RabinKarp {
|
||||||
|
assert!(patterns.len() >= 1);
|
||||||
|
let hash_len = patterns.minimum_len();
|
||||||
|
assert!(hash_len >= 1);
|
||||||
|
|
||||||
|
let mut hash_2pow = 1usize;
|
||||||
|
for _ in 1..hash_len {
|
||||||
|
hash_2pow = hash_2pow.wrapping_shl(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut rk = RabinKarp {
|
||||||
|
patterns: Arc::clone(patterns),
|
||||||
|
buckets: vec![vec![]; NUM_BUCKETS],
|
||||||
|
hash_len,
|
||||||
|
hash_2pow,
|
||||||
|
};
|
||||||
|
for (id, pat) in patterns.iter() {
|
||||||
|
let hash = rk.hash(&pat.bytes()[..rk.hash_len]);
|
||||||
|
let bucket = hash % NUM_BUCKETS;
|
||||||
|
rk.buckets[bucket].push((hash, id));
|
||||||
|
}
|
||||||
|
rk
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the first matching pattern in the given haystack, begining the
|
||||||
|
/// search at `at`.
|
||||||
|
pub(crate) fn find_at(
|
||||||
|
&self,
|
||||||
|
haystack: &[u8],
|
||||||
|
mut at: usize,
|
||||||
|
) -> Option<Match> {
|
||||||
|
assert_eq!(NUM_BUCKETS, self.buckets.len());
|
||||||
|
|
||||||
|
if at + self.hash_len > haystack.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let mut hash = self.hash(&haystack[at..at + self.hash_len]);
|
||||||
|
loop {
|
||||||
|
let bucket = &self.buckets[hash % NUM_BUCKETS];
|
||||||
|
for &(phash, pid) in bucket {
|
||||||
|
if phash == hash {
|
||||||
|
if let Some(c) = self.verify(pid, haystack, at) {
|
||||||
|
return Some(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if at + self.hash_len >= haystack.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
hash = self.update_hash(
|
||||||
|
hash,
|
||||||
|
haystack[at],
|
||||||
|
haystack[at + self.hash_len],
|
||||||
|
);
|
||||||
|
at += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the approximate total amount of heap used by this searcher, in
|
||||||
|
/// units of bytes.
|
||||||
|
pub(crate) fn memory_usage(&self) -> usize {
|
||||||
|
self.buckets.len() * core::mem::size_of::<Vec<(Hash, PatternID)>>()
|
||||||
|
+ self.patterns.len() * core::mem::size_of::<(Hash, PatternID)>()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verify whether the pattern with the given id matches at
|
||||||
|
/// `haystack[at..]`.
|
||||||
|
///
|
||||||
|
/// We tag this function as `cold` because it helps improve codegen.
|
||||||
|
/// Intuitively, it would seem like inlining it would be better. However,
|
||||||
|
/// the only time this is called and a match is not found is when there
|
||||||
|
/// there is a hash collision, or when a prefix of a pattern matches but
|
||||||
|
/// the entire pattern doesn't match. This is hopefully fairly rare, and
|
||||||
|
/// if it does occur a lot, it's going to be slow no matter what we do.
|
||||||
|
#[cold]
|
||||||
|
fn verify(
|
||||||
|
&self,
|
||||||
|
id: PatternID,
|
||||||
|
haystack: &[u8],
|
||||||
|
at: usize,
|
||||||
|
) -> Option<Match> {
|
||||||
|
let pat = self.patterns.get(id);
|
||||||
|
if pat.is_prefix(&haystack[at..]) {
|
||||||
|
Some(Match::new(id, at..at + pat.len()))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Hash the given bytes.
|
||||||
|
fn hash(&self, bytes: &[u8]) -> Hash {
|
||||||
|
assert_eq!(self.hash_len, bytes.len());
|
||||||
|
|
||||||
|
let mut hash = 0usize;
|
||||||
|
for &b in bytes {
|
||||||
|
hash = hash.wrapping_shl(1).wrapping_add(b as usize);
|
||||||
|
}
|
||||||
|
hash
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Update the hash given based on removing `old_byte` at the beginning
|
||||||
|
/// of some byte string, and appending `new_byte` to the end of that same
|
||||||
|
/// byte string.
|
||||||
|
fn update_hash(&self, prev: Hash, old_byte: u8, new_byte: u8) -> Hash {
|
||||||
|
prev.wrapping_sub((old_byte as usize).wrapping_mul(self.hash_2pow))
|
||||||
|
.wrapping_shl(1)
|
||||||
|
.wrapping_add(new_byte as usize)
|
||||||
|
}
|
||||||
|
}
|
||||||
386
.gear/predownloaded-development/vendor/aho-corasick/src/packed/teddy/README.md
vendored
Normal file
386
.gear/predownloaded-development/vendor/aho-corasick/src/packed/teddy/README.md
vendored
Normal file
|
|
@ -0,0 +1,386 @@
|
||||||
|
Teddy is a SIMD accelerated multiple substring matching algorithm. The name
|
||||||
|
and the core ideas in the algorithm were learned from the [Hyperscan][1_u]
|
||||||
|
project. The implementation in this repository was mostly motivated for use in
|
||||||
|
accelerating regex searches by searching for small sets of required literals
|
||||||
|
extracted from the regex.
|
||||||
|
|
||||||
|
|
||||||
|
# Background
|
||||||
|
|
||||||
|
The key idea of Teddy is to do *packed* substring matching. In the literature,
|
||||||
|
packed substring matching is the idea of examining multiple bytes in a haystack
|
||||||
|
at a time to detect matches. Implementations of, for example, memchr (which
|
||||||
|
detects matches of a single byte) have been doing this for years. Only
|
||||||
|
recently, with the introduction of various SIMD instructions, has this been
|
||||||
|
extended to substring matching. The PCMPESTRI instruction (and its relatives),
|
||||||
|
for example, implements substring matching in hardware. It is, however, limited
|
||||||
|
to substrings of length 16 bytes or fewer, but this restriction is fine in a
|
||||||
|
regex engine, since we rarely care about the performance difference between
|
||||||
|
searching for a 16 byte literal and a 16 + N literal; 16 is already long
|
||||||
|
enough. The key downside of the PCMPESTRI instruction, on current (2016) CPUs
|
||||||
|
at least, is its latency and throughput. As a result, it is often faster to
|
||||||
|
do substring search with a Boyer-Moore (or Two-Way) variant and a well placed
|
||||||
|
memchr to quickly skip through the haystack.
|
||||||
|
|
||||||
|
There are fewer results from the literature on packed substring matching,
|
||||||
|
and even fewer for packed multiple substring matching. Ben-Kiki et al. [2]
|
||||||
|
describes use of PCMPESTRI for substring matching, but is mostly theoretical
|
||||||
|
and hand-waves performance. There is other theoretical work done by Bille [3]
|
||||||
|
as well.
|
||||||
|
|
||||||
|
The rest of the work in the field, as far as I'm aware, is by Faro and Kulekci
|
||||||
|
and is generally focused on multiple pattern search. Their first paper [4a]
|
||||||
|
introduces the concept of a fingerprint, which is computed for every block of
|
||||||
|
N bytes in every pattern. The haystack is then scanned N bytes at a time and
|
||||||
|
a fingerprint is computed in the same way it was computed for blocks in the
|
||||||
|
patterns. If the fingerprint corresponds to one that was found in a pattern,
|
||||||
|
then a verification step follows to confirm that one of the substrings with the
|
||||||
|
corresponding fingerprint actually matches at the current location. Various
|
||||||
|
implementation tricks are employed to make sure the fingerprint lookup is fast;
|
||||||
|
typically by truncating the fingerprint. (This may, of course, provoke more
|
||||||
|
steps in the verification process, so a balance must be struck.)
|
||||||
|
|
||||||
|
The main downside of [4a] is that the minimum substring length is 32 bytes,
|
||||||
|
presumably because of how the algorithm uses certain SIMD instructions. This
|
||||||
|
essentially makes it useless for general purpose regex matching, where a small
|
||||||
|
number of short patterns is far more likely.
|
||||||
|
|
||||||
|
Faro and Kulekci published another paper [4b] that is conceptually very similar
|
||||||
|
to [4a]. The key difference is that it uses the CRC32 instruction (introduced
|
||||||
|
as part of SSE 4.2) to compute fingerprint values. This also enables the
|
||||||
|
algorithm to work effectively on substrings as short as 7 bytes with 4 byte
|
||||||
|
windows. 7 bytes is unfortunately still too long. The window could be
|
||||||
|
technically shrunk to 2 bytes, thereby reducing minimum length to 3, but the
|
||||||
|
small window size ends up negating most performance benefits—and it's likely
|
||||||
|
the common case in a general purpose regex engine.
|
||||||
|
|
||||||
|
Faro and Kulekci also published [4c] that appears to be intended as a
|
||||||
|
replacement to using PCMPESTRI. In particular, it is specifically motivated by
|
||||||
|
the high throughput/latency time of PCMPESTRI and therefore chooses other SIMD
|
||||||
|
instructions that are faster. While this approach works for short substrings,
|
||||||
|
I personally couldn't see a way to generalize it to multiple substring search.
|
||||||
|
|
||||||
|
Faro and Kulekci have another paper [4d] that I haven't been able to read
|
||||||
|
because it is behind a paywall.
|
||||||
|
|
||||||
|
|
||||||
|
# Teddy
|
||||||
|
|
||||||
|
Finally, we get to Teddy. If the above literature review is complete, then it
|
||||||
|
appears that Teddy is a novel algorithm. More than that, in my experience, it
|
||||||
|
completely blows away the competition for short substrings, which is exactly
|
||||||
|
what we want in a general purpose regex engine. Again, the algorithm appears
|
||||||
|
to be developed by the authors of [Hyperscan][1_u]. Hyperscan was open sourced
|
||||||
|
late 2015, and no earlier history could be found. Therefore, tracking the exact
|
||||||
|
provenance of the algorithm with respect to the published literature seems
|
||||||
|
difficult.
|
||||||
|
|
||||||
|
At a high level, Teddy works somewhat similarly to the fingerprint algorithms
|
||||||
|
published by Faro and Kulekci, but Teddy does it in a way that scales a bit
|
||||||
|
better. Namely:
|
||||||
|
|
||||||
|
1. Teddy's core algorithm scans the haystack in 16 (for SSE, or 32 for AVX)
|
||||||
|
byte chunks. 16 (or 32) is significant because it corresponds to the number
|
||||||
|
of bytes in a SIMD vector.
|
||||||
|
2. Bitwise operations are performed on each chunk to discover if any region of
|
||||||
|
it matches a set of precomputed fingerprints from the patterns. If there are
|
||||||
|
matches, then a verification step is performed. In this implementation, our
|
||||||
|
verification step is naive. This can be improved upon.
|
||||||
|
|
||||||
|
The details to make this work are quite clever. First, we must choose how to
|
||||||
|
pick our fingerprints. In Hyperscan's implementation, I *believe* they use the
|
||||||
|
last N bytes of each substring, where N must be at least the minimum length of
|
||||||
|
any substring in the set being searched. In this implementation, we use the
|
||||||
|
first N bytes of each substring. (The tradeoffs between these choices aren't
|
||||||
|
yet clear to me.) We then must figure out how to quickly test whether an
|
||||||
|
occurrence of any fingerprint from the set of patterns appears in a 16 byte
|
||||||
|
block from the haystack. To keep things simple, let's assume N = 1 and examine
|
||||||
|
some examples to motivate the approach. Here are our patterns:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
foo
|
||||||
|
bar
|
||||||
|
baz
|
||||||
|
```
|
||||||
|
|
||||||
|
The corresponding fingerprints, for N = 1, are `f`, `b` and `b`. Now let's set
|
||||||
|
our 16 byte block to:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
bat cat foo bump
|
||||||
|
xxxxxxxxxxxxxxxx
|
||||||
|
```
|
||||||
|
|
||||||
|
To cut to the chase, Teddy works by using bitsets. In particular, Teddy creates
|
||||||
|
a mask that allows us to quickly compute membership of a fingerprint in a 16
|
||||||
|
byte block that also tells which pattern the fingerprint corresponds to. In
|
||||||
|
this case, our fingerprint is a single byte, so an appropriate abstraction is
|
||||||
|
a map from a single byte to a list of patterns that contain that fingerprint:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
f |--> foo
|
||||||
|
b |--> bar, baz
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, all we need to do is figure out how to represent this map in vector space
|
||||||
|
and use normal SIMD operations to perform a lookup. The first simplification
|
||||||
|
we can make is to represent our patterns as bit fields occupying a single
|
||||||
|
byte. This is important, because a single SIMD vector can store 16 bytes.
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
f |--> 00000001
|
||||||
|
b |--> 00000010, 00000100
|
||||||
|
```
|
||||||
|
|
||||||
|
How do we perform lookup though? It turns out that SSSE3 introduced a very cool
|
||||||
|
instruction called PSHUFB. The instruction takes two SIMD vectors, `A` and `B`,
|
||||||
|
and returns a third vector `C`. All vectors are treated as 16 8-bit integers.
|
||||||
|
`C` is formed by `C[i] = A[B[i]]`. (This is a bit of a simplification, but true
|
||||||
|
for the purposes of this algorithm. For full details, see [Intel's Intrinsics
|
||||||
|
Guide][5_u].) This essentially lets us use the values in `B` to lookup values
|
||||||
|
in `A`.
|
||||||
|
|
||||||
|
If we could somehow cause `B` to contain our 16 byte block from the haystack,
|
||||||
|
and if `A` could contain our bitmasks, then we'd end up with something like
|
||||||
|
this for `A`:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
0x00 0x01 ... 0x62 ... 0x66 ... 0xFF
|
||||||
|
A = 0 0 00000110 00000001 0
|
||||||
|
```
|
||||||
|
|
||||||
|
And if `B` contains our window from our haystack, we could use shuffle to take
|
||||||
|
the values from `B` and use them to look up our bitsets in `A`. But of course,
|
||||||
|
we can't do this because `A` in the above example contains 256 bytes, which
|
||||||
|
is much larger than the size of a SIMD vector.
|
||||||
|
|
||||||
|
Nybbles to the rescue! A nybble is 4 bits. Instead of one mask to hold all of
|
||||||
|
our bitsets, we can use two masks, where one mask corresponds to the lower four
|
||||||
|
bits of our fingerprint and the other mask corresponds to the upper four bits.
|
||||||
|
So our map now looks like:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
'f' & 0xF = 0x6 |--> 00000001
|
||||||
|
'f' >> 4 = 0x6 |--> 00000111
|
||||||
|
'b' & 0xF = 0x2 |--> 00000110
|
||||||
|
'b' >> 4 = 0x6 |--> 00000111
|
||||||
|
```
|
||||||
|
|
||||||
|
Notice that the bitsets for each nybble correspond to the union of all
|
||||||
|
fingerprints that contain that nybble. For example, both `f` and `b` have the
|
||||||
|
same upper 4 bits but differ on the lower 4 bits. Putting this together, we
|
||||||
|
have `A0`, `A1` and `B`, where `A0` is our mask for the lower nybble, `A1` is
|
||||||
|
our mask for the upper nybble and `B` is our 16 byte block from the haystack:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
0x00 0x01 0x02 0x03 ... 0x06 ... 0xF
|
||||||
|
A0 = 0 0 00000110 0 00000001 0
|
||||||
|
A1 = 0 0 0 0 00000111 0
|
||||||
|
B = b a t _ t p
|
||||||
|
B = 0x62 0x61 0x74 0x20 0x74 0x70
|
||||||
|
```
|
||||||
|
|
||||||
|
But of course, we can't use `B` with `PSHUFB` yet, since its values are 8 bits,
|
||||||
|
and we need indexes that are at most 4 bits (corresponding to one of 16
|
||||||
|
values). We can apply the same transformation to split `B` into lower and upper
|
||||||
|
nybbles as we did `A`. As before, `B0` corresponds to the lower nybbles and
|
||||||
|
`B1` corresponds to the upper nybbles:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
b a t _ c a t _ f o o _ b u m p
|
||||||
|
B0 = 0x2 0x1 0x4 0x0 0x3 0x1 0x4 0x0 0x6 0xF 0xF 0x0 0x2 0x5 0xD 0x0
|
||||||
|
B1 = 0x6 0x6 0x7 0x2 0x6 0x6 0x7 0x2 0x6 0x6 0x6 0x2 0x6 0x7 0x6 0x7
|
||||||
|
```
|
||||||
|
|
||||||
|
And now we have a nice correspondence. `B0` can index `A0` and `B1` can index
|
||||||
|
`A1`. Here's what we get when we apply `C0 = PSHUFB(A0, B0)`:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
b a ... f o ... p
|
||||||
|
A0[0x2] A0[0x1] A0[0x6] A0[0xF] A0[0x0]
|
||||||
|
C0 = 00000110 0 00000001 0 0
|
||||||
|
```
|
||||||
|
|
||||||
|
And `C1 = PSHUFB(A1, B1)`:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
b a ... f o ... p
|
||||||
|
A1[0x6] A1[0x6] A1[0x6] A1[0x6] A1[0x7]
|
||||||
|
C1 = 00000111 00000111 00000111 00000111 0
|
||||||
|
```
|
||||||
|
|
||||||
|
Notice how neither one of `C0` or `C1` is guaranteed to report fully correct
|
||||||
|
results all on its own. For example, `C1` claims that `b` is a fingerprint for
|
||||||
|
the pattern `foo` (since `A1[0x6] = 00000111`), and that `o` is a fingerprint
|
||||||
|
for all of our patterns. But if we combined `C0` and `C1` with an `AND`
|
||||||
|
operation:
|
||||||
|
|
||||||
|
```ignore
|
||||||
|
b a ... f o ... p
|
||||||
|
C = 00000110 0 00000001 0 0
|
||||||
|
```
|
||||||
|
|
||||||
|
Then we now have that `C[i]` contains a bitset corresponding to the matching
|
||||||
|
fingerprints in a haystack's 16 byte block, where `i` is the `ith` byte in that
|
||||||
|
block.
|
||||||
|
|
||||||
|
Once we have that, we can look for the position of the least significant bit
|
||||||
|
in `C`. (Least significant because we only target little endian here. Thus,
|
||||||
|
the least significant bytes correspond to bytes in our haystack at a lower
|
||||||
|
address.) That position, modulo `8`, gives us the pattern that the fingerprint
|
||||||
|
matches. That position, integer divided by `8`, also gives us the byte offset
|
||||||
|
that the fingerprint occurs in inside the 16 byte haystack block. Using those
|
||||||
|
two pieces of information, we can run a verification procedure that tries
|
||||||
|
to match all substrings containing that fingerprint at that position in the
|
||||||
|
haystack.
|
||||||
|
|
||||||
|
|
||||||
|
# Implementation notes
|
||||||
|
|
||||||
|
The problem with the algorithm as described above is that it uses a single byte
|
||||||
|
for a fingerprint. This will work well if the fingerprints are rare in the
|
||||||
|
haystack (e.g., capital letters or special characters in normal English text),
|
||||||
|
but if the fingerprints are common, you'll wind up spending too much time in
|
||||||
|
the verification step, which effectively negates the performance benefits of
|
||||||
|
scanning 16 bytes at a time. Remember, the key to the performance of this
|
||||||
|
algorithm is to do as little work as possible per 16 (or 32) bytes.
|
||||||
|
|
||||||
|
This algorithm can be extrapolated in a relatively straight-forward way to use
|
||||||
|
larger fingerprints. That is, instead of a single byte prefix, we might use a
|
||||||
|
two or three byte prefix. The implementation here implements N = {1, 2, 3}
|
||||||
|
and always picks the largest N possible. The rationale is that the bigger the
|
||||||
|
fingerprint, the fewer verification steps we'll do. Of course, if N is too
|
||||||
|
large, then we'll end up doing too much on each step.
|
||||||
|
|
||||||
|
The way to extend it is:
|
||||||
|
|
||||||
|
1. Add a mask for each byte in the fingerprint. (Remember that each mask is
|
||||||
|
composed of two SIMD vectors.) This results in a value of `C` for each byte
|
||||||
|
in the fingerprint while searching.
|
||||||
|
2. When testing each 16 (or 32) byte block, each value of `C` must be shifted
|
||||||
|
so that they are aligned. Once aligned, they should all be `AND`'d together.
|
||||||
|
This will give you only the bitsets corresponding to the full match of the
|
||||||
|
fingerprint. To do this, one needs to save the last byte (for N=2) or last
|
||||||
|
two bytes (for N=3) from the previous iteration, and then line them up with
|
||||||
|
the first one or two bytes of the next iteration.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
Verification generally follows the procedure outlined above. The tricky parts
|
||||||
|
are in the right formulation of operations to get our bits out of our vectors.
|
||||||
|
We have a limited set of operations available to us on SIMD vectors as 128-bit
|
||||||
|
or 256-bit numbers, so we wind up needing to rip out 2 (or 4) 64-bit integers
|
||||||
|
from our vectors, and then run our verification step on each of those. The
|
||||||
|
verification step looks at the least significant bit set, and from its
|
||||||
|
position, we can derive the byte offset and bucket. (Again, as described
|
||||||
|
above.) Once we know the bucket, we do a fairly naive exhaustive search for
|
||||||
|
every literal in that bucket. (Hyperscan is a bit smarter here and uses a hash
|
||||||
|
table, but I haven't had time to thoroughly explore that. A few initial
|
||||||
|
half-hearted attempts resulted in worse performance.)
|
||||||
|
|
||||||
|
## AVX
|
||||||
|
|
||||||
|
The AVX version of Teddy extrapolates almost perfectly from the SSE version.
|
||||||
|
The only hickup is that PALIGNR is used to align chunks in the 16-bit version,
|
||||||
|
and there is no equivalent instruction in AVX. AVX does have VPALIGNR, but it
|
||||||
|
only works within 128-bit lanes. So there's a bit of tomfoolery to get around
|
||||||
|
this by shuffling the vectors before calling VPALIGNR.
|
||||||
|
|
||||||
|
The only other aspect to AVX is that since our masks are still fundamentally
|
||||||
|
16-bytes (0x0-0xF), they are duplicated to 32-bytes, so that they can apply to
|
||||||
|
32-byte chunks.
|
||||||
|
|
||||||
|
## Fat Teddy
|
||||||
|
|
||||||
|
In the version of Teddy described above, 8 buckets are used to group patterns
|
||||||
|
that we want to search for. However, when AVX is available, we can extend the
|
||||||
|
number of buckets to 16 by permitting each byte in our masks to use 16-bits
|
||||||
|
instead of 8-bits to represent the buckets it belongs to. (This variant is also
|
||||||
|
in Hyperscan.) However, what we give up is the ability to scan 32 bytes at a
|
||||||
|
time, even though we're using AVX. Instead, we have to scan 16 bytes at a time.
|
||||||
|
What we gain, though, is (hopefully) less work in our verification routine.
|
||||||
|
It patterns are more spread out across more buckets, then there should overall
|
||||||
|
be fewer false positives. In general, Fat Teddy permits us to grow our capacity
|
||||||
|
a bit and search for more literals before Teddy gets overwhelmed.
|
||||||
|
|
||||||
|
The tricky part of Fat Teddy is in how we adjust our masks and our verification
|
||||||
|
procedure. For the masks, we simply represent the first 8 buckets in each of
|
||||||
|
the low 16 bytes, and then the second 8 buckets in each of the high 16 bytes.
|
||||||
|
Then, in the search loop, instead of loading 32 bytes from the haystack, we
|
||||||
|
load the same 16 bytes from the haystack into both the low and high 16 byte
|
||||||
|
portions of our 256-bit vector. So for example, a mask might look like this:
|
||||||
|
|
||||||
|
bits: 00100001 00000000 ... 11000000 00000000 00000001 ... 00000000
|
||||||
|
byte: 31 30 16 15 14 0
|
||||||
|
offset: 15 14 0 15 14 0
|
||||||
|
buckets: 8-15 8-15 8-15 0-7 0-7 0-7
|
||||||
|
|
||||||
|
Where `byte` is the position in the vector (higher numbers corresponding to
|
||||||
|
more significant bits), `offset` is the corresponding position in the haystack
|
||||||
|
chunk, and `buckets` corresponds to the bucket assignments for that particular
|
||||||
|
byte.
|
||||||
|
|
||||||
|
In particular, notice that the bucket assignments for offset `0` are spread
|
||||||
|
out between bytes `0` and `16`. This works well for the chunk-by-chunk search
|
||||||
|
procedure, but verification really wants to process all bucket assignments for
|
||||||
|
each offset at once. Otherwise, we might wind up finding a match at offset
|
||||||
|
`1` in one the first 8 buckets, when we really should have reported a match
|
||||||
|
at offset `0` in one of the second 8 buckets. (Because we want the leftmost
|
||||||
|
match.)
|
||||||
|
|
||||||
|
Thus, for verification, we rearrange the above vector such that it is a
|
||||||
|
sequence of 16-bit integers, where the least significant 16-bit integer
|
||||||
|
corresponds to all of the bucket assignments for offset `0`. So with the
|
||||||
|
above vector, the least significant 16-bit integer would be
|
||||||
|
|
||||||
|
11000000 000000
|
||||||
|
|
||||||
|
which was taken from bytes `16` and `0`. Then the verification step pretty much
|
||||||
|
runs as described, except with 16 buckets instead of 8.
|
||||||
|
|
||||||
|
|
||||||
|
# References
|
||||||
|
|
||||||
|
- **[1]** [Hyperscan on GitHub](https://github.com/intel/hyperscan),
|
||||||
|
[webpage](https://www.hyperscan.io/)
|
||||||
|
- **[2a]** Ben-Kiki, O., Bille, P., Breslauer, D., Gasieniec, L., Grossi, R.,
|
||||||
|
& Weimann, O. (2011).
|
||||||
|
_Optimal packed string matching_.
|
||||||
|
In LIPIcs-Leibniz International Proceedings in Informatics (Vol. 13).
|
||||||
|
Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik.
|
||||||
|
DOI: 10.4230/LIPIcs.FSTTCS.2011.423.
|
||||||
|
[PDF](https://drops.dagstuhl.de/opus/volltexte/2011/3355/pdf/37.pdf).
|
||||||
|
- **[2b]** Ben-Kiki, O., Bille, P., Breslauer, D., Ga̧sieniec, L., Grossi, R.,
|
||||||
|
& Weimann, O. (2014).
|
||||||
|
_Towards optimal packed string matching_.
|
||||||
|
Theoretical Computer Science, 525, 111-129.
|
||||||
|
DOI: 10.1016/j.tcs.2013.06.013.
|
||||||
|
[PDF](https://www.cs.haifa.ac.il/~oren/Publications/bpsm.pdf).
|
||||||
|
- **[3]** Bille, P. (2011).
|
||||||
|
_Fast searching in packed strings_.
|
||||||
|
Journal of Discrete Algorithms, 9(1), 49-56.
|
||||||
|
DOI: 10.1016/j.jda.2010.09.003.
|
||||||
|
[PDF](https://www.sciencedirect.com/science/article/pii/S1570866710000353).
|
||||||
|
- **[4a]** Faro, S., & Külekci, M. O. (2012, October).
|
||||||
|
_Fast multiple string matching using streaming SIMD extensions technology_.
|
||||||
|
In String Processing and Information Retrieval (pp. 217-228).
|
||||||
|
Springer Berlin Heidelberg.
|
||||||
|
DOI: 10.1007/978-3-642-34109-0_23.
|
||||||
|
[PDF](https://www.dmi.unict.it/faro/papers/conference/faro32.pdf).
|
||||||
|
- **[4b]** Faro, S., & Külekci, M. O. (2013, September).
|
||||||
|
_Towards a Very Fast Multiple String Matching Algorithm for Short Patterns_.
|
||||||
|
In Stringology (pp. 78-91).
|
||||||
|
[PDF](https://www.dmi.unict.it/faro/papers/conference/faro36.pdf).
|
||||||
|
- **[4c]** Faro, S., & Külekci, M. O. (2013, January).
|
||||||
|
_Fast packed string matching for short patterns_.
|
||||||
|
In Proceedings of the Meeting on Algorithm Engineering & Expermiments
|
||||||
|
(pp. 113-121).
|
||||||
|
Society for Industrial and Applied Mathematics.
|
||||||
|
[PDF](https://arxiv.org/pdf/1209.6449.pdf).
|
||||||
|
- **[4d]** Faro, S., & Külekci, M. O. (2014).
|
||||||
|
_Fast and flexible packed string matching_.
|
||||||
|
Journal of Discrete Algorithms, 28, 61-72.
|
||||||
|
DOI: 10.1016/j.jda.2014.07.003.
|
||||||
|
|
||||||
|
[1_u]: https://github.com/intel/hyperscan
|
||||||
|
[5_u]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
|
||||||
792
.gear/predownloaded-development/vendor/aho-corasick/src/packed/teddy/builder.rs
vendored
Normal file
792
.gear/predownloaded-development/vendor/aho-corasick/src/packed/teddy/builder.rs
vendored
Normal file
|
|
@ -0,0 +1,792 @@
|
||||||
|
use core::{
|
||||||
|
fmt::Debug,
|
||||||
|
panic::{RefUnwindSafe, UnwindSafe},
|
||||||
|
};
|
||||||
|
|
||||||
|
use alloc::sync::Arc;
|
||||||
|
|
||||||
|
use crate::packed::{ext::Pointer, pattern::Patterns, teddy::generic::Match};
|
||||||
|
|
||||||
|
/// A builder for constructing a Teddy matcher.
|
||||||
|
///
|
||||||
|
/// The builder primarily permits fine grained configuration of the Teddy
|
||||||
|
/// matcher. Most options are made only available for testing/benchmarking
|
||||||
|
/// purposes. In reality, options are automatically determined by the nature
|
||||||
|
/// and number of patterns given to the builder.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct Builder {
|
||||||
|
/// When none, this is automatically determined. Otherwise, `false` means
|
||||||
|
/// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
|
||||||
|
/// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
|
||||||
|
/// available and Fat Teddy was requested, no matcher will be built.
|
||||||
|
only_fat: Option<bool>,
|
||||||
|
/// When none, this is automatically determined. Otherwise, `false` means
|
||||||
|
/// that 128-bit vectors will be used (up to SSSE3 instructions) where as
|
||||||
|
/// `true` means that 256-bit vectors will be used. As with `fat`, if
|
||||||
|
/// 256-bit vectors are requested and they aren't available, then a
|
||||||
|
/// searcher will not be built.
|
||||||
|
only_256bit: Option<bool>,
|
||||||
|
/// When true (the default), the number of patterns will be used as a
|
||||||
|
/// heuristic for refusing construction of a Teddy searcher. The point here
|
||||||
|
/// is that too many patterns can overwhelm Teddy. But this can be disabled
|
||||||
|
/// in cases where the caller knows better.
|
||||||
|
heuristic_pattern_limits: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Builder {
|
||||||
|
fn default() -> Builder {
|
||||||
|
Builder::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Builder {
|
||||||
|
/// Create a new builder for configuring a Teddy matcher.
|
||||||
|
pub(crate) fn new() -> Builder {
|
||||||
|
Builder {
|
||||||
|
only_fat: None,
|
||||||
|
only_256bit: None,
|
||||||
|
heuristic_pattern_limits: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a matcher for the set of patterns given. If a matcher could not
|
||||||
|
/// be built, then `None` is returned.
|
||||||
|
///
|
||||||
|
/// Generally, a matcher isn't built if the necessary CPU features aren't
|
||||||
|
/// available, an unsupported target or if the searcher is believed to be
|
||||||
|
/// slower than standard techniques (i.e., if there are too many literals).
|
||||||
|
pub(crate) fn build(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
|
||||||
|
self.build_imp(patterns)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
|
||||||
|
/// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful
|
||||||
|
/// for a larger set of literals.
|
||||||
|
///
|
||||||
|
/// `None` is the default, which results in an automatic selection based
|
||||||
|
/// on the number of literals and available CPU features.
|
||||||
|
pub(crate) fn only_fat(&mut self, yes: Option<bool>) -> &mut Builder {
|
||||||
|
self.only_fat = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
|
||||||
|
/// Generally, a larger vector size is better since it either permits
|
||||||
|
/// matching more patterns or matching more bytes in the haystack at once.
|
||||||
|
///
|
||||||
|
/// `None` is the default, which results in an automatic selection based on
|
||||||
|
/// the number of literals and available CPU features.
|
||||||
|
pub(crate) fn only_256bit(&mut self, yes: Option<bool>) -> &mut Builder {
|
||||||
|
self.only_256bit = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Request that heuristic limitations on the number of patterns be
|
||||||
|
/// employed. This useful to disable for benchmarking where one wants to
|
||||||
|
/// explore how Teddy performs on large number of patterns even if the
|
||||||
|
/// heuristics would otherwise refuse construction.
|
||||||
|
///
|
||||||
|
/// This is enabled by default.
|
||||||
|
pub(crate) fn heuristic_pattern_limits(
|
||||||
|
&mut self,
|
||||||
|
yes: bool,
|
||||||
|
) -> &mut Builder {
|
||||||
|
self.heuristic_pattern_limits = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_imp(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
|
||||||
|
let patlimit = self.heuristic_pattern_limits;
|
||||||
|
// There's no particular reason why we limit ourselves to little endian
|
||||||
|
// here, but it seems likely that some parts of Teddy as they are
|
||||||
|
// currently written (e.g., the uses of `trailing_zeros`) are likely
|
||||||
|
// wrong on non-little-endian targets. Such things are likely easy to
|
||||||
|
// fix, but at the time of writing (2023/09/18), I actually do not know
|
||||||
|
// how to test this code on a big-endian target. So for now, we're
|
||||||
|
// conservative and just bail out.
|
||||||
|
if !cfg!(target_endian = "little") {
|
||||||
|
debug!("skipping Teddy because target isn't little endian");
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// Too many patterns will overwhelm Teddy and likely lead to slow
|
||||||
|
// downs, typically in the verification step.
|
||||||
|
if patlimit && patterns.len() > 64 {
|
||||||
|
debug!("skipping Teddy because of too many patterns");
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
|
||||||
|
{
|
||||||
|
use self::x86_64::{FatAVX2, SlimAVX2, SlimSSSE3};
|
||||||
|
|
||||||
|
let mask_len = core::cmp::min(4, patterns.minimum_len());
|
||||||
|
let beefy = patterns.len() > 32;
|
||||||
|
let has_avx2 = self::x86_64::is_available_avx2();
|
||||||
|
let has_ssse3 = has_avx2 || self::x86_64::is_available_ssse3();
|
||||||
|
let use_avx2 = if self.only_256bit == Some(true) {
|
||||||
|
if !has_avx2 {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy because avx2 was demanded but unavailable"
|
||||||
|
);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
true
|
||||||
|
} else if self.only_256bit == Some(false) {
|
||||||
|
if !has_ssse3 {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy because ssse3 was demanded but unavailable"
|
||||||
|
);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
false
|
||||||
|
} else if !has_ssse3 && !has_avx2 {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy because ssse3 and avx2 are unavailable"
|
||||||
|
);
|
||||||
|
return None;
|
||||||
|
} else {
|
||||||
|
has_avx2
|
||||||
|
};
|
||||||
|
let fat = match self.only_fat {
|
||||||
|
None => use_avx2 && beefy,
|
||||||
|
Some(false) => false,
|
||||||
|
Some(true) if !use_avx2 => {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy because fat was demanded, but fat \
|
||||||
|
Teddy requires avx2 which is unavailable"
|
||||||
|
);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(true) => true,
|
||||||
|
};
|
||||||
|
// Just like for aarch64, it's possible that too many patterns will
|
||||||
|
// overhwelm Teddy. Unlike aarch64 though, we have Fat teddy which
|
||||||
|
// helps things scale a bit more by spreading patterns over more
|
||||||
|
// buckets.
|
||||||
|
//
|
||||||
|
// These thresholds were determined by looking at the measurements
|
||||||
|
// for the rust/aho-corasick/packed/leftmost-first and
|
||||||
|
// rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
|
||||||
|
// benchmarks.
|
||||||
|
if patlimit && mask_len == 1 && patterns.len() > 16 {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy (mask len: 1) because there are \
|
||||||
|
too many patterns",
|
||||||
|
);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
match (mask_len, use_avx2, fat) {
|
||||||
|
(1, false, _) => {
|
||||||
|
debug!("Teddy choice: 128-bit slim, 1 byte");
|
||||||
|
SlimSSSE3::<1>::new(&patterns)
|
||||||
|
}
|
||||||
|
(1, true, false) => {
|
||||||
|
debug!("Teddy choice: 256-bit slim, 1 byte");
|
||||||
|
SlimAVX2::<1>::new(&patterns)
|
||||||
|
}
|
||||||
|
(1, true, true) => {
|
||||||
|
debug!("Teddy choice: 256-bit fat, 1 byte");
|
||||||
|
FatAVX2::<1>::new(&patterns)
|
||||||
|
}
|
||||||
|
(2, false, _) => {
|
||||||
|
debug!("Teddy choice: 128-bit slim, 2 bytes");
|
||||||
|
SlimSSSE3::<2>::new(&patterns)
|
||||||
|
}
|
||||||
|
(2, true, false) => {
|
||||||
|
debug!("Teddy choice: 256-bit slim, 2 bytes");
|
||||||
|
SlimAVX2::<2>::new(&patterns)
|
||||||
|
}
|
||||||
|
(2, true, true) => {
|
||||||
|
debug!("Teddy choice: 256-bit fat, 2 bytes");
|
||||||
|
FatAVX2::<2>::new(&patterns)
|
||||||
|
}
|
||||||
|
(3, false, _) => {
|
||||||
|
debug!("Teddy choice: 128-bit slim, 3 bytes");
|
||||||
|
SlimSSSE3::<3>::new(&patterns)
|
||||||
|
}
|
||||||
|
(3, true, false) => {
|
||||||
|
debug!("Teddy choice: 256-bit slim, 3 bytes");
|
||||||
|
SlimAVX2::<3>::new(&patterns)
|
||||||
|
}
|
||||||
|
(3, true, true) => {
|
||||||
|
debug!("Teddy choice: 256-bit fat, 3 bytes");
|
||||||
|
FatAVX2::<3>::new(&patterns)
|
||||||
|
}
|
||||||
|
(4, false, _) => {
|
||||||
|
debug!("Teddy choice: 128-bit slim, 4 bytes");
|
||||||
|
SlimSSSE3::<4>::new(&patterns)
|
||||||
|
}
|
||||||
|
(4, true, false) => {
|
||||||
|
debug!("Teddy choice: 256-bit slim, 4 bytes");
|
||||||
|
SlimAVX2::<4>::new(&patterns)
|
||||||
|
}
|
||||||
|
(4, true, true) => {
|
||||||
|
debug!("Teddy choice: 256-bit fat, 4 bytes");
|
||||||
|
FatAVX2::<4>::new(&patterns)
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
debug!("no supported Teddy configuration found");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[cfg(all(
|
||||||
|
target_arch = "aarch64",
|
||||||
|
target_feature = "neon",
|
||||||
|
target_endian = "little"
|
||||||
|
))]
|
||||||
|
{
|
||||||
|
use self::aarch64::SlimNeon;
|
||||||
|
|
||||||
|
let mask_len = core::cmp::min(4, patterns.minimum_len());
|
||||||
|
if self.only_256bit == Some(true) {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy because 256-bits were demanded \
|
||||||
|
but unavailable"
|
||||||
|
);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if self.only_fat == Some(true) {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy because fat was demanded but unavailable"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// Since we don't have Fat teddy in aarch64 (I think we'd want at
|
||||||
|
// least 256-bit vectors for that), we need to be careful not to
|
||||||
|
// allow too many patterns as it might overwhelm Teddy. Generally
|
||||||
|
// speaking, as the mask length goes up, the more patterns we can
|
||||||
|
// handle because the mask length results in fewer candidates
|
||||||
|
// generated.
|
||||||
|
//
|
||||||
|
// These thresholds were determined by looking at the measurements
|
||||||
|
// for the rust/aho-corasick/packed/leftmost-first and
|
||||||
|
// rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
|
||||||
|
// benchmarks.
|
||||||
|
match mask_len {
|
||||||
|
1 => {
|
||||||
|
if patlimit && patterns.len() > 16 {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy (mask len: 1) because there are \
|
||||||
|
too many patterns",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
debug!("Teddy choice: 128-bit slim, 1 byte");
|
||||||
|
SlimNeon::<1>::new(&patterns)
|
||||||
|
}
|
||||||
|
2 => {
|
||||||
|
if patlimit && patterns.len() > 32 {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy (mask len: 2) because there are \
|
||||||
|
too many patterns",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
debug!("Teddy choice: 128-bit slim, 2 bytes");
|
||||||
|
SlimNeon::<2>::new(&patterns)
|
||||||
|
}
|
||||||
|
3 => {
|
||||||
|
if patlimit && patterns.len() > 48 {
|
||||||
|
debug!(
|
||||||
|
"skipping Teddy (mask len: 3) because there are \
|
||||||
|
too many patterns",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
debug!("Teddy choice: 128-bit slim, 3 bytes");
|
||||||
|
SlimNeon::<3>::new(&patterns)
|
||||||
|
}
|
||||||
|
4 => {
|
||||||
|
debug!("Teddy choice: 128-bit slim, 4 bytes");
|
||||||
|
SlimNeon::<4>::new(&patterns)
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
debug!("no supported Teddy configuration found");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[cfg(not(any(
|
||||||
|
all(target_arch = "x86_64", target_feature = "sse2"),
|
||||||
|
all(
|
||||||
|
target_arch = "aarch64",
|
||||||
|
target_feature = "neon",
|
||||||
|
target_endian = "little"
|
||||||
|
)
|
||||||
|
)))]
|
||||||
|
{
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A searcher that dispatches to one of several possible Teddy variants.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct Searcher {
|
||||||
|
/// The Teddy variant we use. We use dynamic dispatch under the theory that
|
||||||
|
/// it results in better codegen then a enum, although this is a specious
|
||||||
|
/// claim.
|
||||||
|
///
|
||||||
|
/// This `Searcher` is essentially a wrapper for a `SearcherT` trait
|
||||||
|
/// object. We just make `memory_usage` and `minimum_len` available without
|
||||||
|
/// going through dynamic dispatch.
|
||||||
|
imp: Arc<dyn SearcherT>,
|
||||||
|
/// Total heap memory used by the Teddy variant.
|
||||||
|
memory_usage: usize,
|
||||||
|
/// The minimum haystack length this searcher can handle. It is intended
|
||||||
|
/// for callers to use some other search routine (such as Rabin-Karp) in
|
||||||
|
/// cases where the haystack (or remainer of the haystack) is too short.
|
||||||
|
minimum_len: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Searcher {
|
||||||
|
/// Look for the leftmost occurrence of any pattern in this search in the
|
||||||
|
/// given haystack starting at the given position.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// This panics when `haystack[at..].len()` is less than the minimum length
|
||||||
|
/// for this haystack.
|
||||||
|
#[inline(always)]
|
||||||
|
pub(crate) fn find(
|
||||||
|
&self,
|
||||||
|
haystack: &[u8],
|
||||||
|
at: usize,
|
||||||
|
) -> Option<crate::Match> {
|
||||||
|
// SAFETY: The Teddy implementations all require a minimum haystack
|
||||||
|
// length, and this is required for safety. Therefore, we assert it
|
||||||
|
// here in order to make this method sound.
|
||||||
|
assert!(haystack[at..].len() >= self.minimum_len);
|
||||||
|
let hayptr = haystack.as_ptr();
|
||||||
|
// SAFETY: Construction of the searcher guarantees that we are able
|
||||||
|
// to run it in the current environment (i.e., we won't get an AVX2
|
||||||
|
// searcher on a x86-64 CPU without AVX2 support). Also, the pointers
|
||||||
|
// are valid as they are derived directly from a borrowed slice.
|
||||||
|
let teddym = unsafe {
|
||||||
|
self.imp.find(hayptr.add(at), hayptr.add(haystack.len()))?
|
||||||
|
};
|
||||||
|
let start = teddym.start().as_usize().wrapping_sub(hayptr.as_usize());
|
||||||
|
let end = teddym.end().as_usize().wrapping_sub(hayptr.as_usize());
|
||||||
|
let span = crate::Span { start, end };
|
||||||
|
// OK because we won't permit the construction of a searcher that
|
||||||
|
// could report a pattern ID bigger than what can fit in the crate-wide
|
||||||
|
// PatternID type.
|
||||||
|
let pid = crate::PatternID::new_unchecked(teddym.pattern().as_usize());
|
||||||
|
let m = crate::Match::new(pid, span);
|
||||||
|
Some(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the approximate total amount of heap used by this type, in
|
||||||
|
/// units of bytes.
|
||||||
|
#[inline(always)]
|
||||||
|
pub(crate) fn memory_usage(&self) -> usize {
|
||||||
|
self.memory_usage
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the minimum length, in bytes, that a haystack must be in order
|
||||||
|
/// to use it with this searcher.
|
||||||
|
#[inline(always)]
|
||||||
|
pub(crate) fn minimum_len(&self) -> usize {
|
||||||
|
self.minimum_len
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A trait that provides dynamic dispatch over the different possible Teddy
|
||||||
|
/// variants on the same algorithm.
|
||||||
|
///
|
||||||
|
/// On `x86_64` for example, it isn't known until runtime which of 12 possible
|
||||||
|
/// variants will be used. One might use one of the four slim 128-bit vector
|
||||||
|
/// variants, or one of the four 256-bit vector variants or even one of the
|
||||||
|
/// four fat 256-bit vector variants.
|
||||||
|
///
|
||||||
|
/// Since this choice is generally made when the Teddy searcher is constructed
|
||||||
|
/// and this choice is based on the patterns given and what the current CPU
|
||||||
|
/// supports, it follows that there must be some kind of indirection at search
|
||||||
|
/// time that "selects" the variant chosen at build time.
|
||||||
|
///
|
||||||
|
/// There are a few different ways to go about this. One approach is to use an
|
||||||
|
/// enum. It works fine, but in my experiments, this generally results in worse
|
||||||
|
/// codegen. Another approach, which is what we use here, is dynamic dispatch
|
||||||
|
/// via a trait object. We basically implement this trait for each possible
|
||||||
|
/// variant, select the variant we want at build time and convert it to a
|
||||||
|
/// trait object for use at search time.
|
||||||
|
///
|
||||||
|
/// Another approach is to use function pointers and stick each of the possible
|
||||||
|
/// variants into a union. This is essentially isomorphic to the dynamic
|
||||||
|
/// dispatch approach, but doesn't require any allocations. Since this crate
|
||||||
|
/// requires `alloc`, there's no real reason (AFAIK) to go down this path. (The
|
||||||
|
/// `memchr` crate does this.)
|
||||||
|
trait SearcherT:
|
||||||
|
Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static
|
||||||
|
{
|
||||||
|
/// Execute a search on the given haystack (identified by `start` and `end`
|
||||||
|
/// raw pointers).
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// Essentially, the `start` and `end` pointers must be valid and point
|
||||||
|
/// to a haystack one can read. As long as you derive them from, for
|
||||||
|
/// example, a `&[u8]`, they should automatically satisfy all of the safety
|
||||||
|
/// obligations:
|
||||||
|
///
|
||||||
|
/// * Both `start` and `end` must be valid for reads.
|
||||||
|
/// * Both `start` and `end` must point to an initialized value.
|
||||||
|
/// * Both `start` and `end` must point to the same allocated object and
|
||||||
|
/// must either be in bounds or at most one byte past the end of the
|
||||||
|
/// allocated object.
|
||||||
|
/// * Both `start` and `end` must be _derived from_ a pointer to the same
|
||||||
|
/// object.
|
||||||
|
/// * The distance between `start` and `end` must not overflow `isize`.
|
||||||
|
/// * The distance being in bounds must not rely on "wrapping around" the
|
||||||
|
/// address space.
|
||||||
|
/// * It must be the case that `start <= end`.
|
||||||
|
/// * `end - start` must be greater than the minimum length for this
|
||||||
|
/// searcher.
|
||||||
|
///
|
||||||
|
/// Also, it is expected that implementations of this trait will tag this
|
||||||
|
/// method with a `target_feature` attribute. Callers must ensure that
|
||||||
|
/// they are executing this method in an environment where that attribute
|
||||||
|
/// is valid.
|
||||||
|
unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match>;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
|
||||||
|
mod x86_64 {
|
||||||
|
use core::arch::x86_64::{__m128i, __m256i};
|
||||||
|
|
||||||
|
use alloc::sync::Arc;
|
||||||
|
|
||||||
|
use crate::packed::{
|
||||||
|
ext::Pointer,
|
||||||
|
pattern::Patterns,
|
||||||
|
teddy::generic::{self, Match},
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{Searcher, SearcherT};
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(super) struct SlimSSSE3<const BYTES: usize> {
|
||||||
|
slim128: generic::Slim<__m128i, BYTES>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
|
||||||
|
macro_rules! slim_ssse3 {
|
||||||
|
($len:expr) => {
|
||||||
|
impl SlimSSSE3<$len> {
|
||||||
|
/// Creates a new searcher using "slim" Teddy with 128-bit
|
||||||
|
/// vectors. If SSSE3 is not available in the current
|
||||||
|
/// environment, then this returns `None`.
|
||||||
|
pub(super) fn new(
|
||||||
|
patterns: &Arc<Patterns>,
|
||||||
|
) -> Option<Searcher> {
|
||||||
|
if !is_available_ssse3() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(unsafe { SlimSSSE3::<$len>::new_unchecked(patterns) })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new searcher using "slim" Teddy with 256-bit
|
||||||
|
/// vectors without checking whether SSSE3 is available or not.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// Callers must ensure that SSSE3 is available in the current
|
||||||
|
/// environment.
|
||||||
|
#[target_feature(enable = "ssse3")]
|
||||||
|
unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
|
||||||
|
let slim128 = generic::Slim::<__m128i, $len>::new(
|
||||||
|
Arc::clone(patterns),
|
||||||
|
);
|
||||||
|
let memory_usage = slim128.memory_usage();
|
||||||
|
let minimum_len = slim128.minimum_len();
|
||||||
|
let imp = Arc::new(SlimSSSE3 { slim128 });
|
||||||
|
Searcher { imp, memory_usage, minimum_len }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SearcherT for SlimSSSE3<$len> {
|
||||||
|
#[target_feature(enable = "ssse3")]
|
||||||
|
#[inline]
|
||||||
|
unsafe fn find(
|
||||||
|
&self,
|
||||||
|
start: *const u8,
|
||||||
|
end: *const u8,
|
||||||
|
) -> Option<Match> {
|
||||||
|
// SAFETY: All obligations except for `target_feature` are
|
||||||
|
// passed to the caller. Our use of `target_feature` is
|
||||||
|
// safe because construction of this type requires that the
|
||||||
|
// requisite target features are available.
|
||||||
|
self.slim128.find(start, end)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
slim_ssse3!(1);
|
||||||
|
slim_ssse3!(2);
|
||||||
|
slim_ssse3!(3);
|
||||||
|
slim_ssse3!(4);
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(super) struct SlimAVX2<const BYTES: usize> {
|
||||||
|
slim128: generic::Slim<__m128i, BYTES>,
|
||||||
|
slim256: generic::Slim<__m256i, BYTES>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
|
||||||
|
macro_rules! slim_avx2 {
    ($len:expr) => {
        impl SlimAVX2<$len> {
            /// Creates a new searcher using "slim" Teddy with 256-bit
            /// vectors. If AVX2 is not available in the current
            /// environment, then this returns `None`.
            pub(super) fn new(
                patterns: &Arc<Patterns>,
            ) -> Option<Searcher> {
                if !is_available_avx2() {
                    return None;
                }
                // SAFETY: We just checked that AVX2 is available.
                Some(unsafe { SlimAVX2::<$len>::new_unchecked(patterns) })
            }

            /// Creates a new searcher using "slim" Teddy with 256-bit
            /// vectors without checking whether AVX2 is available or not.
            ///
            /// # Safety
            ///
            /// Callers must ensure that AVX2 is available in the current
            /// environment.
            #[target_feature(enable = "avx2")]
            unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
                // Build both a 128-bit and a 256-bit searcher from the same
                // patterns. `find` below picks between them based on the
                // haystack length.
                let slim128 = generic::Slim::<__m128i, $len>::new(
                    Arc::clone(&patterns),
                );
                let slim256 = generic::Slim::<__m256i, $len>::new(
                    Arc::clone(&patterns),
                );
                let memory_usage =
                    slim128.memory_usage() + slim256.memory_usage();
                // The searcher's overall minimum haystack length is the
                // 128-bit variant's, since short haystacks fall back to it.
                let minimum_len = slim128.minimum_len();
                let imp = Arc::new(SlimAVX2 { slim128, slim256 });
                Searcher { imp, memory_usage, minimum_len }
            }
        }

        impl SearcherT for SlimAVX2<$len> {
            #[target_feature(enable = "avx2")]
            #[inline]
            unsafe fn find(
                &self,
                start: *const u8,
                end: *const u8,
            ) -> Option<Match> {
                // SAFETY: All obligations except for `target_feature` are
                // passed to the caller. Our use of `target_feature` is
                // safe because construction of this type requires that the
                // requisite target features are available.
                let len = end.distance(start);
                // Haystacks too short for 256-bit vectors are handled by
                // the 128-bit searcher instead of failing outright.
                if len < self.slim256.minimum_len() {
                    self.slim128.find(start, end)
                } else {
                    self.slim256.find(start, end)
                }
            }
        }
    };
}

slim_avx2!(1);
slim_avx2!(2);
slim_avx2!(3);
slim_avx2!(4);
|
||||||
|
|
||||||
|
/// "Fat" Teddy using 256-bit AVX2 vectors, with masks of length `BYTES`.
/// Unlike "slim" Teddy, there is no 128-bit fallback variant here.
#[derive(Clone, Debug)]
pub(super) struct FatAVX2<const BYTES: usize> {
    fat256: generic::Fat<__m256i, BYTES>,
}
|
||||||
|
|
||||||
|
// Defines FatAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
// (The comments previously said "Slim"/"slim" — copy-paste from the slim
// macro — but this macro defines the "fat" Teddy wrappers.)
macro_rules! fat_avx2 {
    ($len:expr) => {
        impl FatAVX2<$len> {
            /// Creates a new searcher using "fat" Teddy with 256-bit
            /// vectors. If AVX2 is not available in the current
            /// environment, then this returns `None`.
            pub(super) fn new(
                patterns: &Arc<Patterns>,
            ) -> Option<Searcher> {
                if !is_available_avx2() {
                    return None;
                }
                // SAFETY: We just checked that AVX2 is available.
                Some(unsafe { FatAVX2::<$len>::new_unchecked(patterns) })
            }

            /// Creates a new searcher using "fat" Teddy with 256-bit
            /// vectors without checking whether AVX2 is available or not.
            ///
            /// # Safety
            ///
            /// Callers must ensure that AVX2 is available in the current
            /// environment.
            #[target_feature(enable = "avx2")]
            unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
                let fat256 = generic::Fat::<__m256i, $len>::new(
                    Arc::clone(&patterns),
                );
                let memory_usage = fat256.memory_usage();
                let minimum_len = fat256.minimum_len();
                let imp = Arc::new(FatAVX2 { fat256 });
                Searcher { imp, memory_usage, minimum_len }
            }
        }

        impl SearcherT for FatAVX2<$len> {
            #[target_feature(enable = "avx2")]
            #[inline]
            unsafe fn find(
                &self,
                start: *const u8,
                end: *const u8,
            ) -> Option<Match> {
                // SAFETY: All obligations except for `target_feature` are
                // passed to the caller. Our use of `target_feature` is
                // safe because construction of this type requires that the
                // requisite target features are available.
                self.fat256.find(start, end)
            }
        }
    };
}

fat_avx2!(1);
fat_avx2!(2);
fat_avx2!(3);
fat_avx2!(4);
|
||||||
|
|
||||||
|
/// Returns true when SSSE3 is usable in the current environment, either
/// because it was enabled at compile time or detected at runtime.
#[inline]
pub(super) fn is_available_ssse3() -> bool {
    // Without SSE2 at compile time there is no baseline x86 vector support,
    // so don't even attempt runtime detection.
    #[cfg(not(target_feature = "sse2"))]
    {
        false
    }
    #[cfg(target_feature = "sse2")]
    {
        // SSSE3 enabled at compile time: no runtime check needed.
        #[cfg(target_feature = "ssse3")]
        {
            true
        }
        #[cfg(not(target_feature = "ssse3"))]
        {
            // Runtime CPU feature detection requires `std`.
            #[cfg(feature = "std")]
            {
                std::is_x86_feature_detected!("ssse3")
            }
            #[cfg(not(feature = "std"))]
            {
                false
            }
        }
    }
}
|
||||||
|
|
||||||
|
/// Returns true when AVX2 is usable in the current environment, either
/// because it was enabled at compile time or detected at runtime.
#[inline]
pub(super) fn is_available_avx2() -> bool {
    // Without SSE2 at compile time there is no baseline x86 vector support,
    // so don't even attempt runtime detection.
    #[cfg(not(target_feature = "sse2"))]
    {
        false
    }
    #[cfg(target_feature = "sse2")]
    {
        // AVX2 enabled at compile time: no runtime check needed.
        #[cfg(target_feature = "avx2")]
        {
            true
        }
        #[cfg(not(target_feature = "avx2"))]
        {
            // Runtime CPU feature detection requires `std`.
            #[cfg(feature = "std")]
            {
                std::is_x86_feature_detected!("avx2")
            }
            #[cfg(not(feature = "std"))]
            {
                false
            }
        }
    }
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(
    target_arch = "aarch64",
    target_feature = "neon",
    target_endian = "little"
))]
mod aarch64 {
    use core::arch::aarch64::uint8x16_t;

    use alloc::sync::Arc;

    use crate::packed::{
        pattern::Patterns,
        teddy::generic::{self, Match},
    };

    use super::{Searcher, SearcherT};

    /// "Slim" Teddy using 128-bit NEON vectors, with masks of length `BYTES`.
    #[derive(Clone, Debug)]
    pub(super) struct SlimNeon<const BYTES: usize> {
        slim128: generic::Slim<uint8x16_t, BYTES>,
    }

    // Defines SlimNeon wrapper functions for 1, 2, 3 and 4 bytes.
    // (Comments in this module previously referenced SSSE3/SlimSSSE3 and
    // 256-bit vectors — copy-paste from the x86 code — but this is the
    // NEON, 128-bit implementation.)
    macro_rules! slim_neon {
        ($len:expr) => {
            impl SlimNeon<$len> {
                /// Creates a new searcher using "slim" Teddy with 128-bit
                /// NEON vectors. This module is only compiled when NEON is
                /// enabled at compile time, so this never returns `None`.
                pub(super) fn new(
                    patterns: &Arc<Patterns>,
                ) -> Option<Searcher> {
                    // SAFETY: NEON is guaranteed by this module's cfg gate.
                    Some(unsafe { SlimNeon::<$len>::new_unchecked(patterns) })
                }

                /// Creates a new searcher using "slim" Teddy with 128-bit
                /// NEON vectors without checking whether NEON is available
                /// or not.
                ///
                /// # Safety
                ///
                /// Callers must ensure that NEON is available in the current
                /// environment.
                #[target_feature(enable = "neon")]
                unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
                    let slim128 = generic::Slim::<uint8x16_t, $len>::new(
                        Arc::clone(patterns),
                    );
                    let memory_usage = slim128.memory_usage();
                    let minimum_len = slim128.minimum_len();
                    let imp = Arc::new(SlimNeon { slim128 });
                    Searcher { imp, memory_usage, minimum_len }
                }
            }

            impl SearcherT for SlimNeon<$len> {
                #[target_feature(enable = "neon")]
                #[inline]
                unsafe fn find(
                    &self,
                    start: *const u8,
                    end: *const u8,
                ) -> Option<Match> {
                    // SAFETY: All obligations except for `target_feature` are
                    // passed to the caller. Our use of `target_feature` is
                    // safe because construction of this type requires that the
                    // requisite target features are available.
                    self.slim128.find(start, end)
                }
            }
        };
    }

    slim_neon!(1);
    slim_neon!(2);
    slim_neon!(3);
    slim_neon!(4);
}
|
||||||
1382
.gear/predownloaded-development/vendor/aho-corasick/src/packed/teddy/generic.rs
vendored
Normal file
1382
.gear/predownloaded-development/vendor/aho-corasick/src/packed/teddy/generic.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
9
.gear/predownloaded-development/vendor/aho-corasick/src/packed/teddy/mod.rs
vendored
Normal file
9
.gear/predownloaded-development/vendor/aho-corasick/src/packed/teddy/mod.rs
vendored
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
// Regrettable, but Teddy stuff just isn't used on all targets. And for some
// targets, like aarch64, only "slim" Teddy is used and so "fat" Teddy gets a
// bunch of dead-code warnings. Just not worth trying to squash them. Blech.
#![allow(dead_code)]

// Only the builder (and the `Searcher` it produces) are visible outside of
// this module; the vector implementations stay private.
pub(crate) use self::builder::{Builder, Searcher};

mod builder;
mod generic;
|
||||||
583
.gear/predownloaded-development/vendor/aho-corasick/src/packed/tests.rs
vendored
Normal file
583
.gear/predownloaded-development/vendor/aho-corasick/src/packed/tests.rs
vendored
Normal file
|
|
@ -0,0 +1,583 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use alloc::{
|
||||||
|
format,
|
||||||
|
string::{String, ToString},
|
||||||
|
vec,
|
||||||
|
vec::Vec,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
packed::{Config, MatchKind},
|
||||||
|
util::search::Match,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A description of a single test against a multi-pattern searcher.
///
/// A single test may not necessarily pass on every configuration of a
/// searcher. The tests are categorized and grouped appropriately below.
#[derive(Clone, Debug, Eq, PartialEq)]
struct SearchTest {
    /// The name of this test, for debugging.
    name: &'static str,
    /// The patterns to search for.
    patterns: &'static [&'static str],
    /// The text to search.
    haystack: &'static str,
    /// Each match is a triple of (pattern_index, start, end), where
    /// pattern_index is an index into `patterns` and `start`/`end` are indices
    /// into `haystack`.
    matches: &'static [(usize, usize, usize)],
}
|
||||||
|
|
||||||
|
/// An owned variant of `SearchTest`, produced by `SearchTest::variations`,
/// whose haystack has been padded with `offset` filler bytes.
struct SearchTestOwned {
    // How many `Z` padding bytes were added (kept for failure output).
    offset: usize,
    // The name of the originating `SearchTest`.
    name: String,
    // The patterns to search for.
    patterns: Vec<String>,
    // The (possibly padded) text to search.
    haystack: String,
    // Expected (pattern_index, start, end) triples, adjusted for padding.
    matches: Vec<(usize, usize, usize)>,
}
|
||||||
|
|
||||||
|
impl SearchTest {
|
||||||
|
fn variations(&self) -> Vec<SearchTestOwned> {
|
||||||
|
let count = if cfg!(miri) { 1 } else { 261 };
|
||||||
|
let mut tests = vec![];
|
||||||
|
for i in 0..count {
|
||||||
|
tests.push(self.offset_prefix(i));
|
||||||
|
tests.push(self.offset_suffix(i));
|
||||||
|
tests.push(self.offset_both(i));
|
||||||
|
}
|
||||||
|
tests
|
||||||
|
}
|
||||||
|
|
||||||
|
fn offset_both(&self, off: usize) -> SearchTestOwned {
|
||||||
|
SearchTestOwned {
|
||||||
|
offset: off,
|
||||||
|
name: self.name.to_string(),
|
||||||
|
patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
|
||||||
|
haystack: format!(
|
||||||
|
"{}{}{}",
|
||||||
|
"Z".repeat(off),
|
||||||
|
self.haystack,
|
||||||
|
"Z".repeat(off)
|
||||||
|
),
|
||||||
|
matches: self
|
||||||
|
.matches
|
||||||
|
.iter()
|
||||||
|
.map(|&(id, s, e)| (id, s + off, e + off))
|
||||||
|
.collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn offset_prefix(&self, off: usize) -> SearchTestOwned {
|
||||||
|
SearchTestOwned {
|
||||||
|
offset: off,
|
||||||
|
name: self.name.to_string(),
|
||||||
|
patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
|
||||||
|
haystack: format!("{}{}", "Z".repeat(off), self.haystack),
|
||||||
|
matches: self
|
||||||
|
.matches
|
||||||
|
.iter()
|
||||||
|
.map(|&(id, s, e)| (id, s + off, e + off))
|
||||||
|
.collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn offset_suffix(&self, off: usize) -> SearchTestOwned {
|
||||||
|
SearchTestOwned {
|
||||||
|
offset: off,
|
||||||
|
name: self.name.to_string(),
|
||||||
|
patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
|
||||||
|
haystack: format!("{}{}", self.haystack, "Z".repeat(off)),
|
||||||
|
matches: self.matches.to_vec(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Short-hand constructor for SearchTest. We use it a lot below.
/// `$name` becomes the test's debug name via `stringify!`.
macro_rules! t {
    ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
        SearchTest {
            name: stringify!($name),
            patterns: $patterns,
            haystack: $haystack,
            matches: $matches,
        }
    };
}
|
||||||
|
|
||||||
|
/// A collection of test groups.
type TestCollection = &'static [&'static [SearchTest]];

// Define several collections corresponding to the different type of match
// semantics supported. These collections have some overlap, but each
// collection should have some tests that no other collection has.

/// Tests for leftmost-first match semantics.
const PACKED_LEFTMOST_FIRST: TestCollection =
    &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY];

/// Tests for leftmost-longest match semantics.
const PACKED_LEFTMOST_LONGEST: TestCollection =
    &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY];

// Now define the individual tests that make up the collections above.

/// A collection of tests for the that should always be true regardless of
/// match semantics. That is, all combinations of leftmost-{first, longest}
/// should produce the same answer.
const BASICS: &'static [SearchTest] = &[
    t!(basic001, &["a"], "", &[]),
    t!(basic010, &["a"], "a", &[(0, 0, 1)]),
    t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
    t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
    t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
    t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
    t!(basic060, &["a"], "bbb", &[]),
    t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
    t!(basic100, &["aa"], "", &[]),
    t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
    t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
    t!(basic130, &["aa"], "abbab", &[]),
    t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
    t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]),
    t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
    t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
    t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
    t!(basic230, &["abcd"], "abcd", &[(0, 0, 4)]),
    t!(basic240, &["abcd"], "zazabzabcdz", &[(0, 6, 10)]),
    t!(basic250, &["abcd"], "zazabcdzabcdz", &[(0, 3, 7), (0, 8, 12)]),
    t!(basic300, &["a", "b"], "", &[]),
    t!(basic310, &["a", "b"], "z", &[]),
    t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
    t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
    t!(
        basic340,
        &["a", "b"],
        "abba",
        &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
    ),
    t!(
        basic350,
        &["b", "a"],
        "abba",
        &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
    ),
    t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
    t!(basic400, &["foo", "bar"], "", &[]),
    t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
    t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
    t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
    t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
    t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
    t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
    t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
    t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
    t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
    t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
    t!(
        basic720,
        &["yabcdef", "bcdeyabc", "abcdezghi"],
        "yabcdezghi",
        &[(2, 1, 10),]
    ),
    t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
    t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
    t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
    t!(
        basic840,
        &["ab", "ba"],
        "abababa",
        &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
    ),
    t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
];

/// Tests for leftmost match semantics. These should pass for both
/// leftmost-first and leftmost-longest match kinds. Stated differently, among
/// ambiguous matches, the longest match and the match that appeared first when
/// constructing the automaton should always be the same.
const LEFTMOST: &'static [SearchTest] = &[
    t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
    t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
    t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
    t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
    t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
    t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
    t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
    t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
    t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
    t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
    t!(
        leftmost360,
        &["abcdefghi", "hz", "abcdefgh"],
        "abcdefghz",
        &[(2, 0, 8),]
    ),
    t!(
        leftmost370,
        &["abcdefghi", "cde", "hz", "abcdefgh"],
        "abcdefghz",
        &[(3, 0, 8),]
    ),
    t!(
        leftmost380,
        &["abcdefghi", "hz", "abcdefgh", "a"],
        "abcdefghz",
        &[(2, 0, 8),]
    ),
    t!(
        leftmost390,
        &["b", "abcdefghi", "hz", "abcdefgh"],
        "abcdefghz",
        &[(3, 0, 8),]
    ),
    t!(
        leftmost400,
        &["h", "abcdefghi", "hz", "abcdefgh"],
        "abcdefghz",
        &[(3, 0, 8),]
    ),
    t!(
        leftmost410,
        &["z", "abcdefghi", "hz", "abcdefgh"],
        "abcdefghz",
        &[(3, 0, 8), (0, 8, 9),]
    ),
];

/// Tests for non-overlapping leftmost-first match semantics. These tests
/// should generally be specific to leftmost-first, which means they should
/// generally fail under leftmost-longest semantics.
const LEFTMOST_FIRST: &'static [SearchTest] = &[
    t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
    t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
    t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
    t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
    t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
    t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
    t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
    t!(
        leftfirst310,
        &["abcd", "b", "bce", "ce"],
        "abce",
        &[(1, 1, 2), (3, 2, 4),]
    ),
    t!(
        leftfirst320,
        &["a", "abcdefghi", "hz", "abcdefgh"],
        "abcdefghz",
        &[(0, 0, 1), (2, 7, 9),]
    ),
    t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
    t!(
        leftfirst340,
        &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
        "abcdef",
        &[(0, 0, 6)]
    ),
];

/// Tests for non-overlapping leftmost-longest match semantics. These tests
/// should generally be specific to leftmost-longest, which means they should
/// generally fail under leftmost-first semantics.
const LEFTMOST_LONGEST: &'static [SearchTest] = &[
    t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
    t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
    t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
    t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
    t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
    t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
    t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
    t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
    t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
    t!(
        leftlong310,
        &["a", "abcdefghi", "hz", "abcdefgh"],
        "abcdefghz",
        &[(3, 0, 8),]
    ),
    t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
    t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
    t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
];

/// Regression tests that are applied to all combinations.
///
/// If regression tests are needed for specific match semantics, then add them
/// to the appropriate group above.
const REGRESSION: &'static [SearchTest] = &[
    t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
    t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
    t!(
        regression030,
        &["libcore/", "libstd/"],
        "libcore/char/methods.rs",
        &[(0, 0, 8),]
    ),
    t!(
        regression040,
        &["libstd/", "libcore/"],
        "libcore/char/methods.rs",
        &[(1, 0, 8),]
    ),
    t!(
        regression050,
        &["\x00\x00\x01", "\x00\x00\x00"],
        "\x00\x00\x00",
        &[(1, 0, 3),]
    ),
    t!(
        regression060,
        &["\x00\x00\x00", "\x00\x00\x01"],
        "\x00\x00\x00",
        &[(0, 0, 3),]
    ),
];

/// Tests aimed specifically at the Teddy implementations: many patterns
/// searched simultaneously, which is Teddy's specialty.
const TEDDY: &'static [SearchTest] = &[
    t!(
        teddy010,
        &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
        "abcdefghijk",
        &[
            (0, 0, 1),
            (1, 1, 2),
            (2, 2, 3),
            (3, 3, 4),
            (4, 4, 5),
            (5, 5, 6),
            (6, 6, 7),
            (7, 7, 8),
            (8, 8, 9),
            (9, 9, 10),
            (10, 10, 11)
        ]
    ),
    t!(
        teddy020,
        &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
        "abcdefghijk",
        &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
    ),
    t!(
        teddy030,
        &["abc"],
        "abcdefghijklmnopqrstuvwxyzabcdefghijk",
        &[(0, 0, 3), (0, 26, 29)]
    ),
];
|
||||||
|
|
||||||
|
// Now define a test for each combination of things above that we want to run.
// Since there are a few different combinations for each collection of tests,
// we define a couple of macros to avoid repetition drudgery. The testconfig
// macro constructs the automaton from a given match kind, and runs the search
// tests one-by-one over the given collection. The `with` parameter allows one
// to configure the config with additional parameters. The testcombo macro
// invokes testconfig in precisely this way: it sets up several tests where
// each one turns a different knob on Config.

macro_rules! testconfig {
    ($name:ident, $collection:expr, $with:expr) => {
        #[test]
        fn $name() {
            run_search_tests($collection, |test| {
                let mut config = Config::new();
                // Let the caller turn knobs on the config before building.
                $with(&mut config);
                let mut builder = config.builder();
                builder.extend(test.patterns.iter().map(|p| p.as_bytes()));
                let searcher = match builder.build() {
                    Some(searcher) => searcher,
                    None => {
                        // For x86-64 and aarch64, not building a searcher is
                        // probably a bug, so be loud.
                        if cfg!(any(
                            target_arch = "x86_64",
                            target_arch = "aarch64"
                        )) {
                            panic!("failed to build packed searcher")
                        }
                        // Returning `None` skips this test case.
                        return None;
                    }
                };
                Some(searcher.find_iter(&test.haystack).collect())
            });
        }
    };
}

testconfig!(
    search_default_leftmost_first,
    PACKED_LEFTMOST_FIRST,
    |_: &mut Config| {}
);

testconfig!(
    search_default_leftmost_longest,
    PACKED_LEFTMOST_LONGEST,
    |c: &mut Config| {
        c.match_kind(MatchKind::LeftmostLongest);
    }
);

testconfig!(
    search_teddy_leftmost_first,
    PACKED_LEFTMOST_FIRST,
    |c: &mut Config| {
        c.only_teddy(true);
    }
);

testconfig!(
    search_teddy_leftmost_longest,
    PACKED_LEFTMOST_LONGEST,
    |c: &mut Config| {
        c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
    }
);

// The SSSE3 variants force 128-bit Teddy when the CPU supports it.
testconfig!(
    search_teddy_ssse3_leftmost_first,
    PACKED_LEFTMOST_FIRST,
    |c: &mut Config| {
        c.only_teddy(true);
        #[cfg(target_arch = "x86_64")]
        if std::is_x86_feature_detected!("ssse3") {
            c.only_teddy_256bit(Some(false));
        }
    }
);

testconfig!(
    search_teddy_ssse3_leftmost_longest,
    PACKED_LEFTMOST_LONGEST,
    |c: &mut Config| {
        c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
        #[cfg(target_arch = "x86_64")]
        if std::is_x86_feature_detected!("ssse3") {
            c.only_teddy_256bit(Some(false));
        }
    }
);

// The AVX2 variants force 256-bit Teddy when the CPU supports it.
testconfig!(
    search_teddy_avx2_leftmost_first,
    PACKED_LEFTMOST_FIRST,
    |c: &mut Config| {
        c.only_teddy(true);
        #[cfg(target_arch = "x86_64")]
        if std::is_x86_feature_detected!("avx2") {
            c.only_teddy_256bit(Some(true));
        }
    }
);

testconfig!(
    search_teddy_avx2_leftmost_longest,
    PACKED_LEFTMOST_LONGEST,
    |c: &mut Config| {
        c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
        #[cfg(target_arch = "x86_64")]
        if std::is_x86_feature_detected!("avx2") {
            c.only_teddy_256bit(Some(true));
        }
    }
);

// The fat variants force "fat" Teddy, which requires AVX2.
testconfig!(
    search_teddy_fat_leftmost_first,
    PACKED_LEFTMOST_FIRST,
    |c: &mut Config| {
        c.only_teddy(true);
        #[cfg(target_arch = "x86_64")]
        if std::is_x86_feature_detected!("avx2") {
            c.only_teddy_fat(Some(true));
        }
    }
);

testconfig!(
    search_teddy_fat_leftmost_longest,
    PACKED_LEFTMOST_LONGEST,
    |c: &mut Config| {
        c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
        #[cfg(target_arch = "x86_64")]
        if std::is_x86_feature_detected!("avx2") {
            c.only_teddy_fat(Some(true));
        }
    }
);

// The Rabin-Karp variants exercise the scalar fallback searcher.
testconfig!(
    search_rabinkarp_leftmost_first,
    PACKED_LEFTMOST_FIRST,
    |c: &mut Config| {
        c.only_rabin_karp(true);
    }
);

testconfig!(
    search_rabinkarp_leftmost_longest,
    PACKED_LEFTMOST_LONGEST,
    |c: &mut Config| {
        c.only_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
    }
);
|
||||||
|
|
||||||
|
/// Meta-test: every test in every collection must have a unique name.
/// Duplicate names make a failure report ambiguous and can hide tests.
#[test]
fn search_tests_have_unique_names() {
    use std::collections::hash_map::Entry;

    let assert = |constname, tests: &[SearchTest]| {
        // Map from test name to the position it was first seen at.
        let mut seen = HashMap::new();
        for (i, test) in tests.iter().enumerate() {
            // Use the entry API so each name is hashed and looked up once
            // instead of `contains_key` + `insert`/indexing (three lookups).
            match seen.entry(test.name) {
                Entry::Vacant(e) => {
                    e.insert(i);
                }
                Entry::Occupied(e) => {
                    panic!(
                        "{} tests have duplicate names at positions {} and {}",
                        constname,
                        *e.get(),
                        i
                    );
                }
            }
        }
    };
    assert("BASICS", BASICS);
    assert("LEFTMOST", LEFTMOST);
    assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
    assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
    assert("REGRESSION", REGRESSION);
    assert("TEDDY", TEDDY);
}
|
||||||
|
|
||||||
|
/// Runs every variation of every test in `which` through `f`, asserting that
/// the matches returned equal the expected matches. `f` returning `None`
/// skips that variation (used when a searcher configuration is unavailable).
fn run_search_tests<F: FnMut(&SearchTestOwned) -> Option<Vec<Match>>>(
    which: TestCollection,
    mut f: F,
) {
    // Normalize `Match` values into comparable (pattern, start, end) triples.
    let get_match_triples =
        |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
            matches
                .into_iter()
                .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
                .collect()
        };
    for &tests in which {
        for spec in tests {
            for test in spec.variations() {
                let results = match f(&test) {
                    None => continue,
                    Some(results) => results,
                };
                assert_eq!(
                    test.matches,
                    get_match_triples(results).as_slice(),
                    "test: {}, patterns: {:?}, haystack(len={:?}): {:?}, \
                     offset: {:?}",
                    test.name,
                    test.patterns,
                    test.haystack.len(),
                    test.haystack,
                    test.offset,
                );
            }
        }
    }
}
|
||||||
|
}
|
||||||
1757
.gear/predownloaded-development/vendor/aho-corasick/src/packed/vector.rs
vendored
Normal file
1757
.gear/predownloaded-development/vendor/aho-corasick/src/packed/vector.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
1664
.gear/predownloaded-development/vendor/aho-corasick/src/tests.rs
vendored
Normal file
1664
.gear/predownloaded-development/vendor/aho-corasick/src/tests.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
270
.gear/predownloaded-development/vendor/aho-corasick/src/transducer.rs
vendored
Normal file
270
.gear/predownloaded-development/vendor/aho-corasick/src/transducer.rs
vendored
Normal file
|
|
@ -0,0 +1,270 @@
|
||||||
|
/*!
|
||||||
|
Provides implementations of `fst::Automaton` for Aho-Corasick automata.
|
||||||
|
|
||||||
|
This works by providing two wrapper types, [`Anchored`] and [`Unanchored`].
|
||||||
|
The former executes an anchored search on an FST while the latter executes
|
||||||
|
an unanchored search. Building these wrappers is fallible and will fail if
|
||||||
|
the underlying Aho-Corasick automaton does not support the type of search it
|
||||||
|
represents.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
automaton::{Automaton, StateID},
|
||||||
|
Anchored as AcAnchored, Input, MatchError,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Represents an unanchored Aho-Corasick search of a finite state transducer.
///
/// Wrapping an Aho-Corasick automaton in `Unanchored` will fail if the
/// underlying automaton does not support unanchored searches.
///
/// # Example
///
/// This shows how to build an FST of keys and then run an unanchored search on
/// those keys using an Aho-Corasick automaton.
///
/// ```
/// use aho_corasick::{nfa::contiguous::NFA, transducer::Unanchored};
/// use fst::{Automaton, IntoStreamer, Set, Streamer};
///
/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
/// let nfa = NFA::new(&["bcd", "x"]).unwrap();
/// // NFAs always support both unanchored and anchored searches.
/// let searcher = Unanchored::new(&nfa).unwrap();
///
/// let mut stream = set.search(searcher).into_stream();
/// let mut results = vec![];
/// while let Some(key) = stream.next() {
///     results.push(std::str::from_utf8(key).unwrap().to_string());
/// }
/// assert_eq!(vec!["abcd", "bcd", "xyz"], results);
/// ```
// Newtype wrapper: support for unanchored searches is verified once at
// construction time, so the trait impl can rely on it.
#[derive(Clone, Debug)]
pub struct Unanchored<A>(A);
|
||||||
|
|
||||||
|
impl<A: Automaton> Unanchored<A> {
|
||||||
|
/// Create a new `Unanchored` implementation of the `fst::Automaton` trait.
|
||||||
|
///
|
||||||
|
/// If the given Aho-Corasick automaton does not support unanchored
|
||||||
|
/// searches, then this returns an error.
|
||||||
|
pub fn new(aut: A) -> Result<Unanchored<A>, MatchError> {
|
||||||
|
let input = Input::new("").anchored(AcAnchored::No);
|
||||||
|
let _ = aut.start_state(&input)?;
|
||||||
|
Ok(Unanchored(aut))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a borrow to the underlying automaton.
|
||||||
|
pub fn as_ref(&self) -> &A {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unwrap this value and return the inner automaton.
|
||||||
|
pub fn into_inner(self) -> A {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<A: Automaton> fst::Automaton for Unanchored<A> {
|
||||||
|
type State = StateID;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn start(&self) -> StateID {
|
||||||
|
let input = Input::new("").anchored(AcAnchored::No);
|
||||||
|
self.0.start_state(&input).expect("support for unanchored searches")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_match(&self, state: &StateID) -> bool {
|
||||||
|
self.0.is_match(*state)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn accept(&self, state: &StateID, byte: u8) -> StateID {
|
||||||
|
if fst::Automaton::is_match(self, state) {
|
||||||
|
return *state;
|
||||||
|
}
|
||||||
|
self.0.next_state(AcAnchored::No, *state, byte)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn can_match(&self, state: &StateID) -> bool {
|
||||||
|
!self.0.is_dead(*state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents an anchored Aho-Corasick search of a finite state transducer.
|
||||||
|
///
|
||||||
|
/// Wrapping an Aho-Corasick automaton in `Unanchored` will fail if the
|
||||||
|
/// underlying automaton does not support unanchored searches.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// This shows how to build an FST of keys and then run an anchored search on
|
||||||
|
/// those keys using an Aho-Corasick automaton.
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{nfa::contiguous::NFA, transducer::Anchored};
|
||||||
|
/// use fst::{Automaton, IntoStreamer, Set, Streamer};
|
||||||
|
///
|
||||||
|
/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
|
||||||
|
/// let nfa = NFA::new(&["bcd", "x"]).unwrap();
|
||||||
|
/// // NFAs always support both unanchored and anchored searches.
|
||||||
|
/// let searcher = Anchored::new(&nfa).unwrap();
|
||||||
|
///
|
||||||
|
/// let mut stream = set.search(searcher).into_stream();
|
||||||
|
/// let mut results = vec![];
|
||||||
|
/// while let Some(key) = stream.next() {
|
||||||
|
/// results.push(std::str::from_utf8(key).unwrap().to_string());
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(vec!["bcd", "xyz"], results);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// This is like the example above, except we use an Aho-Corasick DFA, which
|
||||||
|
/// requires explicitly configuring it to support anchored searches. (NFAs
|
||||||
|
/// unconditionally support both unanchored and anchored searches.)
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use aho_corasick::{dfa::DFA, transducer::Anchored, StartKind};
|
||||||
|
/// use fst::{Automaton, IntoStreamer, Set, Streamer};
|
||||||
|
///
|
||||||
|
/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
|
||||||
|
/// let dfa = DFA::builder()
|
||||||
|
/// .start_kind(StartKind::Anchored)
|
||||||
|
/// .build(&["bcd", "x"])
|
||||||
|
/// .unwrap();
|
||||||
|
/// // We've explicitly configured our DFA to support anchored searches.
|
||||||
|
/// let searcher = Anchored::new(&dfa).unwrap();
|
||||||
|
///
|
||||||
|
/// let mut stream = set.search(searcher).into_stream();
|
||||||
|
/// let mut results = vec![];
|
||||||
|
/// while let Some(key) = stream.next() {
|
||||||
|
/// results.push(std::str::from_utf8(key).unwrap().to_string());
|
||||||
|
/// }
|
||||||
|
/// assert_eq!(vec!["bcd", "xyz"], results);
|
||||||
|
/// ```
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Anchored<A>(A);
|
||||||
|
|
||||||
|
impl<A: Automaton> Anchored<A> {
|
||||||
|
/// Create a new `Anchored` implementation of the `fst::Automaton` trait.
|
||||||
|
///
|
||||||
|
/// If the given Aho-Corasick automaton does not support anchored searches,
|
||||||
|
/// then this returns an error.
|
||||||
|
pub fn new(aut: A) -> Result<Anchored<A>, MatchError> {
|
||||||
|
let input = Input::new("").anchored(AcAnchored::Yes);
|
||||||
|
let _ = aut.start_state(&input)?;
|
||||||
|
Ok(Anchored(aut))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a borrow to the underlying automaton.
|
||||||
|
pub fn as_ref(&self) -> &A {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unwrap this value and return the inner automaton.
|
||||||
|
pub fn into_inner(self) -> A {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<A: Automaton> fst::Automaton for Anchored<A> {
|
||||||
|
type State = StateID;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn start(&self) -> StateID {
|
||||||
|
let input = Input::new("").anchored(AcAnchored::Yes);
|
||||||
|
self.0.start_state(&input).expect("support for unanchored searches")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_match(&self, state: &StateID) -> bool {
|
||||||
|
self.0.is_match(*state)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn accept(&self, state: &StateID, byte: u8) -> StateID {
|
||||||
|
if fst::Automaton::is_match(self, state) {
|
||||||
|
return *state;
|
||||||
|
}
|
||||||
|
self.0.next_state(AcAnchored::Yes, *state, byte)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn can_match(&self, state: &StateID) -> bool {
|
||||||
|
!self.0.is_dead(*state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use alloc::{string::String, vec, vec::Vec};
|
||||||
|
|
||||||
|
use fst::{Automaton, IntoStreamer, Set, Streamer};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
dfa::DFA,
|
||||||
|
nfa::{contiguous, noncontiguous},
|
||||||
|
StartKind,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn search<A: Automaton, D: AsRef<[u8]>>(
|
||||||
|
set: &Set<D>,
|
||||||
|
aut: A,
|
||||||
|
) -> Vec<String> {
|
||||||
|
let mut stream = set.search(aut).into_stream();
|
||||||
|
let mut results = vec![];
|
||||||
|
while let Some(key) = stream.next() {
|
||||||
|
results.push(String::from(core::str::from_utf8(key).unwrap()));
|
||||||
|
}
|
||||||
|
results
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn unanchored() {
|
||||||
|
let set =
|
||||||
|
Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
|
||||||
|
.unwrap();
|
||||||
|
let patterns = vec!["baz", "bax"];
|
||||||
|
let expected = vec!["baz", "xbax"];
|
||||||
|
|
||||||
|
let aut = Unanchored(noncontiguous::NFA::new(&patterns).unwrap());
|
||||||
|
let got = search(&set, &aut);
|
||||||
|
assert_eq!(got, expected);
|
||||||
|
|
||||||
|
let aut = Unanchored(contiguous::NFA::new(&patterns).unwrap());
|
||||||
|
let got = search(&set, &aut);
|
||||||
|
assert_eq!(got, expected);
|
||||||
|
|
||||||
|
let aut = Unanchored(DFA::new(&patterns).unwrap());
|
||||||
|
let got = search(&set, &aut);
|
||||||
|
assert_eq!(got, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn anchored() {
|
||||||
|
let set =
|
||||||
|
Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
|
||||||
|
.unwrap();
|
||||||
|
let patterns = vec!["baz", "bax"];
|
||||||
|
let expected = vec!["baz"];
|
||||||
|
|
||||||
|
let aut = Anchored(noncontiguous::NFA::new(&patterns).unwrap());
|
||||||
|
let got = search(&set, &aut);
|
||||||
|
assert_eq!(got, expected);
|
||||||
|
|
||||||
|
let aut = Anchored(contiguous::NFA::new(&patterns).unwrap());
|
||||||
|
let got = search(&set, &aut);
|
||||||
|
assert_eq!(got, expected);
|
||||||
|
|
||||||
|
let aut = Anchored(
|
||||||
|
DFA::builder()
|
||||||
|
.start_kind(StartKind::Anchored)
|
||||||
|
.build(&patterns)
|
||||||
|
.unwrap(),
|
||||||
|
);
|
||||||
|
let got = search(&set, &aut);
|
||||||
|
assert_eq!(got, expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
409
.gear/predownloaded-development/vendor/aho-corasick/src/util/alphabet.rs
vendored
Normal file
409
.gear/predownloaded-development/vendor/aho-corasick/src/util/alphabet.rs
vendored
Normal file
|
|
@ -0,0 +1,409 @@
|
||||||
|
use crate::util::int::Usize;
|
||||||
|
|
||||||
|
/// A representation of byte oriented equivalence classes.
|
||||||
|
///
|
||||||
|
/// This is used in finite state machines to reduce the size of the transition
|
||||||
|
/// table. This can have a particularly large impact not only on the total size
|
||||||
|
/// of an FSM, but also on FSM build times because it reduces the number of
|
||||||
|
/// transitions that need to be visited/set.
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub(crate) struct ByteClasses([u8; 256]);
|
||||||
|
|
||||||
|
impl ByteClasses {
|
||||||
|
/// Creates a new set of equivalence classes where all bytes are mapped to
|
||||||
|
/// the same class.
|
||||||
|
pub(crate) fn empty() -> ByteClasses {
|
||||||
|
ByteClasses([0; 256])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new set of equivalence classes where each byte belongs to
|
||||||
|
/// its own equivalence class.
|
||||||
|
pub(crate) fn singletons() -> ByteClasses {
|
||||||
|
let mut classes = ByteClasses::empty();
|
||||||
|
for b in 0..=255 {
|
||||||
|
classes.set(b, b);
|
||||||
|
}
|
||||||
|
classes
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the equivalence class for the given byte.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn set(&mut self, byte: u8, class: u8) {
|
||||||
|
self.0[usize::from(byte)] = class;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the equivalence class for the given byte.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn get(&self, byte: u8) -> u8 {
|
||||||
|
self.0[usize::from(byte)]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the total number of elements in the alphabet represented by
|
||||||
|
/// these equivalence classes. Equivalently, this returns the total number
|
||||||
|
/// of equivalence classes.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn alphabet_len(&self) -> usize {
|
||||||
|
// Add one since the number of equivalence classes is one bigger than
|
||||||
|
// the last one.
|
||||||
|
usize::from(self.0[255]) + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the stride, as a base-2 exponent, required for these
|
||||||
|
/// equivalence classes.
|
||||||
|
///
|
||||||
|
/// The stride is always the smallest power of 2 that is greater than or
|
||||||
|
/// equal to the alphabet length. This is done so that converting between
|
||||||
|
/// state IDs and indices can be done with shifts alone, which is much
|
||||||
|
/// faster than integer division. The "stride2" is the exponent. i.e.,
|
||||||
|
/// `2^stride2 = stride`.
|
||||||
|
pub(crate) fn stride2(&self) -> usize {
|
||||||
|
let zeros = self.alphabet_len().next_power_of_two().trailing_zeros();
|
||||||
|
usize::try_from(zeros).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the stride for these equivalence classes, which corresponds
|
||||||
|
/// to the smallest power of 2 greater than or equal to the number of
|
||||||
|
/// equivalence classes.
|
||||||
|
pub(crate) fn stride(&self) -> usize {
|
||||||
|
1 << self.stride2()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if every byte in this class maps to its own
|
||||||
|
/// equivalence class. Equivalently, there are 257 equivalence classes
|
||||||
|
/// and each class contains exactly one byte (plus the special EOI class).
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn is_singleton(&self) -> bool {
|
||||||
|
self.alphabet_len() == 256
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns an iterator over all equivalence classes in this set.
|
||||||
|
pub(crate) fn iter(&self) -> ByteClassIter {
|
||||||
|
ByteClassIter { it: 0..self.alphabet_len() }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns an iterator of the bytes in the given equivalence class.
|
||||||
|
pub(crate) fn elements(&self, class: u8) -> ByteClassElements {
|
||||||
|
ByteClassElements { classes: self, class, bytes: 0..=255 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns an iterator of byte ranges in the given equivalence class.
|
||||||
|
///
|
||||||
|
/// That is, a sequence of contiguous ranges are returned. Typically, every
|
||||||
|
/// class maps to a single contiguous range.
|
||||||
|
fn element_ranges(&self, class: u8) -> ByteClassElementRanges {
|
||||||
|
ByteClassElementRanges { elements: self.elements(class), range: None }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::fmt::Debug for ByteClasses {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
if self.is_singleton() {
|
||||||
|
write!(f, "ByteClasses(<one-class-per-byte>)")
|
||||||
|
} else {
|
||||||
|
write!(f, "ByteClasses(")?;
|
||||||
|
for (i, class) in self.iter().enumerate() {
|
||||||
|
if i > 0 {
|
||||||
|
write!(f, ", ")?;
|
||||||
|
}
|
||||||
|
write!(f, "{:?} => [", class)?;
|
||||||
|
for (start, end) in self.element_ranges(class) {
|
||||||
|
if start == end {
|
||||||
|
write!(f, "{:?}", start)?;
|
||||||
|
} else {
|
||||||
|
write!(f, "{:?}-{:?}", start, end)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
write!(f, "]")?;
|
||||||
|
}
|
||||||
|
write!(f, ")")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over each equivalence class.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct ByteClassIter {
|
||||||
|
it: core::ops::Range<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for ByteClassIter {
|
||||||
|
type Item = u8;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<u8> {
|
||||||
|
self.it.next().map(|class| class.as_u8())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over all elements in a specific equivalence class.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct ByteClassElements<'a> {
|
||||||
|
classes: &'a ByteClasses,
|
||||||
|
class: u8,
|
||||||
|
bytes: core::ops::RangeInclusive<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for ByteClassElements<'a> {
|
||||||
|
type Item = u8;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<u8> {
|
||||||
|
while let Some(byte) = self.bytes.next() {
|
||||||
|
if self.class == self.classes.get(byte) {
|
||||||
|
return Some(byte);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over all elements in an equivalence class expressed as a
|
||||||
|
/// sequence of contiguous ranges.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct ByteClassElementRanges<'a> {
|
||||||
|
elements: ByteClassElements<'a>,
|
||||||
|
range: Option<(u8, u8)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for ByteClassElementRanges<'a> {
|
||||||
|
type Item = (u8, u8);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<(u8, u8)> {
|
||||||
|
loop {
|
||||||
|
let element = match self.elements.next() {
|
||||||
|
None => return self.range.take(),
|
||||||
|
Some(element) => element,
|
||||||
|
};
|
||||||
|
match self.range.take() {
|
||||||
|
None => {
|
||||||
|
self.range = Some((element, element));
|
||||||
|
}
|
||||||
|
Some((start, end)) => {
|
||||||
|
if usize::from(end) + 1 != usize::from(element) {
|
||||||
|
self.range = Some((element, element));
|
||||||
|
return Some((start, end));
|
||||||
|
}
|
||||||
|
self.range = Some((start, element));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A partitioning of bytes into equivalence classes.
|
||||||
|
///
|
||||||
|
/// A byte class set keeps track of an *approximation* of equivalence classes
|
||||||
|
/// of bytes during NFA construction. That is, every byte in an equivalence
|
||||||
|
/// class cannot discriminate between a match and a non-match.
|
||||||
|
///
|
||||||
|
/// Note that this may not compute the minimal set of equivalence classes.
|
||||||
|
/// Basically, any byte in a pattern given to the noncontiguous NFA builder
|
||||||
|
/// will automatically be treated as its own equivalence class. All other
|
||||||
|
/// bytes---any byte not in any pattern---will be treated as their own
|
||||||
|
/// equivalence classes. In theory, all bytes not in any pattern should
|
||||||
|
/// be part of a single equivalence class, but in practice, we only treat
|
||||||
|
/// contiguous ranges of bytes as an equivalence class. So the number of
|
||||||
|
/// classes computed may be bigger than necessary. This usually doesn't make
|
||||||
|
/// much of a difference, and keeps the implementation simple.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct ByteClassSet(ByteSet);
|
||||||
|
|
||||||
|
impl Default for ByteClassSet {
|
||||||
|
fn default() -> ByteClassSet {
|
||||||
|
ByteClassSet::empty()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ByteClassSet {
|
||||||
|
/// Create a new set of byte classes where all bytes are part of the same
|
||||||
|
/// equivalence class.
|
||||||
|
pub(crate) fn empty() -> Self {
|
||||||
|
ByteClassSet(ByteSet::empty())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Indicate the the range of byte given (inclusive) can discriminate a
|
||||||
|
/// match between it and all other bytes outside of the range.
|
||||||
|
pub(crate) fn set_range(&mut self, start: u8, end: u8) {
|
||||||
|
debug_assert!(start <= end);
|
||||||
|
if start > 0 {
|
||||||
|
self.0.add(start - 1);
|
||||||
|
}
|
||||||
|
self.0.add(end);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert this boolean set to a map that maps all byte values to their
|
||||||
|
/// corresponding equivalence class. The last mapping indicates the largest
|
||||||
|
/// equivalence class identifier (which is never bigger than 255).
|
||||||
|
pub(crate) fn byte_classes(&self) -> ByteClasses {
|
||||||
|
let mut classes = ByteClasses::empty();
|
||||||
|
let mut class = 0u8;
|
||||||
|
let mut b = 0u8;
|
||||||
|
loop {
|
||||||
|
classes.set(b, class);
|
||||||
|
if b == 255 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if self.0.contains(b) {
|
||||||
|
class = class.checked_add(1).unwrap();
|
||||||
|
}
|
||||||
|
b = b.checked_add(1).unwrap();
|
||||||
|
}
|
||||||
|
classes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A simple set of bytes that is reasonably cheap to copy and allocation free.
|
||||||
|
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
|
||||||
|
pub(crate) struct ByteSet {
|
||||||
|
bits: BitSet,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The representation of a byte set. Split out so that we can define a
|
||||||
|
/// convenient Debug impl for it while keeping "ByteSet" in the output.
|
||||||
|
#[derive(Clone, Copy, Default, Eq, PartialEq)]
|
||||||
|
struct BitSet([u128; 2]);
|
||||||
|
|
||||||
|
impl ByteSet {
|
||||||
|
/// Create an empty set of bytes.
|
||||||
|
pub(crate) fn empty() -> ByteSet {
|
||||||
|
ByteSet { bits: BitSet([0; 2]) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a byte to this set.
|
||||||
|
///
|
||||||
|
/// If the given byte already belongs to this set, then this is a no-op.
|
||||||
|
pub(crate) fn add(&mut self, byte: u8) {
|
||||||
|
let bucket = byte / 128;
|
||||||
|
let bit = byte % 128;
|
||||||
|
self.bits.0[usize::from(bucket)] |= 1 << bit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return true if and only if the given byte is in this set.
|
||||||
|
pub(crate) fn contains(&self, byte: u8) -> bool {
|
||||||
|
let bucket = byte / 128;
|
||||||
|
let bit = byte % 128;
|
||||||
|
self.bits.0[usize::from(bucket)] & (1 << bit) > 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::fmt::Debug for BitSet {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||||
|
let mut fmtd = f.debug_set();
|
||||||
|
for b in 0u8..=255 {
|
||||||
|
if (ByteSet { bits: *self }).contains(b) {
|
||||||
|
fmtd.entry(&b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmtd.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use alloc::{vec, vec::Vec};
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn byte_classes() {
|
||||||
|
let mut set = ByteClassSet::empty();
|
||||||
|
set.set_range(b'a', b'z');
|
||||||
|
|
||||||
|
let classes = set.byte_classes();
|
||||||
|
assert_eq!(classes.get(0), 0);
|
||||||
|
assert_eq!(classes.get(1), 0);
|
||||||
|
assert_eq!(classes.get(2), 0);
|
||||||
|
assert_eq!(classes.get(b'a' - 1), 0);
|
||||||
|
assert_eq!(classes.get(b'a'), 1);
|
||||||
|
assert_eq!(classes.get(b'm'), 1);
|
||||||
|
assert_eq!(classes.get(b'z'), 1);
|
||||||
|
assert_eq!(classes.get(b'z' + 1), 2);
|
||||||
|
assert_eq!(classes.get(254), 2);
|
||||||
|
assert_eq!(classes.get(255), 2);
|
||||||
|
|
||||||
|
let mut set = ByteClassSet::empty();
|
||||||
|
set.set_range(0, 2);
|
||||||
|
set.set_range(4, 6);
|
||||||
|
let classes = set.byte_classes();
|
||||||
|
assert_eq!(classes.get(0), 0);
|
||||||
|
assert_eq!(classes.get(1), 0);
|
||||||
|
assert_eq!(classes.get(2), 0);
|
||||||
|
assert_eq!(classes.get(3), 1);
|
||||||
|
assert_eq!(classes.get(4), 2);
|
||||||
|
assert_eq!(classes.get(5), 2);
|
||||||
|
assert_eq!(classes.get(6), 2);
|
||||||
|
assert_eq!(classes.get(7), 3);
|
||||||
|
assert_eq!(classes.get(255), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn full_byte_classes() {
|
||||||
|
let mut set = ByteClassSet::empty();
|
||||||
|
for b in 0u8..=255 {
|
||||||
|
set.set_range(b, b);
|
||||||
|
}
|
||||||
|
assert_eq!(set.byte_classes().alphabet_len(), 256);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn elements_typical() {
|
||||||
|
let mut set = ByteClassSet::empty();
|
||||||
|
set.set_range(b'b', b'd');
|
||||||
|
set.set_range(b'g', b'm');
|
||||||
|
set.set_range(b'z', b'z');
|
||||||
|
let classes = set.byte_classes();
|
||||||
|
// class 0: \x00-a
|
||||||
|
// class 1: b-d
|
||||||
|
// class 2: e-f
|
||||||
|
// class 3: g-m
|
||||||
|
// class 4: n-y
|
||||||
|
// class 5: z-z
|
||||||
|
// class 6: \x7B-\xFF
|
||||||
|
assert_eq!(classes.alphabet_len(), 7);
|
||||||
|
|
||||||
|
let elements = classes.elements(0).collect::<Vec<_>>();
|
||||||
|
assert_eq!(elements.len(), 98);
|
||||||
|
assert_eq!(elements[0], b'\x00');
|
||||||
|
assert_eq!(elements[97], b'a');
|
||||||
|
|
||||||
|
let elements = classes.elements(1).collect::<Vec<_>>();
|
||||||
|
assert_eq!(elements, vec![b'b', b'c', b'd'],);
|
||||||
|
|
||||||
|
let elements = classes.elements(2).collect::<Vec<_>>();
|
||||||
|
assert_eq!(elements, vec![b'e', b'f'],);
|
||||||
|
|
||||||
|
let elements = classes.elements(3).collect::<Vec<_>>();
|
||||||
|
assert_eq!(elements, vec![b'g', b'h', b'i', b'j', b'k', b'l', b'm',],);
|
||||||
|
|
||||||
|
let elements = classes.elements(4).collect::<Vec<_>>();
|
||||||
|
assert_eq!(elements.len(), 12);
|
||||||
|
assert_eq!(elements[0], b'n');
|
||||||
|
assert_eq!(elements[11], b'y');
|
||||||
|
|
||||||
|
let elements = classes.elements(5).collect::<Vec<_>>();
|
||||||
|
assert_eq!(elements, vec![b'z']);
|
||||||
|
|
||||||
|
let elements = classes.elements(6).collect::<Vec<_>>();
|
||||||
|
assert_eq!(elements.len(), 133);
|
||||||
|
assert_eq!(elements[0], b'\x7B');
|
||||||
|
assert_eq!(elements[132], b'\xFF');
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn elements_singletons() {
|
||||||
|
let classes = ByteClasses::singletons();
|
||||||
|
assert_eq!(classes.alphabet_len(), 256);
|
||||||
|
|
||||||
|
let elements = classes.elements(b'a').collect::<Vec<_>>();
|
||||||
|
assert_eq!(elements, vec![b'a']);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn elements_empty() {
|
||||||
|
let classes = ByteClasses::empty();
|
||||||
|
assert_eq!(classes.alphabet_len(), 1);
|
||||||
|
|
||||||
|
let elements = classes.elements(0).collect::<Vec<_>>();
|
||||||
|
assert_eq!(elements.len(), 256);
|
||||||
|
assert_eq!(elements[0], b'\x00');
|
||||||
|
assert_eq!(elements[255], b'\xFF');
|
||||||
|
}
|
||||||
|
}
|
||||||
124
.gear/predownloaded-development/vendor/aho-corasick/src/util/buffer.rs
vendored
Normal file
124
.gear/predownloaded-development/vendor/aho-corasick/src/util/buffer.rs
vendored
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
use alloc::{vec, vec::Vec};
|
||||||
|
|
||||||
|
/// The default buffer capacity that we use for the stream buffer.
|
||||||
|
const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
|
||||||
|
|
||||||
|
/// A fairly simple roll buffer for supporting stream searches.
|
||||||
|
///
|
||||||
|
/// This buffer acts as a temporary place to store a fixed amount of data when
|
||||||
|
/// reading from a stream. Its central purpose is to allow "rolling" some
|
||||||
|
/// suffix of the data to the beginning of the buffer before refilling it with
|
||||||
|
/// more data from the stream. For example, let's say we are trying to match
|
||||||
|
/// "foobar" on a stream. When we report the match, we'd like to not only
|
||||||
|
/// report the correct offsets at which the match occurs, but also the matching
|
||||||
|
/// bytes themselves. So let's say our stream is a file with the following
|
||||||
|
/// contents: `test test foobar test test`. Now assume that we happen to read
|
||||||
|
/// the aforementioned file in two chunks: `test test foo` and `bar test test`.
|
||||||
|
/// Naively, it would not be possible to report a single contiguous `foobar`
|
||||||
|
/// match, but this roll buffer allows us to do that. Namely, after the second
|
||||||
|
/// read, the contents of the buffer should be `st foobar test test`, where the
|
||||||
|
/// search should ultimately resume immediately after `foo`. (The prefix `st `
|
||||||
|
/// is included because the roll buffer saves N bytes at the end of the buffer,
|
||||||
|
/// where N is the maximum possible length of a match.)
|
||||||
|
///
|
||||||
|
/// A lot of the logic for dealing with this is unfortunately split out between
|
||||||
|
/// this roll buffer and the `StreamChunkIter`.
|
||||||
|
///
|
||||||
|
/// Note also that this buffer is not actually required to just report matches.
|
||||||
|
/// Because a `Match` is just some offsets. But it *is* required for supporting
|
||||||
|
/// things like `try_stream_replace_all` because that needs some mechanism for
|
||||||
|
/// knowing which bytes in the stream correspond to a match and which don't. So
|
||||||
|
/// when a match occurs across two `read` calls, *something* needs to retain
|
||||||
|
/// the bytes from the previous `read` call because you don't know before the
|
||||||
|
/// second read call whether a match exists or not.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct Buffer {
|
||||||
|
/// The raw buffer contents. This has a fixed size and never increases.
|
||||||
|
buf: Vec<u8>,
|
||||||
|
/// The minimum size of the buffer, which is equivalent to the maximum
|
||||||
|
/// possible length of a match. This corresponds to the amount that we
|
||||||
|
/// roll
|
||||||
|
min: usize,
|
||||||
|
/// The end of the contents of this buffer.
|
||||||
|
end: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Buffer {
|
||||||
|
/// Create a new buffer for stream searching. The minimum buffer length
|
||||||
|
/// given should be the size of the maximum possible match length.
|
||||||
|
pub(crate) fn new(min_buffer_len: usize) -> Buffer {
|
||||||
|
let min = core::cmp::max(1, min_buffer_len);
|
||||||
|
// The minimum buffer amount is also the amount that we roll our
|
||||||
|
// buffer in order to support incremental searching. To this end,
|
||||||
|
// our actual capacity needs to be at least 1 byte bigger than our
|
||||||
|
// minimum amount, otherwise we won't have any overlap. In actuality,
|
||||||
|
// we want our buffer to be a bit bigger than that for performance
|
||||||
|
// reasons, so we set a lower bound of `8 * min`.
|
||||||
|
//
|
||||||
|
// TODO: It would be good to find a way to test the streaming
|
||||||
|
// implementation with the minimal buffer size. For now, we just
|
||||||
|
// uncomment out the next line and comment out the subsequent line.
|
||||||
|
// let capacity = 1 + min;
|
||||||
|
let capacity = core::cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
|
||||||
|
Buffer { buf: vec![0; capacity], min, end: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the contents of this buffer.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn buffer(&self) -> &[u8] {
|
||||||
|
&self.buf[..self.end]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the minimum size of the buffer. The only way a buffer may be
|
||||||
|
/// smaller than this is if the stream itself contains less than the
|
||||||
|
/// minimum buffer amount.
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn min_buffer_len(&self) -> usize {
|
||||||
|
self.min
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return all free capacity in this buffer.
|
||||||
|
fn free_buffer(&mut self) -> &mut [u8] {
|
||||||
|
&mut self.buf[self.end..]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Refill the contents of this buffer by reading as much as possible into
|
||||||
|
/// this buffer's free capacity. If no more bytes could be read, then this
|
||||||
|
/// returns false. Otherwise, this reads until it has filled the buffer
|
||||||
|
/// past the minimum amount.
|
||||||
|
pub(crate) fn fill<R: std::io::Read>(
|
||||||
|
&mut self,
|
||||||
|
mut rdr: R,
|
||||||
|
) -> std::io::Result<bool> {
|
||||||
|
let mut readany = false;
|
||||||
|
loop {
|
||||||
|
let readlen = rdr.read(self.free_buffer())?;
|
||||||
|
if readlen == 0 {
|
||||||
|
return Ok(readany);
|
||||||
|
}
|
||||||
|
readany = true;
|
||||||
|
self.end += readlen;
|
||||||
|
if self.buffer().len() >= self.min {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Roll the contents of the buffer so that the suffix of this buffer is
|
||||||
|
/// moved to the front and all other contents are dropped. The size of the
|
||||||
|
/// suffix corresponds precisely to the minimum buffer length.
|
||||||
|
///
|
||||||
|
/// This should only be called when the entire contents of this buffer have
|
||||||
|
/// been searched.
|
||||||
|
pub(crate) fn roll(&mut self) {
|
||||||
|
let roll_start = self
|
||||||
|
.end
|
||||||
|
.checked_sub(self.min)
|
||||||
|
.expect("buffer capacity should be bigger than minimum amount");
|
||||||
|
let roll_end = roll_start + self.min;
|
||||||
|
|
||||||
|
assert!(roll_end <= self.end);
|
||||||
|
self.buf.copy_within(roll_start..roll_end, 0);
|
||||||
|
self.end = self.min;
|
||||||
|
}
|
||||||
|
}
|
||||||
26
.gear/predownloaded-development/vendor/aho-corasick/src/util/debug.rs
vendored
Normal file
26
.gear/predownloaded-development/vendor/aho-corasick/src/util/debug.rs
vendored
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
/// A type that wraps a single byte with a convenient fmt::Debug impl that
|
||||||
|
/// escapes the byte.
|
||||||
|
pub(crate) struct DebugByte(pub(crate) u8);
|
||||||
|
|
||||||
|
impl core::fmt::Debug for DebugByte {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||||
|
// Special case ASCII space. It's too hard to read otherwise, so
|
||||||
|
// put quotes around it. I sometimes wonder whether just '\x20' would
|
||||||
|
// be better...
|
||||||
|
if self.0 == b' ' {
|
||||||
|
return write!(f, "' '");
|
||||||
|
}
|
||||||
|
// 10 bytes is enough to cover any output from ascii::escape_default.
|
||||||
|
let mut bytes = [0u8; 10];
|
||||||
|
let mut len = 0;
|
||||||
|
for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
|
||||||
|
// capitalize \xab to \xAB
|
||||||
|
if i >= 2 && b'a' <= b && b <= b'f' {
|
||||||
|
b -= 32;
|
||||||
|
}
|
||||||
|
bytes[len] = b;
|
||||||
|
len += 1;
|
||||||
|
}
|
||||||
|
write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
|
||||||
|
}
|
||||||
|
}
|
||||||
259
.gear/predownloaded-development/vendor/aho-corasick/src/util/error.rs
vendored
Normal file
259
.gear/predownloaded-development/vendor/aho-corasick/src/util/error.rs
vendored
Normal file
|
|
@ -0,0 +1,259 @@
|
||||||
|
use crate::util::{
|
||||||
|
primitives::{PatternID, SmallIndex},
|
||||||
|
search::MatchKind,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// An error that occurred during the construction of an Aho-Corasick
|
||||||
|
/// automaton.
|
||||||
|
///
|
||||||
|
/// Build errors occur when some kind of limit has been exceeded, either in the
|
||||||
|
/// number of states, the number of patterns of the length of a pattern. These
|
||||||
|
/// limits aren't part of the public API, but they should generally be large
|
||||||
|
/// enough to handle most use cases.
|
||||||
|
///
|
||||||
|
/// When the `std` feature is enabled, this implements the `std::error::Error`
|
||||||
|
/// trait.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct BuildError {
|
||||||
|
kind: ErrorKind,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The kind of error that occurred.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
enum ErrorKind {
|
||||||
|
/// An error that occurs when allocating a new state would result in an
|
||||||
|
/// identifier that exceeds the capacity of a `StateID`.
|
||||||
|
StateIDOverflow {
|
||||||
|
/// The maximum possible id.
|
||||||
|
max: u64,
|
||||||
|
/// The maximum ID requested.
|
||||||
|
requested_max: u64,
|
||||||
|
},
|
||||||
|
/// An error that occurs when adding a pattern to an Aho-Corasick
|
||||||
|
/// automaton would result in an identifier that exceeds the capacity of a
|
||||||
|
/// `PatternID`.
|
||||||
|
PatternIDOverflow {
|
||||||
|
/// The maximum possible id.
|
||||||
|
max: u64,
|
||||||
|
/// The maximum ID requested.
|
||||||
|
requested_max: u64,
|
||||||
|
},
|
||||||
|
/// Occurs when a pattern string is given to the Aho-Corasick constructor
|
||||||
|
/// that is too long.
|
||||||
|
PatternTooLong {
|
||||||
|
/// The ID of the pattern that was too long.
|
||||||
|
pattern: PatternID,
|
||||||
|
/// The length that was too long.
|
||||||
|
len: usize,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BuildError {
|
||||||
|
pub(crate) fn state_id_overflow(
|
||||||
|
max: u64,
|
||||||
|
requested_max: u64,
|
||||||
|
) -> BuildError {
|
||||||
|
BuildError { kind: ErrorKind::StateIDOverflow { max, requested_max } }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn pattern_id_overflow(
|
||||||
|
max: u64,
|
||||||
|
requested_max: u64,
|
||||||
|
) -> BuildError {
|
||||||
|
BuildError {
|
||||||
|
kind: ErrorKind::PatternIDOverflow { max, requested_max },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn pattern_too_long(
|
||||||
|
pattern: PatternID,
|
||||||
|
len: usize,
|
||||||
|
) -> BuildError {
|
||||||
|
BuildError { kind: ErrorKind::PatternTooLong { pattern, len } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
impl std::error::Error for BuildError {}
|
||||||
|
|
||||||
|
impl core::fmt::Display for BuildError {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
match self.kind {
|
||||||
|
ErrorKind::StateIDOverflow { max, requested_max } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"state identifier overflow: failed to create state ID \
|
||||||
|
from {}, which exceeds the max of {}",
|
||||||
|
requested_max, max,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
ErrorKind::PatternIDOverflow { max, requested_max } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"pattern identifier overflow: failed to create pattern ID \
|
||||||
|
from {}, which exceeds the max of {}",
|
||||||
|
requested_max, max,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
ErrorKind::PatternTooLong { pattern, len } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"pattern {} with length {} exceeds \
|
||||||
|
the maximum pattern length of {}",
|
||||||
|
pattern.as_usize(),
|
||||||
|
len,
|
||||||
|
SmallIndex::MAX.as_usize(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An error that occurred during an Aho-Corasick search.
|
||||||
|
///
|
||||||
|
/// An error that occurs during a search is limited to some kind of
|
||||||
|
/// misconfiguration that resulted in an illegal call. Stated differently,
|
||||||
|
/// whether an error occurs is not dependent on the specific bytes in the
|
||||||
|
/// haystack.
|
||||||
|
///
|
||||||
|
/// Examples of misconfiguration:
|
||||||
|
///
|
||||||
|
/// * Executing a stream or overlapping search on a searcher that was built was
|
||||||
|
/// something other than [`MatchKind::Standard`](crate::MatchKind::Standard)
|
||||||
|
/// semantics.
|
||||||
|
/// * Requested an anchored or an unanchored search on a searcher that doesn't
|
||||||
|
/// support unanchored or anchored searches, respectively.
|
||||||
|
///
|
||||||
|
/// When the `std` feature is enabled, this implements the `std::error::Error`
|
||||||
|
/// trait.
|
||||||
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
|
pub struct MatchError(alloc::boxed::Box<MatchErrorKind>);
|
||||||
|
|
||||||
|
impl MatchError {
|
||||||
|
/// Create a new error value with the given kind.
|
||||||
|
///
|
||||||
|
/// This is a more verbose version of the kind-specific constructors, e.g.,
|
||||||
|
/// `MatchError::unsupported_stream`.
|
||||||
|
pub fn new(kind: MatchErrorKind) -> MatchError {
|
||||||
|
MatchError(alloc::boxed::Box::new(kind))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a reference to the underlying error kind.
|
||||||
|
pub fn kind(&self) -> &MatchErrorKind {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new "invalid anchored search" error. This occurs when the
|
||||||
|
/// caller requests an anchored search but where anchored searches aren't
|
||||||
|
/// supported.
|
||||||
|
///
|
||||||
|
/// This is the same as calling `MatchError::new` with a
|
||||||
|
/// [`MatchErrorKind::InvalidInputAnchored`] kind.
|
||||||
|
pub fn invalid_input_anchored() -> MatchError {
|
||||||
|
MatchError::new(MatchErrorKind::InvalidInputAnchored)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new "invalid unanchored search" error. This occurs when the
|
||||||
|
/// caller requests an unanchored search but where unanchored searches
|
||||||
|
/// aren't supported.
|
||||||
|
///
|
||||||
|
/// This is the same as calling `MatchError::new` with a
|
||||||
|
/// [`MatchErrorKind::InvalidInputUnanchored`] kind.
|
||||||
|
pub fn invalid_input_unanchored() -> MatchError {
|
||||||
|
MatchError::new(MatchErrorKind::InvalidInputUnanchored)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new "unsupported stream search" error. This occurs when the
|
||||||
|
/// caller requests a stream search while using an Aho-Corasick automaton
|
||||||
|
/// with a match kind other than [`MatchKind::Standard`].
|
||||||
|
///
|
||||||
|
/// The match kind given should be the match kind of the automaton. It
|
||||||
|
/// should never be `MatchKind::Standard`.
|
||||||
|
pub fn unsupported_stream(got: MatchKind) -> MatchError {
|
||||||
|
MatchError::new(MatchErrorKind::UnsupportedStream { got })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new "unsupported overlapping search" error. This occurs when
|
||||||
|
/// the caller requests an overlapping search while using an Aho-Corasick
|
||||||
|
/// automaton with a match kind other than [`MatchKind::Standard`].
|
||||||
|
///
|
||||||
|
/// The match kind given should be the match kind of the automaton. It
|
||||||
|
/// should never be `MatchKind::Standard`.
|
||||||
|
pub fn unsupported_overlapping(got: MatchKind) -> MatchError {
|
||||||
|
MatchError::new(MatchErrorKind::UnsupportedOverlapping { got })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new "unsupported empty pattern" error. This occurs when the
|
||||||
|
/// caller requests a search for which matching an automaton that contains
|
||||||
|
/// an empty pattern string is not supported.
|
||||||
|
pub fn unsupported_empty() -> MatchError {
|
||||||
|
MatchError::new(MatchErrorKind::UnsupportedEmpty)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The underlying kind of a [`MatchError`].
|
||||||
|
///
|
||||||
|
/// This is a **non-exhaustive** enum. That means new variants may be added in
|
||||||
|
/// a semver-compatible release.
|
||||||
|
#[non_exhaustive]
|
||||||
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
|
pub enum MatchErrorKind {
|
||||||
|
/// An error indicating that an anchored search was requested, but from a
|
||||||
|
/// searcher that was built without anchored support.
|
||||||
|
InvalidInputAnchored,
|
||||||
|
/// An error indicating that an unanchored search was requested, but from a
|
||||||
|
/// searcher that was built without unanchored support.
|
||||||
|
InvalidInputUnanchored,
|
||||||
|
/// An error indicating that a stream search was attempted on an
|
||||||
|
/// Aho-Corasick automaton with an unsupported `MatchKind`.
|
||||||
|
UnsupportedStream {
|
||||||
|
/// The match semantics for the automaton that was used.
|
||||||
|
got: MatchKind,
|
||||||
|
},
|
||||||
|
/// An error indicating that an overlapping search was attempted on an
|
||||||
|
/// Aho-Corasick automaton with an unsupported `MatchKind`.
|
||||||
|
UnsupportedOverlapping {
|
||||||
|
/// The match semantics for the automaton that was used.
|
||||||
|
got: MatchKind,
|
||||||
|
},
|
||||||
|
/// An error indicating that the operation requested doesn't support
|
||||||
|
/// automatons that contain an empty pattern string.
|
||||||
|
UnsupportedEmpty,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
impl std::error::Error for MatchError {}
|
||||||
|
|
||||||
|
impl core::fmt::Display for MatchError {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||||
|
match *self.kind() {
|
||||||
|
MatchErrorKind::InvalidInputAnchored => {
|
||||||
|
write!(f, "anchored searches are not supported or enabled")
|
||||||
|
}
|
||||||
|
MatchErrorKind::InvalidInputUnanchored => {
|
||||||
|
write!(f, "unanchored searches are not supported or enabled")
|
||||||
|
}
|
||||||
|
MatchErrorKind::UnsupportedStream { got } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"match kind {:?} does not support stream searching",
|
||||||
|
got,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
MatchErrorKind::UnsupportedOverlapping { got } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"match kind {:?} does not support overlapping searches",
|
||||||
|
got,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
MatchErrorKind::UnsupportedEmpty => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"matching with an empty pattern string is not \
|
||||||
|
supported for this operation",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
278
.gear/predownloaded-development/vendor/aho-corasick/src/util/int.rs
vendored
Normal file
278
.gear/predownloaded-development/vendor/aho-corasick/src/util/int.rs
vendored
Normal file
|
|
@ -0,0 +1,278 @@
|
||||||
|
/*!
|
||||||
|
This module provides several integer oriented traits for converting between
|
||||||
|
both fixed size integers and integers whose size varies based on the target
|
||||||
|
(like `usize`).
|
||||||
|
|
||||||
|
The main design principle for this module is to centralize all uses of `as`.
|
||||||
|
The thinking here is that `as` makes it very easy to perform accidental lossy
|
||||||
|
conversions, and if we centralize all its uses here under more descriptive
|
||||||
|
higher level operations, its use and correctness becomes easier to audit.
|
||||||
|
|
||||||
|
This was copied mostly wholesale from `regex-automata`.
|
||||||
|
|
||||||
|
NOTE: for simplicity, we don't take target pointer width into account here for
|
||||||
|
`usize` conversions. Since we currently only panic in debug mode, skipping the
|
||||||
|
check when it can be proven it isn't needed at compile time doesn't really
|
||||||
|
matter. Now, if we wind up wanting to do as many checks as possible in release
|
||||||
|
mode, then we would want to skip those when we know the conversions are always
|
||||||
|
non-lossy.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// We define a little more than what we need, but I'd rather just have
|
||||||
|
// everything via a consistent and uniform API then have holes.
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
pub(crate) trait U8 {
|
||||||
|
fn as_usize(self) -> usize;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl U8 for u8 {
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
usize::from(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) trait U16 {
|
||||||
|
fn as_usize(self) -> usize;
|
||||||
|
fn low_u8(self) -> u8;
|
||||||
|
fn high_u8(self) -> u8;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl U16 for u16 {
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
usize::from(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn low_u8(self) -> u8 {
|
||||||
|
self as u8
|
||||||
|
}
|
||||||
|
|
||||||
|
fn high_u8(self) -> u8 {
|
||||||
|
(self >> 8) as u8
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) trait U32 {
|
||||||
|
fn as_usize(self) -> usize;
|
||||||
|
fn low_u8(self) -> u8;
|
||||||
|
fn low_u16(self) -> u16;
|
||||||
|
fn high_u16(self) -> u16;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl U32 for u32 {
|
||||||
|
#[inline]
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
usize::try_from(self).expect("u32 overflowed usize")
|
||||||
|
}
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
{
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn low_u8(self) -> u8 {
|
||||||
|
self as u8
|
||||||
|
}
|
||||||
|
|
||||||
|
fn low_u16(self) -> u16 {
|
||||||
|
self as u16
|
||||||
|
}
|
||||||
|
|
||||||
|
fn high_u16(self) -> u16 {
|
||||||
|
(self >> 16) as u16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) trait U64 {
|
||||||
|
fn as_usize(self) -> usize;
|
||||||
|
fn low_u8(self) -> u8;
|
||||||
|
fn low_u16(self) -> u16;
|
||||||
|
fn low_u32(self) -> u32;
|
||||||
|
fn high_u32(self) -> u32;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl U64 for u64 {
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
usize::try_from(self).expect("u64 overflowed usize")
|
||||||
|
}
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
{
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn low_u8(self) -> u8 {
|
||||||
|
self as u8
|
||||||
|
}
|
||||||
|
|
||||||
|
fn low_u16(self) -> u16 {
|
||||||
|
self as u16
|
||||||
|
}
|
||||||
|
|
||||||
|
fn low_u32(self) -> u32 {
|
||||||
|
self as u32
|
||||||
|
}
|
||||||
|
|
||||||
|
fn high_u32(self) -> u32 {
|
||||||
|
(self >> 32) as u32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) trait I8 {
|
||||||
|
fn as_usize(self) -> usize;
|
||||||
|
fn to_bits(self) -> u8;
|
||||||
|
fn from_bits(n: u8) -> i8;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl I8 for i8 {
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
usize::try_from(self).expect("i8 overflowed usize")
|
||||||
|
}
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
{
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_bits(self) -> u8 {
|
||||||
|
self as u8
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_bits(n: u8) -> i8 {
|
||||||
|
n as i8
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) trait I32 {
|
||||||
|
fn as_usize(self) -> usize;
|
||||||
|
fn to_bits(self) -> u32;
|
||||||
|
fn from_bits(n: u32) -> i32;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl I32 for i32 {
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
usize::try_from(self).expect("i32 overflowed usize")
|
||||||
|
}
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
{
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_bits(self) -> u32 {
|
||||||
|
self as u32
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_bits(n: u32) -> i32 {
|
||||||
|
n as i32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) trait I64 {
|
||||||
|
fn as_usize(self) -> usize;
|
||||||
|
fn to_bits(self) -> u64;
|
||||||
|
fn from_bits(n: u64) -> i64;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl I64 for i64 {
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
usize::try_from(self).expect("i64 overflowed usize")
|
||||||
|
}
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
{
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_bits(self) -> u64 {
|
||||||
|
self as u64
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_bits(n: u64) -> i64 {
|
||||||
|
n as i64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) trait Usize {
|
||||||
|
fn as_u8(self) -> u8;
|
||||||
|
fn as_u16(self) -> u16;
|
||||||
|
fn as_u32(self) -> u32;
|
||||||
|
fn as_u64(self) -> u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Usize for usize {
|
||||||
|
fn as_u8(self) -> u8 {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
u8::try_from(self).expect("usize overflowed u8")
|
||||||
|
}
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
{
|
||||||
|
self as u8
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_u16(self) -> u16 {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
u16::try_from(self).expect("usize overflowed u16")
|
||||||
|
}
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
{
|
||||||
|
self as u16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_u32(self) -> u32 {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
u32::try_from(self).expect("usize overflowed u32")
|
||||||
|
}
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
{
|
||||||
|
self as u32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_u64(self) -> u64 {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
u64::try_from(self).expect("usize overflowed u64")
|
||||||
|
}
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
{
|
||||||
|
self as u64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pointers aren't integers, but we convert pointers to integers to perform
|
||||||
|
// offset arithmetic in some places. (And no, we don't convert the integers
|
||||||
|
// back to pointers.) So add 'as_usize' conversions here too for completeness.
|
||||||
|
//
|
||||||
|
// These 'as' casts are actually okay because they're always non-lossy. But the
|
||||||
|
// idea here is to just try and remove as much 'as' as possible, particularly
|
||||||
|
// in this crate where we are being really paranoid about offsets and making
|
||||||
|
// sure we don't panic on inputs that might be untrusted. This way, the 'as'
|
||||||
|
// casts become easier to audit if they're all in one place, even when some of
|
||||||
|
// them are actually okay 100% of the time.
|
||||||
|
|
||||||
|
pub(crate) trait Pointer {
|
||||||
|
fn as_usize(self) -> usize;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Pointer for *const T {
|
||||||
|
fn as_usize(self) -> usize {
|
||||||
|
self as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
12
.gear/predownloaded-development/vendor/aho-corasick/src/util/mod.rs
vendored
Normal file
12
.gear/predownloaded-development/vendor/aho-corasick/src/util/mod.rs
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
pub(crate) mod alphabet;
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
pub(crate) mod buffer;
|
||||||
|
pub(crate) mod byte_frequencies;
|
||||||
|
pub(crate) mod debug;
|
||||||
|
pub(crate) mod error;
|
||||||
|
pub(crate) mod int;
|
||||||
|
pub(crate) mod prefilter;
|
||||||
|
pub(crate) mod primitives;
|
||||||
|
pub(crate) mod remapper;
|
||||||
|
pub(crate) mod search;
|
||||||
|
pub(crate) mod special;
|
||||||
924
.gear/predownloaded-development/vendor/aho-corasick/src/util/prefilter.rs
vendored
Normal file
924
.gear/predownloaded-development/vendor/aho-corasick/src/util/prefilter.rs
vendored
Normal file
|
|
@ -0,0 +1,924 @@
|
||||||
|
use core::{
|
||||||
|
cmp,
|
||||||
|
fmt::Debug,
|
||||||
|
panic::{RefUnwindSafe, UnwindSafe},
|
||||||
|
u8,
|
||||||
|
};
|
||||||
|
|
||||||
|
use alloc::{sync::Arc, vec, vec::Vec};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
packed,
|
||||||
|
util::{
|
||||||
|
alphabet::ByteSet,
|
||||||
|
search::{Match, MatchKind, Span},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A prefilter for accelerating a search.
|
||||||
|
///
|
||||||
|
/// This crate uses prefilters in the core search implementations to accelerate
|
||||||
|
/// common cases. They typically only apply to cases where there are a small
|
||||||
|
/// number of patterns (less than 100 or so), but when they do, thoughput can
|
||||||
|
/// be boosted considerably, perhaps by an order of magnitude. When a prefilter
|
||||||
|
/// is active, it is used whenever a search enters an automaton's start state.
|
||||||
|
///
|
||||||
|
/// Currently, prefilters cannot be constructed by
|
||||||
|
/// callers. A `Prefilter` can only be accessed via the
|
||||||
|
/// [`Automaton::prefilter`](crate::automaton::Automaton::prefilter)
|
||||||
|
/// method and used to execute a search. In other words, a prefilter can be
|
||||||
|
/// used to optimize your own search implementation if necessary, but cannot do
|
||||||
|
/// much else. If you have a use case for more APIs, please submit an issue.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Prefilter {
|
||||||
|
finder: Arc<dyn PrefilterI>,
|
||||||
|
memory_usage: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Prefilter {
|
||||||
|
/// Execute a search in the haystack within the span given. If a match or
|
||||||
|
/// a possible match is returned, then it is guaranteed to occur within
|
||||||
|
/// the bounds of the span.
|
||||||
|
///
|
||||||
|
/// If the span provided is invalid for the given haystack, then behavior
|
||||||
|
/// is unspecified.
|
||||||
|
#[inline]
|
||||||
|
pub fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
self.finder.find_in(haystack, span)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub(crate) fn memory_usage(&self) -> usize {
|
||||||
|
self.memory_usage
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A candidate is the result of running a prefilter on a haystack at a
|
||||||
|
/// particular position.
|
||||||
|
///
|
||||||
|
/// The result is either no match, a confirmed match or a possible match.
|
||||||
|
///
|
||||||
|
/// When no match is returned, the prefilter is guaranteeing that no possible
|
||||||
|
/// match can be found in the haystack, and the caller may trust this. That is,
|
||||||
|
/// all correct prefilters must never report false negatives.
|
||||||
|
///
|
||||||
|
/// In some cases, a prefilter can confirm a match very quickly, in which case,
|
||||||
|
/// the caller may use this to stop what it's doing and report the match. In
|
||||||
|
/// this case, prefilter implementations must never report a false positive.
|
||||||
|
/// In other cases, the prefilter can only report a potential match, in which
|
||||||
|
/// case the callers must attempt to confirm the match. In this case, prefilter
|
||||||
|
/// implementations are permitted to return false positives.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub enum Candidate {
|
||||||
|
/// No match was found. Since false negatives are not possible, this means
|
||||||
|
/// the search can quit as it is guaranteed not to find another match.
|
||||||
|
None,
|
||||||
|
/// A confirmed match was found. Callers do not need to confirm it.
|
||||||
|
Match(Match),
|
||||||
|
/// The start of a possible match was found. Callers must confirm it before
|
||||||
|
/// reporting it as a match.
|
||||||
|
PossibleStartOfMatch(usize),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Candidate {
|
||||||
|
/// Convert this candidate into an option. This is useful when callers
|
||||||
|
/// do not distinguish between true positives and false positives (i.e.,
|
||||||
|
/// the caller must always confirm the match).
|
||||||
|
pub fn into_option(self) -> Option<usize> {
|
||||||
|
match self {
|
||||||
|
Candidate::None => None,
|
||||||
|
Candidate::Match(ref m) => Some(m.start()),
|
||||||
|
Candidate::PossibleStartOfMatch(start) => Some(start),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A prefilter describes the behavior of fast literal scanners for quickly
|
||||||
|
/// skipping past bytes in the haystack that we know cannot possibly
|
||||||
|
/// participate in a match.
|
||||||
|
trait PrefilterI:
|
||||||
|
Send + Sync + RefUnwindSafe + UnwindSafe + Debug + 'static
|
||||||
|
{
|
||||||
|
/// Returns the next possible match candidate. This may yield false
|
||||||
|
/// positives, so callers must confirm a match starting at the position
|
||||||
|
/// returned. This, however, must never produce false negatives. That is,
|
||||||
|
/// this must, at minimum, return the starting position of the next match
|
||||||
|
/// in the given haystack after or at the given position.
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<P: PrefilterI + ?Sized> PrefilterI for Arc<P> {
|
||||||
|
#[inline(always)]
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
(**self).find_in(haystack, span)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A builder for constructing the best possible prefilter. When constructed,
|
||||||
|
/// this builder will heuristically select the best prefilter it can build,
|
||||||
|
/// if any, and discard the rest.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct Builder {
|
||||||
|
count: usize,
|
||||||
|
ascii_case_insensitive: bool,
|
||||||
|
start_bytes: StartBytesBuilder,
|
||||||
|
rare_bytes: RareBytesBuilder,
|
||||||
|
memmem: MemmemBuilder,
|
||||||
|
packed: Option<packed::Builder>,
|
||||||
|
// If we run across a condition that suggests we shouldn't use a prefilter
|
||||||
|
// at all (like an empty pattern), then disable prefilters entirely.
|
||||||
|
enabled: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Builder {
|
||||||
|
/// Create a new builder for constructing the best possible prefilter.
|
||||||
|
pub(crate) fn new(kind: MatchKind) -> Builder {
|
||||||
|
let pbuilder = kind
|
||||||
|
.as_packed()
|
||||||
|
.map(|kind| packed::Config::new().match_kind(kind).builder());
|
||||||
|
Builder {
|
||||||
|
count: 0,
|
||||||
|
ascii_case_insensitive: false,
|
||||||
|
start_bytes: StartBytesBuilder::new(),
|
||||||
|
rare_bytes: RareBytesBuilder::new(),
|
||||||
|
memmem: MemmemBuilder::default(),
|
||||||
|
packed: pbuilder,
|
||||||
|
enabled: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Enable ASCII case insensitivity. When set, byte strings added to this
|
||||||
|
/// builder will be interpreted without respect to ASCII case.
|
||||||
|
pub(crate) fn ascii_case_insensitive(mut self, yes: bool) -> Builder {
|
||||||
|
self.ascii_case_insensitive = yes;
|
||||||
|
self.start_bytes = self.start_bytes.ascii_case_insensitive(yes);
|
||||||
|
self.rare_bytes = self.rare_bytes.ascii_case_insensitive(yes);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return a prefilter suitable for quickly finding potential matches.
|
||||||
|
///
|
||||||
|
/// All patterns added to an Aho-Corasick automaton should be added to this
|
||||||
|
/// builder before attempting to construct the prefilter.
|
||||||
|
pub(crate) fn build(&self) -> Option<Prefilter> {
|
||||||
|
if !self.enabled {
|
||||||
|
debug!("prefilter not enabled, skipping");
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
// If we only have one pattern, then deferring to memmem is always
|
||||||
|
// the best choice. This is kind of a weird case, because, well, why
|
||||||
|
// use Aho-Corasick if you only have one pattern? But maybe you don't
|
||||||
|
// know exactly how many patterns you'll get up front, and you need to
|
||||||
|
// support the option of multiple patterns. So instead of relying on
|
||||||
|
// the caller to branch and use memmem explicitly, we just do it for
|
||||||
|
// them.
|
||||||
|
if !self.ascii_case_insensitive {
|
||||||
|
if let Some(pre) = self.memmem.build() {
|
||||||
|
debug!("using memmem prefilter");
|
||||||
|
return Some(pre);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let (packed, patlen, minlen) = if self.ascii_case_insensitive {
|
||||||
|
(None, usize::MAX, 0)
|
||||||
|
} else {
|
||||||
|
let patlen = self.packed.as_ref().map_or(usize::MAX, |p| p.len());
|
||||||
|
let minlen = self.packed.as_ref().map_or(0, |p| p.minimum_len());
|
||||||
|
let packed =
|
||||||
|
self.packed.as_ref().and_then(|b| b.build()).map(|s| {
|
||||||
|
let memory_usage = s.memory_usage();
|
||||||
|
debug!(
|
||||||
|
"built packed prefilter (len: {}, \
|
||||||
|
minimum pattern len: {}, memory usage: {}) \
|
||||||
|
for consideration",
|
||||||
|
patlen, minlen, memory_usage,
|
||||||
|
);
|
||||||
|
Prefilter { finder: Arc::new(Packed(s)), memory_usage }
|
||||||
|
});
|
||||||
|
(packed, patlen, minlen)
|
||||||
|
};
|
||||||
|
match (self.start_bytes.build(), self.rare_bytes.build()) {
|
||||||
|
// If we could build both start and rare prefilters, then there are
|
||||||
|
// a few cases in which we'd want to use the start-byte prefilter
|
||||||
|
// over the rare-byte prefilter, since the former has lower
|
||||||
|
// overhead.
|
||||||
|
(prestart @ Some(_), prerare @ Some(_)) => {
|
||||||
|
debug!(
|
||||||
|
"both start (len={}, rank={}) and \
|
||||||
|
rare (len={}, rank={}) byte prefilters \
|
||||||
|
are available",
|
||||||
|
self.start_bytes.count,
|
||||||
|
self.start_bytes.rank_sum,
|
||||||
|
self.rare_bytes.count,
|
||||||
|
self.rare_bytes.rank_sum,
|
||||||
|
);
|
||||||
|
if patlen <= 16
|
||||||
|
&& minlen >= 2
|
||||||
|
&& self.start_bytes.count >= 3
|
||||||
|
&& self.rare_bytes.count >= 3
|
||||||
|
{
|
||||||
|
debug!(
|
||||||
|
"start and rare byte prefilters available, but \
|
||||||
|
they're probably slower than packed so using \
|
||||||
|
packed"
|
||||||
|
);
|
||||||
|
return packed;
|
||||||
|
}
|
||||||
|
// If the start-byte prefilter can scan for a smaller number
|
||||||
|
// of bytes than the rare-byte prefilter, then it's probably
|
||||||
|
// faster.
|
||||||
|
let has_fewer_bytes =
|
||||||
|
self.start_bytes.count < self.rare_bytes.count;
|
||||||
|
// Otherwise, if the combined frequency rank of the detected
|
||||||
|
// bytes in the start-byte prefilter is "close" to the combined
|
||||||
|
// frequency rank of the rare-byte prefilter, then we pick
|
||||||
|
// the start-byte prefilter even if the rare-byte prefilter
|
||||||
|
// heuristically searches for rare bytes. This is because the
|
||||||
|
// rare-byte prefilter has higher constant costs, so we tend to
|
||||||
|
// prefer the start-byte prefilter when we can.
|
||||||
|
let has_rarer_bytes =
|
||||||
|
self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50;
|
||||||
|
if has_fewer_bytes {
|
||||||
|
debug!(
|
||||||
|
"using start byte prefilter because it has fewer
|
||||||
|
bytes to search for than the rare byte prefilter",
|
||||||
|
);
|
||||||
|
prestart
|
||||||
|
} else if has_rarer_bytes {
|
||||||
|
debug!(
|
||||||
|
"using start byte prefilter because its byte \
|
||||||
|
frequency rank was determined to be \
|
||||||
|
\"good enough\" relative to the rare byte prefilter \
|
||||||
|
byte frequency rank",
|
||||||
|
);
|
||||||
|
prestart
|
||||||
|
} else {
|
||||||
|
debug!("using rare byte prefilter");
|
||||||
|
prerare
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(prestart @ Some(_), None) => {
|
||||||
|
if patlen <= 16 && minlen >= 2 && self.start_bytes.count >= 3 {
|
||||||
|
debug!(
|
||||||
|
"start byte prefilter available, but \
|
||||||
|
it's probably slower than packed so using \
|
||||||
|
packed"
|
||||||
|
);
|
||||||
|
return packed;
|
||||||
|
}
|
||||||
|
debug!(
|
||||||
|
"have start byte prefilter but not rare byte prefilter, \
|
||||||
|
so using start byte prefilter",
|
||||||
|
);
|
||||||
|
prestart
|
||||||
|
}
|
||||||
|
(None, prerare @ Some(_)) => {
|
||||||
|
if patlen <= 16 && minlen >= 2 && self.rare_bytes.count >= 3 {
|
||||||
|
debug!(
|
||||||
|
"rare byte prefilter available, but \
|
||||||
|
it's probably slower than packed so using \
|
||||||
|
packed"
|
||||||
|
);
|
||||||
|
return packed;
|
||||||
|
}
|
||||||
|
debug!(
|
||||||
|
"have rare byte prefilter but not start byte prefilter, \
|
||||||
|
so using rare byte prefilter",
|
||||||
|
);
|
||||||
|
prerare
|
||||||
|
}
|
||||||
|
(None, None) if self.ascii_case_insensitive => {
|
||||||
|
debug!(
|
||||||
|
"no start or rare byte prefilter and ASCII case \
|
||||||
|
insensitivity was enabled, so skipping prefilter",
|
||||||
|
);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
(None, None) => {
|
||||||
|
if packed.is_some() {
|
||||||
|
debug!("falling back to packed prefilter");
|
||||||
|
} else {
|
||||||
|
debug!("no prefilter available");
|
||||||
|
}
|
||||||
|
packed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a literal string to this prefilter builder.
|
||||||
|
pub(crate) fn add(&mut self, bytes: &[u8]) {
|
||||||
|
if bytes.is_empty() {
|
||||||
|
self.enabled = false;
|
||||||
|
}
|
||||||
|
if !self.enabled {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
self.count += 1;
|
||||||
|
self.start_bytes.add(bytes);
|
||||||
|
self.rare_bytes.add(bytes);
|
||||||
|
self.memmem.add(bytes);
|
||||||
|
if let Some(ref mut pbuilder) = self.packed {
|
||||||
|
pbuilder.add(bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A type that wraps a packed searcher and implements the `Prefilter`
|
||||||
|
/// interface.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct Packed(packed::Searcher);
|
||||||
|
|
||||||
|
impl PrefilterI for Packed {
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
self.0
|
||||||
|
.find_in(&haystack, span)
|
||||||
|
.map_or(Candidate::None, Candidate::Match)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A builder for constructing a prefilter that uses memmem.
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
struct MemmemBuilder {
|
||||||
|
/// The number of patterns that have been added.
|
||||||
|
count: usize,
|
||||||
|
/// The singular pattern to search for. This is only set when count==1.
|
||||||
|
one: Option<Vec<u8>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MemmemBuilder {
|
||||||
|
fn build(&self) -> Option<Prefilter> {
|
||||||
|
#[cfg(all(feature = "std", feature = "perf-literal"))]
|
||||||
|
fn imp(builder: &MemmemBuilder) -> Option<Prefilter> {
|
||||||
|
let pattern = builder.one.as_ref()?;
|
||||||
|
assert_eq!(1, builder.count);
|
||||||
|
let finder = Arc::new(Memmem(
|
||||||
|
memchr::memmem::Finder::new(pattern).into_owned(),
|
||||||
|
));
|
||||||
|
let memory_usage = pattern.len();
|
||||||
|
Some(Prefilter { finder, memory_usage })
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(all(feature = "std", feature = "perf-literal")))]
|
||||||
|
fn imp(_: &MemmemBuilder) -> Option<Prefilter> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
imp(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(&mut self, bytes: &[u8]) {
|
||||||
|
self.count += 1;
|
||||||
|
if self.count == 1 {
|
||||||
|
self.one = Some(bytes.to_vec());
|
||||||
|
} else {
|
||||||
|
self.one = None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A type that wraps a SIMD accelerated single substring search from the
|
||||||
|
/// `memchr` crate for use as a prefilter.
|
||||||
|
///
|
||||||
|
/// Currently, this prefilter is only active for Aho-Corasick searchers with
|
||||||
|
/// a single pattern. In theory, this could be extended to support searchers
|
||||||
|
/// that have a common prefix of more than one byte (for one byte, we would use
|
||||||
|
/// memchr), but it's not clear if it's worth it or not.
|
||||||
|
///
|
||||||
|
/// Also, unfortunately, this currently also requires the 'std' feature to
|
||||||
|
/// be enabled. That's because memchr doesn't have a no-std-but-with-alloc
|
||||||
|
/// mode, and so APIs like Finder::into_owned aren't available when 'std' is
|
||||||
|
/// disabled. But there should be an 'alloc' feature that brings in APIs like
|
||||||
|
/// Finder::into_owned but doesn't use std-only features like runtime CPU
|
||||||
|
/// feature detection.
|
||||||
|
#[cfg(all(feature = "std", feature = "perf-literal"))]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct Memmem(memchr::memmem::Finder<'static>);
|
||||||
|
|
||||||
|
#[cfg(all(feature = "std", feature = "perf-literal"))]
|
||||||
|
impl PrefilterI for Memmem {
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
use crate::util::primitives::PatternID;
|
||||||
|
|
||||||
|
self.0.find(&haystack[span]).map_or(Candidate::None, |i| {
|
||||||
|
let start = span.start + i;
|
||||||
|
let end = start + self.0.needle().len();
|
||||||
|
// N.B. We can declare a match and use a fixed pattern ID here
|
||||||
|
// because a Memmem prefilter is only ever created for searchers
|
||||||
|
// with exactly one pattern. Thus, every match is always a match
|
||||||
|
// and it is always for the first and only pattern.
|
||||||
|
Candidate::Match(Match::new(PatternID::ZERO, start..end))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A builder for constructing a rare byte prefilter.
|
||||||
|
///
|
||||||
|
/// A rare byte prefilter attempts to pick out a small set of rare bytes that
|
||||||
|
/// occurr in the patterns, and then quickly scan to matches of those rare
|
||||||
|
/// bytes.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct RareBytesBuilder {
|
||||||
|
/// Whether this prefilter should account for ASCII case insensitivity or
|
||||||
|
/// not.
|
||||||
|
ascii_case_insensitive: bool,
|
||||||
|
/// A set of rare bytes, indexed by byte value.
|
||||||
|
rare_set: ByteSet,
|
||||||
|
/// A set of byte offsets associated with bytes in a pattern. An entry
|
||||||
|
/// corresponds to a particular bytes (its index) and is only non-zero if
|
||||||
|
/// the byte occurred at an offset greater than 0 in at least one pattern.
|
||||||
|
///
|
||||||
|
/// If a byte's offset is not representable in 8 bits, then the rare bytes
|
||||||
|
/// prefilter becomes inert.
|
||||||
|
byte_offsets: RareByteOffsets,
|
||||||
|
/// Whether this is available as a prefilter or not. This can be set to
|
||||||
|
/// false during construction if a condition is seen that invalidates the
|
||||||
|
/// use of the rare-byte prefilter.
|
||||||
|
available: bool,
|
||||||
|
/// The number of bytes set to an active value in `byte_offsets`.
|
||||||
|
count: usize,
|
||||||
|
/// The sum of frequency ranks for the rare bytes detected. This is
|
||||||
|
/// intended to give a heuristic notion of how rare the bytes are.
|
||||||
|
rank_sum: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A set of byte offsets, keyed by byte.
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
struct RareByteOffsets {
|
||||||
|
/// Each entry corresponds to the maximum offset of the corresponding
|
||||||
|
/// byte across all patterns seen.
|
||||||
|
set: [RareByteOffset; 256],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RareByteOffsets {
|
||||||
|
/// Create a new empty set of rare byte offsets.
|
||||||
|
pub(crate) fn empty() -> RareByteOffsets {
|
||||||
|
RareByteOffsets { set: [RareByteOffset::default(); 256] }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add the given offset for the given byte to this set. If the offset is
|
||||||
|
/// greater than the existing offset, then it overwrites the previous
|
||||||
|
/// value and returns false. If there is no previous value set, then this
|
||||||
|
/// sets it and returns true.
|
||||||
|
pub(crate) fn set(&mut self, byte: u8, off: RareByteOffset) {
|
||||||
|
self.set[byte as usize].max =
|
||||||
|
cmp::max(self.set[byte as usize].max, off.max);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::fmt::Debug for RareByteOffsets {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
let mut offsets = vec![];
|
||||||
|
for off in self.set.iter() {
|
||||||
|
if off.max > 0 {
|
||||||
|
offsets.push(off);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
f.debug_struct("RareByteOffsets").field("set", &offsets).finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Offsets associated with an occurrence of a "rare" byte in any of the
|
||||||
|
/// patterns used to construct a single Aho-Corasick automaton.
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
struct RareByteOffset {
|
||||||
|
/// The maximum offset at which a particular byte occurs from the start
|
||||||
|
/// of any pattern. This is used as a shift amount. That is, when an
|
||||||
|
/// occurrence of this byte is found, the candidate position reported by
|
||||||
|
/// the prefilter is `position_of_byte - max`, such that the automaton
|
||||||
|
/// will begin its search at a position that is guaranteed to observe a
|
||||||
|
/// match.
|
||||||
|
///
|
||||||
|
/// To avoid accidentally quadratic behavior, a prefilter is considered
|
||||||
|
/// ineffective when it is asked to start scanning from a position that it
|
||||||
|
/// has already scanned past.
|
||||||
|
///
|
||||||
|
/// Using a `u8` here means that if we ever see a pattern that's longer
|
||||||
|
/// than 255 bytes, then the entire rare byte prefilter is disabled.
|
||||||
|
max: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for RareByteOffset {
|
||||||
|
fn default() -> RareByteOffset {
|
||||||
|
RareByteOffset { max: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RareByteOffset {
|
||||||
|
/// Create a new rare byte offset. If the given offset is too big, then
|
||||||
|
/// None is returned. In that case, callers should render the rare bytes
|
||||||
|
/// prefilter inert.
|
||||||
|
fn new(max: usize) -> Option<RareByteOffset> {
|
||||||
|
if max > u8::MAX as usize {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(RareByteOffset { max: max as u8 })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RareBytesBuilder {
|
||||||
|
/// Create a new builder for constructing a rare byte prefilter.
|
||||||
|
fn new() -> RareBytesBuilder {
|
||||||
|
RareBytesBuilder {
|
||||||
|
ascii_case_insensitive: false,
|
||||||
|
rare_set: ByteSet::empty(),
|
||||||
|
byte_offsets: RareByteOffsets::empty(),
|
||||||
|
available: true,
|
||||||
|
count: 0,
|
||||||
|
rank_sum: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Enable ASCII case insensitivity. When set, byte strings added to this
|
||||||
|
/// builder will be interpreted without respect to ASCII case.
|
||||||
|
fn ascii_case_insensitive(mut self, yes: bool) -> RareBytesBuilder {
|
||||||
|
self.ascii_case_insensitive = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the rare bytes prefilter.
|
||||||
|
///
|
||||||
|
/// If there are more than 3 distinct rare bytes found, or if heuristics
|
||||||
|
/// otherwise determine that this prefilter should not be used, then `None`
|
||||||
|
/// is returned.
|
||||||
|
fn build(&self) -> Option<Prefilter> {
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
fn imp(builder: &RareBytesBuilder) -> Option<Prefilter> {
|
||||||
|
if !builder.available || builder.count > 3 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let (mut bytes, mut len) = ([0; 3], 0);
|
||||||
|
for b in 0..=255 {
|
||||||
|
if builder.rare_set.contains(b) {
|
||||||
|
bytes[len] = b as u8;
|
||||||
|
len += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let finder: Arc<dyn PrefilterI> = match len {
|
||||||
|
0 => return None,
|
||||||
|
1 => Arc::new(RareBytesOne {
|
||||||
|
byte1: bytes[0],
|
||||||
|
offset: builder.byte_offsets.set[bytes[0] as usize],
|
||||||
|
}),
|
||||||
|
2 => Arc::new(RareBytesTwo {
|
||||||
|
offsets: builder.byte_offsets,
|
||||||
|
byte1: bytes[0],
|
||||||
|
byte2: bytes[1],
|
||||||
|
}),
|
||||||
|
3 => Arc::new(RareBytesThree {
|
||||||
|
offsets: builder.byte_offsets,
|
||||||
|
byte1: bytes[0],
|
||||||
|
byte2: bytes[1],
|
||||||
|
byte3: bytes[2],
|
||||||
|
}),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
Some(Prefilter { finder, memory_usage: 0 })
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "perf-literal"))]
|
||||||
|
fn imp(_: &RareBytesBuilder) -> Option<Prefilter> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
imp(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a byte string to this builder.
|
||||||
|
///
|
||||||
|
/// All patterns added to an Aho-Corasick automaton should be added to this
|
||||||
|
/// builder before attempting to construct the prefilter.
|
||||||
|
fn add(&mut self, bytes: &[u8]) {
|
||||||
|
// If we've already given up, then do nothing.
|
||||||
|
if !self.available {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// If we've already blown our budget, then don't waste time looking
|
||||||
|
// for more rare bytes.
|
||||||
|
if self.count > 3 {
|
||||||
|
self.available = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// If the pattern is too long, then our offset table is bunk, so
|
||||||
|
// give up.
|
||||||
|
if bytes.len() >= 256 {
|
||||||
|
self.available = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let mut rarest = match bytes.get(0) {
|
||||||
|
None => return,
|
||||||
|
Some(&b) => (b, freq_rank(b)),
|
||||||
|
};
|
||||||
|
// The idea here is to look for the rarest byte in each pattern, and
|
||||||
|
// add that to our set. As a special exception, if we see a byte that
|
||||||
|
// we've already added, then we immediately stop and choose that byte,
|
||||||
|
// even if there's another rare byte in the pattern. This helps us
|
||||||
|
// apply the rare byte optimization in more cases by attempting to pick
|
||||||
|
// bytes that are in common between patterns. So for example, if we
|
||||||
|
// were searching for `Sherlock` and `lockjaw`, then this would pick
|
||||||
|
// `k` for both patterns, resulting in the use of `memchr` instead of
|
||||||
|
// `memchr2` for `k` and `j`.
|
||||||
|
let mut found = false;
|
||||||
|
for (pos, &b) in bytes.iter().enumerate() {
|
||||||
|
self.set_offset(pos, b);
|
||||||
|
if found {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if self.rare_set.contains(b) {
|
||||||
|
found = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let rank = freq_rank(b);
|
||||||
|
if rank < rarest.1 {
|
||||||
|
rarest = (b, rank);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
self.add_rare_byte(rarest.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_offset(&mut self, pos: usize, byte: u8) {
|
||||||
|
// This unwrap is OK because pos is never bigger than our max.
|
||||||
|
let offset = RareByteOffset::new(pos).unwrap();
|
||||||
|
self.byte_offsets.set(byte, offset);
|
||||||
|
if self.ascii_case_insensitive {
|
||||||
|
self.byte_offsets.set(opposite_ascii_case(byte), offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_rare_byte(&mut self, byte: u8) {
|
||||||
|
self.add_one_rare_byte(byte);
|
||||||
|
if self.ascii_case_insensitive {
|
||||||
|
self.add_one_rare_byte(opposite_ascii_case(byte));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_one_rare_byte(&mut self, byte: u8) {
|
||||||
|
if !self.rare_set.contains(byte) {
|
||||||
|
self.rare_set.add(byte);
|
||||||
|
self.count += 1;
|
||||||
|
self.rank_sum += freq_rank(byte) as u16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A prefilter for scanning for a single "rare" byte.
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct RareBytesOne {
|
||||||
|
byte1: u8,
|
||||||
|
offset: RareByteOffset,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
impl PrefilterI for RareBytesOne {
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
memchr::memchr(self.byte1, &haystack[span])
|
||||||
|
.map(|i| {
|
||||||
|
let pos = span.start + i;
|
||||||
|
cmp::max(
|
||||||
|
span.start,
|
||||||
|
pos.saturating_sub(usize::from(self.offset.max)),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A prefilter for scanning for two "rare" bytes.
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct RareBytesTwo {
|
||||||
|
offsets: RareByteOffsets,
|
||||||
|
byte1: u8,
|
||||||
|
byte2: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
impl PrefilterI for RareBytesTwo {
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
memchr::memchr2(self.byte1, self.byte2, &haystack[span])
|
||||||
|
.map(|i| {
|
||||||
|
let pos = span.start + i;
|
||||||
|
let offset = self.offsets.set[usize::from(haystack[pos])].max;
|
||||||
|
cmp::max(span.start, pos.saturating_sub(usize::from(offset)))
|
||||||
|
})
|
||||||
|
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A prefilter for scanning for three "rare" bytes.
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct RareBytesThree {
|
||||||
|
offsets: RareByteOffsets,
|
||||||
|
byte1: u8,
|
||||||
|
byte2: u8,
|
||||||
|
byte3: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
impl PrefilterI for RareBytesThree {
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
memchr::memchr3(self.byte1, self.byte2, self.byte3, &haystack[span])
|
||||||
|
.map(|i| {
|
||||||
|
let pos = span.start + i;
|
||||||
|
let offset = self.offsets.set[usize::from(haystack[pos])].max;
|
||||||
|
cmp::max(span.start, pos.saturating_sub(usize::from(offset)))
|
||||||
|
})
|
||||||
|
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A builder for constructing a starting byte prefilter.
|
||||||
|
///
|
||||||
|
/// A starting byte prefilter is a simplistic prefilter that looks for possible
|
||||||
|
/// matches by reporting all positions corresponding to a particular byte. This
|
||||||
|
/// generally only takes affect when there are at most 3 distinct possible
|
||||||
|
/// starting bytes. e.g., the patterns `foo`, `bar`, and `baz` have two
|
||||||
|
/// distinct starting bytes (`f` and `b`), and this prefilter returns all
|
||||||
|
/// occurrences of either `f` or `b`.
|
||||||
|
///
|
||||||
|
/// In some cases, a heuristic frequency analysis may determine that it would
|
||||||
|
/// be better not to use this prefilter even when there are 3 or fewer distinct
|
||||||
|
/// starting bytes.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct StartBytesBuilder {
|
||||||
|
/// Whether this prefilter should account for ASCII case insensitivity or
|
||||||
|
/// not.
|
||||||
|
ascii_case_insensitive: bool,
|
||||||
|
/// The set of starting bytes observed.
|
||||||
|
byteset: Vec<bool>,
|
||||||
|
/// The number of bytes set to true in `byteset`.
|
||||||
|
count: usize,
|
||||||
|
/// The sum of frequency ranks for the rare bytes detected. This is
|
||||||
|
/// intended to give a heuristic notion of how rare the bytes are.
|
||||||
|
rank_sum: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StartBytesBuilder {
|
||||||
|
/// Create a new builder for constructing a start byte prefilter.
|
||||||
|
fn new() -> StartBytesBuilder {
|
||||||
|
StartBytesBuilder {
|
||||||
|
ascii_case_insensitive: false,
|
||||||
|
byteset: vec![false; 256],
|
||||||
|
count: 0,
|
||||||
|
rank_sum: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Enable ASCII case insensitivity. When set, byte strings added to this
|
||||||
|
/// builder will be interpreted without respect to ASCII case.
|
||||||
|
fn ascii_case_insensitive(mut self, yes: bool) -> StartBytesBuilder {
|
||||||
|
self.ascii_case_insensitive = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the starting bytes prefilter.
|
||||||
|
///
|
||||||
|
/// If there are more than 3 distinct starting bytes, or if heuristics
|
||||||
|
/// otherwise determine that this prefilter should not be used, then `None`
|
||||||
|
/// is returned.
|
||||||
|
fn build(&self) -> Option<Prefilter> {
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
fn imp(builder: &StartBytesBuilder) -> Option<Prefilter> {
|
||||||
|
if builder.count > 3 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let (mut bytes, mut len) = ([0; 3], 0);
|
||||||
|
for b in 0..256 {
|
||||||
|
if !builder.byteset[b] {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// We don't handle non-ASCII bytes for now. Getting non-ASCII
|
||||||
|
// bytes right is trickier, since we generally don't want to put
|
||||||
|
// a leading UTF-8 code unit into a prefilter that isn't ASCII,
|
||||||
|
// since they can frequently. Instead, it would be better to use a
|
||||||
|
// continuation byte, but this requires more sophisticated analysis
|
||||||
|
// of the automaton and a richer prefilter API.
|
||||||
|
if b > 0x7F {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
bytes[len] = b as u8;
|
||||||
|
len += 1;
|
||||||
|
}
|
||||||
|
let finder: Arc<dyn PrefilterI> = match len {
|
||||||
|
0 => return None,
|
||||||
|
1 => Arc::new(StartBytesOne { byte1: bytes[0] }),
|
||||||
|
2 => Arc::new(StartBytesTwo {
|
||||||
|
byte1: bytes[0],
|
||||||
|
byte2: bytes[1],
|
||||||
|
}),
|
||||||
|
3 => Arc::new(StartBytesThree {
|
||||||
|
byte1: bytes[0],
|
||||||
|
byte2: bytes[1],
|
||||||
|
byte3: bytes[2],
|
||||||
|
}),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
Some(Prefilter { finder, memory_usage: 0 })
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "perf-literal"))]
|
||||||
|
fn imp(_: &StartBytesBuilder) -> Option<Prefilter> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
imp(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a byte string to this builder.
|
||||||
|
///
|
||||||
|
/// All patterns added to an Aho-Corasick automaton should be added to this
|
||||||
|
/// builder before attempting to construct the prefilter.
|
||||||
|
fn add(&mut self, bytes: &[u8]) {
|
||||||
|
if self.count > 3 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if let Some(&byte) = bytes.get(0) {
|
||||||
|
self.add_one_byte(byte);
|
||||||
|
if self.ascii_case_insensitive {
|
||||||
|
self.add_one_byte(opposite_ascii_case(byte));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_one_byte(&mut self, byte: u8) {
|
||||||
|
if !self.byteset[byte as usize] {
|
||||||
|
self.byteset[byte as usize] = true;
|
||||||
|
self.count += 1;
|
||||||
|
self.rank_sum += freq_rank(byte) as u16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A prefilter for scanning for a single starting byte.
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct StartBytesOne {
|
||||||
|
byte1: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
impl PrefilterI for StartBytesOne {
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
memchr::memchr(self.byte1, &haystack[span])
|
||||||
|
.map(|i| span.start + i)
|
||||||
|
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A prefilter for scanning for two starting bytes.
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct StartBytesTwo {
|
||||||
|
byte1: u8,
|
||||||
|
byte2: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
impl PrefilterI for StartBytesTwo {
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
memchr::memchr2(self.byte1, self.byte2, &haystack[span])
|
||||||
|
.map(|i| span.start + i)
|
||||||
|
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A prefilter for scanning for three starting bytes.
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct StartBytesThree {
|
||||||
|
byte1: u8,
|
||||||
|
byte2: u8,
|
||||||
|
byte3: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "perf-literal")]
|
||||||
|
impl PrefilterI for StartBytesThree {
|
||||||
|
fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
|
||||||
|
memchr::memchr3(self.byte1, self.byte2, self.byte3, &haystack[span])
|
||||||
|
.map(|i| span.start + i)
|
||||||
|
.map_or(Candidate::None, Candidate::PossibleStartOfMatch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If the given byte is an ASCII letter, then return it in the opposite case.
|
||||||
|
/// e.g., Given `b'A'`, this returns `b'a'`, and given `b'a'`, this returns
|
||||||
|
/// `b'A'`. If a non-ASCII letter is given, then the given byte is returned.
|
||||||
|
pub(crate) fn opposite_ascii_case(b: u8) -> u8 {
|
||||||
|
if b'A' <= b && b <= b'Z' {
|
||||||
|
b.to_ascii_lowercase()
|
||||||
|
} else if b'a' <= b && b <= b'z' {
|
||||||
|
b.to_ascii_uppercase()
|
||||||
|
} else {
|
||||||
|
b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the frequency rank of the given byte. The higher the rank, the more
|
||||||
|
/// common the byte (heuristically speaking).
|
||||||
|
fn freq_rank(b: u8) -> u8 {
|
||||||
|
use crate::util::byte_frequencies::BYTE_FREQUENCIES;
|
||||||
|
BYTE_FREQUENCIES[b as usize]
|
||||||
|
}
|
||||||
759
.gear/predownloaded-development/vendor/aho-corasick/src/util/primitives.rs
vendored
Normal file
759
.gear/predownloaded-development/vendor/aho-corasick/src/util/primitives.rs
vendored
Normal file
|
|
@ -0,0 +1,759 @@
|
||||||
|
/*!
|
||||||
|
Lower level primitive types that are useful in a variety of circumstances.
|
||||||
|
|
||||||
|
# Overview
|
||||||
|
|
||||||
|
This list represents the principle types in this module and briefly describes
|
||||||
|
when you might want to use them.
|
||||||
|
|
||||||
|
* [`PatternID`] - A type that represents the identifier of a regex pattern.
|
||||||
|
This is probably the most widely used type in this module (which is why it's
|
||||||
|
also re-exported in the crate root).
|
||||||
|
* [`StateID`] - A type the represents the identifier of a finite automaton
|
||||||
|
state. This is used for both NFAs and DFAs, with the notable exception of
|
||||||
|
the hybrid NFA/DFA. (The hybrid NFA/DFA uses a special purpose "lazy" state
|
||||||
|
identifier.)
|
||||||
|
* [`SmallIndex`] - The internal representation of both a `PatternID` and a
|
||||||
|
`StateID`. Its purpose is to serve as a type that can index memory without
|
||||||
|
being as big as a `usize` on 64-bit targets. The main idea behind this type
|
||||||
|
is that there are many things in regex engines that will, in practice, never
|
||||||
|
overflow a 32-bit integer. (For example, like the number of patterns in a regex
|
||||||
|
or the number of states in an NFA.) Thus, a `SmallIndex` can be used to index
|
||||||
|
memory without peppering `as` casts everywhere. Moreover, it forces callers
|
||||||
|
to handle errors in the case where, somehow, the value would otherwise overflow
|
||||||
|
either a 32-bit integer or a `usize` (e.g., on 16-bit targets).
|
||||||
|
*/
|
||||||
|
|
||||||
|
// The macro we use to define some types below adds methods that we don't
|
||||||
|
// use on some of the types. There isn't much, so we just squash the warning.
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
|
use crate::util::int::{Usize, U16, U32, U64};
|
||||||
|
|
||||||
|
/// A type that represents a "small" index.
|
||||||
|
///
|
||||||
|
/// The main idea of this type is to provide something that can index memory,
|
||||||
|
/// but uses less memory than `usize` on 64-bit systems. Specifically, its
|
||||||
|
/// representation is always a `u32` and has `repr(transparent)` enabled. (So
|
||||||
|
/// it is safe to transmute between a `u32` and a `SmallIndex`.)
|
||||||
|
///
|
||||||
|
/// A small index is typically useful in cases where there is no practical way
|
||||||
|
/// that the index will overflow a 32-bit integer. A good example of this is
|
||||||
|
/// an NFA state. If you could somehow build an NFA with `2^30` states, its
|
||||||
|
/// memory usage would be exorbitant and its runtime execution would be so
|
||||||
|
/// slow as to be completely worthless. Therefore, this crate generally deems
|
||||||
|
/// it acceptable to return an error if it would otherwise build an NFA that
|
||||||
|
/// requires a slice longer than what a 32-bit integer can index. In exchange,
|
||||||
|
/// we can use 32-bit indices instead of 64-bit indices in various places.
|
||||||
|
///
|
||||||
|
/// This type ensures this by providing a constructor that will return an error
|
||||||
|
/// if its argument cannot fit into the type. This makes it much easier to
|
||||||
|
/// handle these sorts of boundary cases that are otherwise extremely subtle.
|
||||||
|
///
|
||||||
|
/// On all targets, this type guarantees that its value will fit in a `u32`,
|
||||||
|
/// `i32`, `usize` and an `isize`. This means that on 16-bit targets, for
|
||||||
|
/// example, this type's maximum value will never overflow an `isize`,
|
||||||
|
/// which means it will never overflow a `i16` even though its internal
|
||||||
|
/// representation is still a `u32`.
|
||||||
|
///
|
||||||
|
/// The purpose for making the type fit into even signed integer types like
|
||||||
|
/// `isize` is to guarantee that the difference between any two small indices
|
||||||
|
/// is itself also a small index. This is useful in certain contexts, e.g.,
|
||||||
|
/// for delta encoding.
|
||||||
|
///
|
||||||
|
/// # Other types
|
||||||
|
///
|
||||||
|
/// The following types wrap `SmallIndex` to provide a more focused use case:
|
||||||
|
///
|
||||||
|
/// * [`PatternID`] is for representing the identifiers of patterns.
|
||||||
|
/// * [`StateID`] is for representing the identifiers of states in finite
|
||||||
|
/// automata. It is used for both NFAs and DFAs.
|
||||||
|
///
|
||||||
|
/// # Representation
|
||||||
|
///
|
||||||
|
/// This type is always represented internally by a `u32` and is marked as
|
||||||
|
/// `repr(transparent)`. Thus, this type always has the same representation as
|
||||||
|
/// a `u32`. It is thus safe to transmute between a `u32` and a `SmallIndex`.
|
||||||
|
///
|
||||||
|
/// # Indexing
|
||||||
|
///
|
||||||
|
/// For convenience, callers may use a `SmallIndex` to index slices.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// While a `SmallIndex` is meant to guarantee that its value fits into `usize`
|
||||||
|
/// without using as much space as a `usize` on all targets, callers must
|
||||||
|
/// not rely on this property for safety. Callers may choose to rely on this
|
||||||
|
/// property for correctness however. For example, creating a `SmallIndex` with
|
||||||
|
/// an invalid value can be done in entirely safe code. This may in turn result
|
||||||
|
/// in panics or silent logical errors.
|
||||||
|
#[derive(
    Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd,
)]
#[repr(transparent)]
pub(crate) struct SmallIndex(u32);
|
||||||
|
|
||||||
|
impl SmallIndex {
|
||||||
|
/// The maximum index value.
|
||||||
|
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
|
||||||
|
pub const MAX: SmallIndex =
|
||||||
|
// FIXME: Use as_usize() once const functions in traits are stable.
|
||||||
|
SmallIndex::new_unchecked(core::i32::MAX as usize - 1);
|
||||||
|
|
||||||
|
/// The maximum index value.
|
||||||
|
#[cfg(target_pointer_width = "16")]
|
||||||
|
pub const MAX: SmallIndex =
|
||||||
|
SmallIndex::new_unchecked(core::isize::MAX - 1);
|
||||||
|
|
||||||
|
/// The total number of values that can be represented as a small index.
|
||||||
|
pub const LIMIT: usize = SmallIndex::MAX.as_usize() + 1;
|
||||||
|
|
||||||
|
/// The zero index value.
|
||||||
|
pub const ZERO: SmallIndex = SmallIndex::new_unchecked(0);
|
||||||
|
|
||||||
|
/// The number of bytes that a single small index uses in memory.
|
||||||
|
pub const SIZE: usize = core::mem::size_of::<SmallIndex>();
|
||||||
|
|
||||||
|
/// Create a new small index.
|
||||||
|
///
|
||||||
|
/// If the given index exceeds [`SmallIndex::MAX`], then this returns
|
||||||
|
/// an error.
|
||||||
|
#[inline]
|
||||||
|
pub fn new(index: usize) -> Result<SmallIndex, SmallIndexError> {
|
||||||
|
SmallIndex::try_from(index)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new small index without checking whether the given value
|
||||||
|
/// exceeds [`SmallIndex::MAX`].
|
||||||
|
///
|
||||||
|
/// Using this routine with an invalid index value will result in
|
||||||
|
/// unspecified behavior, but *not* undefined behavior. In particular, an
|
||||||
|
/// invalid index value is likely to cause panics or possibly even silent
|
||||||
|
/// logical errors.
|
||||||
|
///
|
||||||
|
/// Callers must never rely on a `SmallIndex` to be within a certain range
|
||||||
|
/// for memory safety.
|
||||||
|
#[inline]
|
||||||
|
pub const fn new_unchecked(index: usize) -> SmallIndex {
|
||||||
|
// FIXME: Use as_u32() once const functions in traits are stable.
|
||||||
|
SmallIndex::from_u32_unchecked(index as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new small index from a `u32` without checking whether the
|
||||||
|
/// given value exceeds [`SmallIndex::MAX`].
|
||||||
|
///
|
||||||
|
/// Using this routine with an invalid index value will result in
|
||||||
|
/// unspecified behavior, but *not* undefined behavior. In particular, an
|
||||||
|
/// invalid index value is likely to cause panics or possibly even silent
|
||||||
|
/// logical errors.
|
||||||
|
///
|
||||||
|
/// Callers must never rely on a `SmallIndex` to be within a certain range
|
||||||
|
/// for memory safety.
|
||||||
|
#[inline]
|
||||||
|
pub const fn from_u32_unchecked(index: u32) -> SmallIndex {
|
||||||
|
SmallIndex(index)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Like [`SmallIndex::new`], but panics if the given index is not valid.
|
||||||
|
#[inline]
|
||||||
|
pub fn must(index: usize) -> SmallIndex {
|
||||||
|
SmallIndex::new(index).expect("invalid small index")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return this small index as a `usize`. This is guaranteed to never
|
||||||
|
/// overflow `usize`.
|
||||||
|
#[inline]
|
||||||
|
pub const fn as_usize(&self) -> usize {
|
||||||
|
// FIXME: Use as_usize() once const functions in traits are stable.
|
||||||
|
self.0 as usize
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return this small index as a `u64`. This is guaranteed to never
|
||||||
|
/// overflow.
|
||||||
|
#[inline]
|
||||||
|
pub const fn as_u64(&self) -> u64 {
|
||||||
|
// FIXME: Use u64::from() once const functions in traits are stable.
|
||||||
|
self.0 as u64
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the internal `u32` of this small index. This is guaranteed to
|
||||||
|
/// never overflow `u32`.
|
||||||
|
#[inline]
|
||||||
|
pub const fn as_u32(&self) -> u32 {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the internal `u32` of this small index represented as an `i32`.
|
||||||
|
/// This is guaranteed to never overflow an `i32`.
|
||||||
|
#[inline]
|
||||||
|
pub const fn as_i32(&self) -> i32 {
|
||||||
|
// This is OK because we guarantee that our max value is <= i32::MAX.
|
||||||
|
self.0 as i32
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns one more than this small index as a usize.
|
||||||
|
///
|
||||||
|
/// Since a small index has constraints on its maximum value, adding `1` to
|
||||||
|
/// it will always fit in a `usize`, `isize`, `u32` and a `i32`.
|
||||||
|
#[inline]
|
||||||
|
pub fn one_more(&self) -> usize {
|
||||||
|
self.as_usize() + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode this small index from the bytes given using the native endian
|
||||||
|
/// byte order for the current target.
|
||||||
|
///
|
||||||
|
/// If the decoded integer is not representable as a small index for the
|
||||||
|
/// current target, then this returns an error.
|
||||||
|
#[inline]
|
||||||
|
pub fn from_ne_bytes(
|
||||||
|
bytes: [u8; 4],
|
||||||
|
) -> Result<SmallIndex, SmallIndexError> {
|
||||||
|
let id = u32::from_ne_bytes(bytes);
|
||||||
|
if id > SmallIndex::MAX.as_u32() {
|
||||||
|
return Err(SmallIndexError { attempted: u64::from(id) });
|
||||||
|
}
|
||||||
|
Ok(SmallIndex::new_unchecked(id.as_usize()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode this small index from the bytes given using the native endian
|
||||||
|
/// byte order for the current target.
|
||||||
|
///
|
||||||
|
/// This is analogous to [`SmallIndex::new_unchecked`] in that is does not
|
||||||
|
/// check whether the decoded integer is representable as a small index.
|
||||||
|
#[inline]
|
||||||
|
pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> SmallIndex {
|
||||||
|
SmallIndex::new_unchecked(u32::from_ne_bytes(bytes).as_usize())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the underlying small index integer as raw bytes in native endian
|
||||||
|
/// format.
|
||||||
|
#[inline]
|
||||||
|
pub fn to_ne_bytes(&self) -> [u8; 4] {
|
||||||
|
self.0.to_ne_bytes()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> core::ops::Index<SmallIndex> for [T] {
|
||||||
|
type Output = T;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn index(&self, index: SmallIndex) -> &T {
|
||||||
|
&self[index.as_usize()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> core::ops::IndexMut<SmallIndex> for [T] {
|
||||||
|
#[inline]
|
||||||
|
fn index_mut(&mut self, index: SmallIndex) -> &mut T {
|
||||||
|
&mut self[index.as_usize()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> core::ops::Index<SmallIndex> for Vec<T> {
|
||||||
|
type Output = T;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn index(&self, index: SmallIndex) -> &T {
|
||||||
|
&self[index.as_usize()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> core::ops::IndexMut<SmallIndex> for Vec<T> {
|
||||||
|
#[inline]
|
||||||
|
fn index_mut(&mut self, index: SmallIndex) -> &mut T {
|
||||||
|
&mut self[index.as_usize()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<StateID> for SmallIndex {
|
||||||
|
fn from(sid: StateID) -> SmallIndex {
|
||||||
|
sid.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<PatternID> for SmallIndex {
|
||||||
|
fn from(pid: PatternID) -> SmallIndex {
|
||||||
|
pid.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<u8> for SmallIndex {
|
||||||
|
fn from(index: u8) -> SmallIndex {
|
||||||
|
SmallIndex::new_unchecked(usize::from(index))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<u16> for SmallIndex {
|
||||||
|
type Error = SmallIndexError;
|
||||||
|
|
||||||
|
fn try_from(index: u16) -> Result<SmallIndex, SmallIndexError> {
|
||||||
|
if u32::from(index) > SmallIndex::MAX.as_u32() {
|
||||||
|
return Err(SmallIndexError { attempted: u64::from(index) });
|
||||||
|
}
|
||||||
|
Ok(SmallIndex::new_unchecked(index.as_usize()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<u32> for SmallIndex {
|
||||||
|
type Error = SmallIndexError;
|
||||||
|
|
||||||
|
fn try_from(index: u32) -> Result<SmallIndex, SmallIndexError> {
|
||||||
|
if index > SmallIndex::MAX.as_u32() {
|
||||||
|
return Err(SmallIndexError { attempted: u64::from(index) });
|
||||||
|
}
|
||||||
|
Ok(SmallIndex::new_unchecked(index.as_usize()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<u64> for SmallIndex {
|
||||||
|
type Error = SmallIndexError;
|
||||||
|
|
||||||
|
fn try_from(index: u64) -> Result<SmallIndex, SmallIndexError> {
|
||||||
|
if index > SmallIndex::MAX.as_u64() {
|
||||||
|
return Err(SmallIndexError { attempted: index });
|
||||||
|
}
|
||||||
|
Ok(SmallIndex::new_unchecked(index.as_usize()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<usize> for SmallIndex {
|
||||||
|
type Error = SmallIndexError;
|
||||||
|
|
||||||
|
fn try_from(index: usize) -> Result<SmallIndex, SmallIndexError> {
|
||||||
|
if index > SmallIndex::MAX.as_usize() {
|
||||||
|
return Err(SmallIndexError { attempted: index.as_u64() });
|
||||||
|
}
|
||||||
|
Ok(SmallIndex::new_unchecked(index))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An error that arises when a small index could not be constructed.
///
/// This happens when the supplied integer exceeds the maximum small index
/// value.
///
/// When the `std` feature is enabled, this implements the `Error` trait.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SmallIndexError {
    // The rejected value, widened to u64 so any source integer type fits.
    attempted: u64,
}

impl SmallIndexError {
    /// Returns the value that could not be converted to a small index.
    pub fn attempted(&self) -> u64 {
        self.attempted
    }
}
|
||||||
|
|
||||||
|
#[cfg(feature = "std")]
|
||||||
|
impl std::error::Error for SmallIndexError {}
|
||||||
|
|
||||||
|
impl core::fmt::Display for SmallIndexError {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"failed to create small index from {:?}, which exceeds {:?}",
|
||||||
|
self.attempted(),
|
||||||
|
SmallIndex::MAX,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) struct SmallIndexIter {
|
||||||
|
rng: core::ops::Range<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for SmallIndexIter {
|
||||||
|
type Item = SmallIndex;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<SmallIndex> {
|
||||||
|
if self.rng.start >= self.rng.end {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let next_id = self.rng.start + 1;
|
||||||
|
let id = core::mem::replace(&mut self.rng.start, next_id);
|
||||||
|
// new_unchecked is OK since we asserted that the number of
|
||||||
|
// elements in this iterator will fit in an ID at construction.
|
||||||
|
Some(SmallIndex::new_unchecked(id))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// For each dedicated index type `$name` (e.g. PatternID or StateID), this
// macro generates: inherent constants and conversions mirroring SmallIndex,
// a custom Debug impl, slice/Vec indexing impls, From/TryFrom conversions,
// an error type `$err`, an iterator `$iter` over all IDs below a given
// length, and an enumerate-like adapter `$withiter` that attaches IDs to
// the elements of an ExactSizeIterator.
macro_rules! index_type_impls {
    ($name:ident, $err:ident, $iter:ident, $withiter:ident) => {
        impl $name {
            /// The maximum value.
            pub const MAX: $name = $name(SmallIndex::MAX);

            /// The total number of values that can be represented.
            pub const LIMIT: usize = SmallIndex::LIMIT;

            /// The zero value.
            pub const ZERO: $name = $name(SmallIndex::ZERO);

            /// The number of bytes that a single value uses in memory.
            pub const SIZE: usize = SmallIndex::SIZE;

            /// Create a new value that is represented by a "small index."
            ///
            /// If the given index exceeds the maximum allowed value, then this
            /// returns an error.
            #[inline]
            pub fn new(value: usize) -> Result<$name, $err> {
                SmallIndex::new(value).map($name).map_err($err)
            }

            /// Create a new value without checking whether the given argument
            /// exceeds the maximum.
            ///
            /// Using this routine with an invalid value will result in
            /// unspecified behavior, but *not* undefined behavior. In
            /// particular, an invalid ID value is likely to cause panics or
            /// possibly even silent logical errors.
            ///
            /// Callers must never rely on this type to be within a certain
            /// range for memory safety.
            #[inline]
            pub const fn new_unchecked(value: usize) -> $name {
                $name(SmallIndex::new_unchecked(value))
            }

            /// Create a new value from a `u32` without checking whether the
            /// given value exceeds the maximum.
            ///
            /// Using this routine with an invalid value will result in
            /// unspecified behavior, but *not* undefined behavior. In
            /// particular, an invalid ID value is likely to cause panics or
            /// possibly even silent logical errors.
            ///
            /// Callers must never rely on this type to be within a certain
            /// range for memory safety.
            #[inline]
            pub const fn from_u32_unchecked(index: u32) -> $name {
                $name(SmallIndex::from_u32_unchecked(index))
            }

            /// Like `new`, but panics if the given value is not valid.
            #[inline]
            pub fn must(value: usize) -> $name {
                $name::new(value).expect(concat!(
                    "invalid ",
                    stringify!($name),
                    " value"
                ))
            }

            /// Return the internal value as a `usize`. This is guaranteed to
            /// never overflow `usize`.
            #[inline]
            pub const fn as_usize(&self) -> usize {
                self.0.as_usize()
            }

            /// Return the internal value as a `u64`. This is guaranteed to
            /// never overflow.
            #[inline]
            pub const fn as_u64(&self) -> u64 {
                self.0.as_u64()
            }

            /// Return the internal value as a `u32`. This is guaranteed to
            /// never overflow `u32`.
            #[inline]
            pub const fn as_u32(&self) -> u32 {
                self.0.as_u32()
            }

            /// Return the internal value as a `i32`. This is guaranteed to
            /// never overflow an `i32`.
            #[inline]
            pub const fn as_i32(&self) -> i32 {
                self.0.as_i32()
            }

            /// Returns one more than this value as a usize.
            ///
            /// Since values represented by a "small index" have constraints
            /// on their maximum value, adding `1` to it will always fit in a
            /// `usize`, `u32` and a `i32`.
            #[inline]
            pub fn one_more(&self) -> usize {
                self.0.one_more()
            }

            /// Decode this value from the bytes given using the native endian
            /// byte order for the current target.
            ///
            /// If the decoded integer is not representable as a small index
            /// for the current target, then this returns an error.
            #[inline]
            pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<$name, $err> {
                SmallIndex::from_ne_bytes(bytes).map($name).map_err($err)
            }

            /// Decode this value from the bytes given using the native endian
            /// byte order for the current target.
            ///
            /// This is analogous to `new_unchecked` in that it does not check
            /// whether the decoded integer is representable as a small index.
            #[inline]
            pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> $name {
                $name(SmallIndex::from_ne_bytes_unchecked(bytes))
            }

            /// Return the underlying integer as raw bytes in native endian
            /// format.
            #[inline]
            pub fn to_ne_bytes(&self) -> [u8; 4] {
                self.0.to_ne_bytes()
            }

            /// Returns an iterator over all values from 0 up to and not
            /// including the given length.
            ///
            /// If the given length exceeds this type's limit, then this
            /// panics.
            pub(crate) fn iter(len: usize) -> $iter {
                $iter::new(len)
            }
        }

        // We write our own Debug impl so that we get things like PatternID(5)
        // instead of PatternID(SmallIndex(5)).
        impl core::fmt::Debug for $name {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                f.debug_tuple(stringify!($name)).field(&self.as_u32()).finish()
            }
        }

        impl<T> core::ops::Index<$name> for [T] {
            type Output = T;

            #[inline]
            fn index(&self, index: $name) -> &T {
                &self[index.as_usize()]
            }
        }

        impl<T> core::ops::IndexMut<$name> for [T] {
            #[inline]
            fn index_mut(&mut self, index: $name) -> &mut T {
                &mut self[index.as_usize()]
            }
        }

        impl<T> core::ops::Index<$name> for Vec<T> {
            type Output = T;

            #[inline]
            fn index(&self, index: $name) -> &T {
                &self[index.as_usize()]
            }
        }

        impl<T> core::ops::IndexMut<$name> for Vec<T> {
            #[inline]
            fn index_mut(&mut self, index: $name) -> &mut T {
                &mut self[index.as_usize()]
            }
        }

        impl From<SmallIndex> for $name {
            fn from(index: SmallIndex) -> $name {
                $name(index)
            }
        }

        impl From<u8> for $name {
            fn from(value: u8) -> $name {
                $name(SmallIndex::from(value))
            }
        }

        impl TryFrom<u16> for $name {
            type Error = $err;

            fn try_from(value: u16) -> Result<$name, $err> {
                SmallIndex::try_from(value).map($name).map_err($err)
            }
        }

        impl TryFrom<u32> for $name {
            type Error = $err;

            fn try_from(value: u32) -> Result<$name, $err> {
                SmallIndex::try_from(value).map($name).map_err($err)
            }
        }

        impl TryFrom<u64> for $name {
            type Error = $err;

            fn try_from(value: u64) -> Result<$name, $err> {
                SmallIndex::try_from(value).map($name).map_err($err)
            }
        }

        impl TryFrom<usize> for $name {
            type Error = $err;

            fn try_from(value: usize) -> Result<$name, $err> {
                SmallIndex::try_from(value).map($name).map_err($err)
            }
        }

        /// This error occurs when an ID could not be constructed.
        ///
        /// This occurs when given an integer exceeding the maximum allowed
        /// value.
        ///
        /// When the `std` feature is enabled, this implements the `Error`
        /// trait.
        #[derive(Clone, Debug, Eq, PartialEq)]
        pub struct $err(SmallIndexError);

        impl $err {
            /// Returns the value that could not be converted to an ID.
            pub fn attempted(&self) -> u64 {
                self.0.attempted()
            }
        }

        #[cfg(feature = "std")]
        impl std::error::Error for $err {}

        impl core::fmt::Display for $err {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                write!(
                    f,
                    "failed to create {} from {:?}, which exceeds {:?}",
                    stringify!($name),
                    self.attempted(),
                    $name::MAX,
                )
            }
        }

        #[derive(Clone, Debug)]
        pub(crate) struct $iter(SmallIndexIter);

        impl $iter {
            fn new(len: usize) -> $iter {
                assert!(
                    len <= $name::LIMIT,
                    "cannot create iterator for {} when number of \
                     elements exceed {:?}",
                    stringify!($name),
                    $name::LIMIT,
                );
                $iter(SmallIndexIter { rng: 0..len })
            }
        }

        impl Iterator for $iter {
            type Item = $name;

            fn next(&mut self) -> Option<$name> {
                self.0.next().map($name)
            }
        }

        /// An iterator adapter that is like std::iter::Enumerate, but attaches
        /// small index values instead. It requires `ExactSizeIterator`. At
        /// construction, it ensures that the index of each element in the
        /// iterator is representable in the corresponding small index type.
        #[derive(Clone, Debug)]
        pub(crate) struct $withiter<I> {
            it: I,
            ids: $iter,
        }

        impl<I: Iterator + ExactSizeIterator> $withiter<I> {
            fn new(it: I) -> $withiter<I> {
                let ids = $name::iter(it.len());
                $withiter { it, ids }
            }
        }

        impl<I: Iterator + ExactSizeIterator> Iterator for $withiter<I> {
            type Item = ($name, I::Item);

            fn next(&mut self) -> Option<($name, I::Item)> {
                let item = self.it.next()?;
                // Number of elements in this iterator must match, according
                // to contract of ExactSizeIterator.
                let id = self.ids.next().unwrap();
                Some((id, item))
            }
        }
    };
}
|
||||||
|
|
||||||
|
/// The identifier of a pattern in an Aho-Corasick automaton.
///
/// It is represented by a `u32` even on 64-bit systems in order to conserve
/// space. Namely, on all targets, this type guarantees that its value will
/// fit in a `u32`, `i32`, `usize` and an `isize`. This means that on 16-bit
/// targets, for example, this type's maximum value will never overflow an
/// `isize`, which means it will never overflow a `i16` even though its
/// internal representation is still a `u32`.
///
/// # Safety
///
/// While a `PatternID` is meant to guarantee that its value fits into `usize`
/// without using as much space as a `usize` on all targets, callers must
/// not rely on this property for safety. Callers may choose to rely on this
/// property for correctness however. For example, creating a `PatternID` with
/// an invalid value can be done in entirely safe code. This may in turn
/// result in panics or silent logical errors.
#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
#[repr(transparent)]
pub struct PatternID(SmallIndex);
|
||||||
|
|
||||||
|
/// The identifier of a finite automaton state.
|
||||||
|
///
|
||||||
|
/// It is represented by a `u32` even on 64-bit systems in order to conserve
|
||||||
|
/// space. Namely, on all targets, this type guarantees that its value will
|
||||||
|
/// fit in a `u32`, `i32`, `usize` and an `isize`. This means that on 16-bit
|
||||||
|
/// targets, for example, this type's maximum value will never overflow an
|
||||||
|
/// `isize`, which means it will never overflow a `i16` even though its
|
||||||
|
/// internal representation is still a `u32`.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// While a `StateID` is meant to guarantee that its value fits into `usize`
|
||||||
|
/// without using as much space as a `usize` on all targets, callers must
|
||||||
|
/// not rely on this property for safety. Callers may choose to rely on this
|
||||||
|
/// property for correctness however. For example, creating a `StateID` with an
|
||||||
|
/// invalid value can be done in entirely safe code. This may in turn result in
|
||||||
|
/// panics or silent logical errors.
|
||||||
|
#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
pub struct StateID(SmallIndex);
|
||||||
|
|
||||||
|
// Generate the full API surface (constants, conversions, indexing, error and
// iterator types) for both identifier types.
index_type_impls!(PatternID, PatternIDError, PatternIDIter, WithPatternIDIter);
index_type_impls!(StateID, StateIDError, StateIDIter, WithStateIDIter);
|
||||||
|
|
||||||
|
/// A utility trait that defines a couple of adapters for making it convenient
|
||||||
|
/// to access indices as "small index" types. We require ExactSizeIterator so
|
||||||
|
/// that iterator construction can do a single check to make sure the index of
|
||||||
|
/// each element is representable by its small index type.
|
||||||
|
pub(crate) trait IteratorIndexExt: Iterator {
|
||||||
|
fn with_pattern_ids(self) -> WithPatternIDIter<Self>
|
||||||
|
where
|
||||||
|
Self: Sized + ExactSizeIterator,
|
||||||
|
{
|
||||||
|
WithPatternIDIter::new(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn with_state_ids(self) -> WithStateIDIter<Self>
|
||||||
|
where
|
||||||
|
Self: Sized + ExactSizeIterator,
|
||||||
|
{
|
||||||
|
WithStateIDIter::new(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I: Iterator> IteratorIndexExt for I {}
|
||||||
214
.gear/predownloaded-development/vendor/aho-corasick/src/util/remapper.rs
vendored
Normal file
214
.gear/predownloaded-development/vendor/aho-corasick/src/util/remapper.rs
vendored
Normal file
|
|
@ -0,0 +1,214 @@
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
|
use crate::{nfa::noncontiguous, util::primitives::StateID};
|
||||||
|
|
||||||
|
/// Remappable is a tightly coupled abstraction that facilitates remapping
/// state identifiers in DFAs.
///
/// The main idea behind remapping state IDs is that DFAs often need to check
/// if a certain state is a "special" state of some kind (like a match state)
/// during a search. Since this is extremely perf critical code, we want this
/// check to be as fast as possible. Partitioning state IDs, for example, into
/// "non-match" and "match" states means one can tell if a state is a
/// match state via a simple comparison of the state ID.
///
/// The issue is that during the DFA construction process, it's not
/// particularly easy to partition the states. Instead, the simplest thing is
/// to often just do a pass over all of the states and shuffle them into their
/// desired partitionings. To do that, we need a mechanism for swapping states.
/// Hence, this abstraction.
///
/// Normally, for such little code, I would just duplicate it. But this is a
/// key optimization and the implementation is a bit subtle. So the abstraction
/// is basically a ham-fisted attempt at DRY. The only place we use this is in
/// the dense and one-pass DFAs.
///
/// See also src/dfa/special.rs for a more detailed explanation of how dense
/// DFAs are partitioned.
pub(crate) trait Remappable: core::fmt::Debug {
    /// Return the total number of states.
    fn state_len(&self) -> usize;

    /// Swap the states pointed to by the given IDs. The underlying finite
    /// state machine should be mutated such that all of the transitions in
    /// `id1` are now in the memory region where the transitions for `id2`
    /// were, and all of the transitions in `id2` are now in the memory region
    /// where the transitions for `id1` were.
    ///
    /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
    ///
    /// It is expected that, after calling this, the underlying state machine
    /// will be left in an inconsistent state, since any other transitions
    /// pointing to, e.g., `id1` need to be updated to point to `id2`, since
    /// that's where `id1` moved to.
    ///
    /// In order to "fix" the underlying inconsistent state, a `Remapper`
    /// should be used to guarantee that `remap` is called at the appropriate
    /// time.
    fn swap_states(&mut self, id1: StateID, id2: StateID);

    /// This must remap every single state ID in the underlying value according
    /// to the function given. For example, in a DFA, this should remap every
    /// transition and every starting state ID.
    fn remap(&mut self, map: impl Fn(StateID) -> StateID);
}
|
||||||
|
|
||||||
|
/// Remapper is an abstraction that manages the remapping of state IDs in a
/// finite state machine. This is useful when one wants to shuffle states into
/// different positions in the machine.
///
/// One of the key complexities this manages is the ability to correctly move
/// one state multiple times.
///
/// Once shuffling is complete, `remap` must be called, which will rewrite
/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
/// will almost certainly result in a corrupt machine.
#[derive(Debug)]
pub(crate) struct Remapper {
    /// A map from the index of a state to its pre-multiplied identifier.
    ///
    /// When a state is swapped with another, then their corresponding
    /// locations in this map are also swapped. Thus, its new position will
    /// still point to its old pre-multiplied StateID.
    ///
    /// While there is a bit more to it, this then allows us to rewrite the
    /// state IDs in a DFA's transition table in a single pass. This is done
    /// by iterating over every ID in this map, then iterating over each
    /// transition for the state at that ID and re-mapping the transition from
    /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
    /// in this map where `old_id` *started*, and set it to where it ended up
    /// after all swaps have been completed.
    map: Vec<StateID>,
    /// A way to map indices to state IDs (and back).
    idx: IndexMapper,
}
|
||||||
|
|
||||||
|
impl Remapper {
|
||||||
|
    /// Create a new remapper from the given remappable implementation. The
    /// remapper can then be used to swap states. The remappable value given
    /// here must be the same one given to `swap` and `remap`.
    ///
    /// The given stride should be the stride of the transition table expressed
    /// as a power of 2. This stride is used to map between state IDs and state
    /// indices. If state IDs and state indices are equivalent, then provide
    /// a `stride2` of `0`, which acts as an identity.
    pub(crate) fn new(r: &impl Remappable, stride2: usize) -> Remapper {
        let idx = IndexMapper { stride2 };
        // Initially, every index maps to its own (pre-multiplied) ID.
        let map = (0..r.state_len()).map(|i| idx.to_state_id(i)).collect();
        Remapper { map, idx }
    }

    /// Swap two states. Once this is called, callers must follow through to
    /// call `remap`, or else it's possible for the underlying remappable
    /// value to be in a corrupt state.
    pub(crate) fn swap(
        &mut self,
        r: &mut impl Remappable,
        id1: StateID,
        id2: StateID,
    ) {
        // Swapping a state with itself is a no-op.
        if id1 == id2 {
            return;
        }
        r.swap_states(id1, id2);
        // Mirror the swap in our index->ID map so that `remap` can later
        // follow the chain of swaps each state went through.
        self.map.swap(self.idx.to_index(id1), self.idx.to_index(id2));
    }
|
||||||
|
|
||||||
|
/// Complete the remapping process by rewriting all state IDs in the
|
||||||
|
/// remappable value according to the swaps performed.
|
||||||
|
pub(crate) fn remap(mut self, r: &mut impl Remappable) {
|
||||||
|
// Update the map to account for states that have been swapped
|
||||||
|
// multiple times. For example, if (A, C) and (C, G) are swapped, then
|
||||||
|
// transitions previously pointing to A should now point to G. But if
|
||||||
|
// we don't update our map, they will erroneously be set to C. All we
|
||||||
|
// do is follow the swaps in our map until we see our original state
|
||||||
|
// ID.
|
||||||
|
//
|
||||||
|
// The intuition here is to think about how changes are made to the
|
||||||
|
// map: only through pairwise swaps. That means that starting at any
|
||||||
|
// given state, it is always possible to find the loop back to that
|
||||||
|
// state by following the swaps represented in the map (which might be
|
||||||
|
// 0 swaps).
|
||||||
|
//
|
||||||
|
// We are also careful to clone the map before starting in order to
|
||||||
|
// freeze it. We use the frozen map to find our loops, since we need to
|
||||||
|
// update our map as well. Without freezing it, our updates could break
|
||||||
|
// the loops referenced above and produce incorrect results.
|
||||||
|
let oldmap = self.map.clone();
|
||||||
|
for i in 0..r.state_len() {
|
||||||
|
let cur_id = self.idx.to_state_id(i);
|
||||||
|
let mut new_id = oldmap[i];
|
||||||
|
if cur_id == new_id {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
loop {
|
||||||
|
let id = oldmap[self.idx.to_index(new_id)];
|
||||||
|
if cur_id == id {
|
||||||
|
self.map[i] = new_id;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
new_id = id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r.remap(|sid| self.map[self.idx.to_index(sid)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A simple type for mapping between state indices and state IDs.
///
/// The reason why this exists is because state IDs are "premultiplied" in a
/// DFA. That is, in order to get to the transitions for a particular state,
/// one need only use the state ID as-is, instead of having to multiply it by
/// transition table's stride.
///
/// The downside of this is that it's inconvenient to map between state IDs
/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like
/// `0`, `stride`, `2*stride`, `3*stride`, etc., instead of `0`, `1`, `2`, `3`,
/// etc.
///
/// Since our state IDs are premultiplied, we can convert back-and-forth
/// between IDs and indices by simply unmultiplying the IDs and multiplying the
/// indices.
///
/// Note that for a sparse NFA, state IDs and indices are equivalent. In this
/// case, we set the stride of the index mapped to be `0`, which acts as an
/// identity.
#[derive(Debug)]
struct IndexMapper {
    /// The power of 2 corresponding to the stride of the corresponding
    /// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
    /// stride2' pre-multiplies an index to an ID.
    ///
    /// A `stride2` of `0` makes both conversions the identity function.
    stride2: usize,
}
|
||||||
|
|
||||||
|
impl IndexMapper {
|
||||||
|
/// Convert a state ID to a state index.
|
||||||
|
fn to_index(&self, id: StateID) -> usize {
|
||||||
|
id.as_usize() >> self.stride2
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert a state index to a state ID.
|
||||||
|
fn to_state_id(&self, index: usize) -> StateID {
|
||||||
|
// CORRECTNESS: If the given index is not valid, then it is not
|
||||||
|
// required for this to panic or return a valid state ID. We'll "just"
|
||||||
|
// wind up with panics or silent logic errors at some other point. But
|
||||||
|
// this is OK because if Remappable::state_len is correct and so is
|
||||||
|
// 'to_index', then all inputs to 'to_state_id' should be valid indices
|
||||||
|
// and thus transform into valid state IDs.
|
||||||
|
StateID::new_unchecked(index << self.stride2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Remappable for noncontiguous::NFA {
    // NOTE(review): the fully-qualified call syntax below is presumably used
    // to disambiguate from inherent methods of the same names on
    // `noncontiguous::NFA` (not visible here) — confirm before simplifying
    // to plain method-call syntax.

    /// Report the total number of states in the NFA.
    fn state_len(&self) -> usize {
        noncontiguous::NFA::states(self).len()
    }

    /// Exchange the positions of two states, delegating to the NFA's own
    /// swap implementation.
    fn swap_states(&mut self, id1: StateID, id2: StateID) {
        noncontiguous::NFA::swap_states(self, id1, id2)
    }

    /// Rewrite every state ID in the NFA according to `map`, delegating to
    /// the NFA's own remap implementation.
    fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
        noncontiguous::NFA::remap(self, map)
    }
}
|
||||||
1148
.gear/predownloaded-development/vendor/aho-corasick/src/util/search.rs
vendored
Normal file
1148
.gear/predownloaded-development/vendor/aho-corasick/src/util/search.rs
vendored
Normal file
File diff suppressed because it is too large
Load diff
42
.gear/predownloaded-development/vendor/aho-corasick/src/util/special.rs
vendored
Normal file
42
.gear/predownloaded-development/vendor/aho-corasick/src/util/special.rs
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
use crate::util::primitives::StateID;
|
||||||
|
|
||||||
|
/// A collection of sentinel state IDs for Aho-Corasick automata.
///
/// This specifically enables the technique by which we determine which states
/// are dead, matches or start states. Namely, by arranging states in a
/// particular order, we can determine the type of a state simply by looking at
/// its ID.
#[derive(Clone, Debug)]
pub(crate) struct Special {
    /// The maximum ID of all the "special" states. This corresponds either to
    /// start_anchored_id when a prefilter is active and max_match_id when a
    /// prefilter is not active. The idea here is that if there is no prefilter,
    /// then there is no point in treating start states as special.
    pub(crate) max_special_id: StateID,
    /// The maximum ID of all the match states. Any state ID bigger than this
    /// is guaranteed to be a non-match ID.
    ///
    /// It is possible and legal for max_match_id to be equal to
    /// start_anchored_id, which occurs precisely in the case where the empty
    /// string is a pattern that was added to the underlying automaton.
    pub(crate) max_match_id: StateID,
    /// The state ID of the start state used for unanchored searches.
    pub(crate) start_unanchored_id: StateID,
    /// The state ID of the start state used for anchored searches. This is
    /// always start_unanchored_id+1.
    pub(crate) start_anchored_id: StateID,
}
|
||||||
|
|
||||||
|
impl Special {
|
||||||
|
/// Create a new set of "special" state IDs with all IDs initialized to
|
||||||
|
/// zero. The general idea here is that they will be updated and set to
|
||||||
|
/// correct values later.
|
||||||
|
pub(crate) fn zero() -> Special {
|
||||||
|
Special {
|
||||||
|
max_special_id: StateID::ZERO,
|
||||||
|
max_match_id: StateID::ZERO,
|
||||||
|
start_unanchored_id: StateID::ZERO,
|
||||||
|
start_anchored_id: StateID::ZERO,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
1
.gear/predownloaded-development/vendor/allocator-api2/.cargo-checksum.json
vendored
Normal file
1
.gear/predownloaded-development/vendor/allocator-api2/.cargo-checksum.json
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"files":{".cargo_vcs_info.json":"f44a14175e1995bd4b75ea0c74654a53b7073fedcb37c92c41841126e6f40f17","CHANGELOG.md":"886f8c688db0c22d24b650df0dc30a39d05d54d0e562c00d9574bf31cbf73251","Cargo.toml":"ddaa434cc54a30a33bbe0096e72479d71ba5deffa2ad9bee39419d4e50b75275","Cargo.toml.orig":"c1688fbd2be36f529a6eebbbf62fcf612c86b6f824738a44f6caa8875f57279b","LICENSE-APACHE":"20fe7b00e904ed690e3b9fd6073784d3fc428141dbd10b81c01fd143d0797f58","LICENSE-MIT":"36516aefdc84c5d5a1e7485425913a22dbda69eb1930c5e84d6ae4972b5194b9","README.md":"8b8c45a89f9d61688fd32516ca24ea11cc6be4994757bd01bd9d02d96cd49337","src/lib.rs":"56a7344026bf5be503ca8b3fe208b74550956e82be7806a229951e80ebb3c249","src/nightly.rs":"c12152b6721216174c9a3cec90e612d5571a5d2c0a94ad54900cb814414519c3","src/stable/alloc/global.rs":"14836ad7d73a364474fc153b24a1f17ad0e60a69b90a8721dc1059eada8bf869","src/stable/alloc/mod.rs":"866dafd3984dd246e381d8ad1c2b3e02a60c3421b598ca493aa83f9b6422608d","src/stable/alloc/system.rs":"db5d5bf088eecac3fc5ff1281e1bf26ca36dd38f13cd52c49d95ff1bab064254","src/stable/boxed.rs":"fb664ab68a599b7fc5acbae1c634c2007ba2bda9a24fea2212b8202bb537f7a0","src/stable/macros.rs":"74490796a766338d0163f40a37612cd9ea2de58ae3d8e9abf6c7bcf81d9be4a6","src/stable/mod.rs":"474dce5f150456a98fa7c4debc24f03ec2db4ebf0d54011ae19c8b575feb5712","src/stable/raw_vec.rs":"9a56ce1bab4562000285e80837da7b7bd2bbbc63850c83ab5d8df9888b65f5db","src/stable/slice.rs":"089263b058e6c185467bad7ad14908479e5675408fc70a8291e5dddaef36035a","src/stable/unique.rs":"6ed3678beed7fa6bd18b694f7357e638d83e3f1f895f9988a465dc5afebfbac9","src/stable/vec/drain.rs":"740cd2e0f31eeb0146bbd0f645a14fe12bacd3912f003db433ddc6b3a178461f","src/stable/vec/into_iter.rs":"da72ce52344ea2e263ddf7776356cc012bbafc51f48499955c1771729448754d","src/stable/vec/mod.rs":"dd3ddca02747686ed2064397dd17068b64f28c6f42b55e9e2ce129cd573fe44c","src/stable/vec/partial_eq.rs":"9f1b18605164a62b58d9e17914d573698735de31c51ceb8bd3666e83d32df370","src/stable/vec/set_len_on_drop.rs":"561342e2
2a194e515cc25c9a1bcd827ca24c4db033e9e2c4266fbdd2fb16e5bc","src/stable/vec/splice.rs":"95a460b3a7b4af60fdc9ba04d3a719b61a0c11786cd2d8823d022e22c397f9c9"},"package":"683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"}
|
||||||
6
.gear/predownloaded-development/vendor/allocator-api2/.cargo_vcs_info.json
vendored
Normal file
6
.gear/predownloaded-development/vendor/allocator-api2/.cargo_vcs_info.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
{
|
||||||
|
"git": {
|
||||||
|
"sha1": "63cd7fcc2f8854b5821c7054d026e8a4647acde1"
|
||||||
|
},
|
||||||
|
"path_in_vcs": ""
|
||||||
|
}
|
||||||
7
.gear/predownloaded-development/vendor/allocator-api2/CHANGELOG.md
vendored
Normal file
7
.gear/predownloaded-development/vendor/allocator-api2/CHANGELOG.md
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Changelog
|
||||||
|
All notable changes to this project will be documented in this file.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
48
.gear/predownloaded-development/vendor/allocator-api2/Cargo.toml
vendored
Normal file
48
.gear/predownloaded-development/vendor/allocator-api2/Cargo.toml
vendored
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||||
|
#
|
||||||
|
# When uploading crates to the registry Cargo will automatically
|
||||||
|
# "normalize" Cargo.toml files for maximal compatibility
|
||||||
|
# with all versions of Cargo and also rewrite `path` dependencies
|
||||||
|
# to registry (e.g., crates.io) dependencies.
|
||||||
|
#
|
||||||
|
# If you are reading this file be aware that the original Cargo.toml
|
||||||
|
# will likely look very different (and much more reasonable).
|
||||||
|
# See Cargo.toml.orig for the original contents.
|
||||||
|
|
||||||
|
[package]
|
||||||
|
edition = "2018"
|
||||||
|
rust-version = "1.63"
|
||||||
|
name = "allocator-api2"
|
||||||
|
version = "0.2.21"
|
||||||
|
authors = ["Zakarum <zaq.dev@icloud.com>"]
|
||||||
|
build = false
|
||||||
|
autobins = false
|
||||||
|
autoexamples = false
|
||||||
|
autotests = false
|
||||||
|
autobenches = false
|
||||||
|
description = "Mirror of Rust's allocator API"
|
||||||
|
homepage = "https://github.com/zakarumych/allocator-api2"
|
||||||
|
documentation = "https://docs.rs/allocator-api2"
|
||||||
|
readme = "README.md"
|
||||||
|
license = "MIT OR Apache-2.0"
|
||||||
|
repository = "https://github.com/zakarumych/allocator-api2"
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "allocator_api2"
|
||||||
|
path = "src/lib.rs"
|
||||||
|
|
||||||
|
[dependencies.serde]
|
||||||
|
version = "1.0"
|
||||||
|
optional = true
|
||||||
|
|
||||||
|
[features]
|
||||||
|
alloc = []
|
||||||
|
default = ["std"]
|
||||||
|
fresh-rust = []
|
||||||
|
nightly = []
|
||||||
|
std = ["alloc"]
|
||||||
|
|
||||||
|
[lints.rust.unexpected_cfgs]
|
||||||
|
level = "warn"
|
||||||
|
priority = 0
|
||||||
|
check-cfg = ["cfg(no_global_oom_handling)"]
|
||||||
30
.gear/predownloaded-development/vendor/allocator-api2/Cargo.toml.orig
generated
vendored
Normal file
30
.gear/predownloaded-development/vendor/allocator-api2/Cargo.toml.orig
generated
vendored
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
[package]
|
||||||
|
name = "allocator-api2"
|
||||||
|
version = "0.2.21"
|
||||||
|
edition = "2018"
|
||||||
|
authors = ["Zakarum <zaq.dev@icloud.com>"]
|
||||||
|
license = "MIT OR Apache-2.0"
|
||||||
|
documentation = "https://docs.rs/allocator-api2"
|
||||||
|
homepage = "https://github.com/zakarumych/allocator-api2"
|
||||||
|
repository = "https://github.com/zakarumych/allocator-api2"
|
||||||
|
readme = "README.md"
|
||||||
|
description = "Mirror of Rust's allocator API"
|
||||||
|
rust-version = "1.63"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
alloc = []
|
||||||
|
std = ["alloc"]
|
||||||
|
default = ["std"]
|
||||||
|
nightly = []
|
||||||
|
fresh-rust = []
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
serde = { version = "1.0", optional = true }
|
||||||
|
|
||||||
|
[workspace]
|
||||||
|
members = ["tests"]
|
||||||
|
|
||||||
|
[lints.rust]
|
||||||
|
unexpected_cfgs = { level = "warn", check-cfg = [
|
||||||
|
'cfg(no_global_oom_handling)',
|
||||||
|
] }
|
||||||
176
.gear/predownloaded-development/vendor/allocator-api2/LICENSE-APACHE
vendored
Normal file
176
.gear/predownloaded-development/vendor/allocator-api2/LICENSE-APACHE
vendored
Normal file
|
|
@ -0,0 +1,176 @@
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
23
.gear/predownloaded-development/vendor/allocator-api2/LICENSE-MIT
vendored
Normal file
23
.gear/predownloaded-development/vendor/allocator-api2/LICENSE-MIT
vendored
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
Permission is hereby granted, free of charge, to any
|
||||||
|
person obtaining a copy of this software and associated
|
||||||
|
documentation files (the "Software"), to deal in the
|
||||||
|
Software without restriction, including without
|
||||||
|
limitation the rights to use, copy, modify, merge,
|
||||||
|
publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software
|
||||||
|
is furnished to do so, subject to the following
|
||||||
|
conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice
|
||||||
|
shall be included in all copies or substantial portions
|
||||||
|
of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||||
|
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||||
|
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||||
|
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||||
|
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
DEALINGS IN THE SOFTWARE.
|
||||||
61
.gear/predownloaded-development/vendor/allocator-api2/README.md
vendored
Normal file
61
.gear/predownloaded-development/vendor/allocator-api2/README.md
vendored
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
# allocator-api2
|
||||||
|
|
||||||
|
[](https://crates.io/crates/allocator-api2)
|
||||||
|
[](https://docs.rs/allocator-api2)
|
||||||
|
[](https://github.com/zakarumych/allocator-api2/actions/workflows/badge.yml)
|
||||||
|
[](COPYING)
|
||||||
|

|
||||||
|
|
||||||
|
This crate mirrors types and traits from Rust's unstable [`allocator_api`]
|
||||||
|
The intention of this crate is to serve as substitution for actual thing
|
||||||
|
for libs when build on stable and beta channels.
|
||||||
|
The target users are library authors who implement allocators or collection types
|
||||||
|
that use allocators, or anyone else who wants using [`allocator_api`]
|
||||||
|
|
||||||
|
The crate should be frequently updated with minor version bump.
|
||||||
|
When [`allocator_api`] is stable this crate will get version `1.0` and simply
|
||||||
|
re-export from `core`, `alloc` and `std`.
|
||||||
|
|
||||||
|
The code is mostly verbatim copy from rust repository.
|
||||||
|
Mostly attributes are removed.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
This paragraph describes how to use this crate correctly to ensure
|
||||||
|
compatibility and interoperability on both stable and nightly channels.
|
||||||
|
|
||||||
|
If you are writing a library that interacts with allocators API, you can
|
||||||
|
add this crate as a dependency and use the types and traits from this
|
||||||
|
crate instead of the ones in `core` or `alloc`.
|
||||||
|
This will allow your library to compile on stable and beta channels.
|
||||||
|
|
||||||
|
Your library *MAY* provide a feature that will enable "allocator-api2/nightly".
|
||||||
|
When this feature is enabled, your library *MUST* enable
|
||||||
|
unstable `#![feature(allocator_api)]` or it may not compile.
|
||||||
|
If feature is not provided, your library may not be compatible with the
|
||||||
|
rest of the users and cause compilation errors on nightly channel
|
||||||
|
when some other crate enables "allocator-api2/nightly" feature.
|
||||||
|
|
||||||
|
# Minimal Supported Rust Version (MSRV)
|
||||||
|
|
||||||
|
This crate is guaranteed to compile on stable Rust 1.63 and up.
|
||||||
|
A feature "fresh-rust" bumps the MSRV to unspecified higher version, but should be compatible with
|
||||||
|
at least few latest stable releases. The feature enables some additional functionality:
|
||||||
|
|
||||||
|
* `CStr` without "std" feature
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
Licensed under either of
|
||||||
|
|
||||||
|
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
|
||||||
|
* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
at your option.
|
||||||
|
|
||||||
|
## Contributions
|
||||||
|
|
||||||
|
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
|
||||||
|
|
||||||
|
|
||||||
|
[`allocator_api`]: https://doc.rust-lang.org/unstable-book/library-features/allocator-api.html
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue