mcwzh-walker
Some checks are pending
ci/woodpecker/push/woodpecker Pipeline is running

This commit is contained in:
xtex 2023-12-17 09:29:45 +08:00
parent 348f0e1672
commit 354ad0c922
5 changed files with 130 additions and 67 deletions

130
Cargo.lock generated
View file

@ -222,7 +222,7 @@ dependencies = [
"dtoa-short",
"itoa 0.4.8",
"matches",
"phf",
"phf 0.8.0",
"proc-macro2",
"quote",
"smallvec",
@ -553,9 +553,9 @@ checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
[[package]]
name = "html5ever"
version = "0.25.2"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5c13fb08e5d4dfc151ee5e88bae63f7773d61852f3bdc73c9f4b9e1bde03148"
checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
dependencies = [
"log",
"mac",
@ -712,7 +712,6 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
"serde",
]
[[package]]
@ -723,6 +722,7 @@ checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f"
dependencies = [
"equivalent",
"hashbrown 0.14.2",
"serde",
]
[[package]]
@ -753,13 +753,14 @@ dependencies = [
]
[[package]]
name = "kuchiki"
version = "0.8.1"
name = "kuchikiki"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ea8e9c6e031377cff82ee3001dc8026cdf431ed4e2e6b51f98ab8c73484a358"
checksum = "f29e4755b7b995046f510a7520c42b2fed58b77bd94d5a87a8eb43d2fd126da8"
dependencies = [
"cssparser",
"html5ever",
"indexmap 1.9.3",
"matches",
"selectors",
]
@ -817,13 +818,13 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.10.1"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd"
checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
dependencies = [
"log",
"phf",
"phf_codegen",
"phf 0.10.1",
"phf_codegen 0.10.0",
"string_cache",
"string_cache_codegen",
"tendril",
@ -888,12 +889,10 @@ dependencies = [
[[package]]
name = "mwapi"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5b17007ca0fdbc12f7720b4e9578213da0f80fd678b6f4bd8ff8705b307bcb"
version = "0.6.0"
source = "git+https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git#0a74d93affa1cb53e8417e14f6a07ccca7993144"
dependencies = [
"js-sys",
"mwapi_responses",
"reqwest",
"serde",
"serde_json",
@ -906,20 +905,19 @@ dependencies = [
[[package]]
name = "mwapi_responses"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a146bdef725eb621b3e876c968fbfbcec07052d5d58be752422b6e7305aa656a"
version = "0.4.0"
source = "git+https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git#0a74d93affa1cb53e8417e14f6a07ccca7993144"
dependencies = [
"mwapi",
"mwapi_responses_derive",
"mwtimestamp",
"serde",
"time",
]
[[package]]
name = "mwapi_responses_derive"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4859cc2a29d5cd84f493f49b120e7f572ecc5d9fe82bfe5a2c1bd8a7c1737c04"
version = "0.4.0"
source = "git+https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git#0a74d93affa1cb53e8417e14f6a07ccca7993144"
dependencies = [
"proc-macro2",
"quote",
@ -930,15 +928,15 @@ dependencies = [
[[package]]
name = "mwbot"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8630719d7d4a3c7ef92a1e23f2a50a7732cf27ea9010c2b8cb72d7b99161b9d"
version = "0.6.0"
source = "git+https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git#0a74d93affa1cb53e8417e14f6a07ccca7993144"
dependencies = [
"dirs",
"libc",
"mwapi",
"mwapi_responses",
"mwbot_derive",
"mwtimestamp",
"mwtitle",
"once_cell",
"parsoid",
@ -947,27 +945,34 @@ dependencies = [
"serde_json",
"thiserror",
"tokio",
"toml 0.7.8",
"toml",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "mwbot_derive"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef7f1ea65276ab67396583e569bb9d57910e55a7e776bdac4bc59b4c11cee8a0"
version = "0.6.0"
source = "git+https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git#0a74d93affa1cb53e8417e14f6a07ccca7993144"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
]
[[package]]
name = "mwtimestamp"
version = "0.1.0"
source = "git+https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git#0a74d93affa1cb53e8417e14f6a07ccca7993144"
dependencies = [
"chrono",
"serde",
]
[[package]]
name = "mwtitle"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c0f488429997bc77a693a53b2977c53455819126590f555fe43fd1f4b4e059d"
version = "0.2.3"
source = "git+https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git#0a74d93affa1cb53e8417e14f6a07ccca7993144"
dependencies = [
"bytemuck",
"regex",
@ -1131,12 +1136,11 @@ dependencies = [
[[package]]
name = "parsoid"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3a7d90af03ff8f55062316bf217d88a2e5e50a539e57db42b7d7af7e5d80867"
version = "0.9.0"
source = "git+https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git#0a74d93affa1cb53e8417e14f6a07ccca7993144"
dependencies = [
"indexmap 1.9.3",
"kuchiki",
"indexmap 2.1.0",
"kuchikiki",
"lazy_static",
"markup5ever",
"percent-encoding",
@ -1167,6 +1171,15 @@ dependencies = [
"proc-macro-hack",
]
[[package]]
name = "phf"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
dependencies = [
"phf_shared 0.10.0",
]
[[package]]
name = "phf_codegen"
version = "0.8.0"
@ -1177,6 +1190,16 @@ dependencies = [
"phf_shared 0.8.0",
]
[[package]]
name = "phf_codegen"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
]
[[package]]
name = "phf_generator"
version = "0.8.0"
@ -1647,8 +1670,8 @@ dependencies = [
"fxhash",
"log",
"matches",
"phf",
"phf_codegen",
"phf 0.8.0",
"phf_codegen 0.8.0",
"precomputed-hash",
"servo_arc",
"smallvec",
@ -2037,18 +2060,6 @@ dependencies = [
"tracing",
]
[[package]]
name = "toml"
version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit 0.19.15",
]
[[package]]
name = "toml"
version = "0.8.8"
@ -2058,7 +2069,7 @@ dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit 0.21.0",
"toml_edit",
]
[[package]]
@ -2070,19 +2081,6 @@ dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.19.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
dependencies = [
"indexmap 2.1.0",
"serde",
"serde_spanned",
"toml_datetime",
"winnow",
]
[[package]]
name = "toml_edit"
version = "0.21.0"
@ -2494,7 +2492,7 @@ dependencies = [
"serde_json",
"similar",
"tokio",
"toml 0.8.8",
"toml",
"tracing",
"tracing-subscriber",
]

View file

@ -8,7 +8,7 @@ anyhow = "1.0.75"
chrono = { version = "0.4.31", features = ["serde"] }
csv = "1.3.0"
dotenv = "0.15.0"
mwbot = "0.5.3"
mwbot = { git = "https://gitlab.wikimedia.org/repos/mwbot-rs/mwbot.git", version = "0.6.0" }
rand = "0.8.5"
regex = "1.10.2"
reqwest = { version = "0.11.22", features = ["json"] }

View file

@ -0,0 +1 @@
0 0 * * * /dist/sbin/dinitctl start mcwzh-walker

View file

@ -0,0 +1,6 @@
type = process
command = /dist/bin/mcwzh-walker
restart = false
log-type = file
logfile = /srv/run/logs/mcwzh-walker.log
working-dir = /dist

58
src/bin/mcwzh-walker.rs Normal file
View file

@ -0,0 +1,58 @@
#![feature(let_chains)]
#![feature(lazy_cell)]
use std::{
fs::{create_dir_all, File},
path::Path,
};
use mwbot::generators::{categories::Categories, AllPages, Generator};
use xt_bot_wiki::{init_log, prelude::*};
/// Walks every page of the wiki and records command-redirect problems to
/// `pub/mcwzh-walker/log.csv`.
///
/// Two checks are performed for each page returned by the `AllPages` generator:
///
/// 1. If the page is a redirect whose target title starts with `命令/`, the
///    page is expected to be in `Category:命令重定向`; otherwise a
///    [`Log::MissingCommandRedirectionCat`] row is written.
/// 2. If the page title itself starts with `命令/`, the title with the leading
///    `命令` removed (i.e. `/<rest>`) is looked up; if that page does not
///    exist a [`Log::MissingCommandRedirectionPage`] row is written.
///
/// # Errors
///
/// Returns an error if the output directory or CSV file cannot be created,
/// the bot configuration cannot be loaded, any API request fails, or a CSV
/// row cannot be written or flushed.
#[tokio::main]
async fn main() -> Result<()> {
    init_log();

    create_dir_all("pub/mcwzh-walker")?;

    let bot = MwBot::from_path(Path::new("config/mwbot-mcwzh.toml")).await?;

    let mut wtr: csv::Writer<File> =
        csv::Writer::from_writer(File::create("pub/mcwzh-walker/log.csv")?);

    let mut allpages = AllPages::new(0u32)
        .filter_redirect(mwbot::generators::FilterRedirect::All)
        .generate(&bot);
    while let Some(page) = allpages.recv().await {
        let page = page?;
        let title = page.title();
        info!(page = title, "checked page");

        // Command redirects: a redirect pointing at a `命令/…` page should be
        // categorised in `Category:命令重定向`.
        if let Some(redirect_target) = page.redirect_target().await? {
            // The category query is restricted to the single wanted category,
            // so an empty result stream means the page is not in it.
            if redirect_target.title().starts_with("命令/")
                && Categories::new(vec![title.to_string()])
                    .categories(vec!["Category:命令重定向".to_string()])
                    .generate(&bot)
                    .recv()
                    .await
                    .is_none()
            {
                info!(page = title, "missing command redirection category");
                wtr.serialize(Log::MissingCommandRedirectionCat(title.to_string()))?;
            }
        }

        // Command pages: `命令/<name>` is expected to have a companion page
        // titled `/<name>`. The prefix is removed with `strip_prefix` rather
        // than a byte-offset slice: `命` is three bytes in UTF-8, so slicing
        // the title at byte 2 lands inside a character and panics.
        // NOTE(review): assumes the companion title keeps the leading `/` — confirm.
        if let Some(alias) = title
            .strip_prefix("命令")
            .filter(|rest| rest.starts_with('/'))
        {
            if !bot.page(alias)?.exists().await? {
                info!(page = title, "missing command redirection page");
                wtr.serialize(Log::MissingCommandRedirectionPage(title.to_string()))?;
            }
        }
    }

    // Flush explicitly so that a failed final write is reported instead of
    // being silently discarded when the writer is dropped.
    wtr.flush()?;

    Ok(())
}
/// One finding emitted by the walker; each value is serialized as a row of
/// the CSV log.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum Log {
    /// A redirect whose target starts with `命令/` is not in
    /// `Category:命令重定向`. The payload is the title of the redirect page.
    MissingCommandRedirectionCat(String),
    /// A page under `命令/` has no corresponding redirect page.
    /// NOTE(review): payload is presumably the title of the `命令/` page — confirm.
    MissingCommandRedirectionPage(String),
}