add switches and stuff

This commit is contained in:
theBreadCompany 2024-11-18 09:51:33 +01:00
parent 17bd70e3bd
commit ad7ab65366
3 changed files with 222 additions and 20 deletions

51
Cargo.lock generated
View file

@ -171,6 +171,21 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "futures"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.31"
@ -178,6 +193,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
@ -186,6 +202,34 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
[[package]]
name = "futures-executor"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
[[package]]
name = "futures-macro"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.31"
@ -204,10 +248,16 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
@ -917,6 +967,7 @@ dependencies = [
name = "sfwikiscraper"
version = "0.1.0"
dependencies = [
"futures",
"reqwest",
"serde_json",
"tl",

View file

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
futures = "0.3.31"
reqwest = "0.12.9"
serde_json = "1.0.132"
tl = "0.7.8"

View file

@ -1,41 +1,190 @@
use tl;
use futures::future::join_all;
use reqwest;
use serde_json::{self, Value, json};
use std::{fs::{write, File}, io::Write};
use serde_json::{self, Value};
use std::{collections::HashSet, env, fs::File, io::Write};
use tl;
/// Command-line entry point: dispatches on the switch given in the arguments.
///
/// Supported invocations:
/// * `--all` calls `write_all`.
/// * `--item <arg>` calls `write_item` with the argument.
/// * `--tier <arg>` calls `write_tier` with the argument.
/// * `--test <arg>` calls `fetch_json` with the argument.
///
/// # Panics
/// Panics when no switch is given, when more than two arguments follow the
/// program name, on an unknown switch, and on `--help` (not implemented yet).
#[tokio::main]
async fn main() {
    let args: Vec<String> = env::args().collect();
    // The first element is the program name, so the switch sits at index 1.
    match args.as_slice() {
        [_] => panic!("missing arguments"),
        [_, cmd] => match cmd.as_str() {
            "--all" => write_all().await,
            "--help" => panic!("not implemented"),
            other => panic!("bad command {}", other),
        },
        [_, cmd, var] => match cmd.as_str() {
            "--item" => write_item(var.clone()).await,
            "--tier" => write_tier(var.clone()).await,
            "--test" => fetch_json(var.clone()).await,
            // TODO: a `--milestone` switch is not handled yet.
            other => panic!("bad command {}", other),
        },
        _ => panic!("too many arguments"),
    }
}
/// Downloads `url` and returns the JSON array stored in the page's
/// `__NUXT_DATA__` element.
///
/// # Panics
/// Panics if the request or reading its body fails, if the body cannot be
/// parsed as HTML, if no element with id `__NUXT_DATA__` exists, or if that
/// element's text does not deserialize into a JSON array.
async fn fetch_data(url: String) -> Vec<Value> {
    let webdata = reqwest::get(url)
        .await
        .expect("request failed")
        .text()
        .await
        .expect("request failed");
    let dom = tl::parse(webdata.as_str(), tl::ParserOptions::default()).expect("page is not html");
    let parser = dom.parser();
    let element = dom
        .get_element_by_id("__NUXT_DATA__")
        .expect("__NUXT_DATA__ not found")
        .get(parser)
        .expect("__NUXT_DATA__ handle does not resolve to a node");
    let data = element.inner_text(parser);
    // Parse once and return the result directly.
    serde_json::from_str(&data).expect("__NUXT_DATA__ does not contain a JSON array")
}
/// Downloads `url` and returns the distinct `href` values of its `<a>` tags
/// that contain `/show` and contain neither `satisfactoryplus` nor
/// `game-schematic`.
///
/// The result is sorted, so repeated runs over the same page yield the links
/// in the same order.
///
/// # Panics
/// Panics if the request or reading its body fails, if the body cannot be
/// parsed as HTML, or if the `a` selector cannot be queried.
async fn fetch_recipe_links(url: String) -> Vec<String> {
    let webdata = reqwest::get(url)
        .await
        .expect("request failed")
        .text()
        .await
        .expect("request failed");
    let dom = tl::parse(webdata.as_str(), tl::ParserOptions::default()).expect("page is not html");
    let parser = dom.parser();
    let unique: HashSet<String> = dom
        .query_selector("a")
        .expect("failed to query <a> tags")
        .filter_map(|handle| {
            // Skip handles that are not tags and tags without an href value.
            let tag = handle.get(parser)?.as_tag()?;
            let href = tag.attributes().get("href")??;
            Some(href.as_utf8_str().to_string())
        })
        .filter(|href| {
            href.contains("/show")
                && !href.contains("satisfactoryplus")
                && !href.contains("game-schematic")
        })
        .collect();
    // HashSet iteration order is unspecified; sort so the output is stable.
    let mut links: Vec<String> = unique.into_iter().collect();
    links.sort_unstable();
    links
}
/// Fetches the data array for `url`, runs its first entry through `recurse`
/// together with the full array, and returns a copy of the first value of
/// the `data` object found at index 1 of the result.
///
/// # Panics
/// Panics if the fetched array is empty, if `[1]["data"]` of the processed
/// value is not an object, or if that object has no entries. Also panics
/// wherever `fetch_data` does.
async fn fetch_item(url: String) -> Value {
    let json = fetch_data(url).await;
    let mut v = json
        .first()
        .expect("__NUXT_DATA__ array is empty")
        .clone();
    recurse(&mut v, &json);
    v[1]["data"]
        .as_object()
        .expect("no `data` object at index 1 of the resolved payload")
        .values()
        .next()
        .expect("`data` object is empty")
        .clone()
}
/// Fetches the item at `url` and writes it as pretty-printed JSON to
/// `<name>.json`, where `<name>` is the item's `extraInformations.name`.
///
/// # Panics
/// Panics if the item has no string at `extraInformations.name`, or if the
/// file cannot be created or written. Also panics wherever `fetch_item` does.
async fn write_item(url: String) {
    let result = fetch_item(url).await;
    let name = result["extraInformations"]["name"]
        .as_str()
        .expect("item has no `extraInformations.name` string");
    let mut file = File::create(format!("{}.json", name)).expect("failed to create json file");
    // `write_all` keeps writing until the whole buffer is flushed to the
    // file; plain `write` may stop after a partial write.
    file.write_all(serde_json::to_string_pretty(&result).unwrap().as_bytes())
        .expect("failed to write json");
}
/// Collects the recipe links from the milestone page of `tier` and fetches
/// the item descriptor behind every link concurrently.
///
/// The returned items are in the same order as the links.
async fn fetch_tier(tier: String) -> Vec<Value> {
    let milestone_url = format!("https://wiki.kmods.space/milestones/tier-{}", tier);
    let links = fetch_recipe_links(milestone_url).await;
    // Calling the async fn only builds the future; `join_all` drives them all.
    let pending = links
        .iter()
        .map(|link| fetch_item(format!("https://wiki.kmods.space{}/itemDescriptor", link)));
    join_all(pending).await
}
/// Fetches every item of `tier` and writes them as a pretty-printed JSON
/// array to `Tier <tier>.json`.
///
/// # Panics
/// Panics if the file cannot be created or written. Also panics wherever
/// `fetch_tier` does.
async fn write_tier(tier: String) {
    // Build the file name first so `tier` can be moved into the fetch.
    let path = format!("Tier {}.json", tier);
    let result = fetch_tier(tier).await;
    let mut file = File::create(path).expect("failed to create json file");
    // `write_all` guarantees the complete buffer is written.
    file.write_all(serde_json::to_string_pretty(&result).unwrap().as_bytes())
        .expect("failed to write json");
}
async fn write_all() {
let mut file = File::create("complete.json").unwrap();
file.write(
serde_json::to_string_pretty(
&join_all(
(0..9)
.into_iter()
.map(|i| async move { fetch_tier(i.to_string()).await })
)
.await
.into_iter()
.reduce(|mut acc, e| { acc.extend(e.iter().cloned()); acc })
)
.unwrap()
.as_bytes(),
)
.unwrap();
}
/// Test helper behind the `--test` switch: fetches the data array for `url`
/// and runs its first entry through `recurse` together with the full array.
/// It then writes two files:
///
/// * `<name>.json`: the first value of the `data` object at index 1 of the
///   result, named after its `extraInformations.name`.
/// * `test.json`: the whole processed value.
///
/// # Panics
/// Panics if the fetched array is empty, if the expected `data` object or
/// name string is missing, or if a file cannot be created or written.
async fn fetch_json(url: String) {
    let json = fetch_data(url).await;
    let mut v = json[0].clone();
    recurse(&mut v, &json);
    let result = v[1]["data"]
        .as_object()
        .unwrap()
        .values()
        .next()
        .unwrap();
    let name = result["extraInformations"]["name"].as_str().unwrap();
    let mut file = File::create(format!("{}.json", name)).unwrap();
    // `write_all` guarantees the complete buffer is written.
    file.write_all(serde_json::to_string_pretty(result).unwrap().as_bytes())
        .expect("failed to write json");
    write_output("test.json", &v);
}
/// Writes `content` as pretty-printed JSON to the file `name`, creating or
/// truncating it.
///
/// # Panics
/// Panics if the file cannot be created, if serialization fails, or if the
/// data cannot be written completely.
fn write_output(name: &str, content: &Value) {
    let mut file = File::create(name).unwrap();
    // `write_all` keeps writing until the whole buffer is flushed to the
    // file; plain `write` may stop after a partial write.
    file.write_all(serde_json::to_string_pretty(content).unwrap().as_bytes())
        .expect("failed to write json");
}
fn recurse<'a>(val: &'a mut Value, src: &'a Vec<Value>) {
//println!("val before mutation: {}", val);
match val {
Value::Array(arr) => {
for val in arr.iter_mut() {
recurse(val, src);
};
}
}
Value::Object(map) => {
for (key, val) in map.iter_mut() {
for val in map.values_mut() {
recurse(val, src);
}
}
@ -44,11 +193,12 @@ fn recurse<'a>(val: &'a mut Value, src: &'a Vec<Value>) {
let mut r = src.get(i as usize).unwrap().clone();
match r {
Value::Number(_) => {}
_ => { recurse(&mut r, src); }
_ => {
recurse(&mut r, src);
}
}
*val = r;
}
_ => {}
}
//println!("val after mutation: {}", val);
}