From ad7ab65366572d6c83f417c5319cd21de3293574 Mon Sep 17 00:00:00 2001 From: theBreadCompany Date: Mon, 18 Nov 2024 09:51:33 +0100 Subject: [PATCH] add switches and stuff --- Cargo.lock | 51 ++++++++++++++ Cargo.toml | 1 + src/main.rs | 190 ++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 222 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec67b1e..74b4b43 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -171,6 +171,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.31" @@ -178,6 +193,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -186,6 +202,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -204,10 +248,16 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -917,6 +967,7 @@ dependencies = [ name = "sfwikiscraper" version = "0.1.0" dependencies = [ + "futures", "reqwest", "serde_json", "tl", diff --git a/Cargo.toml b/Cargo.toml index 02fb5f2..c627030 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] +futures = "0.3.31" reqwest = "0.12.9" serde_json = "1.0.132" tl = "0.7.8" diff --git a/src/main.rs b/src/main.rs index 5612c8a..0344dce 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,54 +1,204 @@ -use tl; +use futures::future::join_all; use reqwest; -use serde_json::{self, Value, json}; -use std::{fs::{write, File}, io::Write}; - +use serde_json::{self, Value}; +use std::{collections::HashSet, env, fs::File, io::Write}; +use tl; #[tokio::main] async fn main() { - let url = "https://wiki.kmods.space/show/klib-res-timecrystal/itemDescriptor"; - let webdata = reqwest::get(url).await.expect("request failed").text().await.expect("request failed"); + let args: Vec = env::args().collect(); + match args.len() { + 1 => { + panic!("missing arguments"); + } + 2 => { + let cmd = args[1].as_str(); + + match cmd { + "--all" => { + write_all().await; + } + "--help" => { + panic!("not implemented"); + } + _ => { + panic!("bad command {}", cmd); + } + } + } + 3 => { + let cmd = args[1].as_str(); + let var = args[2].as_str(); + + match cmd { + "--item" => { + write_item(var.to_string()).await; + } + "--tier" => { + write_tier(var.to_string()).await; + } + "--test" => { + fetch_json(var.to_string()).await; + } + //"--milestone" => { } + _ => { + panic!("bad command {}", cmd); + } + } + } + _ => { + panic!("too many arguments"); + } + } +} + +async fn fetch_data(url: String) -> Vec { + let webdata = reqwest::get(url) + .await + .expect("request failed") + .text() + .await + .expect("request failed"); let dom = tl::parse(webdata.as_str(), tl::ParserOptions::default()).expect("page is not html"); let parser = dom.parser(); - let element = dom.get_element_by_id("__NUXT_DATA__").expect("__NUXT_DATA__ not found").get(parser).unwrap(); + let element = dom + .get_element_by_id("__NUXT_DATA__") + .expect("__NUXT_DATA__ not found") + .get(parser) + .unwrap(); let data = element.inner_text(parser); - let mut json: Vec = serde_json::from_str(&data).unwrap(); - //println!("{:#?}", json); + serde_json::from_str(&data).unwrap() +} + +async fn fetch_recipe_links(url: String) -> Vec { + let webdata = reqwest::get(url) + .await + .expect("request failed") + .text() + .await + .expect("request failed"); + + let dom = tl::parse(webdata.as_str(), tl::ParserOptions::default()).expect("page is not html"); + let parser = dom.parser(); + dom.query_selector("a") // Correctly query all tags + .expect("failed to query tags") + .filter_map(|e| { + e.get(parser) + .and_then(|node| node.as_tag()) // Ensure the node is a tag + .and_then(|tag| tag.attributes().get("href")) // Get the href attribute + .and_then(|attr| Some(attr?.as_utf8_str().to_string())) // Convert to String + }) + .filter(|e| { + e.contains("/show") && !e.contains("satisfactoryplus") && !e.contains("game-schematic") + }) + .collect::>() + .into_iter() + .collect::>() +} + +async fn fetch_item(url: String) -> Value { + let json = fetch_data(url.clone()).await; + + let mut v = json[0].clone(); + recurse(&mut v, &json); + v[1]["data"] + .as_object() + .unwrap() + .values() + .next() + .unwrap() + .clone() +} +async fn write_item(url: String) { + let result = fetch_item(url).await; + let mut file = File::create(format!( + "{}.json", + &result["extraInformations"]["name"].as_str().unwrap() + )) + .unwrap(); + file.write(serde_json::to_string_pretty(&result).unwrap().as_bytes()) + .expect("failed to write json"); +} + +async fn fetch_tier(tier: String) -> Vec { + let recipes = + fetch_recipe_links(format!("https://wiki.kmods.space/milestones/tier-{}", tier)).await; + + join_all( + recipes + .iter() + .map(|recipe| async move { + fetch_item(format!("https://wiki.kmods.space{}/itemDescriptor", recipe)).await + }) + .collect::>(), + ) + .await +} +async fn write_tier(tier: String) { + let result = fetch_tier(tier.clone()).await; + let mut file = File::create(format!("Tier {}.json", tier)).unwrap(); + file.write(serde_json::to_string_pretty(&result).unwrap().as_bytes()) + .expect("failed to write json"); +} + +async fn write_all() { + let mut file = File::create("complete.json").unwrap(); + file.write( + serde_json::to_string_pretty( + &join_all( + (0..9) + .into_iter() + .map(|i| async move { fetch_tier(i.to_string()).await }) + ) + .await + .into_iter() + .reduce(|mut acc, e| { acc.extend(e.iter().cloned()); acc }) + ) + .unwrap() + .as_bytes(), + ) + .unwrap(); +} + +async fn fetch_json(url: String) { + let mut json = fetch_data(url).await; let mut v = json[0].clone(); recurse(&mut v, &mut json); - let result_b = v[1]["data"].as_object().unwrap().values().collect::>(); - let result = result_b.get(0).unwrap(); - let mut file = File::create(format!("{}.json", &result["extraInformations"]["name"].as_str().unwrap())).unwrap(); - file.write(serde_json::to_string_pretty(&result).unwrap().as_bytes()).expect("failed to write json"); + write_output("test.json", &v); +} +fn write_output(name: &str, content: &Value) { + let mut file = File::create(name).unwrap(); + file.write(serde_json::to_string_pretty(&content).unwrap().as_bytes()) + .expect("failed to write json"); } fn recurse<'a>(val: &'a mut Value, src: &'a Vec) { - //println!("val before mutation: {}", val); match val { Value::Array(arr) => { for val in arr.iter_mut() { recurse(val, src); - }; + } } Value::Object(map) => { - for (key, val) in map.iter_mut() { + for val in map.values_mut() { recurse(val, src); } } Value::Number(num) => { let i = num.as_u64().unwrap(); let mut r = src.get(i as usize).unwrap().clone(); - match r { + match r { Value::Number(_) => {} - _ => { recurse(&mut r, src); } + _ => { + recurse(&mut r, src); + } } *val = r; } _ => {} } - //println!("val after mutation: {}", val); -} \ No newline at end of file +}