From bdd84974566c32939b55223425cbcd0cd855e809 Mon Sep 17 00:00:00 2001
From: theBreadCompany
Date: Thu, 5 Sep 2024 21:18:09 +0200
Subject: [PATCH] outline project

---
 .gitignore  |  1 +
 Cargo.toml  | 14 ++++++++
 src/main.rs | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.toml
 create mode 100644 src/main.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..14de23b
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "pxiv"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+tl = "0.7.8"
+reqwest = "0.12.7"
+json = "0.12.4"
+rocket = "0.5.1"
+scraper = "0.20.0"
+markup5ever = "0.13.0"
+tendril = "0.4.3"
+html5ever = "0.28.0"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..b65ae49
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,96 @@
+#[macro_use] extern crate rocket;
+
+use reqwest::Client;
+use rocket::http::Status;
+use rocket::response::content::RawHtml;
+use scraper::{Html, Node, Selector};
+
+/// CSS selector for the meta tag whose `content` holds pixiv's preload JSON.
+const PRELOAD_SELECTOR: &str = r#"meta[name="preload-data"]"#;
+
+/// CSS selector for the Open Graph image tag, keyed by `name` or `property`.
+const OG_IMAGE_SELECTOR: &str = r#"meta[name="og:image"], meta[property="og:image"]"#;
+
+/// Serves `/<path..>` by fetching `https://pixiv.net/<path..>` and rewriting
+/// the page's `og:image` meta tag.
+///
+/// # Errors
+///
+/// Returns the [`Status`] reported by [`fetch_content`] or [`change_meta`].
+#[get("/<path..>")]
+async fn handle_route(path: std::path::PathBuf) -> Result<RawHtml<String>, Status> {
+    let target = format!("https://pixiv.net/{}", path.display());
+    let html = fetch_content(&target).await?;
+    let modified = change_meta(&html).await?;
+    Ok(RawHtml(modified))
+}
+
+/// Downloads `url` and returns the response body as text.
+///
+/// # Errors
+///
+/// Returns [`Status::BadGateway`] when the request cannot be sent and
+/// [`Status::InternalServerError`] when the body cannot be read.
+async fn fetch_content(url: &str) -> Result<String, Status> {
+    println!("{}", url);
+    let response = Client::new()
+        .get(url)
+        .send()
+        .await
+        .map_err(|_| Status::BadGateway)?;
+    response
+        .text()
+        .await
+        .map_err(|_| Status::InternalServerError)
+}
+
+/// Returns `urls.regular` of the first entry under `illust` in the JSON held
+/// by the `preload-data` meta tag, or `None` when any piece is missing.
+fn find_regular_url(dom: &Html) -> Option<String> {
+    let selector = Selector::parse(PRELOAD_SELECTOR).ok()?;
+    let content = dom.select(&selector).next()?.value().attr("content")?;
+    let data = json::parse(content).ok()?;
+    let (_, illust) = data["illust"].entries().next()?;
+    illust["urls"]["regular"].as_str().map(str::to_owned)
+}
+
+/// Points the page's `og:image` meta tag at the URL from [`find_regular_url`].
+///
+/// Best effort: if the URL or the `og:image` tag cannot be found, the input
+/// is returned unchanged instead of failing the request.
+///
+/// # Errors
+///
+/// Currently never returns `Err`; the `Result` lets callers keep using `?`.
+async fn change_meta(html: &str) -> Result<String, Status> {
+    let mut dom = Html::parse_document(html);
+    let Some(target_url) = find_regular_url(&dom) else {
+        return Ok(html.to_string());
+    };
+    let Ok(selector) = Selector::parse(OG_IMAGE_SELECTOR) else {
+        return Ok(html.to_string());
+    };
+    // Keep only the node id so the shared borrow of `dom` ends before the edit.
+    let Some(image_id) = dom.select(&selector).next().map(|meta| meta.id()) else {
+        return Ok(html.to_string());
+    };
+
+    if let Some(mut node) = dom.tree.get_mut(image_id) {
+        if let Node::Element(element) = node.value() {
+            for (name, value) in element.attrs.iter_mut() {
+                if &*name.local == "content" {
+                    *value = target_url.as_str().into();
+                }
+            }
+        }
+    }
+
+    // Serialize the edited DOM so the rewritten tag reaches the client.
+    Ok(dom.html())
+}
+
+/// Builds the Rocket instance with [`handle_route`] mounted at `/`.
+#[launch]
+fn launch() -> _ {
+    rocket::build().mount("/", routes![handle_route])
+}