diff --git a/Cargo.lock b/Cargo.lock index bd3386b..b8e70a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -281,6 +281,8 @@ dependencies = [ "reqwest", "rss", "tokio", + "tracing", + "tracing-subscriber", "urlencoding", "warp", ] @@ -1002,6 +1004,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.175" @@ -1110,6 +1118,15 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nu-ansi-term" +version = "0.50.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1376,7 +1393,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -1744,6 +1761,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -1929,6 +1955,15 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tinystr" version = "0.8.1" @@ -2061,9 +2096,21 @@ checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "log", "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "tracing-core" version = "0.1.34" @@ -2071,6 +2118,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", ] [[package]] @@ -2145,6 +2218,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index f779e1c..f982590 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "complete-rss" -version = "0.1.0" +version = "1.0.0" edition = "2024" [dependencies] @@ -10,5 +10,7 @@ readability-rust = "0.1.0" reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false } rss = "2.0.12" tokio = { version = "1.47.1", features = ["full"] } +tracing = { version = "0.1", features = ["attributes"]} +tracing-subscriber = "0.3" urlencoding = "2.1.3" warp = { version = "0.4.2", features = ["server"] } diff --git a/src/main.rs b/src/main.rs index 05c46e0..bf69737 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,10 +6,12 @@ use readability_rust::{ReadabilityFlags, ReadabilityOptions}; use reqwest::{Client, Url}; use rss::Channel; use tokio::{task::JoinSet, time::sleep}; +use tracing::{Instrument, debug, info, info_span, instrument}; use warp::Filter; const REQUEST_DELAY: Duration = Duration::from_secs(1); +#[instrument(skip(client), err)] async fn get_feed(url: String, client: &Client) -> Result { let url = urlencoding::decode(&url)?.into_owned(); let content = client.get(url).send().await?.bytes().await?; @@ -23,32 +25,35 @@ fn get_domain(item: &rss::Item) -> Option { .and_then(|parsed| parsed.domain().map(|domain| domain.to_string())) } -async fn complete(mut channel: Channel, client: &Client) -> Result> { - let grouped: Vec> = channel +#[instrument(skip_all)] +async fn complete_channel(mut channel: Channel, client: &Client) -> Result> { + let grouped: Vec<(Option, Vec)> = channel .items() .iter() - .chunk_by(|item| get_domain(item)) + .sorted_by_cached_key(|a| get_domain(a)) + .chunk_by(|&item| get_domain(item)) .into_iter() - .map(|(_k, v)| v.cloned().collect()) + .map(|(k, v)| (k, v.cloned().collect())) .collect(); - let mut set = JoinSet::new(); - for mut items in grouped.into_iter() { - let client = client.clone(); - set.spawn(async move { - for (index, item) in &mut items.iter_mut().enumerate() { - if index > 0 { - sleep(REQUEST_DELAY).await; - } + info!( + num_items = channel.items().len(), + num_groups = grouped.len(), + ); - if let Some(ref link) = item.link - && let Ok(content) = get_content(link, &client.clone()).await - { - item.set_description(content); + let mut set = JoinSet::new(); + for (website, mut items) in grouped.into_iter() { + let client = client.clone(); + let num_items = items.len(); + set.spawn( + async move { + for (index, item) in &mut items.iter_mut().enumerate() { + get_item(&client, index, item).await; } + items } - items - }); + .instrument(info_span!("get_website", website, num_items)), + ); } let items: Vec = set.join_all().await.concat(); @@ -57,6 +62,22 @@ async fn complete(mut channel: Channel, client: &Client) -> Result> Ok(Box::new(channel)) } +#[instrument(skip(client, item))] +async fn get_item(client: &Client, index: usize, item: &mut rss::Item) { + if index > 0 { + debug!("delaying_next_request"); + sleep(REQUEST_DELAY).await; + } + + if let Some(ref link) = item.link + && let Ok(content) = get_content(link, &client.clone()).await + { + debug!(content); + item.set_description(content); + } +} + +#[instrument(skip(client), err)] async fn get_content(link: &str, client: &Client) -> Result { let response = client.get(link).send().await?; let mut readablity = readability_rust::Readability::new( @@ -94,16 +115,21 @@ pub(crate) fn custom_reject(error: impl Into) -> warp::Rejection #[tokio::main] async fn main() { + tracing_subscriber::fmt::init(); + let client = Client::new(); - let path = warp::path!(String) + let rss = warp::path!("rss" / String) .and_then(move |url| { let client = client.clone(); async move { let feed = get_feed(url, &client).await.map_err(custom_reject)?; - let updated = complete(feed, &client).await.map_err(custom_reject)?; + let updated = complete_channel(feed, &client) + .await + .map_err(custom_reject)?; Ok::(format!("{}", updated)) } }) .map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml")); - warp::serve(path).run(([0, 0, 0, 0], 3030)).await; + let health = warp::path!("health").map(|| "Healthy"); + warp::serve(rss.or(health)).run(([0, 0, 0, 0], 3030)).await; }