Compare commits

..

5 commits

Author SHA1 Message Date
17af5a649b
chore: first stable version 2025-10-03 17:51:03 +02:00
de3e738445
feat!: add health endpoint 2025-10-03 17:51:02 +02:00
d82dbfdd43
refactor: do not get domains as often 2025-10-03 17:51:01 +02:00
d2fa95d1e4
refactor: add tracing and logging 2025-10-03 17:51:00 +02:00
80a6cefbeb
fix: sort items before chunking
The chunk_by function works like coreutils sort in that it expects
sorted inputs.
2025-10-03 17:50:55 +02:00
3 changed files with 130 additions and 23 deletions

81
Cargo.lock generated
View file

@ -281,6 +281,8 @@ dependencies = [
"reqwest",
"rss",
"tokio",
"tracing",
"tracing-subscriber",
"urlencoding",
"warp",
]
@ -1002,6 +1004,12 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.175"
@ -1110,6 +1118,15 @@ version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]]
name = "nu-ansi-term"
version = "0.50.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399"
dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "num-traits"
version = "0.2.19"
@ -1376,7 +1393,7 @@ dependencies = [
"once_cell",
"socket2",
"tracing",
"windows-sys 0.59.0",
"windows-sys 0.60.2",
]
[[package]]
@ -1744,6 +1761,15 @@ dependencies = [
"digest",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "shlex"
version = "1.3.0"
@ -1929,6 +1955,15 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "thread_local"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
dependencies = [
"cfg-if",
]
[[package]]
name = "tinystr"
version = "0.8.1"
@ -2061,9 +2096,21 @@ checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
dependencies = [
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "tracing-core"
version = "0.1.34"
@ -2071,6 +2118,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
dependencies = [
"nu-ansi-term",
"sharded-slab",
"smallvec",
"thread_local",
"tracing-core",
"tracing-log",
]
[[package]]
@ -2145,6 +2218,12 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "valuable"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
[[package]]
name = "version_check"
version = "0.9.5"

View file

@ -1,6 +1,6 @@
[package]
name = "complete-rss"
version = "0.1.0"
version = "1.0.0"
edition = "2024"
[dependencies]
@ -10,5 +10,7 @@ readability-rust = "0.1.0"
reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false }
rss = "2.0.12"
tokio = { version = "1.47.1", features = ["full"] }
tracing = { version = "0.1", features = ["attributes"]}
tracing-subscriber = "0.3"
urlencoding = "2.1.3"
warp = { version = "0.4.2", features = ["server"] }

View file

@ -6,10 +6,12 @@ use readability_rust::{ReadabilityFlags, ReadabilityOptions};
use reqwest::{Client, Url};
use rss::Channel;
use tokio::{task::JoinSet, time::sleep};
use tracing::{Instrument, debug, info, info_span, instrument};
use warp::Filter;
const REQUEST_DELAY: Duration = Duration::from_secs(1);
#[instrument(skip(client), err)]
async fn get_feed(url: String, client: &Client) -> Result<Channel> {
let url = urlencoding::decode(&url)?.into_owned();
let content = client.get(url).send().await?.bytes().await?;
@ -23,32 +25,35 @@ fn get_domain(item: &rss::Item) -> Option<String> {
.and_then(|parsed| parsed.domain().map(|domain| domain.to_string()))
}
async fn complete(mut channel: Channel, client: &Client) -> Result<Box<Channel>> {
let grouped: Vec<Vec<rss::Item>> = channel
#[instrument(skip_all)]
async fn complete_channel(mut channel: Channel, client: &Client) -> Result<Box<Channel>> {
let grouped: Vec<(Option<String>, Vec<rss::Item>)> = channel
.items()
.iter()
.chunk_by(|item| get_domain(item))
.sorted_by_cached_key(|a| get_domain(a))
.chunk_by(|&item| get_domain(item))
.into_iter()
.map(|(_k, v)| v.cloned().collect())
.map(|(k, v)| (k, v.cloned().collect()))
.collect();
let mut set = JoinSet::new();
for mut items in grouped.into_iter() {
let client = client.clone();
set.spawn(async move {
for (index, item) in &mut items.iter_mut().enumerate() {
if index > 0 {
sleep(REQUEST_DELAY).await;
}
info!(
num_items = channel.items().len(),
num_groups = grouped.len(),
);
if let Some(ref link) = item.link
&& let Ok(content) = get_content(link, &client.clone()).await
{
item.set_description(content);
let mut set = JoinSet::new();
for (website, mut items) in grouped.into_iter() {
let client = client.clone();
let num_items = items.len();
set.spawn(
async move {
for (index, item) in &mut items.iter_mut().enumerate() {
get_item(&client, index, item).await;
}
items
}
items
});
.instrument(info_span!("get_website", website, num_items)),
);
}
let items: Vec<rss::Item> = set.join_all().await.concat();
@ -57,6 +62,22 @@ async fn complete(mut channel: Channel, client: &Client) -> Result<Box<Channel>>
Ok(Box::new(channel))
}
#[instrument(skip(client, item))]
async fn get_item(client: &Client, index: usize, item: &mut rss::Item) {
if index > 0 {
debug!("delaying_next_request");
sleep(REQUEST_DELAY).await;
}
if let Some(ref link) = item.link
&& let Ok(content) = get_content(link, &client.clone()).await
{
debug!(content);
item.set_description(content);
}
}
#[instrument(skip(client), err)]
async fn get_content(link: &str, client: &Client) -> Result<String> {
let response = client.get(link).send().await?;
let mut readablity = readability_rust::Readability::new(
@ -94,16 +115,21 @@ pub(crate) fn custom_reject(error: impl Into<anyhow::Error>) -> warp::Rejection
#[tokio::main]
async fn main() {
tracing_subscriber::fmt::init();
let client = Client::new();
let path = warp::path!(String)
let rss = warp::path!("rss" / String)
.and_then(move |url| {
let client = client.clone();
async move {
let feed = get_feed(url, &client).await.map_err(custom_reject)?;
let updated = complete(feed, &client).await.map_err(custom_reject)?;
let updated = complete_channel(feed, &client)
.await
.map_err(custom_reject)?;
Ok::<String, warp::Rejection>(format!("{}", updated))
}
})
.map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml"));
warp::serve(path).run(([0, 0, 0, 0], 3030)).await;
let health = warp::path!("health").map(|| "Healthy");
warp::serve(rss.or(health)).run(([0, 0, 0, 0], 3030)).await;
}