feat: add delay between requests to the same website

This commit is contained in:
Moritz Böhme 2025-09-04 08:03:03 +02:00
parent 87a001f0cc
commit d971927b4d
No known key found for this signature in database
GPG key ID: 970C6E89EB0547A9
3 changed files with 57 additions and 20 deletions

16
Cargo.lock generated
View file

@ -164,6 +164,7 @@ name = "complete-rss"
version = "0.1.0"
dependencies = [
"anyhow",
"itertools",
"llm_readability",
"reqwest",
"rss",
@ -287,6 +288,12 @@ dependencies = [
"syn",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "encoding_rs"
version = "0.8.35"
@ -743,6 +750,15 @@ dependencies = [
"serde",
]
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.15"

View file

@ -5,6 +5,7 @@ edition = "2024"
[dependencies]
anyhow = "1.0.99"
itertools = "0.14.0"
llm_readability = "0.0.11"
reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false }
rss = "2.0.12"

View file

@ -1,4 +1,7 @@
use std::time::Duration;
use anyhow::Result;
use itertools::Itertools;
use llm_readability::extractor;
use reqwest::{Client, Url};
use rss::Channel;
@ -12,38 +15,55 @@ async fn get_feed(url: String, client: &Client) -> Result<Channel> {
Ok(channel)
}
/// Extract the domain of an item's link, e.g. `"example.com"` from
/// `"https://example.com/post/1"`.
///
/// Returns `None` when the item has no link, the link does not parse as a
/// URL, or the parsed URL has no domain component (e.g. IP-address hosts).
fn get_domain(item: &rss::Item) -> Option<String> {
    // Straight-line `?` chain replaces the original
    // `.map(..).flatten()?` pyramid (clippy::map_flatten); behavior is
    // unchanged: every failure path still yields `None`.
    let link = item.link()?;
    let parsed = Url::parse(link).ok()?;
    parsed.domain().map(str::to_string)
}
async fn complete(channel: Channel, client: &Client) -> Result<Box<Channel>> {
let items: Vec<rss::Item> = channel.items().into_iter().cloned().collect();
let grouped: Vec<Vec<rss::Item>> = channel
.items()
.into_iter()
.chunk_by(|item| get_domain(*item))
.into_iter()
.map(|(_k, v)| v.cloned().collect())
.collect();
let mut set = JoinSet::new();
for mut item in items {
set.spawn({
let client = client.clone();
async move {
if let Some(link) = item.link.clone() {
if let Ok(content) = get_content(link, &client.clone()).await {
item.set_description(content);
for items in grouped.into_iter() {
let client = client.clone();
set.spawn(async move {
let mut new_items = vec![];
let mut wait_time = Duration::from_secs(0);
for item in items {
tokio::time::sleep(wait_time).await;
let mut new_item: rss::Item = item.clone().to_owned();
if let Some(link) = item.link() {
if let Ok(content) = get_content(link, &client.clone()).await {
new_item.set_description(content);
};
};
new_items.push(new_item);
wait_time = Duration::from_secs(1);
}
item
}
new_items
});
}
let updated_items = set.join_all().await;
let items: Vec<rss::Item> = set.join_all().await.concat();
let mut new_channel = channel.clone();
new_channel.set_items(updated_items);
new_channel.set_items(items);
Ok(Box::new(new_channel))
}
async fn get_content(link: String, client: &Client) -> Result<String> {
let response = client.get(&link).send().await?;
let content = extractor::extract(
&mut response.bytes().await?.as_ref(),
&Url::parse(link.as_str())?,
)?
.content;
async fn get_content(link: &str, client: &Client) -> Result<String> {
let response = client.get(link).send().await?;
let content =
extractor::extract(&mut response.bytes().await?.as_ref(), &Url::parse(link)?)?.content;
Ok(content)
}
@ -69,6 +89,6 @@ async fn main() {
Ok::<String, warp::Rejection>(format!("{}", updated))
}
})
.map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml"));
.map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml"));
warp::serve(path).run(([127, 0, 0, 1], 3030)).await;
}