feat: add a delay between requests to the same website
This commit is contained in:
parent
87a001f0cc
commit
d971927b4d
3 changed files with 57 additions and 20 deletions
16
Cargo.lock
generated
16
Cargo.lock
generated
|
|
@ -164,6 +164,7 @@ name = "complete-rss"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools",
|
||||
"llm_readability",
|
||||
"reqwest",
|
||||
"rss",
|
||||
|
|
@ -287,6 +288,12 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.35"
|
||||
|
|
@ -743,6 +750,15 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.15"
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ edition = "2024"
|
|||
|
||||
[dependencies]
|
||||
anyhow = "1.0.99"
|
||||
itertools = "0.14.0"
|
||||
llm_readability = "0.0.11"
|
||||
reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
rss = "2.0.12"
|
||||
|
|
|
|||
60
src/main.rs
60
src/main.rs
|
|
@ -1,4 +1,7 @@
|
|||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use itertools::Itertools;
|
||||
use llm_readability::extractor;
|
||||
use reqwest::{Client, Url};
|
||||
use rss::Channel;
|
||||
|
|
@ -12,38 +15,55 @@ async fn get_feed(url: String, client: &Client) -> Result<Channel> {
|
|||
Ok(channel)
|
||||
}
|
||||
|
||||
fn get_domain(item: &rss::Item) -> Option<String> {
|
||||
item.link().map(|link| {
|
||||
Url::parse(link)
|
||||
.ok()
|
||||
.map(|parsed| parsed.domain().map(|domain| domain.to_string()))
|
||||
}).flatten()?
|
||||
}
|
||||
|
||||
async fn complete(channel: Channel, client: &Client) -> Result<Box<Channel>> {
|
||||
let items: Vec<rss::Item> = channel.items().into_iter().cloned().collect();
|
||||
let grouped: Vec<Vec<rss::Item>> = channel
|
||||
.items()
|
||||
.into_iter()
|
||||
.chunk_by(|item| get_domain(*item))
|
||||
.into_iter()
|
||||
.map(|(_k, v)| v.cloned().collect())
|
||||
.collect();
|
||||
|
||||
let mut set = JoinSet::new();
|
||||
for mut item in items {
|
||||
set.spawn({
|
||||
let client = client.clone();
|
||||
async move {
|
||||
if let Some(link) = item.link.clone() {
|
||||
if let Ok(content) = get_content(link, &client.clone()).await {
|
||||
item.set_description(content);
|
||||
for items in grouped.into_iter() {
|
||||
let client = client.clone();
|
||||
set.spawn(async move {
|
||||
let mut new_items = vec![];
|
||||
let mut wait_time = Duration::from_secs(0);
|
||||
for item in items {
|
||||
tokio::time::sleep(wait_time).await;
|
||||
let mut new_item: rss::Item = item.clone().to_owned();
|
||||
if let Some(link) = item.link() {
|
||||
if let Ok(content) = get_content(link, &client.clone()).await {
|
||||
new_item.set_description(content);
|
||||
};
|
||||
};
|
||||
new_items.push(new_item);
|
||||
wait_time = Duration::from_secs(1);
|
||||
}
|
||||
item
|
||||
}
|
||||
new_items
|
||||
});
|
||||
}
|
||||
|
||||
let updated_items = set.join_all().await;
|
||||
let items: Vec<rss::Item> = set.join_all().await.concat();
|
||||
|
||||
let mut new_channel = channel.clone();
|
||||
new_channel.set_items(updated_items);
|
||||
new_channel.set_items(items);
|
||||
Ok(Box::new(new_channel))
|
||||
}
|
||||
|
||||
async fn get_content(link: String, client: &Client) -> Result<String> {
|
||||
let response = client.get(&link).send().await?;
|
||||
let content = extractor::extract(
|
||||
&mut response.bytes().await?.as_ref(),
|
||||
&Url::parse(link.as_str())?,
|
||||
)?
|
||||
.content;
|
||||
async fn get_content(link: &str, client: &Client) -> Result<String> {
|
||||
let response = client.get(link).send().await?;
|
||||
let content =
|
||||
extractor::extract(&mut response.bytes().await?.as_ref(), &Url::parse(link)?)?.content;
|
||||
Ok(content)
|
||||
}
|
||||
|
||||
|
|
@ -69,6 +89,6 @@ async fn main() {
|
|||
Ok::<String, warp::Rejection>(format!("{}", updated))
|
||||
}
|
||||
})
|
||||
.map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml"));
|
||||
.map(|reply| warp::reply::with_header(reply, "Content-Type", "application/rss+xml"));
|
||||
warp::serve(path).run(([127, 0, 0, 1], 3030)).await;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue