refactor: change readability lib

This commit is contained in:
Moritz Böhme 2025-10-03 12:36:43 +02:00
parent c242ca8cc6
commit 0424544abf
No known key found for this signature in database
GPG key ID: 970C6E89EB0547A9
3 changed files with 607 additions and 141 deletions

739
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -6,7 +6,7 @@ edition = "2024"
[dependencies] [dependencies]
anyhow = "1.0.99" anyhow = "1.0.99"
itertools = "0.14.0" itertools = "0.14.0"
llm_readability = "0.0.11" readability-rust = "0.1.0"
reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false } reqwest = { version = "0.12.23", features = ["blocking", "rustls-tls"], default-features = false }
rss = "2.0.12" rss = "2.0.12"
tokio = { version = "1.47.1", features = ["full"] } tokio = { version = "1.47.1", features = ["full"] }

View file

@ -1,8 +1,7 @@
use std::time::Duration; use std::time::Duration;
use anyhow::Result; use anyhow::{Context, Result};
use itertools::Itertools; use itertools::Itertools;
use llm_readability::extractor;
use reqwest::{Client, Url}; use reqwest::{Client, Url};
use rss::Channel; use rss::Channel;
use tokio::{task::JoinSet, time::sleep}; use tokio::{task::JoinSet, time::sleep};
@ -59,8 +58,8 @@ async fn complete(mut channel: Channel, client: &Client) -> Result<Box<Channel>>
async fn get_content(link: &str, client: &Client) -> Result<String> { async fn get_content(link: &str, client: &Client) -> Result<String> {
let response = client.get(link).send().await?; let response = client.get(link).send().await?;
let content = let mut readablity = readability_rust::Readability::new(response.text().await?.as_ref(), None)?;
extractor::extract(&mut response.bytes().await?.as_ref(), &Url::parse(link)?)?.content; let content = readablity.parse().context("readablity parse error")?.content.context("readablity no content")?;
Ok(content) Ok(content)
} }