use scraper::{Html,Selector}; use url::{Url,Host}; use futures::future; type SiteStat = (Url, Vec); #[tokio::main] async fn main() -> Result<(), Box> { let addr = std::env::args().nth(1) .unwrap_or_else(|| "https://tmplab.org".to_string()); let links = get_links(addr.as_ref()).await?; let links : Vec = links.into_iter() .filter(| url | url.host() != Some(Host::Domain(&addr))) .collect(); let joins = links.into_iter() .map(| url | { tokio::spawn(async move { fetch_site(url).await }) }); let results = future::join_all(joins).await .into_iter() .filter_map(| r | r.ok()); for (url, links) in results { println!("{url} : {} links", links.len()) } Ok(()) } async fn fetch_site(url: Url) -> SiteStat { println!("Start fetching {url}"); let links = get_links(url.as_ref()).await .unwrap_or_else(|_| vec![]); println!("Got {url} => {}", links.len()); (url, links) } async fn get_links( url: &str ) -> Result, reqwest::Error> { let a_selector = Selector::parse("a[href]").unwrap(); let body = reqwest::get(url) .await? .text() .await?; Ok(Html::parse_document(&body) .select(&a_selector) .filter_map(| link | link.value().attr("href") .and_then(| href | Url::parse(href).ok())) .collect()) }