use scraper::{Html, Selector};
use url::Url;
use tokio::sync::mpsc::{self, Sender};
const WORKERS: usize = 8;
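// a crawled url together with the links that were found on that page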
type SiteStat = (Url, Vec<Url>);
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let addr = std::env::args().nth(1)
        .unwrap_or_else(|| "https://www.tmplab.org".to_string());
    let links = get_links(addr.as_ref()).await?;
    let addr = Url::parse(addr.as_ref())?;
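    // keep only the links pointing to another host (external links)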
    let mut links: Vec<Url> = links.into_iter()
        .filter(|url| url.host() != addr.host())
        .collect();
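    // number of answers we still expect back from the workers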
    let mut to_fetch = links.len();
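    // bounded channel through which each worker sends back exactly one message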
    let (tx, mut rx) = mpsc::channel(32);
    // spawn a pool of workers to get things started...
    for _ in 0..WORKERS {
        if let Some(addr) = links.pop() {
            spawn_worker(addr, tx.clone());
        }
    }
    let mut results = vec![];
    // get the results back from the workers through the channel
    while let Some(res) = rx.recv().await {
        to_fetch -= 1;
        let Some(site) = res else {
            continue;
        };
        results.push(site);
        // if there are still urls to fetch, pop one and spawn a new worker;
        // otherwise, break out of this loop once all the urls
        // have been answered...
        if let Some(addr) = links.pop() {
            spawn_worker(addr, tx.clone());
        } else if to_fetch == 0 {
            break;
        }
    }
    for (url, links) in results {
        println!("{url} : {} links", links.len());
    }
    Ok(())
}
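// get_links() is called above but not shown in this listing; here is a
// minimal sketch of what it could look like, assuming the reqwest crate
// for the HTTP request (the "a[href]" selector and the error type are
// also assumptions, not the original code): fetch the page, parse the
// HTML, and resolve every href against the page url. The Send + Sync
// bound keeps the future usable from spawned tasks.
async fn get_links(page: &str) -> Result<Vec<Url>, Box<dyn std::error::Error + Send + Sync>> {
    let body = reqwest::get(page).await?.text().await?;
    let document = Html::parse_document(&body);
    // Selector::parse only fails on a malformed selector, so unwrap is fine here
    let anchors = Selector::parse("a[href]").unwrap();
    let base = Url::parse(page)?;
    Ok(document
        .select(&anchors)
        .filter_map(|a| a.value().attr("href"))
        // join() resolves relative urls and silently drops unparsable ones
        .filter_map(|href| base.join(href).ok())
        .collect())
}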
// interestingly, this function must not be async...
// a worker fetches one url, sends its result back through the channel, and terminates.
fn spawn_worker(url: Url, tx: Sender<Option<SiteStat>>) {