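// Minimal concurrent link checker: fetch https://tmplab.org, collect its
// outgoing links, then probe each one from four Tokio tasks sharing a
// single work list.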
use scraper::{Html, Selector};
use url::{Url, Host};
use std::sync::{Arc, Mutex};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
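    // Seed the work list with the links found on the start page, skipping
    // those whose host is www.tmplab.org.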
    let links = get_links("https://tmplab.org").await?;
    let links: Vec<Url> = links.into_iter()
        .filter(|url| url.host() != Some(Host::Domain("www.tmplab.org")))
        .collect();
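    // Put the URLs behind an Arc<Mutex<_>> so four tasks can share one list;
    // each worker gets its own clone of the handle.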
    let links1 = Arc::new(Mutex::new(links));
    let links2 = links1.clone();
    let links3 = links1.clone();
    let links4 = links1.clone();
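    // Spawn four workers that drain the shared list concurrently.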
    let h1 = tokio::spawn(async move { looper(links1).await; });
    let h2 = tokio::spawn(async move { looper(links2).await; });
    let h3 = tokio::spawn(async move { looper(links3).await; });
    let h4 = tokio::spawn(async move { looper(links4).await; });
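    // Wait for every worker; a task that panicked surfaces here as a JoinError.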
    h1.await?;
    h2.await?;
    h3.await?;
    h4.await?;
    Ok(())
}
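/// Worker loop: pop URLs off the shared list and try to fetch each one,
/// returning once the list is empty.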
async fn looper(links: Arc<Mutex<Vec<Url>>>) {
    loop {
        // Take the next URL inside its own block so the MutexGuard is dropped
        // before the await below (a std::sync::Mutex guard cannot be held
        // across an await point in a spawned task).
        let url = {
            let mut v = links.lock().unwrap();
            if let Some(url) = v.pop() {
                url
            } else {
                return;
            }
        };
        let res = match get_links(url.as_str()).await {
            Err(_) => "nope",
            Ok(_) => "YEA!",
        };
        println!("{url} => {res}");
    }
}
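/// Fetch `url` and return every absolute link found in its `a[href]` elements.
/// Relative hrefs fail `Url::parse` and are silently skipped.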
async fn get_links(
    url: &str,
) -> Result<Vec<Url>, reqwest::Error> {
    // "a[href]" is a fixed, valid CSS selector, so this unwrap cannot fail.
    let a_selector = Selector::parse("a[href]").unwrap();

    let body = reqwest::get(url)
        .await?
        .text()
        .await?;

    Ok(Html::parse_document(&body)
        .select(&a_selector)
        .filter_map(|link| link.value().attr("href")
            .and_then(|href| Url::parse(href).ok()))
        .collect())
}