Rust 实战代码:提取网页中的链接
要获取某个网页中的链接,只需提取该网页中所有 a 标签的 href 属性。
依赖(Cargo.toml)
[package]
name = "links-extract"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
error-chain = "0.12.4"
reqwest = "0.11.12"
select = "0.5.0"
tokio = { version = "1.21.2", features = ["full"] }
代码
use error_chain::error_chain;
use select::document::Document;
use select::predicate::Name;
// Error handling: `error_chain!` declares this program's error types. The
// one-parameter `Result<()>` returned by `main` is expected to come from here.
error_chain! {
// `foreign_links` lists external error types to wrap so that `?` can convert them.
foreign_links{
// Failures from reqwest (sending the HTTP request, reading the response body).
Reqerror(reqwest::Error);
// Standard I/O errors; no code visible in this listing produces one.
IoError(std::io::Error);
}
}
/// Downloads the Rust homepage and prints the `href` value of every `<a>` tag,
/// one per line, in document order.
///
/// # Errors
///
/// Returns an error if the request fails, if the server answers with a 4xx/5xx
/// status, or if the response body cannot be read as text.
#[tokio::main]
async fn main() -> Result<()> {
    let body = reqwest::get("http://www.rust-lang.org/zh-CN/")
        .await?
        // Fail on 4xx/5xx instead of silently scraping links from an error page.
        .error_for_status()?
        // Read the whole response body as text.
        .text()
        .await?;

    Document::from(body.as_str())
        // Select every <a> element.
        .find(Name("a"))
        // Keep only anchors that carry an `href` attribute.
        .filter_map(|node| node.attr("href"))
        .for_each(|href| println!("{}", href));

    Ok(())
}
运行
在项目目录下执行 cargo run,程序会逐行打印该网页中所有 a 标签的 href 链接。

THE END