initial commit
This commit is contained in:
commit
d646d23895
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
1516
Cargo.lock
generated
Normal file
1516
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
11
Cargo.toml
Normal file
11
Cargo.toml
Normal file
@ -0,0 +1,11 @@
|
||||
[package]
|
||||
name = "european-youth-card-discount-scraper"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
reqwest = { version = "0.11", features = [] }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
scraper = "0.13.0"
|
78
src/main.rs
Normal file
78
src/main.rs
Normal file
@ -0,0 +1,78 @@
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let url_base = "https://www.eyca.org/discounts/gb/";
|
||||
|
||||
let response = reqwest::get(url_base.to_string() + &1.to_string()).await.unwrap();
|
||||
|
||||
let html = response.text().await.unwrap();
|
||||
|
||||
let document = Html::parse_document(&html);
|
||||
|
||||
println!("page: 1");
|
||||
print_discounts(&document);
|
||||
|
||||
let selector = Selector::parse("div.paginate-pages > a").unwrap();
|
||||
|
||||
let paginate = document.select(&selector).map(|node| node.text().next().unwrap()).collect::<Vec<_>>();
|
||||
|
||||
let page_count = paginate[paginate.len() - 2].parse::<i32>().unwrap();
|
||||
|
||||
|
||||
for page_number in 2..=page_count {
|
||||
println!("page: {}", page_number);
|
||||
|
||||
let response = reqwest::get(url_base.to_string() + &page_number.to_string()).await.unwrap();
|
||||
|
||||
let html = response.text().await.unwrap();
|
||||
|
||||
let document = Html::parse_document(&html);
|
||||
|
||||
print_discounts(&document);
|
||||
}
|
||||
//print pretty html
|
||||
//println!("{:#?}", document.root_element().html());
|
||||
}
|
||||
|
||||
fn print_discounts(document: &Html) {
|
||||
let domain_base = "https://www.eyca.org";
|
||||
let selector = Selector::parse("article.p025 > div > a").unwrap();
|
||||
|
||||
for article in document.select(&selector) {
|
||||
//get the href attribute
|
||||
println!("link for deal: {}{}", domain_base,article.value().attr("href").unwrap());
|
||||
|
||||
//get company name
|
||||
let selector = Selector::parse("div > h2").unwrap();
|
||||
let company_name = article.select(&selector).next().unwrap().text().next().unwrap();
|
||||
println!("company name: {}", company_name);
|
||||
|
||||
//get description
|
||||
let selector = Selector::parse("div > p").unwrap();
|
||||
let description = article.select(&selector).next().unwrap().text().next().unwrap();
|
||||
println!("description: {}", description);
|
||||
|
||||
//get tags(if any)
|
||||
let selector = Selector::parse("div > div > ul > li").unwrap();
|
||||
let tags = article.select(&selector);
|
||||
if tags.clone().count() > 0 {
|
||||
print!("tags:");
|
||||
for tag in tags {
|
||||
print!(" \"{}\"", tag.text().next().unwrap());
|
||||
}
|
||||
println!();
|
||||
} else {
|
||||
println!("tags: None");
|
||||
}
|
||||
|
||||
//get category and locations
|
||||
let selector = Selector::parse("div.card-content.p1.sd-p2 > ul > li").unwrap();
|
||||
let mut category_and_locations = article.select(&selector);
|
||||
println!("category: {}", category_and_locations.next().unwrap().text().next().unwrap());
|
||||
println!("locations: {}", category_and_locations.next().unwrap().text().next().unwrap().trim());
|
||||
|
||||
//print separator
|
||||
println!("-----------------------------------------------------");
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user