initial commit
This commit is contained in:
commit
9eb2e09aab
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/target
|
1516
Cargo.lock
generated
Normal file
1516
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
11
Cargo.toml
Normal file
11
Cargo.toml
Normal file
@ -0,0 +1,11 @@
|
||||
[package]
|
||||
name = "young-scot-discount-scraper"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
reqwest = { version = "0.11", features = [] }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
scraper = "0.13.0"
|
81
src/main.rs
Normal file
81
src/main.rs
Normal file
@ -0,0 +1,81 @@
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let url_base = "https://young.scot/discounts-and-rewards/discounts?curPage=";
|
||||
|
||||
//request first page
|
||||
let response = reqwest::get(url_base.to_string() + &1.to_string()).await.unwrap();
|
||||
|
||||
//parse first page
|
||||
let document = Html::parse_document(&response.text().await.unwrap());
|
||||
|
||||
println!("page: 1");
|
||||
print_deals(&document);
|
||||
|
||||
//get total pages
|
||||
//the number of pages is in the second last page of the pagination
|
||||
|
||||
//get pagnation
|
||||
let selector = Selector::parse("ul.pagination > li > a").unwrap();
|
||||
let pages = document.select(&selector).map(|element| element.text().next().unwrap().to_string()).collect::<Vec<String>>();
|
||||
|
||||
//get element before next page button
|
||||
let total_pages = pages[pages.len() - 2].parse::<i32>().unwrap();
|
||||
|
||||
//loop through all pages
|
||||
for page_number in 2..=total_pages{
|
||||
println!("page: {}", page_number);
|
||||
//request first page
|
||||
let response = reqwest::get(url_base.to_string() + &page_number.to_string()).await.unwrap();
|
||||
|
||||
//parse first page
|
||||
let document = Html::parse_document(&response.text().await.unwrap());
|
||||
|
||||
print_deals(&document);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
/// Prints every deal card found in `document` to stdout.
///
/// For each card (an `<a>` directly inside an `<li>` of the deal list) this prints:
/// - the link (`href` attribute),
/// - the company name, taken from the logo image's `alt` text when available and
///   otherwise from the first text of the card description paragraph,
/// - the flag text, if the card has a flag,
/// - the offer text,
/// followed by a separator line.
///
/// Any piece that is missing from a card is printed as `None` instead of panicking,
/// so one malformed card does not abort the whole scrape.
fn print_deals(document: &Html) {
    // Compile all selectors once up front instead of once per card.
    let card_selector =
        Selector::parse(r#"ul[class="card__list flex-equal-height push--top"] > li > a"#)
            .expect("card selector is valid CSS");
    let logo_selector = Selector::parse(r#"div[class="card__logo"] > img"#)
        .expect("logo selector is valid CSS");
    let description_selector = Selector::parse(r#"div[class="card__description"] > p"#)
        .expect("description selector is valid CSS");
    let flag_selector =
        Selector::parse(r#"div[class="card__flag"]"#).expect("flag selector is valid CSS");
    let offer_selector =
        Selector::parse(r#"span[class="offer-text"]"#).expect("offer selector is valid CSS");

    // Iterate over deals.
    for card in document.select(&card_selector) {
        let href = card.value().attr("href").unwrap_or("None");
        println!("link for deal: {}", href);

        // Prefer the logo's alt text; fall back to the description paragraph when
        // the card has no logo (or the logo carries no alt attribute).
        let logo_alt = card
            .select(&logo_selector)
            .next()
            .and_then(|img| img.value().attr("alt"));
        match logo_alt {
            Some(alt) => println!("company name(from image alt text): {}", alt),
            None => {
                let name = card
                    .select(&description_selector)
                    .next()
                    .and_then(|paragraph| paragraph.text().next())
                    .unwrap_or("None");
                println!("company name: {}", name);
            }
        }

        let flag = card
            .select(&flag_selector)
            .next()
            .and_then(|flag| flag.text().next())
            .unwrap_or("None");
        println!("flag: {}", flag);

        let offer_text = card
            .select(&offer_selector)
            .next()
            .and_then(|offer| offer.text().next())
            .unwrap_or("None");
        println!("offer text: {}", offer_text);

        // Print separator between deals.
        println!("-----------------------------------------------------");
    }
}
|
Loading…
x
Reference in New Issue
Block a user