initial commit

This commit is contained in:
winneratwin 2022-06-08 01:16:26 +01:00
commit 9eb2e09aab
Signed by: winneratwin
GPG Key ID: CDBC42F8803D689E
5 changed files with 3527 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

1516
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

11
Cargo.toml Normal file
View File

@ -0,0 +1,11 @@
[package]
name = "young-scot-discount-scraper"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# HTTP client; src/main.rs calls `reqwest::get` to download each listing page.
reqwest = { version = "0.11", features = [] }
# Async runtime; src/main.rs starts it through the `#[tokio::main]` attribute.
tokio = { version = "1", features = ["full"] }
# HTML parsing and CSS-selector queries (`Html`, `Selector`) over the downloaded pages.
scraper = "0.13.0"

1918
deals.txt Normal file

File diff suppressed because it is too large Load Diff

81
src/main.rs Normal file
View File

@ -0,0 +1,81 @@
use scraper::{Html, Selector};
#[tokio::main]
async fn main() {
let url_base = "https://young.scot/discounts-and-rewards/discounts?curPage=";
//request first page
let response = reqwest::get(url_base.to_string() + &1.to_string()).await.unwrap();
//parse first page
let document = Html::parse_document(&response.text().await.unwrap());
println!("page: 1");
print_deals(&document);
//get total pages
//the number of pages is in the second last page of the pagination
//get pagnation
let selector = Selector::parse("ul.pagination > li > a").unwrap();
let pages = document.select(&selector).map(|element| element.text().next().unwrap().to_string()).collect::<Vec<String>>();
//get element before next page button
let total_pages = pages[pages.len() - 2].parse::<i32>().unwrap();
//loop through all pages
for page_number in 2..=total_pages{
println!("page: {}", page_number);
//request first page
let response = reqwest::get(url_base.to_string() + &page_number.to_string()).await.unwrap();
//parse first page
let document = Html::parse_document(&response.text().await.unwrap());
print_deals(&document);
}
}
fn print_deals(document: &Html){
//select ul with class="card__list flex-equal-height push--top"
let selector = Selector::parse(r#"ul[class="card__list flex-equal-height push--top"] > li > a"#).unwrap();
//get element with selector
let elements = document.select(&selector);
//iterate over deals
for element in elements {
//get href from element
let href = element.value().attr("href").unwrap();
println!("link for deal: {}", href);
let logo = element.select(&Selector::parse(r#"div[class="card__logo"] > img"#).unwrap()).next();
if !logo.is_some(){
let text = element.select(&Selector::parse(r#"div[class="card__description"] > p"#).unwrap()).next().unwrap().text().next().unwrap();
println!("company name: {}", text);
} else {
let logo_src = logo.unwrap().value().attr("alt").unwrap();
println!("company name(from image alt text): {}", logo_src);
}
let flag = element.select(&Selector::parse(r#"div[class="card__flag"]"#).unwrap()).next();
if flag.is_some() {
let flag_src = flag.unwrap().text().next().unwrap();
println!("flag: {}", flag_src);
} else {
println!("flag: None");
}
//get offer text
let offer_text = element.select(&Selector::parse(r#"span[class="offer-text"]"#).unwrap()).next().unwrap().text().next().unwrap().to_string();
println!("offer text: {}", offer_text);
//print separator
println!("-----------------------------------------------------");
}
}