|
|
@ -0,0 +1,234 @@ |
|
|
|
|
|
|
|
use std::collections::{HashSet, HashMap}; |
|
|
|
|
|
|
|
use serde::{Serialize, Deserialize}; |
|
|
|
|
|
|
|
use std::fs; |
|
|
|
|
|
|
|
use std::io::Write; |
|
|
|
|
|
|
|
use anyhow::Result; |
|
|
|
|
|
|
|
use matrix_sdk::{ |
|
|
|
|
|
|
|
config::SyncSettings, |
|
|
|
|
|
|
|
Client, Room, |
|
|
|
|
|
|
|
ruma::events::room::message::RoomMessageEventContent, |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] |
|
|
|
|
|
|
|
struct Config { |
|
|
|
|
|
|
|
keywords : Vec<String>, |
|
|
|
|
|
|
|
overview_url : String, // "https://www.tuebingen.de/gemeinderat/info.php"
|
|
|
|
|
|
|
|
detail_link_path : String, // "si0057.php"
|
|
|
|
|
|
|
|
id_query_key : String, // "__ksinr"
|
|
|
|
|
|
|
|
detail_generate_url : String, // "https://www.tuebingen.de/gemeinderat/si0056.php"
|
|
|
|
|
|
|
|
id_generate_key : String, // "__ksinr"
|
|
|
|
|
|
|
|
matrix_server_url : String, |
|
|
|
|
|
|
|
matrix_user : String, |
|
|
|
|
|
|
|
matrix_password : String, |
|
|
|
|
|
|
|
matrix_room : String, |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] |
|
|
|
|
|
|
|
struct ScrapeResult { |
|
|
|
|
|
|
|
by_id : HashMap<String, Vec<String>>, |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] |
|
|
|
|
|
|
|
struct State { |
|
|
|
|
|
|
|
scrape_result : Option<ScrapeResult>, |
|
|
|
|
|
|
|
error : Option<String>, |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Erases the concrete type of `error` by moving it into a boxed
/// `std::error::Error` trait object.
///
/// Handy as an argument to `map_err` when differently typed errors have to
/// flow through one `Result<_, Box<dyn std::error::Error>>`.
fn box_error<T>(error: T) -> Box<dyn std::error::Error>
where
    T: std::error::Error + 'static,
{
    error.into()
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async fn scrape_websites(config: &Config) -> Result<ScrapeResult, Box<dyn std::error::Error>> { |
|
|
|
|
|
|
|
let overview_url = url::Url::parse(config.overview_url.as_str())?; |
|
|
|
|
|
|
|
let resp = reqwest::get(overview_url.as_str()) |
|
|
|
|
|
|
|
.await? |
|
|
|
|
|
|
|
.text() |
|
|
|
|
|
|
|
.await?; |
|
|
|
|
|
|
|
let fragment = scraper::Html::parse_document(&resp); |
|
|
|
|
|
|
|
let selector = scraper::Selector::parse("a[href]").unwrap(); |
|
|
|
|
|
|
|
let mut ids = Vec::new(); |
|
|
|
|
|
|
|
for element in fragment.select(&selector) { |
|
|
|
|
|
|
|
let link_target = overview_url.join(element.attr("href").unwrap()); |
|
|
|
|
|
|
|
if let Ok(link_target) = link_target { |
|
|
|
|
|
|
|
if link_target.path().contains(&config.detail_link_path) { |
|
|
|
|
|
|
|
let mut nr : Option<String> = None; |
|
|
|
|
|
|
|
for (k, v) in link_target.query_pairs() { |
|
|
|
|
|
|
|
if k == config.id_query_key { |
|
|
|
|
|
|
|
nr = Some(v.to_string()); |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if let Some(nr) = nr { |
|
|
|
|
|
|
|
ids.push(nr); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
let base_sub_url = url::Url::parse(config.detail_generate_url.as_str())?; |
|
|
|
|
|
|
|
let selectors = vec![ |
|
|
|
|
|
|
|
scraper::Selector::parse("div.smc-card-text-title").unwrap(), |
|
|
|
|
|
|
|
scraper::Selector::parse("div.card-body").unwrap(), |
|
|
|
|
|
|
|
]; |
|
|
|
|
|
|
|
let mut by_id = HashMap::<String, Vec<String>>::new(); |
|
|
|
|
|
|
|
for id in ids { |
|
|
|
|
|
|
|
let mut sub_url = base_sub_url.clone(); |
|
|
|
|
|
|
|
let query = format!("{}={}", config.id_generate_key, &id); |
|
|
|
|
|
|
|
sub_url.set_query(Some(&query)); |
|
|
|
|
|
|
|
let resp = reqwest::get(sub_url.as_str()) |
|
|
|
|
|
|
|
.await? |
|
|
|
|
|
|
|
.text() |
|
|
|
|
|
|
|
.await?; |
|
|
|
|
|
|
|
let fragment = scraper::Html::parse_document(&resp); |
|
|
|
|
|
|
|
let mut contained_keywords = HashSet::<String>::new(); |
|
|
|
|
|
|
|
for selector in &selectors { |
|
|
|
|
|
|
|
for element in fragment.select(selector) { |
|
|
|
|
|
|
|
for text in element.text() { |
|
|
|
|
|
|
|
for n in 0..config.keywords.len() { |
|
|
|
|
|
|
|
let keyword = &config.keywords[n]; |
|
|
|
|
|
|
|
if text.to_lowercase().contains(&keyword.to_lowercase()) { |
|
|
|
|
|
|
|
contained_keywords.insert(keyword.to_string()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if !contained_keywords.is_empty() { |
|
|
|
|
|
|
|
by_id.insert(id, contained_keywords.into_iter().collect()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Ok(ScrapeResult{by_id}) |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async fn post_message(room: &Room, message: &str) -> Result<(), Box<dyn std::error::Error>> |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
room.send(RoomMessageEventContent::text_plain(message)).await?; |
|
|
|
|
|
|
|
Ok(()) |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn get_keyword_messages(config: &Config, old_result: Option<&ScrapeResult>, new_result: &ScrapeResult) -> Vec<String> |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
let base_sub_url = url::Url::parse(config.detail_generate_url.as_str()).unwrap(); |
|
|
|
|
|
|
|
let mut new_pages = Vec::<String>::new(); |
|
|
|
|
|
|
|
for (id, new_keywords) in &new_result.by_id { |
|
|
|
|
|
|
|
let changed = { |
|
|
|
|
|
|
|
if let Some(old_result) = old_result { |
|
|
|
|
|
|
|
if let Some(old_keywords) = old_result.by_id.get(id) { |
|
|
|
|
|
|
|
old_keywords != new_keywords |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
true |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
true |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
if changed { |
|
|
|
|
|
|
|
let mut sub_url = base_sub_url.clone(); |
|
|
|
|
|
|
|
let query = format!("{}={}", config.id_generate_key, &id); |
|
|
|
|
|
|
|
sub_url.set_query(Some(&query)); |
|
|
|
|
|
|
|
if !new_keywords.is_empty() { |
|
|
|
|
|
|
|
new_pages.push(format!("Auf der Seite {} sind folgende Keywords in der Tagesordnung gefunden worden (neue Seite oder Änderung an den Keywords): {:?}", |
|
|
|
|
|
|
|
sub_url.as_str(), new_keywords)); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
new_pages |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[tokio::main] |
|
|
|
|
|
|
|
async fn main() -> Result<(), Box<dyn std::error::Error>> { |
|
|
|
|
|
|
|
let builder = config::Config::builder() |
|
|
|
|
|
|
|
.add_source(config::File::new("config.toml", config::FileFormat::Toml)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let config : Config = builder.build()?.try_deserialize()?; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let matrix_server_url = url::Url::parse(&config.matrix_server_url).expect("Couldn't parse the matrix server URL"); |
|
|
|
|
|
|
|
let client = Client::new(matrix_server_url).await.unwrap(); |
|
|
|
|
|
|
|
client |
|
|
|
|
|
|
|
.matrix_auth() |
|
|
|
|
|
|
|
.login_username(&config.matrix_user, &config.matrix_password) |
|
|
|
|
|
|
|
.initial_device_display_name("gemeinderat-bot") |
|
|
|
|
|
|
|
.await?; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
client.sync_once(SyncSettings::default()).await?; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let mut room : Option<Room> = None; |
|
|
|
|
|
|
|
for r in client.rooms() { |
|
|
|
|
|
|
|
if let Some(name) = r.name() { |
|
|
|
|
|
|
|
if name == config.matrix_room { |
|
|
|
|
|
|
|
room = Some(r); |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
let room = room.ok_or(Box::<dyn std::error::Error>::from("The room was not found on the server"))?; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let mut last_state = State { scrape_result: None, error: None }; |
|
|
|
|
|
|
|
if let Ok(state_file_contents) = fs::read_to_string("state.toml") { |
|
|
|
|
|
|
|
if let Ok(state_file_contents) = toml::from_str(state_file_contents.as_str()) { |
|
|
|
|
|
|
|
last_state = state_file_contents |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let scrape_result = scrape_websites(&config).await; |
|
|
|
|
|
|
|
let mut new_state = State { scrape_result: None, error: None }; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
match (&last_state.scrape_result, &last_state.error, &scrape_result) { |
|
|
|
|
|
|
|
(_, Some(old_error_message), Err(err)) => { |
|
|
|
|
|
|
|
let new_error_message = format!("{}", err); |
|
|
|
|
|
|
|
if new_error_message != *old_error_message { |
|
|
|
|
|
|
|
post_message(&room, format!("Euer freundlicher Gemeinderat-Bot konnte die Gemeinderats-Seite nicht auslesen (der Fehler hat sich seit dem letzten Mal geändert): {}", new_error_message).as_str()).await?; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
}, |
|
|
|
|
|
|
|
(_, _, Err(err)) => { |
|
|
|
|
|
|
|
let new_error_message = format!("{}", err); |
|
|
|
|
|
|
|
post_message(&room, format!("Euer freundlicher Gemeinderat-Bot konnte die Gemeinderats-Seite nicht auslesen: {}", new_error_message).as_str()).await?; |
|
|
|
|
|
|
|
}, |
|
|
|
|
|
|
|
(Some(old_result), _, Ok(new_result)) => { |
|
|
|
|
|
|
|
let messages = get_keyword_messages(&config, Some(old_result), new_result); |
|
|
|
|
|
|
|
if !messages.is_empty() { |
|
|
|
|
|
|
|
post_message(&room, "Euer freundlicher Gemeinderat-Bot hat neue Gemeinderatssitzungen gefunden, in denen beobachtete Keywords in der Tagesordnung gefunden wurden:").await?; |
|
|
|
|
|
|
|
for message in &messages { |
|
|
|
|
|
|
|
post_message(&room, message).await?; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} else if old_result != new_result { |
|
|
|
|
|
|
|
post_message(&room, "Alle Gemeinderatssitzungen, in denen beobachtete Keywords in der Tagesordnung gefunden wurden, sind nicht mehr auf der Webseite.").await?; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
}, |
|
|
|
|
|
|
|
(None, Some(_old_error), Ok(new_result)) => { |
|
|
|
|
|
|
|
let messages = get_keyword_messages(&config, None, new_result); |
|
|
|
|
|
|
|
if messages.is_empty() { |
|
|
|
|
|
|
|
post_message(&room, "Euer freundlicher Gemeinderat-Bot funktioniert nach dem letzten Fehler wieder. Es sind aktuell keine Sitzungen mit beobachteten Keywords auf der Webseite.").await?; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
post_message(&room, "Euer freundlicher Gemeinderat-Bot funktioniert nach dem letzten Fehler wieder. Folgende Sitzungen haben beobachtete Keywords auf der Tagesordnung:").await?; |
|
|
|
|
|
|
|
for message in &messages { |
|
|
|
|
|
|
|
post_message(&room, message).await?; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
}, |
|
|
|
|
|
|
|
(None, None, Ok(new_result)) => { |
|
|
|
|
|
|
|
let messages = get_keyword_messages(&config, None, new_result); |
|
|
|
|
|
|
|
if messages.is_empty() { |
|
|
|
|
|
|
|
post_message(&room, "Euer freundlicher Gemeinderat-Bot ist zum ersten Mal durchgelaufen. Es sind aktuell keine Sitzungen mit beobachteten Keywords auf der Webseite.").await?; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
post_message(&room, "Euer freundlicher Gemeinderat-Bot ist zum ersten Mal durchgelaufen. Folgende Sitzungen haben beobachtete Keywords auf der Tagesordnung:").await?; |
|
|
|
|
|
|
|
for message in &messages { |
|
|
|
|
|
|
|
post_message(&room, message).await?; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
}, |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
match scrape_result { |
|
|
|
|
|
|
|
Ok(result) => { new_state.scrape_result = Some(result); }, |
|
|
|
|
|
|
|
Err(err) => { new_state.error = Some(format!("{}", err)); }, |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if let Err(error) = toml::to_string(&new_state).map_err(box_error).and_then(|new_state| std::fs::File::create("state.toml").map_err(box_error)?.write_all(new_state.as_bytes()).map_err(box_error)) { |
|
|
|
|
|
|
|
println!("Could not save state"); |
|
|
|
|
|
|
|
dbg!(error); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Ok(()) |
|
|
|
|
|
|
|
} |