ashbat/src/main.rs

320 lines
6.9 KiB
Rust

use ego_tree::NodeRef;
use gtk::{
Align, Application, ApplicationWindow, Box, Entry, Expander, Justification, Label, ListBox,
Orientation, PolicyType, ScrolledWindow, prelude::*,
};
use scraper::{
Html,
Node::{self, Element, Text},
};
use std::time::Instant;
/// Application identifier handed to the GTK `Application` builder in `main`.
const APP_ID: &str = "org.antopie.AshBat";
/// Program entry point.
///
/// Creates the GTK application identified by `APP_ID`, registers `build_ui` as
/// the handler of its `activate` signal and runs it. The value returned by
/// `run()` is discarded.
fn main() {
    let application = Application::builder().application_id(APP_ID).build();
    application.connect_activate(build_ui);
    application.run();
}
/// Builds and presents the main window of the application.
///
/// The window holds a vertical box containing, from top to bottom:
/// the address entry, a metadata label, an expander listing the response
/// headers, and a scrolled area that shows the rendered page as a markup label.
///
/// Activating the address entry or clicking a link inside the page label both
/// call `open` with the relevant address.
fn build_ui(app: &Application) {
    // Root container: vertical stack with a uniform 12 px margin and spacing.
    let root = Box::builder()
        .orientation(Orientation::Vertical)
        .hexpand(true)
        .spacing(12)
        .margin_top(12)
        .margin_bottom(12)
        .margin_start(12)
        .margin_end(12)
        .build();

    // Page area: only vertical scrolling is allowed.
    let page_box = Box::builder().build();
    let page_scroller = ScrolledWindow::builder()
        .hscrollbar_policy(PolicyType::Never)
        .child(&page_box)
        .build();

    let window = ApplicationWindow::builder()
        .application(app)
        .title("AshBat")
        .child(&root)
        .maximized(true)
        .build();
    window.present();

    let address_entry = Entry::builder()
        .placeholder_text("Saisir une adresse")
        .halign(Align::Center)
        .max_width_chars(64)
        .input_purpose(gtk::InputPurpose::Url)
        .build();

    let meta_label = Label::builder().justify(Justification::Center).build();

    let header_list = ListBox::builder()
        .selection_mode(gtk::SelectionMode::None)
        .build();
    let header_expander = Expander::builder()
        .label("Entêtes")
        .child(&header_list)
        .build();

    // Page shown at start-up, before anything has been downloaded.
    // Alternative for local testing: include_str!("test.html").to_owned()
    let init_html = String::from(
        "<!DOCTYPE html><html><body><p>Paragraphe ! et <a href='/test'>lien</a> et <a href='/hello'>.</a></p></body></html>",
    );
    let page_label = Label::builder()
        .use_markup(true)
        .selectable(true)
        .wrap(true)
        .valign(Align::Start)
        .vexpand(true)
        .build();
    page_label.set_markup(&parse(init_html));
    page_box.append(&page_label);

    // Clicking a link loads it through `open`; `Stop` tells GTK the
    // activation has been handled here.
    page_label.connect_activate_link(glib::clone!(
        #[weak]
        meta_label,
        #[weak]
        header_list,
        #[weak]
        address_entry,
        #[upgrade_or_panic]
        move |clicked_label, uri| {
            open(
                address_entry.clone(),
                clicked_label.clone(),
                meta_label.clone(),
                header_list.clone(),
                uri,
            );
            glib::Propagation::Stop
        }
    ));

    // Activating the address entry loads whatever it currently contains.
    // The page label is moved into this closure, which keeps it strongly referenced.
    address_entry.connect_activate(glib::clone!(
        #[weak]
        meta_label,
        #[weak]
        header_list,
        move |entry| {
            open(
                entry.clone(),
                page_label.clone(),
                meta_label.clone(),
                header_list.clone(),
                &entry.text(),
            );
        }
    ));

    // Top-to-bottom layout of the root container.
    root.append(&address_entry);
    root.append(&meta_label);
    root.append(&header_expander);
    root.append(&page_scroller);
}
/// Downloads `address` and displays the result in the given widgets.
///
/// * `address_entry` – address bar; its current text is used as the base URL
///   for resolving `address`, and it is overwritten with the final URL of the
///   response.
/// * `label` – receives the page rendered as markup by `parse`.
/// * `meta` – receives the HTTP version, status, body size and timings, or an
///   error message when `address` cannot be turned into a URL.
/// * `headers` – emptied, then filled with one row per response header.
/// * `address` – absolute URL, or URL relative to the address bar content.
///
/// An unusable address no longer aborts the program: it is reported in `meta`
/// and the function returns without touching the other widgets. `download`
/// itself still panics on network failures.
fn open(address_entry: Entry, label: Label, meta: Label, headers: ListBox, address: &str) {
    // Resolve against the address bar when it holds a valid URL; otherwise
    // (e.g. empty bar on the start page) `address` has to be absolute itself.
    let resolved = match reqwest::Url::parse(&address_entry.text()) {
        Ok(base) => base.join(address),
        Err(_) => reqwest::Url::parse(address),
    };
    let absolute_url = match resolved {
        Ok(url) => url,
        Err(error) => {
            meta.set_text(&format!("Invalid address {address:?}: {error}"));
            return;
        }
    };

    let download_start = Instant::now();
    let (final_url, version, status, response_headers, html) = download(absolute_url);
    let download_duration = download_start.elapsed();
    address_entry.buffer().set_text(final_url.as_str());

    let html_length = html.len();
    let parse_start = Instant::now();
    let parsed = parse(html);
    let parse_duration = parse_start.elapsed();

    headers.remove_all();
    // When iterating a `HeaderMap` by value, repeated headers are yielded with
    // `None` as name, meaning "same name as the previous item".
    let mut current_name = String::new();
    for (name, value) in response_headers {
        if let Some(name) = name {
            current_name = name.to_string();
        }
        let row = Label::new(Some(format!("{current_name}: {value:?}").as_str()));
        row.set_halign(Align::Start);
        headers.append(&row);
    }

    // Bodies up to 8 KiB are reported in bytes, larger ones in whole KiB.
    let size = if html_length > 1024 * 8 {
        format!("{} KiB", html_length / 1024)
    } else {
        format!("{html_length} B")
    };
    meta.set_text(&format!(
        "{:#?} {}\n{size}\n{download_duration:.2?} to download, {parse_duration:.2?} to parse",
        version,
        status.as_str()
    ));
    label.set_markup(&parsed);
}
#[tokio::main]
async fn download(
url: reqwest::Url,
) -> (
reqwest::Url,
reqwest::Version,
reqwest::StatusCode,
reqwest::header::HeaderMap,
String,
) {
use reqwest::header;
let mut headers = header::HeaderMap::new();
headers.insert("Accept", header::HeaderValue::from_static("text/html"));
let client = reqwest::Client::builder()
.http1_only()
.https_only(true)
.min_tls_version(reqwest::tls::Version::TLS_1_3)
.use_rustls_tls()
.referer(false)
.default_headers(headers)
.timeout(core::time::Duration::new(15, 0))
.build()
.unwrap();
let response = client.get(url).send().await.unwrap();
(
response.url().clone(),
response.version(),
response.status(),
response.headers().clone(),
response.text().await.unwrap(),
)
}
/// Names of the HTML elements that `parse_node` puts on lines of their own:
/// a newline is emitted before and after such an element unless the output
/// already ends with one.
const BLOCK_TAGS: &[&str] = &[
"address",
"article",
"aside",
"blockquote",
"dd",
"details",
"div",
"dl",
"dt",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"header",
"hgroup",
"hr",
"li",
"main",
"nav",
"noscript",
"ol",
"p",
"pre",
"section",
"summary",
"table",
"ul",
];
/// Renders `node` and all its descendants as Pango markup.
///
/// Returns `paragraph_string` followed by the markup generated for `node`;
/// `paragraph_string` is the output produced so far and is only inspected to
/// decide whether a line break or leading spaces are needed.
///
/// Rendering rules:
/// * elements listed in `BLOCK_TAGS` start and end on their own line;
/// * `h1`, `h2`, `h3` are enlarged, `code` is monospace, `br` is a newline;
/// * `a` becomes a markup link whose `href` is escaped for use inside a
///   double-quoted attribute (an unescaped `&` or `"` would make the whole
///   markup invalid);
/// * the content of `head`, `script` and `style` is skipped;
/// * text is escaped, with newlines removed and surrounding tabs trimmed.
fn parse_node(node: NodeRef<'_, Node>, paragraph_string: &str) -> String {
    let mut markup = paragraph_string.to_owned();
    append_node_markup(node, &mut markup);
    markup
}

/// Appends the markup for `node` and its descendants to `markup` in place, so
/// the accumulated output is not copied at every level of the tree.
fn append_node_markup(node: NodeRef<'_, Node>, markup: &mut String) {
    match node.value() {
        Text(text) => {
            // NOTE(review): newlines are dropped rather than turned into a
            // space, so words separated only by a line break get glued
            // together — confirm whether that is intended.
            let flattened = text.replace('\n', "");
            let mut visible = flattened.trim_matches('\t');
            if markup.ends_with('\n') {
                // No indentation at the start of a line.
                visible = visible.trim_start_matches(' ');
            }
            markup.push_str(&html_escape::encode_text(visible));
        }
        Element(element) => {
            let tag = element.name();
            let is_block = BLOCK_TAGS.contains(&tag);

            // Opening part.
            if is_block && !markup.ends_with('\n') {
                markup.push('\n');
            }
            markup.push_str(match tag {
                "h1" => "<span font_size=\"300%\">",
                "h2" => "<span font_size=\"180%\">",
                "h3" => "<span font_size=\"130%\">",
                "code" => "<span font_family=\"monospace\">",
                "br" => "\n",
                _ => "",
            });
            if tag == "a" {
                let href = element.attr("href").unwrap_or("");
                markup.push_str("<a href=\"");
                markup.push_str(&html_escape::encode_double_quoted_attribute(href));
                markup.push_str("\">");
            }

            // Children, except for elements whose content is never displayed.
            if !matches!(tag, "head" | "script" | "style") {
                for child in node.children() {
                    append_node_markup(child, markup);
                }
            }

            // Closing part.
            markup.push_str(match tag {
                "h1" | "h2" | "h3" | "code" => "</span>",
                "a" => "</a>",
                _ => "",
            });
            if is_block && !markup.ends_with('\n') {
                markup.push('\n');
            }
        }
        // Comments, doctype, etc. produce no output.
        _ => {}
    }
}
/// Parses `html` as a full document and returns it rendered as Pango markup.
///
/// Rendering starts at the first `<html>` element found in the parsed tree
/// and is delegated to `parse_node`.
///
/// No debug output is written: dumping the whole tree and the whole result to
/// stdout on every page load was slow on large pages and was counted in the
/// parse time shown to the user.
///
/// # Panics
///
/// Panics if the parsed tree contains no `<html>` element.
fn parse(html: String) -> String {
    let document = Html::parse_document(&html);
    let root = document
        .tree
        .nodes()
        .find(|node| matches!(node.value(), Element(element) if *element.name.local == *"html"))
        .expect("No <html> element.");
    parse_node(root, &String::new())
}