From 589dcb1cb3e053b90dcc862a56e716c146a40ac6 Mon Sep 17 00:00:00 2001 From: Paul Delafosse Date: Tue, 19 Oct 2021 15:10:46 +0200 Subject: [PATCH] feat(web): Fetch and cache favicons --- Cargo.lock | 241 +++++++++++++++++++++++++++++++- debian/control | 3 +- plugins/Cargo.toml | 2 + plugins/src/find/mod.rs | 5 +- plugins/src/terminal/mod.rs | 2 +- plugins/src/web/mod.rs | 268 +++++++++++++++++++++++++++++++++++- 6 files changed, 512 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4b00c96..07ab475 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -243,6 +243,12 @@ version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38" +[[package]] +name = "bytes" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" + [[package]] name = "cache-padded" version = "1.1.1" @@ -361,6 +367,37 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "curl" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aaa3b8db7f3341ddef15786d250106334d4a6c4b0ae4a46cd77082777d9849b9" +dependencies = [ + "curl-sys", + "libc", + "openssl-probe", + "openssl-sys", + "schannel", + "socket2", + "winapi", +] + +[[package]] +name = "curl-sys" +version = "0.4.49+curl-7.79.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0f44960aea24a786a46907b8824ebc0e66ca06bf4e4978408c7499620343483" +dependencies = [ + "cc", + "libc", + "libnghttp2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", + "winapi", +] + [[package]] name = "darling" version = "0.13.0" @@ -433,6 +470,15 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +[[package]] +name = "encoding_rs" +version = "0.8.28" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065" +dependencies = [ + "cfg-if 1.0.0", +] + [[package]] name = "enumflags2" version = "0.6.4" @@ -494,6 +540,16 @@ dependencies = [ "libc", ] +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + [[package]] name = "freedesktop-desktop-entry" version = "0.4.0" @@ -630,7 +686,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce54d63f8b0c75023ed920d46fd71d0cbbb830b0ee012726b5b4f506fb6dea5b" dependencies = [ - "bytes", + "bytes 0.5.6", "futures", "memchr", "pin-project 0.4.28", @@ -893,6 +949,17 @@ dependencies = [ "libc", ] +[[package]] +name = "http" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1323096b05d41827dadeaee54c9981958c0f94e670bc94ed80037d1a7b8b186b" +dependencies = [ + "bytes 1.1.0", + "fnv", + "itoa", +] + [[package]] name = "human-sort" version = "0.2.2" @@ -911,6 +978,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "instant" version = "0.1.10" @@ -920,6 +998,32 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "isahc" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "431445cb4ba85a80cb1438a9ae8042dadb78ae4046ecee89ad027b614aa0ddb7" +dependencies = [ + 
"async-channel", + "crossbeam-utils", + "curl", + "curl-sys", + "encoding_rs", + "event-listener", + "futures-lite", + "http", + "log", + "mime", + "once_cell", + "polling", + "slab", + "sluice", + "tracing", + "tracing-futures", + "url", + "waker-fn", +] + [[package]] name = "itertools" version = "0.10.1" @@ -947,6 +1051,28 @@ version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21" +[[package]] +name = "libnghttp2-sys" +version = "0.1.7+1.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57ed28aba195b38d5ff02b9170cbff627e336a20925e43b4945390401c5dc93f" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "libz-sys" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de5435b8549c16d423ed0c03dbaafe57cf6c3344744f1242520d59c9d8ecec66" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "locale_config" version = "0.3.0" @@ -987,6 +1113,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "matches" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" + [[package]] name = "memchr" version = "2.4.1" @@ -1105,6 +1237,25 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +[[package]] +name = "openssl-probe" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a" + +[[package]] +name = "openssl-sys" +version = "0.9.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69df2d8dfc6ce3aaf44b40dec6f487d5a886516cf6879c49e98e0710f310a058" +dependencies = [ + "autocfg", + "cc", + "libc", + 
"pkg-config", + "vcpkg", +] + [[package]] name = "pango" version = "0.14.3" @@ -1136,6 +1287,12 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + [[package]] name = "pest" version = "2.1.3" @@ -1256,6 +1413,7 @@ dependencies = [ "gtk", "human-sort", "human_format", + "isahc", "new_mime_guess", "pop-launcher", "postage", @@ -1268,6 +1426,7 @@ dependencies = [ "strsim", "tracing", "tracing-subscriber", + "url", "urlencoding", "ward", "zbus", @@ -1469,6 +1628,16 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "schannel" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75" +dependencies = [ + "lazy_static", + "winapi", +] + [[package]] name = "scoped-tls" version = "1.0.0" @@ -1592,6 +1761,17 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c307a32c1c5c437f38c7fd45d753050587732ba8628319fbdf12a7e289ccc590" +[[package]] +name = "sluice" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7400c0eff44aa2fcb5e31a5f24ba9716ed90138769e4977a2ba6014ae63eb5" +dependencies = [ + "async-channel", + "futures-core", + "futures-io", +] + [[package]] name = "smallvec" version = "1.6.1" @@ -1720,6 +1900,21 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tinyvec" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83b2a3d4d9091d0abd7eba4dc2710b1718583bd4d8992e2190720ea38f391f7" 
+dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + [[package]] name = "toml" version = "0.5.8" @@ -1736,6 +1931,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d" dependencies = [ "cfg-if 1.0.0", + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -1761,6 +1957,16 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "tracing-futures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +dependencies = [ + "pin-project 1.0.8", + "tracing", +] + [[package]] name = "tracing-log" version = "0.1.2" @@ -1819,6 +2025,21 @@ dependencies = [ "version_check", ] +[[package]] +name = "unicode-bidi" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f" + +[[package]] +name = "unicode-normalization" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-segmentation" version = "1.8.0" @@ -1831,12 +2052,30 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +[[package]] +name = "url" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + [[package]] name = "urlencoding" version 
= "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68b90931029ab9b034b300b797048cf23723400aa757e8a2bfb9d748102f9821" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version-compare" version = "0.0.11" diff --git a/debian/control b/debian/control index 50f4129..ec22abb 100644 --- a/debian/control +++ b/debian/control @@ -6,13 +6,14 @@ Build-Depends: cargo, debhelper-compat (=10), libgtk-3-dev, + libssl-dev, rustc (>=1.47), Standards-Version: 4.1.1 Homepage: https://github.com/pop-os/launcher Package: pop-launcher Architecture: amd64 arm64 -Depends: libgtk-3-0, qalc, fd-find, ${misc:Depends}, ${shlibs:Depends} +Depends: libgtk-3-0, qalc, fd-find, libssl1.1, ${misc:Depends}, ${shlibs:Depends} Description: Modular IPC-based desktop launcher service Package: pop-launcher-system76-power diff --git a/plugins/Cargo.toml b/plugins/Cargo.toml index 71aacf0..4ec6011 100644 --- a/plugins/Cargo.toml +++ b/plugins/Cargo.toml @@ -30,3 +30,5 @@ urlencoding = "2" zbus = "1" zvariant = "=2.6" # Restrict for 1.47 ward = "2.1.0" +isahc = "1.5.0" +url = "2.2.2" diff --git a/plugins/src/find/mod.rs b/plugins/src/find/mod.rs index b5db67d..c1f48f5 100644 --- a/plugins/src/find/mod.rs +++ b/plugins/src/find/mod.rs @@ -53,7 +53,7 @@ pub async fn main() { app.search(search).await; app.active.set(false); crate::send(&mut app.out, PluginResponse::Finished).await; - }, + } } } }; @@ -165,7 +165,8 @@ impl SearchContext { }, ..Default::default() }), - ).await; + ) + .await; return; } diff --git a/plugins/src/terminal/mod.rs b/plugins/src/terminal/mod.rs index 2fd697d..0288bfe 100644 --- a/plugins/src/terminal/mod.rs +++ b/plugins/src/terminal/mod.rs @@ -120,4 +120,4 @@ fn detect_terminal() -> (PathBuf, &'static str) { } (PathBuf::from("/usr/bin/gnome-terminal"), "--") -} \ No newline at end of 
file +} diff --git a/plugins/src/web/mod.rs b/plugins/src/web/mod.rs index 74b7819..555cba9 100644 --- a/plugins/src/web/mod.rs +++ b/plugins/src/web/mod.rs @@ -1,14 +1,25 @@ // SPDX-License-Identifier: GPL-3.0-only // Copyright © 2021 System76 -mod config; +use std::borrow::Cow; +use std::io; +use std::path::PathBuf; +use std::time::Duration; -use self::config::{Config, Definition}; use futures_lite::StreamExt; +use isahc::config::{Configurable, RedirectPolicy}; +use isahc::http::header::CONTENT_TYPE; +use isahc::{AsyncReadResponseExt, HttpClient}; +use smol::io::AsyncReadExt; +use smol::Unblock; +use url::Url; + use pop_launcher::*; -use smol::Unblock; -use std::io; +use self::config::{Config, Definition}; +use regex::Regex; + +mod config; pub async fn main() { let mut app = App::default(); @@ -32,14 +43,38 @@ pub struct App { config: Config, queries: Vec, out: Unblock, + client: HttpClient, + cache: PathBuf, } +const ALLOWED_FAVICON_MIME: [&str; 5] = [ + "image/vnd.microsoft.icon", + "image/png", + "image/gif", + "image/svg+xml", + "image/x-icon", +]; + impl Default for App { fn default() -> Self { + let cache = std::env::home_dir() + .map(|cache| cache.join(".cache/pop-launcher")) + .expect("no home dir"); + + if !cache.exists() { + std::fs::create_dir(&cache).expect("unable to create $HOME/.cache/pop-launcher") + } + Self { config: config::load(), queries: Vec::new(), out: async_stdout(), + client: HttpClient::builder() + .redirect_policy(RedirectPolicy::Follow) + .timeout(Duration::from_secs(1)) + .build() + .expect("failed to create http client"), + cache, } } } @@ -62,6 +97,7 @@ impl App { let (_, mut query) = query.split_at(word.len()); query = query.trim(); let encoded = build_query(def, query); + let icon = self.get_favicon(&def.name, &encoded).await; crate::send( &mut self.out, @@ -69,6 +105,7 @@ impl App { id: id as u32, name: [&def.name, ": ", query].concat(), description: encoded.clone(), + icon, ..Default::default() }), ) @@ -83,6 +120,61 @@ impl 
App { } } +impl App { + async fn get_favicon(&self, rule_name: &str, url: &str) -> Option { + let url = Url::parse(url).expect("invalid url"); + + let favicon_path = self.cache.join(format!("{}.ico", rule_name)); + + if favicon_path.exists() { + let favicon_path = favicon_path.to_string_lossy().into_owned(); + Some(IconSource::Name(Cow::Owned(favicon_path))) + } else { + self.fetch_icon_in_background(url, &favicon_path).await; + None + } + } + + async fn fetch_icon_in_background(&self, url: Url, favicon_path: &PathBuf) { + let client = self.client.clone(); + + let domain = url + .domain() + .map(|domain| domain.to_string()) + .expect("url have no domain"); + let favicon_path = favicon_path.clone(); + + smol::spawn(async move { + let favicon_url = favicon_url_from_page_source(&domain, &client) + .await + .unwrap_or_else(|| { + format!("https://www.google.com/s2/favicons?domain={}&sz=32", domain) + }); + + let icon = fetch_favicon(&favicon_url, &favicon_path, &client).await; + + match icon { + Some(icon) => { + // Ensure we recreate the pop-launcher cache dir if it was removed at runtime + let cache_dir = favicon_path.parent().unwrap(); + if !cache_dir.exists() { + smol::fs::create_dir_all(cache_dir) + .await + .expect("error creating cache directory"); + } + + let copy = smol::fs::write(&favicon_path, icon).await; + if let Err(err) = copy { + tracing::error!("error writing favicon to {:?}: {}", &favicon_path, err); + } + } + None => tracing::error!("no icon found for {}", domain), + } + }) + .detach(); + } +} + fn build_query(definition: &Definition, query: &str) -> String { let prefix = if definition.query.starts_with("https://") { "" @@ -92,3 +184,171 @@ fn build_query(definition: &Definition, query: &str) -> String { [prefix, &*definition.query, &*urlencoding::encode(query)].concat() } + +async fn fetch_favicon(url: &str, favicon_path: &PathBuf, client: &HttpClient) -> Option> { + let response = client.get_async(url).await; + match response { + Err(err) => { + 
tracing::error!("error fetching favicon {}: {}", url, err); + None + } + Ok(mut response) => { + let content_type = response + .headers() + .get(CONTENT_TYPE) + .map(|header| header.to_str().ok()) + .flatten() + .unwrap(); + + if !ALLOWED_FAVICON_MIME.contains(&content_type) { + tracing::error!( + "Got unexpected content-type '{}' type for {:?} favicon", + content_type, + favicon_path + ); + }; + + let mut icon = vec![]; + + if let Err(err) = response.body_mut().read_to_end(&mut icon).await { + tracing::error!("error reading favicon response body: {}", err); + } + + Some(icon) + } + } +} + +// Try to extract a favicon url from the page's html. The icon path +// found there can be either absolute or relative to the page domain. +async fn favicon_url_from_page_source(domain: &str, client: &HttpClient) -> Option { + let url = format!("https://{}", domain); + match client.get_async(&url).await { + Ok(mut html) => html + .text() + .await + .ok() + .map(|html| parse_favicon(&html)) + .flatten() + .map(|icon_url| { + if !icon_url.starts_with("https://") { + format!("https://{}{}", domain, icon_url) + } else { + icon_url.into() + } + }), + Err(_err) => None, + } +} + +fn parse_favicon(html: &str) -> Option { + let regex = Regex::new(r"").unwrap(); + let html = regex.replace_all(html, "").to_string(); + + let idx = html + .find("rel=\"shortcut icon") + .or_else(|| html.find("rel=\"alternate icon")) + .or_else(|| html.find("rel=\"icon")); + + if let Some(idx) = idx { + let html = &html[idx..]; + let idx = html.find("href=\""); + + if let Some(idx) = idx { + let start = idx + 6; + let html = &html[start..]; + let end = html.find("\""); + + if let Some(end) = end { + let icon_uri = &html[..end]; + let icon_uri = if icon_uri.starts_with("//") { + format!("https:{}", icon_uri) + } else { + icon_uri.to_string() + }; + + return Some(icon_uri); + } + } + } + + None +} + +#[cfg(test)] +mod test { + use isahc::{HttpClient, ReadResponseExt}; + + use crate::web::parse_favicon; + use
isahc::config::{Configurable, RedirectPolicy}; + + #[test] + fn should_parse_favicon_url_github() { + let html = isahc::get("https://github.com").unwrap().text().unwrap(); + + let icon_url = parse_favicon(&html); + assert_eq!( + Some("https://github.githubassets.com/favicons/favicon.png".to_string()), + icon_url + ); + } + + #[test] + fn should_parse_favicon_url_ddg() { + // Ddg returns a relative path to its favicon + let html = isahc::get("https://duckduckgo.com") + .unwrap() + .text() + .unwrap(); + + let icon_url = parse_favicon(&html); + assert_eq!(Some("/favicon.ico".to_string()), icon_url); + } + + #[test] + fn parse_favicon_url_google_returns_none() { + // Google seems to set its favicon via javascript + // hence there is no way to get the favicon from the page + // source + let html = isahc::get("https://google.com").unwrap().text().unwrap(); + + let icon_url = parse_favicon(&html); + assert!(icon_url.is_none()); + } + + #[test] + fn should_parse_favicon_url_flathub() { + // Ensure we don't match the commented icon in the flathub page + // + // + let html = isahc::get("https://flathub.org").unwrap().text().unwrap(); + + let icon_url = parse_favicon(&html); + assert_eq!( + Some("/assets/themes/flathub/favicon-32x32.png".to_string()), + icon_url + ); + } + + #[test] + fn should_parse_favicon_url_aliexpress() { + // The Aliexpress icon href starts with two slashes: `href="//ae01.alicdn.com/images/eng/wholesale/icon/aliexpress.ico"` + + let client = HttpClient::builder() + .redirect_policy(RedirectPolicy::Follow) + .build() + .unwrap(); + + let html = client + .get("https://aliexpress.com") + .unwrap() + .text() + .unwrap(); + + let icon_url = parse_favicon(&html); + assert_eq!( + Some("https://ae01.alicdn.com/images/eng/wholesale/icon/aliexpress.ico".to_string()), + icon_url + ); + } +}