feat(web): Fetch and cache favicons

This commit is contained in:
Paul Delafosse 2021-10-19 15:10:46 +02:00 committed by GitHub
parent 55c1b6e4bd
commit 589dcb1cb3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 512 additions and 9 deletions

241
Cargo.lock generated
View file

@ -243,6 +243,12 @@ version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38"
[[package]]
name = "bytes"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
[[package]]
name = "cache-padded"
version = "1.1.1"
@ -361,6 +367,37 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "curl"
version = "0.4.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aaa3b8db7f3341ddef15786d250106334d4a6c4b0ae4a46cd77082777d9849b9"
dependencies = [
"curl-sys",
"libc",
"openssl-probe",
"openssl-sys",
"schannel",
"socket2",
"winapi",
]
[[package]]
name = "curl-sys"
version = "0.4.49+curl-7.79.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0f44960aea24a786a46907b8824ebc0e66ca06bf4e4978408c7499620343483"
dependencies = [
"cc",
"libc",
"libnghttp2-sys",
"libz-sys",
"openssl-sys",
"pkg-config",
"vcpkg",
"winapi",
]
[[package]]
name = "darling"
version = "0.13.0"
@ -433,6 +470,15 @@ version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "encoding_rs"
version = "0.8.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065"
dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "enumflags2"
version = "0.6.4"
@ -494,6 +540,16 @@ dependencies = [
"libc",
]
[[package]]
name = "form_urlencoded"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191"
dependencies = [
"matches",
"percent-encoding",
]
[[package]]
name = "freedesktop-desktop-entry"
version = "0.4.0"
@ -630,7 +686,7 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce54d63f8b0c75023ed920d46fd71d0cbbb830b0ee012726b5b4f506fb6dea5b"
dependencies = [
"bytes",
"bytes 0.5.6",
"futures",
"memchr",
"pin-project 0.4.28",
@ -893,6 +949,17 @@ dependencies = [
"libc",
]
[[package]]
name = "http"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1323096b05d41827dadeaee54c9981958c0f94e670bc94ed80037d1a7b8b186b"
dependencies = [
"bytes 1.1.0",
"fnv",
"itoa",
]
[[package]]
name = "human-sort"
version = "0.2.2"
@ -911,6 +978,17 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8"
dependencies = [
"matches",
"unicode-bidi",
"unicode-normalization",
]
[[package]]
name = "instant"
version = "0.1.10"
@ -920,6 +998,32 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "isahc"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "431445cb4ba85a80cb1438a9ae8042dadb78ae4046ecee89ad027b614aa0ddb7"
dependencies = [
"async-channel",
"crossbeam-utils",
"curl",
"curl-sys",
"encoding_rs",
"event-listener",
"futures-lite",
"http",
"log",
"mime",
"once_cell",
"polling",
"slab",
"sluice",
"tracing",
"tracing-futures",
"url",
"waker-fn",
]
[[package]]
name = "itertools"
version = "0.10.1"
@ -947,6 +1051,28 @@ version = "0.2.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21"
[[package]]
name = "libnghttp2-sys"
version = "0.1.7+1.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ed28aba195b38d5ff02b9170cbff627e336a20925e43b4945390401c5dc93f"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "libz-sys"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de5435b8549c16d423ed0c03dbaafe57cf6c3344744f1242520d59c9d8ecec66"
dependencies = [
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "locale_config"
version = "0.3.0"
@ -987,6 +1113,12 @@ dependencies = [
"regex-automata",
]
[[package]]
name = "matches"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f"
[[package]]
name = "memchr"
version = "2.4.1"
@ -1105,6 +1237,25 @@ version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56"
[[package]]
name = "openssl-probe"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a"
[[package]]
name = "openssl-sys"
version = "0.9.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69df2d8dfc6ce3aaf44b40dec6f487d5a886516cf6879c49e98e0710f310a058"
dependencies = [
"autocfg",
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "pango"
version = "0.14.3"
@ -1136,6 +1287,12 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
[[package]]
name = "percent-encoding"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]]
name = "pest"
version = "2.1.3"
@ -1256,6 +1413,7 @@ dependencies = [
"gtk",
"human-sort",
"human_format",
"isahc",
"new_mime_guess",
"pop-launcher",
"postage",
@ -1268,6 +1426,7 @@ dependencies = [
"strsim",
"tracing",
"tracing-subscriber",
"url",
"urlencoding",
"ward",
"zbus",
@ -1469,6 +1628,16 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "schannel"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75"
dependencies = [
"lazy_static",
"winapi",
]
[[package]]
name = "scoped-tls"
version = "1.0.0"
@ -1592,6 +1761,17 @@ version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c307a32c1c5c437f38c7fd45d753050587732ba8628319fbdf12a7e289ccc590"
[[package]]
name = "sluice"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d7400c0eff44aa2fcb5e31a5f24ba9716ed90138769e4977a2ba6014ae63eb5"
dependencies = [
"async-channel",
"futures-core",
"futures-io",
]
[[package]]
name = "smallvec"
version = "1.6.1"
@ -1720,6 +1900,21 @@ dependencies = [
"once_cell",
]
[[package]]
name = "tinyvec"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f83b2a3d4d9091d0abd7eba4dc2710b1718583bd4d8992e2190720ea38f391f7"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "toml"
version = "0.5.8"
@ -1736,6 +1931,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d"
dependencies = [
"cfg-if 1.0.0",
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
@ -1761,6 +1957,16 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "tracing-futures"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
dependencies = [
"pin-project 1.0.8",
"tracing",
]
[[package]]
name = "tracing-log"
version = "0.1.2"
@ -1819,6 +2025,21 @@ dependencies = [
"version_check",
]
[[package]]
name = "unicode-bidi"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f"
[[package]]
name = "unicode-normalization"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-segmentation"
version = "1.8.0"
@ -1831,12 +2052,30 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "url"
version = "2.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c"
dependencies = [
"form_urlencoded",
"idna",
"matches",
"percent-encoding",
]
[[package]]
name = "urlencoding"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68b90931029ab9b034b300b797048cf23723400aa757e8a2bfb9d748102f9821"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version-compare"
version = "0.0.11"

3
debian/control vendored
View file

@ -6,13 +6,14 @@ Build-Depends:
cargo,
debhelper-compat (=10),
libgtk-3-dev,
libssl-dev,
rustc (>=1.47),
Standards-Version: 4.1.1
Homepage: https://github.com/pop-os/launcher
Package: pop-launcher
Architecture: amd64 arm64
Depends: libgtk-3-0, qalc, fd-find, ${misc:Depends}, ${shlibs:Depends}
Depends: libgtk-3-0, qalc, fd-find, libssl1.1, ${misc:Depends}, ${shlibs:Depends}
Description: Modular IPC-based desktop launcher service
Package: pop-launcher-system76-power

View file

@ -30,3 +30,5 @@ urlencoding = "2"
zbus = "1"
zvariant = "=2.6" # Restrict for 1.47
ward = "2.1.0"
isahc = "1.5.0"
url = "2.2.2"

View file

@ -53,7 +53,7 @@ pub async fn main() {
app.search(search).await;
app.active.set(false);
crate::send(&mut app.out, PluginResponse::Finished).await;
},
}
}
}
};
@ -165,7 +165,8 @@ impl SearchContext {
},
..Default::default()
}),
).await;
)
.await;
return;
}

View file

@ -120,4 +120,4 @@ fn detect_terminal() -> (PathBuf, &'static str) {
}
(PathBuf::from("/usr/bin/gnome-terminal"), "--")
}
}

View file

@ -1,14 +1,25 @@
// SPDX-License-Identifier: GPL-3.0-only
// Copyright © 2021 System76
mod config;
use std::borrow::Cow;
use std::io;
use std::path::PathBuf;
use std::time::Duration;
use self::config::{Config, Definition};
use futures_lite::StreamExt;
use isahc::config::{Configurable, RedirectPolicy};
use isahc::http::header::CONTENT_TYPE;
use isahc::{AsyncReadResponseExt, HttpClient};
use smol::io::AsyncReadExt;
use smol::Unblock;
use url::Url;
use pop_launcher::*;
use smol::Unblock;
use std::io;
use self::config::{Config, Definition};
use regex::Regex;
mod config;
pub async fn main() {
let mut app = App::default();
@ -32,14 +43,38 @@ pub struct App {
config: Config,
queries: Vec<String>,
out: Unblock<io::Stdout>,
client: HttpClient,
cache: PathBuf,
}
/// MIME types recognised as favicon images.
///
/// `fetch_favicon` compares the `Content-Type` header of a favicon response
/// against this list.
const ALLOWED_FAVICON_MIME: [&str; 5] = [
"image/vnd.microsoft.icon",
"image/png",
"image/gif",
"image/svg+xml",
"image/x-icon",
];
impl Default for App {
    /// Builds the web plugin state: loads the plugin configuration, prepares
    /// the on-disk favicon cache directory (`$HOME/.cache/pop-launcher`) and
    /// creates the HTTP client used to download favicons.
    ///
    /// # Panics
    ///
    /// Panics if the home directory cannot be determined, if the cache
    /// directory cannot be created, or if the HTTP client cannot be built.
    fn default() -> Self {
        // `home_dir` carries a deprecation warning on older toolchains; it is
        // kept so the cache location stays exactly where it was.
        #[allow(deprecated)]
        let cache = std::env::home_dir()
            .map(|home| home.join(".cache/pop-launcher"))
            .expect("no home dir");

        // `create_dir_all` also creates a missing `$HOME/.cache` and succeeds
        // when the directory already exists, so no racy `exists()` pre-check
        // is needed.
        std::fs::create_dir_all(&cache).expect("unable to create $HOME/.cache/pop-launcher");

        let client = HttpClient::builder()
            .redirect_policy(RedirectPolicy::Follow)
            .timeout(Duration::from_secs(1))
            .build()
            .expect("failed to create http client");

        Self {
            config: config::load(),
            queries: Vec::new(),
            out: async_stdout(),
            client,
            cache,
        }
    }
}
@ -62,6 +97,7 @@ impl App {
let (_, mut query) = query.split_at(word.len());
query = query.trim();
let encoded = build_query(def, query);
let icon = self.get_favicon(&def.name, &encoded).await;
crate::send(
&mut self.out,
@ -69,6 +105,7 @@ impl App {
id: id as u32,
name: [&def.name, ": ", query].concat(),
description: encoded.clone(),
icon,
..Default::default()
}),
)
@ -83,6 +120,61 @@ impl App {
}
}
impl App {
async fn get_favicon(&self, rule_name: &str, url: &str) -> Option<IconSource> {
let url = Url::parse(url).expect("invalid url");
let favicon_path = self.cache.join(format!("{}.ico", rule_name));
if favicon_path.exists() {
let favicon_path = favicon_path.to_string_lossy().into_owned();
Some(IconSource::Name(Cow::Owned(favicon_path)))
} else {
self.fetch_icon_in_background(url, &favicon_path).await;
None
}
}
async fn fetch_icon_in_background(&self, url: Url, favicon_path: &PathBuf) {
let client = self.client.clone();
let domain = url
.domain()
.map(|domain| domain.to_string())
.expect("url have no domain");
let favicon_path = favicon_path.clone();
smol::spawn(async move {
let favicon_url = favicon_url_from_page_source(&domain, &client)
.await
.unwrap_or_else(|| {
format!("https://www.google.com/s2/favicons?domain={}&sz=32", domain)
});
let icon = fetch_favicon(&favicon_url, &favicon_path, &client).await;
match icon {
Some(icon) => {
// Ensure we recreate the pop-launcher cache dir if it was removed at runtime
let cache_dir = favicon_path.parent().unwrap();
if !cache_dir.exists() {
smol::fs::create_dir_all(cache_dir)
.await
.expect("error creating cache directory");
}
let copy = smol::fs::write(&favicon_path, icon).await;
if let Err(err) = copy {
tracing::error!("error writing favicon to {:?}: {}", &favicon_path, err);
}
}
None => tracing::error!("no icon found for {}", domain),
}
})
.detach();
}
}
fn build_query(definition: &Definition, query: &str) -> String {
let prefix = if definition.query.starts_with("https://") {
""
@ -92,3 +184,171 @@ fn build_query(definition: &Definition, query: &str) -> String {
[prefix, &*definition.query, &*urlencoding::encode(query)].concat()
}
async fn fetch_favicon(url: &str, favicon_path: &PathBuf, client: &HttpClient) -> Option<Vec<u8>> {
let response = client.get_async(url).await;
match response {
Err(err) => {
tracing::error!("error fetching favicon {}: {}", url, err);
None
}
Ok(mut response) => {
let content_type = response
.headers()
.get(CONTENT_TYPE)
.map(|header| header.to_str().ok())
.flatten()
.unwrap();
if !ALLOWED_FAVICON_MIME.contains(&content_type) {
tracing::error!(
"Got unexpected content-type '{}' type for {:?} favicon",
content_type,
favicon_path
);
};
let mut icon = vec![];
if let Err(err) = response.body_mut().read_to_end(&mut icon).await {
tracing::error!("error reading favicon response body: {}", err);
}
Some(icon)
}
}
}
// Try to extract a favicon url from html the icon path
// returned can be either absolute or relative to the page domain
async fn favicon_url_from_page_source(domain: &str, client: &HttpClient) -> Option<String> {
let url = format!("https://{}", domain);
match client.get_async(&url).await {
Ok(mut html) => html
.text()
.await
.ok()
.map(|html| parse_favicon(&html))
.flatten()
.map(|icon_url| {
if !icon_url.starts_with("https://") {
format!("https://{}{}", domain, icon_url)
} else {
icon_url.into()
}
}),
Err(_err) => None,
}
}
/// Removes every terminated `<!-- ... -->` comment from `html`.
///
/// Comments may span several lines. A `<!--` without a matching `-->` is left
/// untouched together with everything after it.
fn strip_html_comments(html: &str) -> String {
    const OPEN: &str = "<!--";
    const CLOSE: &str = "-->";

    let mut stripped = String::with_capacity(html.len());
    let mut rest = html;

    while let Some(open) = rest.find(OPEN) {
        let after_open = &rest[open + OPEN.len()..];
        match after_open.find(CLOSE) {
            Some(close) => {
                stripped.push_str(&rest[..open]);
                rest = &after_open[close + CLOSE.len()..];
            }
            // Unterminated comment: keep the remaining text as it is.
            None => break,
        }
    }

    stripped.push_str(rest);
    stripped
}

/// Extracts the favicon `href` from an HTML document.
///
/// HTML comments are ignored. The document is searched for a tag carrying, in
/// order of preference, `rel="shortcut icon"`, `rel="alternate icon"` or
/// `rel="icon"`, and the double-quoted `href` attribute of that same tag is
/// returned (the attribute may appear before or after `rel`).
///
/// A protocol-relative href (`//host/path`) is returned with an `https:`
/// prefix; any other href is returned verbatim, so it may be relative to the
/// page.
///
/// Returns `None` when no such tag exists, or when the tag has no non-empty
/// double-quoted `href`.
///
/// This is a plain text scan, not an HTML parser: a `>` inside an attribute
/// value that precedes `href` ends the tag early and yields `None`.
fn parse_favicon(html: &str) -> Option<String> {
    const REL_MARKERS: [&str; 3] = [
        "rel=\"shortcut icon",
        "rel=\"alternate icon",
        "rel=\"icon",
    ];
    const HREF: &str = "href=\"";

    let html = strip_html_comments(html);

    // The first marker that occurs anywhere in the document wins.
    let rel_idx = REL_MARKERS.iter().find_map(|marker| html.find(marker))?;

    // Restrict the href lookup to the tag that carries the `rel` attribute so
    // the href of a following tag can never be picked up by mistake.
    let tag_start = html[..rel_idx].rfind('<').unwrap_or(0);
    let tag_end = html[rel_idx..]
        .find('>')
        .map_or(html.len(), |offset| rel_idx + offset);
    let tag = &html[tag_start..tag_end];

    let value_start = tag.find(HREF)? + HREF.len();
    let value = &tag[value_start..];
    let icon_uri = &value[..value.find('"')?];

    if icon_uri.is_empty() {
        return None;
    }

    Some(if icon_uri.starts_with("//") {
        format!("https:{}", icon_uri)
    } else {
        icon_uri.to_string()
    })
}
#[cfg(test)]
mod test {
    use crate::web::parse_favicon;

    // The fixtures below reproduce the markup patterns the former live-site
    // tests relied on, so the tests are deterministic and need no network.

    #[test]
    fn should_parse_favicon_url_github() {
        // `rel="alternate icon"` is preferred over a plain `rel="icon"`
        let html = r#"<head>
<link rel="icon" class="js-site-favicon" type="image/svg+xml" href="https://github.githubassets.com/favicons/favicon.svg">
<link rel="alternate icon" class="js-site-favicon" type="image/png" href="https://github.githubassets.com/favicons/favicon.png">
</head>"#;
        let icon_url = parse_favicon(html);

        assert_eq!(
            Some("https://github.githubassets.com/favicons/favicon.png".to_string()),
            icon_url
        );
    }

    #[test]
    fn should_parse_favicon_url_ddg() {
        // A path relative to the page domain is returned unchanged
        let html = r#"<head>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" sizes="16x16 24x24 32x32 64x64"/>
</head>"#;
        let icon_url = parse_favicon(html);

        assert_eq!(Some("/favicon.ico".to_string()), icon_url);
    }

    #[test]
    fn parse_favicon_url_google_returns_none() {
        // A page that declares no icon link in its source (e.g. because the
        // favicon is set from javascript) yields nothing
        let html = r#"<html><head><title>Google</title>
<script>/* favicon is set from javascript */</script>
</head><body></body></html>"#;
        let icon_url = parse_favicon(html);

        assert!(icon_url.is_none());
    }

    #[test]
    fn should_parse_favicon_url_flathub() {
        // Ensure the commented-out icon is not matched
        let html = r#"<head>
<!-- <link rel="icon" type="image/x-icon" href="favicon.ico"> -->
<link rel="icon" type="image/png" href="/assets/themes/flathub/favicon-32x32.png">
</head>"#;
        let icon_url = parse_favicon(html);

        assert_eq!(
            Some("/assets/themes/flathub/favicon-32x32.png".to_string()),
            icon_url
        );
    }

    #[test]
    fn should_parse_favicon_url_aliexpress() {
        // An href starting with two slashes is completed with the https scheme
        let html = r#"<head>
<link rel="shortcut icon" href="//ae01.alicdn.com/images/eng/wholesale/icon/aliexpress.ico" type="image/x-icon"/>
</head>"#;
        let icon_url = parse_favicon(html);

        assert_eq!(
            Some("https://ae01.alicdn.com/images/eng/wholesale/icon/aliexpress.ico".to_string()),
            icon_url
        );
    }
}