Merge pull request #269 from ikatson/bep-47

BEP-47 padding files + refactor related code
This commit is contained in:
Igor Katson 2024-11-07 15:08:20 +00:00 committed by GitHub
commit c2b2e8e8e7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 227 additions and 93 deletions

View file

@ -533,23 +533,23 @@ fn make_torrent_details(
output_folder: String,
) -> Result<TorrentDetailsResponse> {
let files = info
.iter_filenames_and_lengths()
.iter_file_details()
.context("error iterating filenames and lengths")?
.enumerate()
.map(|(idx, (filename_it, length))| {
let name = match filename_it.to_string() {
.map(|(idx, d)| {
let name = match d.filename.to_string() {
Ok(s) => s,
Err(err) => {
warn!("error reading filename: {:?}", err);
"<INVALID NAME>".to_string()
}
};
let components = filename_it.to_vec().unwrap_or_default();
let components = d.filename.to_vec().unwrap_or_default();
let included = only_files.map(|o| o.contains(&idx)).unwrap_or(true);
TorrentDetailsResponseFile {
name,
components,
length,
length: d.len,
included,
}
})
@ -568,10 +568,11 @@ fn torrent_file_mime_type(
info: &TorrentMetaV1Info<ByteBufOwned>,
file_idx: usize,
) -> Result<&'static str> {
info.iter_filenames_and_lengths()?
info.iter_file_details()?
.nth(file_idx)
.and_then(|(f, _)| {
f.iter_components()
.and_then(|d| {
d.filename
.iter_components()
.last()
.and_then(|r| r.ok())
.and_then(|s| mime_guess::from_path(s).first_raw())

View file

@ -124,7 +124,13 @@ async fn create_torrent_raw<'a>(
.components()
.map(|c| osstr_to_bytes(c.as_os_str()).into())
.collect();
output_files.push(TorrentMetaV1File { length, path });
output_files.push(TorrentMetaV1File {
length,
path,
attr: None,
sha1: None,
symlink_path: None,
});
continue 'outer;
}
@ -154,6 +160,9 @@ async fn create_torrent_raw<'a>(
} else {
Some(output_files)
},
attr: None,
sha1: None,
symlink_path: None,
})
}

View file

@ -1,10 +1,13 @@
use std::path::PathBuf;
use librqbit_core::torrent_metainfo::FileDetailsAttrs;
/// Precomputed, owned description of one file of a torrent.
///
/// Instances are built from the torrent's file iterator (`iter_file_details_ext`)
/// so that storage and hashing code can consult file metadata without
/// re-walking the torrent info.
#[derive(Debug, Clone)]
pub struct FileInfo {
/// File path relative to the torrent's output folder.
pub relative_filename: PathBuf,
/// Absolute byte offset of the file's first byte when the torrent is viewed
/// as one flat blob of bytes.
pub offset_in_torrent: u64,
/// Range of piece indices that contain this file's data.
pub piece_range: std::ops::Range<u32>,
/// Decoded BEP-47 attribute flags; `attrs.padding` marks a padding file that
/// is zero-filled on read and never written to storage.
pub attrs: FileDetailsAttrs,
/// File length in bytes.
pub len: u64,
}

View file

@ -21,6 +21,7 @@ use crate::{
pub fn update_hash_from_file<Sha1: ISha1>(
file_id: usize,
file_info: &FileInfo,
mut pos: u64,
files: &dyn TorrentStorage,
hash: &mut Sha1,
@ -30,9 +31,15 @@ pub fn update_hash_from_file<Sha1: ISha1>(
let mut read = 0;
while bytes_to_read > 0 {
let chunk = std::cmp::min(buf.len(), bytes_to_read);
files
.pread_exact(file_id, pos, &mut buf[..chunk])
.with_context(|| format!("failed reading chunk of size {chunk}, read so far {read}"))?;
if file_info.attrs.padding {
buf[..chunk].fill(0);
} else {
files
.pread_exact(file_id, pos, &mut buf[..chunk])
.with_context(|| {
format!("failed reading chunk of size {chunk}, read so far {read}")
})?;
}
bytes_to_read -= chunk;
read += chunk;
pos += chunk as u64;
@ -138,6 +145,7 @@ impl<'a> FileOps<'a> {
if let Err(err) = update_hash_from_file(
current_file.index,
current_file.fi,
pos,
self.files,
&mut computed_hash,
@ -181,7 +189,8 @@ impl<'a> FileOps<'a> {
let mut piece_remaining_bytes = piece_length as usize;
for (file_idx, (name, file_len)) in self.torrent.iter_filenames_and_lengths()?.enumerate() {
for (file_idx, fi) in self.file_infos.iter().enumerate() {
let file_len = fi.len;
if absolute_offset > file_len {
absolute_offset -= file_len;
continue;
@ -198,6 +207,7 @@ impl<'a> FileOps<'a> {
);
update_hash_from_file(
file_idx,
fi,
absolute_offset,
self.files,
&mut h,
@ -205,7 +215,10 @@ impl<'a> FileOps<'a> {
to_read_in_file,
)
.with_context(|| {
format!("error reading {to_read_in_file} bytes, file_id: {file_idx} (\"{name:?}\")")
format!(
"error reading {to_read_in_file} bytes, file_id: {file_idx} (\"{:?}\")",
fi.relative_filename
)
})?;
piece_remaining_bytes -= to_read_in_file;
@ -246,7 +259,8 @@ impl<'a> FileOps<'a> {
let mut absolute_offset = self.lengths.chunk_absolute_offset(chunk_info);
let mut buf = result_buf;
for (file_idx, file_len) in self.torrent.iter_file_lengths()?.enumerate() {
for (file_idx, file_info) in self.file_infos.iter().enumerate() {
let file_len = file_info.len;
if absolute_offset > file_len {
absolute_offset -= file_len;
continue;
@ -262,11 +276,15 @@ impl<'a> FileOps<'a> {
absolute_offset,
&chunk_info
);
self.files
.pread_exact(file_idx, absolute_offset, &mut buf[..to_read_in_file])
.with_context(|| {
format!("error reading {file_idx} bytes, file_id: {to_read_in_file}")
})?;
if file_info.attrs.padding {
buf[..to_read_in_file].fill(0);
} else {
self.files
.pread_exact(file_idx, absolute_offset, &mut buf[..to_read_in_file])
.with_context(|| {
format!("error reading {file_idx} bytes, file_id: {to_read_in_file}")
})?;
}
buf = &mut buf[to_read_in_file..];
@ -292,7 +310,8 @@ impl<'a> FileOps<'a> {
let mut buf = data.block.as_ref();
let mut absolute_offset = self.lengths.chunk_absolute_offset(chunk_info);
for (file_idx, (name, file_len)) in self.torrent.iter_filenames_and_lengths()?.enumerate() {
for (file_idx, file_info) in self.file_infos.iter().enumerate() {
let file_len = file_info.len;
if absolute_offset > file_len {
absolute_offset -= file_len;
continue;
@ -311,9 +330,16 @@ impl<'a> FileOps<'a> {
to_write,
absolute_offset
);
self.files
.pwrite_all(file_idx, absolute_offset, &buf[..to_write])
.with_context(|| format!("error writing to file {file_idx} (\"{name:?}\")"))?;
if !file_info.attrs.padding {
self.files
.pwrite_all(file_idx, absolute_offset, &buf[..to_write])
.with_context(|| {
format!(
"error writing to file {file_idx} (\"{:?}\")",
file_info.relative_filename
)
})?;
}
buf = &buf[to_write..];
if buf.is_empty() {
break;

View file

@ -153,10 +153,10 @@ impl HttpApi {
let mut playlist_items = handle
.shared()
.info
.iter_filenames_and_lengths()?
.iter_file_details()?
.enumerate()
.filter_map(|(file_idx, (filename, _))| {
let filename = filename.to_vec().ok()?.join("/");
.filter_map(|(file_idx, file_details)| {
let filename = file_details.filename.to_vec().ok()?.join("/");
let is_playable = mime_guess::from_path(&filename)
.first()
.map(|mime| {

View file

@ -156,8 +156,9 @@ fn compute_only_files_regex<ByteBuf: AsRef<[u8]>>(
) -> anyhow::Result<Vec<usize>> {
let filename_re = regex::Regex::new(filename_re).context("filename regex is incorrect")?;
let mut only_files = Vec::new();
for (idx, (filename, _)) in torrent.iter_filenames_and_lengths()?.enumerate() {
let full_path = filename
for (idx, fd) in torrent.iter_file_details()?.enumerate() {
let full_path = fd
.filename
.to_pathbuf()
.with_context(|| format!("filename of file {idx} is not valid utf8"))?;
if filename_re.is_match(full_path.to_str().unwrap()) {
@ -191,12 +192,12 @@ fn compute_only_files(
}
(None, Some(filename_re)) => {
let only_files = compute_only_files_regex(info, &filename_re)?;
for (idx, (filename, _)) in info.iter_filenames_and_lengths()?.enumerate() {
for (idx, fd) in info.iter_file_details()?.enumerate() {
if !only_files.contains(&idx) {
continue;
}
if !list_only {
info!(?filename, "will download");
info!(filename=?fd.filename, "will download");
}
}
Ok(Some(only_files))
@ -1043,8 +1044,8 @@ impl Session {
info: &TorrentMetaV1Info<ByteBufOwned>,
) -> anyhow::Result<Option<PathBuf>> {
let files = info
.iter_filenames_and_lengths()?
.map(|(f, l)| Ok((f.to_pathbuf()?, l)))
.iter_file_details()?
.map(|fd| Ok((fd.filename.to_pathbuf()?, fd.len)))
.collect::<anyhow::Result<Vec<(PathBuf, u64)>>>()?;
if files.len() < 2 {
return Ok(None);
@ -1141,13 +1142,14 @@ impl Session {
let lengths = Lengths::from_torrent(&info)?;
let file_infos = info
.iter_file_details(&lengths)?
.iter_file_details_ext(&lengths)?
.map(|fd| {
Ok::<_, anyhow::Error>(FileInfo {
relative_filename: fd.filename.to_pathbuf()?,
relative_filename: fd.details.filename.to_pathbuf()?,
offset_in_torrent: fd.offset,
piece_range: fd.pieces,
len: fd.len,
len: fd.details.len,
attrs: fd.details.attrs(),
})
})
.collect::<anyhow::Result<Vec<FileInfo>>>()?;

View file

@ -151,23 +151,26 @@ impl TorrentStorage for FilesystemStorage {
fn init(&mut self, meta: &ManagedTorrentShared) -> anyhow::Result<()> {
let mut files = Vec::<OpenedFile>::new();
for file_details in meta.info.iter_file_details(&meta.lengths)? {
for file_details in meta.file_infos.iter() {
let mut full_path = self.output_folder.clone();
let relative_path = file_details
.filename
.to_pathbuf()
.context("error converting file to path")?;
let relative_path = &file_details.relative_filename;
full_path.push(relative_path);
std::fs::create_dir_all(full_path.parent().context("bug: no parent")?)?;
let file = if meta.options.allow_overwrite {
OpenOptions::new()
.create(true)
.truncate(false)
.read(true)
.write(true)
.open(&full_path)
.with_context(|| format!("error opening {full_path:?} in read/write mode"))?
let file = if file_details.attrs.padding {
OpenedFile::new_dummy()
} else if meta.options.allow_overwrite {
OpenedFile::new(
OpenOptions::new()
.create(true)
.truncate(false)
.read(true)
.write(true)
.open(&full_path)
.with_context(|| {
format!("error opening {full_path:?} in read/write mode")
})?,
)
} else {
// create_new does not seem to work with read(true), so calling this twice.
OpenOptions::new()
@ -180,9 +183,9 @@ impl TorrentStorage for FilesystemStorage {
&full_path
)
})?;
OpenOptions::new().read(true).write(true).open(&full_path)?
OpenedFile::new(OpenOptions::new().read(true).write(true).open(&full_path)?)
};
files.push(OpenedFile::new(file));
files.push(file);
}
self.opened_files = files;

View file

@ -14,6 +14,12 @@ impl OpenedFile {
}
}
/// Creates a placeholder entry that is not backed by any open file.
///
/// The inner slot starts out as `None`. `FilesystemStorage::init` uses this
/// for BEP-47 padding files, which are never created on disk.
pub fn new_dummy() -> Self {
    let file = RwLock::new(None);
    Self { file }
}
pub fn take(&self) -> anyhow::Result<Option<File>> {
let mut f = self.file.write();
Ok(f.take())

View file

@ -245,6 +245,9 @@ impl TorrentStateInitializing {
.unwrap_or(true)
{
let now = Instant::now();
if fi.attrs.padding {
continue;
}
if let Err(err) = self.files.ensure_file_length(idx, fi.len) {
warn!(
"Error setting length for file {:?} to {}: {:#?}",

View file

@ -68,10 +68,10 @@ impl TorrentFileTreeNode {
let last_url_bit = torrent
.shared()
.info
.iter_filenames_and_lengths()
.iter_file_details()
.ok()
.and_then(|mut it| it.nth(fid))
.and_then(|(fi, _)| fi.to_vec().ok())
.and_then(|fd| fd.filename.to_vec().ok())
.map(|components| {
components
.into_iter()
@ -111,10 +111,10 @@ struct TorrentFileTree {
}
fn is_single_file_at_root(info: &TorrentMetaV1Info<ByteBufOwned>) -> bool {
info.iter_filenames_and_lengths()
info.iter_file_details()
.into_iter()
.flatten()
.flat_map(|(f, _)| f.iter_components())
.flat_map(|fd| fd.filename.iter_components())
.nth(1)
.is_none()
}
@ -123,10 +123,10 @@ impl TorrentFileTree {
fn build(torent_id: TorrentId, info: &TorrentMetaV1Info<ByteBufOwned>) -> anyhow::Result<Self> {
if is_single_file_at_root(info) {
let filename = info
.iter_filenames_and_lengths()?
.iter_file_details()?
.next()
.context("bug")?
.0
.filename
.iter_components()
.last()
.context("bug")??;
@ -159,8 +159,8 @@ impl TorrentFileTree {
let mut name_cache = HashMap::new();
for (fid, (fi, _)) in info.iter_filenames_and_lengths()?.enumerate() {
let components = match fi.to_vec() {
for (fid, fd) in info.iter_file_details()?.enumerate() {
let components = match fd.filename.to_vec() {
Ok(v) => v,
Err(_) => continue,
};
@ -402,9 +402,15 @@ mod tests {
.map(|f| TorrentMetaV1File {
length: 1,
path: f.split("/").map(|f| f.as_bytes().into()).collect(),
attr: None,
sha1: None,
symlink_path: None,
})
.collect(),
),
attr: None,
sha1: None,
symlink_path: None,
},
comment: None,
created_by: None,

View file

@ -6,6 +6,7 @@ use clone_to_owned::CloneToOwned;
use itertools::Either;
use serde::{Deserialize, Serialize};
use std::{iter::once, path::PathBuf};
use tracing::debug;
use crate::{hash_id::Id20, lengths::Lengths};
@ -99,6 +100,16 @@ pub struct TorrentMetaV1Info<BufType> {
// Single-file mode
#[serde(skip_serializing_if = "Option::is_none")]
pub length: Option<u64>,
#[serde(default = "none", skip_serializing_if = "Option::is_none")]
pub attr: Option<BufType>,
#[serde(default = "none", skip_serializing_if = "Option::is_none")]
pub sha1: Option<BufType>,
#[serde(
default = "none",
rename = "symlink path",
skip_serializing_if = "Option::is_none"
)]
pub symlink_path: Option<Vec<BufType>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub md5sum: Option<BufType>,
@ -174,14 +185,57 @@ where
}
}
/// Boolean view of a file's BEP-47 `attr` string.
///
/// Every flag defaults to `false`; `FileDetails::attrs` sets a flag when the
/// corresponding character is present in the attribute bytes.
#[derive(Default, Debug, Clone, Copy)]
pub struct FileDetailsAttrs {
/// Set by the `l` attribute character.
pub symlink: bool,
/// Set by the `h` attribute character.
pub hidden: bool,
/// Set by the `p` attribute character (padding file).
pub padding: bool,
/// Set by the `x` attribute character.
pub executable: bool,
}
pub struct FileDetails<'a, BufType> {
pub filename: FileIteratorName<'a, BufType>,
pub offset: u64,
pub len: u64,
// bep-47
attr: Option<&'a BufType>,
pub sha1: Option<&'a BufType>,
pub symlink_path: Option<&'a [BufType]>,
}
impl<'a, BufType> FileDetails<'a, BufType>
where
    BufType: AsRef<[u8]>,
{
    /// Decodes the raw `attr` bytes into individual boolean flags.
    ///
    /// Returns the all-`false` default when the file carries no `attr` value.
    /// Each byte is interpreted as one flag character (`l`, `h`, `p`, `x`);
    /// unrecognised bytes are reported at debug level and otherwise ignored.
    pub fn attrs(&self) -> FileDetailsAttrs {
        let mut flags = FileDetailsAttrs::default();
        if let Some(raw) = self.attr {
            for &flag in raw.as_ref() {
                match flag {
                    b'l' => flags.symlink = true,
                    b'h' => flags.hidden = true,
                    b'p' => flags.padding = true,
                    b'x' => flags.executable = true,
                    unknown => debug!(attr = unknown, "unknown file attribute"),
                }
            }
        }
        flags
    }
}
/// A file's `FileDetails` together with its position inside the torrent.
///
/// Yielded by `iter_file_details_ext`, which accumulates the lengths of the
/// preceding files to derive `offset` and asks `Lengths` for the piece range.
pub struct FileDetailsExt<'a, BufType> {
/// Name, length and BEP-47 metadata of the file.
pub details: FileDetails<'a, BufType>,
/// Absolute offset in the torrent if it was a flat blob of bytes.
pub offset: u64,
/// The pieces that contain this file.
pub pieces: std::ops::Range<u32>,
}
impl<'a, BufType> FileDetails<'a, BufType> {
impl<'a, BufType> FileDetailsExt<'a, BufType> {
pub fn pieces_usize(&self) -> std::ops::Range<usize> {
self.pieces.start as usize..self.pieces.end as usize
}
@ -203,60 +257,77 @@ impl<BufType: AsRef<[u8]>> TorrentMetaV1Info<BufType> {
}
#[inline(never)]
pub fn iter_filenames_and_lengths(
pub fn iter_file_details(
&self,
) -> anyhow::Result<impl Iterator<Item = (FileIteratorName<'_, BufType>, u64)>> {
) -> anyhow::Result<impl Iterator<Item = FileDetails<'_, BufType>>> {
match (self.length, self.files.as_ref()) {
// Single-file
(Some(length), None) => Ok(Either::Left(once((
FileIteratorName::Single(self.name.as_ref()),
length,
)))),
(Some(length), None) => Ok(Either::Left(once(FileDetails {
filename: FileIteratorName::Single(self.name.as_ref()),
len: length,
attr: self.attr.as_ref(),
sha1: self.sha1.as_ref(),
symlink_path: self.symlink_path.as_deref(),
}))),
// Multi-file
(None, Some(files)) => {
if files.is_empty() {
anyhow::bail!("expected multi-file torrent to have at least one file")
}
Ok(Either::Right(
files
.iter()
.map(|f| (FileIteratorName::Tree(&f.path), f.length)),
))
Ok(Either::Right(files.iter().map(|f| FileDetails {
filename: FileIteratorName::Tree(&f.path),
len: f.length,
attr: f.attr.as_ref(),
sha1: f.sha1.as_ref(),
symlink_path: f.symlink_path.as_deref(),
})))
}
_ => anyhow::bail!("torrent can't be both in single and multi-file mode"),
}
}
pub fn iter_file_lengths(&self) -> anyhow::Result<impl Iterator<Item = u64> + '_> {
Ok(self.iter_filenames_and_lengths()?.map(|(_, l)| l))
Ok(self.iter_file_details()?.map(|d| d.len))
}
// NOTE: lengths MUST be constructed with Lengths::from_torrent, otherwise
// the yielded results will be garbage.
pub fn iter_file_details<'a>(
pub fn iter_file_details_ext<'a>(
&'a self,
lengths: &'a Lengths,
) -> anyhow::Result<impl Iterator<Item = FileDetails<'a, BufType>> + 'a> {
Ok(self
.iter_filenames_and_lengths()?
.scan(0u64, |acc_offset, (filename, len)| {
let offset = *acc_offset;
*acc_offset += len;
Some(FileDetails {
filename,
pieces: lengths.iter_pieces_within_offset(offset, len),
offset,
len,
})
}))
) -> anyhow::Result<impl Iterator<Item = FileDetailsExt<'a, BufType>> + 'a> {
Ok(self.iter_file_details()?.scan(0u64, |acc_offset, details| {
let offset = *acc_offset;
*acc_offset += details.len;
Some(FileDetailsExt {
pieces: lengths.iter_pieces_within_offset(offset, details.len),
details,
offset,
})
}))
}
}
/// Always returns `None`.
///
/// Referenced by name from `#[serde(default = "none")]` so that optional
/// fields absent from the input deserialize to `None`.
const fn none<T>() -> Option<T> {
    Option::None
}
/// One entry of the `files` list of a multi-file torrent.
///
/// The three optional fields are omitted from serialized output when `None`
/// and default to `None` when missing from the input.
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
pub struct TorrentMetaV1File<BufType> {
/// File length in bytes.
pub length: u64,
/// Path components of the file, one buffer per component.
pub path: Vec<BufType>,
/// Raw attribute flag characters; decoded by `FileDetails::attrs`
/// (`l`, `h`, `p`, `x`).
#[serde(default = "none", skip_serializing_if = "Option::is_none")]
pub attr: Option<BufType>,
/// NOTE(review): presumably the SHA-1 of the file contents as described by
/// BEP-47; no code visible here reads it beyond passing it through — confirm.
#[serde(default = "none", skip_serializing_if = "Option::is_none")]
pub sha1: Option<BufType>,
/// Path components stored under the bencode key `symlink path`.
#[serde(
default = "none",
rename = "symlink path",
skip_serializing_if = "Option::is_none"
)]
pub symlink_path: Option<Vec<BufType>>,
}
impl<BufType> TorrentMetaV1File<BufType>
@ -282,6 +353,9 @@ where
TorrentMetaV1File {
length: self.length,
path: self.path.clone_to_owned(within_buffer),
attr: self.attr.clone_to_owned(within_buffer),
sha1: self.sha1.clone_to_owned(within_buffer),
symlink_path: self.symlink_path.clone_to_owned(within_buffer),
}
}
}
@ -300,6 +374,9 @@ where
length: self.length,
md5sum: self.md5sum.clone_to_owned(within_buffer),
files: self.files.clone_to_owned(within_buffer),
attr: self.attr.clone_to_owned(within_buffer),
sha1: self.sha1.clone_to_owned(within_buffer),
symlink_path: self.symlink_path.clone_to_owned(within_buffer),
}
}
}

View file

@ -762,17 +762,15 @@ async fn async_main(opts: Opts, cancel: CancellationToken) -> anyhow::Result<()>
only_files,
..
}) => {
for (idx, (filename, len)) in
info.iter_filenames_and_lengths()?.enumerate()
{
for (idx, fd) in info.iter_file_details()?.enumerate() {
let included = match &only_files {
Some(files) => files.contains(&idx),
None => true,
};
info!(
"File {}, size {}{}",
filename.to_string()?,
SF::new(len),
"File {:?}, size {}{}",
fd.filename,
SF::new(fd.len),
if included { "" } else { ", will skip" }
)
}