Things are pretty broken now

This commit is contained in:
Igor Katson 2021-06-26 16:43:36 +01:00
parent 6a7f8a4d1a
commit d546dfd1e6
7 changed files with 364 additions and 35 deletions

1
Cargo.lock generated
View file

@ -904,6 +904,7 @@ dependencies = [
"librqbit", "librqbit",
"log", "log",
"pretty_env_logger", "pretty_env_logger",
"regex",
"reqwest", "reqwest",
"tokio", "tokio",
] ]

View file

@ -14,6 +14,7 @@ clap = "3.0.0-beta.2"
log = "0.4" log = "0.4"
pretty_env_logger = "0.4" pretty_env_logger = "0.4"
reqwest = "0.11" reqwest = "0.11"
regex = "1"
[dev-dependencies] [dev-dependencies]
futures = {version = "0.3"} futures = {version = "0.3"}

13
TODO.md Normal file
View file

@ -0,0 +1,13 @@
- [ ] Selective file downloading (mostly done)
- [ ] Seeking optimization
- If a file is not needed, no need to check its hash
- [ ] Proper counting of how much is left, and how much is downloaded
- [ ] Refactor "needed pieces" into a bitfield
- [ ] Send bitfield at the start if I have something
- [ ] use the "update_hash" function in piece checking
- [ ] signaling when file is done
someday:
- [ ] cancellation

View file

@ -16,6 +16,12 @@ pub const fn last_element_size_u64(total: u64, chunk_size: u64) -> u64 {
rem rem
} }
/// A piece of the torrent identified by its index, together with its length.
///
/// Values of this type are produced by `Lengths::iter_piece_infos`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PieceInfo {
/// Index of the piece within the torrent.
pub piece_index: ValidPieceIndex,
/// Length of this piece in bytes. Every piece has the regular piece length except the
/// last one, which uses the (possibly shorter) last-piece length.
pub len: u32,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChunkInfo { pub struct ChunkInfo {
pub piece_index: ValidPieceIndex, pub piece_index: ValidPieceIndex,
@ -119,6 +125,17 @@ impl Lengths {
pub const fn piece_offset(&self, index: ValidPieceIndex) -> u64 { pub const fn piece_offset(&self, index: ValidPieceIndex) -> u64 {
index.0 as u64 * self.piece_length as u64 index.0 as u64 * self.piece_length as u64
} }
/// Yields a [`PieceInfo`] for every piece, in ascending index order.
///
/// All pieces report the regular piece length, except the piece whose index equals
/// `last_piece_id`, which reports `last_piece_length`. The values needed are copied out of
/// `self` up front, so the returned iterator does not borrow `self`.
pub fn iter_piece_infos(&self) -> impl Iterator<Item = PieceInfo> {
    let regular_len = self.piece_length;
    let final_id = self.last_piece_id;
    let final_len = self.last_piece_length;
    (0..self.total_pieces()).map(move |piece| {
        let len = if piece != final_id {
            regular_len
        } else {
            final_len
        };
        PieceInfo {
            piece_index: ValidPieceIndex(piece),
            len,
        }
    })
}
pub fn iter_chunk_infos(&self, index: ValidPieceIndex) -> impl Iterator<Item = ChunkInfo> { pub fn iter_chunk_infos(&self, index: ValidPieceIndex) -> impl Iterator<Item = ChunkInfo> {
let mut remaining = self.piece_length(index); let mut remaining = self.piece_length(index);
let chunk_size = self.chunk_length; let chunk_size = self.chunk_length;

View file

@ -3,7 +3,7 @@ use std::{
fmt::Display, fmt::Display,
fs::{File, OpenOptions}, fs::{File, OpenOptions},
future::Future, future::Future,
io::{Read, Seek, Write}, io::{self, Read, Seek, SeekFrom, Write},
net::SocketAddr, net::SocketAddr,
path::{Path, PathBuf}, path::{Path, PathBuf},
sync::{ sync::{
@ -32,13 +32,14 @@ use crate::{
Handshake, Message, MessageBorrowed, MessageDeserializeError, MessageOwned, Piece, Request, Handshake, Message, MessageBorrowed, MessageDeserializeError, MessageOwned, Piece, Request,
}, },
peer_id::try_decode_peer_id, peer_id::try_decode_peer_id,
torrent_metainfo::TorrentMetaV1Owned, torrent_metainfo::{FileIteratorName, TorrentMetaV1Owned},
tracker_comms::{CompactTrackerResponse, TrackerRequest, TrackerRequestEvent}, tracker_comms::{CompactTrackerResponse, TrackerRequest, TrackerRequestEvent},
}; };
pub struct TorrentManagerBuilder { pub struct TorrentManagerBuilder {
torrent: TorrentMetaV1Owned, torrent: TorrentMetaV1Owned,
overwrite: bool, overwrite: bool,
output_folder: PathBuf, output_folder: PathBuf,
only_files: Option<Vec<usize>>,
} }
impl TorrentManagerBuilder { impl TorrentManagerBuilder {
@ -47,16 +48,27 @@ impl TorrentManagerBuilder {
torrent, torrent,
overwrite: false, overwrite: false,
output_folder: output_folder.as_ref().into(), output_folder: output_folder.as_ref().into(),
only_files: None,
} }
} }
pub fn overwrite(mut self, overwrite: bool) -> Self { pub fn only_files(&mut self, only_files: Vec<usize>) -> &mut Self {
self.only_files = Some(only_files);
self
}
pub fn overwrite(&mut self, overwrite: bool) -> &mut Self {
self.overwrite = overwrite; self.overwrite = overwrite;
self self
} }
pub async fn start_manager(self) -> anyhow::Result<TorrentManagerHandle> { pub async fn start_manager(self) -> anyhow::Result<TorrentManagerHandle> {
TorrentManager::start(self.torrent, self.output_folder, self.overwrite) TorrentManager::start(
self.torrent,
self.output_folder,
self.overwrite,
self.only_files,
)
} }
} }
@ -264,15 +276,206 @@ fn make_lengths(torrent: &TorrentMetaV1Owned) -> anyhow::Result<Lengths> {
Lengths::new(total_length, torrent.info.piece_length, None) Lengths::new(total_length, torrent.info.piece_length, None)
} }
/// Reads exactly `bytes_to_read` bytes from `file`, starting at its current position, and
/// feeds them into `hash`.
///
/// `buf` is scratch space: the data is read in chunks of at most `buf.len()` bytes, so the
/// caller can reuse one buffer across many calls.
///
/// # Errors
///
/// Returns an error if `buf` is empty while there is data to read, or if the file cannot
/// supply the requested number of bytes (including hitting end-of-file early).
fn update_hash_from_file(
    file: &mut File,
    hash: &mut sha1::Sha1,
    buf: &mut [u8],
    mut bytes_to_read: usize,
) -> anyhow::Result<()> {
    // With an empty scratch buffer each iteration would "read" zero bytes successfully and
    // the loop below would spin forever, so reject that combination up front.
    if buf.is_empty() && bytes_to_read > 0 {
        anyhow::bail!(
            "cannot read {} bytes using an empty buffer",
            bytes_to_read
        );
    }

    // Only used to make the error message more helpful.
    let mut read = 0;
    while bytes_to_read > 0 {
        let chunk = std::cmp::min(buf.len(), bytes_to_read);
        file.read_exact(&mut buf[..chunk]).with_context(|| {
            format!(
                "failed reading chunk of size {}, read so far {}",
                chunk, read
            )
        })?;
        hash.update(&buf[..chunk]);
        bytes_to_read -= chunk;
        read += chunk;
    }
    Ok(())
}
fn compute_needed_pieces( fn compute_needed_pieces(
torrent: &TorrentMetaV1Owned, torrent: &TorrentMetaV1Owned,
files: &mut [Arc<Mutex<File>>], files: &[Arc<Mutex<File>>],
only_files: Option<&[usize]>,
lengths: &Lengths, lengths: &Lengths,
) -> anyhow::Result<BF> { ) -> anyhow::Result<BF> {
let needed_pieces = vec![u8::MAX; lengths.piece_bitfield_bytes()]; let needed_pieces = vec![0u8; lengths.piece_bitfield_bytes()];
let needed_pieces = BF::from_vec(needed_pieces); let mut needed_pieces = BF::from_vec(needed_pieces);
struct CurrentFile<'a> {
index: usize,
fd: &'a Arc<Mutex<File>>,
len: u64,
name: FileIteratorName<'a, ByteString>,
full_file_required: bool,
processed_bytes: u64,
is_broken: bool,
}
impl<'a> CurrentFile<'a> {
fn remaining(&self) -> u64 {
self.len - self.processed_bytes
}
fn mark_processed_bytes(&mut self, bytes: u64) {
self.processed_bytes += bytes as u64
}
}
let mut file_iterator = files
.iter()
.zip(torrent.info.iter_filenames_and_lengths())
.enumerate()
.map(|(idx, (fd, (name, len)))| {
let full_file_required = if let Some(only_files) = only_files {
only_files.contains(&idx)
} else {
true
};
CurrentFile {
index: idx,
fd,
len,
name,
full_file_required,
processed_bytes: 0,
is_broken: false,
}
});
let mut current_file = file_iterator
.next()
.ok_or_else(|| anyhow::anyhow!("empty input file list"))?;
let mut read_buffer = vec![0u8; 65536];
for piece_info in lengths.iter_piece_infos() {
// We need to compute the hash (and afterwards mark the piece as NOT needed) if ANY of the following are true
// - the file is required
// - the current piece is required (i.e. it's a part of some other file that is required)
// This means, that for an easy implementation:
// - we ALWAYS try to compute the hash from existing files
// - after the whole piece was processed, we mark the piece needed if:
// - at least one file that the piece owns was required
// - and (there were errors OR the hash does not match)
//
// If there's an error, it's fine only if none of the files was required.
// let mut seek: Option<u64> = None;
// Optimization for a common case: if the piece is wholy in the file, and the file is not required, continue
// if !current_file.full_file_required && current_file.remaining() >= piece_info.len as u64 {
// seek = match seek {
// None => Some(piece_info.len as u64),
// Some(s) => {
// current_file.mark_processed_bytes(piece_info.len as u64);
// Some(s + piece_info.len as u64)
// }
// };
// continue;
// }
let mut computed_hash = sha1::Sha1::new();
let mut piece_remaining = piece_info.len as usize;
let mut piece_is_needed = false;
let mut at_least_one_file_required = current_file.full_file_required;
while piece_remaining > 0 {
let mut to_read_in_file =
std::cmp::min(current_file.remaining(), piece_remaining as u64) as usize;
while to_read_in_file == 0 {
current_file = file_iterator
.next()
.ok_or_else(|| anyhow::anyhow!("broken torrent metadata"))?;
at_least_one_file_required |= current_file.full_file_required;
to_read_in_file =
std::cmp::min(current_file.remaining(), piece_remaining as u64) as usize;
}
let pos = current_file.processed_bytes;
piece_remaining -= to_read_in_file;
current_file.mark_processed_bytes(to_read_in_file as u64);
if current_file.is_broken {
piece_is_needed = true;
continue;
}
if piece_is_needed {
continue;
}
let mut fd = current_file.fd.lock();
// if let Some(offset) = seek.take() {
// match fd.seek(SeekFrom::Start(offset)) {
// Ok(v) => {
// assert_eq!(v, offset)
// }
// Err(e) => {
// debug!(
// "error seeking in file {} to {}: {:#}",
// current_file.index, offset, &e
// );
// piece_is_needed = true;
// current_file.is_broken = true;
// continue;
// }
// }
// }
fd.seek(SeekFrom::Start(pos)).unwrap();
if let Err(err) = update_hash_from_file(
&mut fd,
&mut computed_hash,
&mut read_buffer,
to_read_in_file,
) {
debug!(
"error reading from file {} ({:?}) at {}: {:#}",
current_file.index, current_file.name, pos, &err
);
piece_is_needed = true;
current_file.is_broken = true;
}
}
if !at_least_one_file_required {
continue;
}
if piece_is_needed {
trace!(
"piece {} had errors, marking as needed",
piece_info.piece_index
);
needed_pieces.set(piece_info.piece_index.get() as usize, true);
continue;
}
if torrent
.info
.compare_hash(piece_info.piece_index.get(), &computed_hash)
.unwrap()
{
trace!(
"piece {} is fine, not marking as needed",
piece_info.piece_index
);
} else {
trace!(
"piece {} hash does not match, marking as needed",
piece_info.piece_index
);
needed_pieces.set(piece_info.piece_index.get() as usize, true);
}
}
// TODO: read and validate existing files
Ok(needed_pieces) Ok(needed_pieces)
} }
@ -281,6 +484,7 @@ impl TorrentManager {
torrent: TorrentMetaV1Owned, torrent: TorrentMetaV1Owned,
out: P, out: P,
overwrite: bool, overwrite: bool,
only_files: Option<Vec<usize>>,
) -> anyhow::Result<TorrentManagerHandle> { ) -> anyhow::Result<TorrentManagerHandle> {
let mut files = { let mut files = {
let mut files = let mut files =
@ -319,7 +523,8 @@ impl TorrentManager {
let peer_id = generate_peer_id(); let peer_id = generate_peer_id();
let lengths = make_lengths(&torrent).context("unable to compute Lengths from torrent")?; let lengths = make_lengths(&torrent).context("unable to compute Lengths from torrent")?;
let needed_pieces = compute_needed_pieces(&torrent, &mut files, &lengths)?; let needed_pieces =
compute_needed_pieces(&torrent, &files, only_files.as_deref(), &lengths)?;
debug!("computed lengths: {:?}", &lengths); debug!("computed lengths: {:?}", &lengths);
let chunk_tracker = ChunkTracker::new(needed_pieces, lengths); let chunk_tracker = ChunkTracker::new(needed_pieces, lengths);
@ -522,7 +727,7 @@ impl TorrentManager {
chunk_info.piece_index, who_sent, file_idx, absolute_offset, &chunk_info chunk_info.piece_index, who_sent, file_idx, absolute_offset, &chunk_info
); );
file_g file_g
.seek(std::io::SeekFrom::Start(absolute_offset)) .seek(SeekFrom::Start(absolute_offset))
.with_context(|| { .with_context(|| {
format!( format!(
"error seeking to {}, file id: {}", "error seeking to {}, file id: {}",
@ -736,49 +941,50 @@ impl TorrentManager {
let mut h = sha1::Sha1::new(); let mut h = sha1::Sha1::new();
let piece_length = self.inner.lengths.piece_length(piece_index); let piece_length = self.inner.lengths.piece_length(piece_index);
let mut absolute_offset = self.inner.lengths.piece_offset(piece_index); let mut absolute_offset = self.inner.lengths.piece_offset(piece_index);
let mut buf = vec![0u8; std::cmp::min(8192, piece_length as usize)]; let mut buf = vec![0u8; std::cmp::min(65536, piece_length as usize)];
let mut left_to_read = piece_length as usize; let mut piece_remaining_bytes = piece_length as usize;
for (file_idx, file_len) in self.inner.torrent.info.iter_file_lengths().enumerate() { for (file_idx, (name, file_len)) in self
.inner
.torrent
.info
.iter_filenames_and_lengths()
.enumerate()
{
if absolute_offset > file_len { if absolute_offset > file_len {
absolute_offset -= file_len; absolute_offset -= file_len;
continue; continue;
} }
let file_remaining_len = file_len - absolute_offset; let file_remaining_len = file_len - absolute_offset;
let to_read_in_file = std::cmp::min(file_remaining_len, left_to_read as u64) as usize; let to_read_in_file =
let mut left_to_read_in_file = to_read_in_file; std::cmp::min(file_remaining_len, piece_remaining_bytes as u64) as usize;
let mut file_g = self.inner.files[file_idx].lock(); let mut file_g = self.inner.files[file_idx].lock();
debug!( debug!(
"piece={}, handle={}, file_idx={}, seeking to {}. Last received chunk: {:?}", "piece={}, handle={}, file_idx={}, seeking to {}. Last received chunk: {:?}",
piece_index, who_sent, file_idx, absolute_offset, &last_received_chunk piece_index, who_sent, file_idx, absolute_offset, &last_received_chunk
); );
file_g file_g
.seek(std::io::SeekFrom::Start(absolute_offset)) .seek(SeekFrom::Start(absolute_offset))
.with_context(|| { .with_context(|| {
format!( format!(
"error seeking to {}, file id: {}", "error seeking to {}, file id: {}",
absolute_offset, file_idx absolute_offset, file_idx
) )
})?; })?;
while left_to_read_in_file > 0 { update_hash_from_file(&mut file_g, &mut h, &mut buf, to_read_in_file).with_context(
let chunk_length = std::cmp::min(buf.len(), left_to_read_in_file); || {
file_g format!(
.read_exact(&mut buf[..chunk_length]) "error reading {} bytes, file_id: {} (\"{:?}\")",
.with_context(|| { to_read_in_file, file_idx, name
format!( )
"error reading {} bytes, file_id: {}, left_to_read_in_file: {}", },
chunk_length, file_idx, left_to_read_in_file )?;
)
})?;
h.update(&buf[..chunk_length]);
left_to_read_in_file -= chunk_length;
}
left_to_read -= to_read_in_file; piece_remaining_bytes -= to_read_in_file;
if left_to_read == 0 { if piece_remaining_bytes == 0 {
return Ok(true); return Ok(true);
} }
@ -857,7 +1063,7 @@ impl TorrentManager {
to_write, to_write,
absolute_offset absolute_offset
); );
file_g.seek(std::io::SeekFrom::Start(absolute_offset))?; file_g.seek(SeekFrom::Start(absolute_offset))?;
file_g.write_all(&buf[..to_write])?; file_g.write_all(&buf[..to_write])?;
buf = &buf[to_write..]; buf = &buf[to_write..];
if buf.is_empty() { if buf.is_empty() {

View file

@ -1,4 +1,4 @@
use std::{fs::File, ops::Deref, path::PathBuf}; use std::{fmt::Write, fs::File, ops::Deref, path::PathBuf};
use serde::Deserialize; use serde::Deserialize;
@ -73,7 +73,42 @@ pub enum FileIteratorName<'a, ByteBuf> {
Tree(&'a [ByteBuf]), Tree(&'a [ByteBuf]),
} }
/// Renders the name as a path: components are joined with the platform's main path
/// separator. A component that is not valid UTF-8 is shown as `<INVALID UTF-8>` and an absent
/// component is shown as `output`.
impl<'a, ByteBuf> std::fmt::Debug for FileIteratorName<'a, ByteBuf>
where
    ByteBuf: AsRef<[u8]>,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.iter_components()
            .enumerate()
            .try_for_each(|(position, component)| {
                // Separator goes between components, never before the first one.
                if position != 0 {
                    f.write_char(std::path::MAIN_SEPARATOR)?;
                }
                let text = match component {
                    Some(raw) => std::str::from_utf8(raw.as_ref()).unwrap_or("<INVALID UTF-8>"),
                    None => "output",
                };
                f.write_str(text)
            })
    }
}
impl<'a, ByteBuf> FileIteratorName<'a, ByteBuf> { impl<'a, ByteBuf> FileIteratorName<'a, ByteBuf> {
/// Builds a relative [`PathBuf`] out of the name's components.
///
/// Components are appended in order. An absent component is replaced with `output` and
/// ends the path.
///
/// # Errors
///
/// Fails if any component is not valid UTF-8.
// NOTE(review): components are pushed as-is; if they can come from an untrusted torrent,
// callers that create files from this path may want to reject `..` or absolute components.
pub fn to_pathbuf(&self) -> anyhow::Result<PathBuf>
where
    ByteBuf: AsRef<[u8]>,
{
    let mut path = PathBuf::new();
    for component in self.iter_components() {
        match component {
            Some(raw) => path.push(std::str::from_utf8(raw.as_ref())?),
            None => {
                path.push("output");
                break;
            }
        }
    }
    Ok(path)
}
pub fn iter_components(&self) -> impl Iterator<Item = Option<&'a ByteBuf>> { pub fn iter_components(&self) -> impl Iterator<Item = Option<&'a ByteBuf>> {
let single_it = std::iter::once(match self { let single_it = std::iter::once(match self {
FileIteratorName::Single(n) => Some(*n), FileIteratorName::Single(n) => Some(*n),
@ -91,6 +126,12 @@ impl<'a, ByteBuf> FileIteratorName<'a, ByteBuf> {
} }
impl<BufType: Clone + Deref<Target = [u8]>> TorrentMetaV1Info<BufType> { impl<BufType: Clone + Deref<Target = [u8]>> TorrentMetaV1Info<BufType> {
/// Returns the 20-byte hash stored for `piece` in `self.pieces`, or `None` when `pieces`
/// is too short to contain an entry for that index.
///
/// `hash` is accepted but not consulted by this method.
pub fn get_hash(&self, piece: u32, hash: &sha1::Sha1) -> Option<&[u8]> {
    const HASH_LEN: usize = 20;
    let offset = piece as usize * HASH_LEN;
    self.pieces.deref().get(offset..offset + HASH_LEN)
}
pub fn compare_hash(&self, piece: u32, hash: &sha1::Sha1) -> Option<bool> { pub fn compare_hash(&self, piece: u32, hash: &sha1::Sha1) -> Option<bool> {
let start = piece as usize * 20; let start = piece as usize * 20;
let end = start + 20; let end = start + 20;

View file

@ -51,11 +51,38 @@ struct Opts {
/// The filename of the .torrent file. /// The filename of the .torrent file.
output_folder: String, output_folder: String,
#[clap(short = 'r', long = "filename-re")]
only_files_matching_regex: Option<String>,
/// Set if you are ok to write on top of existing files /// Set if you are ok to write on top of existing files
#[clap(long)] #[clap(long)]
overwrite: bool, overwrite: bool,
} }
/// Returns the indices of all files in `torrent` whose path matches `filename_re`.
///
/// Each file name is converted to a path and the regular expression is tested against that
/// path as a string.
///
/// # Errors
///
/// Fails if `filename_re` is not a valid regular expression, if any file name is not valid
/// UTF-8, or if no file matches at all.
fn compute_only_files(
    torrent: &TorrentMetaV1Owned,
    filename_re: &str,
) -> anyhow::Result<Vec<usize>> {
    let pattern = regex::Regex::new(filename_re).context("filename regex is incorrect")?;

    let mut matching = Vec::new();
    for (idx, (name, _)) in torrent.info.iter_filenames_and_lengths().enumerate() {
        let path = name
            .to_pathbuf()
            .with_context(|| format!("filename of file {} is not valid utf8", idx))?;
        let path_str = path
            .to_str()
            .ok_or_else(|| anyhow::anyhow!("filename of file {} is not valid utf8", idx))?;
        if pattern.is_match(path_str) {
            matching.push(idx);
        }
    }

    if matching.is_empty() {
        anyhow::bail!("none of the filenames match the given regex")
    }
    Ok(matching)
}
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
pretty_env_logger::init(); pretty_env_logger::init();
@ -79,10 +106,33 @@ fn main() -> anyhow::Result<()> {
info!("Torrent metadata: {:#?}", &torrent); info!("Torrent metadata: {:#?}", &torrent);
let builder = let only_files = if let Some(filename_re) = opts.only_files_matching_regex {
TorrentManagerBuilder::new(torrent, opts.output_folder).overwrite(opts.overwrite); Some(compute_only_files(&torrent, &filename_re)?)
} else {
None
};
let mut builder = TorrentManagerBuilder::new(torrent, opts.output_folder);
builder.overwrite(opts.overwrite);
if let Some(only_files) = only_files {
builder.only_files(only_files);
}
let manager_handle = builder.start_manager().await?; let manager_handle = builder.start_manager().await?;
manager_handle.wait_until_completed().await?; manager_handle.wait_until_completed().await?;
Ok(()) Ok(())
}) })
} }
#[cfg(test)]
mod tests {
    use std::{
        fs::{self, File},
        io::Read,
    };

    /// Sanity check that `read_exact` fills a 64 KiB buffer from a file on disk.
    ///
    /// The fixture is created by the test itself in the system temp directory, so the test
    /// does not depend on any file that happens to exist on a developer's machine.
    #[test]
    fn test_read_exact_fills_buffer() {
        const LEN: usize = 65536;
        const FILL: u8 = 0xab;

        // The process id keeps concurrent test runs from clobbering each other's fixture.
        let path = std::env::temp_dir().join(format!(
            "rqbit-read-exact-test-{}.bin",
            std::process::id()
        ));
        fs::write(&path, vec![FILL; LEN]).unwrap();

        let mut buf = vec![0u8; LEN];
        let outcome = File::open(&path).and_then(|mut f| f.read_exact(&mut buf[..]));

        // Clean up before asserting so a failure does not leave the fixture behind.
        let _ = fs::remove_file(&path);

        outcome.unwrap();
        assert!(buf.iter().all(|&byte| byte == FILL));
    }
}