From 3c4ebb85f02531be7b64480add989825ff5f1660 Mon Sep 17 00:00:00 2001 From: Carl Lerche <me@carllerche.com> Date: Mon, 9 Feb 2015 09:04:18 -0800 Subject: [PATCH] WIP - Implement abstractions for working with bytes --- Cargo.toml | 28 +- README.md | 3 + src/alloc.rs | 180 +++++++++++ src/byte_buf.rs | 257 ++++++++++++++++ src/byte_str.rs | 227 ++++++++++++++ src/bytes.rs | 347 +++++++++++++++++++++ src/lib.rs | 356 +++++++++++++++++++++- src/ring.rs | 200 +++++++++++++ src/rope.rs | 583 ++++++++++++++++++++++++++++++++++++ src/slice.rs | 57 ++++ test/test.rs | 15 + test/test_buf.rs | 0 test/test_byte_buf.rs | 46 +++ test/test_rope.rs | 90 ++++++ test/test_seq_byte_str.rs | 33 ++ test/test_small_byte_str.rs | 33 ++ 16 files changed, 2446 insertions(+), 9 deletions(-) create mode 100644 src/alloc.rs create mode 100644 src/byte_buf.rs create mode 100644 src/byte_str.rs create mode 100644 src/bytes.rs create mode 100644 src/ring.rs create mode 100644 src/rope.rs create mode 100644 src/slice.rs create mode 100644 test/test.rs create mode 100644 test/test_buf.rs create mode 100644 test/test_byte_buf.rs create mode 100644 test/test_rope.rs create mode 100644 test/test_seq_byte_str.rs create mode 100644 test/test_small_byte_str.rs diff --git a/Cargo.toml b/Cargo.toml index 1196c8c..f88a5eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,23 @@ [package] -name = "bytes" -version = "0.0.1" -authors = ["Carl Lerche <me@carllerche.com>"] +name = "bytes" +version = "0.0.1" +authors = ["Carl Lerche <me@carllerche.com>"] description = "Types and traits for working with bytes" -license = "MIT" -# include = [ -# "Cargo.toml", -# "src/**/*.rs", -# ] +license = "MIT" + +[dev-dependencies] + +rand = "0.1.2" +# iobuf = "*" + + +[[bench]] + +name = "bench" +path = "bench/bench.rs" + +[[test]] + +name = "test" +path = "test/test.rs" diff --git a/README.md b/README.md index e69de29..ed48431 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,3 @@ +# Bytes + +A utility library for working with bytes. diff --git a/src/alloc.rs b/src/alloc.rs new file mode 100644 index 0000000..ad9452f --- /dev/null +++ b/src/alloc.rs @@ -0,0 +1,180 @@ +use std::{mem, ptr}; +use std::rt::heap; +use std::sync::atomic::{AtomicUsize, Ordering}; + +const MAX_ALLOC_SIZE: usize = (1 << 32) - 1; + +/// Allocates memory to be used by Bufs or Bytes. Allows allocating memory +/// using alternate stratgies than the default Rust heap allocator. Also does +/// not require that allocations are continuous in memory. +/// +/// For example, an alternate allocator could use a slab of 4kb chunks of +/// memory and return as many chunks as needed to satisfy the length +/// requirement. +pub trait Allocator: Sync + Send { + + /// Allocate memory. May or may not be contiguous. 
+ fn allocate(&self, len: usize) -> MemRef; + + /// Deallocate a chunk of memory + fn deallocate(&self, mem: *mut Mem); +} + +pub struct MemRef { + ptr: *mut u8, +} + +impl MemRef { + pub fn new(mem: *mut Mem) -> MemRef { + let ptr = mem as *mut u8; + + unsafe { + MemRef { + ptr: ptr.offset(mem::size_of::<Mem>() as isize), + } + } + } + + #[inline] + pub fn none() -> MemRef { + MemRef { ptr: ptr::null_mut() } + } + + #[inline] + pub fn is_none(&self) -> bool { + self.ptr.is_null() + } + + #[inline] + pub fn ptr(&self) -> *mut u8 { + self.ptr + } + + pub fn bytes(&self) -> &[u8] { + use std::raw::Slice; + + unsafe { + mem::transmute(Slice { + data: self.ptr(), + len: self.mem().len, + }) + } + } + + #[inline] + pub fn bytes_mut(&mut self) -> &mut [u8] { + unsafe { mem::transmute(self.bytes()) } + } + + #[inline] + fn mem_ptr(&self) -> *mut Mem { + unsafe { + self.ptr.offset(-(mem::size_of::<Mem>() as isize)) as *mut Mem + } + } + + #[inline] + fn mem(&self) -> &Mem { + unsafe { + mem::transmute(self.mem_ptr()) + } + } +} + +impl Clone for MemRef { + #[inline] + fn clone(&self) -> MemRef { + self.mem().refs.fetch_add(1, Ordering::Relaxed); + MemRef { ptr: self.ptr } + } +} + +impl Drop for MemRef { + fn drop(&mut self) { + // Guard against the ref having already been dropped + if self.ptr.is_null() { return; } + + // Decrement the ref count + if 1 == self.mem().refs.fetch_sub(1, Ordering::Relaxed) { + // Last ref dropped, free the memory + unsafe { + let alloc: &Allocator = mem::transmute(self.mem().allocator); + alloc.deallocate(self.mem_ptr()); + } + } + } +} + +unsafe impl Send for MemRef { } +unsafe impl Sync for MemRef { } + +/// Memory allocated by an Allocator must be prefixed with Mem +pub struct Mem { + // TODO: It should be possible to reduce the size of this struct + allocator: *const Allocator, + refs: AtomicUsize, + len: usize, +} + +impl Mem { + fn new(len: usize, allocator: *const Allocator) -> Mem { + Mem { + allocator: allocator, + refs: AtomicUsize::new(1), + len: len, + } + } +} + +pub static HEAP: Heap = Heap; + +#[allow(missing_copy_implementations)] +pub struct Heap; + +impl Heap { + pub fn allocate(&self, len: usize) -> MemRef { + // Make sure that the allocation is within the permitted range + if len > MAX_ALLOC_SIZE { + return MemRef::none(); + } + + let alloc_len = len + mem::size_of::<Mem>(); + + unsafe { + // Attempt to allocate the memory + let ptr: *mut Mem = mem::transmute( + heap::allocate(alloc_len, mem::min_align_of::<u8>())); + + // If failed, return None + if ptr.is_null() { + return MemRef::none(); + } + + // Write the mem header + ptr::write(ptr, Mem::new(len, mem::transmute(self as &Allocator))); + + // Return the info + MemRef::new(ptr) + } + } + + pub fn deallocate(&self, mem: *mut Mem) { + unsafe { + let m: &Mem = mem::transmute(mem); + + heap::deallocate( + mem as *mut u8, m.len + mem::size_of::<Mem>(), + mem::min_align_of::<u8>()) + } + } +} + +impl Allocator for Heap { + fn allocate(&self, len: usize) -> MemRef { + Heap::allocate(self, len) + } + + fn deallocate(&self, mem: *mut Mem) { + Heap::deallocate(self, mem) + } +} diff --git a/src/byte_buf.rs b/src/byte_buf.rs new file mode 100644 index 0000000..14f5006 --- /dev/null +++ b/src/byte_buf.rs @@ -0,0 +1,257 @@ +use {alloc, Bytes, SeqByteStr, BufResult, BufError, MAX_CAPACITY}; +use traits::{Buf, MutBuf, ByteStr}; +use std::{cmp, ptr}; +use std::num::UnsignedInt; + +/* + * + * ===== ByteBuf ===== + * + */ + +pub struct ByteBuf { + mem: alloc::MemRef, + cap: u32, + pos: u32, + lim: u32 +} + 
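+// Cursor invariant (asserted in `from_mem_ref` below): pos <= lim <= cap.
+// `MutByteBuf::write_slice` advances `pos` toward `lim`; `flip` then sets
+// `lim = pos` and resets `pos` to 0, so `read_slice` walks the written region.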
+impl ByteBuf { + pub fn from_slice(bytes: &[u8]) -> ByteBuf { + let mut buf = ByteBuf::mut_with_capacity(bytes.len()); + buf.write(bytes).ok().expect("unexpected failure"); + buf.flip() + } + + pub fn mut_with_capacity(capacity: usize) -> MutByteBuf { + assert!(capacity <= MAX_CAPACITY); + MutByteBuf { buf: ByteBuf::new(capacity as u32) } + } + + pub fn none() -> ByteBuf { + ByteBuf { + mem: alloc::MemRef::none(), + cap: 0, + pos: 0, + lim: 0, + } + } + + pub unsafe fn from_mem_ref(mem: alloc::MemRef, cap: u32, pos: u32, lim: u32) -> ByteBuf { + debug_assert!(pos <= lim && lim <= cap, "invalid arguments; cap={}; pos={}; lim={}", cap, pos, lim); + + ByteBuf { + mem: mem, + cap: cap, + pos: pos, + lim: lim, + } + } + + fn new(mut capacity: u32) -> ByteBuf { + // Handle 0 capacity case + if capacity == 0 { + return ByteBuf::none(); + } + + // Round the capacity to the closest power of 2 + capacity = UnsignedInt::next_power_of_two(capacity); + + // Allocate the memory + let mem = alloc::HEAP.allocate(capacity as usize); + + // If the allocation failed, return a blank buf + if mem.is_none() { + return ByteBuf::none(); + } + + ByteBuf { + mem: mem, + cap: capacity, + pos: 0, + lim: capacity + } + } + + pub fn capacity(&self) -> usize { + self.cap as usize + } + + pub fn flip(self) -> MutByteBuf { + let mut buf = MutByteBuf { buf: self }; + buf.clear(); + buf + } + + pub fn read_slice(&mut self, dst: &mut [u8]) -> usize { + let len = cmp::min(dst.len(), self.remaining()); + let cnt = len as u32; + + unsafe { + ptr::copy_nonoverlapping_memory( + dst.as_mut_ptr(), + self.mem.ptr().offset(self.pos as isize), len); + } + + self.pos += cnt; + len + } + + pub fn to_seq_byte_str(self) -> SeqByteStr { + unsafe { + let ByteBuf { mem, pos, lim, .. } = self; + SeqByteStr::from_mem_ref( + mem, pos, lim - pos) + } + } + + pub fn to_bytes(self) -> Bytes { + Bytes::of(self.to_seq_byte_str()) + } + + fn pos(&self) -> usize { + self.pos as usize + } + + fn lim(&self) -> usize { + self.lim as usize + } + + fn remaining_u32(&self) -> u32 { + self.lim - self.pos + } + + fn ensure_remaining(&self, cnt: usize) -> BufResult<()> { + if cnt > self.remaining() { + return Err(BufError::Overflow); + } + + Ok(()) + } +} + +impl Buf for ByteBuf { + fn remaining(&self) -> usize { + self.remaining_u32() as usize + } + + fn bytes<'a>(&'a self) -> &'a [u8] { + &self.mem.bytes()[self.pos()..self.lim()] + } + + fn advance(&mut self, mut cnt: usize) { + cnt = cmp::min(cnt, self.remaining()); + self.pos += cnt as u32; + } + + fn read_slice(&mut self, dst: &mut [u8]) -> usize { + ByteBuf::read_slice(self, dst) + } +} + +unsafe impl Send for ByteBuf { } + +/* + * + * ===== ROByteBuf ===== + * + */ + +/// Same as `ByteBuf` but cannot be flipped to a `MutByteBuf`. 
+pub struct ROByteBuf { + buf: ByteBuf, +} + +impl ROByteBuf { + pub unsafe fn from_mem_ref(mem: alloc::MemRef, cap: u32, pos: u32, lim: u32) -> ROByteBuf { + ROByteBuf { + buf: ByteBuf::from_mem_ref(mem, cap, pos, lim) + } + } + + pub fn to_seq_byte_str(self) -> SeqByteStr { + self.buf.to_seq_byte_str() + } + + pub fn to_bytes(self) -> Bytes { + self.buf.to_bytes() + } +} + +impl Buf for ROByteBuf { + + fn remaining(&self) -> usize { + self.buf.remaining() + } + + fn bytes<'a>(&'a self) -> &'a [u8] { + self.buf.bytes() + } + + fn advance(&mut self, cnt: usize) { + self.buf.advance(cnt) + } + + fn read_slice(&mut self, dst: &mut [u8]) -> usize { + self.buf.read_slice(dst) + } +} + +/* + * + * ===== MutByteBuf ===== + * + */ + +pub struct MutByteBuf { + buf: ByteBuf, +} + +impl MutByteBuf { + pub fn capacity(&self) -> usize { + self.buf.capacity() as usize + } + + pub fn flip(self) -> ByteBuf { + let mut buf = self.buf; + + buf.lim = buf.pos; + buf.pos = 0; + buf + } + + pub fn clear(&mut self) { + self.buf.pos = 0; + self.buf.lim = self.buf.cap; + } + + pub fn write_slice(&mut self, src: &[u8]) -> BufResult<()> { + try!(self.buf.ensure_remaining(src.len())); + let cnt = src.len() as u32; + + unsafe { + ptr::copy_nonoverlapping_memory( + self.buf.mem.ptr().offset(self.buf.pos as isize), + src.as_ptr(), src.len()); + } + + self.buf.pos += cnt; + return Ok(()); + } +} + +impl MutBuf for MutByteBuf { + fn remaining(&self) -> usize { + self.buf.remaining() + } + + fn advance(&mut self, cnt: usize) { + self.buf.advance(cnt) + } + + fn mut_bytes<'a>(&'a mut self) -> &'a mut [u8] { + let pos = self.buf.pos(); + let lim = self.buf.lim(); + &mut self.buf.mem.bytes_mut()[pos..lim] + } +} diff --git a/src/byte_str.rs b/src/byte_str.rs new file mode 100644 index 0000000..2cccebf --- /dev/null +++ b/src/byte_str.rs @@ -0,0 +1,227 @@ +use {alloc, Bytes, ByteBuf, ROByteBuf}; +use traits::{Buf, MutBuf, ByteStr}; +use std::{cmp, ops}; + +/* + * + * ===== SeqByteStr ===== + * + */ + +pub struct SeqByteStr { + mem: alloc::MemRef, + pos: u32, + len: u32, +} + +impl SeqByteStr { + /// Create a new `SeqByteStr` from a byte slice. + /// + /// The contents of the byte slice will be copied. + pub fn from_slice(bytes: &[u8]) -> SeqByteStr { + let mut buf = ByteBuf::mut_with_capacity(bytes.len()); + + if let Err(e) = buf.write(bytes) { + panic!("failed to copy bytes from slice; err={:?}", e); + } + + buf.flip().to_seq_byte_str() + } + + /// Creates a new `SeqByteStr` from a `MemRef`, an offset, and a length. + /// + /// This function is unsafe as there are no guarantees that the given + /// arguments are valid. 
+ pub unsafe fn from_mem_ref(mem: alloc::MemRef, pos: u32, len: u32) -> SeqByteStr { + SeqByteStr { + mem: mem, + pos: pos, + len: len, + } + } +} + +impl ByteStr for SeqByteStr { + type Buf = ROByteBuf; + + fn buf(&self) -> ROByteBuf { + unsafe { + let pos = self.pos; + let lim = pos + self.len; + + ROByteBuf::from_mem_ref(self.mem.clone(), lim, pos, lim) + } + } + + fn concat<B: ByteStr>(&self, _other: B) -> Bytes { + unimplemented!(); + } + + fn len(&self) -> usize { + self.len as usize + } + + fn slice(&self, begin: usize, end: usize) -> Bytes { + if begin >= end || begin >= self.len() { + return Bytes::empty() + } + + let bytes = unsafe { + SeqByteStr::from_mem_ref( + self.mem.clone(), + self.pos + begin as u32, + (end - begin) as u32) + }; + + Bytes::of(bytes) + } + + fn to_bytes(self) -> Bytes { + Bytes::of(self) + } +} + +impl ops::Index<usize> for SeqByteStr { + type Output = u8; + + fn index(&self, index: &usize) -> &u8 { + assert!(*index < self.len()); + + unsafe { + &*self.mem.ptr() + .offset(*index as isize + self.pos as isize) + } + } +} + +impl Clone for SeqByteStr { + fn clone(&self) -> SeqByteStr { + SeqByteStr { + mem: self.mem.clone(), + pos: self.pos, + len: self.len, + } + } +} + +/* + * + * ===== SmallByteStr ===== + * + */ + +#[cfg(target_pointer_width = "64")] +const MAX_LEN: usize = 7; + +#[cfg(target_pointer_width = "32")] +const MAX_LEN: usize = 3; + +#[derive(Clone, Copy)] +pub struct SmallByteStr { + len: u8, + bytes: [u8; MAX_LEN], +} + +impl SmallByteStr { + pub fn zero() -> SmallByteStr { + use std::mem; + + SmallByteStr { + len: 0, + bytes: unsafe { mem::zeroed() } + } + } + + pub fn from_slice(bytes: &[u8]) -> Option<SmallByteStr> { + use std::mem; + use std::slice::bytes; + + if bytes.len() > MAX_LEN { + return None; + } + + let mut ret = SmallByteStr { + len: bytes.len() as u8, + bytes: unsafe { mem::zeroed() }, + }; + + // Copy the memory + bytes::copy_memory(&mut ret.bytes, bytes); + + Some(ret) + } +} + +impl ByteStr for SmallByteStr { + type Buf = SmallByteStrBuf; + + fn buf(&self) -> SmallByteStrBuf { + SmallByteStrBuf { small: self.clone() } + } + + fn concat<B: ByteStr>(&self, _other: B) -> Bytes { + unimplemented!(); + } + + fn len(&self) -> usize { + self.len as usize + } + + fn slice(&self, _begin: usize, _end: usize) -> Bytes { + unimplemented!(); + } + + fn split_at(&self, _mid: usize) -> (Bytes, Bytes) { + unimplemented!(); + } + + fn to_bytes(self) -> Bytes { + Bytes::of(self) + } +} + +impl ops::Index<usize> for SmallByteStr { + type Output = u8; + + fn index(&self, index: &usize) -> &u8 { + assert!(*index < self.len()); + &self.bytes[*index] + } +} + +#[derive(Clone)] +#[allow(missing_copy_implementations)] +pub struct SmallByteStrBuf { + small: SmallByteStr, +} + +impl SmallByteStrBuf { + fn len(&self) -> usize { + (self.small.len & 0x0F) as usize + } + + fn pos(&self) -> usize { + (self.small.len >> 4) as usize + } +} + +impl Buf for SmallByteStrBuf { + fn remaining(&self) -> usize { + self.len() - self.pos() + } + + fn bytes(&self) -> &[u8] { + &self.small.bytes[self.pos()..self.len()] + } + + fn advance(&mut self, mut cnt: usize) { + cnt = cmp::min(cnt, self.remaining()); + self.small.len += (cnt as u8) << 4; + } +} + +#[test] +pub fn test_size_of() { + use std::mem; + assert_eq!(mem::size_of::<SmallByteStr>(), mem::size_of::<usize>()); +} diff --git a/src/bytes.rs b/src/bytes.rs new file mode 100644 index 0000000..0b97f0e --- /dev/null +++ b/src/bytes.rs @@ -0,0 +1,347 @@ +use {Buf, ByteStr, ByteBuf, SmallByteStr}; +use std::{mem, 
ops, ptr}; +use std::any::{Any, TypeId}; +use std::raw::TraitObject; +use core::nonzero::NonZero; + +const INLINE: usize = 1; + +#[unsafe_no_drop_flag] +pub struct Bytes { + vtable: NonZero<usize>, + data: *mut (), +} + +impl Bytes { + pub fn from_slice(bytes: &[u8]) -> Bytes { + SmallByteStr::from_slice(bytes) + .map(|small| Bytes::of(small)) + .unwrap_or_else(|| ByteBuf::from_slice(bytes).to_bytes()) + } + + pub fn of<B: ByteStr + 'static>(bytes: B) -> Bytes { + unsafe { + if inline::<B>() { + let mut vtable; + let mut data; + + { + let obj: &ByteStrPriv = &bytes; + let obj: TraitObject = mem::transmute(obj); + let ptr: *const *mut () = mem::transmute(obj.data); + + data = *ptr; + vtable = obj.vtable; + } + + // Prevent drop from being called + mem::forget(bytes); + + Bytes { + vtable: NonZero::new(vtable as usize | INLINE), + data: data, + } + } else { + let obj: Box<ByteStrPriv> = Box::new(bytes); + let obj: TraitObject = mem::transmute(obj); + + Bytes { + vtable: NonZero::new(obj.vtable as usize), + data: obj.data, + } + } + } + } + + pub fn empty() -> Bytes { + Bytes::of(SmallByteStr::zero()) + } + + /// If the underlying `ByteStr` is of type `B`, returns a reference to it + /// otherwise None. + pub fn downcast_ref<'a, B: ByteStr + 'static>(&'a self) -> Option<&'a B> { + if TypeId::of::<B>() == self.obj().get_type_id() { + unsafe { + if inline::<B>() { + return Some(mem::transmute(&self.data)); + } else { + return Some(mem::transmute(self.data)); + } + } + } + + None + } + + /// If the underlying `ByteStr` is of type `B`, returns the unwraped value, + /// otherwise, returns the original `Bytes` as `Err`. + pub fn try_unwrap<B: ByteStr + 'static>(self) -> Result<B, Bytes> { + if TypeId::of::<B>() == self.obj().get_type_id() { + unsafe { + // Underlying ByteStr value is of the correct type. 
Unwrap it + let mut ret; + + if inline::<B>() { + // The value is inline, read directly from the pointer + ret = ptr::read(mem::transmute(&self.data)); + } else { + ret = ptr::read(mem::transmute(self.data)); + } + + mem::forget(self); + Ok(ret) + } + } else { + Err(self) + } + } + + fn obj(&self) -> &ByteStrPriv { + unsafe { + let obj = if self.is_inline() { + TraitObject { + data: mem::transmute(&self.data), + vtable: mem::transmute(*self.vtable - 1), + } + } else { + TraitObject { + data: self.data, + vtable: mem::transmute(*self.vtable), + } + }; + + mem::transmute(obj) + } + } + + fn obj_mut(&mut self) -> &mut ByteStrPriv { + unsafe { mem::transmute(self.obj()) } + } + + fn is_inline(&self) -> bool { + (*self.vtable & INLINE) == INLINE + } +} + +fn inline<B: ByteStr>() -> bool { + mem::size_of::<B>() <= mem::size_of::<usize>() +} + +impl ByteStr for Bytes { + + type Buf = Box<Buf+'static>; + + fn buf(&self) -> Box<Buf+'static> { + self.obj().buf() + } + + fn concat<B: ByteStr+'static>(&self, other: B) -> Bytes { + self.obj().concat(Bytes::of(other)) + } + + fn len(&self) -> usize { + self.obj().len() + } + + fn slice(&self, begin: usize, end: usize) -> Bytes { + self.obj().slice(begin, end) + } + + fn split_at(&self, mid: usize) -> (Bytes, Bytes) { + self.obj().split_at(mid) + } + + fn to_bytes(self) -> Bytes { + self + } +} + +impl ops::Index<usize> for Bytes { + type Output = u8; + + fn index(&self, index: &usize) -> &u8 { + self.obj().index(index) + } +} + +impl Clone for Bytes { + fn clone(&self) -> Bytes { + self.obj().clone() + } +} + +impl Drop for Bytes { + fn drop(&mut self) { + if *self.vtable == 0 { + return; + } + + unsafe { + if self.is_inline() { + self.obj_mut().drop(); + } else { + let _: Box<ByteStrPriv> = + mem::transmute(self.obj()); + } + } + } +} + +unsafe impl Send for Bytes { } +unsafe impl Sync for Bytes { } + +trait ByteStrPriv { + + fn buf(&self) -> Box<Buf+'static>; + + fn clone(&self) -> Bytes; + + fn concat(&self, other: Bytes) -> Bytes; + + fn drop(&mut self); + + fn get_type_id(&self) -> TypeId; + + fn index(&self, index: &usize) -> &u8; + + fn len(&self) -> usize; + + fn slice(&self, begin: usize, end: usize) -> Bytes; + + fn split_at(&self, mid: usize) -> (Bytes, Bytes); +} + +impl<B: ByteStr + 'static> ByteStrPriv for B { + + fn buf(&self) -> Box<Buf+'static> { + Box::new(self.buf()) + } + + fn clone(&self) -> Bytes { + Bytes::of(self.clone()) + } + + fn concat(&self, other: Bytes) -> Bytes { + self.concat(other) + } + + fn drop(&mut self) { + unsafe { + ptr::read(mem::transmute(self)) + } + } + + fn get_type_id(&self) -> TypeId { + Any::get_type_id(self) + } + + fn index(&self, index: &usize) -> &u8 { + ops::Index::index(self, index) + } + + fn len(&self) -> usize { + self.len() + } + + fn slice(&self, begin: usize, end: usize) -> Bytes { + self.slice(begin, end) + } + + fn split_at(&self, mid: usize) -> (Bytes, Bytes) { + self.split_at(mid) + } +} + +/* +impl ops::Index<usize> for Bytes { + type Output = u8; + + fn index(&self, index: &usize) -> &u8 { + self.bytes.index(index) + } +} + +impl PartialEq<Bytes> for Bytes { + fn eq(&self, other: &Bytes) -> bool { + let mut i1 = self.iter(); + let mut i2 = other.iter(); + + loop { + let el = i1.next(); + + if el != i2.next() { + return false; + } + + if el.is_none() { + return true; + } + } + } +} + +pub struct BytesIter<'a> { + bytes: &'a Bytes, + pos: usize, +} + +impl<'a> Iterator for BytesIter<'a> { + type Item = u8; + + fn next(&mut self) -> Option<u8> { + if self.pos == self.bytes.len() { + return 
None; + } + + let ret = self.bytes[self.pos]; + self.pos += 1; + Some(ret) + } +} + +#[cfg(test)] +mod test { + use super::Bytes; + + #[test] + pub fn test_accessing_bytes() { + let bytes = from_slice(b"foo"); + + for i in 0..3us { + assert_eq!(b"foo"[i], bytes[i]); + } + } + + /* + * + * ===== Equality ===== + * + */ + + #[test] + pub fn test_literal_bytes_eq() { + assert!(from_slice(b"foo") == from_slice(b"foo")); + assert!(from_slice(b"foo") != from_slice(b"bar")); + assert!(from_slice(b"foo") != from_slice(b"foo*")); + assert!(from_slice(b"foo*") != from_slice(b"foo")); + } + + /* + * + * ===== Helpers ===== + * + */ + + fn from_slice(bytes: &[u8]) -> Bytes { + Bytes::from_slice(bytes) + } +} +*/ + +#[test] +pub fn test_size_of() { + let expect = mem::size_of::<usize>() * 2; + + assert_eq!(expect, mem::size_of::<Bytes>()); + assert_eq!(expect, mem::size_of::<Option<Bytes>>()); +} diff --git a/src/lib.rs b/src/lib.rs index b8d45e2..d8843dc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,19 +1,373 @@ #![crate_name = "bytes"] +#![unstable] + +#![feature(core)] +#![feature(alloc)] + +pub use byte_buf::{ByteBuf, ROByteBuf, MutByteBuf}; +pub use byte_str::{SeqByteStr, SmallByteStr, SmallByteStrBuf}; +pub use bytes::Bytes; +pub use ring::{RingBuf, RingBufReader, RingBufWriter}; +pub use rope::Rope; +pub use slice::{SliceBuf, MutSliceBuf}; + +use std::{cmp, io, ops, ptr, u32}; + +extern crate core; + +mod alloc; +mod byte_buf; +mod byte_str; +mod bytes; +mod ring; +mod rope; +mod slice; + +pub mod traits { + pub use {Buf, BufExt, MutBuf, ByteStr}; +} + +const MAX_CAPACITY: usize = u32::MAX as usize; /// A trait for objects that provide random and sequential access to bytes. pub trait Buf { + /// Returns the number of bytes that can be accessed from the Buf fn remaining(&self) -> usize; + /// Returns a slice starting at the current Buf position and of length + /// between 0 and `Buf::remaining()`. fn bytes<'a>(&'a self) -> &'a [u8]; + /// Advance the internal cursor of the Buf fn advance(&mut self, cnt: usize); + /// Returns true if there are any more bytes to consume fn has_remaining(&self) -> bool { self.remaining() > 0 } + + /// Read bytes from this Buf into the given slice and advance the cursor by + /// the number of bytes read. + /// + /// If there are fewer bytes remaining than is needed to satisfy the + /// request (aka `dst.len()` > self.remaining()`), then + /// `Err(BufError::Overflow)` is returned. + /// + /// ``` + /// use bytes::{SliceBuf, Buf}; + /// + /// let mut buf = SliceBuf::wrap(b"hello world"); + /// let mut dst = [0; 5]; + /// + /// buf.read_slice(&mut dst); + /// assert_eq!(b"hello", dst); + /// assert_eq!(6, buf.remaining()); + /// ``` + fn read_slice(&mut self, dst: &mut [u8]) -> usize { + let mut off = 0; + let len = cmp::min(dst.len(), self.remaining()); + + while off < len { + let mut cnt; + + unsafe { + let src = self.bytes(); + cnt = cmp::min(src.len(), len - off); + + ptr::copy_nonoverlapping_memory( + dst[off..].as_mut_ptr(), src.as_ptr(), cnt); + + off += src.len(); + } + + self.advance(cnt); + } + + len + } +} + +pub trait BufExt { + + /// Read bytes from this Buf into the given sink and advance the cursor by + /// the number of bytes read. 
+ fn read<S: Sink>(&mut self, dst: S) -> Result<usize, S::Error>; } -pub trait MutBuf : Buf { +// TODO: Remove Sized +pub trait MutBuf : Sized { + + /// Returns the number of bytes that can be accessed from the Buf + fn remaining(&self) -> usize; + + /// Advance the internal cursor of the Buf + fn advance(&mut self, cnt: usize); + + /// Returns true if there are any more bytes to consume + fn has_remaining(&self) -> bool { + self.remaining() > 0 + } + + /// Returns a mutable slice starting at the current Buf position and of + /// length between 0 and `Buf::remaining()`. fn mut_bytes<'a>(&'a mut self) -> &'a mut [u8]; + + /// Read bytes from this Buf into the given sink and advance the cursor by + /// the number of bytes read. + fn write<S: Source>(&mut self, src: S) -> Result<usize, S::Error> { + src.fill(self) + } + + /// Read bytes from this Buf into the given slice and advance the cursor by + /// the number of bytes read. + /// + /// If there are fewer bytes remaining than is needed to satisfy the + /// request (aka `dst.len()` > self.remaining()`), then + /// `Err(BufError::Overflow)` is returned. + /// + /// ``` + /// use bytes::{MutSliceBuf, Buf, MutBuf}; + /// + /// let mut dst = [0; 6]; + /// + /// { + /// let mut buf = MutSliceBuf::wrap(&mut dst); + /// buf.write_slice(b"hello"); + /// + /// assert_eq!(1, buf.remaining()); + /// } + /// + /// assert_eq!(b"hello\0", dst); + /// ``` + fn write_slice(&mut self, src: &[u8]) -> usize { + let mut off = 0; + let len = cmp::min(src.len(), self.remaining()); + + while off < len { + let mut cnt; + + unsafe { + let dst = self.mut_bytes(); + cnt = cmp::min(dst.len(), len - off); + + ptr::copy_nonoverlapping_memory( + dst.as_mut_ptr(), src[off..].as_ptr(), cnt); + + off += cnt; + } + + self.advance(cnt); + } + + len + } +} + +/* + * + * ===== ByteStr ===== + * + */ + +pub trait ByteStr : Clone + Sized + Send + Sync + ops::Index<usize, Output=u8> { + + // Until HKT lands, the buf must be bound by 'static + type Buf: Buf+'static; + + /// Returns a read-only `Buf` for accessing the byte contents of the + /// `ByteStr`. + fn buf(&self) -> Self::Buf; + + /// Returns a new `Bytes` value representing the concatenation of `self` + /// with the given `Bytes`. + fn concat<B: ByteStr+'static>(&self, other: B) -> Bytes; + + /// Returns the number of bytes in the ByteStr + fn len(&self) -> usize; + + /// Returns true if the length of the `ByteStr` is 0 + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns a new ByteStr value containing the byte range between `begin` + /// (inclusive) and `end` (exclusive) + fn slice(&self, begin: usize, end: usize) -> Bytes; + + /// Returns a new ByteStr value containing the byte range starting from + /// `begin` (inclusive) to the end of the byte str. + /// + /// Equivalent to `bytes.slice(begin, bytes.len())` + fn slice_from(&self, begin: usize) -> Bytes { + self.slice(begin, self.len()) + } + + /// Returns a new ByteStr value containing the byte range from the start up + /// to `end` (exclusive). + /// + /// Equivalent to `bytes.slice(0, end)` + fn slice_to(&self, end: usize) -> Bytes { + self.slice(0, end) + } + + /// Divides the value into two `Bytes` at the given index. + /// + /// The first will contain all bytes from `[0, mid]` (excluding the index + /// `mid` itself) and the second will contain all indices from `[mid, len)` + /// (excluding the index `len` itself). + /// + /// Panics if `mid > len`. 
+ fn split_at(&self, mid: usize) -> (Bytes, Bytes) { + (self.slice_to(mid), self.slice_from(mid)) + } + + /// Consumes the value and returns a `Bytes` instance containing + /// identical bytes + fn to_bytes(self) -> Bytes; } + +/* + * + * ===== *Ext impls ===== + * + */ + +impl<B: Buf> BufExt for B { + fn read<S: Sink>(&mut self, dst: S) -> Result<usize, S::Error> { + dst.sink(self) + } +} + +/* + * + * ===== Sink / Source ===== + * + */ + +/// An object that reads bytes from a Buf into itself +pub trait Sink { + type Error; + + fn sink<B: Buf>(self, buf: &mut B) -> Result<usize, Self::Error>; +} + +pub trait Source { + type Error; + + fn fill<B: MutBuf>(self, buf: &mut B) -> Result<usize, Self::Error>; +} + +impl<'a> Sink for &'a mut [u8] { + type Error = BufError; + + fn sink<B: Buf>(self, buf: &mut B) -> Result<usize, BufError> { + Ok(buf.read_slice(self)) + } +} + +impl<'a> Sink for &'a mut Vec<u8> { + type Error = BufError; + + fn sink<B: Buf>(self, buf: &mut B) -> Result<usize, BufError> { + use std::slice; + + let rem = buf.remaining(); + let cap = self.capacity(); + let len = rem - cap; + + // Ensure that the vec is big enough + if cap < rem { + self.reserve(len); + } + + unsafe { + { + let dst = self.as_mut_slice(); + buf.read_slice(slice::from_raw_parts_mut(dst.as_mut_ptr(), rem)); + } + + self.set_len(rem); + } + + Ok(len) + } +} + +impl<'a> Source for &'a [u8] { + type Error = BufError; + + fn fill<B: MutBuf>(self, buf: &mut B) -> Result<usize, BufError> { + Ok(buf.write_slice(self)) + } +} + +impl<'a> Source for &'a Vec<u8> { + type Error = BufError; + + fn fill<B: MutBuf>(self, buf: &mut B) -> Result<usize, BufError> { + Ok(buf.write_slice(self.as_slice())) + } +} + +impl<'a> Source for &'a Bytes { + type Error = BufError; + + fn fill<B: MutBuf>(self, _buf: &mut B) -> Result<usize, BufError> { + unimplemented!(); + } +} + +impl<'a> Source for &'a mut (io::Read+'a) { + type Error = io::Error; + + fn fill<B: MutBuf>(self, _buf: &mut B) -> Result<usize, io::Error> { + unimplemented!(); + } +} + +impl<'a> Source for &'a mut (Iterator<Item=u8>+'a) { + type Error = BufError; + + fn fill<B: MutBuf>(self, _buf: &mut B) -> Result<usize, BufError> { + unimplemented!(); + } +} + +/* + * + * ===== Buf impls ===== + * + */ + +impl Buf for Box<Buf+'static> { + fn remaining(&self) -> usize { + (**self).remaining() + } + + fn bytes(&self) -> &[u8] { + (**self).bytes() + } + + fn advance(&mut self, cnt: usize) { + (**self).advance(cnt); + } + + fn read_slice(&mut self, dst: &mut [u8]) -> usize { + (**self).read_slice(dst) + } +} + +/* + * + * ===== BufError / BufResult ===== + * + */ + +#[derive(Copy, Debug)] +pub enum BufError { + Underflow, + Overflow, +} + +pub type BufResult<T> = Result<T, BufError>; diff --git a/src/ring.rs b/src/ring.rs new file mode 100644 index 0000000..1de98b1 --- /dev/null +++ b/src/ring.rs @@ -0,0 +1,200 @@ +use super::{Buf, MutBuf}; +use std::{cmp, fmt, mem, ptr, slice}; +use std::num::UnsignedInt; +use std::rt::heap; + +/// Buf backed by a continous chunk of memory. Maintains a read cursor and a +/// write cursor. When reads and writes reach the end of the allocated buffer, +/// wraps around to the start. 
+pub struct RingBuf { + ptr: *mut u8, // Pointer to the memory + cap: usize, // Capacity of the buffer + pos: usize, // Offset of read cursor + len: usize // Number of bytes to read +} + +// TODO: There are most likely many optimizations that can be made +impl RingBuf { + pub fn new(mut capacity: usize) -> RingBuf { + // Handle the 0 length buffer case + if capacity == 0 { + return RingBuf { + ptr: ptr::null_mut(), + cap: 0, + pos: 0, + len: 0 + } + } + + // Round to the next power of 2 for better alignment + capacity = UnsignedInt::next_power_of_two(capacity); + + // Allocate the memory + let ptr = unsafe { heap::allocate(capacity, mem::min_align_of::<u8>()) }; + + RingBuf { + ptr: ptr as *mut u8, + cap: capacity, + pos: 0, + len: 0 + } + } + + pub fn is_full(&self) -> bool { + self.cap == self.len + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + pub fn capacity(&self) -> usize { + self.cap + } + + // Access readable bytes as a Buf + pub fn reader<'a>(&'a mut self) -> RingBufReader<'a> { + RingBufReader { ring: self } + } + + // Access writable bytes as a Buf + pub fn writer<'a>(&'a mut self) -> RingBufWriter<'a> { + RingBufWriter { ring: self } + } + + fn read_remaining(&self) -> usize { + self.len + } + + fn write_remaining(&self) -> usize { + self.cap - self.len + } + + fn advance_reader(&mut self, mut cnt: usize) { + cnt = cmp::min(cnt, self.read_remaining()); + + self.pos += cnt; + self.pos %= self.cap; + self.len -= cnt; + } + + fn advance_writer(&mut self, mut cnt: usize) { + cnt = cmp::min(cnt, self.write_remaining()); + self.len += cnt; + } + + fn as_slice(&self) -> &[u8] { + unsafe { + slice::from_raw_parts(self.ptr as *const u8, self.cap) + } + } + + fn as_mut_slice(&mut self) -> &mut [u8] { + unsafe { + slice::from_raw_parts_mut(self.ptr, self.cap) + } + } +} + +impl Clone for RingBuf { + fn clone(&self) -> RingBuf { + use std::cmp; + + let mut ret = RingBuf::new(self.cap); + + ret.pos = self.pos; + ret.len = self.len; + + unsafe { + let to = self.pos + self.len; + + if to > self.cap { + ptr::copy_memory(ret.ptr, self.ptr as *const u8, to % self.cap); + } + + ptr::copy_memory( + ret.ptr.offset(self.pos as isize), + self.ptr.offset(self.pos as isize) as *const u8, + cmp::min(self.len, self.cap - self.pos)); + } + + ret + } + + // TODO: an improved version of clone_from is possible that potentially + // re-uses the buffer +} + +impl fmt::Debug for RingBuf { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "RingBuf[.. {}]", self.len) + } +} + +impl Drop for RingBuf { + fn drop(&mut self) { + if self.cap > 0 { + unsafe { + heap::deallocate(self.ptr, self.cap, mem::min_align_of::<u8>()) + } + } + } +} + +pub struct RingBufReader<'a> { + ring: &'a mut RingBuf +} + +impl<'a> Buf for RingBufReader<'a> { + + fn remaining(&self) -> usize { + self.ring.read_remaining() + } + + fn bytes<'b>(&'b self) -> &'b [u8] { + let mut to = self.ring.pos + self.ring.len; + + if to > self.ring.cap { + to = self.ring.cap + } + + &self.ring.as_slice()[self.ring.pos .. 
to] + } + + fn advance(&mut self, cnt: usize) { + self.ring.advance_reader(cnt) + } +} + +pub struct RingBufWriter<'a> { + ring: &'a mut RingBuf +} + +impl<'a> MutBuf for RingBufWriter<'a> { + + fn remaining(&self) -> usize { + self.ring.write_remaining() + } + + fn advance(&mut self, cnt: usize) { + self.ring.advance_writer(cnt) + } + + fn mut_bytes<'b>(&'b mut self) -> &'b mut [u8] { + let mut from; + let mut to; + + from = self.ring.pos + self.ring.len; + from %= self.ring.cap; + + to = from + self.remaining(); + + if to >= self.ring.cap { + to = self.ring.cap; + } + + &mut self.ring.as_mut_slice()[from..to] + } +} + +unsafe impl Send for RingBuf { } diff --git a/src/rope.rs b/src/rope.rs new file mode 100644 index 0000000..6cae0a6 --- /dev/null +++ b/src/rope.rs @@ -0,0 +1,583 @@ +use {Bytes, ByteBuf, Source, BufError}; +use traits::*; +use std::{cmp, mem, ops}; +use std::sync::Arc; + +// The implementation is mostly a port of the implementation found in the Java +// protobuf lib. + +const CONCAT_BY_COPY_LEN: usize = 128; +const MAX_DEPTH: usize = 47; + +// Used to decide when to rebalance the tree. +static MIN_LENGTH_BY_DEPTH: [usize; MAX_DEPTH] = [ + 1, 2, 3, 5, 8, + 13, 21, 34, 55, 89, + 144, 233, 377, 610, 987, + 1_597, 2_584, 4_181, 6_765, 10_946, + 17_711, 28_657, 46_368, 75_025, 121_393, + 196_418, 317_811, 514_229, 832_040, 1_346_269, + 2_178_309, 3_524_578, 5_702_887, 9_227_465, 14_930_352, + 24_157_817, 39_088_169, 63_245_986, 102_334_155, 165_580_141, + 267_914_296, 433_494_437, 701_408_733, 1_134_903_170, 1_836_311_903, + 2_971_215_073, 4_294_967_295]; + +/// An immutable sequence of bytes formed by concatenation of other `ByteStr` +/// values, without copying the data in the pieces. The concatenation is +/// represented as a tree whose leaf nodes are each a `Bytes` value. +/// +/// Most of the operation here is inspired by the now-famous paper [Ropes: an +/// Alternative to Strings. hans-j. boehm, russ atkinson and michael +/// plass](http://www.cs.rit.edu/usr/local/pub/jeh/courses/QUARTERS/FP/Labs/CedarRope/rope-paper.pdf). +/// +/// Fundamentally the Rope algorithm represents the collection of pieces as a +/// binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum +/// sequence length, sequences that are too short relative to their depth cause +/// a tree rebalance. More precisely, a tree of depth d is "balanced" in the +/// terminology of BAP95 if its length is at least F(d+2), where F(n) is the +/// n-the Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have +/// minimum lengths 1, 2, 3, 5, 8, 13,... +pub struct Rope { + inner: Arc<RopeInner>, +} + +impl Rope { + pub fn from_slice(bytes: &[u8]) -> Rope { + Rope::new(Bytes::from_slice(bytes), Bytes::empty()) + } + + /// Returns a Rope consisting of the supplied Bytes as a single segment. 
+ pub fn of<B: ByteStr + 'static>(bytes: B) -> Rope { + let bytes = Bytes::of(bytes); + + match bytes.try_unwrap() { + Ok(rope) => rope, + Err(bytes) => Rope::new(bytes, Bytes::empty()), + } + } + + fn new(left: Bytes, right: Bytes) -> Rope { + Rope { inner: Arc::new(RopeInner::new(left, right)) } + } + + pub fn len(&self) -> usize { + self.inner.len as usize + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /* + * + * ===== Priv fns ===== + * + */ + + fn depth(&self) -> u16 { + self.inner.depth + } + + fn left(&self) -> &Bytes { + &self.inner.left + } + + fn right(&self) -> &Bytes { + &self.inner.right + } + + fn pieces<'a>(&'a self) -> PieceIter<'a> { + PieceIter::new(&self.inner) + } +} + +impl ByteStr for Rope { + type Buf = RopeBuf; + + fn buf(&self) -> RopeBuf { + RopeBuf::new(self.clone()) + } + + fn concat<B: ByteStr+'static>(&self, other: B) -> Bytes { + let left = Bytes::of(self.clone()); + let right = Bytes::of(other); + Bytes::of(concat(left, right)) + } + + fn len(&self) -> usize { + Rope::len(self) + } + + fn slice(&self, begin: usize, end: usize) -> Bytes { + if begin >= end || begin >= self.len() { + return Bytes::empty() + } + + let end = cmp::min(end, self.len()); + let len = end - begin; + + // Empty slice + if len == 0 { + return Bytes::empty(); + } + + // Full rope + if len == self.len() { + return Bytes::of(self.clone()); + } + + // == Proper substring == + + let left_len = self.inner.left.len(); + + if end <= left_len { + // Slice on the left + return self.inner.left.slice(begin, end); + } + + if begin >= left_len { + // Slice on the right + return self.inner.right.slice(begin - left_len, end - left_len); + } + + // Split slice + let left_slice = self.inner.left.slice_from(begin); + let right_slice = self.inner.right.slice_to(end - left_len); + + Bytes::of(Rope::new(left_slice, right_slice)) + } + + fn to_bytes(self) -> Bytes { + Bytes::of(self) + } +} + +impl ops::Index<usize> for Rope { + type Output = u8; + + fn index(&self, index: &usize) -> &u8 { + assert!(*index < self.len()); + + let left_len = self.inner.left.len(); + + if *index < left_len { + self.inner.left.index(index) + } else { + self.inner.right.index(&(*index - left_len)) + } + } +} + +impl Clone for Rope { + fn clone(&self) -> Rope { + Rope { inner: self.inner.clone() } + } +} + +impl<'a> Source for &'a Rope { + type Error = BufError; + + fn fill<B: MutBuf>(self, _buf: &mut B) -> Result<usize, BufError> { + unimplemented!(); + } +} + +/* + * + * ===== Helper Fns ===== + * + */ + +fn depth(bytes: &Bytes) -> u16 { + match bytes.downcast_ref::<Rope>() { + Some(rope) => rope.inner.depth, + None => 0, + } +} + +fn is_balanced(bytes: &Bytes) -> bool { + if let Some(rope) = bytes.downcast_ref::<Rope>() { + return rope.len() >= MIN_LENGTH_BY_DEPTH[rope.depth() as usize]; + } + + true +} + +fn concat(left: Bytes, right: Bytes) -> Rope { + if right.is_empty() { + return Rope::of(left); + } + + if left.is_empty() { + return Rope::of(right); + } + + let len = left.len() + right.len(); + + if len < CONCAT_BY_COPY_LEN { + return concat_bytes(&left, &right, len); + } + + if let Some(left) = left.downcast_ref::<Rope>() { + let len = left.inner.right.len() + right.len(); + + if len < CONCAT_BY_COPY_LEN { + // Optimization from BAP95: As an optimization of the case + // where the ByteString is constructed by repeated concatenate, + // recognize the case where a short string is concatenated to a + // left-hand node whose right-hand branch is short. 
In the + // paper this applies to leaves, but we just look at the length + // here. This has the advantage of shedding references to + // unneeded data when substrings have been taken. + // + // When we recognize this case, we do a copy of the data and + // create a new parent node so that the depth of the result is + // the same as the given left tree. + let new_right = concat_bytes(&left.inner.right, &right, len); + return Rope::new(left.inner.left.clone(), Bytes::of(new_right)); + } + + if depth(left.left()) > depth(left.right()) && left.depth() > depth(&right) { + // Typically for concatenate-built strings the left-side is + // deeper than the right. This is our final attempt to + // concatenate without increasing the tree depth. We'll redo + // the the node on the RHS. This is yet another optimization + // for building the string by repeatedly concatenating on the + // right. + let new_right = Rope::new(left.right().clone(), right); + return Rope::new(left.left().clone(), Bytes::of(new_right)); + } + } + + // Fine, we'll add a node and increase the tree depth -- unless we + // rebalance ;^) + let depth = cmp::max(depth(&left), depth(&right)) + 1; + + if len >= MIN_LENGTH_BY_DEPTH[depth as usize] { + // No need to rebalance + return Rope::new(left, right); + } + + Balance::new().balance(left, right) +} + +fn concat_bytes(left: &Bytes, right: &Bytes, len: usize) -> Rope { + let mut buf = ByteBuf::mut_with_capacity(len); + + buf.write(left).ok().expect("unexpected error"); + buf.write(right).ok().expect("unexpected error"); + + return Rope::of(buf.flip().to_bytes()); +} + +fn depth_for_len(len: usize) -> u16 { + match MIN_LENGTH_BY_DEPTH.binary_search(&len) { + Ok(idx) => idx as u16, + Err(idx) => { + // It wasn't an exact match, so convert to the index of the + // containing fragment, which is one less even than the insertion + // point. + idx as u16 - 1 + } + } +} + +/* + * + * ===== RopeBuf ===== + * + */ + +pub struct RopeBuf { + rem: usize, + + // Only here for the ref count + #[allow(dead_code)] + rope: Rope, + + // This must be done with unsafe code to avoid having a lifetime bound on + // RopeBuf but is safe due to Rope being held. As long as data doesn't + // escape (which it shouldn't) it is safe. Doing this properly would + // require HKT. + pieces: PieceIter<'static>, + leaf_buf: Option<Box<Buf+'static>>, +} + +impl RopeBuf { + fn new(rope: Rope) -> RopeBuf { + // In order to get the lifetimes to work out, transmute to a 'static + // lifetime. Never allow the iter to escape the internals of RopeBuf. 
+ let mut pieces: PieceIter<'static> = + unsafe { mem::transmute(rope.pieces()) }; + + // Get the next buf + let leaf_buf = pieces.next() + .map(|bytes| bytes.buf()); + + let len = rope.len(); + + RopeBuf { + rope: rope, + rem: len, + pieces: pieces, + leaf_buf: leaf_buf, + } + } +} + +impl Buf for RopeBuf { + fn remaining(&self) -> usize { + self.rem + } + + fn bytes(&self) -> &[u8] { + self.leaf_buf.as_ref() + .map(|b| b.bytes()) + .unwrap_or(&[]) + } + + fn advance(&mut self, mut cnt: usize) { + cnt = cmp::min(cnt, self.rem); + + // Advance the internal cursor + self.rem -= cnt; + + // Advance the leaf buffer + while cnt > 0 { + { + let curr = self.leaf_buf.as_mut() + .expect("expected a value"); + + if curr.remaining() > cnt { + curr.advance(cnt); + break; + } + + cnt -= curr.remaining(); + } + + self.leaf_buf = self.pieces.next() + .map(|bytes| bytes.buf()); + } + } +} + +/* + * + * ===== PieceIter ===== + * + */ + +// TODO: store stack inline if possible +struct PieceIter<'a> { + stack: Vec<&'a RopeInner>, + next: Option<&'a Bytes>, +} + +impl<'a> PieceIter<'a> { + fn new(root: &'a RopeInner) -> PieceIter<'a> { + let mut iter = PieceIter { + stack: vec![], + next: None, + }; + + iter.next = iter.get_leaf_by_left(root); + iter + } + + fn get_leaf_by_left(&mut self, mut root: &'a RopeInner) -> Option<&'a Bytes> { + loop { + self.stack.push(root); + let left = &root.left; + + if left.is_empty() { + return None; + } + + if let Some(rope) = left.downcast_ref::<Rope>() { + root = &*rope.inner; + continue; + } + + return Some(left); + } + } + + fn next_non_empty_leaf(&mut self) -> Option<&'a Bytes>{ + loop { + if let Some(node) = self.stack.pop() { + if let Some(rope) = node.right.downcast_ref::<Rope>() { + let res = self.get_leaf_by_left(&rope.inner); + + if res.is_none() { + continue; + } + + return res; + } + + if node.right.is_empty() { + continue; + } + + return Some(&node.right); + } + + return None; + } + } +} + +impl<'a> Iterator for PieceIter<'a> { + type Item = &'a Bytes; + + fn next(&mut self) -> Option<&'a Bytes> { + let ret = self.next.take(); + + if ret.is_some() { + self.next = self.next_non_empty_leaf(); + } + + ret + } +} + +/* + * + * ===== Balance ===== + * + */ + +struct Balance { + stack: Vec<Bytes>, +} + +impl Balance { + fn new() -> Balance { + Balance { stack: vec![] } + } + + fn balance(&mut self, left: Bytes, right: Bytes) -> Rope { + self.do_balance(left); + self.do_balance(right); + + let mut partial = self.stack.pop() + .expect("expected a value"); + + while !partial.is_empty() { + let new_left = self.stack.pop() + .expect("expected a value"); + + partial = Bytes::of(Rope::new(new_left, partial)); + } + + Rope::of(partial) + } + + fn do_balance(&mut self, root: Bytes) { + // BAP95: Insert balanced subtrees whole. This means the result might not + // be balanced, leading to repeated rebalancings on concatenate. However, + // these rebalancings are shallow due to ignoring balanced subtrees, and + // relatively few calls to insert() result. + if is_balanced(&root) { + self.insert(root); + } else { + let rope = root.try_unwrap::<Rope>() + .ok().expect("expected a value"); + + self.do_balance(rope.left().clone()); + self.do_balance(rope.right().clone()); + } + } + + // Push a string on the balance stack (BAP95). BAP95 uses an array and + // calls the elements in the array 'bins'. We instead use a stack, so the + // 'bins' of lengths are represented by differences between the elements of + // minLengthByDepth. 
+ // + // If the length bin for our string, and all shorter length bins, are + // empty, we just push it on the stack. Otherwise, we need to start + // concatenating, putting the given string in the "middle" and continuing + // until we land in an empty length bin that matches the length of our + // concatenation. + fn insert(&mut self, bytes: Bytes) { + let depth_bin = depth_for_len(bytes.len()); + let bin_end = MIN_LENGTH_BY_DEPTH[depth_bin as usize + 1]; + + // BAP95: Concatenate all trees occupying bins representing the length + // of our new piece or of shorter pieces, to the extent that is + // possible. The goal is to clear the bin which our piece belongs in, + // but that may not be entirely possible if there aren't enough longer + // bins occupied. + if let Some(len) = self.peek().map(|r| r.len()) { + if len >= bin_end { + self.stack.push(bytes); + return; + } + } + + let bin_start = MIN_LENGTH_BY_DEPTH[depth_bin as usize]; + + // Concatenate the subtrees of shorter length + let mut new_tree = self.stack.pop() + .expect("expected a value"); + + while let Some(len) = self.peek().map(|r| r.len()) { + // If the head is big enough, break the loop + if len >= bin_start { break; } + + let left = self.stack.pop() + .expect("expected a value"); + + new_tree = Bytes::of(Rope::new(left, new_tree)); + } + + // Concatenate the given string + new_tree = Bytes::of(Rope::new(new_tree, bytes)); + + // Continue concatenating until we land in an empty bin + while let Some(len) = self.peek().map(|r| r.len()) { + let depth_bin = depth_for_len(new_tree.len()); + let bin_end = MIN_LENGTH_BY_DEPTH[depth_bin as usize + 1]; + + if len < bin_end { + let left = self.stack.pop() + .expect("expected a value"); + + new_tree = Bytes::of(Rope::new(left, new_tree)); + } else { + break; + } + } + + self.stack.push(new_tree); + } + + fn peek(&self) -> Option<&Bytes> { + self.stack.as_slice().last() + } +} + +struct RopeInner { + left: Bytes, + right: Bytes, + depth: u16, + len: u32, +} + +impl RopeInner { + fn new(left: Bytes, right: Bytes) -> RopeInner { + // If left is 0 then right must be zero + debug_assert!(!left.is_empty() || right.is_empty()); + + let len = left.len() + right.len(); + let depth = cmp::max(depth(&left), depth(&right)) + 1; + + RopeInner { + left: left, + right: right, + depth: depth, + len: len as u32, + } + } +} diff --git a/src/slice.rs b/src/slice.rs new file mode 100644 index 0000000..1d23318 --- /dev/null +++ b/src/slice.rs @@ -0,0 +1,57 @@ +use std::cmp; +use {Buf, MutBuf}; + +pub struct SliceBuf<'a> { + bytes: &'a [u8], + pos: usize +} + +impl<'a> SliceBuf<'a> { + pub fn wrap(bytes: &'a [u8]) -> SliceBuf<'a> { + SliceBuf { bytes: bytes, pos: 0 } + } +} + +impl<'a> Buf for SliceBuf<'a> { + fn remaining(&self) -> usize { + self.bytes.len() - self.pos + } + + fn bytes<'b>(&'b self) -> &'b [u8] { + &self.bytes[self.pos..] + } + + fn advance(&mut self, mut cnt: usize) { + cnt = cmp::min(cnt, self.remaining()); + self.pos += cnt; + } +} + +pub struct MutSliceBuf<'a> { + bytes: &'a mut [u8], + pos: usize +} + +impl<'a> MutSliceBuf<'a> { + pub fn wrap(bytes: &'a mut [u8]) -> MutSliceBuf<'a> { + MutSliceBuf { + bytes: bytes, + pos: 0 + } + } +} + +impl<'a> MutBuf for MutSliceBuf<'a> { + fn remaining(&self) -> usize { + self.bytes.len() - self.pos + } + + fn advance(&mut self, mut cnt: usize) { + cnt = cmp::min(cnt, self.remaining()); + self.pos += cnt; + } + + fn mut_bytes<'b>(&'b mut self) -> &'b mut [u8] { + &mut self.bytes[self.pos..] 
+ } +} diff --git a/test/test.rs b/test/test.rs new file mode 100644 index 0000000..a9bf0ea --- /dev/null +++ b/test/test.rs @@ -0,0 +1,15 @@ +#![feature(core)] + +use rand::random; + +extern crate bytes; +extern crate rand; + +mod test_byte_buf; +mod test_rope; +mod test_seq_byte_str; +mod test_small_byte_str; + +fn gen_bytes(n: usize) -> Vec<u8> { + (0..n).map(|_| random()).collect() +} diff --git a/test/test_buf.rs b/test/test_buf.rs new file mode 100644 index 0000000..e69de29 diff --git a/test/test_byte_buf.rs b/test/test_byte_buf.rs new file mode 100644 index 0000000..275dd0e --- /dev/null +++ b/test/test_byte_buf.rs @@ -0,0 +1,46 @@ +use bytes::ByteBuf; +use bytes::traits::*; + +#[test] +pub fn test_initial_buf_empty() { + let buf = ByteBuf::mut_with_capacity(100); + + assert!(buf.capacity() == 128); + assert!(buf.remaining() == 128); + + let buf = buf.flip(); + + assert!(buf.remaining() == 0); + + let buf = buf.flip(); + + assert!(buf.remaining() == 128); +} + +#[test] +pub fn test_byte_buf_read_write() { + let mut buf = ByteBuf::mut_with_capacity(32); + + buf.write(b"hello world").unwrap(); + assert_eq!(21, buf.remaining()); + + buf.write(b" goodbye").unwrap(); + assert_eq!(13, buf.remaining()); + + let mut buf = buf.flip(); + let mut dst = [0; 5]; + + assert_eq!(5, buf.read(dst.as_mut_slice()).unwrap()); + assert_eq!(b"hello", dst); + + assert_eq!(5, buf.read(dst.as_mut_slice()).unwrap()); + assert_eq!(b" worl", dst); + + let mut dst = [0; 2]; + assert_eq!(2, buf.read(dst.as_mut_slice()).unwrap()); + assert_eq!(b"d ", dst); + + let mut dst = [0; 7]; + assert_eq!(7, buf.read(dst.as_mut_slice()).unwrap()); + assert_eq!(b"goodbye", dst); +} diff --git a/test/test_rope.rs b/test/test_rope.rs new file mode 100644 index 0000000..cf0a7f2 --- /dev/null +++ b/test/test_rope.rs @@ -0,0 +1,90 @@ +use bytes::Rope; +use bytes::traits::*; +use super::gen_bytes; + +const TEST_BYTES_1: &'static [u8] = + &b"dblm4ng7jp4v9rdn1w6hhssmluoqrrrqj59rccl9 + nkv2tm1t2da4jyku51ge7f8hv581gkki8lekmf5f + 1l44whp4aiwbvhkziw02292on4noyvuwjzsloqyc + 5n0iyn4l6o6tgjhlek00mynfzb1wgcwj4mqp6zdr + 3625yy7rj7xuisal7b1a7xgq271abvt5ssxuj39v + njtetokxxrgxzp7ik9adnypkmmcn4270yv9l46m7 + 9mu2zmqmkxdmgia210vkdytb7ywfcyt2bvcsg9eq + 5yqizxl6888zrksvaxhzs2v355jxu8gr21m33t83 + qvoian1ra7c6pvxabshgngldxa408p18l1fdet2h"; + +const TEST_BYTES_2: &'static [u8] = + &b"jmh14t79mllzj1ohxfj6fun7idwbks8oh35f83g6 + ryaowe86mmou5t1xa91uyg8e95wcu5mje1mswien + tt4clgj029cw0pyuvfbvsgzdg1x7sr9qsjkf2b1t + h43smgp1ea22lph17f78cel0cc2kjoht5281xuy8 + 0ex9uaqwj4330jrp30stsk15j9bpqezu3w78ktit + ev5g6xsngr35q7pemdm9hihf0ebrw5fbwhm530lo + e0zyj1bm7yfyk7f2i45jhr3wu3bvb4hj8jve6db0 + iewmr9weecaon9vdnqo5hen9iaiox5vsaxuo461m + 8336ugp20u4sfky3kfawr0ome1tiqyx8chkerrjh + a95s0gypcsgo9jqxasqkoj08t4uq5moxmay5plg5 + tlh6f9omhn0ezvi0w2n8hx7n6qk7rn1s3mjpnpl6 + hvilp8awaa4tvsis66q4e5b3xwy2z1h2klpa87h7"; + +#[test] +pub fn test_rope_round_trip() { + let rope = Rope::from_slice(b"zomg"); + + assert_eq!(4, rope.len()); + + let mut dst = vec![]; + rope.buf().read(&mut dst).unwrap(); + + assert_eq!(b"zomg", dst.as_slice()); +} + +#[test] +pub fn test_rope_slice() { + let mut dst = vec![]; + + let bytes = Rope::from_slice(TEST_BYTES_1); + assert_eq!(TEST_BYTES_1.len(), bytes.len()); + + bytes.buf().read(&mut dst).unwrap(); + assert_eq!(dst.as_slice(), TEST_BYTES_1); + + let left = bytes.slice_to(250); + assert_eq!(250, left.len()); + + left.buf().read(&mut dst).unwrap(); + assert_eq!(dst.as_slice(), &TEST_BYTES_1[..250]); + + let right = bytes.slice_from(250); + 
assert_eq!(TEST_BYTES_1.len() - 250, right.len()); + + right.buf().read(&mut dst).unwrap(); + assert_eq!(dst.as_slice(), &TEST_BYTES_1[250..]); +} + +#[test] +pub fn test_rope_concat_two_byte_str() { + let mut dst = vec![]; + + let left = Rope::from_slice(TEST_BYTES_1); + let right = Rope::from_slice(TEST_BYTES_2); + + let both = left.concat(right); + + assert_eq!(both.len(), TEST_BYTES_1.len() + TEST_BYTES_2.len()); + + both.buf().read(&mut dst).unwrap(); + assert_eq!(dst.as_slice(), TEST_BYTES_1.to_vec() + TEST_BYTES_2); +} + +#[test] +#[ignore] +pub fn test_slice_parity() { + let bytes = gen_bytes(2048 * 1024); + let start = 512 * 1024 - 3333; + let end = 512 * 1024 + 7777; + + let _ = Rope::from_slice(bytes.as_slice()).slice(start, end); + + // stuff +} diff --git a/test/test_seq_byte_str.rs b/test/test_seq_byte_str.rs new file mode 100644 index 0000000..9b7a2db --- /dev/null +++ b/test/test_seq_byte_str.rs @@ -0,0 +1,33 @@ +use bytes::SeqByteStr; +use bytes::traits::*; +use super::gen_bytes; + +#[test] +pub fn test_slice_round_trip() { + let mut dst = vec![]; + let src = gen_bytes(2000); + + let s = SeqByteStr::from_slice(src.as_slice()); + assert_eq!(2000, s.len()); + + s.buf().read(&mut dst).unwrap(); + assert_eq!(dst, src); +} + +#[test] +pub fn test_index() { + let src = gen_bytes(2000); + + let s = SeqByteStr::from_slice(src.as_slice()); + + for i in 0..2000 { + assert_eq!(src[i], s[i]); + } +} + +#[test] +#[should_fail] +pub fn test_index_out_of_range() { + let s = SeqByteStr::from_slice(gen_bytes(2000).as_slice()); + let _ = s[2001]; +} diff --git a/test/test_small_byte_str.rs b/test/test_small_byte_str.rs new file mode 100644 index 0000000..cbc920e --- /dev/null +++ b/test/test_small_byte_str.rs @@ -0,0 +1,33 @@ +use bytes::SmallByteStr; +use bytes::traits::*; +use super::gen_bytes; + +#[test] +pub fn test_slice_round_trip() { + let mut dst = vec![]; + let src = gen_bytes(3); + + let s = SmallByteStr::from_slice(src.as_slice()).unwrap(); + assert_eq!(3, s.len()); + + s.buf().read(&mut dst).unwrap(); + assert_eq!(dst, src); +} + +#[test] +pub fn test_index() { + let src = gen_bytes(3); + + let s = SmallByteStr::from_slice(src.as_slice()).unwrap(); + + for i in 0..3 { + assert_eq!(src[i], s[i]); + } +} + +#[test] +#[should_fail] +pub fn test_index_out_of_range() { + let s = SmallByteStr::from_slice(gen_bytes(3).as_slice()).unwrap(); + let _ = s[2001]; +} -- GitLab
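Usage sketch (it only strings together the calls exercised by the bundled tests in `test/test_byte_buf.rs` and `test/test_rope.rs`, and assumes the pre-1.0 nightly toolchain this WIP patch targets):

```
extern crate bytes;

use bytes::{ByteBuf, Rope};
use bytes::traits::*;

fn main() {
    // Fill a MutByteBuf, flip it into read mode, then drain part of it.
    let mut buf = ByteBuf::mut_with_capacity(32);
    buf.write(b"hello world").unwrap();
    assert_eq!(21, buf.remaining());

    let mut buf = buf.flip();
    let mut dst = [0; 5];
    assert_eq!(5, buf.read(dst.as_mut_slice()).unwrap());
    assert_eq!(b"hello", dst);

    // A Rope wraps ByteStr pieces; its Buf yields them in order.
    let rope = Rope::from_slice(b"zomg");
    assert_eq!(4, rope.len());

    let mut out = vec![];
    rope.buf().read(&mut out).unwrap();
    assert_eq!(b"zomg", out.as_slice());
}
```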