From 2d95683bd5da0ffa051c43c4e19b5311d16b0370 Mon Sep 17 00:00:00 2001
From: Noah Zentzis <ngz@generalproblem.net>
Date: Thu, 24 May 2018 14:50:31 -0700
Subject: [PATCH] Recycle space when reserving from Vec-backed Bytes (#197)

* Recycle space when reserving from Vec-backed Bytes

BytesMut::reserve, when called on a BytesMut instance which is backed by
a non-shared Vec<u8>, would previously just delegate to Vec::reserve
regardless of the current location in the buffer. If the BytesMut is
actually the trailing component of a larger Vec, then the unused space
won't be recycled. In applications which continually move the pointer
forward to consume data as it comes in, this can cause the underlying
buffer to get extremely large.

This commit checks whether there's extra space at the start of the
backing Vec in this case, and reuses the unused space if possible
instead of allocating.

* Avoid excessive copying when reusing Vec space

Only reuse space in a Vec-backed BytesMut when doing so would gain back
more than half of the current capacity. This avoids excessive copy
operations when a large buffer is almost (but not completely) full.
---
 src/bytes.rs        | 44 +++++++++++++++++++++++++++++++++-----------
 tests/test_bytes.rs | 15 +++++++++++++++
 2 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/src/bytes.rs b/src/bytes.rs
index 60cf5d4..c85ebf3 100644
--- a/src/bytes.rs
+++ b/src/bytes.rs
@@ -2250,20 +2250,42 @@ impl Inner {
         }
 
         if kind == KIND_VEC {
-            // Currently backed by a vector, so just use `Vector::reserve`.
+            // If there's enough free space before the start of the buffer, then
+            // just copy the data backwards and reuse the already-allocated
+            // space.
+            //
+            // Otherwise, since backed by a vector, use `Vec::reserve`
             unsafe {
-                let (off, _) = self.uncoordinated_get_vec_pos();
-                let mut v = rebuild_vec(self.ptr, self.len, self.cap, off);
-                v.reserve(additional);
-
-                // Update the info
-                self.ptr = v.as_mut_ptr().offset(off as isize);
-                self.len = v.len() - off;
-                self.cap = v.capacity() - off;
+                let (off, prev) = self.uncoordinated_get_vec_pos();
+
+                // Only reuse space if we stand to gain at least capacity/2
+                // bytes of space back
+                if off >= additional && off >= (self.cap / 2) {
+                    // There's space - reuse it
+                    //
+                    // Just move the pointer back to the start after copying
+                    // data back.
+                    let base_ptr = self.ptr.offset(-(off as isize));
+                    ptr::copy(self.ptr, base_ptr, self.len);
+                    self.ptr = base_ptr;
+                    self.uncoordinated_set_vec_pos(0, prev);
+
+                    // Length stays constant, but since we moved backwards we
+                    // can gain capacity back.
+                    self.cap += off;
+                } else {
+                    // No space - allocate more
+                    let mut v = rebuild_vec(self.ptr, self.len, self.cap, off);
+                    v.reserve(additional);
 
-                // Drop the vec reference
-                mem::forget(v);
+                    // Update the info
+                    self.ptr = v.as_mut_ptr().offset(off as isize);
+                    self.len = v.len() - off;
+                    self.cap = v.capacity() - off;
 
+                    // Drop the vec reference
+                    mem::forget(v);
+                }
                 return;
             }
         }
diff --git a/tests/test_bytes.rs b/tests/test_bytes.rs
index c75de1f..4ab8669 100644
--- a/tests/test_bytes.rs
+++ b/tests/test_bytes.rs
@@ -378,6 +378,21 @@ fn reserve_max_original_capacity_value() {
     assert_eq!(bytes.capacity(), 64 * 1024);
 }
 
+// Without either looking at the internals of the BytesMut or doing weird stuff
+// with the memory allocator, there's no good way to automatically verify from
+// within the program that this actually recycles memory. Instead, just exercise
+// the code path to ensure that the results are correct.
+#[test]
+fn reserve_vec_recycling() {
+    let mut bytes = BytesMut::from(Vec::with_capacity(16));
+    assert_eq!(bytes.capacity(), 16);
+    bytes.put("0123456789012345");
+    bytes.advance(10);
+    assert_eq!(bytes.capacity(), 6);
+    bytes.reserve(8);
+    assert_eq!(bytes.capacity(), 16);
+}
+
 #[test]
 fn reserve_in_arc_unique_does_not_overallocate() {
     let mut bytes = BytesMut::with_capacity(1000);
-- 
GitLab