I started thinking about the many collections including those in .Net’s System.Collections.Generics namespace that follow the pattern of using an array as their underlying datastore, and handling the issue of growth by doubling in size whenever capacity is exceeded. Wondering how hard it would be to implement a chunked array that would work for something like that, I got sidetracked into a little exercise that was more to satisfy my curiosity than to create an actual library class. What would it take to create a chunked array whose chunks are not of equal size, but whose size increases by two with each increase, and how much performance penalty would the additional overhead cost?
The implementation ended up having three parts. The main part is the DoubleChunkArray class. This implements the bulk of the logic – a virtual array that can double in size by adding an additional underlying array to what has been created already. The second part is a BitHelper class containing bit twiddling functions used by the DoubleChunkArray when converting the one dimensional virtual array indexes into the actual two dimensional array indexes. The third part is a quicky implementation of what List<T> does, but using a DoubleChunkArray as its underlying store instead of List<T'>’s direct use of Array. I only did the minimum implementation needed to see that it worked, and do some performance comparisons against List<T>.
The whole implementation really revolves around the conversion from the one dimensional virtual index into the actual two dimensional index. Since that is where the extra overhead is, it needs to be fairly efficient if it’s to be anywhere near the speed of List<T>, since that class has almost no extra overhead added over the very fast Array access it uses.
I did add one extra constraint. DoubleChunkArray only allows initial capacities that are powers of 2 (1, 2, 4, 8, 16, etc). Without that constraint, the index conversion becomes more complicated, and more likely to be slow. With powers of two, the index conversions can be done by some pretty fast bit twiddling tricks. The following image shows an example of how the virtual indexing maps to the actual indexing.
a[log(12)-1][12-2log(12)] = a[3-1][12-8] = a[2][4]
So here’s the code:
// Copyright ©2012 SoftWx, Inc. // Released under the MIT License // 11/28/2012 // <authors> Steve Hatchett using System; namespace SoftWx.Collections { /// <summary> /// DoubleChunkedArray maintains its elements in multiple array chunks, with /// each additional chunk doubling the total length of the array. So, for example, /// if the initial chunk was an array of length 4, doubling it would add another /// chunk of length 4 (total length of 8). Doubling again adds a chunk of /// length 8 (total length of 16), etc. /// </summary> public class DoubleChunkedArray<T> { private int initialSize; private int log2Offset; private int size; private T[][] chunks; public DoubleChunkedArray() : this(4) { } public DoubleChunkedArray(int initialSize) { if (initialSize < 1) throw new ArgumentOutOfRangeException("initialSize"); if (!BitHelper.IsPowerOf2(initialSize)) throw new ArgumentException("must be a power of 2", "initialSize"); this.initialSize = initialSize; this.log2Offset = BitHelper.Log2(initialSize) - 1; this.size = initialSize; this.chunks = new T[BitHelper.Log2(int.MaxValue) - this.log2Offset + 1][]; this.chunks[0] = new T[initialSize]; var empty = new T[0]; for (int i = 1; i < this.chunks.Length; i++) this.chunks[i] = empty; } // this could be computed but it slows things down // (1 << (this.chunks.Length - this.initialLog2Size + 3)) public int Length { get { return this.size; } } public T this[int index] { get { if (index >= this.initialSize) { int log2 = BitHelper.Log2(index); return this.chunks[log2 - this.log2Offset][index - (1 << log2)]; } else { return this.chunks[0][index]; } } set { if (index >= this.initialSize) { int log2 = BitHelper.Log2(index); this.chunks[log2 - this.log2Offset][index - (1 << log2)] = value; } else { this.chunks[0][index] = value; } } } public void DoubleLength() { this.chunks[BitHelper.Log2(this.size) - this.log2Offset] = new T[this.size]; this.size *= 2; } } }
// Copyright ©2012 SoftWx, Inc. // Released under the MIT License // 11/29/2012 // <authors> Steve Hatchett using System; namespace SoftWx.Collections { /// <summary> /// Helper methods for bit twiddling. Much of the ideas used come /// from http://graphics.stanford.edu/~seander/bithacks.html /// </summary> public static class BitHelper { static readonly int[] deBruijn = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 }; static readonly int[] logTable256 = new int[256]; static BitHelper() { logTable256[0] = logTable256[1] = 0; for (int i = 2; i < 256; i++) { logTable256[i] = 1 + logTable256[i / 2]; } } public static bool IsPowerOf2(int number) { return ((number > 0) && ((number & (number - 1)) == 0)); } public static int Log2(int number) { if (number <= 0xffff) { if (number > 0xff) { return 8 + logTable256[number >> 8]; } else { return logTable256[number]; } } else if (number <= 0xffffff) { return 16 + logTable256[number >> 16]; } else { return 24 + logTable256[number >> 24]; } } public static int Log2OfPowerOf2(int number) { return deBruijn[unchecked((uint) number * 0x077CB531U) >> 27]; } public static int PowerOf2Floor(int number) { number |= number >> 1; number |= number >> 2; number |= number >> 4; number |= number >> 8; number |= number >> 16; return (number >> 1) + 1; } public static int PowerOf2Ceiling(int number) { if (number > 0) number--; number |= number >> 1; number |= number >> 2; number |= number >> 4; number |= number >> 8; number |= number >> 16; return number + 1; } } }
// Copyright ©2012 SoftWx, Inc. // Released under the MIT License // 11/29/2012 // <authors> Steve Hatchett using System; namespace SoftWx.Collections { /// <summary> /// Like List{T}, but rather than using a regular Array as its underlying /// data store, it uses a DoubleChunkedArray (an array that can repeatedly /// double in size, but retains what it has already allocated, and just adds /// another array object for the additional space. /// </summary> public class DoubleChunkedList<T> { private DoubleChunkedArray<T> items; private int size; private int version; public DoubleChunkedList() : this(0) { } public DoubleChunkedList(int capacity) { capacity = BitHelper.PowerOf2Ceiling(capacity); this.items = new DoubleChunkedArray<T>(capacity); this.size = 0; } public int Count { get { return this.size; } } public T this[int index] { get { if (index >= this.size) throw new ArgumentOutOfRangeException("index"); return this.items[index]; } set { if (index >= this.size) throw new ArgumentOutOfRangeException("index"); this.items[index] = value; this.version++; } } public void Add(T item) { if (this.size == this.items.Length) { this.items.DoubleLength(); } this.items[this.size++] = item; this.version++; } } }Of course, there's no way it will match the speed of .Net's List
I'm not convinced this class has sufficient reason to exist that I will ever flesh it out and add it to my library, but it was an interesting diversion, and the bit twiddling ideas have general utility. If someone finds a compelling use for it, let me know.