]> source.dussan.org Git - jgit.git/commitdiff
UInt24Array: Array of unsigned ints encoded in 3 bytes. 92/199892/3
authorIvan Frade <ifrade@google.com>
Fri, 10 Feb 2023 20:21:01 +0000 (12:21 -0800)
committerIvan Frade <ifrade@google.com>
Tue, 14 Feb 2023 18:19:12 +0000 (10:19 -0800)
The object size index stores positions of objects in the main
index (when ordered by sha1). These positions are per-pack and usually
a pack has <16 million objects (there are exceptions but rather
rare). It could save some memory storing these positions in three bytes
instead of four. Note that these positions are sorted and always positive.

Implement a wrapper around a byte[] to access and search "ints" while
they are stored as unsigned 3 bytes.

Change-Id: Iaa26ce8e2272e706e35fe4cdb648fb6ca7591972

org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/UInt24ArrayTest.java [new file with mode: 0644]
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/UInt24Array.java [new file with mode: 0644]

diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/UInt24ArrayTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/UInt24ArrayTest.java
new file mode 100644 (file)
index 0000000..a96c217
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2023, Google LLC
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class UInt24ArrayTest {
+
+       private static final byte[] DATA = { 0x00, 0x00, 0x00, // 0
+                       0x00, 0x00, 0x05, // 5
+                       0x00, 0x00, 0x0a, // 10
+                       0x00, 0x00, 0x0f, // 15
+                       0x00, 0x00, 0x14, // 20
+                       0x00, 0x00, 0x19, // 25
+                       (byte) 0xff, 0x00, 0x00, // Uint with MSB=1
+                       (byte) 0xff, (byte) 0xff, (byte) 0xff, // MAX
+       };
+
+       private static final UInt24Array asArray = new UInt24Array(DATA);
+
+       @Test
+       public void uInt24Array_size() {
+               assertEquals(8, asArray.size());
+       }
+
+       @Test
+       public void uInt24Array_get() {
+               assertEquals(0, asArray.get(0));
+               assertEquals(5, asArray.get(1));
+               assertEquals(10, asArray.get(2));
+               assertEquals(15, asArray.get(3));
+               assertEquals(20, asArray.get(4));
+               assertEquals(25, asArray.get(5));
+               assertEquals(0xff0000, asArray.get(6));
+               assertEquals(0xffffff, asArray.get(7));
+               assertThrows(IndexOutOfBoundsException.class, () -> asArray.get(9));
+       }
+
+       @Test
+       public void uInt24Array_getLastValue() {
+               assertEquals(0xffffff, asArray.getLastValue());
+       }
+
+       @Test
+       public void uInt24Array_find() {
+               assertEquals(0, asArray.binarySearch(0));
+               assertEquals(1, asArray.binarySearch(5));
+               assertEquals(2, asArray.binarySearch(10));
+               assertEquals(3, asArray.binarySearch(15));
+               assertEquals(4, asArray.binarySearch(20));
+               assertEquals(5, asArray.binarySearch(25));
+               assertEquals(6, asArray.binarySearch(0xff0000));
+               assertEquals(7, asArray.binarySearch(0xffffff));
+               assertThrows(IllegalArgumentException.class,
+                               () -> asArray.binarySearch(Integer.MAX_VALUE));
+       }
+
+       @Test
+       public void uInt24Array_empty() {
+               Assert.assertTrue(UInt24Array.EMPTY.isEmpty());
+               assertEquals(0, UInt24Array.EMPTY.size());
+               assertEquals(-1, UInt24Array.EMPTY.binarySearch(1));
+               assertThrows(IndexOutOfBoundsException.class,
+                               () -> UInt24Array.EMPTY.getLastValue());
+               assertThrows(IndexOutOfBoundsException.class,
+                               () -> UInt24Array.EMPTY.get(0));
+       }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/UInt24Array.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/UInt24Array.java
new file mode 100644 (file)
index 0000000..3a0a18b
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2023, Google LLC
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Distribution License v. 1.0 which is available at
+ * https://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+package org.eclipse.jgit.internal.storage.file;
+
+/**
+ * A view of a byte[] as a list of integers stored in 3 bytes.
+ *
+ * The ints are stored in big-endian ("network order"), so
+ * byte[]{aa,bb,cc} becomes the int 0x00aabbcc
+ */
+final class UInt24Array {
+
+       public static final UInt24Array EMPTY = new UInt24Array(
+                       new byte[0]);
+
+       private static final int ENTRY_SZ = 3;
+
+       private final byte[] data;
+
+       private final int size;
+
+       UInt24Array(byte[] data) {
+               this.data = data;
+               this.size = data.length / ENTRY_SZ;
+       }
+
+       boolean isEmpty() {
+               return size == 0;
+       }
+
+       int size() {
+               return size;
+       }
+
+       int get(int index) {
+               if (index < 0 || index >= size) {
+                       throw new IndexOutOfBoundsException(index);
+               }
+               int offset = index * ENTRY_SZ;
+               int e = data[offset] & 0xff;
+               e <<= 8;
+               e |= data[offset + 1] & 0xff;
+               e <<= 8;
+               e |= data[offset + 2] & 0xff;
+               return e;
+       }
+
+       /**
+        * Search needle in the array.
+        *
+        * This assumes a sorted array.
+        *
+        * @param needle
+        *            It cannot be bigger than 0xffffff (max unsigned three bytes).
+        * @return position of the needle in the array, -1 if not found. Runtime
+        *         exception if the value is too big for 3 bytes.
+        */
+       int binarySearch(int needle) {
+               if ((needle & 0xff000000) != 0) {
+                       throw new IllegalArgumentException("Too big value for 3 bytes"); //$NON-NLS-1$
+               }
+               if (size == 0) {
+                       return -1;
+               }
+               int high = size;
+               if (high == 0)
+                       return -1;
+               int low = 0;
+               do {
+                       int mid = (low + high) >>> 1;
+                       int cmp;
+                       cmp = Integer.compare(needle, get(mid));
+                       if (cmp < 0)
+                               high = mid;
+                       else if (cmp == 0) {
+                               return mid;
+                       } else
+                               low = mid + 1;
+               } while (low < high);
+               return -1;
+       }
+
+       int getLastValue() {
+               return get(size - 1);
+       }
+}