EWAHCompressedBitmap是Google发布的类似BitMap的一个实现,先看看官方的一个例子:
/**
* https://github.com/lemire/javaewah
*/
@Test
public void test_03() throws IOException {
EWAHCompressedBitmap ewahBitmap1 = EWAHCompressedBitmap.bitmapOf(0, 2, 55, 64, 1 << 30);
EWAHCompressedBitmap ewahBitmap2 = EWAHCompressedBitmap.bitmapOf(1, 3, 64, 1 << 30);
System.out.println("bitmap 1: " + ewahBitmap1);
System.out.println("bitmap 2: " + ewahBitmap2);
// or
EWAHCompressedBitmap orbitmap = ewahBitmap1.or(ewahBitmap2);
System.out.println("bitmap 1 OR bitmap 2: " + orbitmap);
System.out.println("memory usage: " + orbitmap.sizeInBytes() + " bytes");
// and
EWAHCompressedBitmap andbitmap = ewahBitmap1.and(ewahBitmap2);
System.out.println("bitmap 1 AND bitmap 2: " + andbitmap);
System.out.println("memory usage: " + andbitmap.sizeInBytes() + " bytes");
// xor
EWAHCompressedBitmap xorbitmap = ewahBitmap1.xor(ewahBitmap2);
System.out.println("bitmap 1 XOR bitmap 2:" + xorbitmap);
System.out.println("memory usage: " + xorbitmap.sizeInBytes() + " bytes");
// fast aggregation over many bitmaps
EWAHCompressedBitmap ewahBitmap3 = EWAHCompressedBitmap.bitmapOf(5, 55, 1 << 30);
EWAHCompressedBitmap ewahBitmap4 = EWAHCompressedBitmap.bitmapOf(4, 66, 1 << 30);
System.out.println("bitmap 3: " + ewahBitmap3);
System.out.println("bitmap 4: " + ewahBitmap4);
andbitmap = EWAHCompressedBitmap.and(ewahBitmap1, ewahBitmap2, ewahBitmap3, ewahBitmap4);
System.out.println("b1 AND b2 AND b3 AND b4: " + andbitmap);
// serialization
ByteArrayOutputStream bos = new ByteArrayOutputStream();
// Note: you could use a file output steam instead of ByteArrayOutputStream
ewahBitmap1.serialize(new DataOutputStream(bos));
EWAHCompressedBitmap ewahBitmap1new = new EWAHCompressedBitmap();
byte[] bout = bos.toByteArray();
ewahBitmap1new.deserialize(new DataInputStream(new ByteArrayInputStream(bout)));
System.out.println("bitmap 1 (recovered) : " + ewahBitmap1new);
if (!ewahBitmap1.equals(ewahBitmap1new)) throw new RuntimeException("Will not happen");
//
// we can use a ByteBuffer as backend for a bitmap
// which allows memory-mapped bitmaps
//
ByteBuffer bb = ByteBuffer.wrap(bout);
EWAHCompressedBitmap rmap = new EWAHCompressedBitmap(bb);
System.out.println("bitmap 1 (mapped) : " + rmap);
if (!rmap.equals(ewahBitmap1)) throw new RuntimeException("Will not happen");
//
// support for threshold function (new as of version 0.8.0):
// mark as true a bit that occurs at least T times in the source
// bitmaps
//
EWAHCompressedBitmap threshold2 = EWAHCompressedBitmap.threshold(2,
ewahBitmap1, ewahBitmap2, ewahBitmap3, ewahBitmap4);
System.out.println("threshold 2 : " + threshold2);
}
EWAHCompressedBitmap.java中重要的一个属性LongArray.java,测试代码如下,注意包名:
package com.googlecode.javaewah;
import org.junit.Assert;
import org.junit.Test;
public class LongArrayTest {
@Test
public void testXxx() {
/**
* 未指定大小时,long类型数组的大小默认为4,默认值为0
*/
LongArray longArray = new LongArray();
for (int position = 0; position < 4; position++) {
long word = longArray.getWord(position);
Assert.assertEquals(0L, word);
}
/**
* 足以证明,long类型数组里的大小确实为4
*/
try {
longArray.getWord(4);
Assert.fail("不应该到这里");
} catch (Exception e) {
}
/**
* setWord(...)方法不会改变“sizeInWords”属性的值,sizeInWords默认为1
*/
longArray.setWord(0, 10L);
int sizeInWords = longArray.sizeInWords();
Assert.assertEquals(1, sizeInWords);
longArray.setWord(2, 12L);
sizeInWords = longArray.sizeInWords();
Assert.assertEquals(1, sizeInWords);
/**
* push_back(...)方法会改变“sizeInWords”属性的值,使其加1
* push_back前:{10, 0, 12, 0}
* push_back后:{10, 9527, 12, 0}
*/
longArray.push_back(9527L);
sizeInWords = longArray.sizeInWords();
Assert.assertEquals(2, sizeInWords);
/**
* push_back(...)方法会改变“sizeInWords”属性的值,使其加1
* push_back前:{10, 9527, 12, 0}
* push_back后:{10, 9527, 9528, 0}
*/
longArray.push_back(9528L);
sizeInWords = longArray.sizeInWords();
Assert.assertEquals(3, sizeInWords);
/**
* push_back(...)方法会改变“sizeInWords”属性的值,使其加1
* push_back前:{10, 9527, 9528, 0}
* push_back后:{10, 9527, 9528, 9529, 0, 0, 0, 0}
*/
longArray.push_back(9529L);
sizeInWords = longArray.sizeInWords();
Assert.assertEquals(4, sizeInWords);
/**
* push_back(...)方法会改变“sizeInWords”属性的值,使其加1
* push_back前:{10, 9527, 9528, 9529, 0, 0, 0, 0}
* push_back后:{10, 9527, 9528, 9529, 9530, 0, 0, 0}
*/
longArray.push_back(9530L);
sizeInWords = longArray.sizeInWords();
Assert.assertEquals(5, sizeInWords);
/**
* push_back(...)方法会改变“sizeInWords”属性的值,使其加1
* push_back前:{10, 9527, 9528, 9529, 9530, 0, 0, 0}
* push_back后:{10, 9527, 9528, 9529, 9530, 9531, 0, 0}
*/
longArray.push_back(9531L);
sizeInWords = longArray.sizeInWords();
Assert.assertEquals(6, sizeInWords);
/**
* push_back(...)方法会改变“sizeInWords”属性的值,使其加1
* push_back前:{10, 9527, 9528, 9529, 9530, 9531, 0, 0}
* push_back后:{10, 9527, 9528, 9529, 9530, 9531, 9532, 0}
*/
longArray.push_back(9532L);
sizeInWords = longArray.sizeInWords();
Assert.assertEquals(7, sizeInWords);
/**
* push_back(...)方法会改变“sizeInWords”属性的值,使其加1
* push_back前:{10, 9527, 9528, 9529, 9530, 9531, 9532, 0}
* push_back后:{10, 9527, 9528, 9529, 9530, 9531, 9532, 9533, 0, 0, 0, 0, 0, 0, 0, 0}
*/
longArray.push_back(9533L);
sizeInWords = longArray.sizeInWords();
Assert.assertEquals(8, sizeInWords);
/**
* push_back(...)方法会改变“sizeInWords”属性的值,使其加1
* push_back前:{10, 9527, 9528, 9529, 9530, 9531, 9532, 9533, 0, 0, 0, 0, 0, 0, 0, 0}
* push_back后:{10, 9527, 9528, 9529, 9530, 9531, 9532, 9533, 9999999999, 0, 0, 0, 0, 0, 0, 0}
*/
longArray.push_back(9999999999L);
sizeInWords = longArray.sizeInWords();
Assert.assertEquals(9, sizeInWords);
}
}
官方源码中的一个单元测试方法:
@Test
public void testEWAHCompressedBitmap() {
System.out.println("testing EWAH");
long zero = 0;
long specialval = 1l | (1l << 4) | (1l << 63);
long notzero = ~zero;
EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap();
myarray1.addWord(zero);
myarray1.addWord(zero);
myarray1.addWord(zero);
myarray1.addWord(specialval);
myarray1.addWord(specialval);
myarray1.addWord(notzero);
myarray1.addWord(zero);
Assert.assertEquals(myarray1.toList().size(), 6 + 64);
EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap();
myarray2.addWord(zero);
myarray2.addWord(specialval);
myarray2.addWord(specialval);
myarray2.addWord(notzero);
myarray2.addWord(zero);
myarray2.addWord(zero);
myarray2.addWord(zero);
Assert.assertEquals(myarray2.toList().size(), 6 + 64);
List<Integer> data1 = myarray1.toList();
List<Integer> data2 = myarray2.toList();
Vector<Integer> logicalor = new Vector<Integer>();
{
HashSet<Integer> tmp = new HashSet<Integer>();
tmp.addAll(data1);
tmp.addAll(data2);
logicalor.addAll(tmp);
}
Collections.sort(logicalor);
Vector<Integer> logicaland = new Vector<Integer>();
logicaland.addAll(data1);
logicaland.retainAll(data2);
Collections.sort(logicaland);
EWAHCompressedBitmap arrayand = myarray1.and(myarray2);
Assert.assertTrue(arrayand.toList().equals(logicaland));
EWAHCompressedBitmap arrayor = myarray1.or(myarray2);
Assert.assertTrue(arrayor.toList().equals(logicalor));
EWAHCompressedBitmap arrayandbis = myarray2.and(myarray1);
Assert.assertTrue(arrayandbis.toList().equals(logicaland));
EWAHCompressedBitmap arrayorbis = myarray2.or(myarray1);
Assert.assertTrue(arrayorbis.toList().equals(logicalor));
EWAHCompressedBitmap x = new EWAHCompressedBitmap();
for (Integer i : myarray1.toList()) {
x.set(i);
}
Assert.assertTrue(x.toList().equals(
myarray1.toList()));
x = new EWAHCompressedBitmap();
for (Integer i : myarray2.toList()) {
x.set(i);
}
Assert.assertTrue(x.toList().equals(
myarray2.toList()));
x = new EWAHCompressedBitmap();
for (Iterator<Integer> k = myarray1.iterator(); k.hasNext(); ) {
x.set(extracted(k));
}
Assert.assertTrue(x.toList().equals(
myarray1.toList()));
x = new EWAHCompressedBitmap();
for (Iterator<Integer> k = myarray2.iterator(); k.hasNext(); ) {
x.set(extracted(k));
}
Assert.assertTrue(x.toList().equals(
myarray2.toList()));
}
EWAHCompressedBitmap.bitmapOf(…)
静态工厂方法,很好理解。
and(…)、or(…)、xor(…)
看看这篇里说的,想统计出“00后、且是程序员”的总用户数,就是可以使用and(…)方法来实现。
sizeInBytes()
TODO
threshold(…)
TODO
API对比
java.util.BitSet | 自定义的BitMapV1 | EWAHCompressedBitmap |
---|---|---|
BitSet.set(…) | BitMapV1.add(…) | addWord(…) |
BitSet.get(…) | BitMapV1.isExist(…) | get(…) |
BitSet.clear(…) | BitMapV1.clear(…) | clear(…) |