再看不懂BitMap算法,我请你吃饭(四)

本文介绍了EWAHCompressedBitmap的使用,包括位图的创建、或、与、异或操作,以及内存占用、序列化、阈值函数等功能。通过示例代码展示了LongArray类的特性,如默认大小、设置和获取元素以及push_back方法如何影响大小。此外,还对比了EWAHCompressedBitmap与java.util.BitSet和自定义BitMapV1的API差异。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

EWAHCompressedBitmap是开源库JavaEWAH(作者Daniel Lemire等人,早期托管于Google Code,包名因此为com.googlecode.javaewah)提供的一种压缩位图(BitMap)实现,先看看官方的一个例子:

/**
 * Walk-through of the JavaEWAH example: pairwise OR / AND / XOR, n-ary AND,
 * a serialization round trip, a ByteBuffer-backed bitmap and the threshold
 * function. Every intermediate result is printed to standard output.
 *
 * @see <a href="https://github.com/lemire/javaewah">https://github.com/lemire/javaewah</a>
 */
@Test
public void test_03() throws IOException {
	final int highBit = 1 << 30;

	EWAHCompressedBitmap first = EWAHCompressedBitmap.bitmapOf(0, 2, 55, 64, highBit);
	EWAHCompressedBitmap second = EWAHCompressedBitmap.bitmapOf(1, 3, 64, highBit);
	System.out.println("bitmap 1: " + first);
	System.out.println("bitmap 2: " + second);

	// Union of the two bitmaps.
	EWAHCompressedBitmap union = first.or(second);
	System.out.println("bitmap 1 OR bitmap 2: " + union);
	System.out.println("memory usage: " + union.sizeInBytes() + " bytes");

	// Intersection of the two bitmaps.
	EWAHCompressedBitmap intersection = first.and(second);
	System.out.println("bitmap 1 AND bitmap 2: " + intersection);
	System.out.println("memory usage: " + intersection.sizeInBytes() + " bytes");

	// Symmetric difference of the two bitmaps.
	EWAHCompressedBitmap difference = first.xor(second);
	System.out.println("bitmap 1 XOR bitmap 2:" + difference);
	System.out.println("memory usage: " + difference.sizeInBytes() + " bytes");

	// Aggregation over many bitmaps through the static n-ary AND.
	EWAHCompressedBitmap third = EWAHCompressedBitmap.bitmapOf(5, 55, highBit);
	EWAHCompressedBitmap fourth = EWAHCompressedBitmap.bitmapOf(4, 66, highBit);
	System.out.println("bitmap 3: " + third);
	System.out.println("bitmap 4: " + fourth);

	EWAHCompressedBitmap intersectionOfAll = EWAHCompressedBitmap.and(first, second, third, fourth);
	System.out.println("b1 AND b2 AND b3 AND b4: " + intersectionOfAll);

	// Serialization round trip through an in-memory buffer
	// (a file output stream could be used instead).
	ByteArrayOutputStream sink = new ByteArrayOutputStream();
	first.serialize(new DataOutputStream(sink));
	byte[] serialized = sink.toByteArray();

	EWAHCompressedBitmap recovered = new EWAHCompressedBitmap();
	recovered.deserialize(new DataInputStream(new ByteArrayInputStream(serialized)));
	System.out.println("bitmap 1 (recovered) : " + recovered);
	if (!first.equals(recovered)) {
		throw new RuntimeException("Will not happen");
	}

	// A ByteBuffer can serve as the backend of a bitmap,
	// which is what allows memory-mapped bitmaps.
	ByteBuffer backing = ByteBuffer.wrap(serialized);
	EWAHCompressedBitmap mapped = new EWAHCompressedBitmap(backing);
	System.out.println("bitmap 1 (mapped) : " + mapped);
	if (!mapped.equals(first)) {
		throw new RuntimeException("Will not happen");
	}

	// Threshold function (new as of version 0.8.0): marks as true every bit
	// that occurs at least T times across the source bitmaps (here T = 2).
	EWAHCompressedBitmap atLeastTwice =
			EWAHCompressedBitmap.threshold(2, first, second, third, fourth);
	System.out.println("threshold 2 : " + atLeastTwice);
}

EWAHCompressedBitmap内部有一个重要的属性,其类型为LongArray。测试代码如下,注意包名(测试类需要与LongArray位于同一个包下才能访问它):

package com.googlecode.javaewah;

import org.junit.Assert;
import org.junit.Test;

/**
 * Exploratory test showing how {@code LongArray} reports its size:
 * {@code setWord} leaves {@code sizeInWords()} untouched, while every
 * {@code push_back} increases it by one.
 */
public class LongArrayTest {

    /** Number of words readable from a LongArray created without an explicit size. */
    private static final int DEFAULT_CAPACITY = 4;

    @Test
    public void testXxx() {
        // Without an explicit size the long array holds 4 words, each defaulting to 0.
        LongArray words = new LongArray();

        int index = 0;
        while (index < DEFAULT_CAPACITY) {
            Assert.assertEquals(0L, words.getWord(index));
            index++;
        }

        // Reading one past the end must throw, which shows the array really has 4 slots.
        try {
            words.getWord(DEFAULT_CAPACITY);
            Assert.fail("不应该到这里");
        } catch (Exception ignored) {
            // Expected: the out-of-range read failed. Assert.fail raises an
            // AssertionError, which is not an Exception and so is not swallowed here.
        }

        // setWord(...) does not change sizeInWords, whose initial value is 1.
        words.setWord(0, 10L);
        Assert.assertEquals(1, words.sizeInWords());

        words.setWord(2, 12L);
        Assert.assertEquals(1, words.sizeInWords());

        // Each push_back(...) writes at index sizeInWords and then increments it.
        // Buffer contents after each push, as traced by the article:
        //   push 9527 -> {10, 9527, 12, 0}
        //   push 9528 -> {10, 9527, 9528, 0}
        //   push 9529 -> {10, 9527, 9528, 9529, 0, 0, 0, 0}                 (grown to 8)
        //   push 9530 -> {10, 9527, 9528, 9529, 9530, 0, 0, 0}
        //   push 9531 -> {10, 9527, 9528, 9529, 9530, 9531, 0, 0}
        //   push 9532 -> {10, 9527, 9528, 9529, 9530, 9531, 9532, 0}
        //   push 9533 -> {10, 9527, ..., 9532, 9533, 0, 0, 0, 0, 0, 0, 0, 0} (grown to 16)
        final long firstPushed = 9527L;
        final int pushes = 7;
        for (int step = 0; step < pushes; step++) {
            words.push_back(firstPushed + step);
            Assert.assertEquals(2 + step, words.sizeInWords());
        }

        // One more push, with a value wider than 32 bits:
        //   {10, 9527, ..., 9533, 9999999999, 0, 0, 0, 0, 0, 0, 0}
        words.push_back(9999999999L);
        Assert.assertEquals(9, words.sizeInWords());
    }

}

官方源码中的一个单元测试方法:

/**
 * Builds two bitmaps word by word, then checks that:
 * <ul>
 *   <li>each contains the expected number of set bits;</li>
 *   <li>{@code and} / {@code or} (in both operand orders) match the
 *       intersection / union computed with plain java.util collections;</li>
 *   <li>a bitmap rebuilt bit by bit via {@code set(int)}, either from
 *       {@code toList()} or from {@code iterator()}, lists the same positions.</li>
 * </ul>
 * Assertions use {@code assertEquals(expected, actual)} so that a failure
 * reports both values in the right roles.
 */
@Test
public void testEWAHCompressedBitmap() {
    System.out.println("testing EWAH");
    long zero = 0;
    // Upper-case L suffix: a lower-case 'l' is easily misread as the digit 1.
    long specialval = 1L | (1L << 4) | (1L << 63);
    long notzero = ~zero;

    EWAHCompressedBitmap myarray1 = new EWAHCompressedBitmap();
    myarray1.addWord(zero);
    myarray1.addWord(zero);
    myarray1.addWord(zero);
    myarray1.addWord(specialval);
    myarray1.addWord(specialval);
    myarray1.addWord(notzero);
    myarray1.addWord(zero);
    // Two words with 3 set bits each plus one fully set 64-bit word.
    Assert.assertEquals(6 + 64, myarray1.toList().size());

    EWAHCompressedBitmap myarray2 = new EWAHCompressedBitmap();
    myarray2.addWord(zero);
    myarray2.addWord(specialval);
    myarray2.addWord(specialval);
    myarray2.addWord(notzero);
    myarray2.addWord(zero);
    myarray2.addWord(zero);
    myarray2.addWord(zero);
    Assert.assertEquals(6 + 64, myarray2.toList().size());

    List<Integer> data1 = myarray1.toList();
    List<Integer> data2 = myarray2.toList();

    // Reference union: de-duplicate through a set, then sort.
    HashSet<Integer> unionSet = new HashSet<Integer>(data1);
    unionSet.addAll(data2);
    Vector<Integer> logicalor = new Vector<Integer>(unionSet);
    Collections.sort(logicalor);

    // Reference intersection: keep only positions present in both lists.
    Vector<Integer> logicaland = new Vector<Integer>(data1);
    logicaland.retainAll(data2);
    Collections.sort(logicaland);

    // Logical operations must agree with the references in both operand orders.
    Assert.assertEquals(logicaland, myarray1.and(myarray2).toList());
    Assert.assertEquals(logicalor, myarray1.or(myarray2).toList());
    Assert.assertEquals(logicaland, myarray2.and(myarray1).toList());
    Assert.assertEquals(logicalor, myarray2.or(myarray1).toList());

    // Rebuild each bitmap bit by bit from its list of positions.
    EWAHCompressedBitmap x = new EWAHCompressedBitmap();
    for (Integer i : data1) {
        x.set(i);
    }
    Assert.assertEquals(data1, x.toList());

    x = new EWAHCompressedBitmap();
    for (Integer i : data2) {
        x.set(i);
    }
    Assert.assertEquals(data2, x.toList());

    // Rebuild each bitmap bit by bit from its iterator.
    x = new EWAHCompressedBitmap();
    for (Iterator<Integer> k = myarray1.iterator(); k.hasNext(); ) {
        x.set(extracted(k));
    }
    Assert.assertEquals(data1, x.toList());

    x = new EWAHCompressedBitmap();
    for (Iterator<Integer> k = myarray2.iterator(); k.hasNext(); ) {
        x.set(extracted(k));
    }
    Assert.assertEquals(data2, x.toList());
}

EWAHCompressedBitmap.bitmapOf(…)

静态工厂方法,很好理解。

and(…)、or(…)、xor(…)

看看这篇里说的,想统计出“00后、且是程序员”的总用户数,就是可以使用and(…)方法来实现。

sizeInBytes()

TODO

threshold(…)

TODO

API对比
| java.util.BitSet | 自定义的BitMapV1 | EWAHCompressedBitmap |
| --- | --- | --- |
| BitSet.set(…) | BitMapV1.add(…) | addWord(…) |
| BitSet.get(…) | BitMapV1.isExist(…) | get(…) |
| BitSet.clear(…) | BitMapV1.clear(…) | clear(…) |
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值