【根据特征标签,圈选用户】1000个标签, 每个标签有100w 用户,根据不同标签选取特定用户

场景:
1000个标签, 每个标签有100w 用户,根据不同标签选取特定用户。
例如:选取用户: 同时满足100个特征标签,并且没有10个特征标签。

具体案例

利用postgresql roaringbitmap 插件。

postgresql 使用

1 postgresql 创建插件支持

psql -c "create extension roaringbitmap"


创建表bitmap 字段 表

CREATE TABLE public.t1 (
   id int4 NOT NULL,
   bitmap public.roaringbitmap NULL,
   tag varchar(20) NULL -- 标签
);
CREATE UNIQUE INDEX t1_id_idx ON public.t1 USING btree (id);

-- Column comments

COMMENT ON COLUMN public.t1.tag IS '标签';

插入标签记录,bitmap 是用户id

--- 计算
set roaringbitmap.output_format='array';


CREATE TABLE t1 (id integer, bitmap roaringbitmap);

插入标签记录,bitmap 是用户id

INSERT INTO t1 SELECT 1,rb_build(ARRAY[1,2,3,4,5,6,7,8,9,200]),'A';

INSERT INTO t1 SELECT 2,rb_build_agg(e) FROM generate_series(1,100) e,'B';
INSERT INTO t1 SELECT 4,rb_build(ARRAY[ 1,6,7,8,9,200]),'C';

INSERT INTO t1 SELECT 4,rb_build(ARRAY[ 1,6,7,200]),'D';


SELECT *  FROM t1;

圈选目标用户id : 包含标签A或B标签

SELECT rb_or_agg(bitmap) 
FROM t1 where tag = 'A'  OR tag = 'B'  ;

 SELECT rb_and_agg(bitmap) FROM t1 where tag = 'A'  OR tag = 'B' 

插入标签记录,bitmap 是用户id

OR and 圈选目标用户id 圈取用 包含标签A或B标签,并且同时C标签D标签用户。

SELECT rb_or_agg(bitmap) & (SELECT rb_and_agg(bitmap) FROM t1 where tag = 'C'  OR tag = 'D' ) 
FROM t1 where tag = 'A'  OR tag = 'B'  ;
选取返回转为记录
SELECT rb_iterate(rb_and_agg(bitmap)) FROM t1 where tag = 'A'  OR tag = 'B' ;

根据目标用户id ,判断用户哪些标签。

SELECT *  FROM t1 where  bitmap  @>10;

Create table

CREATE TABLE t1 (id integer, bitmap roaringbitmap);

Build bitmap from integer array

INSERT INTO t1 SELECT 1,rb_build(ARRAY[1,2,3,4,5,6,7,8,9,200]);

INSERT INTO t1 SELECT 2,rb_build_agg(e) FROM generate_series(1,100) e;

Bitmap Calculation (OR, AND, XOR, ANDNOT)

SELECT roaringbitmap('{1,2,3}') | roaringbitmap('{3,4,5}');
SELECT roaringbitmap('{1,2,3}') & roaringbitmap('{3,4,5}');
SELECT roaringbitmap('{1,2,3}') # roaringbitmap('{3,4,5}');
SELECT roaringbitmap('{1,2,3}') - roaringbitmap('{3,4,5}');

Bitmap Aggregate (OR, AND, XOR, BUILD)

SELECT rb_or_agg(bitmap) FROM t1;
SELECT rb_and_agg(bitmap) FROM t1;
SELECT rb_xor_agg(bitmap) FROM t1;
SELECT rb_build_agg(e) FROM generate_series(1,100) e;

Calculate cardinality

SELECT rb_cardinality('{1,2,3}');

Convert bitmap to integer array

SELECT rb_to_array(bitmap) FROM t1 WHERE id = 1;

Convert bitmap to SET of integer

SELECT unnest(rb_to_array('{1,2,3}'::roaringbitmap));

or

SELECT rb_iterate('{1,2,3}'::roaringbitmap);

Opperator List

OpperatorInputOutputDescExampleResult
&roaringbitmap,roaringbitmaproaringbitmapbitwise ANDroaringbitmap('{1,2,3}') & roaringbitmap('{3,4,5}'){3}
|roaringbitmap,roaringbitmaproaringbitmapbitwise ORroaringbitmap('{1,2,3}') | roaringbitmap('{3,4,5}'){1,2,3,4,5}
|roaringbitmap,integerroaringbitmapadd element to roaringbitmaproaringbitmap('{1,2,3}') | 6{1,2,3,6}
|integer,roaringbitmaproaringbitmapadd element to roaringbitmap6 | roaringbitmap('{1,2,3}'){1,2,3,6}
#roaringbitmap,roaringbitmaproaringbitmapbitwise XORroaringbitmap('{1,2,3}') # roaringbitmap('{3,4,5}'){1,2,4,5}
<<roaringbitmap,bigintroaringbitmapbitwise shift leftroaringbitmap('{1,2,3}') << 2{0,1}
>>roaringbitmap,bigintroaringbitmapbitwise shift rightroaringbitmap('{1,2,3}') >> 3{4,5,6}
-roaringbitmap,roaringbitmaproaringbitmapdifference(bitwise ANDNOT)roaringbitmap('{1,2,3}') - roaringbitmap('{3,4,5}'){1,2}
-roaringbitmap,integerroaringbitmapremove element from roaringbitmaproaringbitmap('{1,2,3}') - 3{1,2}
@>roaringbitmap,roaringbitmapboolcontainsroaringbitmap('{1,2,3}') @> roaringbitmap('{3,4,5}')f
@>roaringbitmap,integerboolcontainsroaringbitmap('{1,2,3,4,5}') @> 3t
<@roaringbitmap,roaringbitmapboolis contained byroaringbitmap('{1,2,3}') <@ roaringbitmap('{3,4,5}')f
<@integer,roaringbitmapboolis contained by3 <@ roaringbitmap('{3,4,5}')t
&&roaringbitmap,roaringbitmapbooloverlap (have elements in common)roaringbitmap('{1,2,3}') && roaringbitmap('{3,4,5}')t
=roaringbitmap,roaringbitmapboolequalroaringbitmap('{1,2,3}') = roaringbitmap('{3,4,5}')f
<>roaringbitmap,roaringbitmapboolnot equalroaringbitmap('{1,2,3}') <> roaringbitmap('{3,4,5}')t

Function List

FunctionInputOutputDescExampleResult
rb_buildinteger[]roaringbitmapCreate roaringbitmap from integer arrayrb_build('{1,2,3,4,5}'){1,2,3,4,5}
rb_indexroaringbitmap,integerbigintReturn the 0-based index of element in this roaringbitmap, or -1 if do not exsitsrb_index('{1,2,3}',3)2
rb_cardinalityroaringbitmapbigintReturn cardinality of the roaringbitmaprb_cardinality('{1,2,3,4,5}')5
rb_and_cardinalityroaringbitmap,roaringbitmapbigintReturn cardinality of the AND of two roaringbitmapsrb_and_cardinality('{1,2,3}',rb_build('{3,4,5}'))1
rb_or_cardinalityroaringbitmap,roaringbitmapbigintReturn cardinality of the OR of two roaringbitmapsrb_or_cardinality('{1,2,3}','{3,4,5}')1
rb_xor_cardinalityroaringbitmap,roaringbitmapbigintReturn cardinality of the XOR of two roaringbitmapsrb_xor_cardinality('{1,2,3}','{3,4,5}')4
rb_andnot_cardinalityroaringbitmap,roaringbitmapbigintReturn cardinality of the ANDNOT of two roaringbitmapsrb_andnot_cardinality('{1,2,3}','{3,4,5}')2
rb_is_emptyroaringbitmapbooleanCheck if roaringbitmap is empty.rb_is_empty('{1,2,3,4,5}')t
rb_fillroaringbitmap,range_start bigint,range_end bigintroaringbitmapFill the specified range (not include the range_end)rb_fill('{1,2,3}',5,7){1,2,3,5,6}
rb_clearroaringbitmap,range_start bigint,range_end bigintroaringbitmapClear the specified range (not include the range_end)rb_clear('{1,2,3}',2,3){1,3}
rb_fliproaringbitmap,range_start bigint,range_end bigintroaringbitmapNegative the specified range (not include the range_end)rb_flip('{1,2,3}',2,10){1,4,5,6,7,8,9}
rb_rangeroaringbitmap,range_start bigint,range_end bigintroaringbitmapReturn new set with specified range (not include the range_end)rb_range('{1,2,3}',2,3){2}
rb_range_cardinalityroaringbitmap,range_start bigint,range_end bigintbigintReturn the cardinality of specified range (not include the range_end)rb_range_cardinality('{1,2,3}',2,3)1
rb_minroaringbitmapintegerReturn the smallest offset in roaringbitmap. Return NULL if the bitmap is emptyrb_min('{1,2,3}')1
rb_maxroaringbitmapintegerReturn the greatest offset in roaringbitmap. Return NULL if the bitmap is emptyrb_max('{1,2,3}')3
rb_rankroaringbitmap,integerbigintReturn the number of elements that are smaller or equal to the specified offsetrb_rank('{1,2,3}',3)3
rb_jaccard_distroaringbitmap,roaringbitmapdouble precisionReturn the jaccard distance(or the Jaccard similarity coefficient) of two bitmapsrb_jaccard_dist('{1,2,3}','{3,4}')0.25
rb_selectroaringbitmap,bitset_limit bigint,bitset_offset bigint=0,reverse boolean=false,range_start bigint=0,range_end bigint=4294967296roaringbitmapReturn subset [bitset_offset,bitset_offset+bitset_limit) of bitmap between range [range_start,range_end)rb_select('{1,2,3,4,5,6,7,8,9}',5,2){3,4,5,6,7}
rb_to_arrayroaringbitmapinteger[]Convert roaringbitmap to integer arrayrb_to_array(roaringbitmap('{1,2,3}')){1,2,3}
rb_iterateroaringbitmapSET of integerReturn set of integer from a roaringbitmap data.
SELECT rb_iterate(rb_build('{1,2,3}'))
1
2
3

Aggregation List

FunctionInputOutputDescExampleResult
rb_build_aggintegerroaringbitmapBuild a roaringbitmap from a integer set
select rb_build_agg(id)
    from (values (1),(2),(3)) t(id)
{1,2,3}
rb_or_aggroaringbitmaproaringbitmapAND Aggregate calculations from a roaringbitmap set
select rb_or_agg(bitmap) 
    from (values (roaringbitmap('{1,2,3}')),
                 (roaringbitmap('{2,3,4}'))
          ) t(bitmap)
{1,2,3,4}
rb_and_aggroaringbitmaproaringbitmapAND Aggregate calculations from a roaringbitmap set
select rb_and_agg(bitmap) 
    from (values (roaringbitmap('{1,2,3}')),
                 (roaringbitmap('{2,3,4}'))
          ) t(bitmap)
{2,3}
rb_xor_aggroaringbitmaproaringbitmapXOR Aggregate calculations from a roaringbitmap set
select rb_xor_agg(bitmap) 
    from (values (roaringbitmap('{1,2,3}')),
                 (roaringbitmap('{2,3,4}'))
          ) t(bitmap)
{1,4}
rb_or_cardinality_aggroaringbitmapbigintOR Aggregate calculations from a roaringbitmap set, return cardinality.
select rb_or_cardinality_agg(bitmap) 
    from (values (roaringbitmap('{1,2,3}')),
                 (roaringbitmap('{2,3,4}'))
          ) t(bitmap)
4
rb_and_cardinality_aggroaringbitmapbigintAND Aggregate calculations from a roaringbitmap set, return cardinality
select rb_and_cardinality_agg(bitmap) 
    from (values (roaringbitmap('{1,2,3}')),
                 (roaringbitmap('{2,3,4}'))
          ) t(bitmap)
2
rb_xor_cardinality_aggroaringbitmapbigintXOR Aggregate calculations from a roaringbitmap set, return cardinality
select rb_xor_cardinality_agg(bitmap) 
    from (values (roaringbitmap('{1,2,3}')),
                 (roaringbitmap('{2,3,4}'))
          ) t(bitmap)
2

总结:

根据特征标签 圈取用户,roaringbitmap 非常高效一种方式, 其适用场景是,特征标签与用户id,有对于关系。
例如其中A标签,有1000个用户。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值