Scala 中去除字符串前后空格的 API——trim,以及使用 HashSet 存储元素并判断元素是否存在

本文介绍了如何在 Scala 中使用 trim 方法去除字符串前后的空格,并讲解了如何利用标准库的可变 HashSet(scala.collection.mutable.HashSet)存储元素、通过 contains 进行高效的存在性查找。文中通过示例展示了这两个功能在实际编程中的用法。
	import scala.collection.mutable.ListBuffer

	// Mutable hash set used purely as a membership lookup table.
	val test_hash = collection.mutable.HashSet[String]()
	Seq("123", "126", "125", "124").foreach(test_hash += _)

	// Record a marker when the key is present; otherwise report the miss on stdout.
	val list_test = ListBuffer.empty[String]
	if (test_hash("123")) list_test += "TTT" else println("FFF")
	println(list_test)

	// Strip leading and trailing whitespace from a string.
	val str = "  hello   "
	println(str.trim) // hello
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner; import org.apache.flink.api.common.eventtime.WatermarkStrategy; import org.apache.flink.api.common.functions.AggregateFunction; import org.apache.flink.api.common.state.ListState; import org.apache.flink.api.common.state.ListStateDescriptor; import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.functions.KeyedProcessFunction; import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; import org.apache.flink.streaming.api.windowing.time.Time; import org.apache.flink.streaming.api.windowing.windows.TimeWindow; import org.apache.flink.util.Collector; import java.util.ArrayList; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; public class test { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // 模拟用户行为数据流 (用户ID, 页面ID, 访问时间戳) DataStream<UserBehavior> dataStream = env.fromElements( new UserBehavior("user1", "pageA", 1000L), new UserBehavior("user2", "pageA", 2000L), new UserBehavior("user1", "pageB", 3000L), new UserBehavior("user3", "pageA", 4000L), new UserBehavior("user2", "pageB", 5000L), new UserBehavior("user4", "pageC", 6000L), new UserBehavior("user3", "pageB", 7000L), new UserBehavior("user5", "pageC", 8000L) ); // 分配时间戳和水位线 DataStream<UserBehavior> timedStream = dataStream .assignTimestampsAndWatermarks( WatermarkStrategy.<UserBehavior>forMonotonousTimestamps() .withTimestampAssigner( (SerializableTimestampAssigner<UserBehavior>) (element, recordTimestamp) -> element.timestamp ) ); // 计算每个页面的UV DataStream<PageViewCount> uvStream = 
timedStream .keyBy(UserBehavior::getPageId) .window(TumblingEventTimeWindows.of(Time.minutes(10))) .aggregate(new UVAggregate(), new UVWindowResult()); DataStream<String> result = uvStream .keyBy(PageViewCount::getWindowEnd) .process(new TopNPages(10)); result.print("Top10 Pages"); env.execute("Page UV Top10"); } // UV统计聚合函数 public static class UVAggregate implements AggregateFunction<UserBehavior, Set<String>, Long> { @Override public Set<String> createAccumulator() { return new HashSet<>(); } @Override public Set<String> add(UserBehavior value, Set<String> accumulator) { accumulator.add(value.getUserId()); return accumulator; } @Override public Long getResult(Set<String> accumulator) { return (long) accumulator.size(); } @Override public Set<String> merge(Set<String> a, Set<String> b) { a.addAll(b); return a; } } // 窗口结果处理函数 public static class UVWindowResult extends ProcessWindowFunction<Long, PageViewCount, String, TimeWindow> { @Override public void process(String pageId, Context context, Iterable<Long> elements, Collector<PageViewCount> out) { Long uv = elements.iterator().next(); out.collect(new PageViewCount(pageId, uv, context.window().getEnd())); } } // TopN处理函数 public static class TopNPages extends KeyedProcessFunction<Long, PageViewCount, String> { private final int topSize; private transient ListState<PageViewCount> pageViewState; public TopNPages(int topSize) { this.topSize = topSize; } @Override public void open(Configuration parameters) { // 初始化状态存储 ListStateDescriptor<PageViewCount> descriptor = new ListStateDescriptor<>("pageViewState", PageViewCount.class); pageViewState = getRuntimeContext().getListState(descriptor); } @Override public void processElement( PageViewCount value, Context ctx, Collector<String> out ) throws Exception { // 将每个页面UV数据添加到状态 pageViewState.add(value); // 注册定时器,在窗口结束时触发排序 ctx.timerService().registerEventTimeTimer(value.getWindowEnd() + 100); } @Override public void onTimer(long timestamp, OnTimerContext ctx, 
Collector<String> out) throws Exception { List<PageViewCount> allPageViews = new ArrayList<>(); for (PageViewCount pageView : pageViewState.get()) { allPageViews.add(pageView); } pageViewState.clear(); allPageViews.sort(Comparator.comparing(PageViewCount::getCount).reversed()); int resultSize = Math.min(topSize, allPageViews.size()); List<PageViewCount> topPages = allPageViews.subList(0, resultSize); StringBuilder sb = new StringBuilder(); for (PageViewCount page : topPages) { sb.append(page.getPageId()) .append(", ") .append(page.getCount()) .append("\n"); } out.collect(sb.toString()); } } // 数据结构定义 public static class UserBehavior { private String userId; private String pageId; private Long timestamp; public UserBehavior() { } public UserBehavior(String userId, String pageId, Long timestamp) { this.userId = userId; this.pageId = pageId; this.timestamp = timestamp; } public String getUserId() { return userId; } public String getPageId() { return pageId; } public Long getTimestamp() { return timestamp; } } public static class PageViewCount { private String pageId; private Long count; private Long windowEnd; public PageViewCount() { } public PageViewCount(String pageId, Long count, Long windowEnd) { this.pageId = pageId; this.count = count; this.windowEnd = windowEnd; } public String getPageId() { return pageId; } public Long getCount() { return count; } public Long getWindowEnd() { return windowEnd; } } } 这段代码,最终输出结果为: Top10 Pages> pageA, 3 pageB, 3 pageC, 2 如何使最终输出结果为 pageA, 3 pageB, 3 pageC, 2
最新发布
07-03
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值