《初级》Flink 异步 IO 练习

加微信拉微信交流群

微信:weixin605405145

AsyncIO

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.checkpoint.ListCheckpointed;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.async.AsyncFunction;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.ExecutorUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

/**
 * Example that illustrates how to use {@link AsyncFunction}.
 */
public class AsyncIOExample {

	private static final Logger LOG = LoggerFactory.getLogger(AsyncIOExample.class);

	private static final String EXACTLY_ONCE_MODE = "exactly_once";
	private static final String EVENT_TIME = "EventTime";
	private static final String INGESTION_TIME = "IngestionTime";
	private static final String ORDERED = "ordered";

	/**
	 * A checkpointed source that emits consecutive integers, one roughly every 10 ms.
	 *
	 * <p>Emission begins at the current offset ({@code start}, 0 unless restored from a
	 * checkpoint) and continues while the offset is below the configured maximum, or
	 * forever when the maximum is {@code -1}. The offset is the only checkpointed state.
	 */
	private static class SimpleSource implements SourceFunction<Integer>, ListCheckpointed<Integer> {
		private static final long serialVersionUID = 1L;

		/** Cleared by {@link #cancel()} to make the emit loop exit. */
		private volatile boolean isRunning = true;
		/** Exclusive upper bound of the emitted values; {@code -1} means unbounded. */
		private int counter = 0;
		/** Next value to emit; this is what gets snapshotted and restored. */
		private int start = 0;

		/**
		 * @param maxNum exclusive upper bound of the emitted values, or {@code -1} for an
		 *               endless stream
		 */
		public SimpleSource(int maxNum) {
			this.counter = maxNum;
		}

		/** Stores the next value to emit as a single-element list. */
		@Override
		public List<Integer> snapshotState(long checkpointId, long timestamp) throws Exception {
			return Collections.singletonList(start);
		}

		/** Restores the offset; if several entries are handed in, the last one wins. */
		@Override
		public void restoreState(List<Integer> state) throws Exception {
			state.forEach(restoredOffset -> this.start = restoredOffset);
		}

		/**
		 * Emits values until the bound is reached or the source is cancelled. Emitting and
		 * advancing the offset happen together under the checkpoint lock.
		 */
		@Override
		public void run(SourceContext<Integer> ctx) throws Exception {
			while (true) {
				final boolean moreToEmit = start < counter || counter == -1;
				if (!moreToEmit || !isRunning) {
					break;
				}

				synchronized (ctx.getCheckpointLock()) {
					ctx.collect(start);

					// advance, wrapping back to 0 just before reaching Integer.MAX_VALUE
					final int next = start + 1;
					start = (next == Integer.MAX_VALUE) ? 0 : next;
				}

				Thread.sleep(10L);
			}
		}

		/** Requests the emit loop in {@link #run(SourceContext)} to terminate. */
		@Override
		public void cancel() {
			isRunning = false;
		}
	}


	/**
	 * A sample {@link AsyncFunction} that uses a thread pool with worker threads
	 * to simulate multiple asynchronous operations.
	 *
	 * <p>Each invocation sleeps for a random time (bounded by {@code sleepFactor} milliseconds)
	 * and then either completes the result with {@code "key-" + (input % 10)} or, with
	 * probability {@code failRatio}, completes it exceptionally.
	 *
	 * <p>In a real production use case the thread pool would typically live inside the
	 * asynchronous client rather than in the function itself.
	 */
	private static class SampleAsyncFunction extends RichAsyncFunction<Integer, String> {
		private static final long serialVersionUID = 2098635244857937717L;

		/** Number of worker threads used to simulate concurrent async requests. */
		private static final int THREAD_POOL_SIZE = 30;

		/** Created in {@link #open(Configuration)}; transient because executors are not serializable. */
		private transient ExecutorService executorService;

		/**
		 * The result of multiplying sleepFactor with a random float is used to pause
		 * the working thread in the thread pool, simulating a time consuming async operation.
		 */
		private final long sleepFactor;

		/**
		 * The ratio to generate an exception to simulate an async error. For example, the error
		 * may be a TimeoutException while visiting HBase.
		 */
		private final float failRatio;

		/** Maximum time in milliseconds to wait for the thread pool to terminate on close. */
		private final long shutdownWaitTS;

		/**
		 * @param sleepFactor    upper bound (ms) of the simulated latency of one request
		 * @param failRatio      probability in [0, 1] that a request fails
		 * @param shutdownWaitTS time (ms) to wait for the thread pool to shut down
		 */
		SampleAsyncFunction(long sleepFactor, float failRatio, long shutdownWaitTS) {
			this.sleepFactor = sleepFactor;
			this.failRatio = failRatio;
			this.shutdownWaitTS = shutdownWaitTS;
		}

		@Override
		public void open(Configuration parameters) throws Exception {
			super.open(parameters);

			executorService = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
		}

		@Override
		public void close() throws Exception {
			try {
				super.close();
			} finally {
				// close() can be called even if open() never ran or failed before the pool
				// was created, so guard against a missing executor; the finally block makes
				// sure the pool is released even when super.close() throws.
				if (executorService != null) {
					ExecutorUtils.gracefulShutdown(shutdownWaitTS, TimeUnit.MILLISECONDS, executorService);
				}
			}
		}

		/**
		 * Hands the simulated request to the thread pool and returns immediately; the worker
		 * completes {@code resultFuture} once the simulated latency has elapsed.
		 */
		@Override
		public void asyncInvoke(final Integer input, final ResultFuture<String> resultFuture) {
			executorService.submit(() -> {
				// wait for a while to simulate the async operation
				long sleep = (long) (ThreadLocalRandom.current().nextFloat() * sleepFactor);
				try {
					Thread.sleep(sleep);

					if (ThreadLocalRandom.current().nextFloat() < failRatio) {
						resultFuture.completeExceptionally(new Exception("wahahahaha..."));
					} else {
						resultFuture.complete(
							Collections.singletonList("key-" + (input % 10)));
					}
				} catch (InterruptedException e) {
					// restore the interrupt status so the pool can observe the shutdown request,
					// and emit an empty result for the interrupted element
					Thread.currentThread().interrupt();
					resultFuture.complete(new ArrayList<>(0));
				} catch (RuntimeException e) {
					// the Future returned by submit() is discarded, so an unexpected failure would
					// otherwise be swallowed and the element would only fail through the timeout
					resultFuture.completeExceptionally(e);
				}
			});
		}
	}

	/**
	 * Prints the command line options understood by this example to standard output.
	 */
	private static void printUsage() {
		// every fragment except the last ends with a space so the options stay separated
		System.out.println("To customize example, use: AsyncIOExample [--fsStatePath <path to fs state>] " +
				"[--checkpointMode <exactly_once or at_least_once>] " +
				"[--maxCount <max number of input from source, -1 for infinite input>] " +
				"[--sleepFactor <interval to sleep for each stream element>] [--failRatio <possibility to throw exception>] " +
				"[--waitMode <ordered or unordered>] [--waitOperatorParallelism <parallelism for async wait operator>] " +
				"[--eventType <EventTime or IngestionTime>] [--shutdownWaitTS <milli sec to wait for thread pool>] " +
				"[--timeout <Timeout for the asynchronous operations>]");
	}

	/**
	 * Builds and runs the example job: a checkpointed integer source, an async wait operator
	 * (ordered or unordered) and a keyed running count of the produced keys, printed to stdout.
	 *
	 * @param args command line options, see {@link #printUsage()}
	 * @throws Exception if an option cannot be parsed or the job execution fails
	 */
	public static void main(String[] args) throws Exception {

		// obtain execution environment
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// parse parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		final String statePath;
		final String cpMode;
		final int maxCount;
		final long sleepFactor;
		final float failRatio;
		final String mode;
		final int taskNum;
		final String timeType;
		final long shutdownWaitTS;
		final long timeout;

		try {
			// check the configuration for the job; defaults reuse the class constants so the
			// default value and the value compared against below cannot drift apart
			statePath = params.get("fsStatePath", null);
			cpMode = params.get("checkpointMode", EXACTLY_ONCE_MODE);
			maxCount = params.getInt("maxCount", 100000);
			sleepFactor = params.getLong("sleepFactor", 100);
			failRatio = params.getFloat("failRatio", 0.001f);
			mode = params.get("waitMode", ORDERED);
			taskNum = params.getInt("waitOperatorParallelism", 1);
			timeType = params.get("eventType", EVENT_TIME);
			shutdownWaitTS = params.getLong("shutdownWaitTS", 20000);
			timeout = params.getLong("timeout", 10000L);
		} catch (Exception e) {
			printUsage();

			throw e;
		}

		final String lineSeparator = System.lineSeparator();

		StringBuilder configStringBuilder = new StringBuilder();
		configStringBuilder
			.append("Job configuration").append(lineSeparator)
			.append("FS state path=").append(statePath).append(lineSeparator)
			.append("Checkpoint mode=").append(cpMode).append(lineSeparator)
			.append("Max count of input from source=").append(maxCount).append(lineSeparator)
			.append("Sleep factor=").append(sleepFactor).append(lineSeparator)
			.append("Fail ratio=").append(failRatio).append(lineSeparator)
			.append("Waiting mode=").append(mode).append(lineSeparator)
			.append("Parallelism for async wait operator=").append(taskNum).append(lineSeparator)
			.append("Event type=").append(timeType).append(lineSeparator)
			.append("Shutdown wait timestamp=").append(shutdownWaitTS).append(lineSeparator)
			// the timeout is parsed above and drives the async operator, so report it as well
			.append("Timeout for async operations=").append(timeout);

		LOG.info(configStringBuilder.toString());

		if (statePath != null) {
			// setup state and checkpoint mode
			env.setStateBackend(new FsStateBackend(statePath));
		}

		// any value other than "exactly_once" falls back to at-least-once checkpointing
		if (EXACTLY_ONCE_MODE.equals(cpMode)) {
			env.enableCheckpointing(1000L, CheckpointingMode.EXACTLY_ONCE);
		}
		else {
			env.enableCheckpointing(1000L, CheckpointingMode.AT_LEAST_ONCE);
		}

		// enable watermark or not; unknown values leave the environment default untouched
		if (EVENT_TIME.equals(timeType)) {
			env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		}
		else if (INGESTION_TIME.equals(timeType)) {
			env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
		}

		// create input stream of a single integer
		DataStream<Integer> inputStream = env.addSource(new SimpleSource(maxCount));

		// create async function, which will *wait* for a while to simulate the process of async i/o
		AsyncFunction<Integer, String> function =
				new SampleAsyncFunction(sleepFactor, failRatio, shutdownWaitTS);

		// maximum number of in-flight async requests, shared by both wait modes
		final int capacity = 20;

		// add async operator to streaming job; any mode other than "ordered" is treated as unordered
		DataStream<String> result;
		if (ORDERED.equals(mode)) {
			result = AsyncDataStream.orderedWait(
				inputStream,
				function,
				timeout,
				TimeUnit.MILLISECONDS,
				capacity).setParallelism(taskNum);
		}
		else {
			result = AsyncDataStream.unorderedWait(
				inputStream,
				function,
				timeout,
				TimeUnit.MILLISECONDS,
				capacity).setParallelism(taskNum);
		}

		// map every key to (key, 1) and keep a running sum per key
		result.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
			private static final long serialVersionUID = -938116068682344455L;

			@Override
			public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
				out.collect(new Tuple2<>(value, 1));
			}
		}).keyBy(0).sum(1).print();

		// execute the program
		env.execute("Async IO Example");
	}
}

 

<think>好的,我需要帮助用户了解Flink异步IO的用法及示例。首先,用户提供的引用中有三个来源,我需要结合这些内容来整理回答。 首先,引用[1]提到了Flink异步IO的基本概念和一个示例,但要注意用户提供的引用中可能存在错误,比如用户提到的引用[1]是关于快速排序的,但实际给出的链接是关于Flink异步IO的。这可能是个错误,但按照用户提供的引用内容来处理。需要确认是否有混淆,但根据用户的问题,重点在Flink异步IO,所以可能用户在引用部分有误。但根据用户给出的三个引用链接,都是关于Flink异步IO的,所以应该以这三个引用为基础。 接下来,引用[2]和[3]提供了具体的实现示例,比如Redis异步读取和HTTP客户端的使用。需要将这些内容整合起来,说明异步IO的使用步骤、配置参数和注意事项。 用户的需求是了解异步IO的使用方法及示例,因此需要分步骤说明,包括添加依赖、实现AsyncFunction、使用AsyncDataStream工具类,以及参数配置。同时,需要给出具体的代码示例,比如访问Redis或HTTP服务。 另外,用户可能遇到的问题如超时处理、顺序模式与乱序模式的区别,以及资源管理,需要根据引用内容进行解答。例如,引用[3]中提到的capacity参数设置和线程池配置,需要强调这些参数的重要性。 最后,生成相关问题需要考虑用户可能进一步想了解的内容,比如性能优化、异常处理、与其他技术的整合等。 现在需要组织这些信息,确保结构清晰,符合用户要求的格式,使用中文回答,正确应用LaTeX语法,并添加引用标识。同时,检查是否有遗漏的关键点,比如AsyncFunction的实现细节,ResultFuture的使用,超时处理等,确保覆盖所有用户可能需要的方面。</think>### Flink异步IO使用方法及示例 Flink异步IO用于提升外部数据访问效率,通过异步请求避免阻塞算子处理。以下为关键步骤和示例: #### 一、核心步骤 1. **添加依赖** ```xml <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-connector-async</artifactId> <version>${flink.version}</version> </dependency> ``` 2. **实现AsyncFunction** ```java public class AsyncHttpRequest extends RichAsyncFunction<String, String> { private transient AsyncHttpClient client; @Override public void open(Configuration parameters) { client = new DefaultAsyncHttpClient(); } @Override public void asyncInvoke(String input, ResultFuture<String> resultFuture) { client.prepareGet("https://api.example.com?q=" + input) .execute(new AsyncCompletionHandler<Response>() { @Override public Response onCompleted(Response response) { resultFuture.complete(Collections.singleton(response.getResponseBody())); return response; } }); } } ``` 3. **应用异步算子** ```java AsyncDataStream.unorderedWait( // 或orderedWait inputStream, new AsyncHttpRequest(), 10, // 超时时间(秒) TimeUnit.SECONDS, 100 // 最大并发请求数 ).print(); ``` #### 二、关键配置参数 - **Capacity**:控制同时处理的请求数(默认100)[^3] - **Timeout**:设定单个请求超时阈值 - **AsyncMode**: - `orderedWait`:保持输入顺序 - `unorderedWait`:允许乱序输出[^1] #### 三、典型应用场景 1. 
**Redis异步查询**

```java
// 引用[2]的Redis访问示例
public class RedisAsyncFunction extends RichAsyncFunction<String, User> {
    private transient RedisClient redisClient;

    public void asyncInvoke(String userId, ResultFuture<User> resultFuture) {
        redisClient.get(userId, new RedisCallback<User>() {
            public void onComplete(User user) {
                resultFuture.complete(Collections.singleton(user));
            }
        });
    }
}
```

2. **HTTP服务调用**

```java
// 引用[3]的高德地图API调用
// asyncInvoke方法中配置:
client.prepareGet("https://restapi.amap.com/v3/geocode/geo?address="+address+"&key=your_key")
```

#### 四、注意事项

1. **客户端选择**:推荐使用连接池管理的客户端(如Netty/AsyncHttpClient)
2. **异常处理**:超时需通过重写`AsyncFunction#timeout`方法处理(默认实现会抛出超时异常);请求失败时应调用`ResultFuture.completeExceptionally`上报异常
3. **资源释放**:在`close()`方法中释放连接资源[^2]

#### 五、性能优化建议

$$ 吞吐量 = \frac{并发请求数}{平均响应时间} $$

通过公式可知,提升吞吐量需要:

1. 增加capacity参数值
2. 优化外部系统响应速度
3. 使用连接池复用资源[^3]
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值