FAILED SelectChannelConnector@0.0.0.0:4040: java.net.BindException: Address already in use: bind

java.net.BindException: Address already in use: bind
at sun.nio.ch.Net.bind0(Native Method)
at sun.nio.ch.Net.bind(Net.java:437)
at sun.nio.ch.Net.bind(Net.java:429)
at sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:223)
at sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:74)
at org.spark-project.jetty.server.nio.SelectChannelConnector.open(SelectChannelConnector.java:187)
at org.spark-project.jetty.server.AbstractConnector.doStart(AbstractConnector.java:316)
at org.spark-project.jetty.server.nio.SelectChannelConnector.doStart(SelectChannelConnector.java:265)
at org.spark-project.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:64)
at org.spark-project.jetty.server.Server.doStart(Server.java:293)
at org.spark-project.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:64)
at org.apache.spark.ui.JettyUtils$.org$apache$spark$ui$JettyUtils$$connect$1(JettyUtils.scala:236)
at org.apache.spark.ui.JettyUtils$$anonfun$3.apply(JettyUtils.scala:246)
at org.apache.spark.ui.JettyUtils$$anonfun$3.apply(JettyUtils.scala:246)
at org.apache.spark.util.Utils$$anonfun$startServiceOnPort$1.apply$mcVI$sp(Utils.scala:1920)
at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141)
at org.apache.spark.util.Utils$.startServiceOnPort(Utils.scala:1911)
at org.apache.spark.ui.JettyUtils$.startJettyServer(JettyUtils.scala:246)
at org.apache.spark.ui.WebUI.bind(WebUI.scala:136)
at org.apache.spark.SparkContext$$anonfun$13.apply(SparkContext.scala:474)
at org.apache.spark.SparkContext$$anonfun$13.apply(SparkContext.scala:474)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:474)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:61)
at com.vip.spark.detective.netty.NettyServerHandler.channelRead0(NettyServerHandler.java:33)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:244)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:308)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:294)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:846)
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:511)
at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468)
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137)
at java.lang.Thread.run(Thread.java:745)
Cause:
When a Spark application starts, its web UI first tries to bind to port 4040; if that port is taken it tries 4041, then 4042, and so on down the line. You can also set the spark.ui.port property to make the application use a specific port. In this case, a previous Spark application had not exited cleanly, so the port was still occupied.
Solution:
Find the leftover Spark process that is still holding the port (for example with lsof -i:4040 or netstat) and kill it, or assign the UI a different port via spark.ui.port.
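If you would rather pin the UI port in code, here is a minimal sketch (the app name and the port value 4050 are placeholders, not from the original article):

import org.apache.spark.{SparkConf, SparkContext}

object UiPortExample {
  def main(args: Array[String]): Unit = {
    // Bind the web UI to a known free port instead of relying on the
    // 4040 -> 4041 -> 4042 retry loop (4050 is an arbitrary example).
    val conf = new SparkConf()
      .setAppName("ui-port-example")
      .set("spark.ui.port", "4050")
    val sc = new SparkContext(conf)
    // ... job logic ...
    sc.stop() // stop the context so the port is released when the job ends
  }
}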
Exception 2
Caused by: java.lang.ClassCastException: kafka.cluster.BrokerEndPoint cannot be cast to kafka.cluster.Broker
at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3$$anonfun$apply$6$$anonfun$apply$7.apply(KafkaCluster.scala:97)
at scala.Option.map(Option.scala:146)
at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3$$anonfun$apply$6.apply(KafkaCluster.scala:97)
at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3$$anonfun$apply$6.apply(KafkaCluster.scala:94)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3.apply(KafkaCluster.scala:94)
at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3.apply(KafkaCluster.scala:93)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
at scala.collection.immutable.Set$Set1.foreach(Set.scala:94)
at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2.apply(KafkaCluster.scala:93)
at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2.apply(KafkaCluster.scala:92)
at scala.util.Either$RightProjection.flatMap(Either.scala:522)
at org.apache.spark.streaming.kafka.KafkaCluster.findLeaders(KafkaCluster.scala:92)
at org.apache.spark.streaming.kafka.KafkaCluster.getLeaderOffsets(KafkaCluster.scala:186)
at org.apache.spark.streaming.kafka.KafkaCluster.getLeaderOffsets(KafkaCluster.scala:168)
at org.apache.spark.streaming.kafka.KafkaCluster.getEarliestLeaderOffsets(KafkaCluster.scala:162)
at com.yisa.sparkstreaming.manager.KafkaManager$$anonfun$setOrUpdateOffsets$1.apply(KafkaManager.scala:100)
at com.yisa.sparkstreaming.manager.KafkaManager$$anonfun$setOrUpdateOffsets$1.apply(KafkaManager.scala:64)
at scala.collection.immutable.Set$Set1.foreach(Set.scala:94)
at com.yisa.sparkstreaming.manager.KafkaManager.setOrUpdateOffsets(KafkaManager.scala:64)
at com.yisa.sparkstreaming.manager.KafkaManager.createDirectStream(KafkaManager.scala:42)
at com.yisa.sparkstreaming.manager.DirectKafkaWordCount$.main(DirectKafkaWordCount.scala:54)
at com.yisa.sparkstreaming.manager.DirectKafkaWordCount.main(DirectKafkaWordCount.scala)
... 6 more
Cause:
A Kafka Maven dependency conflict. The stack trace shows spark-streaming-kafka's KafkaCluster casting broker metadata to kafka.cluster.Broker, the class the 0.8 connector was compiled against, while the Kafka client jar actually on the classpath is a newer one that exposes kafka.cluster.BrokerEndPoint instead, so the cast fails at runtime.
Solution:
Align the Kafka client on the classpath with the version your spark-streaming-kafka artifact was built against, and exclude any other Kafka jars pulled in transitively (mvn dependency:tree helps locate them).
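For an sbt build, one way to enforce a single, matching Kafka client is to exclude Kafka from the connector and pin it explicitly. The snippet below is a sketch only; the versions (Spark 1.6.3 connector, Kafka 0.8.2.2 client) are assumptions to adapt to your cluster:

// build.sbt (sketch; versions are assumptions, match them to your cluster)
libraryDependencies ++= Seq(
  // The 0.8 connector drags in its own Kafka client; strip it out...
  ("org.apache.spark" %% "spark-streaming-kafka" % "1.6.3")
    .excludeAll(ExclusionRule(organization = "org.apache.kafka")),
  // ...and pin exactly one Kafka client version that matches the connector.
  "org.apache.kafka" %% "kafka" % "0.8.2.2"
)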
Exception 3
Fixing Spark java.lang.OutOfMemoryError: GC overhead limit exceeded and java.lang.OutOfMemoryError: Java heap space

Problem description:
As data volumes grow, a Spark job can fail with either of the following errors:
java.lang.OutOfMemoryError: Java heap space
java.lang.OutOfMemoryError: GC overhead limit exceeded
I used to assume both errors meant the executors had too little memory, but closer analysis showed the executors were fine; it was the driver that was under-provisioned. When a job is submitted with spark-submit in standalone client mode (the default submission mode for a standalone deployment), the program we write (the main method) runs as the driver, and if no driver memory is specified it gets 512 MB by default. If the job then processes or loads a lot of data (in my case, loading from Hive), the driver can run out of memory and throw one of the OOM errors above.
Solution:
Reference: http://spark.apache.org/docs/latest/configuration.html
Method 1: pass --driver-memory memSize to spark-submit to set the driver's JVM memory size (spark-submit --help lists the other options you can set), for example:
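(In the sketch below, the master URL, main class, and jar name are placeholders, not from the original article; 2g is an arbitrary example size.)

spark-submit \
  --master spark://master:7077 \
  --deploy-mode client \
  --driver-memory 2g \
  --class com.example.MyApp \
  my-app.jar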
Method 2: in the spark_home/conf/ directory, copy the spark-defaults.conf.template template to a file named spark-defaults.conf in the same directory, then set the spark.driver.memory memSize property in it to change the driver's memory, for example:
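(Again a sketch; 2g is an arbitrary example size.)

# spark_home/conf/spark-defaults.conf
spark.driver.memory    2g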