apache/storm
https://github.com/apache/storm
Code Pull requests 161 Projects 0 Insights
STORM-3097: Remove storm-druid
This closes #2707
- Loading branch information...
Robert (Bobby) Evans authored and HeartSaVioR committed Jun 6, 2018
1 parent 4aa6b5e commit b6cab8dca97e2238aef8e45118f6946fc2d35049
Showing 18 changed files with 0 additions and 1,365 deletions.
| @@ -107,7 +107,6 @@ But small change will not affect the user experience. We will notify the user wh | ||
| * [Mongodb Integration](storm-mongodb.html) | ||
| * [OpenTSDB Integration](storm-opentsdb.html) | ||
| * [Kinesis Integration](storm-kinesis.html) | ||
| * [Druid Integration](storm-druid.html) | ||
| * [PMML Integration](storm-pmml.html) | ||
| * [Kestrel Integration](Kestrel-and-Storm.html) | ||
| @@ -1,119 +0,0 @@ | ||
| # Storm Druid Bolt and TridentState | ||
| This module provides core Storm and Trident bolt implementations for writing data to [Druid](http://druid.io/) data store. | ||
| This implementation uses Druid's [Tranquility library](https://github.com/druid-io/tranquility) to send messages to druid. | ||
| Some of the implementation details are borrowed from existing [Tranquility Storm Bolt](https://github.com/druid-io/tranquility/blob/master/docs/storm.md). | ||
| This new Bolt was added to support the latest Storm release and to maintain the bolt in the Storm repo. | ||
| ### Core Bolt | ||
| The example below shows the usage of the core bolt, `org.apache.storm.druid.bolt.DruidBeamBolt`. | ||
| By default, this bolt expects to receive tuples in which the "event" field gives your event type. | ||
| This logic can be changed by implementing the `ITupleDruidEventMapper` interface. | ||
| ```java | ||
| DruidBeamFactory druidBeamFactory = new SampleDruidBeamFactoryImpl(new HashMap<String, Object>()); | ||
| DruidConfig druidConfig = DruidConfig.newBuilder().discardStreamId(DruidConfig.DEFAULT_DISCARD_STREAM_ID).build(); | ||
| ITupleDruidEventMapper<Map<String, Object>> eventMapper = new TupleDruidEventMapper<>(TupleDruidEventMapper.DEFAULT_FIELD_NAME); | ||
| DruidBeamBolt<Map<String, Object>> druidBolt = new DruidBeamBolt<Map<String, Object>>(druidBeamFactory, eventMapper, druidConfig); | ||
| topologyBuilder.setBolt("druid-bolt", druidBolt).shuffleGrouping("event-gen"); | ||
| topologyBuilder.setBolt("printer-bolt", new PrinterBolt()).shuffleGrouping("druid-bolt" , druidConfig.getDiscardStreamId()); | ||
| ``` | ||
| ### Trident State | ||
| ```java | ||
| DruidBeamFactory druidBeamFactory = new SampleDruidBeamFactoryImpl(new HashMap<String, Object>()); | ||
| ITupleDruidEventMapper<Map<String, Object>> eventMapper = new TupleDruidEventMapper<>(TupleDruidEventMapper.DEFAULT_FIELD_NAME); | ||
| final Stream stream = tridentTopology.newStream("batch-event-gen", new SimpleBatchSpout(10)); | ||
| stream.peek(new Consumer() { | ||
| @Override | ||
| public void accept(TridentTuple input) { | ||
| LOG.info("########### Received tuple: [{}]", input); | ||
| } | ||
| }).partitionPersist(new DruidBeamStateFactory<Map<String, Object>>(druidBeamFactory, eventMapper), new Fields("event"), new DruidBeamStateUpdater()); | ||
| ``` | ||
| ### Sample Beam Factory Implementation | ||
| The Druid bolt must be supplied with a BeamFactory. You can implement one of these using the [DruidBeams builder's](https://github.com/druid-io/tranquility/blob/master/core/src/main/scala/com/metamx/tranquility/druid/DruidBeams.scala) "buildBeam()" method. | ||
| See the [Configuration documentation](https://github.com/druid-io/tranquility/blob/master/docs/configuration.md) for details. | ||
| For more details, refer to the [Tranquility library](https://github.com/druid-io/tranquility) docs. | ||
| ```java | ||
| public class SampleDruidBeamFactoryImpl implements DruidBeamFactory<Map<String, Object>> { | ||
| @Override | ||
| public Beam<Map<String, Object>> makeBeam(Map<?, ?> conf, IMetricsContext metrics) { | ||
| final String indexService = "druid/overlord"; // The druid.service name of the indexing service Overlord node. | ||
| final String discoveryPath = "/druid/discovery"; // Curator service discovery path. config: druid.discovery.curator.path | ||
| final String dataSource = "test"; //The name of the ingested datasource. Datasources can be thought of as tables. | ||
| final List<String> dimensions = ImmutableList.of("publisher", "advertiser"); | ||
| List<AggregatorFactory> aggregators = ImmutableList.<AggregatorFactory>of( | ||
| new CountAggregatorFactory( | ||
| "click" | ||
| ) | ||
| ); | ||
| // Tranquility needs to be able to extract timestamps from your object type (in this case, Map<String, Object>). | ||
| final Timestamper<Map<String, Object>> timestamper = new Timestamper<Map<String, Object>>() | ||
| { | ||
| @Override | ||
| public DateTime timestamp(Map<String, Object> theMap) | ||
| { | ||
| return new DateTime(theMap.get("timestamp")); | ||
| } | ||
| }; | ||
| // Tranquility uses ZooKeeper (through Curator) for coordination. | ||
| final CuratorFramework curator = CuratorFrameworkFactory | ||
| .builder() | ||
| .connectString((String)conf.get("druid.tranquility.zk.connect")) //take config from storm conf | ||
| .retryPolicy(new ExponentialBackoffRetry(1000, 20, 30000)) | ||
| .build(); | ||
| curator.start(); | ||
| // The JSON serialization of your object must have a timestamp field in a format that Druid understands. By default, | ||
| // Druid expects the field to be called "timestamp" and to be an ISO8601 timestamp. | ||
| final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null); | ||
| // Tranquility needs to be able to serialize your object type to JSON for transmission to Druid. By default this is | ||
| // done with Jackson. If you want to provide an alternate serializer, you can provide your own via ```.objectWriter(...)```. | ||
| // In this case, we won't provide one, so we're just using Jackson. | ||
| final Beam<Map<String, Object>> beam = DruidBeams | ||
| .builder(timestamper) | ||
| .curator(curator) | ||
| .discoveryPath(discoveryPath) | ||
| .location(DruidLocation.create(indexService, dataSource)) | ||
| .timestampSpec(timestampSpec) | ||
| .rollup(DruidRollup.create(DruidDimensions.specific(dimensions), aggregators, QueryGranularities.MINUTE)) | ||
| .tuning( | ||
| ClusteredBeamTuning | ||
| .builder() | ||
| .segmentGranularity(Granularity.HOUR) | ||
| .windowPeriod(new Period("PT10M")) | ||
| .partitions(1) | ||
| .replicants(1) | ||
| .build() | ||
| ) | ||
| .druidBeamConfig( | ||
| DruidBeamConfig | ||
| .builder() | ||
| .indexRetryPeriod(new Period("PT10M")) | ||
| .build()) | ||
| .buildBeam(); | ||
| return beam; | ||
| } | ||
| } | ||
| ``` | ||
| Example code is available [here.](https://github.com/apache/storm/tree/master/external/storm-druid/src/test/java/org/apache/storm/druid) |
147 external/storm-druid/README.md
| @@ -1,147 +0,0 @@ | ||
| # Storm Druid Bolt and TridentState | ||
| This module provides core Storm and Trident bolt implementations for writing data to [Druid](http://druid.io/) data store. | ||
| This implementation uses Druid's [Tranquility library](https://github.com/druid-io/tranquility) to send messages to druid. | ||
| Some of the implementation details are borrowed from existing [Tranquility Storm Bolt](https://github.com/druid-io/tranquility/blob/master/docs/storm.md). | ||
| This new Bolt was added to support the latest Storm release and to maintain the bolt in the Storm repo. | ||
| ### Core Bolt | ||
| The example below shows the usage of the core bolt, `org.apache.storm.druid.bolt.DruidBeamBolt`. | ||
| By default, this bolt expects to receive tuples in which the "event" field gives your event type. | ||
| This logic can be changed by implementing the `ITupleDruidEventMapper` interface. | ||
| ```java | ||
| DruidBeamFactory druidBeamFactory = new SampleDruidBeamFactoryImpl(new HashMap<String, Object>()); | ||
| DruidConfig druidConfig = DruidConfig.newBuilder().discardStreamId(DruidConfig.DEFAULT_DISCARD_STREAM_ID).build(); | ||
| ITupleDruidEventMapper<Map<String, Object>> eventMapper = new TupleDruidEventMapper<>(TupleDruidEventMapper.DEFAULT_FIELD_NAME); | ||
| DruidBeamBolt<Map<String, Object>> druidBolt = new DruidBeamBolt<Map<String, Object>>(druidBeamFactory, eventMapper, druidConfig); | ||
| topologyBuilder.setBolt("druid-bolt", druidBolt).shuffleGrouping("event-gen"); | ||
| topologyBuilder.setBolt("printer-bolt", new PrinterBolt()).shuffleGrouping("druid-bolt" , druidConfig.getDiscardStreamId()); | ||
| ``` | ||
| ### Trident State | ||
| ```java | ||
| DruidBeamFactory druidBeamFactory = new SampleDruidBeamFactoryImpl(new HashMap<String, Object>()); | ||
| ITupleDruidEventMapper<Map<String, Object>> eventMapper = new TupleDruidEventMapper<>(TupleDruidEventMapper.DEFAULT_FIELD_NAME); | ||
| final Stream stream = tridentTopology.newStream("batch-event-gen", new SimpleBatchSpout(10)); | ||
| stream.peek(new Consumer() { | ||
| @Override | ||
| public void accept(TridentTuple input) { | ||
| LOG.info("########### Received tuple: [{}]", input); | ||
| } | ||
| }).partitionPersist(new DruidBeamStateFactory<Map<String, Object>>(druidBeamFactory, eventMapper), new Fields("event"), new DruidBeamStateUpdater()); | ||
| ``` | ||
| ### Sample Beam Factory Implementation | ||
| The Druid bolt must be supplied with a BeamFactory. You can implement one of these using the [DruidBeams builder's](https://github.com/druid-io/tranquility/blob/master/core/src/main/scala/com/metamx/tranquility/druid/DruidBeams.scala) "buildBeam()" method. | ||
| See the [Configuration documentation](https://github.com/druid-io/tranquility/blob/master/docs/configuration.md) for details. | ||
| For more details, refer to the [Tranquility library](https://github.com/druid-io/tranquility) docs. | ||
| ```java | ||
| public class SampleDruidBeamFactoryImpl implements DruidBeamFactory<Map<String, Object>> { | ||
| @Override | ||
| public Beam<Map<String, Object>> makeBeam(Map<?, ?> conf, IMetricsContext metrics) { | ||
| final String indexService = "druid/overlord"; // The druid.service name of the indexing service Overlord node. | ||
| final String discoveryPath = "/druid/discovery"; // Curator service discovery path. config: druid.discovery.curator.path | ||
| final String dataSource = "test"; //The name of the ingested datasource. Datasources can be thought of as tables. | ||
| final List<String> dimensions = ImmutableList.of("publisher", "advertiser"); | ||
| List<AggregatorFactory> aggregators = ImmutableList.<AggregatorFactory>of( | ||
| new CountAggregatorFactory( | ||
| "click" | ||
| ) | ||
| ); | ||
| // Tranquility needs to be able to extract timestamps from your object type (in this case, Map<String, Object>). | ||
| final Timestamper<Map<String, Object>> timestamper = new Timestamper<Map<String, Object>>() | ||
| { | ||
| @Override | ||
| public DateTime timestamp(Map<String, Object> theMap) | ||
| { | ||
| return new DateTime(theMap.get("timestamp")); | ||
| } | ||
| }; | ||
| // Tranquility uses ZooKeeper (through Curator) for coordination. | ||
| final CuratorFramework curator = CuratorFrameworkFactory | ||
| .builder() | ||
| .connectString((String)conf.get("druid.tranquility.zk.connect")) //take config from storm conf | ||
| .retryPolicy(new ExponentialBackoffRetry(1000, 20, 30000)) | ||
| .build(); | ||
| curator.start(); | ||
| // The JSON serialization of your object must have a timestamp field in a format that Druid understands. By default, | ||
| // Druid expects the field to be called "timestamp" and to be an ISO8601 timestamp. | ||
| final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null); | ||
| // Tranquility needs to be able to serialize your object type to JSON for transmission to Druid. By default this is | ||
| // done with Jackson. If you want to provide an alternate serializer, you can provide your own via ```.objectWriter(...)```. | ||
| // In this case, we won't provide one, so we're just using Jackson. | ||
| final Beam<Map<String, Object>> beam = DruidBeams | ||
| .builder(timestamper) | ||
| .curator(curator) | ||
| .discoveryPath(discoveryPath) | ||
| .location(DruidLocation.create(indexService, dataSource)) | ||
| .timestampSpec(timestampSpec) | ||
| .rollup(DruidRollup.create(DruidDimensions.specific(dimensions), aggregators, QueryGranularities.MINUTE)) | ||
| .tuning( | ||
| ClusteredBeamTuning | ||
| .builder() | ||
| .segmentGranularity(Granularity.HOUR) | ||
| .windowPeriod(new Period("PT10M")) | ||
| .partitions(1) | ||
| .replicants(1) | ||
| .build() | ||
| ) | ||
| .druidBeamConfig( | ||
| DruidBeamConfig | ||
| .builder() | ||
| .indexRetryPeriod(new Period("PT10M")) | ||
| .build()) | ||
| .buildBeam(); | ||
| return beam; | ||
| } | ||
| } | ||
| ``` | ||
| Example code is available [here.](https://github.com/apache/storm/tree/master/external/storm-druid/src/test/java/org/apache/storm/druid) | ||
| This version is built to work with Druid 0.8.x. This connector uses the Tranquility module built for Scala 2.11. | ||
| This provides a shaded jar with all Tranquility dependencies. You should include the Scala 2.11 dependency in your | ||
| application. | ||
| ## License | ||
| Licensed to the Apache Software Foundation (ASF) under one | ||
| or more contributor license agreements. See the NOTICE file | ||
| distributed with this work for additional information | ||
| regarding copyright ownership. The ASF licenses this file | ||
| to you under the Apache License, Version 2.0 (the | ||
| "License"); you may not use this file except in compliance | ||
| with the License. You may obtain a copy of the License at | ||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
| Unless required by applicable law or agreed to in writing, | ||
| software distributed under the License is distributed on an | ||
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| KIND, either express or implied. See the License for the | ||
| specific language governing permissions and limitations | ||
| under the License. | ||
| ## Committer Sponsors | ||
| * Sriharsha Chintalapani ([sriharsha@apache.org](mailto:sriharsha@apache.org)) | ||
| * P. Taylor Goetz ([ptgoetz@apache.org](mailto:ptgoetz@apache.org)) | ||
| * Satish Duggana ([satishd@apache.org](mailto:satishd@apache.org)) |
111 external/storm-druid/pom.xml
| @@ -1,111 +0,0 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <!-- | ||
| Licensed to the Apache Software Foundation (ASF) under one or more | ||
| contributor license agreements. See the NOTICE file distributed with | ||
| this work for additional information regarding copyright ownership. | ||
| The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| (the "License"); you may not use this file except in compliance with | ||
| the License. You may obtain a copy of the License at | ||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| --> | ||
| <project xmlns="http://maven.apache.org/POM/4.0.0" | ||
| xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
| xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
| <parent> | ||
| <artifactId>storm</artifactId> | ||
| <groupId>org.apache.storm</groupId> | ||
| <version>2.0.0-SNAPSHOT</version> | ||
| <relativePath>../../pom.xml</relativePath> | ||
| </parent> | ||
| <modelVersion>4.0.0</modelVersion> | ||
| <artifactId>storm-druid</artifactId> | ||
| <dependencies> | ||
| <dependency> | ||
| <groupId>org.apache.storm</groupId> | ||
| <artifactId>storm-client</artifactId> | ||
| <version>${project.version}</version> | ||
| <scope>${provided.scope}</scope> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>org.apache.storm</groupId> | ||
| <artifactId>storm-server</artifactId> | ||
| <version>${project.version}</version> | ||
| <scope>test</scope> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>io.druid</groupId> | ||
| <artifactId>tranquility-core_2.11</artifactId> | ||
| <version>${druid.version}</version> | ||
| <exclusions> | ||
| <exclusion> | ||
| <groupId>com.sun.jersey</groupId> | ||
| <artifactId>jersey-server</artifactId> | ||
| </exclusion> | ||
| <exclusion> | ||
| <groupId>com.sun.jersey</groupId> | ||
| <artifactId>jersey-core</artifactId> | ||
| </exclusion> | ||
| <exclusion> | ||
| <groupId>mysql</groupId> | ||
| <artifactId>mysql-connector-java</artifactId> | ||
| </exclusion> | ||
| </exclusions> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>org.scala-lang</groupId> | ||
| <artifactId>scala-library</artifactId> | ||
| <version>2.11.8</version> | ||
| <scope>provided</scope> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.twitter</groupId> | ||
| <artifactId>util-core_2.11</artifactId> | ||
| <version>6.30.0</version> | ||
| </dependency> | ||
| <!-- tranquility library depends on jackson 2.4.6 version --> | ||
| <dependency> | ||
| <groupId>com.fasterxml.jackson.core</groupId> | ||
| <artifactId>jackson-core</artifactId> | ||
| <version>2.4.6</version> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.fasterxml.jackson.core</groupId> | ||
| <artifactId>jackson-databind</artifactId> | ||
| <version>2.4.6</version> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.fasterxml.jackson.dataformat</groupId> | ||
| <artifactId>jackson-dataformat-smile</artifactId> | ||
| <version>2.4.6</version> | ||
| </dependency> | ||
| <!--test dependencies --> | ||
| <dependency> | ||
| <groupId>junit</groupId> | ||
| <artifactId>junit</artifactId> | ||
| <scope>test</scope> | ||
| </dependency> | ||
| </dependencies> | ||
| <build> | ||
| <plugins> | ||
| <plugin> | ||
| <groupId>org.apache.maven.plugins</groupId> | ||
| <artifactId>maven-checkstyle-plugin</artifactId> | ||
| <!--Note - the version would be inherited--> | ||
| <configuration> | ||
| <maxAllowedViolations>29</maxAllowedViolations> | ||
| </configuration> | ||
| </plugin> | ||
| </plugins> | ||
| </build> | ||
| </project> |
本文介绍Apache Storm与Druid的数据集成模块,展示了如何使用核心Bolt及Trident State将数据写入Druid数据存储。此外,还提供了示例代码及Beam Factory实现说明。
4915

被折叠的 条评论
为什么被折叠?



