2)运行flume agent
- agent.sources = seqGenSrc
- agent.channels = memoryChannel
- agent.sinks = loggerSink
- # For each one of the sources, the type is defined
- agent.sources.seqGenSrc.type = exec
- agent.sources.seqGenSrc.command = tail -F /data/mongodata/mongo.log
- #agent.sources.seqGenSrc.bind = 172.168.49.130
- # The channel can be defined as follows.
- agent.sources.seqGenSrc.channels = memoryChannel
- # Each sink's type must be defined
- agent.sinks.loggerSink.type = file_roll
- agent.sinks.loggerSink.sink.directory = /data/flume
- #Specify the channel the sink should use
- agent.sinks.loggerSink.channel = memoryChannel
- # Each channel's type is defined.
- agent.channels.memoryChannel.type = memory
- # Other config values specific to each type of channel(sink or source)
- # can be defined as well
- # In this case, it specifies the capacity of the memory channel
- agent.channels.memoryChannel.capacity = 1000
- agent.channels.memoryChannel.transactionCapacity = 100
下一步,修改flume配置文件,将其中sink部分的配置改成kafka sink,如:
- Sink.Status status = Status.READY;
- Channel ch = getChannel();
- Transaction transaction = null;
- Event event = null;
- String eventTopic = null;
- String eventKey = null;
- try {
- transaction = ch.getTransaction();
- transaction.begin();
- messageList.clear();
- if (type.equals("sync")) {
- event = ch.take();
- if (event != null) {
- byte[] tempBody = event.getBody();
- String eventBody = new String(tempBody,"UTF-8");
- Map<String, String> headers = event.getHeaders();
- if ((eventTopic = headers.get(TOPIC_HDR)) == null) {
- eventTopic = topic;
- }
- eventKey = headers.get(KEY_HDR);
- if (logger.isDebugEnabled()) {
- logger.debug("{Event} " + eventTopic + " : " + eventKey + " : "
- + eventBody);
- }
- ProducerData<String, Message> data = new ProducerData<String, Message>(eventTopic, new Message(tempBody));
- long startTime = System.nanoTime();
- logger.debug(eventTopic+"++++"+eventBody);
- producer.send(data);
- long endTime = System.nanoTime();
- }
- } else {
- long processedEvents = 0;
- for (; processedEvents < batchSize; processedEvents += 1) {
- event = ch.take();
- if (event == null) {
- break;
- }
- byte[] tempBody = event.getBody();
- String eventBody = new String(tempBody,"UTF-8");
- Map<String, String> headers = event.getHeaders();
- if ((eventTopic = headers.get(TOPIC_HDR)) == null) {
- eventTopic = topic;
- }
- eventKey = headers.get(KEY_HDR);
- if (logger.isDebugEnabled()) {
- logger.debug("{Event} " + eventTopic + " : " + eventKey + " : "
- + eventBody);
- logger.debug("event #{}", processedEvents);
- }
- // create a message and add to buffer
- ProducerData<String, String> data = new ProducerData<String, String>(eventTopic, eventBody);
- messageList.add(data);
- }
- // publish batch and commit.
- if (processedEvents > 0) {
- long startTime = System.nanoTime();
- long endTime = System.nanoTime();
- }
- }
- transaction.commit();
- } catch (Exception ex) {
- String errorMsg = "Failed to publish events";
- logger.error("Failed to publish events", ex);
- status = Status.BACKOFF;
- if (transaction != null) {
- try {
- transaction.rollback();
- } catch (Exception e) {
- logger.error("Transaction rollback failed", e);
- throw Throwables.propagate(e);
- }
- }
- throw new EventDeliveryException(errorMsg, ex);
- } finally {
- if (transaction != null) {
- transaction.close();
- }
- }
- return status;
其中 type 需要指向 KafkaSink 类的完整类名(全限定名):
- producer.sinks.r.type = org.apache.flume.sink.kafka.KafkaSink
- producer.sinks.r.brokerList = bigdata-node00:9092
- producer.sinks.r.requiredAcks = 1
- producer.sinks.r.batchSize = 100
- #producer.sinks.r.kafka.producer.type=async
- #producer.sinks.r.kafka.customer.encoding=UTF-8
- producer.sinks.r.topic = testFlume1
原文链接:http://www.bkjia.com/PHPjc/1109725.html (来源:www.bkjia.com)
摘要:使用flume+kafka+storm构建实时日志分析系统。本文只会涉及flume和kafka的结合,kafka和storm的结合可以参考其他博客。1. flume安装使用:下载flume安装...