[root@linux01 kb15conf]# vi ./events-flume-logger.conf
# Flume agent "events": spooling-directory source -> file channel -> logger sink.

# Components declared for this agent.
events.sources=eventsSource
events.channels=eventsChannel
events.sinks=eventsSink

# Source: read files dropped into spoolDir, one event per text line.
events.sources.eventsSource.type=spooldir
events.sources.eventsSource.spoolDir=/opt/kb15tmp/flumelogfile/events
events.sources.eventsSource.deserializer=LINE
# Maximum characters per event; per the Flume docs a longer line is truncated
# and the remainder is emitted as a following event.
events.sources.eventsSource.deserializer.maxLineLength=32000
# Only ingest files named events_YYYY-MM-DD.csv. The dot is written as [.] so
# that it matches a literal "." (a bare "." is a regex wildcard, and a
# backslash escape would be consumed by the properties-file loader).
events.sources.eventsSource.includePattern=events_[0-9]{4}-[0-9]{2}-[0-9]{2}[.]csv

# Channel: file channel with its checkpoint and data directories on local disk.
events.channels.eventsChannel.type=file
events.channels.eventsChannel.checkpointDir=/opt/kb15tmp/checkpoint/events
events.channels.eventsChannel.dataDirs=/opt/kb15tmp/checkpoint/data/events

# Sink: write each event to the agent log (console when started with
# -Dflume.root.logger=INFO,console).
events.sinks.eventsSink.type=logger

# Wiring. Note the source key is plural ("channels"), the sink key singular.
events.sources.eventsSource.channels=eventsChannel
events.sinks.eventsSink.channel=eventsChannel
[root@linux01 flume160]#./bin/flume-ng agent --name events --conf ./conf/ --conf-file ./conf/kb15conf/events-flume-logger.conf -Dflume.root.logger=INFO,console
[root@linux01 kb15tmp]#cp ./events.csv /opt/kb15tmp/flumelogfile/events/events_2021-12-21.csv
[root@linux01 events]# head -n 3 events_2021-12-21.csv
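wc -l event_attendees.csv   # 行数 (number of lines)
wc -L event_attendees.csv   # 最长行的长度，不是列数 (length of the longest line, not the column count); 用来确定 deserializer.maxLineLength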
println("--------------------------------------------")
[root@linux01 kb15conf]# vi ./eventsattend-flume-logger.conf
# Flume agent "eventattend": spooling-directory source -> file channel -> logger sink.

# Components declared for this agent. The channel and sink keep the names
# eventsChannel / eventsSink; names are scoped per agent, so they do not clash
# with the "events" agent.
eventattend.sources=eventattendSource
eventattend.channels=eventsChannel
eventattend.sinks=eventsSink

# Source: read files dropped into spoolDir, one event per text line.
eventattend.sources.eventattendSource.type=spooldir
eventattend.sources.eventattendSource.spoolDir=/opt/kb15tmp/flumelogfile/eventattend
eventattend.sources.eventattendSource.deserializer=LINE
# Maximum characters per event; per the Flume docs a longer line is truncated
# and the remainder is emitted as a following event. Check the longest line of
# the input with `wc -L` before lowering this value.
eventattend.sources.eventattendSource.deserializer.maxLineLength=320000
# Only ingest files named eventsattend_YYYY-MM-DD.csv. The dot is written as
# [.] so that it matches a literal "." (a bare "." is a regex wildcard, and a
# backslash escape would be consumed by the properties-file loader).
eventattend.sources.eventattendSource.includePattern=eventsattend_[0-9]{4}-[0-9]{2}-[0-9]{2}[.]csv

# Channel: file channel with its checkpoint and data directories on local disk.
eventattend.channels.eventsChannel.type=file
eventattend.channels.eventsChannel.checkpointDir=/opt/kb15tmp/checkpoint/eventattend
eventattend.channels.eventsChannel.dataDirs=/opt/kb15tmp/checkpoint/data/eventattend

# Sink: write each event to the agent log (console when started with
# -Dflume.root.logger=INFO,console).
eventattend.sinks.eventsSink.type=logger

# Wiring. Note the source key is plural ("channels"), the sink key singular.
eventattend.sources.eventattendSource.channels=eventsChannel
eventattend.sinks.eventsSink.channel=eventsChannel
[root@linux01 flume160]# ./bin/flume-ng agent --name eventattend --conf ./conf/ --conf-file ./conf/kb15conf/eventsattend-flume-logger.conf -Dflume.root.logger=INFO,console
[root@linux01 kb15tmp]# cp event_attendees.csv /opt/kb15tmp/flumelogfile/eventattend/eventsattend_2021-12-21.csv
println("--------------------------------------------")
[root@linux01 kb15conf]# vi ./train-flume-hdfslogger.conf
# Flume agent "train": one spooling-directory source feeding two channels.
#   fileChannel   -> hdfsSink   (writes the data to HDFS)
#   memoryChannel -> loggerSink (echoes the same events to the agent log)
# No channel selector is configured, so Flume's default (replicating) selector
# applies and every event is delivered to both channels.

# Components declared for this agent.
train.sources=trainSource
train.channels=fileChannel memoryChannel
train.sinks=hdfsSink loggerSink

# Source: read files dropped into spoolDir, one event per text line.
train.sources.trainSource.type=spooldir
train.sources.trainSource.spoolDir=/opt/kb15tmp/flumelogfile/train
train.sources.trainSource.deserializer=LINE
# Maximum characters per event; per the Flume docs a longer line is truncated
# and the remainder is emitted as a following event.
train.sources.trainSource.deserializer.maxLineLength=3200
# Only ingest files named train_YYYY-MM-DD.csv. The dot is written as [.] so
# that it matches a literal "." (a bare "." is a regex wildcard, and a
# backslash escape would be consumed by the properties-file loader).
train.sources.trainSource.includePattern=train_[0-9]{4}-[0-9]{2}-[0-9]{2}[.]csv

# Interceptor: drop every line that starts with "user".
# NOTE(review): this is presumably the CSV header row; confirm the header of
# train.csv really begins with "user".
# The pattern was previously "^user*", which means "use" followed by zero or
# more "r" and would also drop any line starting with "use".
train.sources.trainSource.interceptors=head_filter
train.sources.trainSource.interceptors.head_filter.type=regex_filter
train.sources.trainSource.interceptors.head_filter.regex=^user.*
# true = matching events are discarded; non-matching events pass through.
train.sources.trainSource.interceptors.head_filter.excludeEvents=true

# Channel 1: file channel with its checkpoint and data directories on local disk.
train.channels.fileChannel.type=file
train.channels.fileChannel.checkpointDir=/opt/kb15tmp/checkpoint/train
train.channels.fileChannel.dataDirs=/opt/kb15tmp/checkpoint/data/train

# Channel 2: in-memory channel. capacity = max events held,
# transactionCapacity = max events per put/take transaction.
train.channels.memoryChannel.type=memory
train.channels.memoryChannel.capacity=64000
train.channels.memoryChannel.transactionCapacity=16000

# Sink 1: HDFS.
train.sinks.hdfsSink.type=hdfs
# DataStream = write event bodies as-is (no SequenceFile wrapper, no compression).
train.sinks.hdfsSink.hdfs.fileType=DataStream
train.sinks.hdfsSink.hdfs.filePrefix=train
train.sinks.hdfsSink.hdfs.fileSuffix=.csv
# %Y-%m-%d is expanded per event into a date-named directory.
train.sinks.hdfsSink.hdfs.path=hdfs://192.168.111.131:9000/kb15file/train/%Y-%m-%d
# Resolve the date escapes from the agent's local clock; the spooldir source
# does not add a timestamp header to events.
train.sinks.hdfsSink.hdfs.useLocalTimeStamp=true
# Events written per flush to HDFS.
train.sinks.hdfsSink.hdfs.batchSize=640
# File rolling: 0 disables rolling by event count; roll at 6400000 bytes or
# every 30 seconds, whichever comes first.
train.sinks.hdfsSink.hdfs.rollCount=0
train.sinks.hdfsSink.hdfs.rollSize=6400000
train.sinks.hdfsSink.hdfs.rollInterval=30

# Sink 2: write each event to the agent log (console when started with
# -Dflume.root.logger=INFO,console).
train.sinks.loggerSink.type=logger

# Wiring. Note the source key is plural ("channels"), the sink key singular.
train.sources.trainSource.channels=fileChannel memoryChannel
train.sinks.hdfsSink.channel=fileChannel
train.sinks.loggerSink.channel=memoryChannel
[root@linux01 flume160]#./bin/flume-ng agent --name train --conf ./conf/ --conf-file ./conf/kb15conf/train-flume-hdfslogger.conf -Dflume.root.logger=INFO,console
[root@linux01 kb15tmp]# cp train.csv /opt/kb15tmp/flumelogfile/train/train_2021-12-21.csv
2021.12.21flume一些复杂操作例子events、eventattend、train
2021/12/23 23:48:38