用Nginx採集日誌通過flume將日誌檔案儲存到HDFS上
阿新 • • 發佈:2018-12-13
- 安裝Tomcat
到官網下載apache-tomcat-7.0.69
開啟eclipse->window->preferences->server->runtime environments
編寫專案
- Nginx的介紹及其安裝部署(所有操作以root使用者執行)
-
建立目錄:
mkdir /opt/modules/nginx
-
解壓nginx
tar -zxvf tengine-2.1.0.tar.gz -C ./
-
安裝部署
- 下載依賴
yum install -y gcc openssl-devel pcre-devel zlib-devel
cd tengine-2.1.0
- 編譯安裝
預編譯:./configure
./configure --prefix=/opt/modules/nginx/tengine-2.1.0/ --error-log-path=/var/log/nginx/error.log --http-log-path=/var/log/nginx/access.log --pid-path=/var/run/nginx/nginx.pid --lock-path=/var/lock/nginx.lock --with-http_ssl_module --with-http_flv_module --with-http_stub_status_module --with-http_gzip_static_module --http-client-body-temp-path=/var/tmp/nginx/client/ --http-proxy-temp-path=/var/tmp/nginx/proxy/ --http-fastcgi-temp-path=/var/tmp/nginx/fcgi/ --http-uwsgi-temp-path=/var/tmp/nginx/uwsgi --http-scgi-temp-path=/var/tmp/nginx/scgi --with-pcre
編譯
make
安裝
make install
- 啟動服務
cd /opt/modules/nginx/tengine-2.1.0/
sbin/nginx
- 第一次啟動若報錯(臨時目錄不存在),先建立目錄:
mkdir -p /var/tmp/nginx/client/
- 重新啟動即可:
sbin/nginx -s stop
- nginx使用:
檢視幫助:
sbin/nginx -h
- 關閉:
sbin/nginx -s stop
配置使用service命令管理nginx服務
將提供的nginx指令碼放入/etc/init.d目錄下
#!/bin/bash
#
# chkconfig: - 85 15
# description: nginx is a World Wide Web server. It is used to serve
#
# SysV-style init script for the nginx build installed under
# /opt/modules/nginx/tengine-2.1.0.

# Load the init function library.
. /etc/rc.d/init.d/functions

# Load the networking configuration.
. /etc/sysconfig/network

# Do nothing at all when networking is switched off.
if [ "$NETWORKING" = "no" ]; then
  exit 0
fi

# Path of the nginx executable; adjust by hand to match the install.
nginx="/opt/modules/nginx/tengine-2.1.0/sbin/nginx"
# Program name: the last path component of the executable.
prog=${nginx##*/}
# Path of the nginx configuration file; adjust by hand to match the install.
NGINX_CONF_FILE="/opt/modules/nginx/tengine-2.1.0/conf/nginx.conf"

# Optional local overrides, currently disabled:
#[ -f /etc/sysconfig/nginx ] && . /etc/sysconfig/nginx

# Lock file created by start() and removed by stop().
lockfile=/var/lock/subsys/nginx
#make_dirs() {
# # make required directories
# user=`nginx -V 2>&1 | grep "configure arguments:" | sed 's/[^*]*--user=\([^ ]*\).*/\1/g' -`
# options=`$nginx -V 2>&1 | grep 'configure arguments:'`
# for opt in $options; do
# if [ `echo $opt | grep '.*-temp-path'` ]; then
# value=`echo $opt | cut -d "=" -f 2`
# if [ ! -d "$value" ]; then
# # echo "creating" $value
# mkdir -p $value && chown -R $user $value
# fi
# fi
# done
#}
# Start nginx with the configured configuration file.
# Globals:  nginx, NGINX_CONF_FILE, prog, lockfile (read)
# Exits:    5 when the nginx binary is not executable,
#           6 when the configuration file does not exist
# Returns:  the status of the daemon helper; the lock file is created
#           only when that status is 0
start() {
  # Quote every path so whitespace or an empty value cannot change
  # what the test operators see.
  [ -x "$nginx" ] || exit 5
  [ -f "$NGINX_CONF_FILE" ] || exit 6
  # make_dirs
  echo -n $"Starting $prog: "
  daemon "$nginx" -c "$NGINX_CONF_FILE"
  local retval=$?
  echo
  if [ "$retval" -eq 0 ]; then
    touch -- "$lockfile"
  fi
  return "$retval"
}
# Stop nginx by sending it SIGQUIT through the killproc helper.
# Globals:  prog, lockfile (read)
# Returns:  the status of killproc; the lock file is removed only when
#           that status is 0
stop() {
  echo -n $"Stopping $prog: "
  killproc "$prog" -QUIT
  local retval=$?
  echo
  if [ "$retval" -eq 0 ]; then
    # Quoted so a lock path containing whitespace is really removed.
    rm -f -- "$lockfile"
  fi
  return "$retval"
}
# Restart nginx: validate the configuration first, then stop, pause one
# second and start again.
# Returns: the configtest status when validation fails, otherwise the
#          status of start.
restart() {
  local rc=0
  configtest || rc=$?
  if [ "$rc" -ne 0 ]; then
    # Never take down a running server for a broken configuration.
    return "$rc"
  fi
  stop
  sleep 1
  start
}
# Reload the nginx configuration without stopping the service.
# Globals:  nginx, prog (read); RETVAL (written: status of killproc)
# Returns:  the configtest status when validation fails, otherwise the
#           status of killproc, so callers can see a failed reload
reload() {
  configtest || return $?
  echo -n $"Reloading $prog: "
  # -HUP is the signal nginx uses for a graceful reload.
  killproc "$nginx" -HUP
  RETVAL=$?
  echo
  # Without this explicit return the function would always report the
  # status of the echo above, i.e. success.
  return "$RETVAL"
}
# force-reload action: implemented as a plain restart.
# Returns: the status of restart.
force_reload() { restart; }
# Ask nginx to validate the configuration file (-t) without starting it.
# Globals:  nginx, NGINX_CONF_FILE (read)
# Returns:  the exit status of the nginx binary
configtest() {
  # Quoted so paths containing whitespace are passed as single words.
  "$nginx" -t -c "$NGINX_CONF_FILE"
}
# Report the service state by delegating to the status helper
# (presumably supplied by the sourced init function library).
# Globals:  prog (read)
# Returns:  whatever status returns
rh_status() {
  status ${prog}
}
# Quiet variant of rh_status: same return status, all output discarded.
rh_status_q() {
  rh_status &> /dev/null
}
# Dispatch on the action given as the first command-line argument.
# Exit codes set here: 0 when the requested state already holds,
# 7 for reload while the service is not running, 2 for an unknown action.
case "$1" in
  start)
    # Already running: nothing to do.
    rh_status_q && exit 0
    "$1"
    ;;
  stop)
    # Already stopped: nothing to do.
    rh_status_q || exit 0
    "$1"
    ;;
  restart | configtest)
    "$1"
    ;;
  reload)
    # A service that is not running cannot be reloaded.
    rh_status_q || exit 7
    "$1"
    ;;
  force-reload)
    force_reload
    ;;
  status)
    rh_status
    ;;
  condrestart | try-restart)
    # Restart only when the service is currently running. The original
    # arm stopped after the status check and never restarted anything.
    rh_status_q || exit 0
    restart
    ;;
  *)
    echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload|configtest}"
    exit 2
    ;;
esac
注意:
- 14:配置nginx命令的地址(指令碼中的 nginx 變數),手動修改
- 17:配置nginx配置檔案地址(指令碼中的 NGINX_CONF_FILE 變數),手動修改
修改許可權
chmod 755 /etc/init.d/nginx
設定開機啟動
chkconfig nginx on
nginx的配置檔案
more /opt/modules/nginx/tengine-2.1.0/conf/nginx.conf
- log_format:用於配置nginx記錄哪些欄位
- access_log:定義請求訪問的日誌位置及格式
- location:用於標記使用者請求的資源資訊
專案中的SDK
- SDK的功能:用於監聽使用者的行為,將使用者行為資料進行收集傳送給nginx
- SDK的設計:
- 儘量減少對業務系統的侵入
- 儘量收集多的欄位,避免資料丟失
- 本次專案中的SDK:
- js sdk
- 修改53行:
serverUrl : "http://bigdata-training01.hpsk.com/hpsk.jpg"
- Java sdk
com.hpsk.bigdata.sdk.java.logmake.AnalyticsEngineSDK:21行
- 監聽事件的型別
- launch事件:使用者第一次開啟網頁,就會觸發launch事件
- pageView事件:使用者每訪問一個頁面就會觸發pageview事件
- event事件:其他事件型別
- chargeRequest事件:訂單請求事件
根據業務需求配置nginx接收的日誌
- 修改日誌記錄格式
- $remote_addr:客戶端的IP地址
- $msec:伺服器的時間,timestamp格式
- $http_host:請求主機地址
- $request_uri:客戶端所傳送過來的URI
log_format user_log '$remote_addr^A$msec^A$http_host^A$request_uri';
定義請求資源的日誌記錄
mkdir -p /opt/datas/nginx/user_log/
location =/hpsk.jpg{
#配置請求資源的型別
default_type image/jpg;
#配置請求資源的日誌的儲存位置和格式
access_log /opt/datas/nginx/user_log/access.log user_log;
#配置返回的資源
root /opt/datas/nginx/html;
}
重啟nginx服務,重新載入配置檔案
service nginx restart
在網頁上輸入主機名/hpsk.jpg:
http://192.168.235.xx/hpsk.jpg
# The configuration file needs to define the sources,
# the channels and the sinks.
# Sources, channels and sinks are defined per agent,
# in this case called 'a1'
a1.sources = s1
a1.channels = c1
a1.sinks = k1
# For each one of the sources, the type is defined
# source的來源為命令
a1.sources.s1.type = exec
# 命令為Nginx的日誌檔案
a1.sources.s1.command = tail -F /opt/datas/nginx/user_log/access.log
# The channel can be defined as follows.
a1.sources.s1.channels = c1
# Each sink's type must be defined
# 輸出到HDFS上
a1.sinks.k1.type = hdfs
# 分割槽目錄:必須先在HDFS上建立目錄
a1.sinks.k1.hdfs.path = /flume/nginx/user_log/part/daystr=%Y%m%d/hour=%H
# 檔案型別為資料流(DataStream),即不壓縮的普通文字
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollInterval = 0
# 檔案大小,單位byte
a1.sinks.k1.hdfs.rollSize = 131072000
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.useLocalTimeStamp = true
#Specify the channel the sink should use
a1.sinks.k1.channel = c1
# Each channel's type is defined.
# channels的型別,這裡是記憶體,速度快,不安全
a1.channels.c1.type = memory
# Other config values specific to each type of channel(sink or source)
# can be defined as well
# In this case, it specifies the capacity of the memory channel
# channel中最多儲存的event數量
a1.channels.c1.capacity = 1000
# 每個事務最多處理的event數量
a1.channels.c1.transactionCapacity = 100
cd /opt/modules/flume-1.6.0-cdh5.7.6-bin/
bin/flume-ng agent --conf conf/ --name a1 --conf-file case/nginx-mem-part.properties -Dflume.root.logger=INFO,console
執行配置檔案
格式:
bin/flume-ng agent --conf|-c flume_conf_dir --name|-n agent_name --conf-file|-f file_path
--conf|-c:指定flume的配置檔案目錄
--conf-file|-f:執行的檔案地址
--name|-n:執行agent的名稱
-Dflume.root.logger=INFO,console:調整flume的日誌級別