之前我们统计域名状态码、平均响应时间和流量的方法是:在每台机器上添加一个定时脚本,把每个域名最近一分钟的访问日志提取到临时文件,然后由zabbix对这个一分钟日志临时文件作相关统计。这个方法一直运行良好,但最近发现某台服务器的负载突然增高,使用iotop查看后发现,获取最近一分钟日志的脚本占用的IO特别高,停止这个定时任务之后负载恢复正常。于是我们打算使用nginx lua来替换目前的方法。新方法具有统计时占用资源少、数据实时的特点。
使用nginx lua统计网站相关数据的方法为(我们以统计www.centos.bz 404状态码为例):
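记录过程:在nginx的log阶段(log_by_lua_file),每处理完一个请求,就把共享字典access中key为www.centos.bz-404-当前时间戳的值加1,并把该key的过期时间设为70秒。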
查询过程:
提供一个接口,把从60秒前到当前时间之间每一秒对应的key(形如www.centos.bz-404-时间戳)的值累加起来,并返回结果。
# nginx.conf (excerpt). "[...]" marks directives elided from this listing.
http {
    [...]

    # 10 MB shared-memory dictionary, reachable from Lua as ngx.shared.access.
    lua_shared_dict access 10m;

    # Lua script executed in the log phase of requests; it records the counters.
    log_by_lua_file conf/log_acesss.lua;

    server {
        [...]

        # Query endpoint: the response body is produced by the Lua script below.
        location /domain_status {
            default_type text/plain;
            content_by_lua_file "conf/domain_status.lua";
        }

        [...]
    }

    [...]
}
-- conf/log_acesss.lua
-- Log-phase handler: for every finished request, bump four per-second
-- counters in the shared dictionary `access`:
--   <host>-<status>-<ts>     number of responses with that status code
--   <host>-flow-<ts>         sum of $body_bytes_sent
--   <host>-reqt-<ts>         sum of $request_time
--   <host>-total_req-<ts>    number of requests
-- where <ts> is the current Unix time in seconds.

local access = ngx.shared.access
local host = ngx.var.host
local status = ngx.var.status

-- Without both parts no meaningful key can be built, so skip the request.
if not host or not status then
    return
end

-- nginx variables are strings; convert explicitly instead of relying on
-- implicit string-to-number coercion, and fall back to 0 if unparsable.
local body_bytes_sent = tonumber(ngx.var.body_bytes_sent) or 0
local request_time = tonumber(ngx.var.request_time) or 0

-- ngx.time() returns nginx's cached epoch seconds; it replaces the
-- non-portable os.date("%s") and yields the same digits in the key.
local timestamp = ngx.time()

-- Seconds a per-second bucket is kept before the dictionary drops it.
local expire_time = 70

--- Atomically add `delta` to the counter stored under `key`.
-- A get() followed by set() loses updates when several workers handle
-- requests in the same second; add() + incr() does not.
-- @param key   shared-dictionary key of the counter
-- @param delta amount to add
local function bump(key, delta)
    -- add() stores the initial 0 (with its TTL) only when the key is absent;
    -- an "exists" failure is the normal case and is deliberately ignored.
    access:add(key, 0, expire_time)
    local new_value, err = access:incr(key, delta)
    if not new_value then
        ngx.log(ngx.ERR, "failed to update counter ", key, ": ", err)
    end
end

-- count total req
bump(table.concat({host, "-total_req-", timestamp}), 1)

-- count status
bump(table.concat({host, "-", status, "-", timestamp}), 1)

-- count flow
bump(table.concat({host, "-flow-", timestamp}), body_bytes_sent)

-- count request time
bump(table.concat({host, "-reqt-", timestamp}), request_time)
-- conf/domain_status.lua
-- Content handler for /domain_status. Sums the per-second counters stored in
-- the shared dictionary `access` over the most recent minute and prints the
-- result as the response body.
--
-- Query arguments:
--   host    domain whose counters are read (required)
--   count   "status" | "flow" | "reqt"     (required)
--   status  status code to count           (required when count=status)
--
-- Output:
--   count=status  number of responses with the given status code
--   count=flow    sum of body bytes sent
--   count=reqt    average request time (0 when there were no requests)

local access = ngx.shared.access
local args = ngx.req.get_uri_args()
local count = args["count"]
local host = args["host"]
local status = args["status"]

-- Number of one-second buckets that make up the reporting window.
local WINDOW_SECONDS = 60

--- Print `message` as the response body and finish the request with 200.
local function reject(message)
    ngx.print(message)
    return ngx.exit(ngx.HTTP_OK)
end

-- get_uri_args() yields `true` for a valueless argument and a table for a
-- repeated one; neither can be used to build a key, so only accept strings.
if type(host) ~= "string" then
    return reject("host arg not found.")
end

if count == "status" and type(status) ~= "string" then
    return reject("status arg not found.")
end

if not (count == "status" or count == "flow" or count == "reqt") then
    return reject("count arg invalid.")
end

-- Read the clock once so both ends of the window come from the same instant.
local now = ngx.time()
-- Exactly WINDOW_SECONDS buckets, ending with the current second, so that
-- polls taken one minute apart do not count the boundary second twice.
local first_second = now - (WINDOW_SECONDS - 1)

--- Sum the counters `<prefix><second>` for every second of the window.
-- Missing (never written or already expired) buckets count as 0.
-- @param prefix key prefix, including the trailing "-"
-- @return the sum over the window
local function sum_window(prefix)
    local total = 0
    for second = first_second, now do
        total = total + (access:get(prefix .. second) or 0)
    end
    return total
end

if count == "status" then
    ngx.print(sum_window(host .. "-" .. status .. "-"))
elseif count == "flow" then
    ngx.print(sum_window(host .. "-flow-"))
else -- count == "reqt" (validated above)
    local req_total = sum_window(host .. "-total_req-")
    local reqt_total = sum_window(host .. "-reqt-")
    -- Kept local: a global here would be shared between requests.
    local reqt_avg = 0
    if req_total ~= 0 then
        reqt_avg = reqt_total / req_total
    end
    ngx.print(reqt_avg)
end
1、获取域名状态码
如请求www.centos.bz一分钟内404状态码数量
请求接口http://$host/domain_status?count=status&host=www.centos.bz&status=404
2、获取域名流量
请求接口http://$host/domain_status?count=flow&host=www.centos.bz
3、获取域名一分钟内平均响应时间
请求接口http://$host/domain_status?count=reqt&host=www.centos.bz