メモ:ALBのログをAthenaで見る
About
ALBのログをAthenaで見たかったので
データベースの作成
CREATE EXTERNAL TABLE IF NOT EXISTS alb_logs ( type string, time string, elb string, client_ip string, client_port int, target_ip string, target_port int, request_processing_time double, target_processing_time double, response_processing_time double, elb_status_code string, target_status_code string, received_bytes bigint, sent_bytes bigint, request_verb string, request_url string, request_proto string, user_agent string, ssl_cipher string, ssl_protocol string, target_group_arn string, trace_id string, domain_name string, chosen_cert_arn string, matched_rule_priority string, request_creation_time string, actions_executed string, redirect_url string, new_field string ) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = '1', 'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\"($| \"[^ ]*\")(.*)') LOCATION 's3://your-alb-logs-directory/AWSLogs/<ACCOUNT-ID>/elasticloadbalancing/region';
特定時間のリクエストを分析する
SELECT request_verb, domain_name, count(*) as count FROM (SELECT from_iso8601_timestamp(time) AS request_time, * FROM "default"."alb_logs") WHERE request_time BETWEEN date_parse('2018-10-11 11:00:00', '%Y-%m-%d %H:%i:%S') AND date_parse('2018-10-11 23:00:00', '%Y-%m-%d %H:%i:%S') GROUP BY request_verb,domain_name order by domain_name