我正在运行一个Python脚本,以从CloudFront访问日志中获取错误日志,一旦成功,我可能会使用Lambda服务来执行它。当我尝试运行查询时,我发现表名存在于AWS Athena数据库中,我无法覆盖。因此,我没有得到预期的输出。以下是我尝试执行的脚本。
还有其他方法吗?有什么建议吗?
还有其他方法吗?有什么建议吗?
#Athena configuration
s3_ouput = 's3://athena/athenatest/'
database = 's3_accesslog'
table = 'test_output1'
#Athena database and table definition
create_database = "CREATE DATABASE IF NOT EXISTS %s;" % (database)
create_table = \
"""CREATE EXTERNAL TABLE IF NOT EXISTS %s.%s (
`Date` DATE,
Time STRING,
Location STRING,
SCBytes BIGINT,
RequestIP STRING,
Method STRING,
Host STRING,
Uri STRING,
Status INT,
Referrer STRING,
UserAgent STRING,
UriQS STRING,
Cookie STRING,
ResultType STRING,
RequestId STRING,
HostHeader STRING,
Protocol STRING,
CSBytes BIGINT,
TimeTaken FLOAT,
XForwardFor STRING,
SSLProtocol STRING,
SSLCipher STRING,
ResponseResultType STRING,
CSProtocolVersion STRING,
FleStatus STRING,
FleEncryptedFields INT,
CPort INT,
TimeToFirstByte FLOAT,
XEdgeDetailedResult STRING,
ScContent STRING,
ScContentLen BIGINT,
ScRangeStart BIGINT,
ScRangeEnd BIGINT
)
PARTITIONED BY (
`l_shipdate` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '%s'
TBLPROPERTIES ('skip.header.line.count' = '2');""" % ( database, table, s3_input )
#Query definitions
query_1 = "SELECT * FROM %s.%s where CAST(status AS VARCHAR) like '404';" % (database, table)