Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
Download

📚 The CoCalc Library - books, templates and other resources

132923 views
License: OTHER
1
2
from StringIO import StringIO
3
import unittest2 as unittest
4
from mr_s3_log_parser import MrS3LogParser
5
6
7
class MrTestsUtil:
    """Helper for running a map-reduce job object inside unit tests."""

    def run_mr_sandbox(self, mr_job, stdin):
        """Run ``mr_job`` against ``stdin`` and yield its output values.

        This is a generator: the job is not sandboxed or run until the
        result is iterated.

        Args:
            mr_job: job object providing ``sandbox``, ``make_runner`` and
                ``parse_output_line``.
            stdin: object passed to ``mr_job.sandbox`` as the job's
                standard input.

        Yields:
            The value of each (key, value) pair parsed from the runner's
            output stream; the keys are discarded.
        """
        # The job is expected to have been constructed by the caller with
        # test-friendly runner arguments:
        #   "-r inline"  runs the job in the same process, so small jobs
        #                tend to run faster and stack traces are simpler
        #   "--no-conf"  prevents options from a local mrjob.conf from
        #                polluting the testing environment
        #   "-"          reads from standard in
        mr_job.sandbox(stdin=stdin)

        # make_runner ensures job cleanup is performed regardless of
        # success or failure
        with mr_job.make_runner() as runner:
            runner.run()
            for line in runner.stream_output():
                _key, value = mr_job.parse_output_line(line)
                yield value
24
25
26
class TestMrS3LogParser(unittest.TestCase):
    """Tests for MrS3LogParser.

    Covers running the whole job over a raw log line (valid and invalid)
    and the ``clean_date_time_zone`` helper on its own.
    """

    # Fixtures; replaced with fresh objects before every test in setUp().
    mr_job = None
    mr_tests_util = None

    # Starts like RAW_LOG_LINE_VALID but stops after the
    # '00.111.222.33 ' field, so the remaining fields are missing.
    RAW_LOG_LINE_INVALID = (
        '00000fe9688b6e57f75bd2b7f7c1610689e8f01000000'
        '00000388225bcc00000 '
        's3-storage [22/Jul/2013:21:03:27 +0000] '
        '00.111.222.33 '
    )

    RAW_LOG_LINE_VALID = (
        '00000fe9688b6e57f75bd2b7f7c1610689e8f01000000'
        '00000388225bcc00000 '
        's3-storage [22/Jul/2013:21:03:27 +0000] '
        '00.111.222.33 '
        'arn:aws:sts::000005646931:federated-user/user 00000AB825500000 '
        'REST.HEAD.OBJECT user/file.pdf '
        '"HEAD /user/file.pdf?versionId=00000XMHZJp6DjM9x500000'
        '00000SDZk '
        'HTTP/1.1" 200 - - 4000272 18 - "-" '
        '"Boto/2.5.1 (darwin) USER-AGENT/1.0.14.0" '
        '00000XMHZJp6DjM9x5JVEAMo8MG00000'
    )

    # "AB" in the day position makes this value unparseable.
    DATE_TIME_ZONE_INVALID = "AB/Jul/2013:21:04:17 +0000"
    DATE_TIME_ZONE_VALID = "22/Jul/2013:21:04:17 +0000"
    # Expected pieces of DATE_TIME_ZONE_VALID after cleaning.
    DATE_VALID = "2013-07-22"
    DATE_TIME_VALID = "2013-07-22 21:04:17"
    TIME_ZONE_VALID = "+0000"

    def setUp(self):
        """Create a fresh job and test helper for each test.

        Done in setUp rather than __init__ so that a failure while building
        the job is reported against the test that needed it instead of
        breaking test collection.
        """
        # "-r inline" runs in-process, "--no-conf" ignores any local
        # mrjob.conf, and "-" makes the job read from standard in.
        self.mr_job = MrS3LogParser(['-r', 'inline', '--no-conf', '-'])
        self.mr_tests_util = MrTestsUtil()

    def test_invalid_log_lines(self):
        """Every output value for an invalid line starts with "Error"."""
        stdin = StringIO(self.RAW_LOG_LINE_INVALID)

        for result in self.mr_tests_util.run_mr_sandbox(self.mr_job, stdin):
            self.assertTrue(result.startswith("Error"), result)

    def test_valid_log_lines(self):
        """No output value for a valid line contains "Error"."""
        stdin = StringIO(self.RAW_LOG_LINE_VALID)

        for result in self.mr_tests_util.run_mr_sandbox(self.mr_job, stdin):
            self.assertNotIn("Error", result)

    def test_clean_date_time_zone(self):
        """clean_date_time_zone splits a valid stamp and rejects a bad one."""
        date, date_time, time_zone_parsed = \
            self.mr_job.clean_date_time_zone(self.DATE_TIME_ZONE_VALID)
        self.assertEqual(date, self.DATE_VALID)
        self.assertEqual(date_time, self.DATE_TIME_VALID)
        self.assertEqual(time_zone_parsed, self.TIME_ZONE_VALID)

        # Pass the callable and its argument separately so assertRaises
        # makes the call itself and can catch the exception.
        self.assertRaises(ValueError,
                          self.mr_job.clean_date_time_zone,
                          self.DATE_TIME_ZONE_INVALID)
86
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
88
89