📚 The CoCalc Library - books, templates and other resources
License: OTHER
from StringIO import StringIO
import unittest2 as unittest
from mr_s3_log_parser import MrS3LogParser


class MrTestsUtil(object):
    """Helper for running an mrjob job against in-memory input."""

    def run_mr_sandbox(self, mr_job, stdin):
        """Run ``mr_job`` on ``stdin`` and yield each output value.

        This is a generator: the job is sandboxed and run only once the
        caller starts iterating.

        Args:
            mr_job: job object exposing ``sandbox``, ``make_runner`` and
                ``parse_output_line``.
            stdin: file-like object passed to ``mr_job.sandbox`` as the
                job's standard input.

        Yields:
            The value half of every (key, value) pair parsed from the
            runner's output stream; the key is discarded.
        """
        mr_job.sandbox(stdin=stdin)

        # make_runner ensures job cleanup is performed regardless of
        # success or failure
        with mr_job.make_runner() as runner:
            runner.run()
            for line in runner.stream_output():
                _, value = mr_job.parse_output_line(line)
                yield value


class TestMrS3LogParser(unittest.TestCase):
    """Tests for MrS3LogParser log-line handling and date parsing."""

    # Populated per instance in __init__.
    mr_job = None
    mr_tests_util = None

    # Log line that stops after the remote IP field.
    RAW_LOG_LINE_INVALID = (
        '00000fe9688b6e57f75bd2b7f7c1610689e8f01000000'
        '00000388225bcc00000 '
        's3-storage [22/Jul/2013:21:03:27 +0000] '
        '00.111.222.33 '
    )

    # Same prefix as the invalid line, followed by the remaining fields.
    RAW_LOG_LINE_VALID = (
        '00000fe9688b6e57f75bd2b7f7c1610689e8f01000000'
        '00000388225bcc00000 '
        's3-storage [22/Jul/2013:21:03:27 +0000] '
        '00.111.222.33 '
        'arn:aws:sts::000005646931:federated-user/user 00000AB825500000 '
        'REST.HEAD.OBJECT user/file.pdf '
        '"HEAD /user/file.pdf?versionId=00000XMHZJp6DjM9x500000'
        '00000SDZk '
        'HTTP/1.1" 200 - - 4000272 18 - "-" '
        '"Boto/2.5.1 (darwin) USER-AGENT/1.0.14.0" '
        '00000XMHZJp6DjM9x5JVEAMo8MG00000'
    )

    DATE_TIME_ZONE_INVALID = "AB/Jul/2013:21:04:17 +0000"
    DATE_TIME_ZONE_VALID = "22/Jul/2013:21:04:17 +0000"
    DATE_VALID = "2013-07-22"
    DATE_TIME_VALID = "2013-07-22 21:04:17"
    TIME_ZONE_VALID = "+0000"

    def __init__(self, *args, **kwargs):
        """Create the job under test and the sandbox helper."""
        super(TestMrS3LogParser, self).__init__(*args, **kwargs)
        # inline runs the job in the same process so small jobs tend to
        # run faster and stack traces are simpler
        # --no-conf prevents options from local mrjob.conf from polluting
        # the testing environment
        # "-" reads from standard in
        self.mr_job = MrS3LogParser(['-r', 'inline', '--no-conf', '-'])
        self.mr_tests_util = MrTestsUtil()

    def test_invalid_log_lines(self):
        """Every result for the truncated line must start with "Error"."""
        stdin = StringIO(self.RAW_LOG_LINE_INVALID)

        # NOTE(review): if the job emits no output this loop body never
        # runs and the test passes without asserting anything.
        for result in self.mr_tests_util.run_mr_sandbox(self.mr_job, stdin):
            self.assertEqual(result.find("Error"), 0)

    def test_valid_log_lines(self):
        """No result for the complete line may contain "Error"."""
        stdin = StringIO(self.RAW_LOG_LINE_VALID)

        # NOTE(review): if the job emits no output this loop body never
        # runs and the test passes without asserting anything.
        for result in self.mr_tests_util.run_mr_sandbox(self.mr_job, stdin):
            self.assertEqual(result.find("Error"), -1)

    def test_clean_date_time_zone(self):
        """Check the parsed date parts and the ValueError on bad input."""
        date, date_time, time_zone_parsed = \
            self.mr_job.clean_date_time_zone(self.DATE_TIME_ZONE_VALID)
        self.assertEqual(date, self.DATE_VALID)
        self.assertEqual(date_time, self.DATE_TIME_VALID)
        self.assertEqual(time_zone_parsed, self.TIME_ZONE_VALID)

        # Hand assertRaises the callable and its argument so that it makes
        # the call itself and can catch the exception.
        self.assertRaises(ValueError,
                          self.mr_job.clean_date_time_zone,
                          self.DATE_TIME_ZONE_INVALID)


if __name__ == '__main__':
    unittest.main()