#!/usr/bin/env python

#
#
# Simplistic test suite for q.
#
# The commands below reference ../bin/q and ../examples relative to the working
# directory, so the suite needs to be executed from the folder containing this file
#
#

import unittest
import pprint

from subprocess import PIPE, Popen, STDOUT
import sys
import os
import time
from tempfile import NamedTemporaryFile
import locale

# q uses this encoding as the default output encoding. Some of the tests use it in order to 
# make sure that the output is correctly encoded (they decode q's raw output with it
# before comparing against unicode literals)
SYSTEM_ENCODING = locale.getpreferredencoding()

def run_command(cmd_to_run):
    """Run *cmd_to_run* through the shell and capture its result.

    Returns a ``(returncode, stdout_rows, stderr_rows)`` tuple. Each stream is
    stripped of surrounding whitespace and split on ``os.linesep``; a stream
    that is empty after stripping yields an empty list.

    The rows are the raw byte strings produced by the process (``str`` on
    Python 2, ``bytes`` on Python 3); no decoding is performed here.
    """
    p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True)
    o, e = p.communicate()

    # communicate() hands back byte strings, so the separator has to be bytes
    # as well; mixing in text literals raises TypeError on Python 3.
    row_separator = os.linesep.encode('ascii')

    def split_rows(raw):
        # strip() drops the trailing newline (and any other surrounding whitespace)
        raw = raw.strip()
        if not raw:
            return []
        return raw.split(row_separator)

    return (p.returncode, split_rows(o), split_rows(e))

# Directory-listing style fixture whose rows do not all have the same number of
# space-separated fields (the symlink rows carry an extra "-> target" part).
# ParsingModeTests feeds it to q under the strict, relaxed and fluffy modes.
uneven_ls_output = """drwxr-xr-x   2 root     root      4096 Jun 11  2012 /selinux
drwxr-xr-x   2 root     root      4096 Apr 19  2013 /mnt
drwxr-xr-x   2 root     root      4096 Apr 24  2013 /srv
drwx------   2 root     root     16384 Jun 21  2013 /lost+found
lrwxrwxrwx   1 root     root        33 Jun 21  2013 /initrd.img.old -> /boot/initrd.img-3.8.0-19-generic
drwxr-xr-x   2 root     root      4096 Jun 21  2013 /cdrom
drwxr-xr-x   3 root     root      4096 Jun 21  2013 /home
lrwxrwxrwx   1 root     root        29 Jun 21  2013 /vmlinuz -> boot/vmlinuz-3.8.0-19-generic
lrwxrwxrwx   1 root     root        32 Jun 21  2013 /initrd.img -> boot/initrd.img-3.8.0-19-generic
"""

# File-listing fixture (presumably captured from `find -ls` - TODO confirm).
# SqlTests.test_find_example groups it by the 5th/6th fields and sums the 7th.
find_output = """8257537   32 drwxrwxrwt 218 root     root        28672 Mar  1 11:00 /tmp
8299123    4 drwxrwxr-x   2 harel    harel        4096 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576
8263229  964 -rw-rw-r--   1 mapred   mapred      984569 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormcode.ser
8263230    4 -rw-rw-r--   1 harel    harel        1223 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormconf.ser
8299113    4 drwxrwxr-x   2 harel    harel        4096 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate
8263406    4 -rw-rw-r--   1 harel    harel        2002 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514168746
8263476    0 -rw-rw-r--   1 harel    harel           0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514168746.version
8263607    0 -rw-rw-r--   1 harel    harel           0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514169735.version
8263533    0 -rw-rw-r--   1 harel    harel           0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514172733.version
8263604    0 -rw-rw-r--   1 harel    harel           0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version
"""

# Comma-separated fixtures shared by the tests: a header line, the data rows,
# and the newline-terminated texts assembled from them.
header_row = 'name,value1,value2'
sample_data_rows = ['a,1,0', 'b,2,0', 'c,,0']
sample_data_rows_with_empty_string = ['a,aaa,0', 'b,bbb,0', 'c,,0']
sample_data_no_header = "".join("%s\n" % row for row in sample_data_rows)
sample_data_with_empty_string_no_header = "".join(
    "%s\n" % row for row in sample_data_rows_with_empty_string)
sample_data_with_header = "%s\n%s" % (header_row, sample_data_no_header)
# Header that names only two of the three data columns
sample_data_with_missing_header_names = "%s\n%s" % (
    "name,value1", sample_data_no_header)

# Values with leading whitespace
sample_data_rows_with_spaces = ['a,1,0', '   b,   2,0', 'c,,0']
sample_data_with_spaces_no_header = "".join(
    "%s\n" % row for row in sample_data_rows_with_spaces)

# Header whose second column name contains a space
header_row_with_spaces = 'name,value 1,value2'
sample_data_with_spaces_with_header = "%s\n%s" % (
    header_row_with_spaces, sample_data_with_spaces_no_header)

# One 23-digit value followed by two 13-digit values, one per line (no trailing newline)
long_value1 = "23683289372328372328373"
int_value = "2328372328373"
sample_data_with_long_values = "\n".join([long_value1, int_value, int_value])

def one_column_warning(e):
    """Tell whether the first stderr row is q's one-column warning.

    *e* is the list of stderr rows as returned by run_command(). An empty
    list yields False instead of raising IndexError, so callers get a plain
    assertion failure when no warning was emitted at all.
    """
    if not e:
        return False
    return e[0].startswith('Warning: column count is one')


class AbstractQTestCase(unittest.TestCase):
    """Base test case providing temporary-file helpers for the q tests."""

    def create_file_with_data(self, data, encoding='utf-8'):
        """Write *data* to a new named temporary file and return the file object.

        data     -- content to write.
        encoding -- codec used to encode *data* before writing. Passing None
                    or the string 'none' means "write the data as raw bytes":
                    byte strings are written untouched, and text is mapped
                    code point to byte (latin-1) so that literals such as
                    '\\xc3\\xa1' produce exactly those bytes.

        The returned file is already closed; use its ``name`` attribute. The
        file is not deleted on close. It is removed either by an explicit
        call to cleanup() or, as a safety net, when the test finishes.
        """
        if encoding != 'none' and encoding is not None:
            payload = data.encode(encoding)
        elif isinstance(data, bytes):
            payload = data
        else:
            payload = data.encode('latin-1')

        tmpfile = NamedTemporaryFile(delete=False)
        # Registered before anything else can fail, so the file never outlives
        # the test even when an assertion fires before cleanup() is reached.
        self.addCleanup(self._remove_if_present, tmpfile.name)
        try:
            tmpfile.write(payload)
        finally:
            tmpfile.close()
        return tmpfile

    def cleanup(self, tmpfile):
        """Delete the file created by create_file_with_data()."""
        os.remove(tmpfile.name)

    @staticmethod
    def _remove_if_present(path):
        """Delete *path* unless it has already been removed."""
        if os.path.exists(path):
            os.remove(path)


class BasicTests(AbstractQTestCase):
    """Command-line level tests of q: delimiters, headers, column analysis,
    query files and encodings.

    Every test runs ../bin/q through run_command() and checks the return code
    and the stdout/stderr rows. Temporary files are registered with
    addCleanup() right after creation so they are removed even when an
    assertion fails.
    """

    def test_basic_aggregation(self):
        # Aggregate over piped stdin; a one-column warning is expected on stderr.
        retcode, o, e = run_command(
            'seq 1 10 | ../bin/q "select sum(c1),avg(c1) from -"')
        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 1)

        s = sum(range(1, 11))
        self.assertEqual(o[0], '%s %s' % (s, s / 10.0))
        self.assertTrue(one_column_warning(e))

    def test_gzipped_file(self):
        # Same aggregation, reading a gzip-compressed file through -z.
        tmpfile = self.create_file_with_data(
            '\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00', encoding='none')
        self.addCleanup(self.cleanup, tmpfile)

        cmd = '../bin/q -z "select sum(c1),avg(c1) from %s"' % tmpfile.name

        retcode, o, e = run_command(cmd)
        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 1)

        s = sum(range(1, 11))
        self.assertEqual(o[0], '%s %s' % (s, s / 10.0))
        self.assertTrue(one_column_warning(e))

    def test_delimition_mistake_with_header(self):
        # Wrong delimiter (space) on comma data with -H must fail on the header row.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)

        cmd = '../bin/q -d " " "select * from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 3)

        self.assertTrue(e[0].startswith(
            "Warning: column count is one - did you provide the correct delimiter"))
        self.assertTrue(e[1].startswith("Bad header row"))
        self.assertIn("Column name cannot contain commas", e[2])

    def test_regexp_int_data_handling(self):
        # regexp() is applied to a column that holds integer values.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)

        cmd = '../bin/q -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], "1")

    def test_regexp_null_data_handling(self):
        # The row whose c2 is empty is expected not to match, leaving a count of 2.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)

        cmd = '../bin/q -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], "2")

    def test_select_one_column(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)

        cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(" ".join(o), 'a b c')

    def test_tab_delimition_parameter(self):
        # -t: tab-separated input comes back as tab-separated output.
        tmpfile = self.create_file_with_data(
            sample_data_no_header.replace(",", "\t"))
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -t "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(
            o, [row.replace(",", "\t") for row in sample_data_rows])

    def test_tab_delimition_parameter__with_manual_override_attempt(self):
        # -t is expected to win over an explicit "-d ,".
        tmpfile = self.create_file_with_data(
            sample_data_no_header.replace(",", "\t"))
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -t -d , "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(
            o, [row.replace(",", "\t") for row in sample_data_rows])

    def test_output_delimiter(self):
        # -D sets the output delimiter independently of the input one.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(
            o, [row.replace(",", "|") for row in sample_data_rows])

    def test_output_delimiter_tab_parameter(self):
        # -T: tab as the output delimiter.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , -T "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(
            o, [row.replace(",", "\t") for row in sample_data_rows])

    def test_output_delimiter_tab_parameter__with_manual_override_attempt(self):
        # -T is expected to win over an explicit -D "|".
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(
            o, [row.replace(",", "\t") for row in sample_data_rows])

    def test_stdin_input(self):
        # "-" as the table name reads the data from stdin.
        cmd = 'printf "%s" | ../bin/q -d , "select c1,c2,c3 from -"' % sample_data_no_header
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(o, sample_data_rows)

    def test_column_separation(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(o, sample_data_rows)

    def test_column_analysis(self):
        # -A prints the table name followed by one "`name` - type" row per column.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)

        cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`c1` - text')
        self.assertEqual(o[2].strip(), '`c2` - int')
        self.assertEqual(o[3].strip(), '`c3` - int')

    def test_column_analysis_no_header(self):
        # Same command and expectations as test_column_analysis.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)

        cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`c1` - text')
        self.assertEqual(o[2].strip(), '`c2` - int')
        self.assertEqual(o[3].strip(), '`c3` - int')

    def test_header_exception_on_numeric_header_data(self):
        # With -H the first data row ("a,1,0") is taken as the header and rejected.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select * from %s" -A -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 3)
        self.assertIn(
            'Bad header row: Header must contain only strings', e[0])
        self.assertIn("Column name must be a string", e[1])
        self.assertIn("Column name must be a string", e[2])

    def test_column_analysis_with_header(self):
        tmpfile = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c1 from %s" -A -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`name` - text')
        self.assertEqual(o[2].strip(), '`value1` - int')
        self.assertEqual(o[3].strip(), '`value2` - int')

    def test_data_with_header(self):
        tmpfile = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select name from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(" ".join(o), "a b c")

    def test_output_header_when_input_header_exists(self):
        # -O adds a header row to the output.
        tmpfile = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select name from %s" -H -O' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(o, ['name', 'a', 'b', 'c'])

    def test_generated_column_name_warning_when_header_line_exists(self):
        # Using a generated name (c3) together with -H fails with a hint.
        tmpfile = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c3 from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 2)
        self.assertIn('no such column: c3', e[0])
        self.assertEqual(
            e[1], 'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names')

    def test_column_analysis_with_unexpected_header(self):
        # A header line without -H: all columns are reported as text, plus a warning.
        tmpfile = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 4)
        self.assertEqual(len(e), 1)

        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`c1` - text')
        self.assertEqual(o[2].strip(), '`c2` - text')
        self.assertEqual(o[3].strip(), '`c3` - text')

        self.assertEqual(
            e[0], 'Warning - There seems to be header line in the file, but -H has not been specified. All fields will be detected as text fields, and the header line will appear as part of the data')

    def test_empty_data(self):
        tmpfile = self.create_file_with_data('')
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertIn('Warning - data is empty', e[0])

    def test_empty_data_with_header_param(self):
        tmpfile = self.create_file_with_data('')
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c1 from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertIn('Warning - data is empty', e[0])

    def test_one_row_of_data_without_header_param(self):
        # Without -H a lone header-looking row is ordinary data.
        tmpfile = self.create_file_with_data(header_row)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c2 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], 'value1')

    def test_one_row_of_data_with_header_param(self):
        # With -H the lone row is consumed as the header, leaving no data.
        tmpfile = self.create_file_with_data(header_row)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c2 from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertIn('Warning - data is empty', e[0])

    def test_dont_leading_keep_whitespace_in_values(self):
        # By default leading whitespace is dropped from the values.
        tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(o, ['a', 'b', 'c'])

    def test_keep_leading_whitespace_in_values(self):
        # -k keeps the leading whitespace.
        tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c1 from %s" -k' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(o, ['a', '   b', 'c'])

    def test_no_impact_of_keeping_leading_whitespace_on_integers(self):
        # Even with -k, "   2" must still be detected as an int column.
        tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select c2 from %s" -k -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 4)

        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`c1` - text')
        self.assertEqual(o[2].strip(), '`c2` - int')
        self.assertEqual(o[3].strip(), '`c3` - int')

    def test_spaces_in_header_row(self):
        # A column name containing a space is addressed with backticks; the
        # backslashes keep the shell from treating them as command substitution.
        tmpfile = self.create_file_with_data(
            header_row_with_spaces + "\n" + sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select name,\\`value 1\\` from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(o, ['a,1', 'b,2', 'c,'])

    def test_column_analysis_for_spaces_in_header_row(self):
        tmpfile = self.create_file_with_data(
            header_row_with_spaces + "\n" + sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select name,\\`value 1\\` from %s" -H -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 4)

        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`name` - text')
        self.assertEqual(o[2].strip(), '`value 1` - int')
        self.assertEqual(o[3].strip(), '`value2` - int')

    def test_no_query_in_command_line(self):
        cmd = '../bin/q -d , ""'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertEqual(e[0], 'Query cannot be empty')

    def test_empty_query_in_command_line(self):
        # A whitespace-only query counts as empty.
        cmd = '../bin/q -d , "  "'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertEqual(e[0], 'Query cannot be empty')

    def test_too_many_command_line_parameters(self):
        # An unquoted query reaches q as several separate arguments.
        cmd = '../bin/q -d , select *'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith('Must provide query as one parameter'))

    def test_use_query_file(self):
        # -q reads the query from a file.
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmp_data_file)
        tmp_query_file = self.create_file_with_data("select name from %s" % tmp_data_file.name)
        self.addCleanup(self.cleanup, tmp_query_file)

        cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(o, ['a', 'b', 'c'])

    def test_use_query_file_with_incorrect_query_encoding(self):
        # The query file holds UTF-8 bytes but is declared as ascii through -Q.
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmp_data_file)
        tmp_query_file = self.create_file_with_data("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None)
        self.addCleanup(self.cleanup, tmp_query_file)

        cmd = '../bin/q -d , -q %s -H -Q ascii' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 3)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertTrue(e[0].startswith('Could not decode query using the provided query encoding'))

    def test_output_header_with_non_ascii_names(self):
        # A non-ascii column alias must survive into the -O output header.
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmp_data_file)
        tmp_query_file = self.create_file_with_data("select name,'Hr\xc3\xa1\xc4\x8d' Hr\xc3\xa1\xc4\x8d from %s" % tmp_data_file.name,encoding=None)
        self.addCleanup(self.cleanup, tmp_query_file)

        cmd = '../bin/q -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 4)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0].decode(SYSTEM_ENCODING), u'name,Hr\xe1\u010d')
        self.assertEqual(o[1].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d')
        self.assertEqual(o[2].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d')
        self.assertEqual(o[3].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d')

    def test_use_query_file_with_query_encoding(self):
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmp_data_file)
        tmp_query_file = self.create_file_with_data("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None)
        self.addCleanup(self.cleanup, tmp_query_file)

        cmd = '../bin/q -d , -q %s -H -Q utf-8' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 3)

        self.assertEqual(o[0].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d')
        self.assertEqual(o[1].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d')
        self.assertEqual(o[2].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d')

    def test_use_query_file_and_command_line(self):
        # Supplying both -q and a command-line query is rejected.
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmp_data_file)
        tmp_query_file = self.create_file_with_data("select name from %s" % tmp_data_file.name)
        self.addCleanup(self.cleanup, tmp_query_file)

        cmd = '../bin/q -d , -q %s -H "select * from ppp"' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith("Can't provide both a query file and a query on the command line"))

    def test_select_output_encoding(self):
        # -E selects the output encoding; the rows are decoded with it to verify.
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmp_data_file)
        tmp_query_file = self.create_file_with_data("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None)
        self.addCleanup(self.cleanup, tmp_query_file)

        for target_encoding in ['utf-8','ibm852']:
            cmd = '../bin/q -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding)
            retcode, o, e = run_command(cmd)

            self.assertEqual(retcode, 0)
            self.assertEqual(len(e), 0)
            self.assertEqual(len(o), 3)

            for row in o:
                self.assertEqual(row.decode(target_encoding), u'Hr\xe1\u010d')

    def test_select_failed_output_encoding(self):
        # The selected text cannot be represented in ascii, so encoding the output fails.
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        self.addCleanup(self.cleanup, tmp_data_file)
        tmp_query_file = self.create_file_with_data("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None)
        self.addCleanup(self.cleanup, tmp_query_file)

        cmd = '../bin/q -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 3)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith('Cannot encode data'))

    def test_use_query_file_with_empty_query(self):
        tmp_query_file = self.create_file_with_data("   ")
        self.addCleanup(self.cleanup, tmp_query_file)

        cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith("Query cannot be empty"))

    def test_use_non_existent_query_file(self):
        cmd = '../bin/q -d , -q non-existent-query-file -H'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith("Could not read query from file"))

         


class ParsingModeTests(AbstractQTestCase):
    """Tests of q's parsing modes (-m strict / relaxed / fluffy), the explicit
    column count (-c) and the handling of empty values.

    Temporary files are registered with addCleanup() right after creation so
    they are removed even when an assertion fails.
    """

    def test_strict_mode_column_count_mismatch_error(self):
        # Rows with differing field counts are an error in strict mode.
        tmpfile = self.create_file_with_data(uneven_ls_output)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -m strict "select count(*) from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertIn("Column Count is expected to identical", e[0])

    def test_strict_mode_too_large_specific_column_count(self):
        # -c 4 on three-column data.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , -m strict -c 4 "select count(*) from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertEqual(
            e[0], "Strict mode. Column count is expected to be 4 but is 3")

    def test_strict_mode_too_small_specific_column_count(self):
        # -c 2 on three-column data.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , -m strict -c 2 "select count(*) from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertEqual(
            e[0], "Strict mode. Column count is expected to be 2 but is 3")

    def test_relaxed_mode_missing_columns_in_header(self):
        # Relaxed mode: the column missing from the header gets a generated name (c3).
        tmpfile = self.create_file_with_data(
            sample_data_with_missing_header_names)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , -m relaxed "select count(*) from %s" -H -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 4)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`name` - text')
        self.assertEqual(o[2].strip(), '`value1` - int')
        self.assertEqual(o[3].strip(), '`c3` - int')

    def test_strict_mode_missing_columns_in_header(self):
        # Strict mode: a header shorter than the data rows is an error.
        tmpfile = self.create_file_with_data(
            sample_data_with_missing_header_names)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , -m strict "select count(*) from %s" -H -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertEqual(
            e[0], 'Strict mode. Header row contains less columns than expected column count(2 vs 3)')

    def test_output_delimiter_with_missing_fields(self):
        # An empty field stays empty between the output delimiters.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select * from %s" -D ";"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(o, ['a;1;0', 'b;2;0', 'c;;0'])

    def test_handling_of_null_integers(self):
        # The empty c2 value is left out of the average: (1 + 2) / 2.
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select avg(c2) from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], '1.5')

    def test_empty_integer_values_converted_to_null(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select * from %s where c2 is null"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], 'c,,0')

    def test_empty_string_values_not_converted_to_null(self):
        # In a text column the empty value compares equal to '' rather than null.
        tmpfile = self.create_file_with_data(
            sample_data_with_empty_string_no_header)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -d , "select * from %s where c2 == %s"' % (
            tmpfile.name, "''")
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], 'c,,0')

    def test_relaxed_mode_detected_columns(self):
        # Relaxed mode without -c: 11 columns are expected to be detected.
        tmpfile = self.create_file_with_data(uneven_ls_output)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -m relaxed "select count(*) from %s" -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)

        # The first row is the table name; the rest are "`name` - type" rows
        column_rows = o[1:]

        self.assertEqual(len(column_rows), 11)

        column_tuples = [x.strip().split(" ") for x in column_rows]
        column_names = [x[0] for x in column_tuples]
        column_types = [x[2] for x in column_tuples]

        self.assertEqual(column_names, ['`c%s`' % x for x in range(1, 12)])
        self.assertEqual(column_types, [
                         'text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text', 'text', 'text'])

    def test_relaxed_mode_detected_columns_with_specific_column_count(self):
        # Relaxed mode with -c 9: exactly nine columns are reported.
        tmpfile = self.create_file_with_data(uneven_ls_output)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -m relaxed "select count(*) from %s" -A -c 9' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)

        # The first row is the table name; the rest are "`name` - type" rows
        column_rows = o[1:]

        self.assertEqual(len(column_rows), 9)

        column_tuples = [x.strip().split(" ") for x in column_rows]
        column_names = [x[0] for x in column_tuples]
        column_types = [x[2] for x in column_tuples]

        self.assertEqual(column_names, ['`c%s`' % x for x in range(1, 10)])
        self.assertEqual(
            column_types, ['text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text'])

    def test_relaxed_mode_last_column_data_with_specific_column_count(self):
        # Relaxed mode with -c 9: the surplus fields are kept in the last column.
        tmpfile = self.create_file_with_data(uneven_ls_output)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -m relaxed "select c9 from %s" -c 9' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 9)
        self.assertEqual(len(e), 0)

        expected_output = ["/selinux", "/mnt", "/srv", "/lost+found", "/initrd.img.old -> /boot/initrd.img-3.8.0-19-generic",
                           "/cdrom", "/home", "/vmlinuz -> boot/vmlinuz-3.8.0-19-generic", "/initrd.img -> boot/initrd.img-3.8.0-19-generic"]

        self.assertEqual(o, expected_output)

    def test_fluffy_mode(self):
        # Fluffy mode: c9 holds only the ninth field; the "-> target" part is absent.
        tmpfile = self.create_file_with_data(uneven_ls_output)
        self.addCleanup(self.cleanup, tmpfile)
        cmd = '../bin/q -m fluffy "select c9 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 9)
        self.assertEqual(len(e), 0)

        expected_output = ["/selinux", "/mnt", "/srv", "/lost+found",
                           "/initrd.img.old", "/cdrom", "/home", "/vmlinuz", "/initrd.img"]

        self.assertEqual(o, expected_output)


class FormattingTests(AbstractQTestCase):
    """Tests for q's per-column output formatting (the ``-f`` option)."""

    def test_column_formatting(self):
        """Apply ``%4.3f`` to both output columns of a sum/avg query.

        Feeds the numbers 1..10 through stdin and expects a single output
        row with the sum and the average rendered with three decimals.
        """
        cmd = 'seq 1 10 | ../bin/q -f 1=%4.3f,2=%4.3f "select sum(c1),avg(c1) from -"'

        # stderr is not inspected by this test.
        retcode, o, _ = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)

        self.assertEqual(o[0], '55.000 5.500')


class SqlTests(AbstractQTestCase):
    """Tests running SQL features (grouping, ordering, joins) through q."""

    def _create_ten_row_file(self):
        """Create a temp file with ten rows: ``0 9000`` through ``9 9000``."""
        return self.create_file_with_data("\n".join(["%s 9000" % i for i in range(0, 10)]))

    def test_find_example(self):
        """Group ``find_output`` by c5/c6 and sum c7, ordered by total descending."""
        tmpfile = self.create_file_with_data(find_output)
        try:
            cmd = '../bin/q "select c5,c6,sum(c7)/1024.0/1024 as total from %s group by c5,c6 order by total desc"' % tmpfile.name
            retcode, o, e = run_command(cmd)

            self.assertEqual(retcode, 0)
            self.assertEqual(len(o), 3)
            self.assertEqual(len(e), 0)

            self.assertEqual(o[0], 'mapred mapred 0.93895816803')
            self.assertEqual(o[1], 'root root 0.02734375')
            self.assertEqual(o[2], 'harel harel 0.0108880996704')
        finally:
            # Run the cleanup even when an assertion above fails.
            self.cleanup(tmpfile)

    def test_join_example(self):
        """Join the two example files on group name and filter on file name ``ppp``."""
        cmd = '../bin/q "select myfiles.c8,emails.c2 from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 2)

        self.assertEqual(o[0], 'ppp dip.1@otherdomain.com')
        self.assertEqual(o[1], 'ppp dip.2@otherdomain.com')

    def test_join_example_with_output_header(self):
        """Same join as ``test_join_example`` with ``-O``: aliases appear as a header row."""
        cmd = '../bin/q -O "select myfiles.c8 aaa,emails.c2 bbb from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)

        self.assertEqual(o[0], 'aaa bbb')
        self.assertEqual(o[1], 'ppp dip.1@otherdomain.com')
        self.assertEqual(o[2], 'ppp dip.2@otherdomain.com')

    def test_self_join1(self):
        """Self-join on the unique first column yields one row per input row."""
        tmpfile = self._create_ten_row_file()
        try:
            cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (tmpfile.name, tmpfile.name)
            retcode, o, e = run_command(cmd)

            self.assertEqual(retcode, 0)
            self.assertEqual(len(e), 0)
            self.assertEqual(len(o), 10)
        finally:
            # Run the cleanup even when an assertion above fails.
            self.cleanup(tmpfile)

    def test_self_join_reuses_table(self):
        """With ``-A``, a file referenced twice is reported as a single table."""
        tmpfile = self._create_ten_row_file()
        try:
            cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (tmpfile.name, tmpfile.name)
            retcode, o, e = run_command(cmd)

            self.assertEqual(retcode, 0)
            self.assertEqual(len(e), 0)
            # One table header line plus one line per column - nothing repeated.
            self.assertEqual(len(o), 3)

            self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
            self.assertEqual(o[1], '  `c1` - int')
            self.assertEqual(o[2], '  `c2` - int')
        finally:
            # Run the cleanup even when an assertion above fails.
            self.cleanup(tmpfile)

    def test_self_join2(self):
        """Self-join on the constant second column yields the full cross product.

        Checked for a two-way join (10*10 rows) and then, on a fresh file,
        for a three-way join (10*10*10 rows).
        """
        tmpfile1 = self._create_ten_row_file()
        try:
            cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2)"' % (tmpfile1.name, tmpfile1.name)
            retcode, o, e = run_command(cmd)

            self.assertEqual(retcode, 0)
            self.assertEqual(len(e), 0)
            self.assertEqual(len(o), 10*10)
        finally:
            # Run the cleanup even when an assertion above fails.
            self.cleanup(tmpfile1)

        tmpfile2 = self._create_ten_row_file()
        try:
            cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (tmpfile2.name, tmpfile2.name, tmpfile2.name)
            retcode, o, e = run_command(cmd)

            self.assertEqual(retcode, 0)
            self.assertEqual(len(e), 0)
            self.assertEqual(len(o), 10*10*10)
        finally:
            # Run the cleanup even when an assertion above fails.
            self.cleanup(tmpfile2)
        

def suite():
    """Assemble the complete q test suite.

    Returns a ``unittest.TestSuite`` holding one sub-suite per test-case
    class, in the order: basic, parsing mode, SQL, formatting.
    """
    loader = unittest.TestLoader()
    case_classes = (BasicTests, ParsingModeTests, SqlTests, FormattingTests)
    return unittest.TestSuite(
        [loader.loadTestsFromTestCase(case_class) for case_class in case_classes])

if __name__ == '__main__':
    result = unittest.TextTestRunner(verbosity=2).run(suite())
    # Report the outcome through the exit status (0 on success, 1 otherwise)
    # so that shells and CI jobs can detect a failing run.
    sys.exit(not result.wasSuccessful())
