
# CCITT Fax decoder
#
# Bugs: uncompressed mode untested.
#
#  cf.
#   ITU-T Recommendation T.4
#     "Standardization of Group 3 facsimile terminals for document transmission"
#   ITU-T Recommendation T.6
#     "FACSIMILE CODING SCHEMES AND CODING CONTROL FUNCTIONS FOR GROUP 4 FACSIMILE APPARATUS"


import sys
import array

import six  #Python 2+3 compatibility

if six.PY3:
    def get_bytes(data):
        for byte in data:
            yield byte
else:
    def get_bytes(data):
        for char in data:
            yield ord(char)


##  BitParser
##
class BitParser(object):

    def __init__(self):
        self._pos = 0
        return

    @classmethod
    def add(klass, root, v, bits):
        p = root
        b = None
        for i in range(len(bits)):
            if 0 < i:
                if p[b] is None:
                    p[b] = [None, None]
                p = p[b]
            if bits[i] == '1':
                b = 1
            else:
                b = 0
        p[b] = v
        return

    def feedbytes(self, data):
        for byte in get_bytes(data):
            for m in (128, 64, 32, 16, 8, 4, 2, 1):
                self._parse_bit(byte & m)
        return

    def _parse_bit(self, x):
        if x:
            v = self._state[1]
        else:
            v = self._state[0]
        self._pos += 1
        if isinstance(v, list):
            self._state = v
        else:
            self._state = self._accept(v)
        return


##  CCITTG4Parser
##
class CCITTG4Parser(BitParser):

    MODE = [None, None]
    BitParser.add(MODE, 0,   '1')
    BitParser.add(MODE, +1,  '011')
    BitParser.add(MODE, -1,  '010')
    BitParser.add(MODE, 'h', '001')
    BitParser.add(MODE, 'p', '0001')
    BitParser.add(MODE, +2,  '000011')
    BitParser.add(MODE, -2,  '000010')
    BitParser.add(MODE, +3,  '0000011')
    BitParser.add(MODE, -3,  '0000010')
    BitParser.add(MODE, 'u', '0000001111')
    BitParser.add(MODE, 'x1', '0000001000')
    BitParser.add(MODE, 'x2', '0000001001')
    BitParser.add(MODE, 'x3', '0000001010')
    BitParser.add(MODE, 'x4', '0000001011')
    BitParser.add(MODE, 'x5', '0000001100')
    BitParser.add(MODE, 'x6', '0000001101')
    BitParser.add(MODE, 'x7', '0000001110')
    BitParser.add(MODE, 'e', '000000000001000000000001')

    WHITE = [None, None]
    BitParser.add(WHITE, 0   , '00110101')
    BitParser.add(WHITE, 1   , '000111')
    BitParser.add(WHITE, 2   , '0111')
    BitParser.add(WHITE, 3   , '1000')
    BitParser.add(WHITE, 4   , '1011')
    BitParser.add(WHITE, 5   , '1100')
    BitParser.add(WHITE, 6   , '1110')
    BitParser.add(WHITE, 7   , '1111')
    BitParser.add(WHITE, 8   , '10011')
    BitParser.add(WHITE, 9   , '10100')
    BitParser.add(WHITE, 10  , '00111')
    BitParser.add(WHITE, 11  , '01000')
    BitParser.add(WHITE, 12  , '001000')
    BitParser.add(WHITE, 13  , '000011')
    BitParser.add(WHITE, 14  , '110100')
    BitParser.add(WHITE, 15  , '110101')
    BitParser.add(WHITE, 16  , '101010')
    BitParser.add(WHITE, 17  , '101011')
    BitParser.add(WHITE, 18  , '0100111')
    BitParser.add(WHITE, 19  , '0001100')
    BitParser.add(WHITE, 20  , '0001000')
    BitParser.add(WHITE, 21  , '0010111')
    BitParser.add(WHITE, 22  , '0000011')
    BitParser.add(WHITE, 23  , '0000100')
    BitParser.add(WHITE, 24  , '0101000')
    BitParser.add(WHITE, 25  , '0101011')
    BitParser.add(WHITE, 26  , '0010011')
    BitParser.add(WHITE, 27  , '0100100')
    BitParser.add(WHITE, 28  , '0011000')
    BitParser.add(WHITE, 29  , '00000010')
    BitParser.add(WHITE, 30  , '00000011')
    BitParser.add(WHITE, 31  , '00011010')
    BitParser.add(WHITE, 32  , '00011011')
    BitParser.add(WHITE, 33  , '00010010')
    BitParser.add(WHITE, 34  , '00010011')
    BitParser.add(WHITE, 35  , '00010100')
    BitParser.add(WHITE, 36  , '00010101')
    BitParser.add(WHITE, 37  , '00010110')
    BitParser.add(WHITE, 38  , '00010111')
    BitParser.add(WHITE, 39  , '00101000')
    BitParser.add(WHITE, 40  , '00101001')
    BitParser.add(WHITE, 41  , '00101010')
    BitParser.add(WHITE, 42  , '00101011')
    BitParser.add(WHITE, 43  , '00101100')
    BitParser.add(WHITE, 44  , '00101101')
    BitParser.add(WHITE, 45  , '00000100')
    BitParser.add(WHITE, 46  , '00000101')
    BitParser.add(WHITE, 47  , '00001010')
    BitParser.add(WHITE, 48  , '00001011')
    BitParser.add(WHITE, 49  , '01010010')
    BitParser.add(WHITE, 50  , '01010011')
    BitParser.add(WHITE, 51  , '01010100')
    BitParser.add(WHITE, 52  , '01010101')
    BitParser.add(WHITE, 53  , '00100100')
    BitParser.add(WHITE, 54  , '00100101')
    BitParser.add(WHITE, 55  , '01011000')
    BitParser.add(WHITE, 56  , '01011001')
    BitParser.add(WHITE, 57  , '01011010')
    BitParser.add(WHITE, 58  , '01011011')
    BitParser.add(WHITE, 59  , '01001010')
    BitParser.add(WHITE, 60  , '01001011')
    BitParser.add(WHITE, 61  , '00110010')
    BitParser.add(WHITE, 62  , '00110011')
    BitParser.add(WHITE, 63  , '00110100')
    BitParser.add(WHITE, 64  , '11011')
    BitParser.add(WHITE, 128 , '10010')
    BitParser.add(WHITE, 192 , '010111')
    BitParser.add(WHITE, 256 , '0110111')
    BitParser.add(WHITE, 320 , '00110110')
    BitParser.add(WHITE, 384 , '00110111')
    BitParser.add(WHITE, 448 , '01100100')
    BitParser.add(WHITE, 512 , '01100101')
    BitParser.add(WHITE, 576 , '01101000')
    BitParser.add(WHITE, 640 , '01100111')
    BitParser.add(WHITE, 704 , '011001100')
    BitParser.add(WHITE, 768 , '011001101')
    BitParser.add(WHITE, 832 , '011010010')
    BitParser.add(WHITE, 896 , '011010011')
    BitParser.add(WHITE, 960 , '011010100')
    BitParser.add(WHITE, 1024, '011010101')
    BitParser.add(WHITE, 1088, '011010110')
    BitParser.add(WHITE, 1152, '011010111')
    BitParser.add(WHITE, 1216, '011011000')
    BitParser.add(WHITE, 1280, '011011001')
    BitParser.add(WHITE, 1344, '011011010')
    BitParser.add(WHITE, 1408, '011011011')
    BitParser.add(WHITE, 1472, '010011000')
    BitParser.add(WHITE, 1536, '010011001')
    BitParser.add(WHITE, 1600, '010011010')
    BitParser.add(WHITE, 1664, '011000')
    BitParser.add(WHITE, 1728, '010011011')
    BitParser.add(WHITE, 1792, '00000001000')
    BitParser.add(WHITE, 1856, '00000001100')
    BitParser.add(WHITE, 1920, '00000001101')
    BitParser.add(WHITE, 1984, '000000010010')
    BitParser.add(WHITE, 2048, '000000010011')
    BitParser.add(WHITE, 2112, '000000010100')
    BitParser.add(WHITE, 2176, '000000010101')
    BitParser.add(WHITE, 2240, '000000010110')
    BitParser.add(WHITE, 2304, '000000010111')
    BitParser.add(WHITE, 2368, '000000011100')
    BitParser.add(WHITE, 2432, '000000011101')
    BitParser.add(WHITE, 2496, '000000011110')
    BitParser.add(WHITE, 2560, '000000011111')

    BLACK = [None, None]
    BitParser.add(BLACK, 0   , '0000110111')
    BitParser.add(BLACK, 1   , '010')
    BitParser.add(BLACK, 2   , '11')
    BitParser.add(BLACK, 3   , '10')
    BitParser.add(BLACK, 4   , '011')
    BitParser.add(BLACK, 5   , '0011')
    BitParser.add(BLACK, 6   , '0010')
    BitParser.add(BLACK, 7   , '00011')
    BitParser.add(BLACK, 8   , '000101')
    BitParser.add(BLACK, 9   , '000100')
    BitParser.add(BLACK, 10  , '0000100')
    BitParser.add(BLACK, 11  , '0000101')
    BitParser.add(BLACK, 12  , '0000111')
    BitParser.add(BLACK, 13  , '00000100')
    BitParser.add(BLACK, 14  , '00000111')
    BitParser.add(BLACK, 15  , '000011000')
    BitParser.add(BLACK, 16  , '0000010111')
    BitParser.add(BLACK, 17  , '0000011000')
    BitParser.add(BLACK, 18  , '0000001000')
    BitParser.add(BLACK, 19  , '00001100111')
    BitParser.add(BLACK, 20  , '00001101000')
    BitParser.add(BLACK, 21  , '00001101100')
    BitParser.add(BLACK, 22  , '00000110111')
    BitParser.add(BLACK, 23  , '00000101000')
    BitParser.add(BLACK, 24  , '00000010111')
    BitParser.add(BLACK, 25  , '00000011000')
    BitParser.add(BLACK, 26  , '000011001010')
    BitParser.add(BLACK, 27  , '000011001011')
    BitParser.add(BLACK, 28  , '000011001100')
    BitParser.add(BLACK, 29  , '000011001101')
    BitParser.add(BLACK, 30  , '000001101000')
    BitParser.add(BLACK, 31  , '000001101001')
    BitParser.add(BLACK, 32  , '000001101010')
    BitParser.add(BLACK, 33  , '000001101011')
    BitParser.add(BLACK, 34  , '000011010010')
    BitParser.add(BLACK, 35  , '000011010011')
    BitParser.add(BLACK, 36  , '000011010100')
    BitParser.add(BLACK, 37  , '000011010101')
    BitParser.add(BLACK, 38  , '000011010110')
    BitParser.add(BLACK, 39  , '000011010111')
    BitParser.add(BLACK, 40  , '000001101100')
    BitParser.add(BLACK, 41  , '000001101101')
    BitParser.add(BLACK, 42  , '000011011010')
    BitParser.add(BLACK, 43  , '000011011011')
    BitParser.add(BLACK, 44  , '000001010100')
    BitParser.add(BLACK, 45  , '000001010101')
    BitParser.add(BLACK, 46  , '000001010110')
    BitParser.add(BLACK, 47  , '000001010111')
    BitParser.add(BLACK, 48  , '000001100100')
    BitParser.add(BLACK, 49  , '000001100101')
    BitParser.add(BLACK, 50  , '000001010010')
    BitParser.add(BLACK, 51  , '000001010011')
    BitParser.add(BLACK, 52  , '000000100100')
    BitParser.add(BLACK, 53  , '000000110111')
    BitParser.add(BLACK, 54  , '000000111000')
    BitParser.add(BLACK, 55  , '000000100111')
    BitParser.add(BLACK, 56  , '000000101000')
    BitParser.add(BLACK, 57  , '000001011000')
    BitParser.add(BLACK, 58  , '000001011001')
    BitParser.add(BLACK, 59  , '000000101011')
    BitParser.add(BLACK, 60  , '000000101100')
    BitParser.add(BLACK, 61  , '000001011010')
    BitParser.add(BLACK, 62  , '000001100110')
    BitParser.add(BLACK, 63  , '000001100111')
    BitParser.add(BLACK, 64  , '0000001111')
    BitParser.add(BLACK, 128 , '000011001000')
    BitParser.add(BLACK, 192 , '000011001001')
    BitParser.add(BLACK, 256 , '000001011011')
    BitParser.add(BLACK, 320 , '000000110011')
    BitParser.add(BLACK, 384 , '000000110100')
    BitParser.add(BLACK, 448 , '000000110101')
    BitParser.add(BLACK, 512 , '0000001101100')
    BitParser.add(BLACK, 576 , '0000001101101')
    BitParser.add(BLACK, 640 , '0000001001010')
    BitParser.add(BLACK, 704 , '0000001001011')
    BitParser.add(BLACK, 768 , '0000001001100')
    BitParser.add(BLACK, 832 , '0000001001101')
    BitParser.add(BLACK, 896 , '0000001110010')
    BitParser.add(BLACK, 960 , '0000001110011')
    BitParser.add(BLACK, 1024, '0000001110100')
    BitParser.add(BLACK, 1088, '0000001110101')
    BitParser.add(BLACK, 1152, '0000001110110')
    BitParser.add(BLACK, 1216, '0000001110111')
    BitParser.add(BLACK, 1280, '0000001010010')
    BitParser.add(BLACK, 1344, '0000001010011')
    BitParser.add(BLACK, 1408, '0000001010100')
    BitParser.add(BLACK, 1472, '0000001010101')
    BitParser.add(BLACK, 1536, '0000001011010')
    BitParser.add(BLACK, 1600, '0000001011011')
    BitParser.add(BLACK, 1664, '0000001100100')
    BitParser.add(BLACK, 1728, '0000001100101')
    BitParser.add(BLACK, 1792, '00000001000')
    BitParser.add(BLACK, 1856, '00000001100')
    BitParser.add(BLACK, 1920, '00000001101')
    BitParser.add(BLACK, 1984, '000000010010')
    BitParser.add(BLACK, 2048, '000000010011')
    BitParser.add(BLACK, 2112, '000000010100')
    BitParser.add(BLACK, 2176, '000000010101')
    BitParser.add(BLACK, 2240, '000000010110')
    BitParser.add(BLACK, 2304, '000000010111')
    BitParser.add(BLACK, 2368, '000000011100')
    BitParser.add(BLACK, 2432, '000000011101')
    BitParser.add(BLACK, 2496, '000000011110')
    BitParser.add(BLACK, 2560, '000000011111')

    UNCOMPRESSED = [None, None]
    BitParser.add(UNCOMPRESSED, '1', '1')
    BitParser.add(UNCOMPRESSED, '01', '01')
    BitParser.add(UNCOMPRESSED, '001', '001')
    BitParser.add(UNCOMPRESSED, '0001', '0001')
    BitParser.add(UNCOMPRESSED, '00001', '00001')
    BitParser.add(UNCOMPRESSED, '00000', '000001')
    BitParser.add(UNCOMPRESSED, 'T00', '00000011')
    BitParser.add(UNCOMPRESSED, 'T10', '00000010')
    BitParser.add(UNCOMPRESSED, 'T000', '000000011')
    BitParser.add(UNCOMPRESSED, 'T100', '000000010')
    BitParser.add(UNCOMPRESSED, 'T0000', '0000000011')
    BitParser.add(UNCOMPRESSED, 'T1000', '0000000010')
    BitParser.add(UNCOMPRESSED, 'T00000', '00000000011')
    BitParser.add(UNCOMPRESSED, 'T10000', '00000000010')

    class EOFB(Exception):
        pass

    class InvalidData(Exception):
        pass

    class ByteSkip(Exception):
        pass

    def __init__(self, width, bytealign=False):
        BitParser.__init__(self)
        self.width = width
        self.bytealign = bytealign
        self.reset()
        return

    def feedbytes(self, data):
        for byte in get_bytes(data):
            try:
                for m in (128, 64, 32, 16, 8, 4, 2, 1):
                    self._parse_bit(byte & m)
            except self.ByteSkip:
                self._accept = self._parse_mode
                self._state = self.MODE
            except self.EOFB:
                break
        return

    def _parse_mode(self, mode):
        if mode == 'p':
            self._do_pass()
            self._flush_line()
            return self.MODE
        elif mode == 'h':
            self._n1 = 0
            self._accept = self._parse_horiz1
            if self._color:
                return self.WHITE
            else:
                return self.BLACK
        elif mode == 'u':
            self._accept = self._parse_uncompressed
            return self.UNCOMPRESSED
        elif mode == 'e':
            raise self.EOFB
        elif isinstance(mode, int):
            self._do_vertical(mode)
            self._flush_line()
            return self.MODE
        else:
            raise self.InvalidData(mode)

    def _parse_horiz1(self, n):
        if n is None:
            raise self.InvalidData
        self._n1 += n
        if n < 64:
            self._n2 = 0
            self._color = 1-self._color
            self._accept = self._parse_horiz2
        if self._color:
            return self.WHITE
        else:
            return self.BLACK

    def _parse_horiz2(self, n):
        if n is None:
            raise self.InvalidData
        self._n2 += n
        if n < 64:
            self._color = 1-self._color
            self._accept = self._parse_mode
            self._do_horizontal(self._n1, self._n2)
            self._flush_line()
            return self.MODE
        elif self._color:
            return self.WHITE
        else:
            return self.BLACK

    def _parse_uncompressed(self, bits):
        if not bits:
            raise self.InvalidData
        if bits.startswith('T'):
            self._accept = self._parse_mode
            self._color = int(bits[1])
            self._do_uncompressed(bits[2:])
            return self.MODE
        else:
            self._do_uncompressed(bits)
            return self.UNCOMPRESSED

    def _get_bits(self):
        return ''.join(str(b) for b in self._curline[:self._curpos])

    def _get_refline(self, i):
        if i < 0:
            return '[]'+''.join(str(b) for b in self._refline)
        elif len(self._refline) <= i:
            return ''.join(str(b) for b in self._refline)+'[]'
        else:
            return (''.join(str(b) for b in self._refline[:i]) +
                    '['+str(self._refline[i])+']' +
                    ''.join(str(b) for b in self._refline[i+1:]))

    def reset(self):
        self._y = 0
        self._curline = array.array('b', [1]*self.width)
        self._reset_line()
        self._accept = self._parse_mode
        self._state = self.MODE
        return

    def output_line(self, y, bits):
        print (y, ''.join(str(b) for b in bits))
        return

    def _reset_line(self):
        self._refline = self._curline
        self._curline = array.array('b', [1]*self.width)
        self._curpos = -1
        self._color = 1
        return

    def _flush_line(self):
        if self.width <= self._curpos:
            self.output_line(self._y, self._curline)
            self._y += 1
            self._reset_line()
            if self.bytealign:
                raise self.ByteSkip
        return

    def _do_vertical(self, dx):
        #print '* vertical(%d): curpos=%r, color=%r' % (dx, self._curpos, self._color)
        #print '  refline:', self._get_refline(self._curpos+1)
        x1 = self._curpos+1
        while 1:
            if x1 == 0:
                if (self._color == 1 and self._refline[x1] != self._color):
                    break
            elif x1 == len(self._refline):
                break
            elif (self._refline[x1-1] == self._color and
                  self._refline[x1] != self._color):
                break
            x1 += 1
        x1 += dx
        x0 = max(0, self._curpos)
        x1 = max(0, min(self.width, x1))
        if x1 < x0:
            for x in range(x1, x0):
                self._curline[x] = self._color
        elif x0 < x1:
            for x in range(x0, x1):
                self._curline[x] = self._color
        self._curpos = x1
        self._color = 1-self._color
        return

    def _do_pass(self):
        #print '* pass: curpos=%r, color=%r' % (self._curpos, self._color)
        #print '  refline:', self._get_refline(self._curpos+1)
        x1 = self._curpos+1
        while 1:
            if x1 == 0:
                if (self._color == 1 and self._refline[x1] != self._color):
                    break
            elif x1 == len(self._refline):
                break
            elif (self._refline[x1-1] == self._color and
                  self._refline[x1] != self._color):
                break
            x1 += 1
        while 1:
            if x1 == 0:
                if (self._color == 0 and self._refline[x1] == self._color):
                    break
            elif x1 == len(self._refline):
                break
            elif (self._refline[x1-1] != self._color and
                  self._refline[x1] == self._color):
                break
            x1 += 1
        for x in range(self._curpos, x1):
            self._curline[x] = self._color
        self._curpos = x1
        return

    def _do_horizontal(self, n1, n2):
        #print '* horizontal(%d,%d): curpos=%r, color=%r' % (n1, n2, self._curpos, self._color)
        if self._curpos < 0:
            self._curpos = 0
        x = self._curpos
        for _ in range(n1):
            if len(self._curline) <= x:
                break
            self._curline[x] = self._color
            x += 1
        for _ in range(n2):
            if len(self._curline) <= x:
                break
            self._curline[x] = 1-self._color
            x += 1
        self._curpos = x
        return

    def _do_uncompressed(self, bits):
        #print '* uncompressed(%r): curpos=%r' % (bits, self._curpos)
        for c in bits:
            self._curline[self._curpos] = int(c)
            self._curpos += 1
            self._flush_line()
        return




class CCITTFaxDecoder(CCITTG4Parser):

    def __init__(self, width, bytealign=False, reversed=False):
        CCITTG4Parser.__init__(self, width, bytealign=bytealign)
        self.reversed = reversed
        self._buf = b''
        return

    def close(self):
        return self._buf

    def output_line(self, y, bits):
        bytes = array.array('B', [0]*((len(bits)+7)//8))
        if self.reversed:
            bits = [1-b for b in bits]
        for (i, b) in enumerate(bits):
            if b:
                bytes[i//8] += (128, 64, 32, 16, 8, 4, 2, 1)[i % 8]
        self._buf += bytes.tostring()
        return


def ccittfaxdecode(data, params):
    K = params.get('K')
    cols = params.get('Columns')
    bytealign = params.get('EncodedByteAlign')
    reversed = params.get('BlackIs1')
    if K == -1:
        parser = CCITTFaxDecoder(cols, bytealign=bytealign, reversed=reversed)
    else:
        raise ValueError(K)
    parser.feedbytes(data)
    return parser.close()


# test
def main(argv):
    if not argv[1:]:
        import unittest
        return unittest.main()

    class Parser(CCITTG4Parser):
        def __init__(self, width, bytealign=False):
            import pygame
            CCITTG4Parser.__init__(self, width, bytealign=bytealign)
            self.img = pygame.Surface((self.width, 1000))
            return

        def output_line(self, y, bits):
            for (x, b) in enumerate(bits):
                if b:
                    self.img.set_at((x, y), (255, 255, 255))
                else:
                    self.img.set_at((x, y), (0, 0, 0))
            return

        def close(self):
            import pygame
            pygame.image.save(self.img, 'out.bmp')
            return
    for path in argv[1:]:
        fp = open(path, 'rb')
        (_, _, k, w, h, _) = path.split('.')
        parser = Parser(int(w))
        parser.feedbytes(fp.read())
        parser.close()
        fp.close()
    return

if __name__ == '__main__':
    sys.exit(main(sys.argv))
