
    ,h]                     p    d Z ddlZddlZddlZddlZddlZddlZddlmZ  G d de	      Z
 G d de
      Zy)	zmThis module includes a bunch of convenient base classes that are
reused in many of the other parser modules.
    N   )
exceptionsc                   ,    e Zd ZdZd Zd ZddZddZy)	
BaseParserzThe :class:`.BaseParser` abstracts out some common functionality
    that is used across all document Parsers. In particular, it has
    the responsibility of handling all unicode and byte-encoding.
    c                     t        d      )zThis method must be overwritten by child classes to extract raw
        text from a filename. This method can return either a
        byte-encoded string or unicode.
        z$must be overwritten by child classes)NotImplementedError)selffilenamekwargss      X/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/textract/parsers/utils.pyextractzBaseParser.extract   s    
 ""HII    c                 &    |j                  |d      S )zEncode the ``text`` in ``encoding`` byte-encoding. This ignores
        code points that can't be encoded in byte-strings.
        ignore)encode)r	   textencodings      r   r   zBaseParser.encode   s     {{8X..r   c                 p     | j                   |fi |}| j                  ||      }| j                  ||      S )a  Process ``filename`` and encode byte-string with ``encoding``. This
        method is called by :func:`textract.parsers.process` and wraps
        the :meth:`.BaseParser.extract` method in `a delicious unicode
        sandwich <http://nedbatchelder.com/text/unipain.html>`_.

        )r   decoder   )r	   r
   input_encodingoutput_encodingr   byte_stringunicode_strings          r   processzBaseParser.process#   s:     #dll86v6[.A{{>?;;r   Nc                     t        |t        j                        r|S |sy|r|j                  |      S t	        j
                  |      }|d   dkD  r|d   nd}|j                  |d      S )zcDecode ``text`` using the `chardet
        <https://github.com/chardet/chardet>`_ package.
         
confidenceg?r   utf8replace)errors)
isinstancesix	text_typer   chardetdetect)r	   r   r   resultr   s        r   r   zBaseParser.decode2   sn     dCMM*K  ;;~.. %)/)=)D6*%&{{8I{66r   )r   )N)__name__
__module____qualname____doc__r   r   r   r    r   r   r   r      s    
J/<7r   r   c                       e Zd ZdZd Zd Zy)ShellParserzThe :class:`.ShellParser` extends the :class:`.BaseParser` to make
    it easy to run external programs from the command line with
    `Fabric <http://www.fabfile.org/>`_-like behavior.
    c                    	 t        j                  |t         j                  t         j                        }|j                         \  }}|j                  dk7  r1t        j                  dj                  |      |j                  ||      ||fS # t        $ rJ}|j                  t        j
                  k(  r't        j                  dj                  |      ddd       d}~ww xY w)zRun ``command`` and return the subsequent ``stdout`` and ``stderr``
        as a tuple. If the command is not successful, this raises a
        :exc:`textract.exceptions.ShellError`.
        )stdoutstderr    r   Nr   )
subprocessPopenPIPEOSErrorerrnoENOENTr   
ShellErrorjoincommunicate
returncode)r	   argspipeer/   r0   s         r   runzShellParser.runO   s    	##!zD ))+ ??a''  v~'  	ww%,,& !++HHTNCR  	s   4B 	C ACC c                 ^    t        j                         \  }}t        j                  |       |S )z'Return a unique tempfile name.
        )tempfilemkstemposclose)r	   handler
   s      r   temp_filenamezShellParser.temp_filenamep   s(     $++-
r   N)r'   r(   r)   r*   r@   rG   r+   r   r   r-   r-   I   s    
B	r   r-   )r*   r3   rB   rD   r7   r"   r$   r   r   objectr   r-   r+   r   r   <module>rI      s:      	  
  67 67r0* 0r   