
    ,h                         d Z ddlZddlZddlZej                  dkD  rddlmZ nddlmZ ddlm	Z	m
Z
 ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ 	 	 	 	 ddZ	 	 ddZy)zIFunctions that can be used for the most common use-cases for pdfminer.six    N)   r   )StringIO)BytesIO   )PDFResourceManagerPDFPageInterpreter)TagExtractor)PDFPage)XMLConverterHTMLConverterTextConverter)ImageWriter)LAParamsc           	      `   d|cxv rt        d       |r1t        j                         j                  t        j                         t
        j                  rCt        j                  j                  r)|j                  t        j                  j                        }d}|rt        |      }t        |       }|dk(  rt        |||||      }t
        j                  r-|t        j                  k(  rt        j                  j                   }|dk(  rt#        ||||||      }n+|d	k(  rt%        |||||
||
      }n|dk(  rt'        |||      }t)        |      }t+        j,                  | |||| d      D ]*  }|j.                  |	z   dz  |_        |j1                  |       , |j3                          y)aO  
    Parses text from inf-file and writes to outfp file-like object.
    Takes loads of optional arguments but the defaults are somewhat sane.
    Beware laparams: Including an empty LAParams is not the same as passing None!
    Returns nothing, acting as it does on two streams. Use StringIO to get strings.

    :param inf: a file-like object to read PDF structure from, such as a
        file handler (using the builtin `open()` function) or a `BytesIO`.
    :param outfp: a file-like object to write the text to.
    :param output_type: May be 'text', 'xml', 'html', 'tag'. Only 'text' works properly.
    :param codec: Text decoding codec
    :param laparams: An LAParams object from pdfminer.layout. Default is None but may not layout correctly.
    :param maxpages: How many pages to stop parsing after
    :param page_numbers: zero-indexed page numbers to operate on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param scale: Scale factor
    :param rotation: Rotation factor
    :param layoutmode: Default is 'normal', see pdfminer.converter.HTMLConverter
    :param output_dir: If given, creates an ImageWriter for extracted images.
    :param strip_control: Does what it says on the tin
    :param debug: Output more logging data
    :param disable_caching: Does what it says on the tin
    :param other:
    :return:
    _py2_no_more_posargsNzThe `_py2_no_more_posargs will be removed on January, 2020. At that moment pdfminer.six will stop supporting Python 2. Please upgrade to Python 3. For more information see https://github.com/pdfminer/pdfminer .six/issues/194)cachingtext)codeclaparamsimagewriterxml)r   r   r   stripcontrolhtml)r   scale
layoutmoder   r   tag)r   Tmaxpagespasswordr   check_extractableih  )DeprecationWarninglogging	getLoggersetLevelDEBUGsixPY2sysstdinencodingdecoder   r   r   PY3stdoutbufferr   r   r	   r   r
   	get_pagesrotateprocess_pageclose)infoutfpoutput_typer   r   r   page_numbersr   r   rotationr   
output_dirstrip_controldebugdisable_cachingkwargsr   rsrcmgrdeviceinterpreterpages                        U/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/pdfminer/high_level.pyextract_text_to_fprB      s   < 3 CD 	D 4 $$W]]3
ww399%%??399#5#56K!*- _)<=GfwUX+68 ww5CJJ&

!!eguEH*5+8: 
	wU%*4x+68 
	guE:$Wf5K!!#".+3+3.=*=48: ' {{X-4  &' LLN    c           
      |   |
t               }t        | d      5 }t               5 }t               }	t	        |	|||      }
t        |	|
      }t        j                  |||||d      D ]  }|j                  |        |j                         cddd       cddd       S # 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)aT  
    Parses and returns the text contained in a PDF file.
    Takes loads of optional arguments but the defaults are somewhat sane.
    Returns a string containing all of the text extracted.

    :param pdf_file: Path to the PDF file to be worked on
    :param password: For encrypted PDFs, the password to decrypt.
    :param page_numbers: List of zero-indexed page numbers to extract.
    :param maxpages: The maximum number of pages to parse
    :param caching: If resources should be cached
    :param codec: Text decoding codec
    :param laparams: LAParams object from pdfminer.layout.
    Nrb)r   r   Tr   )
r   openr   r   r   r   r
   r/   r1   getvalue)pdf_filer   r6   r   r   r   r   fpoutput_stringr=   r>   r?   r@   s                rA   extract_textrK   f   s     :	h	 (XZ (=$&wU(02(&9%%"
 	+D $$T*	+ %%'!( ( ( ( ( ( (s#   B2A&B
	B2B&	"B22B;)r   utf-8Nr   N g      ?r   normalNFFF)rM   Nr   TrL   N)__doc__r"   r(   r&   version_infoior   r   	pdfinterpr   r   	pdfdevicer	   pdfpager
   	converterr   r   r   imager   layoutr   rB   rK    rC   rA   <module>rY      sb    O  
 
 f& = #  A A   CGTUHM16	M` EF7;"(rC   