
    "hB`              0          d Z ddlZddlZddlZddlZddlZddlZddlmZmZm	Z	 ddl
mZmZmZmZmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZmZmZmZ dd	lmZm Z m!Z!m"Z" d
dgZ#dgZ$ddddddddddddd e       dddddddfdee%ef   de&dee%ef   de&de&de%dede&de%de%de'de'de'de'ded ee%ef   d!e'd"eee&f   d#e'd$e'd%e&d&e'd'eej(                     f.d(Z(ddddddddddddd e       dddddddfd)e)de&dee%ef   de&de&de%dede&de%de%de'de'de'de'dee%ef   d ee%ef   d!e'd"eee&f   d#e'd$e'd%e&d&e'd'eej(                     f.d*Z*d+ede%de&de&de%dede%de%de%de'de'de'd!e'd"ee&ee&e&f   f   d&e'd'ee%   f d,Z+d9de%d!e'd'ee%e%ee'f   fd-Z,ded'e%fd.Z-d:d/e%d e%d'e%fd0Z.	 d;d/e%d e%d%e&d'ee&e&f   fd1Z/	 	 	 	 	 	 	 d<de%de%de%d e%d2e'd%e&de&de&d'efd3Z0	 	 	 	 	 	 	 d<d4e)de%de%d e%d2e'd%e&de&de&d'efd5Z1	 d9de%de%d6e%d#e'd7e'd'eej(                     fd8Z2y)=zq
    pdf2image is a light wrapper for the poppler-utils tools that can convert your
    PDFs into Pillow images.
    N)PopenPIPETimeoutExpired)AnyUnionTupleListDictCallable)PurePath)Image)uuid_generatorcounter_generatorThreadSafeGenerator)parse_buffer_to_pgmparse_buffer_to_ppmparse_buffer_to_jpegparse_buffer_to_png)PDFInfoNotInstalledErrorPDFPageCountErrorPDFSyntaxErrorPDFPopplerTimeoutErrorpngtiffPages   ppm   Fpdf_pathdpioutput_folder
first_page	last_pagefmtjpegoptthread_countuserpwownerpwuse_cropboxstricttransparentsingle_fileoutput_filepoppler_path	grayscalesize
paths_onlyuse_pdftocairotimeouthide_annotationsreturnc                 (   |r|dk(  rd}t        | t              r| j                         } t        |t              r|j                         }t        |t              r|j                         }t        | ||	|      d   }t	        ||      \  }}}}|xs |xs |xr |t
        v }t        |rdnd|      \  }}|dk(  r|dk  rd	}|dk(  r|d
k  rd}t        |t        j                        s,t        |t              s|rt        |g      }d}nt        |      }|dk  rd}||dk  rd}|||kD  r|}||kD  rg S 	 d}||rt        j                         }d}||z
  dz   }||kD  r|}||z  }|} g }!t        |      D ]G  }"t        |      }#||z  t!        |dkD        z   }$t#        dt%        |      | g|| | |$z   dz
  |||#||	|
|||||      }%|r|rt'        d      t)        d|      g|%z   }%nt)        d|      g|%z   }%| |$z   } |t!        |dkD        z  }t*        j,                  j/                         }&||dz   |&j1                  dd      z   |&d<   d	}'t3        j4                         dk(  r7t7        j8                         }'|'xj:                  t6        j<                  z  c_        |!j?                  |#tA        |%|&tB        tB        |'      f       J g }(|!D ]]  \  })}*	 |*jE                  |      \  }+},d|,v r|rtM        |,jO                  dd            ||(tQ        ||)|||      z  }(S|( ||+      z  }(_ 	 |rtS        jT                  |       |(S # tF        $ r/ |*jI                          |*jE                         \  }-}.tK        d      w xY w# rtS        jT                  |       w w xY w)ab  Function wrapping pdftoppm and pdftocairo

    :param pdf_path: Path to the PDF that you want to convert
    :type pdf_path: Union[str, PurePath]
    :param dpi: Image quality in DPI (default 200), defaults to 200
    :type dpi: int, optional
    :param output_folder: Write the resulting images to a folder (instead of directly in memory), defaults to None
    :type output_folder: Union[str, PurePath], optional
    :param first_page: First page to process, defaults to None
    :type first_page: int, optional
    :param last_page: Last page to process before stopping, defaults to None
    :type last_page: int, optional
    :param fmt: Output image format, defaults to "ppm"
    :type fmt: str, optional
    :param jpegopt: jpeg options `quality`, `progressive`, and `optimize` (only for jpeg format), defaults to None
    :type jpegopt: Dict, optional
    :param thread_count: How many threads we are allowed to spawn for processing, defaults to 1
    :type thread_count: int, optional
    :param userpw: PDF's password, defaults to None
    :type userpw: str, optional
    :param ownerpw: PDF's owner password, defaults to None
    :type ownerpw: str, optional
    :param use_cropbox: Use cropbox instead of mediabox, defaults to False
    :type use_cropbox: bool, optional
    :param strict: When a Syntax Error is thrown, it will be raised as an Exception, defaults to False
    :type strict: bool, optional
    :param transparent: Output with a transparent background instead of a white one, defaults to False
    :type transparent: bool, optional
    :param single_file: Uses the -singlefile option from pdftoppm/pdftocairo, defaults to False
    :type single_file: bool, optional
    :param output_file: What is the output filename or generator, defaults to uuid_generator()
    :type output_file: Any, optional
    :param poppler_path: Path to look for poppler binaries, defaults to None
    :type poppler_path: Union[str, PurePath], optional
    :param grayscale: Output grayscale image(s), defaults to False
    :type grayscale: bool, optional
    :param size: Size of the resulting image(s), uses the Pillow (width, height) standard, defaults to None
    :type size: Union[Tuple, int], optional
    :param paths_only: Don't load image(s), return paths instead (requires output_folder), defaults to False
    :type paths_only: bool, optional
    :param use_pdftocairo: Use pdftocairo instead of pdftoppm, may help performance, defaults to False
    :type use_pdftocairo: bool, optional
    :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
    :type timeout: int, optional
    :param hide_annotations: Hide PDF annotations in the output, defaults to False
    :type hide_annotations: bool, optional
    :raises NotImplementedError: Raised when conflicting parameters are given (hide_annotations for pdftocairo)
    :raises PDFPopplerTimeoutError: Raised after the timeout for the image processing is exceeded
    :raises PDFSyntaxError: Raised if there is a syntax error in the PDF and strict=True
    :return: A list of Pillow images, one for each page between first_page and last_page
    :rtype: List[Image.Image]
    r   r   )r.   r   
pdftocairopdftoppmr   9   NS   Fr   Tz-rz4Hide annotations flag not implemented in pdftocairo.:LD_LIBRARY_PATH Windows)envstdoutstderrstartupinfor3   zRun poppler timeout.s   Syntax Errorutf8ignore)	in_memory)+
isinstancer   as_posixpdfinfo_from_path_parse_formatTRANSPARENT_FILE_TYPES_get_poppler_versiontypesGeneratorTyper   iterr   tempfilemkdtemprangenextint_build_commandstrNotImplementedError_get_command_pathosenvironcopygetplatformsystem
subprocessSTARTUPINFOdwFlagsSTARTF_USESHOWWINDOWappendr   r   communicater   killr   r   decode_load_from_output_foldershutilrmtree)/r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   
page_count
parsed_fmtfinal_extensionparse_buffer_funcuse_pdfcairo_formatuse_pdfcairopoppler_version_majorpoppler_version_minorauto_temp_dirremindercurrent_page	processes_thread_output_filethread_page_countargsr?   rB   imagesuidprocdataerroutserrss/                                                  U/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/pdf2image/pdf2image.pyconvert_from_pathr   %   s_   Z #, (H%$$&-*%..0,)#,,."&'J
 KXYKGJ!24G 	 	B	B@J*@@  4H$*<400 !&;r&A!&;r&A  k5#6#67
(A }-KL+K8KaZ!^
I
2	I	a) \$,,.M M +a/
*$%L,!	|$ 7	A!%k!2 !+l :SA=N N!s3x*0014" D$ #-N  *,EFM)*lCDtK (*;;LHqL))H**//#C' 3&1BB)GG %& K I-(446##z'F'FF#&#d4[a7	r " 	2ICE ,,W,=	c #%&$SZZ%ABB(2!#+  +D11)	2, MM-(M- " E		!--/
d,-CDDE& MM-( s&   4FM7 	L<AM7 <8M44M7 7Npdf_filec                 h   t        j                         \  }}	 t        |d      5 }|j                  |        |j	                          t        |j                  fi d|d|d|d|d|d|d|d	|d
|	d|
d|d|d|d|d|d|d|d|d|d|d|cddd       t        j                  |       t        j                  |       S # 1 sw Y   nxY w	 t        j                  |       t        j                  |       y# t        j                  |       t        j                  |       w xY w)aW  Function wrapping pdftoppm and pdftocairo.

    :param pdf_bytes: Bytes of the PDF that you want to convert
    :type pdf_bytes: bytes
    :param dpi: Image quality in DPI (default 200), defaults to 200
    :type dpi: int, optional
    :param output_folder: Write the resulting images to a folder (instead of directly in memory), defaults to None
    :type output_folder: Union[str, PurePath], optional
    :param first_page: First page to process, defaults to None
    :type first_page: int, optional
    :param last_page: Last page to process before stopping, defaults to None
    :type last_page: int, optional
    :param fmt: Output image format, defaults to "ppm"
    :type fmt: str, optional
    :param jpegopt: jpeg options `quality`, `progressive`, and `optimize` (only for jpeg format), defaults to None
    :type jpegopt: Dict, optional
    :param thread_count: How many threads we are allowed to spawn for processing, defaults to 1
    :type thread_count: int, optional
    :param userpw: PDF's password, defaults to None
    :type userpw: str, optional
    :param ownerpw: PDF's owner password, defaults to None
    :type ownerpw: str, optional
    :param use_cropbox: Use cropbox instead of mediabox, defaults to False
    :type use_cropbox: bool, optional
    :param strict: When a Syntax Error is thrown, it will be raised as an Exception, defaults to False
    :type strict: bool, optional
    :param transparent: Output with a transparent background instead of a white one, defaults to False
    :type transparent: bool, optional
    :param single_file: Uses the -singlefile option from pdftoppm/pdftocairo, defaults to False
    :type single_file: bool, optional
    :param output_file: What is the output filename or generator, defaults to uuid_generator()
    :type output_file: Any, optional
    :param poppler_path: Path to look for poppler binaries, defaults to None
    :type poppler_path: Union[str, PurePath], optional
    :param grayscale: Output grayscale image(s), defaults to False
    :type grayscale: bool, optional
    :param size: Size of the resulting image(s), uses the Pillow (width, height) standard, defaults to None
    :type size: Union[Tuple, int], optional
    :param paths_only: Don't load image(s), return paths instead (requires output_folder), defaults to False
    :type paths_only: bool, optional
    :param use_pdftocairo: Use pdftocairo instead of pdftoppm, may help performance, defaults to False
    :type use_pdftocairo: bool, optional
    :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
    :type timeout: int, optional
    :param hide_annotations: Hide PDF annotations in the output, defaults to False
    :type hide_annotations: bool, optional
    :raises NotImplementedError: Raised when conflicting parameters are given (hide_annotations for pdftocairo)
    :raises PDFPopplerTimeoutError: Raised after the timeout for the image processing is exceeded
    :raises PDFSyntaxError: Raised if there is a syntax error in the PDF and strict=True
    :return: A list of Pillow images, one for each page between first_page and last_page
    :rtype: List[Image.Image]
    wbr    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   N)
rP   mkstempopenwriteflushr   namerY   closeremove)r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   fhtemp_filenamefs                            r   convert_from_bytesr     s   Z !((*B!-& 	!GGHGGI$ , &	
 $    *    (  ( ( (  *!" $#$ %& &'(  .)*  +, "2-	 	8 	
		- ;	 	 	8 	
		-  	
		- s#   D A4C	D CD ,D1ry   c           	      "   |	r| j                  d       |r| j                  d       |
r|t        v r| j                  d       || j                  dt        |      g       || j                  dt        |      g       |dvr| j                  d|z          |dv r|r| j                  d	t	        |      g       |r| j                  d
       |/| j                  t
        j                  j                  ||             || j                  d|g       || j                  d|g       |r| j                  d       |	 | S t        |t              rt        |      dk(  r|d   )| j                  dt        t        |d               g       n| j                  dt        d      g       |d   *| j                  dt        t        |d               g       | S | j                  dt        d      g       | S t        |t              r8t        |      dk(  r*| j                  dt        t        |d               g       | S t        |t              st        |t              r'| j                  dt        t        |            g       | S t        d| d      )Nz-cropboxz-hide-annotationsz-transp-f-l)pgmr   -jpegjpgz-jpegoptz-singlefile-upw-opwz-gray   r   z-scale-to-xr   z-scale-to-yz	-scale-tozSize z is not a tuple or an integer)rc   rK   extendrV   _parse_jpegoptrY   pathjoinrG   tuplelenrT   float
ValueError)ry   r!   r"   r#   r$   r%   r-   r'   r(   r)   r+   r,   r/   r0   r4   s                  r   rU   rU     sK   " J'(s44IT3z?+,T3y>*+
. C#I
o'Z!89:M" BGGLL<=VV$%VW%&G|" K! 
D%	 SY!^7KKCQL(9:;KKB017KKCQL(9:; K KKB01 K 
D%	 SY!^[#c$q'l"345 K 
D#	*T5"9[#c$i.12 K 5&CDEE    c                     | j                         } | d   dk(  r| dd  } | dv r
ddt        dfS | dk(  r
ddt        dfS | d	v ry
| dk(  r|r
ddt        dfS ddt        dfS )Nr   .r   r   r   r   Fr   )tifr   )r   r   NTr   r   )lowerr   r   r   r   )r$   r/   s     r   rJ   rJ     s    
))+C
1v}!"g
ou2E99
e|e0%77
o(
e|	e0%77%,e33r   c                     g }| j                         D ]2  \  }}|du rd}|du rd}|j                  dj                  ||             4 dj                  |      S )NTyFnz{}={},)itemsrc   formatr   )r%   partskvs       r   r   r     s_    E +19A:AW^^Aq)*+ 88E?r   commandc                     t        j                         dk(  r| dz   } | t        j                  j	                  ||       } | S )Nr>   z.exe)r]   r^   rY   r   r   )r   r.   s     r   rX   rX     s:    I%F"'',,|W5Nr   c                 6   t        | |      dg} t        j                  j                         }||dz   |j	                  dd      z   |d<   t        | |t        t              }	 |j                  |      \  }}	 |j                  dd	      j                  d
      d   j                  d      d   j                  d      }	t        |	d         t        |	d         fS # t        $ r/ |j                          |j                         \  }}t        d      w xY w#  Y yxY w)Nz-vr;   r<   r=   r?   r@   rA   rC   Run poppler poppler timeout.rD   rE   
r    r   r   r   )r      )rX   rY   rZ   r[   r\   r   r   rd   r   re   r   rf   splitrT   )
r   r.   r3   r?   r|   r}   r~   r   r   versions
             r   rL   rL     s    !,7>G
**//
C!-!3cgg>OQS6T!Tc$t<DE$$W$5	c**VX.44T:1=CCCHLRRSVW71:GAJ//  E		%%'
d$%CDDEs   "C 8A D 8DDrawdatesc                    	 t        d|      | g}||j                  d|g       ||j                  d|g       |r|j                  dg       |r|j                  dt        |      g       |r|j                  dt        |      g       t        j                  j                         }	||dz   |	j                  d	d
      z   |	d	<   t        ||	t        t              }
	 |
j                  |      \  }}i }|j                  dd      j                  d      D ]f  }|j                  d      }|d   dj                  |dd       }}|d
k7  s3|t         v rt#        |j%                               n|j%                         ||<   h d|vrt&        |S # t        $ r/ |
j                          |
j                         \  }}t        d      w xY w# t(        $ r t+        d      t&        $ r t-        dj                  dd             w xY w)a  Function wrapping poppler's pdfinfo utility and returns the result as a dictionary.

    :param pdf_path: Path to the PDF that you want to convert
    :type pdf_path: str
    :param userpw: PDF's password, defaults to None
    :type userpw: str, optional
    :param ownerpw: PDF's owner password, defaults to None
    :type ownerpw: str, optional
    :param poppler_path: Path to look for poppler binaries, defaults to None
    :type poppler_path: Union[str, PurePath], optional
    :param rawdates: Return the undecoded data strings, defaults to False
    :type rawdates: bool, optional
    :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
    :type timeout: int, optional
    :param first_page: First page to process, defaults to None
    :type first_page: int, optional
    :param last_page: Last page to process before stopping, defaults to None
    :type last_page: int, optional
    :raises PDFPopplerTimeoutError: Raised after the timeout for the image processing is exceeded
    :raises PDFInfoNotInstalledError: Raised if pdfinfo is not installed
    :raises PDFPageCountError: Raised if the output could not be parsed
    :return: Dictionary containing various information on the PDF
    :rtype: Dict
    pdfinfoNr   r   z	-rawdatesr   r   r;   r<   r=   r   rC   r   rD   rE   r   r   r   r   z;Unable to get page count. Is poppler installed and in PATH?zUnable to get page count.
)rX   r   rV   rY   rZ   r[   r\   r   r   rd   r   re   r   rf   r   r   PDFINFO_CONVERT_TO_INTrT   stripr   OSErrorr   r   )r   r'   r(   r.   r   r3   r"   r#   r   r?   r|   outr~   r   r   dfieldsfkeyvalues                       r   rI   rI     s   D6
$Y=xHNNFF+,NNFG,-NNK=)NND#j/23NND#i.12 jjoo#%1C%7#''BSUW:X%XC!"W#d4@	I'''8HC ZZ177= 	ES!BAAB 0Cby 44 & #		 !'  	IIIK))+JD$()GHH	I*  
&I
 	
  
)#**VX*F)GH
 	

s,   CG F
 2AG AG 
8GG ;H 	pdf_bytesc           
         t        j                         \  }}		 t        |	d      5 }
|
j                  |        |
j	                          ddd       t        |	|||||||      t        j                  |       t        j                  |	       S # 1 sw Y   FxY w# t        j                  |       t        j                  |	       w xY w)a  Function wrapping poppler's pdfinfo utility and returns the result as a dictionary.

    :param pdf_bytes: Bytes of the PDF that you want to convert
    :type pdf_bytes: bytes
    :param userpw: PDF's password, defaults to None
    :type userpw: str, optional
    :param ownerpw: PDF's owner password, defaults to None
    :type ownerpw: str, optional
    :param poppler_path: Path to look for poppler binaries, defaults to None
    :type poppler_path: Union[str, PurePath], optional
    :param rawdates: Return the undecoded data strings, defaults to False
    :type rawdates: bool, optional
    :param timeout: Raise PDFPopplerTimeoutError after the given time, defaults to None
    :type timeout: int, optional
    :param first_page: First page to process, defaults to None
    :type first_page: int, optional
    :param last_page: Last page to process before stopping, defaults to None
    :type last_page: int, optional
    :return: Dictionary containing various information on the PDF
    :rtype: Dict
    r   N)r'   r(   r.   r   r3   r"   r#   )	rP   r   r   r   r   rI   rY   r   r   )r   r'   r(   r.   r   r3   r"   r#   r   r   r   s              r   pdfinfo_from_bytesr   h  s    > !((*B!-& 	!GGIGGI	 !%!	
 	
		- 	 	 	
		- s"   B "BB BB ,CextrF   c           	         g }t        t        j                  |             D ]  }|j                  |      s|j	                  d      d   |k(  s-|r0|j                  t        j                  j                  | |             _|j                  t        j                  t        j                  j                  | |                   |s|d   j                           |S )Nr   r   )sortedrY   listdir
startswithr   rc   r   r   r   r   load)r!   r-   r   r1   rF   rz   r   s          r   rg   rg     s     FBJJ}-. &<<$b)9S)@bggll=!<=ejjmQ)GHI2JOO%& Mr   )F)N)NN)NNNFNNN)3__doc__rY   r]   rP   rM   rh   r_   r   r   r   typingr   r   r   r	   r
   r   pathlibr   PILr   pdf2image.generatorsr   r   r   pdf2image.parsersr   r   r   r   pdf2image.exceptionsr   r   r   r   rK   r   rV   rT   boolr   bytesr   rU   rJ   r   rX   rL   rI   r   rg    r   r   <module>r      s  
 
      2 2 : :   W W     ! 
 *.%')-" "-mCM"m	m h'm 	m
 m 
m m m m m m m m m m  X&!m" #m$ s

%m& 'm( )m* +m, -m. 
%++/md *.(6(8)-" "-l!l!	l! h'l! 	l!
 l! 
l! l! l! l! l! l! l! l! l! sH}%l!  X&!l!" #l!$ s

%l!& 'l!( )l!* +l!, -l!. 
%++/l!^G
GG G 	G
 
G G G G G G G G G U38_$
%G G  
#Y!GT4s 4t 4c3RV>V8W 4 D S s #   <@ #58
38_8 X
X
X
 X
 	X

 X
 X
 X
 X
 
X
z 0!0!0! 0! 	0!
 0! 0! 0! 0! 
0!p  
 	
  
%++r   