
    ,he                        d dl Z d dlZd dlmc mZ d dlZd dlZd dlZddiZ	d Z
d Zd Zd
dZedk(  rO e
       Z eej                   ej"                        Zej&                  j)                  ej+                  d	             yy)    Nwz<http://schemas.openxmlformats.org/wordprocessingml/2006/mainc                     t        j                  d      } | j                  dd       | j                  ddd       | j                         }t        j
                  j                  |j                        s9t        d	j                  |j                               t        j                  d
       |j                  Kt        j
                  j                  |j                        s"	 t	        j                  |j                         |S |S # t        $ r= t        dj                  |j                               t        j                  d
       Y |S w xY w)NzGA pure python-based utility to extract text and images from docx files.)descriptiondocxzpath of the docx file)helpz-iz	--img_dirz#path of directory to extract imageszFile {} does not exist.   zUnable to create img_dir {})argparseArgumentParseradd_argument
parse_argsospathexistsr   printformatsysexitimg_dirmakedirsOSError)parserargss     S/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/docx2txt/docx2txt.pyprocess_argsr      s	   $$ 2D EF %<=
k 1D E D77>>$))$'..tyy9:||ww~~dll+DLL) K4K  3::4<<HIKs   &D	 	AEEc                 `    | j                  d      \  }}t        |   }dj                  ||      S )a  
    Stands for 'qualified name', a utility function to turn a namespace
    prefixed tag name into a Clark-notation qualified tag name for lxml. For
    example, ``qn('p:cSld')`` returns ``'{http://schemas.../main}cSld'``.
    Source: https://github.com/python-openxml/python-docx/
    :z{{{}}}{})splitnsmapr   )tagprefixtagrooturis       r   qnr#   &   s1     iinOFG
-CS'**    c                    d}t        j                  |       }|j                         D ]  }|j                  t	        d      k(  r|j
                  }|||ndz  }1|j                  t	        d      k(  r|dz  }O|j                  t	        d      t	        d      fv r|dz  }w|j                  t	        d      k(  s|d	z  } |S )
z
    A string representing the textual content of this run, with content
    child elements like ``<w:tab/>`` translated to their Python
    equivalent.
    Adapted from: https://github.com/python-openxml/python-docx/
     zw:tzw:tab	zw:brzw:cr
zw:pz

)ET
fromstringiterr   r#   text)xmlr,   rootchildt_texts        r   xml2textr1   2   s     D==D 	995	!ZZFf0Fb8DYY"W+%DLDYY2f:r&z22DLDYY"U)#FND	 Kr$   c                    d}t        j                  |       }|j                         }d}|D ]6  }t        j                  ||      s|t        |j                  |            z  }8 d}|t        |j                  |            z  }d}|D ]6  }t        j                  ||      s|t        |j                  |            z  }8 ||D ]  }t        j                  j                  |      \  }	}
|
dv s*t        j                  j                  |t        j                  j                  |            }t        |d      5 }|j                  |j                  |             d d d         |j                          |j                         S # 1 sw Y   xY w)Nr&   zword/header[0-9]*.xmlzword/document.xmlzword/footer[0-9]*.xml)z.jpgz.jpegz.pngz.bmpwb)zipfileZipFilenamelistrematchr1   readr   r   splitextjoinbasenameopenwriteclosestrip)r   r   r,   zipffilelistheader_xmlsfnamedoc_xmlfooter_xmls_	extension	dst_fnamedst_fs                r   processrK   H   sf   D ??4 D}}H *K /88K'HTYYu-..D/
 "GHTYYw'((D *K /88K'HTYYu-..D/  	2E77++E2LAy==GGLL"''2B2B52IJ	)T* 2eKK		% 012 2		2 	JJL::<	2 2s   8!FF	__main__zutf-8)N)r	   r7   xml.etree.ElementTreeetreeElementTreer)   r4   r   r   r   r   r#   r1   rK   __name__r   r   r   r,   stdoutr>   encode r$   r   <module>rT      s     	 " "  	 
 
LM0	+,#L z>D499dll+DJJT[[)* r$   