
    ,h                     :    d dl Z d dlmZ ddlmZ  G d de      Zy)    N)BeautifulSoup   )
BaseParserc                   4    e Zd ZdZd Zd Zd Zd Zd Zd Z	y)	ParserzExtract text from epubc                 V   t        j                  |      }d}| j                  |      D ]}  }|j                  d      st	        |j                  |      d      }g d}|j                  |      D ]5  }|j                  r|j                  j                         nd}	|	s.||	dz   z  }7  |S )N htmllxmlfeatures)titleph1h2h3h4
)	zipfileZipFile_Parser__epub_sectionsendswithr   openfind_alltextstrip)
selffilenamekwargsbookresult	text_namesouphtml_content_tagschild
inner_texts
             ^/var/www/html/Resume-Scraper/venv/lib/python3.12/site-packages/textract/parsers/epub_parser.pyextractzParser.extract
   s    x(--d3 	0I%%f- 9!5GD F'89 038::UZZ--/2
j4//F0	0     c                 L    | j                  |      }| j                  ||      }|S N)_Parser__get_opf_paths_Parser__get_item_paths)r   r    	opf_paths
item_pathss       r'   __epub_sectionszParser.__epub_sections   s*    ((.	**4;
r)   c                     |j                  d      }t        |d      }|j                  j                  d      D cg c]  }|d   	 c}S c c}w )NzMETA-INF/container.xmlr   r   rootfilez	full-path)r   r   	rootfilesr   )r   r    meta_inf	meta_soupfs        r'   __get_opf_pathszParser.__get_opf_paths   sF    9956!(V<	(1(;(;(D(DZ(PQ1+QQQs   Ac           	         g }|D ]x  }t        |j                  |      d      }|j                  j                  d      }|D ];  }| j	                  ||d         }|j                  | j                  ||d                = z |S )Nr   itemrefidrefhref)r   r   spiner   _Parser__get_itemappend_Parser__get_full_item_path)	r   r    r.   r/   opf_pathopf_soup
epub_items	epub_itemitems	            r'   __get_item_pathszParser.__get_item_paths"   s    
! 	QH$TYYx%8&AH!00;J' Q	x71CD!!$";";D$v,"OPQ	Q r)   c                 \    |j                   j                  d      D ]  }|d   |k(  s|c S  y )NrD   id)manifestr   )r   rA   item_idrD   s       r'   
__get_itemzParser.__get_item,   s8    %%..v6 	DDzW$	 r)   c                 X    |j                         D ]  }|j                  |      s|c S  y r+   )namelistr   )r   r    partial_pathr   s       r'   __get_full_item_pathzParser.__get_full_item_path2   s*     	 H  .	 r)   N)
__name__
__module____qualname____doc__r(   r   r,   r-   r=   r?    r)   r'   r   r      s$     
R
 r)   r   )r   bs4r   utilsr   r   rS   r)   r'   <module>rV      s      . Z . r)   