mirror of
https://gitlab.crans.org/mediatek/med.git
synced 2025-07-07 09:44:01 +02:00
Split comic strips and mangas
This commit is contained in:
@ -58,6 +58,7 @@ class BedetequeScraper:
|
||||
regex_subtitle = r'<h2>\s*(.*)</h2>'
|
||||
regex_publish_date = r'datePublished\" content=\"([\d-]*)\">'
|
||||
regex_nb_of_pages = r'numberOfPages\">(\d*)</span'
|
||||
regex_format = r'<label>Format : </label>Format (\w+)</li>'
|
||||
regex_author = r'<span itemprop=\"author\">(((?!<).)*)</span>'
|
||||
regex_illustrator = r'span itemprop=\"illustrator\">(((?!<).)*)</span'
|
||||
|
||||
@ -89,6 +90,11 @@ class BedetequeScraper:
|
||||
elif 'number_of_pages' not in data:
|
||||
data['number_of_pages'] = 0
|
||||
|
||||
# Get format of the book
|
||||
search_format = re.search(regex_format, content)
|
||||
if search_format:
|
||||
data['format'] = search_format.group(1).lower()
|
||||
|
||||
# Get author and illustrator
|
||||
author = re.search(regex_author, content)
|
||||
if 'author' not in data:
|
||||
|
Reference in New Issue
Block a user