mirror of
https://gitlab.crans.org/mediatek/med.git
synced 2025-07-06 09:23:55 +02:00
Add bedeteque scraper
This commit is contained in:
@ -7,6 +7,8 @@ import urllib.request
|
||||
|
||||
from django.forms import ModelForm
|
||||
|
||||
from .scraper import BedetequeScraper
|
||||
|
||||
|
||||
class MediaAdminForm(ModelForm):
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -16,9 +18,24 @@ class MediaAdminForm(ModelForm):
|
||||
isbn_field.widget.template_name = "media/isbn_button.html"
|
||||
isbn_field.widget.attrs.update({'autofocus': 'autofocus'})
|
||||
|
||||
def download_data(self, isbn):
|
||||
def download_data_bedeteque(self, isbn):
    """
    Look up ``isbn`` on bedeteque and merge the result into the form data.

    The lookup goes through ``BedetequeScraper.search_by_isbn``. When it
    yields something, the first entry is handed to ``scrap_bd_info`` and
    whatever that returns is passed to ``self.cleaned_data.update``.

    :param isbn: ISBN to search for
    :return: True if the search gave at least one result, False otherwise
    """
    scraper = BedetequeScraper()
    matches = scraper.search_by_isbn(isbn)
    if matches:
        # The first search result is taken as the most accurate one
        self.cleaned_data.update(scraper.scrap_bd_info(matches[0]))
        return True
    return False
|
||||
|
||||
def download_data_openlibrary(self, isbn):
|
||||
"""
|
||||
Download data from openlibrary
|
||||
:return True if success
|
||||
"""
|
||||
api_url = "https://openlibrary.org/api/books?bibkeys=ISBN:{}" \
|
||||
"&format=json&jscmd=data".format(isbn)
|
||||
@ -26,27 +43,31 @@ class MediaAdminForm(ModelForm):
|
||||
data = json.loads(url.read().decode())
|
||||
if data and data['ISBN:' + isbn]:
|
||||
data = data['ISBN:' + isbn]
|
||||
|
||||
# Fill the data
|
||||
# TODO implement authors, side_identifier
|
||||
if 'title' in data:
|
||||
self.cleaned_data['title'] = data['title']
|
||||
if 'subtitle' in data:
|
||||
self.cleaned_data['subtitle'] = data['subtitle']
|
||||
if 'url' in data:
|
||||
# Fill the data
|
||||
self.cleaned_data['external_url'] = data['url']
|
||||
if 'number_of_pages' in data:
|
||||
self.cleaned_data['number_of_pages'] = \
|
||||
data['number_of_pages']
|
||||
if 'title' in data:
|
||||
self.cleaned_data['title'] = data['title']
|
||||
if 'subtitle' in data:
|
||||
self.cleaned_data['subtitle'] = data['subtitle']
|
||||
if 'number_of_pages' in data:
|
||||
self.cleaned_data['number_of_pages'] = \
|
||||
data['number_of_pages']
|
||||
return True
|
||||
return False
|
||||
|
||||
def clean(self):
    """
    Download book data from the ISBN before validating the form.

    Nothing is fetched unless the POST data contains ``_continue`` and
    ``cleaned_data`` holds a non-empty ``isbn``. In that case bedeteque is
    queried first; OpenLibrary is only queried when bedeteque reports no
    result.

    :return: the value returned by the parent class ``clean()``
    """
    if "_continue" in self.request.POST:
        isbn = self.cleaned_data.get('isbn')
        # Short-circuit: OpenLibrary is a fallback, used only when the
        # bedeteque lookup returns a falsy result.
        if isbn and not self.download_data_bedeteque(isbn):
            self.download_data_openlibrary(isbn)

    return super().clean()
|
||||
|
Reference in New Issue
Block a user