1
0
mirror of https://gitlab.crans.org/mediatek/med.git synced 2025-07-06 09:23:55 +02:00

Add bedeteque scraper

This commit is contained in:
Alexandre Iooss
2019-08-19 12:17:49 +02:00
parent d6df704d09
commit a6b5d8b822
5 changed files with 131 additions and 20 deletions

View File

@ -7,6 +7,8 @@ import urllib.request
from django.forms import ModelForm
from .scraper import BedetequeScraper
class MediaAdminForm(ModelForm):
def __init__(self, *args, **kwargs):
@ -16,9 +18,24 @@ class MediaAdminForm(ModelForm):
isbn_field.widget.template_name = "media/isbn_button.html"
isbn_field.widget.attrs.update({'autofocus': 'autofocus'})
def download_data(self, isbn):
def download_data_bedeteque(self, isbn):
    """
    Fetch data for the given ISBN from bedeteque.

    The ISBN is searched through ``BedetequeScraper``. When the search
    yields results, the first one is scraped and whatever
    ``scrap_bd_info`` returns is passed to ``self.cleaned_data.update``.

    :param isbn: ISBN to search for
    :return: True if a result was found and applied, False otherwise
    """
    bedeteque = BedetequeScraper()
    matches = bedeteque.search_by_isbn(isbn)
    if matches:
        # Only the first match is used; it is treated as the most accurate
        scraped = bedeteque.scrap_bd_info(matches[0])
        self.cleaned_data.update(scraped)
        return True
    return False
def download_data_openlibrary(self, isbn):
"""
Download data from openlibrary
:return True if success
"""
api_url = "https://openlibrary.org/api/books?bibkeys=ISBN:{}" \
"&format=json&jscmd=data".format(isbn)
@ -26,27 +43,31 @@ class MediaAdminForm(ModelForm):
data = json.loads(url.read().decode())
if data and data['ISBN:' + isbn]:
data = data['ISBN:' + isbn]
# Fill the data
# TODO implement authors, side_identifier
if 'title' in data:
self.cleaned_data['title'] = data['title']
if 'subtitle' in data:
self.cleaned_data['subtitle'] = data['subtitle']
if 'url' in data:
# Fill the data
self.cleaned_data['external_url'] = data['url']
if 'number_of_pages' in data:
self.cleaned_data['number_of_pages'] = \
data['number_of_pages']
if 'title' in data:
self.cleaned_data['title'] = data['title']
if 'subtitle' in data:
self.cleaned_data['subtitle'] = data['subtitle']
if 'number_of_pages' in data:
self.cleaned_data['number_of_pages'] = \
data['number_of_pages']
return True
return False
def clean(self):
    """
    If the user asked to fetch ISBN data, download it before validating.

    The lookup only runs when ``_continue`` is present in the POST data
    and an ISBN is available in ``cleaned_data``. bedeteque is queried
    first; OpenLibrary is only queried when bedeteque reports no result.

    :return: the result of the parent class ``clean()``
    """
    # TODO implement authors, side_identifier
    if "_continue" in self.request.POST:
        isbn = self.cleaned_data.get('isbn')
        if isbn:
            # ISBN is present, try with bedeteque
            scrap_result = self.download_data_bedeteque(isbn)
            if not scrap_result:
                # Try with OpenLibrary
                self.download_data_openlibrary(isbn)

    return super().clean()