1
0
mirror of https://gitlab.crans.org/mediatek/med.git synced 2025-07-09 16:10:20 +02:00

Add script to regenerate side identifiers

This commit is contained in:
Yohann D'ANELLO
2020-09-28 10:51:21 +02:00
parent be76bf4857
commit 9ecd876923
3 changed files with 121 additions and 70 deletions

View File

@@ -1,10 +1,10 @@
import re
import unicodedata
from argparse import FileType
from sys import stdin
from django.core.management import BaseCommand
from media.models import Auteur, Roman
from media.forms import generate_side_identifier
from media.models import Roman, Auteur
class Command(BaseCommand):
@@ -29,27 +29,9 @@ class Command(BaseCommand):
continue
title = book[1]
title_normalized = title.upper()
title_normalized = title_normalized.replace('', '\'')
title_normalized = ''.join(
char
for char in unicodedata.normalize(
'NFKD', title_normalized.casefold())
if all(not unicodedata.category(char).startswith(cat)
for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
).casefold().upper()
title_normalized = re.sub(r'^DE ', '', title_normalized)
title_normalized = re.sub(r'^LE ', '', title_normalized)
title_normalized = re.sub(r'^LA ', '', title_normalized)
title_normalized = re.sub(r'^LES ', '', title_normalized)
title_normalized = re.sub(r'^L\'', '', title_normalized)
title_normalized = re.sub(r'^THE ', '', title_normalized)
title_normalized = re.sub(r'Œ', 'OE', title_normalized)
title_normalized = title_normalized.replace(' ', '')
authors = [Auteur.objects.get_or_create(name=n)[0]
for n in book[0].split(';')]
side_identifier = "{:.3} {:.3}" \
.format(authors[0].name.upper(), title_normalized, )
side_identifier = generate_side_identifier(title, authors)
roman = Roman.objects.create(
title=title,
side_identifier=side_identifier,