inspire-harvester: add more transformation rules#507
inspire-harvester: add more transformation rules#507jrcastro2 wants to merge 1 commit intoCERNDocumentServer:masterfrom
Conversation
dd20fcc to
8882b80
Compare
| if degree_type: | ||
| result["type"] = degree_type | ||
| if institutions: | ||
| uni = institutions[0].get("name") |
There was a problem hiding this comment.
are we okay with this? any ohter ideas?
There was a problem hiding this comment.
no, I would fail if there are more values than 1
|
|
||
| pub_infos = self.inspire_metadata.get("publication_info", []) | ||
| if pub_infos: | ||
| journal_cf = self._transform_journal(pub_infos[0]) |
There was a problem hiding this comment.
there might be multple pub_infos ... is it okay to take the first one?
| if lang: | ||
| trans_title["lang"] = lang |
There was a problem hiding this comment.
it would be a bit more readable if you assign all values, even if they default to None and clean the empty keys at the end, we will avoid nested if statements
| for translation in translations: | ||
| lang = translation.get("language") | ||
| title = translation.get("title") | ||
| subtitle = translation.get("subtitle") | ||
| if title: | ||
| trans_title = {"title": title, "type": {"id": "translated-title"}} | ||
| if lang: | ||
| trans_title["lang"] = lang | ||
| rdm_additional_titles.append(trans_title) | ||
| if subtitle: | ||
| sub = {"title": subtitle, "type": {"id": "subtitle"}} | ||
| if lang: | ||
| sub["lang"] = lang | ||
| rdm_additional_titles.append(sub) |
There was a problem hiding this comment.
| for translation in translations: | |
| lang = translation.get("language") | |
| title = translation.get("title") | |
| subtitle = translation.get("subtitle") | |
| if title: | |
| trans_title = {"title": title, "type": {"id": "translated-title"}} | |
| if lang: | |
| trans_title["lang"] = lang | |
| rdm_additional_titles.append(trans_title) | |
| if subtitle: | |
| sub = {"title": subtitle, "type": {"id": "subtitle"}} | |
| if lang: | |
| sub["lang"] = lang | |
| rdm_additional_titles.append(sub) | |
| for translation in translations: | |
| lang = translation.get("language") | |
| title = translation.get("title") | |
| subtitle = translation.get("subtitle") | |
| type = None | |
| if title: | |
| type = "translated-title" | |
| elif subtitle: | |
| type = "subtitle" | |
| else: | |
| raise | |
| additional_title = {"title": title or subtitle, "type": {"id": "translated-title"}, "lang": lang} | |
| ... remove none keys.... | |
| rdm_additional_titles.append(additional title) |
not sure if it is more readable this way, so it is not a strong opinion,
| if not value: | ||
| continue | ||
|
|
||
| if schema in ["PACS", "CERN LIBRARY"]: |
There was a problem hiding this comment.
lets maybe leave a comment that it was agreed to drop these schemes.
Did you see maybe UDC schema in any of the records?
| if journal_record := info.get("journal_record"): | ||
| related_identifiers.append( | ||
| { | ||
| "identifier": journal_record, |
There was a problem hiding this comment.
this info about journal should also go to journal:journal custom field
| related_identifiers.append( | ||
| { | ||
| "identifier": parent_rep, | ||
| "scheme": "cdsref", |
There was a problem hiding this comment.
I think it was
| "scheme": "cdsref", | |
| "scheme": "cds_ref", |
No description provided.